sctp: Tag messages that can be Nagle delayed at creation.

[safe/jmp/linux-2.6] / include / net / sock.h
diff --git a/include/net/sock.h b/include/net/sock.h

index 8ab0514..e1777db 100644 (file)
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -198,6 +198,7 @@ struct sock_common {
    *    @sk_rcvlowat: %SO_RCVLOWAT setting
    *    @sk_rcvtimeo: %SO_RCVTIMEO setting
    *    @sk_sndtimeo: %SO_SNDTIMEO setting
+  *    @sk_rxhash: flow hash received from netif layer
    *    @sk_filter: socket filtering instructions
    *    @sk_protinfo: private area, net family specific, when not using slab
    *    @sk_timer: sock cleanup timer
@@ -255,7 +256,6 @@ struct sock {
                 struct sk_buff *head;
                 struct sk_buff *tail;
                 int len;
-               int limit;
         } sk_backlog;
         wait_queue_head_t       *sk_sleep;
         struct dst_entry        *sk_dst_cache;
@@ -279,6 +279,9 @@ struct sock {
         int                     sk_gso_type;
         unsigned int            sk_gso_max_size;
         int                     sk_rcvlowat;
+#ifdef CONFIG_RPS
+       __u32                   sk_rxhash;
+#endif
         unsigned long           sk_flags;
         unsigned long           sk_lingertime;
         struct sk_buff_head     sk_error_queue;
@@ -604,10 +607,20 @@ static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb)
         skb->next = NULL;
  }
  
+/*
+ * True when receive queue + backlog + skb->truesize would exceed sk_rcvbuf
+ */
+static inline bool sk_rcvqueues_full(const struct sock *sk, const struct sk_buff *skb)
+{
+       unsigned int qsize = sk->sk_backlog.len + atomic_read(&sk->sk_rmem_alloc);
+
+       return qsize + skb->truesize > sk->sk_rcvbuf;
+}
+
  /* The per-socket spinlock must be held here. */
  static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *skb)
  {
-       if (sk->sk_backlog.len >= max(sk->sk_backlog.limit, sk->sk_rcvbuf << 1))
+       if (sk_rcvqueues_full(sk, skb))
                 return -ENOBUFS;
  
         __sk_add_backlog(sk, skb);
@@ -620,6 +633,40 @@ static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
         return sk->sk_backlog_rcv(sk, skb);
  }
  
+static inline void sock_rps_record_flow(const struct sock *sk)
+{
+#ifdef CONFIG_RPS /* body is empty when CONFIG_RPS is not defined */
+       struct rps_sock_flow_table *sock_flow_table;
+
+       rcu_read_lock(); /* the table pointer is fetched with rcu_dereference() */
+       sock_flow_table = rcu_dereference(rps_sock_flow_table);
+       rps_record_sock_flow(sock_flow_table, sk->sk_rxhash);
+       rcu_read_unlock();
+#endif /* CONFIG_RPS */
+}
+
+static inline void sock_rps_reset_flow(const struct sock *sk)
+{
+#ifdef CONFIG_RPS /* body is empty when CONFIG_RPS is not defined */
+       struct rps_sock_flow_table *sock_flow_table;
+
+       rcu_read_lock(); /* the table pointer is fetched with rcu_dereference() */
+       sock_flow_table = rcu_dereference(rps_sock_flow_table);
+       rps_reset_sock_flow(sock_flow_table, sk->sk_rxhash);
+       rcu_read_unlock();
+#endif /* CONFIG_RPS */
+}
+
+static inline void sock_rps_save_rxhash(struct sock *sk, u32 rxhash)
+{
+#ifdef CONFIG_RPS /* body is empty when CONFIG_RPS is not defined */
+       if (unlikely(sk->sk_rxhash != rxhash)) { /* act only when the hash changes */
+               sock_rps_reset_flow(sk); /* runs while sk_rxhash still holds the old value */
+               sk->sk_rxhash = rxhash;
+       }
+#endif /* CONFIG_RPS */
+}
+
  #define sk_wait_event(__sk, __timeo, __condition)                      \
         ({      int __rc;                                               \
                 release_sock(__sk);                                     \
@@ -974,6 +1021,16 @@ extern void release_sock(struct sock *sk);
                                 SINGLE_DEPTH_NESTING)
  #define bh_unlock_sock(__sk)   spin_unlock(&((__sk)->sk_lock.slock))
  
+static inline void lock_sock_bh(struct sock *sk)
+{
+       spin_lock_bh(&sk->sk_lock.slock); /* released by unlock_sock_bh() */
+}
+
+static inline void unlock_sock_bh(struct sock *sk)
+{
+       spin_unlock_bh(&sk->sk_lock.slock); /* counterpart of lock_sock_bh() */
+}
+
  extern struct sock             *sk_alloc(struct net *net, int family,
                                           gfp_t priority,
                                           struct proto *prot);
@@ -1197,7 +1254,8 @@ static inline struct dst_entry *
  __sk_dst_get(struct sock *sk)
  {
         return rcu_dereference_check(sk->sk_dst_cache, rcu_read_lock_held() ||
-                                                      sock_owned_by_user(sk));
+                                                      sock_owned_by_user(sk) ||
+                                                      lockdep_is_held(&sk->sk_lock.slock));
  }
  
  static inline struct dst_entry *
@@ -1235,8 +1293,11 @@ __sk_dst_set(struct sock *sk, struct dst_entry *dst)
         struct dst_entry *old_dst;
  
         sk_tx_queue_clear(sk);
-       old_dst = rcu_dereference_check(sk->sk_dst_cache,
-                                       lockdep_is_held(&sk->sk_dst_lock));
+       /*
+        * This can be called while sk is owned only by the caller, with no
+        * state that an rcu_dereference_check() condition could verify.
+        */
+       old_dst = rcu_dereference_raw(sk->sk_dst_cache);
         rcu_assign_pointer(sk->sk_dst_cache, dst);
         dst_release(old_dst);
  }
@@ -1574,7 +1635,24 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
                 sk->sk_stamp = kt;
  }
  
-extern void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb);
+extern void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
+                                    struct sk_buff *skb);
+
+static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
+                                         struct sk_buff *skb)
+{
+#define FLAGS_TS_OR_DROPS ((1UL << SOCK_RXQ_OVFL)                      | \
+                          (1UL << SOCK_RCVTSTAMP)                      | \
+                          (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)       | \
+                          (1UL << SOCK_TIMESTAMPING_SOFTWARE)          | \
+                          (1UL << SOCK_TIMESTAMPING_RAW_HARDWARE)      | \
+                          (1UL << SOCK_TIMESTAMPING_SYS_HARDWARE))
+
+       if (sk->sk_flags & FLAGS_TS_OR_DROPS) /* any of the listed sk_flags bits set */
+               __sock_recv_ts_and_drops(msg, sk, skb);
+       else /* none set: only remember the skb's timestamp on the socket */
+               sk->sk_stamp = skb->tstamp;
+}
  
  /**
   * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped