[NET]: Keep sk_backlog near sk_lock
diff --git a/include/net/sock.h b/include/net/sock.h
index 83805fe..a3366c3 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
 #include <linux/timer.h>
 #include <linux/cache.h>
 #include <linux/module.h>
+#include <linux/lockdep.h>
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>      /* struct sk_buff */
+#include <linux/mm.h>
 #include <linux/security.h>
 
 #include <linux/filter.h>
@@ -78,18 +80,17 @@ typedef struct {
        spinlock_t              slock;
        struct sock_iocb        *owner;
        wait_queue_head_t       wq;
+       /*
+        * We express the mutex-alike socket_lock semantics
+        * to the lock validator by explicitly managing
+        * the slock as a lock variant (in addition to
+        * the slock itself):
+        */
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+       struct lockdep_map dep_map;
+#endif
 } socket_lock_t;
 
-extern struct lock_class_key af_family_keys[AF_MAX];
-
-#define sock_lock_init(__sk) \
-do {   spin_lock_init(&((__sk)->sk_lock.slock)); \
-       lockdep_set_class(&(__sk)->sk_lock.slock, \
-                         af_family_keys + (__sk)->sk_family); \
-       (__sk)->sk_lock.owner = NULL; \
-       init_waitqueue_head(&((__sk)->sk_lock.wq)); \
-} while(0)
-
 struct sock;
 struct proto;
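For context: the dep_map field added above is consumed outside this header. lock_sock_nested() in net/core/sock.c (not part of this diff) annotates sk_lock with mutex semantics on top of the plain spinlock annotation that sk_lock.slock already gets; a paraphrased sketch:

void lock_sock_nested(struct sock *sk, int subclass)
{
        might_sleep();
        spin_lock_bh(&sk->sk_lock.slock);
        if (sk->sk_lock.owner)
                __lock_sock(sk);        /* sleep until the current owner lets go */
        sk->sk_lock.owner = (void *)1;
        spin_unlock(&sk->sk_lock.slock);
        /* tell the lock validator that sk_lock behaves like a mutex here: */
        mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
        local_bh_enable();
}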
 
@@ -201,6 +202,15 @@ struct sock {
        unsigned short          sk_type;
        int                     sk_rcvbuf;
        socket_lock_t           sk_lock;
+       /*
+        * The backlog queue is special: it is always used with
+        * the per-socket spinlock held and requires low-latency
+        * access. Therefore we special-case its implementation.
+        */
+       struct {
+               struct sk_buff *head;
+               struct sk_buff *tail;
+       } sk_backlog;
        wait_queue_head_t       *sk_sleep;
        struct dst_entry        *sk_dst_cache;
        struct xfrm_policy      *sk_policy[2];
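Keeping sk_backlog immediately after sk_lock puts the fields that are always manipulated together on the same cache line(s). The head/tail pair is only ever appended to while the per-socket spinlock is held, which is what lets the append stay this small; a minimal sketch, modelled on the sk_add_backlog() helper elsewhere in this header (the helper name below is illustrative):

/* Caller must hold sk->sk_lock.slock (bh_lock_sock). */
static inline void backlog_append(struct sock *sk, struct sk_buff *skb)
{
        if (!sk->sk_backlog.tail) {
                /* empty queue: skb becomes both head and tail */
                sk->sk_backlog.head = sk->sk_backlog.tail = skb;
        } else {
                /* link behind the current tail */
                sk->sk_backlog.tail->next = skb;
                sk->sk_backlog.tail = skb;
        }
        skb->next = NULL;
}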
@@ -220,15 +230,6 @@ struct sock {
        int                     sk_rcvlowat;
        unsigned long           sk_flags;
        unsigned long           sk_lingertime;
-       /*
-        * The backlog queue is special, it is always used with
-        * the per-socket spinlock held and requires low latency
-        * access. Therefore we special case it's implementation.
-        */
-       struct {
-               struct sk_buff *head;
-               struct sk_buff *tail;
-       } sk_backlog;
        struct sk_buff_head     sk_error_queue;
        struct proto            *sk_prot_creator;
        rwlock_t                sk_callback_lock;
@@ -570,7 +571,7 @@ struct proto {
        int                     *sysctl_rmem;
        int                     max_header;
 
-       kmem_cache_t            *slab;
+       struct kmem_cache               *slab;
        unsigned int            obj_size;
 
        atomic_t                *orphan_count;
@@ -665,7 +666,6 @@ struct sock_iocb {
        struct sock             *sk;
        struct scm_cookie       *scm;
        struct msghdr           *msg, async_msg;
-       struct iovec            async_iov;
        struct kiocb            *kiocb;
 };
 
@@ -746,11 +746,39 @@ static inline int sk_stream_wmem_schedule(struct sock *sk, int size)
  */
 #define sock_owned_by_user(sk) ((sk)->sk_lock.owner)
 
-extern void FASTCALL(lock_sock(struct sock *sk));
+/*
+ * Macro so as to not evaluate some arguments when
+ * lockdep is not enabled.
+ *
+ * Mark both the sk_lock and the sk_lock.slock as a
+ * per-address-family lock class.
+ */
+#define sock_lock_init_class_and_name(sk, sname, skey, name, key)      \
+do {                                                                   \
+       sk->sk_lock.owner = NULL;                                       \
+       init_waitqueue_head(&sk->sk_lock.wq);                           \
+       spin_lock_init(&(sk)->sk_lock.slock);                           \
+       debug_check_no_locks_freed((void *)&(sk)->sk_lock,              \
+                       sizeof((sk)->sk_lock));                         \
+       lockdep_set_class_and_name(&(sk)->sk_lock.slock,                \
+                       (skey), (sname));                               \
+       lockdep_init_map(&(sk)->sk_lock.dep_map, (name), (key), 0);     \
+} while (0)
+
+extern void FASTCALL(lock_sock_nested(struct sock *sk, int subclass));
+
+static inline void lock_sock(struct sock *sk)
+{
+       lock_sock_nested(sk, 0);
+}
+
 extern void FASTCALL(release_sock(struct sock *sk));
 
 /* BH context may only use the following locking interface. */
 #define bh_lock_sock(__sk)     spin_lock(&((__sk)->sk_lock.slock))
+#define bh_lock_sock_nested(__sk) \
+                               spin_lock_nested(&((__sk)->sk_lock.slock), \
+                               SINGLE_DEPTH_NESTING)
 #define bh_unlock_sock(__sk)   spin_unlock(&((__sk)->sk_lock.slock))
 
 extern struct sock             *sk_alloc(int family,
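The af_family_keys[] array and the old sock_lock_init() macro removed earlier in this patch move to net/core/sock.c, where per-address-family lock_class_key arrays and name strings are fed into sock_lock_init_class_and_name(), giving lockdep one class per protocol family for both sk_lock and sk_lock.slock. Roughly (a sketch of the net/core/sock.c side, array initializers elided):

static struct lock_class_key af_family_keys[AF_MAX];
static struct lock_class_key af_family_slock_keys[AF_MAX];
static const char *af_family_key_strings[AF_MAX];       /* "sk_lock-AF_INET", ... */
static const char *af_family_slock_key_strings[AF_MAX]; /* "slock-AF_INET", ... */

static void sock_lock_init(struct sock *sk)
{
        sock_lock_init_class_and_name(sk,
                        af_family_slock_key_strings[sk->sk_family],
                        af_family_slock_keys + sk->sk_family,
                        af_family_key_strings[sk->sk_family],
                        af_family_keys + sk->sk_family);
}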
@@ -859,41 +887,45 @@ extern void sock_init_data(struct socket *sock, struct sock *sk);
  *
  */
 
-static inline int sk_filter(struct sock *sk, struct sk_buff *skb, int needlock)
+static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
 {
        int err;
+       struct sk_filter *filter;
        
        err = security_sock_rcv_skb(sk, skb);
        if (err)
                return err;
        
-       if (sk->sk_filter) {
-               struct sk_filter *filter;
-               
-               if (needlock)
-                       bh_lock_sock(sk);
-               
-               filter = sk->sk_filter;
-               if (filter) {
-                       unsigned int pkt_len = sk_run_filter(skb, filter->insns,
-                                                            filter->len);
-                       err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
-               }
-
-               if (needlock)
-                       bh_unlock_sock(sk);
+       rcu_read_lock_bh();
+       filter = sk->sk_filter;
+       if (filter) {
+               unsigned int pkt_len = sk_run_filter(skb, filter->insns,
+                               filter->len);
+               err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
        }
+       rcu_read_unlock_bh();
+
        return err;
 }
 
 /**
+ *     sk_filter_rcu_free: Free a socket filter
+ *     @rcu: rcu_head that contains the sk_filter to free
+ */
+static inline void sk_filter_rcu_free(struct rcu_head *rcu)
+{
+       struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
+       kfree(fp);
+}
+
+/**
  *     sk_filter_release: Release a socket filter
  *     @sk: socket
  *     @fp: filter to remove
  *
  *     Remove a filter from a socket and release its resources.
  */
+
 static inline void sk_filter_release(struct sock *sk, struct sk_filter *fp)
 {
        unsigned int size = sk_filter_len(fp);
@@ -901,7 +933,7 @@ static inline void sk_filter_release(struct sock *sk, struct sk_filter *fp)
        atomic_sub(size, &sk->sk_omem_alloc);
 
        if (atomic_dec_and_test(&fp->refcnt))
-               kfree(fp);
+               call_rcu_bh(&fp->rcu, sk_filter_rcu_free);
 }
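The reader in sk_filter() above only holds rcu_read_lock_bh() around the sk->sk_filter dereference, so the writer must publish a new filter with an RCU-safe store and defer freeing the old one; that is what routing the final put through call_rcu_bh() and sk_filter_rcu_free() achieves. A sketch of the update side (the real attach path lives in net/core/filter.c; the helper name below is illustrative):

static void sock_replace_filter(struct sock *sk, struct sk_filter *fp)
{
        struct sk_filter *old_fp;

        rcu_read_lock_bh();
        old_fp = rcu_dereference(sk->sk_filter);
        rcu_assign_pointer(sk->sk_filter, fp);  /* readers now see the new filter */
        rcu_read_unlock_bh();

        if (old_fp)
                sk_filter_release(sk, old_fp);  /* old one is freed via call_rcu_bh() */
}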
 
 static inline void sk_filter_charge(struct sock *sk, struct sk_filter *fp)
@@ -942,7 +974,8 @@ static inline void sock_put(struct sock *sk)
                sk_free(sk);
 }
 
-extern int sk_receive_skb(struct sock *sk, struct sk_buff *skb);
+extern int sk_receive_skb(struct sock *sk, struct sk_buff *skb,
+                         const int nested);
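The new "nested" argument lets a caller say that the socket is being locked while another socket lock of the same lockdep class may already be held (for example a packet delivered from inside another socket's receive path), so bh_lock_sock_nested() is used instead of tripping a false lockdep report. A simplified sketch of how the flag is consumed in net/core/sock.c (error accounting omitted):

int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
{
        int rc = NET_RX_SUCCESS;

        if (sk_filter(sk, skb)) {
                kfree_skb(skb);
                goto out;
        }
        skb->dev = NULL;

        if (nested)
                bh_lock_sock_nested(sk);        /* SINGLE_DEPTH_NESTING */
        else
                bh_lock_sock(sk);
        if (!sock_owned_by_user(sk))
                rc = sk->sk_backlog_rcv(sk, skb);       /* deliver now */
        else
                sk_add_backlog(sk, skb);                /* owner drains it later */
        bh_unlock_sock(sk);
out:
        sock_put(sk);
        return rc;
}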
 
 /* Detach socket from process context.
  * Announce socket dead, detach it from wait queue and inode.
@@ -966,9 +999,23 @@ static inline void sock_graft(struct sock *sk, struct socket *parent)
        sk->sk_sleep = &parent->wait;
        parent->sk = sk;
        sk->sk_socket = parent;
+       security_sock_graft(sk, parent);
        write_unlock_bh(&sk->sk_callback_lock);
 }
 
+static inline void sock_copy(struct sock *nsk, const struct sock *osk)
+{
+#ifdef CONFIG_SECURITY_NETWORK
+       void *sptr = nsk->sk_security;
+#endif
+
+       memcpy(nsk, osk, osk->sk_prot->obj_size);
+#ifdef CONFIG_SECURITY_NETWORK
+       nsk->sk_security = sptr;
+       security_sk_clone(osk, nsk);
+#endif
+}
+
 extern int sock_i_uid(struct sock *sk);
 extern unsigned long sock_i_ino(struct sock *sk);
 
@@ -1062,7 +1109,7 @@ static inline int skb_copy_to_page(struct sock *sk, char __user *from,
 {
        if (skb->ip_summed == CHECKSUM_NONE) {
                int err = 0;
-               unsigned int csum = csum_and_copy_from_user(from,
+               __wsum csum = csum_and_copy_from_user(from,
                                                     page_address(page) + off,
                                                            copy, 0, &err);
                if (err)
@@ -1231,7 +1278,7 @@ static inline int sock_writeable(const struct sock *sk)
 
 static inline gfp_t gfp_any(void)
 {
-       return in_softirq() ? GFP_ATOMIC : GFP_KERNEL;
+       return in_atomic() ? GFP_ATOMIC : GFP_KERNEL;
 }
 
 static inline long sock_rcvtimeo(const struct sock *sk, int noblock)
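Basing gfp_any() on in_atomic() rather than in_softirq() makes it return GFP_ATOMIC in any context where sleeping is forbidden (hard IRQ, softirq, or under a spinlock), not just in softirq. A hedged usage sketch for a helper reachable from both process and interrupt context (the helper name is illustrative; skb_clone() and sock_queue_err_skb() are existing APIs):

static int queue_error_copy(struct sock *sk, struct sk_buff *skb)
{
        /* gfp_any() picks GFP_ATOMIC or GFP_KERNEL to match the calling context */
        struct sk_buff *copy = skb_clone(skb, gfp_any());

        if (copy == NULL)
                return -ENOMEM;
        return sock_queue_err_skb(sk, copy);
}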