9p: VFS switches for 9p2000.L: protocol and client changes

[safe/jmp/linux-2.6] / net / netlink / af_netlink.c
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c

index 8b6bbb3..a2eb965 100644 (file)
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -83,6 +83,11 @@ struct netlink_sock {
         struct module           *module;
  };
  
+struct listeners_rcu_head {
+       struct rcu_head rcu_head;       /* handed to call_rcu() */
+       void *ptr;                      /* block the RCU callback kfree()s */
+};
+
  #define NETLINK_KERNEL_SOCKET  0x1
  #define NETLINK_RECV_PKTINFO   0x2
  #define NETLINK_BROADCAST_SEND_ERROR   0x4
@@ -172,9 +177,11 @@ static void netlink_sock_destruct(struct sock *sk)
   * this, _but_ remember, it adds useless work on UP machines.
   */
  
-static void netlink_table_grab(void)
+void netlink_table_grab(void)
         __acquires(nl_table_lock)
  {
+       might_sleep();
+
         write_lock_irq(&nl_table_lock);
  
         if (atomic_read(&nl_table_users)) {
@@ -195,7 +202,7 @@ static void netlink_table_grab(void)
         }
  }
  
-static void netlink_table_ungrab(void)
+void netlink_table_ungrab(void)
         __releases(nl_table_lock)
  {
         write_unlock_irq(&nl_table_lock);
@@ -421,7 +428,8 @@ static int __netlink_create(struct net *net, struct socket *sock,
         return 0;
  }
  
-static int netlink_create(struct net *net, struct socket *sock, int protocol)
+static int netlink_create(struct net *net, struct socket *sock, int protocol,
+                         int kern)
  {
         struct module *module = NULL;
         struct mutex *cb_mutex;
@@ -447,9 +455,14 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol)
         if (nl_table[protocol].registered &&
             try_module_get(nl_table[protocol].module))
                 module = nl_table[protocol].module;
+       else
+               err = -EPROTONOSUPPORT;
         cb_mutex = nl_table[protocol].cb_mutex;
         netlink_unlock_table();
  
+       if (err < 0)
+               goto out;
+
         err = __netlink_create(net, sock, cb_mutex, protocol);
         if (err < 0)
                 goto out_module;
@@ -490,7 +503,7 @@ static int netlink_release(struct socket *sock)
  
         skb_queue_purge(&sk->sk_write_queue);
  
-       if (nlk->pid && !nlk->subscriptions) {
+       if (nlk->pid) {
                 struct netlink_notify n = {
                                                 .net = sock_net(sk),
                                                 .protocol = sk->sk_protocol,
@@ -532,7 +545,7 @@ static int netlink_autobind(struct socket *sock)
         struct hlist_head *head;
         struct sock *osk;
         struct hlist_node *node;
-       s32 pid = current->tgid;
+       s32 pid = task_tgid_vnr(current);
         int err;
         static s32 rover = -4097;
  
@@ -670,6 +683,9 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
         struct netlink_sock *nlk = nlk_sk(sk);
         struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
  
+       if (alen < sizeof(addr->sa_family))
+               return -EINVAL;
+
         if (addr->sa_family == AF_UNSPEC) {
                 sk->sk_state    = NETLINK_UNCONNECTED;
                 nlk->dst_pid    = 0;
@@ -700,7 +716,7 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr,
  {
         struct sock *sk = sock->sk;
         struct netlink_sock *nlk = nlk_sk(sk);
-       struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
+       DECLARE_SOCKADDR(struct sockaddr_nl *, nladdr, addr);
  
         nladdr->nl_family = AF_NETLINK;
         nladdr->nl_pad = 0;
@@ -962,6 +978,8 @@ struct netlink_broadcast_data {
         int delivered;
         gfp_t allocation;
         struct sk_buff *skb, *skb2;
+       int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data);
+       void *tx_data;
  };
  
  static inline int do_one_broadcast(struct sock *sk,
@@ -1004,6 +1022,9 @@ static inline int do_one_broadcast(struct sock *sk,
                 p->failure = 1;
                 if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
                         p->delivery_failure = 1;
+       } else if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
+               kfree_skb(p->skb2);
+               p->skb2 = NULL;
         } else if (sk_filter(sk, p->skb2)) {
                 kfree_skb(p->skb2);
                 p->skb2 = NULL;
@@ -1022,8 +1043,10 @@ out:
         return 0;
  }
  
-int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
-                     u32 group, gfp_t allocation)
+int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 pid,
+       u32 group, gfp_t allocation,
+       int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data),
+       void *filter_data)
  {
         struct net *net = sock_net(ssk);
         struct netlink_broadcast_data info;
@@ -1043,6 +1066,8 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
         info.allocation = allocation;
         info.skb = skb;
         info.skb2 = NULL;
+       info.tx_filter = filter;
+       info.tx_data = filter_data;
  
         /* While we sleep in clone, do not allow to change socket list */
  
@@ -1067,6 +1092,14 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
         }
         return -ESRCH;
  }
+EXPORT_SYMBOL(netlink_broadcast_filtered);
+
+int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
+                     u32 group, gfp_t allocation)
+{
+       return netlink_broadcast_filtered(ssk, skb, pid, group, allocation,
+               NULL, NULL);    /* no tx filter callback, no filter data */
+}
  EXPORT_SYMBOL(netlink_broadcast);
  
  struct netlink_set_err_data {
@@ -1080,21 +1113,27 @@ static inline int do_one_set_err(struct sock *sk,
                                  struct netlink_set_err_data *p)
  {
         struct netlink_sock *nlk = nlk_sk(sk);
+       int ret = 0;
  
         if (sk == p->exclude_sk)
                 goto out;
  
-       if (sock_net(sk) != sock_net(p->exclude_sk))
+       if (!net_eq(sock_net(sk), sock_net(p->exclude_sk)))
                 goto out;
  
         if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups ||
             !test_bit(p->group - 1, nlk->groups))
                 goto out;
  
+       if (p->code == ENOBUFS && nlk->flags & NETLINK_RECV_NO_ENOBUFS) {
+               ret = 1;
+               goto out;
+       }
+
         sk->sk_err = p->code;
         sk->sk_error_report(sk);
  out:
-       return 0;
+       return ret;
  }
  
  /**
@@ -1103,12 +1142,16 @@ out:
   * @pid: the PID of a process that we want to skip (if any)
   * @groups: the broadcast group that will notice the error
   * @code: error code, must be negative (as usual in kernelspace)
+ *
+ * Returns how many matching listeners were skipped because the code is
+ * ENOBUFS and they have set the NETLINK_RECV_NO_ENOBUFS socket option.
   */
-void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code)
+int netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code)
  {
         struct netlink_set_err_data info;
         struct hlist_node *node;
         struct sock *sk;
+       int ret = 0;
  
         info.exclude_sk = ssk;
         info.pid = pid;
@@ -1119,9 +1162,10 @@ void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code)
         read_lock(&nl_table_lock);
  
         sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
-               do_one_set_err(sk, &info);
+               ret += do_one_set_err(sk, &info);
  
         read_unlock(&nl_table_lock);
+       return ret;
  }
  EXPORT_SYMBOL(netlink_set_err);
  
@@ -1143,7 +1187,7 @@ static void netlink_update_socket_mc(struct netlink_sock *nlk,
  }
  
  static int netlink_setsockopt(struct socket *sock, int level, int optname,
-                             char __user *optval, int optlen)
+                             char __user *optval, unsigned int optlen)
  {
         struct sock *sk = sock->sk;
         struct netlink_sock *nlk = nlk_sk(sk);
@@ -1356,7 +1400,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
         struct netlink_sock *nlk = nlk_sk(sk);
         int noblock = flags&MSG_DONTWAIT;
         size_t copied;
-       struct sk_buff *skb;
+       struct sk_buff *skb, *frag __maybe_unused = NULL;
         int err;
  
         if (flags&MSG_OOB)
@@ -1368,6 +1412,35 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
         if (skb == NULL)
                 goto out;
  
+#ifdef CONFIG_COMPAT_NETLINK_MESSAGES
+       if (unlikely(skb_shinfo(skb)->frag_list)) {
+               bool need_compat = !!(flags & MSG_CMSG_COMPAT);
+
+               /*
+                * If this skb has a frag_list, then here that means that
+                * we will have to use the frag_list skb for compat tasks
+                * and the regular skb for non-compat tasks.
+                *
+                * The skb might (and likely will) be cloned, so we can't
+                * just reset frag_list and go on with things -- we need to
+                * keep that. For the compat case that's easy -- simply get
+                * a reference to the compat skb and free the regular one
+                * including the frag. For the non-compat case, we need to
+                * avoid sending the frag to the user -- so assign NULL but
+                * restore it below before freeing the skb.
+                */
+               if (need_compat) {
+                       struct sk_buff *compskb = skb_shinfo(skb)->frag_list;
+                       skb_get(compskb);
+                       kfree_skb(skb);
+                       skb = compskb;
+               } else {
+                       frag = skb_shinfo(skb)->frag_list;
+                       skb_shinfo(skb)->frag_list = NULL;
+               }
+       }
+#endif
+
         msg->msg_namelen = 0;
  
         copied = skb->len;
@@ -1398,6 +1471,11 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
         siocb->scm->creds = *NETLINK_CREDS(skb);
         if (flags & MSG_TRUNC)
                 copied = skb->len;
+
+#ifdef CONFIG_COMPAT_NETLINK_MESSAGES
+       skb_shinfo(skb)->frag_list = frag;
+#endif
+
         skb_free_datagram(sk, skb);
  
         if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2)
@@ -1453,7 +1531,8 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups,
         if (groups < 32)
                 groups = 32;
  
-       listeners = kzalloc(NLGRPSZ(groups), GFP_KERNEL);
+       listeners = kzalloc(NLGRPSZ(groups) + sizeof(struct listeners_rcu_head),
+                           GFP_KERNEL);
         if (!listeners)
                 goto out_sock_release;
  
@@ -1501,6 +1580,49 @@ netlink_kernel_release(struct sock *sk)
  EXPORT_SYMBOL(netlink_kernel_release);
  
  
+static void netlink_free_old_listeners(struct rcu_head *rcu_head)
+{
+       struct listeners_rcu_head *lrh;
+
+       lrh = container_of(rcu_head, struct listeners_rcu_head, rcu_head);
+       kfree(lrh->ptr);        /* call_rcu() callback: free the recorded block */
+}
+
+/*
+ * Set the group count of sk's protocol to @groups (at least 32), growing the
+ * listeners bitmap if needed. Takes no lock itself; returns 0 or -ENOMEM.
+ */
+int __netlink_change_ngroups(struct sock *sk, unsigned int groups)
+{
+       unsigned long *listeners, *old = NULL;
+       struct listeners_rcu_head *old_rcu_head;
+       struct netlink_table *tbl = &nl_table[sk->sk_protocol];
+
+       if (groups < 32)
+               groups = 32;
+
+       if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) {
+               listeners = kzalloc(NLGRPSZ(groups) + sizeof(*old_rcu_head),
+                                   GFP_ATOMIC);
+               if (!listeners)
+                       return -ENOMEM;
+               old = tbl->listeners;
+               memcpy(listeners, old, NLGRPSZ(tbl->groups));
+               rcu_assign_pointer(tbl->listeners, listeners);
+               /*
+                * Free the old block after a grace period. Its rcu_head lives
+                * in the tail reserved in that old block, not in the new one.
+                */
+               old_rcu_head = (void *)(old + NLGRPLONGS(tbl->groups));
+               old_rcu_head->ptr = old;
+               call_rcu(&old_rcu_head->rcu_head, netlink_free_old_listeners);
+       }
+       tbl->groups = groups;
+
+       return 0;
+}
+
  /**
   * netlink_change_ngroups - change number of multicast groups
   *
@@ -1515,33 +1637,24 @@ EXPORT_SYMBOL(netlink_kernel_release);
   */
  int netlink_change_ngroups(struct sock *sk, unsigned int groups)
  {
-       unsigned long *listeners, *old = NULL;
-       struct netlink_table *tbl = &nl_table[sk->sk_protocol];
-       int err = 0;
-
-       if (groups < 32)
-               groups = 32;
+       int err;
  
         netlink_table_grab();
-       if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) {
-               listeners = kzalloc(NLGRPSZ(groups), GFP_ATOMIC);
-               if (!listeners) {
-                       err = -ENOMEM;
-                       goto out_ungrab;
-               }
-               old = tbl->listeners;
-               memcpy(listeners, old, NLGRPSZ(tbl->groups));
-               rcu_assign_pointer(tbl->listeners, listeners);
-       }
-       tbl->groups = groups;
-
- out_ungrab:
+       err = __netlink_change_ngroups(sk, groups);     /* table is grabbed */
         netlink_table_ungrab();
-       synchronize_rcu();
-       kfree(old);
+
         return err;
  }
-EXPORT_SYMBOL(netlink_change_ngroups);
+
+void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
+{
+       struct sock *sk;
+       struct hlist_node *node;
+       struct netlink_table *tbl = &nl_table[ksk->sk_protocol];
+
+       sk_for_each_bound(sk, node, &tbl->mc_list)      /* each mc_list socket */
+               netlink_update_socket_mc(nlk_sk(sk), group, 0);
+}
  
  /**
   * netlink_clear_multicast_users - kick off multicast listeners
@@ -1553,18 +1666,10 @@ EXPORT_SYMBOL(netlink_change_ngroups);
   */
  void netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
  {
-       struct sock *sk;
-       struct hlist_node *node;
-       struct netlink_table *tbl = &nl_table[ksk->sk_protocol];
-
         netlink_table_grab();
-
-       sk_for_each_bound(sk, node, &tbl->mc_list)
-               netlink_update_socket_mc(nlk_sk(sk), group, 0);
-
+       __netlink_clear_multicast_users(ksk, group);    /* table is grabbed */
         netlink_table_ungrab();
  }
-EXPORT_SYMBOL(netlink_clear_multicast_users);
  
  void netlink_set_nonroot(int protocol, unsigned int flags)
  {
@@ -1647,7 +1752,7 @@ errout:
  }
  
  int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
-                      struct nlmsghdr *nlh,
+                      const struct nlmsghdr *nlh,
                        int (*dump)(struct sk_buff *skb,
                                    struct netlink_callback *),
                        int (*done)(struct netlink_callback *))
@@ -1720,7 +1825,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
         }
  
         rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
-                         NLMSG_ERROR, sizeof(struct nlmsgerr), 0);
+                         NLMSG_ERROR, payload, 0);
         errmsg = nlmsg_data(rep);
         errmsg->error = err;
         memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh));
@@ -1904,21 +2009,22 @@ static int netlink_seq_show(struct seq_file *seq, void *v)
         if (v == SEQ_START_TOKEN)
                 seq_puts(seq,
                          "sk       Eth Pid    Groups   "
-                        "Rmem     Wmem     Dump     Locks     Drops\n");
+                        "Rmem     Wmem     Dump     Locks     Drops     Inode\n");
         else {
                 struct sock *s = v;
                 struct netlink_sock *nlk = nlk_sk(s);
  
-               seq_printf(seq, "%p %-3d %-6d %08x %-8d %-8d %p %-8d %-8d\n",
+               seq_printf(seq, "%p %-3d %-6d %08x %-8d %-8d %p %-8d %-8d %-8lu\n",
                            s,
                            s->sk_protocol,
                            nlk->pid,
                            nlk->groups ? (u32)nlk->groups[0] : 0,
-                          atomic_read(&s->sk_rmem_alloc),
-                          atomic_read(&s->sk_wmem_alloc),
+                          sk_rmem_alloc_get(s),
+                          sk_wmem_alloc_get(s),
                            nlk->cb,
                            atomic_read(&s->sk_refcnt),
-                          atomic_read(&s->sk_drops)
+                          atomic_read(&s->sk_drops),
+                          sock_i_ino(s)
                         );
  
         }
@@ -1982,7 +2088,7 @@ static const struct proto_ops netlink_ops = {
         .sendpage =     sock_no_sendpage,
  };
  
-static struct net_proto_family netlink_family_ops = {
+static const struct net_proto_family netlink_family_ops = {
         .family = PF_NETLINK,
         .create = netlink_create,
         .owner  = THIS_MODULE,  /* for consistency 8) */
@@ -2026,10 +2132,10 @@ static int __init netlink_proto_init(void)
         if (!nl_table)
                 goto panic;
  
-       if (num_physpages >= (128 * 1024))
-               limit = num_physpages >> (21 - PAGE_SHIFT);
+       if (totalram_pages >= (128 * 1024))
+               limit = totalram_pages >> (21 - PAGE_SHIFT);
         else
-               limit = num_physpages >> (23 - PAGE_SHIFT);
+               limit = totalram_pages >> (23 - PAGE_SHIFT);
  
         order = get_bitmask_order(limit) - 1 + PAGE_SHIFT;
         limit = (1UL << order) / sizeof(struct hlist_head);