Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/kaber/nf-next-2.6
author    David S. Miller <davem@davemloft.net>
          Tue, 24 Mar 2009 20:24:36 +0000 (13:24 -0700)
committer David S. Miller <davem@davemloft.net>
          Tue, 24 Mar 2009 20:24:36 +0000 (13:24 -0700)
net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
net/netfilter/nf_conntrack_core.c
net/netfilter/nf_conntrack_netlink.c
net/netfilter/nf_conntrack_proto_tcp.c
net/netfilter/nfnetlink_log.c
net/netfilter/x_tables.c
net/netlink/af_netlink.c

@@@ -126,6 -126,10 +126,10 @@@ static bool icmpv6_new(struct nf_conn *
                pr_debug("icmpv6: can't create new conn with type %u\n",
                         type + 128);
                nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple);
+               if (LOG_INVALID(nf_ct_net(ct), IPPROTO_ICMPV6))
+                       nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
+                                     "nf_ct_icmpv6: invalid new with type %d ",
+                                     type + 128);
                return false;
        }
        atomic_set(&ct->proto.icmp.count, 0);
@@@ -201,9 -205,8 +205,9 @@@ icmpv6_error(struct net *net, struct sk
  
        if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
            nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) {
 -              nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
 -                            "nf_ct_icmpv6: ICMPv6 checksum failed\n");
 +              if (LOG_INVALID(net, IPPROTO_ICMPV6))
 +                      nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
 +                                    "nf_ct_icmpv6: ICMPv6 checksum failed ");
                return -NF_ACCEPT;
        }
  
@@@ -54,7 -54,7 +54,7 @@@ EXPORT_SYMBOL_GPL(nf_conntrack_lock)
  unsigned int nf_conntrack_htable_size __read_mostly;
  EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
  
- int nf_conntrack_max __read_mostly;
unsigned int nf_conntrack_max __read_mostly;
  EXPORT_SYMBOL_GPL(nf_conntrack_max);
  
  struct nf_conn nf_conntrack_untracked __read_mostly;
@@@ -472,7 -472,8 +472,8 @@@ struct nf_conn *nf_conntrack_alloc(stru
        struct nf_conn *ct;
  
        if (unlikely(!nf_conntrack_hash_rnd_initted)) {
-               get_random_bytes(&nf_conntrack_hash_rnd, 4);
+               get_random_bytes(&nf_conntrack_hash_rnd,
+                               sizeof(nf_conntrack_hash_rnd));
                nf_conntrack_hash_rnd_initted = 1;
        }
  
@@@ -516,16 -517,17 +517,17 @@@ EXPORT_SYMBOL_GPL(nf_conntrack_alloc)
  static void nf_conntrack_free_rcu(struct rcu_head *head)
  {
        struct nf_conn *ct = container_of(head, struct nf_conn, rcu);
  
        nf_ct_ext_free(ct);
        kmem_cache_free(nf_conntrack_cachep, ct);
-       atomic_dec(&net->ct.count);
  }
  
  void nf_conntrack_free(struct nf_conn *ct)
  {
+       struct net *net = nf_ct_net(ct);
        nf_ct_ext_destroy(ct);
+       atomic_dec(&net->ct.count);
        call_rcu(&ct->rcu, nf_conntrack_free_rcu);
  }
  EXPORT_SYMBOL_GPL(nf_conntrack_free);
@@@ -726,13 -728,15 +728,15 @@@ nf_conntrack_in(struct net *net, u_int8
        NF_CT_ASSERT(skb->nfct);
  
        ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum);
 -      if (ret < 0) {
 +      if (ret <= 0) {
                /* Invalid: inverse of the return code tells
                 * the netfilter core what to do */
                pr_debug("nf_conntrack_in: Can't track with proto module\n");
                nf_conntrack_put(skb->nfct);
                skb->nfct = NULL;
                NF_CT_STAT_INC_ATOMIC(net, invalid);
+               if (ret == -NF_DROP)
+                       NF_CT_STAT_INC_ATOMIC(net, drop);
                return -ret;
        }
  
@@@ -1103,7 -1107,7 +1107,7 @@@ int nf_conntrack_set_hashsize(const cha
  
        /* We have to rehash for the new table anyway, so we also can
         * use a new random seed */
-       get_random_bytes(&rnd, 4);
+       get_random_bytes(&rnd, sizeof(rnd));
  
        /* Lookups in the old hash might happen in parallel, which means we
         * might get false negatives during connection lookup. New connections
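
The two get_random_bytes() hunks above replace the hard-coded length 4 with sizeof(), so the call keeps matching the variable's type if it ever changes. A minimal userspace sketch of the same idiom, assuming only /dev/urandom; the names are illustrative, not the kernel API:

#include <stdio.h>
#include <stdlib.h>

static int fill_random(void *buf, size_t len)
{
	FILE *f = fopen("/dev/urandom", "rb");
	size_t got;

	if (!f)
		return -1;
	got = fread(buf, 1, len, f);
	fclose(f);
	return got == len ? 0 : -1;
}

int main(void)
{
	unsigned int rnd;	/* were this to grow to a 64-bit type ... */

	if (fill_random(&rnd, sizeof(rnd)) < 0)	/* ... sizeof() still fits it */
		return EXIT_FAILURE;
	printf("seed: %u\n", rnd);
	return EXIT_SUCCESS;
}
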
@@@ -518,6 -518,7 +518,7 @@@ static int ctnetlink_conntrack_event(st
  nla_put_failure:
        rcu_read_unlock();
  nlmsg_failure:
+       nfnetlink_set_err(0, group, -ENOBUFS);
        kfree_skb(skb);
        return NOTIFY_DONE;
  }
@@@ -599,7 -600,8 +600,8 @@@ ctnetlink_parse_tuple_ip(struct nlattr 
  
        nla_parse_nested(tb, CTA_IP_MAX, attr, NULL);
  
-       l3proto = nf_ct_l3proto_find_get(tuple->src.l3num);
+       rcu_read_lock();
+       l3proto = __nf_ct_l3proto_find(tuple->src.l3num);
  
        if (likely(l3proto->nlattr_to_tuple)) {
                ret = nla_validate_nested(attr, CTA_IP_MAX,
                        ret = l3proto->nlattr_to_tuple(tb, tuple);
        }
  
-       nf_ct_l3proto_put(l3proto);
+       rcu_read_unlock();
  
        return ret;
  }
@@@ -633,7 -635,8 +635,8 @@@ ctnetlink_parse_tuple_proto(struct nlat
                return -EINVAL;
        tuple->dst.protonum = nla_get_u8(tb[CTA_PROTO_NUM]);
  
-       l4proto = nf_ct_l4proto_find_get(tuple->src.l3num, tuple->dst.protonum);
+       rcu_read_lock();
+       l4proto = __nf_ct_l4proto_find(tuple->src.l3num, tuple->dst.protonum);
  
        if (likely(l4proto->nlattr_to_tuple)) {
                ret = nla_validate_nested(attr, CTA_PROTO_MAX,
                        ret = l4proto->nlattr_to_tuple(tb, tuple);
        }
  
-       nf_ct_l4proto_put(l4proto);
+       rcu_read_unlock();
  
        return ret;
  }
@@@ -989,10 -992,11 +992,11 @@@ ctnetlink_change_protoinfo(struct nf_co
  
        nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, NULL);
  
-       l4proto = nf_ct_l4proto_find_get(nf_ct_l3num(ct), nf_ct_protonum(ct));
+       rcu_read_lock();
+       l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
        if (l4proto->from_nlattr)
                err = l4proto->from_nlattr(tb, ct);
-       nf_ct_l4proto_put(l4proto);
+       rcu_read_unlock();
  
        return err;
  }
@@@ -1062,6 -1066,10 +1066,10 @@@ ctnetlink_change_conntrack(struct nf_co
  {
        int err;
  
+       /* only allow NAT changes and master assignation for new conntracks */
+       if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST] || cda[CTA_TUPLE_MASTER])
+               return -EOPNOTSUPP;
        if (cda[CTA_HELP]) {
                err = ctnetlink_change_helper(ct, cda);
                if (err < 0)
@@@ -1124,13 -1132,11 +1132,11 @@@ ctnetlink_event_report(struct nf_conn *
                                  report);
  }
  
- static int
+ static struct nf_conn *
  ctnetlink_create_conntrack(struct nlattr *cda[],
                           struct nf_conntrack_tuple *otuple,
                           struct nf_conntrack_tuple *rtuple,
-                          struct nf_conn *master_ct,
-                          u32 pid,
-                          int report)
+                          u8 u3)
  {
        struct nf_conn *ct;
        int err = -EINVAL;
  
        ct = nf_conntrack_alloc(&init_net, otuple, rtuple, GFP_ATOMIC);
        if (IS_ERR(ct))
-               return -ENOMEM;
+               return ERR_PTR(-ENOMEM);
  
        if (!cda[CTA_TIMEOUT])
-               goto err;
+               goto err1;
        ct->timeout.expires = ntohl(nla_get_be32(cda[CTA_TIMEOUT]));
  
        ct->timeout.expires = jiffies + ct->timeout.expires * HZ;
                char *helpname;
   
                err = ctnetlink_parse_help(cda[CTA_HELP], &helpname);
-               if (err < 0) {
-                       rcu_read_unlock();
-                       goto err;
-               }
+               if (err < 0)
+                       goto err2;
  
                helper = __nf_conntrack_helper_find_byname(helpname);
                if (helper == NULL) {
  #ifdef CONFIG_MODULES
                        if (request_module("nfct-helper-%s", helpname) < 0) {
                                err = -EOPNOTSUPP;
-                               goto err;
+                               goto err1;
                        }
  
                        rcu_read_lock();
                        helper = __nf_conntrack_helper_find_byname(helpname);
                        if (helper) {
-                               rcu_read_unlock();
                                err = -EAGAIN;
-                               goto err;
+                               goto err2;
                        }
                        rcu_read_unlock();
  #endif
                        err = -EOPNOTSUPP;
-                       goto err;
+                       goto err1;
                } else {
                        struct nf_conn_help *help;
  
                        help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
                        if (help == NULL) {
-                               rcu_read_unlock();
                                err = -ENOMEM;
-                               goto err;
+                               goto err2;
                        }
  
                        /* not in hash table yet so not strictly necessary */
        } else {
                /* try an implicit helper assignation */
                err = __nf_ct_try_assign_helper(ct, GFP_ATOMIC);
-               if (err < 0) {
-                       rcu_read_unlock();
-                       goto err;
-               }
+               if (err < 0)
+                       goto err2;
        }
  
        if (cda[CTA_STATUS]) {
                err = ctnetlink_change_status(ct, cda);
-               if (err < 0) {
-                       rcu_read_unlock();
-                       goto err;
-               }
+               if (err < 0)
+                       goto err2;
        }
  
        if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) {
                err = ctnetlink_change_nat(ct, cda);
-               if (err < 0) {
-                       rcu_read_unlock();
-                       goto err;
-               }
+               if (err < 0)
+                       goto err2;
        }
  
  #ifdef CONFIG_NF_NAT_NEEDED
        if (cda[CTA_NAT_SEQ_ADJ_ORIG] || cda[CTA_NAT_SEQ_ADJ_REPLY]) {
                err = ctnetlink_change_nat_seq_adj(ct, cda);
-               if (err < 0) {
-                       rcu_read_unlock();
-                       goto err;
-               }
+               if (err < 0)
+                       goto err2;
        }
  #endif
  
        if (cda[CTA_PROTOINFO]) {
                err = ctnetlink_change_protoinfo(ct, cda);
-               if (err < 0) {
-                       rcu_read_unlock();
-                       goto err;
-               }
+               if (err < 0)
+                       goto err2;
        }
  
        nf_ct_acct_ext_add(ct, GFP_ATOMIC);
  #endif
  
        /* setup master conntrack: this is a confirmed expectation */
-       if (master_ct) {
+       if (cda[CTA_TUPLE_MASTER]) {
+               struct nf_conntrack_tuple master;
+               struct nf_conntrack_tuple_hash *master_h;
+               struct nf_conn *master_ct;
+               err = ctnetlink_parse_tuple(cda, &master, CTA_TUPLE_MASTER, u3);
+               if (err < 0)
+                       goto err2;
+               master_h = __nf_conntrack_find(&init_net, &master);
+               if (master_h == NULL) {
+                       err = -ENOENT;
+                       goto err2;
+               }
+               master_ct = nf_ct_tuplehash_to_ctrack(master_h);
+               nf_conntrack_get(&master_ct->ct_general);
                __set_bit(IPS_EXPECTED_BIT, &ct->status);
                ct->master = master_ct;
        }
  
        add_timer(&ct->timeout);
        nf_conntrack_hash_insert(ct);
        rcu_read_unlock();
-       ctnetlink_event_report(ct, pid, report);
-       nf_ct_put(ct);
  
-       return 0;
+       return ct;
  
- err:
+ err2:
+       rcu_read_unlock();
+ err1:
        nf_conntrack_free(ct);
-       return err;
+       return ERR_PTR(err);
  }
  
  static int
@@@ -1289,38 -1295,25 +1295,25 @@@ ctnetlink_new_conntrack(struct sock *ct
                h = __nf_conntrack_find(&init_net, &rtuple);
  
        if (h == NULL) {
-               struct nf_conntrack_tuple master;
-               struct nf_conntrack_tuple_hash *master_h = NULL;
-               struct nf_conn *master_ct = NULL;
-               if (cda[CTA_TUPLE_MASTER]) {
-                       err = ctnetlink_parse_tuple(cda,
-                                                   &master,
-                                                   CTA_TUPLE_MASTER,
-                                                   u3);
-                       if (err < 0)
-                               goto out_unlock;
+               err = -ENOENT;
+               if (nlh->nlmsg_flags & NLM_F_CREATE) {
+                       struct nf_conn *ct;
  
-                       master_h = __nf_conntrack_find(&init_net, &master);
-                       if (master_h == NULL) {
-                               err = -ENOENT;
+                       ct = ctnetlink_create_conntrack(cda, &otuple,
+                                                       &rtuple, u3);
+                       if (IS_ERR(ct)) {
+                               err = PTR_ERR(ct);
                                goto out_unlock;
                        }
-                       master_ct = nf_ct_tuplehash_to_ctrack(master_h);
-                       nf_conntrack_get(&master_ct->ct_general);
-               }
-               err = -ENOENT;
-               if (nlh->nlmsg_flags & NLM_F_CREATE)
-                       err = ctnetlink_create_conntrack(cda,
-                                                        &otuple,
-                                                        &rtuple,
-                                                        master_ct,
-                                                        NETLINK_CB(skb).pid,
-                                                        nlmsg_report(nlh));
-               spin_unlock_bh(&nf_conntrack_lock);
-               if (err < 0 && master_ct)
-                       nf_ct_put(master_ct);
+                       err = 0;
+                       nf_conntrack_get(&ct->ct_general);
+                       spin_unlock_bh(&nf_conntrack_lock);
+                       ctnetlink_event_report(ct,
+                                              NETLINK_CB(skb).pid,
+                                              nlmsg_report(nlh));
+                       nf_ct_put(ct);
+               } else
+                       spin_unlock_bh(&nf_conntrack_lock);
  
                return err;
        }
        if (!(nlh->nlmsg_flags & NLM_F_EXCL)) {
                struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
  
-               /* we only allow nat config for new conntracks */
-               if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) {
-                       err = -EOPNOTSUPP;
-                       goto out_unlock;
-               }
-               /* can't link an existing conntrack to a master */
-               if (cda[CTA_TUPLE_MASTER]) {
-                       err = -EOPNOTSUPP;
-                       goto out_unlock;
-               }
                err = ctnetlink_change_conntrack(ct, cda);
                if (err == 0) {
                        nf_conntrack_get(&ct->ct_general);
@@@ -1533,6 -1515,7 +1515,7 @@@ static int ctnetlink_expect_event(struc
  nla_put_failure:
        rcu_read_unlock();
  nlmsg_failure:
+       nfnetlink_set_err(0, 0, -ENOBUFS);
        kfree_skb(skb);
        return NOTIFY_DONE;
  }
@@@ -1780,7 -1763,6 +1763,7 @@@ ctnetlink_create_expect(struct nlattr *
                goto out;
        }
  
 +      exp->class = 0;
        exp->expectfn = NULL;
        exp->flags = 0;
        exp->master = ct;
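
The ctnetlink hunks above convert ctnetlink_create_conntrack() to hand back either the new conntrack or ERR_PTR(err) in a single pointer, which the caller then unpacks with IS_ERR()/PTR_ERR(). A self-contained userspace sketch of that encoding, re-implementing the usual <linux/err.h> convention of reserving the top errno-sized range of addresses for error values (illustrative names, not the kernel headers):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error)      { return (void *)error; }
static inline long PTR_ERR(const void *ptr)  { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

struct conn { int id; };

static struct conn *create_conn(int simulate_failure)
{
	struct conn *c;

	if (simulate_failure)
		return ERR_PTR(-ENOMEM);	/* the errno travels in the pointer */
	c = malloc(sizeof(*c));
	if (!c)
		return ERR_PTR(-ENOMEM);
	c->id = 1;
	return c;				/* real object on success */
}

int main(void)
{
	struct conn *c = create_conn(1);

	if (IS_ERR(c)) {			/* caller unpacks it the same way */
		printf("create failed: %ld\n", PTR_ERR(c));
		return EXIT_FAILURE;
	}
	free(c);
	return EXIT_SUCCESS;
}
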
@@@ -25,6 -25,8 +25,8 @@@
  #include <net/netfilter/nf_conntrack_l4proto.h>
  #include <net/netfilter/nf_conntrack_ecache.h>
  #include <net/netfilter/nf_log.h>
+ #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
+ #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
  
  /* Protects ct->proto.tcp */
  static DEFINE_RWLOCK(tcp_lock);
@@@ -859,7 -861,7 +861,7 @@@ static int tcp_packet(struct nf_conn *c
                         */
                        if (nf_ct_kill(ct))
                                return -NF_REPEAT;
 -                      return -NF_DROP;
 +                      return NF_DROP;
                }
                /* Fall through */
        case TCP_CONNTRACK_IGNORE:
                                nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
                                          "nf_ct_tcp: killing out of sync session ");
                        nf_ct_kill(ct);
 -                      return -NF_DROP;
 +                      return NF_DROP;
                }
                ct->proto.tcp.last_index = index;
                ct->proto.tcp.last_dir = dir;
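
The tcp_packet() hunks above return NF_DROP instead of -NF_DROP, and the earlier nf_conntrack_in() hunk widens its check to ret <= 0; both rely on NF_DROP being 0, so a negated verdict from the L4 helper still lands in the "invalid" branch and is inverted back with -ret. A hedged userspace illustration of that encoding using the uapi verdict values (a sketch, not the kernel functions themselves):

#include <stdio.h>

#define NF_DROP		0	/* uapi verdict values from <linux/netfilter.h> */
#define NF_ACCEPT	1

/* stand-in for an l4proto->packet() handler: a negated verdict means "invalid" */
static int l4_packet(int valid)
{
	return valid ? NF_ACCEPT : -NF_DROP;	/* -NF_DROP is still 0 */
}

/* stand-in for the nf_conntrack_in() logic in the hunk above */
static unsigned int conntrack_in(int valid)
{
	int ret = l4_packet(valid);

	if (ret <= 0) {			/* now also catches NF_DROP (== 0) */
		printf("invalid, verdict %d\n", -ret);
		return -ret;		/* hand the inverted verdict back */
	}
	return ret;			/* NF_ACCEPT */
}

int main(void)
{
	printf("good packet -> %u\n", conntrack_in(1));
	printf("bad packet  -> %u\n", conntrack_in(0));
	return 0;
}
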
@@@ -39,7 -39,7 +39,7 @@@
  #endif
  
  #define NFULNL_NLBUFSIZ_DEFAULT       NLMSG_GOODSIZE
 -#define NFULNL_TIMEOUT_DEFAULT        HZ      /* every second */
 +#define NFULNL_TIMEOUT_DEFAULT        100     /* every second */
  #define NFULNL_QTHRESH_DEFAULT        100     /* 100 packets */
  #define NFULNL_COPY_RANGE_MAX 0xFFFF  /* max packet size is limited by 16-bit struct nfattr nfa_len field */
  
@@@ -590,10 -590,8 +590,10 @@@ nfulnl_log_packet(u_int8_t pf
  
        qthreshold = inst->qthreshold;
        /* per-rule qthreshold overrides per-instance */
 -      if (qthreshold > li->u.ulog.qthreshold)
 -              qthreshold = li->u.ulog.qthreshold;
 +      if (li->u.ulog.qthreshold)
 +              if (qthreshold > li->u.ulog.qthreshold)
 +                      qthreshold = li->u.ulog.qthreshold;
 +
  
        switch (inst->copy_mode) {
        case NFULNL_COPY_META:
@@@ -693,7 -691,7 +693,7 @@@ nfulnl_recv_unsupp(struct sock *ctnl, s
        return -ENOTSUPP;
  }
  
- static const struct nf_logger nfulnl_logger = {
+ static struct nf_logger nfulnl_logger __read_mostly = {
        .name   = "nfnetlink_log",
        .logfn  = &nfulnl_log_packet,
        .me     = THIS_MODULE,
@@@ -725,9 -723,9 +725,9 @@@ nfulnl_recv_config(struct sock *ctnl, s
                /* Commands without queue context */
                switch (cmd->command) {
                case NFULNL_CFG_CMD_PF_BIND:
-                       return nf_log_register(pf, &nfulnl_logger);
+                       return nf_log_bind_pf(pf, &nfulnl_logger);
                case NFULNL_CFG_CMD_PF_UNBIND:
-                       nf_log_unregister_pf(pf);
+                       nf_log_unbind_pf(pf);
                        return 0;
                }
        }
@@@ -952,17 -950,25 +952,25 @@@ static int __init nfnetlink_log_init(vo
                goto cleanup_netlink_notifier;
        }
  
+       status = nf_log_register(NFPROTO_UNSPEC, &nfulnl_logger);
+       if (status < 0) {
+               printk(KERN_ERR "log: failed to register logger\n");
+               goto cleanup_subsys;
+       }
  #ifdef CONFIG_PROC_FS
        if (!proc_create("nfnetlink_log", 0440,
                         proc_net_netfilter, &nful_file_ops))
-               goto cleanup_subsys;
+               goto cleanup_logger;
  #endif
        return status;
  
  #ifdef CONFIG_PROC_FS
+ cleanup_logger:
+       nf_log_unregister(&nfulnl_logger);
+ #endif
  cleanup_subsys:
        nfnetlink_subsys_unregister(&nfulnl_subsys);
- #endif
  cleanup_netlink_notifier:
        netlink_unregister_notifier(&nfulnl_rtnl_notifier);
        return status;
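
The nfnetlink_log_init() hunk above inserts the logger registration between the subsystem registration and the proc file, and reorders the cleanup labels so each failure point unwinds exactly the steps that already succeeded. A small standalone C sketch of that goto-unwind idiom, with purely illustrative names and a simulated failure:

#include <stdio.h>
#include <stdlib.h>

static int init_subsys(void)	{ puts("subsys registered");   return 0; }
static void exit_subsys(void)	{ puts("subsys unregistered"); }
static int init_logger(void)	{ puts("logger registered");   return 0; }
static void exit_logger(void)	{ puts("logger unregistered"); }
static int init_proc(void)	{ puts("proc file failed");    return -1; /* simulated failure */ }

int main(void)
{
	int status;

	status = init_subsys();
	if (status < 0)
		goto out;
	status = init_logger();
	if (status < 0)
		goto cleanup_subsys;	/* only undo what already succeeded */
	status = init_proc();
	if (status < 0)
		goto cleanup_logger;
	return EXIT_SUCCESS;

cleanup_logger:
	exit_logger();
cleanup_subsys:
	exit_subsys();
out:
	return EXIT_FAILURE;
}
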
diff --combined net/netfilter/x_tables.c
@@@ -625,6 -625,20 +625,20 @@@ void xt_free_table_info(struct xt_table
  }
  EXPORT_SYMBOL(xt_free_table_info);
  
+ void xt_table_entry_swap_rcu(struct xt_table_info *oldinfo,
+                            struct xt_table_info *newinfo)
+ {
+       unsigned int cpu;
+       for_each_possible_cpu(cpu) {
+               void *p = oldinfo->entries[cpu];
+               rcu_assign_pointer(oldinfo->entries[cpu], newinfo->entries[cpu]);
+               newinfo->entries[cpu] = p;
+       }
+ }
+ EXPORT_SYMBOL_GPL(xt_table_entry_swap_rcu);
  /* Find table by name, grabs mutex & ref.  Returns ERR_PTR() on error. */
  struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
                                    const char *name)
@@@ -671,21 -685,22 +685,22 @@@ xt_replace_table(struct xt_table *table
        struct xt_table_info *oldinfo, *private;
  
        /* Do the substitution. */
-       write_lock_bh(&table->lock);
+       mutex_lock(&table->lock);
        private = table->private;
        /* Check inside lock: is the old number correct? */
        if (num_counters != private->number) {
                duprintf("num_counters != table->private->number (%u/%u)\n",
                         num_counters, private->number);
-               write_unlock_bh(&table->lock);
+               mutex_unlock(&table->lock);
                *error = -EAGAIN;
                return NULL;
        }
        oldinfo = private;
-       table->private = newinfo;
+       rcu_assign_pointer(table->private, newinfo);
        newinfo->initial_entries = oldinfo->initial_entries;
-       write_unlock_bh(&table->lock);
+       mutex_unlock(&table->lock);
  
+       synchronize_net();
        return oldinfo;
  }
  EXPORT_SYMBOL_GPL(xt_replace_table);
@@@ -719,7 -734,8 +734,8 @@@ struct xt_table *xt_register_table(stru
  
        /* Simplifies replace_table code. */
        table->private = bootstrap;
-       rwlock_init(&table->lock);
+       mutex_init(&table->lock);
        if (!xt_replace_table(table, 0, newinfo, &ret))
                goto unlock;
  
@@@ -827,143 -843,59 +843,143 @@@ static const struct file_operations xt_
        .release = seq_release_net,
  };
  
 -static void *xt_match_seq_start(struct seq_file *seq, loff_t *pos)
 +/*
 + * Traverse state for ip{,6}_{tables,matches} for helping crossing
 + * the multi-AF mutexes.
 + */
 +struct nf_mttg_trav {
 +      struct list_head *head, *curr;
 +      uint8_t class, nfproto;
 +};
 +
 +enum {
 +      MTTG_TRAV_INIT,
 +      MTTG_TRAV_NFP_UNSPEC,
 +      MTTG_TRAV_NFP_SPEC,
 +      MTTG_TRAV_DONE,
 +};
 +
 +static void *xt_mttg_seq_next(struct seq_file *seq, void *v, loff_t *ppos,
 +    bool is_target)
  {
 -      struct proc_dir_entry *pde = (struct proc_dir_entry *)seq->private;
 -      u_int16_t af = (unsigned long)pde->data;
 +      static const uint8_t next_class[] = {
 +              [MTTG_TRAV_NFP_UNSPEC] = MTTG_TRAV_NFP_SPEC,
 +              [MTTG_TRAV_NFP_SPEC]   = MTTG_TRAV_DONE,
 +      };
 +      struct nf_mttg_trav *trav = seq->private;
 +
 +      switch (trav->class) {
 +      case MTTG_TRAV_INIT:
 +              trav->class = MTTG_TRAV_NFP_UNSPEC;
 +              mutex_lock(&xt[NFPROTO_UNSPEC].mutex);
 +              trav->head = trav->curr = is_target ?
 +                      &xt[NFPROTO_UNSPEC].target : &xt[NFPROTO_UNSPEC].match;
 +              break;
 +      case MTTG_TRAV_NFP_UNSPEC:
 +              trav->curr = trav->curr->next;
 +              if (trav->curr != trav->head)
 +                      break;
 +              mutex_unlock(&xt[NFPROTO_UNSPEC].mutex);
 +              mutex_lock(&xt[trav->nfproto].mutex);
 +              trav->head = trav->curr = is_target ?
 +                      &xt[trav->nfproto].target : &xt[trav->nfproto].match;
 +              trav->class = next_class[trav->class];
 +              break;
 +      case MTTG_TRAV_NFP_SPEC:
 +              trav->curr = trav->curr->next;
 +              if (trav->curr != trav->head)
 +                      break;
 +              /* fallthru, _stop will unlock */
 +      default:
 +              return NULL;
 +      }
  
 -      mutex_lock(&xt[af].mutex);
 -      return seq_list_start(&xt[af].match, *pos);
 +      if (ppos != NULL)
 +              ++*ppos;
 +      return trav;
  }
  
 -static void *xt_match_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 +static void *xt_mttg_seq_start(struct seq_file *seq, loff_t *pos,
 +    bool is_target)
  {
 -      struct proc_dir_entry *pde = (struct proc_dir_entry *)seq->private;
 -      u_int16_t af = (unsigned long)pde->data;
 +      struct nf_mttg_trav *trav = seq->private;
 +      unsigned int j;
  
 -      return seq_list_next(v, &xt[af].match, pos);
 +      trav->class = MTTG_TRAV_INIT;
 +      for (j = 0; j < *pos; ++j)
 +              if (xt_mttg_seq_next(seq, NULL, NULL, is_target) == NULL)
 +                      return NULL;
 +      return trav;
  }
  
 -static void xt_match_seq_stop(struct seq_file *seq, void *v)
 +static void xt_mttg_seq_stop(struct seq_file *seq, void *v)
  {
 -      struct proc_dir_entry *pde = seq->private;
 -      u_int16_t af = (unsigned long)pde->data;
 +      struct nf_mttg_trav *trav = seq->private;
 +
 +      switch (trav->class) {
 +      case MTTG_TRAV_NFP_UNSPEC:
 +              mutex_unlock(&xt[NFPROTO_UNSPEC].mutex);
 +              break;
 +      case MTTG_TRAV_NFP_SPEC:
 +              mutex_unlock(&xt[trav->nfproto].mutex);
 +              break;
 +      }
 +}
  
 -      mutex_unlock(&xt[af].mutex);
 +static void *xt_match_seq_start(struct seq_file *seq, loff_t *pos)
 +{
 +      return xt_mttg_seq_start(seq, pos, false);
  }
  
 -static int xt_match_seq_show(struct seq_file *seq, void *v)
 +static void *xt_match_seq_next(struct seq_file *seq, void *v, loff_t *ppos)
  {
 -      struct xt_match *match = list_entry(v, struct xt_match, list);
 +      return xt_mttg_seq_next(seq, v, ppos, false);
 +}
  
 -      if (strlen(match->name))
 -              return seq_printf(seq, "%s\n", match->name);
 -      else
 -              return 0;
 +static int xt_match_seq_show(struct seq_file *seq, void *v)
 +{
 +      const struct nf_mttg_trav *trav = seq->private;
 +      const struct xt_match *match;
 +
 +      switch (trav->class) {
 +      case MTTG_TRAV_NFP_UNSPEC:
 +      case MTTG_TRAV_NFP_SPEC:
 +              if (trav->curr == trav->head)
 +                      return 0;
 +              match = list_entry(trav->curr, struct xt_match, list);
 +              return (*match->name == '\0') ? 0 :
 +                     seq_printf(seq, "%s\n", match->name);
 +      }
 +      return 0;
  }
  
  static const struct seq_operations xt_match_seq_ops = {
        .start  = xt_match_seq_start,
        .next   = xt_match_seq_next,
 -      .stop   = xt_match_seq_stop,
 +      .stop   = xt_mttg_seq_stop,
        .show   = xt_match_seq_show,
  };
  
  static int xt_match_open(struct inode *inode, struct file *file)
  {
 +      struct seq_file *seq;
 +      struct nf_mttg_trav *trav;
        int ret;
  
 -      ret = seq_open(file, &xt_match_seq_ops);
 -      if (!ret) {
 -              struct seq_file *seq = file->private_data;
 +      trav = kmalloc(sizeof(*trav), GFP_KERNEL);
 +      if (trav == NULL)
 +              return -ENOMEM;
  
 -              seq->private = PDE(inode);
 +      ret = seq_open(file, &xt_match_seq_ops);
 +      if (ret < 0) {
 +              kfree(trav);
 +              return ret;
        }
 -      return ret;
 +
 +      seq = file->private_data;
 +      seq->private = trav;
 +      trav->nfproto = (unsigned long)PDE(inode)->data;
 +      return 0;
  }
  
  static const struct file_operations xt_match_ops = {
        .open    = xt_match_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
 -      .release = seq_release,
 +      .release = seq_release_private,
  };
  
  static void *xt_target_seq_start(struct seq_file *seq, loff_t *pos)
  {
 -      struct proc_dir_entry *pde = (struct proc_dir_entry *)seq->private;
 -      u_int16_t af = (unsigned long)pde->data;
 -
 -      mutex_lock(&xt[af].mutex);
 -      return seq_list_start(&xt[af].target, *pos);
 +      return xt_mttg_seq_start(seq, pos, true);
  }
  
 -static void *xt_target_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 +static void *xt_target_seq_next(struct seq_file *seq, void *v, loff_t *ppos)
  {
 -      struct proc_dir_entry *pde = (struct proc_dir_entry *)seq->private;
 -      u_int16_t af = (unsigned long)pde->data;
 -
 -      return seq_list_next(v, &xt[af].target, pos);
 -}
 -
 -static void xt_target_seq_stop(struct seq_file *seq, void *v)
 -{
 -      struct proc_dir_entry *pde = seq->private;
 -      u_int16_t af = (unsigned long)pde->data;
 -
 -      mutex_unlock(&xt[af].mutex);
 +      return xt_mttg_seq_next(seq, v, ppos, true);
  }
  
  static int xt_target_seq_show(struct seq_file *seq, void *v)
  {
 -      struct xt_target *target = list_entry(v, struct xt_target, list);
 -
 -      if (strlen(target->name))
 -              return seq_printf(seq, "%s\n", target->name);
 -      else
 -              return 0;
 +      const struct nf_mttg_trav *trav = seq->private;
 +      const struct xt_target *target;
 +
 +      switch (trav->class) {
 +      case MTTG_TRAV_NFP_UNSPEC:
 +      case MTTG_TRAV_NFP_SPEC:
 +              if (trav->curr == trav->head)
 +                      return 0;
 +              target = list_entry(trav->curr, struct xt_target, list);
 +              return (*target->name == '\0') ? 0 :
 +                     seq_printf(seq, "%s\n", target->name);
 +      }
 +      return 0;
  }
  
  static const struct seq_operations xt_target_seq_ops = {
        .start  = xt_target_seq_start,
        .next   = xt_target_seq_next,
 -      .stop   = xt_target_seq_stop,
 +      .stop   = xt_mttg_seq_stop,
        .show   = xt_target_seq_show,
  };
  
  static int xt_target_open(struct inode *inode, struct file *file)
  {
 +      struct seq_file *seq;
 +      struct nf_mttg_trav *trav;
        int ret;
  
 -      ret = seq_open(file, &xt_target_seq_ops);
 -      if (!ret) {
 -              struct seq_file *seq = file->private_data;
 +      trav = kmalloc(sizeof(*trav), GFP_KERNEL);
 +      if (trav == NULL)
 +              return -ENOMEM;
  
 -              seq->private = PDE(inode);
 +      ret = seq_open(file, &xt_target_seq_ops);
 +      if (ret < 0) {
 +              kfree(trav);
 +              return ret;
        }
 -      return ret;
 +
 +      seq = file->private_data;
 +      seq->private = trav;
 +      trav->nfproto = (unsigned long)PDE(inode)->data;
 +      return 0;
  }
  
  static const struct file_operations xt_target_ops = {
        .open    = xt_target_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
 -      .release = seq_release,
 +      .release = seq_release_private,
  };
  
  #define FORMAT_TABLES "_tables_names"
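
The xt_replace_table() hunks above move the table pointer from an rwlock to mutex-serialized updates that are published with rcu_assign_pointer() and followed by a synchronize_net() grace period before the old table is reclaimed. A rough userspace analogue of that reader/writer pattern, assuming liburcu (userspace-rcu, default flavour, built with `gcc sketch.c -lurcu -pthread`); the names are illustrative, not the kernel or xtables API:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <urcu.h>		/* userspace-rcu; link with -lurcu */

struct table { int rules; };

static struct table *private_tbl;	/* RCU-protected, like table->private */
static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

static void reader(void)
{
	struct table *t;

	rcu_read_lock();			/* lockless read side */
	t = rcu_dereference(private_tbl);
	printf("reader sees %d rules\n", t->rules);
	rcu_read_unlock();
}

static struct table *replace_table(struct table *newinfo)
{
	struct table *old;

	pthread_mutex_lock(&table_lock);	/* writers serialized by a mutex */
	old = private_tbl;
	rcu_assign_pointer(private_tbl, newinfo);	/* publish the new table */
	pthread_mutex_unlock(&table_lock);
	synchronize_rcu();			/* wait out existing readers */
	return old;				/* old table now safe to free */
}

int main(void)
{
	struct table *newinfo;

	rcu_register_thread();
	private_tbl = malloc(sizeof(*private_tbl));
	private_tbl->rules = 1;
	reader();

	newinfo = malloc(sizeof(*newinfo));
	newinfo->rules = 2;
	free(replace_table(newinfo));
	reader();

	free(private_tbl);
	rcu_unregister_thread();
	return 0;
}
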
diff --combined net/netlink/af_netlink.c
@@@ -85,7 -85,6 +85,7 @@@ struct netlink_sock 
  
  #define NETLINK_KERNEL_SOCKET 0x1
  #define NETLINK_RECV_PKTINFO  0x2
 +#define NETLINK_BROADCAST_SEND_ERROR  0x4
  
  static inline struct netlink_sock *nlk_sk(struct sock *sk)
  {
@@@ -996,15 -995,12 +996,15 @@@ static inline int do_one_broadcast(stru
                netlink_overrun(sk);
                /* Clone failed. Notify ALL listeners. */
                p->failure = 1;
 +              if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
 +                      p->delivery_failure = 1;
        } else if (sk_filter(sk, p->skb2)) {
                kfree_skb(p->skb2);
                p->skb2 = NULL;
        } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) {
                netlink_overrun(sk);
 -              p->delivery_failure = 1;
 +              if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
 +                      p->delivery_failure = 1;
        } else {
                p->congested |= val;
                p->delivered = 1;
@@@ -1049,9 -1045,10 +1049,9 @@@ int netlink_broadcast(struct sock *ssk
  
        netlink_unlock_table();
  
 -      if (info.skb2)
 -              kfree_skb(info.skb2);
 +      kfree_skb(info.skb2);
  
 -      if (info.delivery_failure || info.failure)
 +      if (info.delivery_failure)
                return -ENOBUFS;
  
        if (info.delivered) {
        return 0;
  }
  
 +/**
 + * netlink_set_err - report error to broadcast listeners
 + * @ssk: the kernel netlink socket, as returned by netlink_kernel_create()
 + * @pid: the PID of a process that we want to skip (if any)
 + * @group: the broadcast group that will notice the error
 + * @code: error code, must be negative (as usual in kernelspace)
 + */
  void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code)
  {
        struct netlink_set_err_data info;
        info.exclude_sk = ssk;
        info.pid = pid;
        info.group = group;
 -      info.code = code;
 +      /* sk->sk_err wants a positive error value */
 +      info.code = -code;
  
        read_lock(&nl_table_lock);
  
  
        read_unlock(&nl_table_lock);
  }
+ EXPORT_SYMBOL(netlink_set_err);
  
  /* must be called with netlink table grabbed */
  static void netlink_update_socket_mc(struct netlink_sock *nlk,
@@@ -1174,13 -1164,6 +1175,13 @@@ static int netlink_setsockopt(struct so
                err = 0;
                break;
        }
 +      case NETLINK_BROADCAST_ERROR:
 +              if (val)
 +                      nlk->flags |= NETLINK_BROADCAST_SEND_ERROR;
 +              else
 +                      nlk->flags &= ~NETLINK_BROADCAST_SEND_ERROR;
 +              err = 0;
 +              break;
        default:
                err = -ENOPROTOOPT;
        }
@@@ -1213,16 -1196,6 +1214,16 @@@ static int netlink_getsockopt(struct so
                        return -EFAULT;
                err = 0;
                break;
 +      case NETLINK_BROADCAST_ERROR:
 +              if (len < sizeof(int))
 +                      return -EINVAL;
 +              len = sizeof(int);
 +              val = nlk->flags & NETLINK_BROADCAST_SEND_ERROR ? 1 : 0;
 +              if (put_user(len, optlen) ||
 +                  put_user(val, optval))
 +                      return -EFAULT;
 +              err = 0;
 +              break;
        default:
                err = -ENOPROTOOPT;
        }
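
The af_netlink hunks above add the NETLINK_BROADCAST_ERROR socket option, which nfnetlink now relies on (via nfnetlink_set_err() and netlink_set_err()) to report ENOBUFS to listeners when an event message cannot be built or delivered. A short userspace sketch of a listener opting in, assuming headers that export the option (the fallback value 4 is an assumption matching the definition this series adds to linux/netlink.h):

#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>

#ifndef SOL_NETLINK
#define SOL_NETLINK 270
#endif
#ifndef NETLINK_BROADCAST_ERROR
#define NETLINK_BROADCAST_ERROR 4	/* assumption: matches the new uapi value */
#endif

int main(void)
{
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	int on = 1;

	if (fd < 0) {
		perror("socket(AF_NETLINK)");
		return 1;
	}
	/* ask the kernel to report broadcast delivery failures as ENOBUFS */
	if (setsockopt(fd, SOL_NETLINK, NETLINK_BROADCAST_ERROR,
		       &on, sizeof(on)) < 0)
		perror("setsockopt(NETLINK_BROADCAST_ERROR)");
	else
		puts("NETLINK_BROADCAST_ERROR enabled");
	close(fd);
	return 0;
}
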
@@@ -1549,7 -1522,8 +1550,7 @@@ EXPORT_SYMBOL(netlink_set_nonroot)
  
  static void netlink_destroy_callback(struct netlink_callback *cb)
  {
 -      if (cb->skb)
 -              kfree_skb(cb->skb);
 +      kfree_skb(cb->skb);
        kfree(cb);
  }
  
@@@ -1766,18 -1740,12 +1767,18 @@@ int nlmsg_notify(struct sock *sk, struc
                        exclude_pid = pid;
                }
  
 -              /* errors reported via destination sk->sk_err */
 -              nlmsg_multicast(sk, skb, exclude_pid, group, flags);
 +              /* errors reported via destination sk->sk_err, but propagate
 +               * delivery errors if NETLINK_BROADCAST_ERROR flag is set */
 +              err = nlmsg_multicast(sk, skb, exclude_pid, group, flags);
        }
  
 -      if (report)
 -              err = nlmsg_unicast(sk, skb, pid);
 +      if (report) {
 +              int err2;
 +
 +              err2 = nlmsg_unicast(sk, skb, pid);
 +              if (!err || err == -ESRCH)
 +                      err = err2;
 +      }
  
        return err;
  }