Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
[safe/jmp/linux-2.6] / net / netfilter / nfnetlink_queue.c
index 449b880..8c86011 100644 (file)
@@ -3,6 +3,7 @@
  * userspace via nfetlink.
  *
  * (C) 2005 by Harald Welte <laforge@netfilter.org>
+ * (C) 2007 by Patrick McHardy <kaber@trash.net>
  *
  * Based on the old ipv4-only ip_queue.c:
  * (C) 2000-2002 James Morris <jmorris@intercode.com.au>
 
 #define NFQNL_QMAX_DEFAULT 1024
 
-#if 0
-#define QDEBUG(x, args ...)    printk(KERN_DEBUG "%s(%d):%s(): " x,       \
-                                       __FILE__, __LINE__, __FUNCTION__,  \
-                                       ## args)
-#else
-#define QDEBUG(x, ...)
-#endif
-
 struct nfqnl_instance {
        struct hlist_node hlist;                /* global list of queues */
        struct rcu_head rcu;
@@ -71,7 +64,7 @@ typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long);
 static DEFINE_SPINLOCK(instances_lock);
 
 #define INSTANCE_BUCKETS       16
-static struct hlist_head instance_table[INSTANCE_BUCKETS];
+static struct hlist_head instance_table[INSTANCE_BUCKETS] __read_mostly;
 
 static inline u_int8_t instance_hashfn(u_int16_t queue_num)
 {
@@ -98,19 +91,19 @@ instance_create(u_int16_t queue_num, int pid)
 {
        struct nfqnl_instance *inst;
        unsigned int h;
-
-       QDEBUG("entering for queue_num=%u, pid=%d\n", queue_num, pid);
+       int err;
 
        spin_lock(&instances_lock);
        if (instance_lookup(queue_num)) {
-               inst = NULL;
-               QDEBUG("aborting, instance already exists\n");
+               err = -EEXIST;
                goto out_unlock;
        }
 
        inst = kzalloc(sizeof(*inst), GFP_ATOMIC);
-       if (!inst)
+       if (!inst) {
+               err = -ENOMEM;
                goto out_unlock;
+       }
 
        inst->queue_num = queue_num;
        inst->peer_pid = pid;
@@ -121,23 +114,23 @@ instance_create(u_int16_t queue_num, int pid)
        INIT_LIST_HEAD(&inst->queue_list);
        INIT_RCU_HEAD(&inst->rcu);
 
-       if (!try_module_get(THIS_MODULE))
+       if (!try_module_get(THIS_MODULE)) {
+               err = -EAGAIN;
                goto out_free;
+       }
 
        h = instance_hashfn(queue_num);
        hlist_add_head_rcu(&inst->hlist, &instance_table[h]);
 
        spin_unlock(&instances_lock);
 
-       QDEBUG("successfully created new instance\n");
-
        return inst;
 
 out_free:
        kfree(inst);
 out_unlock:
        spin_unlock(&instances_lock);
-       return NULL;
+       return ERR_PTR(err);
 }
 
 static void nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn,
@@ -176,35 +169,6 @@ __enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
        queue->queue_total++;
 }
 
-static inline int
-__nfqnl_set_mode(struct nfqnl_instance *queue,
-                unsigned char mode, unsigned int range)
-{
-       int status = 0;
-
-       switch (mode) {
-       case NFQNL_COPY_NONE:
-       case NFQNL_COPY_META:
-               queue->copy_mode = mode;
-               queue->copy_range = 0;
-               break;
-
-       case NFQNL_COPY_PACKET:
-               queue->copy_mode = mode;
-               /* we're using struct nlattr which has 16bit nla_len */
-               if (range > 0xffff)
-                       queue->copy_range = 0xffff;
-               else
-                       queue->copy_range = range;
-               break;
-
-       default:
-               status = -EINVAL;
-
-       }
-       return status;
-}
-
 static struct nf_queue_entry *
 find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id)
 {
@@ -247,7 +211,7 @@ nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data)
 
 static struct sk_buff *
 nfqnl_build_packet_message(struct nfqnl_instance *queue,
-                          struct nf_queue_entry *entry, int *errp)
+                          struct nf_queue_entry *entry)
 {
        sk_buff_data_t old_tail;
        size_t size;
@@ -259,11 +223,8 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
        struct sk_buff *entskb = entry->skb;
        struct net_device *indev;
        struct net_device *outdev;
-       __be32 tmp_uint;
-
-       QDEBUG("entered\n");
 
-       size =    NLMSG_ALIGN(sizeof(struct nfgenmsg))
+       size =    NLMSG_SPACE(sizeof(struct nfgenmsg))
                + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr))
                + nla_total_size(sizeof(u_int32_t))     /* ifindex */
                + nla_total_size(sizeof(u_int32_t))     /* ifindex */
@@ -279,16 +240,15 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 
        spin_lock_bh(&queue->lock);
 
-       switch (queue->copy_mode) {
+       switch ((enum nfqnl_config_mode)queue->copy_mode) {
        case NFQNL_COPY_META:
        case NFQNL_COPY_NONE:
-               data_len = 0;
                break;
 
        case NFQNL_COPY_PACKET:
                if ((entskb->ip_summed == CHECKSUM_PARTIAL ||
                     entskb->ip_summed == CHECKSUM_COMPLETE) &&
-                   (*errp = skb_checksum_help(entskb))) {
+                   skb_checksum_help(entskb)) {
                        spin_unlock_bh(&queue->lock);
                        return NULL;
                }
@@ -300,11 +260,6 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 
                size += nla_total_size(data_len);
                break;
-
-       default:
-               *errp = -EINVAL;
-               spin_unlock_bh(&queue->lock);
-               return NULL;
        }
 
        entry->id = queue->id_sequence++;
@@ -332,69 +287,57 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 
        indev = entry->indev;
        if (indev) {
-               tmp_uint = htonl(indev->ifindex);
 #ifndef CONFIG_BRIDGE_NETFILTER
-               NLA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), &tmp_uint);
+               NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV, htonl(indev->ifindex));
 #else
                if (entry->pf == PF_BRIDGE) {
                        /* Case 1: indev is physical input device, we need to
                         * look for bridge group (when called from
                         * netfilter_bridge) */
-                       NLA_PUT(skb, NFQA_IFINDEX_PHYSINDEV, sizeof(tmp_uint),
-                               &tmp_uint);
+                       NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSINDEV,
+                                    htonl(indev->ifindex));
                        /* this is the bridge group "brX" */
-                       tmp_uint = htonl(indev->br_port->br->dev->ifindex);
-                       NLA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint),
-                               &tmp_uint);
+                       NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV,
+                                    htonl(indev->br_port->br->dev->ifindex));
                } else {
                        /* Case 2: indev is bridge group, we need to look for
                         * physical device (when called from ipv4) */
-                       NLA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint),
-                               &tmp_uint);
-                       if (entskb->nf_bridge
-                           && entskb->nf_bridge->physindev) {
-                               tmp_uint = htonl(entskb->nf_bridge->physindev->ifindex);
-                               NLA_PUT(skb, NFQA_IFINDEX_PHYSINDEV,
-                                       sizeof(tmp_uint), &tmp_uint);
-                       }
+                       NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV,
+                                    htonl(indev->ifindex));
+                       if (entskb->nf_bridge && entskb->nf_bridge->physindev)
+                               NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSINDEV,
+                                            htonl(entskb->nf_bridge->physindev->ifindex));
                }
 #endif
        }
 
        if (outdev) {
-               tmp_uint = htonl(outdev->ifindex);
 #ifndef CONFIG_BRIDGE_NETFILTER
-               NLA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), &tmp_uint);
+               NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV, htonl(outdev->ifindex));
 #else
                if (entry->pf == PF_BRIDGE) {
                        /* Case 1: outdev is physical output device, we need to
                         * look for bridge group (when called from
                         * netfilter_bridge) */
-                       NLA_PUT(skb, NFQA_IFINDEX_PHYSOUTDEV, sizeof(tmp_uint),
-                               &tmp_uint);
+                       NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSOUTDEV,
+                                    htonl(outdev->ifindex));
                        /* this is the bridge group "brX" */
-                       tmp_uint = htonl(outdev->br_port->br->dev->ifindex);
-                       NLA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint),
-                               &tmp_uint);
+                       NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV,
+                                    htonl(outdev->br_port->br->dev->ifindex));
                } else {
                        /* Case 2: outdev is bridge group, we need to look for
                         * physical output device (when called from ipv4) */
-                       NLA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint),
-                               &tmp_uint);
-                       if (entskb->nf_bridge
-                           && entskb->nf_bridge->physoutdev) {
-                               tmp_uint = htonl(entskb->nf_bridge->physoutdev->ifindex);
-                               NLA_PUT(skb, NFQA_IFINDEX_PHYSOUTDEV,
-                                       sizeof(tmp_uint), &tmp_uint);
-                       }
+                       NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV,
+                                    htonl(outdev->ifindex));
+                       if (entskb->nf_bridge && entskb->nf_bridge->physoutdev)
+                               NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSOUTDEV,
+                                            htonl(entskb->nf_bridge->physoutdev->ifindex));
                }
 #endif
        }
 
-       if (entskb->mark) {
-               tmp_uint = htonl(entskb->mark);
-               NLA_PUT(skb, NFQA_MARK, sizeof(u_int32_t), &tmp_uint);
-       }
+       if (entskb->mark)
+               NLA_PUT_BE32(skb, NFQA_MARK, htonl(entskb->mark));
 
        if (indev && entskb->dev) {
                struct nfqnl_msg_packet_hw phw;
@@ -416,7 +359,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 
        if (data_len) {
                struct nlattr *nla;
-               int size = nla_attr_size(data_len);
+               int sz = nla_attr_size(data_len);
 
                if (skb_tailroom(skb) < nla_total_size(data_len)) {
                        printk(KERN_WARNING "nf_queue: no tailroom!\n");
@@ -425,7 +368,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 
                nla = (struct nlattr *)skb_put(skb, nla_total_size(data_len));
                nla->nla_type = NFQA_PAYLOAD;
-               nla->nla_len = size;
+               nla->nla_len = sz;
 
                if (skb_copy_bits(entskb, 0, nla_data(nla), data_len))
                        BUG();
@@ -438,7 +381,6 @@ nlmsg_failure:
 nla_put_failure:
        if (skb)
                kfree_skb(skb);
-       *errp = -EINVAL;
        if (net_ratelimit())
                printk(KERN_ERR "nf_queue: error creating packet message\n");
        return NULL;
@@ -447,27 +389,21 @@ nla_put_failure:
 static int
 nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 {
-       int status = -EINVAL;
        struct sk_buff *nskb;
        struct nfqnl_instance *queue;
-
-       QDEBUG("entered\n");
+       int err;
 
        /* rcu_read_lock()ed by nf_hook_slow() */
        queue = instance_lookup(queuenum);
-       if (!queue) {
-               QDEBUG("no queue instance matching\n");
-               return -EINVAL;
-       }
+       if (!queue)
+               goto err_out;
 
-       if (queue->copy_mode == NFQNL_COPY_NONE) {
-               QDEBUG("mode COPY_NONE, aborting\n");
-               return -EAGAIN;
-       }
+       if (queue->copy_mode == NFQNL_COPY_NONE)
+               goto err_out;
 
-       nskb = nfqnl_build_packet_message(queue, entry, &status);
+       nskb = nfqnl_build_packet_message(queue, entry);
        if (nskb == NULL)
-               return status;
+               goto err_out;
 
        spin_lock_bh(&queue->lock);
 
@@ -476,7 +412,6 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 
        if (queue->queue_total >= queue->queue_maxlen) {
                queue->queue_dropped++;
-               status = -ENOSPC;
                if (net_ratelimit())
                          printk(KERN_WARNING "nf_queue: full at %d entries, "
                                 "dropping packets(s). Dropped: %d\n",
@@ -485,8 +420,8 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
        }
 
        /* nfnetlink_unicast will either free the nskb or add it to a socket */
-       status = nfnetlink_unicast(nskb, queue->peer_pid, MSG_DONTWAIT);
-       if (status < 0) {
+       err = nfnetlink_unicast(nskb, queue->peer_pid, MSG_DONTWAIT);
+       if (err < 0) {
                queue->queue_user_dropped++;
                goto err_out_unlock;
        }
@@ -494,21 +429,21 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
        __enqueue_entry(queue, entry);
 
        spin_unlock_bh(&queue->lock);
-       return status;
+       return 0;
 
 err_out_free_nskb:
        kfree_skb(nskb);
-
 err_out_unlock:
        spin_unlock_bh(&queue->lock);
-       return status;
+err_out:
+       return -1;
 }
 
 static int
 nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e)
 {
+       struct sk_buff *nskb;
        int diff;
-       int err;
 
        diff = data_len - e->skb->len;
        if (diff < 0) {
@@ -518,14 +453,15 @@ nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e)
                if (data_len > 0xFFFF)
                        return -EINVAL;
                if (diff > skb_tailroom(e->skb)) {
-                       err = pskb_expand_head(e->skb, 0,
-                                              diff - skb_tailroom(e->skb),
-                                              GFP_ATOMIC);
-                       if (err) {
+                       nskb = skb_copy_expand(e->skb, skb_headroom(e->skb),
+                                              diff, GFP_ATOMIC);
+                       if (!nskb) {
                                printk(KERN_WARNING "nf_queue: OOM "
                                      "in mangle, dropping packet\n");
-                               return err;
+                               return -ENOMEM;
                        }
+                       kfree_skb(e->skb);
+                       e->skb = nskb;
                }
                skb_put(e->skb, diff);
        }
@@ -540,10 +476,29 @@ static int
 nfqnl_set_mode(struct nfqnl_instance *queue,
               unsigned char mode, unsigned int range)
 {
-       int status;
+       int status = 0;
 
        spin_lock_bh(&queue->lock);
-       status = __nfqnl_set_mode(queue, mode, range);
+       switch (mode) {
+       case NFQNL_COPY_NONE:
+       case NFQNL_COPY_META:
+               queue->copy_mode = mode;
+               queue->copy_range = 0;
+               break;
+
+       case NFQNL_COPY_PACKET:
+               queue->copy_mode = mode;
+               /* we're using struct nlattr which has 16bit nla_len */
+               if (range > 0xffff)
+                       queue->copy_range = 0xffff;
+               else
+                       queue->copy_range = range;
+               break;
+
+       default:
+               status = -EINVAL;
+
+       }
        spin_unlock_bh(&queue->lock);
 
        return status;
@@ -578,8 +533,6 @@ nfqnl_dev_drop(int ifindex)
 {
        int i;
 
-       QDEBUG("entering for ifindex %u\n", ifindex);
-
        rcu_read_lock();
 
        for (i = 0; i < INSTANCE_BUCKETS; i++) {
@@ -602,7 +555,7 @@ nfqnl_rcv_dev_event(struct notifier_block *this,
 {
        struct net_device *dev = ptr;
 
-       if (dev->nd_net != &init_net)
+       if (!net_eq(dev_net(dev), &init_net))
                return NOTIFY_DONE;
 
        /* Drop any packets associated with the downed device */
@@ -705,8 +658,7 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
        }
 
        if (nfqa[NFQA_MARK])
-               entry->skb->mark = ntohl(*(__be32 *)
-                                        nla_data(nfqa[NFQA_MARK]));
+               entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK]));
 
        nf_reinject(entry, verdict);
        return 0;
@@ -743,27 +695,18 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
        struct nfqnl_msg_config_cmd *cmd = NULL;
        int ret = 0;
 
-       QDEBUG("entering for msg %u\n", NFNL_MSG_TYPE(nlh->nlmsg_type));
-
        if (nfqa[NFQA_CFG_CMD]) {
                cmd = nla_data(nfqa[NFQA_CFG_CMD]);
 
                /* Commands without queue context - might sleep */
                switch (cmd->command) {
                case NFQNL_CFG_CMD_PF_BIND:
-                       ret = nf_register_queue_handler(ntohs(cmd->pf),
-                                                       &nfqh);
-                       break;
+                       return nf_register_queue_handler(ntohs(cmd->pf),
+                                                        &nfqh);
                case NFQNL_CFG_CMD_PF_UNBIND:
-                       ret = nf_unregister_queue_handler(ntohs(cmd->pf),
-                                                         &nfqh);
-                       break;
-               default:
-                       break;
+                       return nf_unregister_queue_handler(ntohs(cmd->pf),
+                                                          &nfqh);
                }
-
-               if (ret < 0)
-                       return ret;
        }
 
        rcu_read_lock();
@@ -781,8 +724,8 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
                                goto err_out_unlock;
                        }
                        queue = instance_create(queue_num, NETLINK_CB(skb).pid);
-                       if (!queue) {
-                               ret = -EINVAL;
+                       if (IS_ERR(queue)) {
+                               ret = PTR_ERR(queue);
                                goto err_out_unlock;
                        }
                        break;
@@ -797,7 +740,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
                case NFQNL_CFG_CMD_PF_UNBIND:
                        break;
                default:
-                       ret = -EINVAL;
+                       ret = -ENOTSUPP;
                        break;
                }
        }
@@ -895,6 +838,7 @@ static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos)
 }
 
 static void *seq_start(struct seq_file *seq, loff_t *pos)
+       __acquires(instances_lock)
 {
        spin_lock(&instances_lock);
        return get_idx(seq, *pos);
@@ -907,6 +851,7 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
 }
 
 static void seq_stop(struct seq_file *s, void *v)
+       __releases(instances_lock)
 {
        spin_unlock(&instances_lock);
 }
@@ -949,9 +894,6 @@ static const struct file_operations nfqnl_file_ops = {
 static int __init nfnetlink_queue_init(void)
 {
        int i, status = -ENOMEM;
-#ifdef CONFIG_PROC_FS
-       struct proc_dir_entry *proc_nfqueue;
-#endif
 
        for (i = 0; i < INSTANCE_BUCKETS; i++)
                INIT_HLIST_HEAD(&instance_table[i]);
@@ -964,11 +906,9 @@ static int __init nfnetlink_queue_init(void)
        }
 
 #ifdef CONFIG_PROC_FS
-       proc_nfqueue = create_proc_entry("nfnetlink_queue", 0440,
-                                        proc_net_netfilter);
-       if (!proc_nfqueue)
+       if (!proc_create("nfnetlink_queue", 0440,
+                        proc_net_netfilter, &nfqnl_file_ops))
                goto cleanup_subsys;
-       proc_nfqueue->proc_fops = &nfqnl_file_ops;
 #endif
 
        register_netdevice_notifier(&nfqnl_dev_notifier);