X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=net%2Fnetfilter%2Fnfnetlink_queue.c;h=5c589b27d6eb7d92e7f437c836518b0cf0824b28;hb=cd8c20b650f49354722b8cc1f03320b004815a0a;hp=6e4ada3c1844dde85e8ef81cbe1c00a4cd834f29;hpb=febf0a431e42f5a1fdb2b763273700610552ddcc;p=safe%2Fjmp%2Flinux-2.6 diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 6e4ada3..5c589b2 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -1,8 +1,9 @@ /* * This is a module which is used for queueing packets and communicating with - * userspace via nfetlink. + * userspace via nfnetlink. * * (C) 2005 by Harald Welte + * (C) 2007 by Patrick McHardy * * Based on the old ipv4-only ip_queue.c: * (C) 2000-2002 James Morris @@ -27,6 +28,7 @@ #include #include #include +#include #include @@ -36,24 +38,9 @@ #define NFQNL_QMAX_DEFAULT 1024 -#if 0 -#define QDEBUG(x, args ...) printk(KERN_DEBUG "%s(%d):%s(): " x, \ - __FILE__, __LINE__, __FUNCTION__, \ - ## args) -#else -#define QDEBUG(x, ...) -#endif - -struct nfqnl_queue_entry { - struct list_head list; - struct nf_info *info; - struct sk_buff *skb; - unsigned int id; -}; - struct nfqnl_instance { struct hlist_node hlist; /* global list of queues */ - atomic_t use; + struct rcu_head rcu; int peer_pid; unsigned int queue_maxlen; @@ -62,7 +49,7 @@ struct nfqnl_instance { unsigned int queue_dropped; unsigned int queue_user_dropped; - atomic_t id_sequence; /* 'sequence' of pkt ids */ + unsigned int id_sequence; /* 'sequence' of pkt ids */ u_int16_t queue_num; /* number of this queue */ u_int8_t copy_mode; @@ -72,12 +59,12 @@ struct nfqnl_instance { struct list_head queue_list; /* packets in queue */ }; -typedef int (*nfqnl_cmpfn)(struct nfqnl_queue_entry *, unsigned long); +typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long); -static DEFINE_RWLOCK(instances_lock); +static DEFINE_SPINLOCK(instances_lock); #define INSTANCE_BUCKETS 16 -static struct hlist_head instance_table[INSTANCE_BUCKETS]; +static struct hlist_head instance_table[INSTANCE_BUCKETS] __read_mostly; static inline u_int8_t instance_hashfn(u_int16_t queue_num) { @@ -85,14 +72,14 @@ static inline u_int8_t instance_hashfn(u_int16_t queue_num) } static struct nfqnl_instance * -__instance_lookup(u_int16_t queue_num) +instance_lookup(u_int16_t queue_num) { struct hlist_head *head; struct hlist_node *pos; struct nfqnl_instance *inst; head = &instance_table[instance_hashfn(queue_num)]; - hlist_for_each_entry(inst, pos, head, hlist) { + hlist_for_each_entry_rcu(inst, pos, head, hlist) { if (inst->queue_num == queue_num) return inst; } @@ -100,588 +87,431 @@ __instance_lookup(u_int16_t queue_num) } static struct nfqnl_instance * -instance_lookup_get(u_int16_t queue_num) -{ - struct nfqnl_instance *inst; - - read_lock_bh(&instances_lock); - inst = __instance_lookup(queue_num); - if (inst) - atomic_inc(&inst->use); - read_unlock_bh(&instances_lock); - - return inst; -} - -static void -instance_put(struct nfqnl_instance *inst) -{ - if (inst && atomic_dec_and_test(&inst->use)) { - QDEBUG("kfree(inst=%p)\n", inst); - kfree(inst); - } -} - -static struct nfqnl_instance * instance_create(u_int16_t queue_num, int pid) { struct nfqnl_instance *inst; + unsigned int h; + int err; - QDEBUG("entering for queue_num=%u, pid=%d\n", queue_num, pid); - - write_lock_bh(&instances_lock); - if (__instance_lookup(queue_num)) { - inst = NULL; - QDEBUG("aborting, instance already exists\n"); + spin_lock(&instances_lock); + if (instance_lookup(queue_num)) { + err = -EEXIST; goto out_unlock; } inst = kzalloc(sizeof(*inst), GFP_ATOMIC); - if (!inst) + if (!inst) { + err = -ENOMEM; goto out_unlock; + } inst->queue_num = queue_num; inst->peer_pid = pid; inst->queue_maxlen = NFQNL_QMAX_DEFAULT; inst->copy_range = 0xfffff; inst->copy_mode = NFQNL_COPY_NONE; - atomic_set(&inst->id_sequence, 0); - /* needs to be two, since we _put() after creation */ - atomic_set(&inst->use, 2); spin_lock_init(&inst->lock); INIT_LIST_HEAD(&inst->queue_list); + INIT_RCU_HEAD(&inst->rcu); - if (!try_module_get(THIS_MODULE)) + if (!try_module_get(THIS_MODULE)) { + err = -EAGAIN; goto out_free; + } - hlist_add_head(&inst->hlist, - &instance_table[instance_hashfn(queue_num)]); - - write_unlock_bh(&instances_lock); + h = instance_hashfn(queue_num); + hlist_add_head_rcu(&inst->hlist, &instance_table[h]); - QDEBUG("successfully created new instance\n"); + spin_unlock(&instances_lock); return inst; out_free: kfree(inst); out_unlock: - write_unlock_bh(&instances_lock); - return NULL; + spin_unlock(&instances_lock); + return ERR_PTR(err); } -static void nfqnl_flush(struct nfqnl_instance *queue, int verdict); +static void nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, + unsigned long data); static void -_instance_destroy2(struct nfqnl_instance *inst, int lock) +instance_destroy_rcu(struct rcu_head *head) { - /* first pull it out of the global list */ - if (lock) - write_lock_bh(&instances_lock); - - QDEBUG("removing instance %p (queuenum=%u) from hash\n", - inst, inst->queue_num); - hlist_del(&inst->hlist); - - if (lock) - write_unlock_bh(&instances_lock); - - /* then flush all pending skbs from the queue */ - nfqnl_flush(inst, NF_DROP); - - /* and finally put the refcount */ - instance_put(inst); + struct nfqnl_instance *inst = container_of(head, struct nfqnl_instance, + rcu); + nfqnl_flush(inst, NULL, 0); + kfree(inst); module_put(THIS_MODULE); } -static inline void +static void __instance_destroy(struct nfqnl_instance *inst) { - _instance_destroy2(inst, 0); + hlist_del_rcu(&inst->hlist); + call_rcu(&inst->rcu, instance_destroy_rcu); } -static inline void -instance_destroy(struct nfqnl_instance *inst) -{ - _instance_destroy2(inst, 1); -} - - - static void -issue_verdict(struct nfqnl_queue_entry *entry, int verdict) +instance_destroy(struct nfqnl_instance *inst) { - QDEBUG("entering for entry %p, verdict %u\n", entry, verdict); - - /* TCP input path (and probably other bits) assume to be called - * from softirq context, not from syscall, like issue_verdict is - * called. TCP input path deadlocks with locks taken from timer - * softirq, e.g. We therefore emulate this by local_bh_disable() */ - - local_bh_disable(); - nf_reinject(entry->skb, entry->info, verdict); - local_bh_enable(); - - kfree(entry); + spin_lock(&instances_lock); + __instance_destroy(inst); + spin_unlock(&instances_lock); } static inline void -__enqueue_entry(struct nfqnl_instance *queue, - struct nfqnl_queue_entry *entry) +__enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry) { - list_add(&entry->list, &queue->queue_list); + list_add_tail(&entry->list, &queue->queue_list); queue->queue_total++; } -/* - * Find and return a queued entry matched by cmpfn, or return the last - * entry if cmpfn is NULL. - */ -static inline struct nfqnl_queue_entry * -__find_entry(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, - unsigned long data) +static struct nf_queue_entry * +find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id) { - struct list_head *p; - - list_for_each_prev(p, &queue->queue_list) { - struct nfqnl_queue_entry *entry = (struct nfqnl_queue_entry *)p; - - if (!cmpfn || cmpfn(entry, data)) - return entry; - } - return NULL; -} + struct nf_queue_entry *entry = NULL, *i; -static inline void -__dequeue_entry(struct nfqnl_instance *q, struct nfqnl_queue_entry *entry) -{ - list_del(&entry->list); - q->queue_total--; -} - -static inline struct nfqnl_queue_entry * -__find_dequeue_entry(struct nfqnl_instance *queue, - nfqnl_cmpfn cmpfn, unsigned long data) -{ - struct nfqnl_queue_entry *entry; - - entry = __find_entry(queue, cmpfn, data); - if (entry == NULL) - return NULL; - - __dequeue_entry(queue, entry); - return entry; -} - - -static inline void -__nfqnl_flush(struct nfqnl_instance *queue, int verdict) -{ - struct nfqnl_queue_entry *entry; - - while ((entry = __find_dequeue_entry(queue, NULL, 0))) - issue_verdict(entry, verdict); -} + spin_lock_bh(&queue->lock); -static inline int -__nfqnl_set_mode(struct nfqnl_instance *queue, - unsigned char mode, unsigned int range) -{ - int status = 0; - - switch (mode) { - case NFQNL_COPY_NONE: - case NFQNL_COPY_META: - queue->copy_mode = mode; - queue->copy_range = 0; - break; - - case NFQNL_COPY_PACKET: - queue->copy_mode = mode; - /* we're using struct nfattr which has 16bit nfa_len */ - if (range > 0xffff) - queue->copy_range = 0xffff; - else - queue->copy_range = range; - break; - - default: - status = -EINVAL; + list_for_each_entry(i, &queue->queue_list, list) { + if (i->id == id) { + entry = i; + break; + } + } + if (entry) { + list_del(&entry->list); + queue->queue_total--; } - return status; -} -static struct nfqnl_queue_entry * -find_dequeue_entry(struct nfqnl_instance *queue, - nfqnl_cmpfn cmpfn, unsigned long data) -{ - struct nfqnl_queue_entry *entry; - - spin_lock_bh(&queue->lock); - entry = __find_dequeue_entry(queue, cmpfn, data); spin_unlock_bh(&queue->lock); return entry; } static void -nfqnl_flush(struct nfqnl_instance *queue, int verdict) +nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data) { + struct nf_queue_entry *entry, *next; + spin_lock_bh(&queue->lock); - __nfqnl_flush(queue, verdict); + list_for_each_entry_safe(entry, next, &queue->queue_list, list) { + if (!cmpfn || cmpfn(entry, data)) { + list_del(&entry->list); + queue->queue_total--; + nf_reinject(entry, NF_DROP); + } + } spin_unlock_bh(&queue->lock); } static struct sk_buff * nfqnl_build_packet_message(struct nfqnl_instance *queue, - struct nfqnl_queue_entry *entry, int *errp) + struct nf_queue_entry *entry) { - unsigned char *old_tail; + sk_buff_data_t old_tail; size_t size; size_t data_len = 0; struct sk_buff *skb; struct nfqnl_msg_packet_hdr pmsg; struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - struct nf_info *entinf = entry->info; struct sk_buff *entskb = entry->skb; struct net_device *indev; struct net_device *outdev; - unsigned int tmp_uint; - - QDEBUG("entered\n"); - /* all macros expand to constant values at compile time */ - size = NLMSG_SPACE(sizeof(struct nfgenmsg)) + - + NFA_SPACE(sizeof(struct nfqnl_msg_packet_hdr)) - + NFA_SPACE(sizeof(u_int32_t)) /* ifindex */ - + NFA_SPACE(sizeof(u_int32_t)) /* ifindex */ + size = NLMSG_SPACE(sizeof(struct nfgenmsg)) + + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr)) + + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + + nla_total_size(sizeof(u_int32_t)) /* ifindex */ #ifdef CONFIG_BRIDGE_NETFILTER - + NFA_SPACE(sizeof(u_int32_t)) /* ifindex */ - + NFA_SPACE(sizeof(u_int32_t)) /* ifindex */ + + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + + nla_total_size(sizeof(u_int32_t)) /* ifindex */ #endif - + NFA_SPACE(sizeof(u_int32_t)) /* mark */ - + NFA_SPACE(sizeof(struct nfqnl_msg_packet_hw)) - + NFA_SPACE(sizeof(struct nfqnl_msg_packet_timestamp)); + + nla_total_size(sizeof(u_int32_t)) /* mark */ + + nla_total_size(sizeof(struct nfqnl_msg_packet_hw)) + + nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp)); - outdev = entinf->outdev; + outdev = entry->outdev; spin_lock_bh(&queue->lock); - - switch (queue->copy_mode) { + + switch ((enum nfqnl_config_mode)queue->copy_mode) { case NFQNL_COPY_META: case NFQNL_COPY_NONE: - data_len = 0; break; - + case NFQNL_COPY_PACKET: if ((entskb->ip_summed == CHECKSUM_PARTIAL || entskb->ip_summed == CHECKSUM_COMPLETE) && - (*errp = skb_checksum_help(entskb))) { + skb_checksum_help(entskb)) { spin_unlock_bh(&queue->lock); return NULL; } - if (queue->copy_range == 0 + if (queue->copy_range == 0 || queue->copy_range > entskb->len) data_len = entskb->len; else data_len = queue->copy_range; - - size += NFA_SPACE(data_len); + + size += nla_total_size(data_len); break; - - default: - *errp = -EINVAL; - spin_unlock_bh(&queue->lock); - return NULL; } + entry->id = queue->id_sequence++; + spin_unlock_bh(&queue->lock); skb = alloc_skb(size, GFP_ATOMIC); if (!skb) goto nlmsg_failure; - - old_tail= skb->tail; - nlh = NLMSG_PUT(skb, 0, 0, + + old_tail = skb->tail; + nlh = NLMSG_PUT(skb, 0, 0, NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET, sizeof(struct nfgenmsg)); nfmsg = NLMSG_DATA(nlh); - nfmsg->nfgen_family = entinf->pf; + nfmsg->nfgen_family = entry->pf; nfmsg->version = NFNETLINK_V0; nfmsg->res_id = htons(queue->queue_num); pmsg.packet_id = htonl(entry->id); pmsg.hw_protocol = entskb->protocol; - pmsg.hook = entinf->hook; + pmsg.hook = entry->hook; - NFA_PUT(skb, NFQA_PACKET_HDR, sizeof(pmsg), &pmsg); + NLA_PUT(skb, NFQA_PACKET_HDR, sizeof(pmsg), &pmsg); - indev = entinf->indev; + indev = entry->indev; if (indev) { - tmp_uint = htonl(indev->ifindex); #ifndef CONFIG_BRIDGE_NETFILTER - NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), &tmp_uint); + NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV, htonl(indev->ifindex)); #else - if (entinf->pf == PF_BRIDGE) { + if (entry->pf == PF_BRIDGE) { /* Case 1: indev is physical input device, we need to - * look for bridge group (when called from + * look for bridge group (when called from * netfilter_bridge) */ - NFA_PUT(skb, NFQA_IFINDEX_PHYSINDEV, sizeof(tmp_uint), - &tmp_uint); + NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSINDEV, + htonl(indev->ifindex)); /* this is the bridge group "brX" */ - tmp_uint = htonl(indev->br_port->br->dev->ifindex); - NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), - &tmp_uint); + NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV, + htonl(indev->br_port->br->dev->ifindex)); } else { /* Case 2: indev is bridge group, we need to look for * physical device (when called from ipv4) */ - NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), - &tmp_uint); - if (entskb->nf_bridge - && entskb->nf_bridge->physindev) { - tmp_uint = htonl(entskb->nf_bridge->physindev->ifindex); - NFA_PUT(skb, NFQA_IFINDEX_PHYSINDEV, - sizeof(tmp_uint), &tmp_uint); - } + NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV, + htonl(indev->ifindex)); + if (entskb->nf_bridge && entskb->nf_bridge->physindev) + NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSINDEV, + htonl(entskb->nf_bridge->physindev->ifindex)); } #endif } if (outdev) { - tmp_uint = htonl(outdev->ifindex); #ifndef CONFIG_BRIDGE_NETFILTER - NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), &tmp_uint); + NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV, htonl(outdev->ifindex)); #else - if (entinf->pf == PF_BRIDGE) { + if (entry->pf == PF_BRIDGE) { /* Case 1: outdev is physical output device, we need to - * look for bridge group (when called from + * look for bridge group (when called from * netfilter_bridge) */ - NFA_PUT(skb, NFQA_IFINDEX_PHYSOUTDEV, sizeof(tmp_uint), - &tmp_uint); + NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSOUTDEV, + htonl(outdev->ifindex)); /* this is the bridge group "brX" */ - tmp_uint = htonl(outdev->br_port->br->dev->ifindex); - NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), - &tmp_uint); + NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV, + htonl(outdev->br_port->br->dev->ifindex)); } else { /* Case 2: outdev is bridge group, we need to look for * physical output device (when called from ipv4) */ - NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), - &tmp_uint); - if (entskb->nf_bridge - && entskb->nf_bridge->physoutdev) { - tmp_uint = htonl(entskb->nf_bridge->physoutdev->ifindex); - NFA_PUT(skb, NFQA_IFINDEX_PHYSOUTDEV, - sizeof(tmp_uint), &tmp_uint); - } + NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV, + htonl(outdev->ifindex)); + if (entskb->nf_bridge && entskb->nf_bridge->physoutdev) + NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSOUTDEV, + htonl(entskb->nf_bridge->physoutdev->ifindex)); } #endif } - if (entskb->nfmark) { - tmp_uint = htonl(entskb->nfmark); - NFA_PUT(skb, NFQA_MARK, sizeof(u_int32_t), &tmp_uint); - } + if (entskb->mark) + NLA_PUT_BE32(skb, NFQA_MARK, htonl(entskb->mark)); - if (indev && entskb->dev - && entskb->dev->hard_header_parse) { + if (indev && entskb->dev) { struct nfqnl_msg_packet_hw phw; - - phw.hw_addrlen = - entskb->dev->hard_header_parse(entskb, - phw.hw_addr); - phw.hw_addrlen = htons(phw.hw_addrlen); - NFA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw); + int len = dev_parse_header(entskb, phw.hw_addr); + if (len) { + phw.hw_addrlen = htons(len); + NLA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw); + } } - if (entskb->tstamp.off_sec) { + if (entskb->tstamp.tv64) { struct nfqnl_msg_packet_timestamp ts; + struct timeval tv = ktime_to_timeval(entskb->tstamp); + ts.sec = cpu_to_be64(tv.tv_sec); + ts.usec = cpu_to_be64(tv.tv_usec); - ts.sec = cpu_to_be64(entskb->tstamp.off_sec); - ts.usec = cpu_to_be64(entskb->tstamp.off_usec); - - NFA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts); + NLA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts); } if (data_len) { - struct nfattr *nfa; - int size = NFA_LENGTH(data_len); + struct nlattr *nla; + int sz = nla_attr_size(data_len); - if (skb_tailroom(skb) < (int)NFA_SPACE(data_len)) { + if (skb_tailroom(skb) < nla_total_size(data_len)) { printk(KERN_WARNING "nf_queue: no tailroom!\n"); goto nlmsg_failure; } - nfa = (struct nfattr *)skb_put(skb, NFA_ALIGN(size)); - nfa->nfa_type = NFQA_PAYLOAD; - nfa->nfa_len = size; + nla = (struct nlattr *)skb_put(skb, nla_total_size(data_len)); + nla->nla_type = NFQA_PAYLOAD; + nla->nla_len = sz; - if (skb_copy_bits(entskb, 0, NFA_DATA(nfa), data_len)) + if (skb_copy_bits(entskb, 0, nla_data(nla), data_len)) BUG(); } - + nlh->nlmsg_len = skb->tail - old_tail; return skb; nlmsg_failure: -nfattr_failure: +nla_put_failure: if (skb) kfree_skb(skb); - *errp = -EINVAL; if (net_ratelimit()) printk(KERN_ERR "nf_queue: error creating packet message\n"); return NULL; } static int -nfqnl_enqueue_packet(struct sk_buff *skb, struct nf_info *info, - unsigned int queuenum, void *data) +nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) { - int status = -EINVAL; struct sk_buff *nskb; struct nfqnl_instance *queue; - struct nfqnl_queue_entry *entry; - - QDEBUG("entered\n"); - - queue = instance_lookup_get(queuenum); - if (!queue) { - QDEBUG("no queue instance matching\n"); - return -EINVAL; - } - - if (queue->copy_mode == NFQNL_COPY_NONE) { - QDEBUG("mode COPY_NONE, aborting\n"); - status = -EAGAIN; - goto err_out_put; - } + int err; - entry = kmalloc(sizeof(*entry), GFP_ATOMIC); - if (entry == NULL) { - if (net_ratelimit()) - printk(KERN_ERR - "nf_queue: OOM in nfqnl_enqueue_packet()\n"); - status = -ENOMEM; - goto err_out_put; - } + /* rcu_read_lock()ed by nf_hook_slow() */ + queue = instance_lookup(queuenum); + if (!queue) + goto err_out; - entry->info = info; - entry->skb = skb; - entry->id = atomic_inc_return(&queue->id_sequence); + if (queue->copy_mode == NFQNL_COPY_NONE) + goto err_out; - nskb = nfqnl_build_packet_message(queue, entry, &status); + nskb = nfqnl_build_packet_message(queue, entry); if (nskb == NULL) - goto err_out_free; - + goto err_out; + spin_lock_bh(&queue->lock); - + if (!queue->peer_pid) - goto err_out_free_nskb; + goto err_out_free_nskb; if (queue->queue_total >= queue->queue_maxlen) { - queue->queue_dropped++; - status = -ENOSPC; + queue->queue_dropped++; if (net_ratelimit()) - printk(KERN_WARNING "nf_queue: full at %d entries, " - "dropping packets(s). Dropped: %d\n", + printk(KERN_WARNING "nf_queue: full at %d entries, " + "dropping packets(s). Dropped: %d\n", queue->queue_total, queue->queue_dropped); goto err_out_free_nskb; } /* nfnetlink_unicast will either free the nskb or add it to a socket */ - status = nfnetlink_unicast(nskb, queue->peer_pid, MSG_DONTWAIT); - if (status < 0) { - queue->queue_user_dropped++; + err = nfnetlink_unicast(nskb, &init_net, queue->peer_pid, MSG_DONTWAIT); + if (err < 0) { + queue->queue_user_dropped++; goto err_out_unlock; } __enqueue_entry(queue, entry); spin_unlock_bh(&queue->lock); - instance_put(queue); - return status; + return 0; err_out_free_nskb: - kfree_skb(nskb); - + kfree_skb(nskb); err_out_unlock: spin_unlock_bh(&queue->lock); - -err_out_free: - kfree(entry); -err_out_put: - instance_put(queue); - return status; +err_out: + return -1; } static int -nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e) +nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e) { + struct sk_buff *nskb; int diff; diff = data_len - e->skb->len; - if (diff < 0) - skb_trim(e->skb, data_len); - else if (diff > 0) { + if (diff < 0) { + if (pskb_trim(e->skb, data_len)) + return -ENOMEM; + } else if (diff > 0) { if (data_len > 0xFFFF) return -EINVAL; if (diff > skb_tailroom(e->skb)) { - struct sk_buff *newskb; - - newskb = skb_copy_expand(e->skb, - skb_headroom(e->skb), - diff, - GFP_ATOMIC); - if (newskb == NULL) { + nskb = skb_copy_expand(e->skb, skb_headroom(e->skb), + diff, GFP_ATOMIC); + if (!nskb) { printk(KERN_WARNING "nf_queue: OOM " "in mangle, dropping packet\n"); return -ENOMEM; } - if (e->skb->sk) - skb_set_owner_w(newskb, e->skb->sk); kfree_skb(e->skb); - e->skb = newskb; + e->skb = nskb; } skb_put(e->skb, diff); } - if (!skb_make_writable(&e->skb, data_len)) + if (!skb_make_writable(e->skb, data_len)) return -ENOMEM; - memcpy(e->skb->data, data, data_len); + skb_copy_to_linear_data(e->skb, data, data_len); e->skb->ip_summed = CHECKSUM_NONE; return 0; } -static inline int -id_cmp(struct nfqnl_queue_entry *e, unsigned long id) -{ - return (id == e->id); -} - static int nfqnl_set_mode(struct nfqnl_instance *queue, unsigned char mode, unsigned int range) { - int status; + int status = 0; spin_lock_bh(&queue->lock); - status = __nfqnl_set_mode(queue, mode, range); + switch (mode) { + case NFQNL_COPY_NONE: + case NFQNL_COPY_META: + queue->copy_mode = mode; + queue->copy_range = 0; + break; + + case NFQNL_COPY_PACKET: + queue->copy_mode = mode; + /* we're using struct nlattr which has 16bit nla_len */ + if (range > 0xffff) + queue->copy_range = 0xffff; + else + queue->copy_range = range; + break; + + default: + status = -EINVAL; + + } spin_unlock_bh(&queue->lock); return status; } static int -dev_cmp(struct nfqnl_queue_entry *entry, unsigned long ifindex) +dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex) { - struct nf_info *entinf = entry->info; - - if (entinf->indev) - if (entinf->indev->ifindex == ifindex) + if (entry->indev) + if (entry->indev->ifindex == ifindex) return 1; - if (entinf->outdev) - if (entinf->outdev->ifindex == ifindex) + if (entry->outdev) + if (entry->outdev->ifindex == ifindex) return 1; #ifdef CONFIG_BRIDGE_NETFILTER if (entry->skb->nf_bridge) { @@ -702,28 +532,19 @@ static void nfqnl_dev_drop(int ifindex) { int i; - - QDEBUG("entering for ifindex %u\n", ifindex); - /* this only looks like we have to hold the readlock for a way too long - * time, issue_verdict(), nf_reinject(), ... - but we always only - * issue NF_DROP, which is processed directly in nf_reinject() */ - read_lock_bh(&instances_lock); + rcu_read_lock(); - for (i = 0; i < INSTANCE_BUCKETS; i++) { + for (i = 0; i < INSTANCE_BUCKETS; i++) { struct hlist_node *tmp; struct nfqnl_instance *inst; struct hlist_head *head = &instance_table[i]; - hlist_for_each_entry(inst, tmp, head, hlist) { - struct nfqnl_queue_entry *entry; - while ((entry = find_dequeue_entry(inst, dev_cmp, - ifindex)) != NULL) - issue_verdict(entry, NF_DROP); - } + hlist_for_each_entry_rcu(inst, tmp, head, hlist) + nfqnl_flush(inst, dev_cmp, ifindex); } - read_unlock_bh(&instances_lock); + rcu_read_unlock(); } #define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) @@ -734,6 +555,9 @@ nfqnl_rcv_dev_event(struct notifier_block *this, { struct net_device *dev = ptr; + if (!net_eq(dev_net(dev), &init_net)) + return NOTIFY_DONE; + /* Drop any packets associated with the downed device */ if (event == NETDEV_DOWN) nfqnl_dev_drop(dev->ifindex); @@ -750,23 +574,23 @@ nfqnl_rcv_nl_event(struct notifier_block *this, { struct netlink_notify *n = ptr; - if (event == NETLINK_URELEASE && - n->protocol == NETLINK_NETFILTER && n->pid) { + if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) { int i; /* destroy all instances for this pid */ - write_lock_bh(&instances_lock); - for (i = 0; i < INSTANCE_BUCKETS; i++) { + spin_lock(&instances_lock); + for (i = 0; i < INSTANCE_BUCKETS; i++) { struct hlist_node *tmp, *t2; struct nfqnl_instance *inst; struct hlist_head *head = &instance_table[i]; hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { - if (n->pid == inst->peer_pid) + if ((n->net == &init_net) && + (n->pid == inst->peer_pid)) __instance_destroy(inst); } } - write_unlock_bh(&instances_lock); + spin_unlock(&instances_lock); } return NOTIFY_DONE; } @@ -775,15 +599,16 @@ static struct notifier_block nfqnl_rtnl_notifier = { .notifier_call = nfqnl_rcv_nl_event, }; -static const int nfqa_verdict_min[NFQA_MAX] = { - [NFQA_VERDICT_HDR-1] = sizeof(struct nfqnl_msg_verdict_hdr), - [NFQA_MARK-1] = sizeof(u_int32_t), - [NFQA_PAYLOAD-1] = 0, +static const struct nla_policy nfqa_verdict_policy[NFQA_MAX+1] = { + [NFQA_VERDICT_HDR] = { .len = sizeof(struct nfqnl_msg_verdict_hdr) }, + [NFQA_MARK] = { .type = NLA_U32 }, + [NFQA_PAYLOAD] = { .type = NLA_UNSPEC }, }; static int nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp) + const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[]) { struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); u_int16_t queue_num = ntohs(nfmsg->res_id); @@ -791,177 +616,179 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, struct nfqnl_msg_verdict_hdr *vhdr; struct nfqnl_instance *queue; unsigned int verdict; - struct nfqnl_queue_entry *entry; + struct nf_queue_entry *entry; int err; - if (nfattr_bad_size(nfqa, NFQA_MAX, nfqa_verdict_min)) { - QDEBUG("bad attribute size\n"); - return -EINVAL; + rcu_read_lock(); + queue = instance_lookup(queue_num); + if (!queue) { + err = -ENODEV; + goto err_out_unlock; } - queue = instance_lookup_get(queue_num); - if (!queue) - return -ENODEV; - if (queue->peer_pid != NETLINK_CB(skb).pid) { err = -EPERM; - goto err_out_put; + goto err_out_unlock; } - if (!nfqa[NFQA_VERDICT_HDR-1]) { + if (!nfqa[NFQA_VERDICT_HDR]) { err = -EINVAL; - goto err_out_put; + goto err_out_unlock; } - vhdr = NFA_DATA(nfqa[NFQA_VERDICT_HDR-1]); + vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]); verdict = ntohl(vhdr->verdict); if ((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT) { err = -EINVAL; - goto err_out_put; + goto err_out_unlock; } - entry = find_dequeue_entry(queue, id_cmp, ntohl(vhdr->id)); + entry = find_dequeue_entry(queue, ntohl(vhdr->id)); if (entry == NULL) { err = -ENOENT; - goto err_out_put; + goto err_out_unlock; } + rcu_read_unlock(); - if (nfqa[NFQA_PAYLOAD-1]) { - if (nfqnl_mangle(NFA_DATA(nfqa[NFQA_PAYLOAD-1]), - NFA_PAYLOAD(nfqa[NFQA_PAYLOAD-1]), entry) < 0) + if (nfqa[NFQA_PAYLOAD]) { + if (nfqnl_mangle(nla_data(nfqa[NFQA_PAYLOAD]), + nla_len(nfqa[NFQA_PAYLOAD]), entry) < 0) verdict = NF_DROP; } - if (nfqa[NFQA_MARK-1]) - entry->skb->nfmark = ntohl(*(u_int32_t *) - NFA_DATA(nfqa[NFQA_MARK-1])); - - issue_verdict(entry, verdict); - instance_put(queue); + if (nfqa[NFQA_MARK]) + entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK])); + + nf_reinject(entry, verdict); return 0; -err_out_put: - instance_put(queue); +err_out_unlock: + rcu_read_unlock(); return err; } static int nfqnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp) + const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[]) { return -ENOTSUPP; } -static const int nfqa_cfg_min[NFQA_CFG_MAX] = { - [NFQA_CFG_CMD-1] = sizeof(struct nfqnl_msg_config_cmd), - [NFQA_CFG_PARAMS-1] = sizeof(struct nfqnl_msg_config_params), +static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = { + [NFQA_CFG_CMD] = { .len = sizeof(struct nfqnl_msg_config_cmd) }, + [NFQA_CFG_PARAMS] = { .len = sizeof(struct nfqnl_msg_config_params) }, }; -static struct nf_queue_handler nfqh = { +static const struct nf_queue_handler nfqh = { .name = "nf_queue", .outfn = &nfqnl_enqueue_packet, }; static int nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp) + const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[]) { struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); u_int16_t queue_num = ntohs(nfmsg->res_id); struct nfqnl_instance *queue; + struct nfqnl_msg_config_cmd *cmd = NULL; int ret = 0; - QDEBUG("entering for msg %u\n", NFNL_MSG_TYPE(nlh->nlmsg_type)); + if (nfqa[NFQA_CFG_CMD]) { + cmd = nla_data(nfqa[NFQA_CFG_CMD]); - if (nfattr_bad_size(nfqa, NFQA_CFG_MAX, nfqa_cfg_min)) { - QDEBUG("bad attribute size\n"); - return -EINVAL; + /* Commands without queue context - might sleep */ + switch (cmd->command) { + case NFQNL_CFG_CMD_PF_BIND: + return nf_register_queue_handler(ntohs(cmd->pf), + &nfqh); + case NFQNL_CFG_CMD_PF_UNBIND: + return nf_unregister_queue_handler(ntohs(cmd->pf), + &nfqh); + } } - queue = instance_lookup_get(queue_num); - if (nfqa[NFQA_CFG_CMD-1]) { - struct nfqnl_msg_config_cmd *cmd; - cmd = NFA_DATA(nfqa[NFQA_CFG_CMD-1]); - QDEBUG("found CFG_CMD\n"); + rcu_read_lock(); + queue = instance_lookup(queue_num); + if (queue && queue->peer_pid != NETLINK_CB(skb).pid) { + ret = -EPERM; + goto err_out_unlock; + } + if (cmd != NULL) { switch (cmd->command) { case NFQNL_CFG_CMD_BIND: - if (queue) - return -EBUSY; - + if (queue) { + ret = -EBUSY; + goto err_out_unlock; + } queue = instance_create(queue_num, NETLINK_CB(skb).pid); - if (!queue) - return -EINVAL; + if (IS_ERR(queue)) { + ret = PTR_ERR(queue); + goto err_out_unlock; + } break; case NFQNL_CFG_CMD_UNBIND: - if (!queue) - return -ENODEV; - - if (queue->peer_pid != NETLINK_CB(skb).pid) { - ret = -EPERM; - goto out_put; + if (!queue) { + ret = -ENODEV; + goto err_out_unlock; } - instance_destroy(queue); break; case NFQNL_CFG_CMD_PF_BIND: - QDEBUG("registering queue handler for pf=%u\n", - ntohs(cmd->pf)); - ret = nf_register_queue_handler(ntohs(cmd->pf), &nfqh); - break; case NFQNL_CFG_CMD_PF_UNBIND: - QDEBUG("unregistering queue handler for pf=%u\n", - ntohs(cmd->pf)); - /* This is a bug and a feature. We can unregister - * other handlers(!) */ - ret = nf_unregister_queue_handler(ntohs(cmd->pf)); break; default: - ret = -EINVAL; + ret = -ENOTSUPP; break; } - } else { - if (!queue) { - QDEBUG("no config command, and no instance ENOENT\n"); - ret = -ENOENT; - goto out_put; - } - - if (queue->peer_pid != NETLINK_CB(skb).pid) { - QDEBUG("no config command, and wrong pid\n"); - ret = -EPERM; - goto out_put; - } } - if (nfqa[NFQA_CFG_PARAMS-1]) { + if (nfqa[NFQA_CFG_PARAMS]) { struct nfqnl_msg_config_params *params; if (!queue) { - ret = -ENOENT; - goto out_put; + ret = -ENODEV; + goto err_out_unlock; } - params = NFA_DATA(nfqa[NFQA_CFG_PARAMS-1]); + params = nla_data(nfqa[NFQA_CFG_PARAMS]); nfqnl_set_mode(queue, params->copy_mode, ntohl(params->copy_range)); } -out_put: - instance_put(queue); + if (nfqa[NFQA_CFG_QUEUE_MAXLEN]) { + __be32 *queue_maxlen; + + if (!queue) { + ret = -ENODEV; + goto err_out_unlock; + } + queue_maxlen = nla_data(nfqa[NFQA_CFG_QUEUE_MAXLEN]); + spin_lock_bh(&queue->lock); + queue->queue_maxlen = ntohl(*queue_maxlen); + spin_unlock_bh(&queue->lock); + } + +err_out_unlock: + rcu_read_unlock(); return ret; } -static struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = { +static const struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = { [NFQNL_MSG_PACKET] = { .call = nfqnl_recv_unsupp, .attr_count = NFQA_MAX, }, [NFQNL_MSG_VERDICT] = { .call = nfqnl_recv_verdict, - .attr_count = NFQA_MAX, }, + .attr_count = NFQA_MAX, + .policy = nfqa_verdict_policy }, [NFQNL_MSG_CONFIG] = { .call = nfqnl_recv_config, - .attr_count = NFQA_CFG_MAX, }, + .attr_count = NFQA_CFG_MAX, + .policy = nfqa_cfg_policy }, }; -static struct nfnetlink_subsystem nfqnl_subsys = { +static const struct nfnetlink_subsystem nfqnl_subsys = { .name = "nf_queue", .subsys_id = NFNL_SUBSYS_QUEUE, .cb_count = NFQNL_MSG_MAX, @@ -1013,8 +840,9 @@ static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos) } static void *seq_start(struct seq_file *seq, loff_t *pos) + __acquires(instances_lock) { - read_lock_bh(&instances_lock); + spin_lock(&instances_lock); return get_idx(seq, *pos); } @@ -1025,8 +853,9 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos) } static void seq_stop(struct seq_file *s, void *v) + __releases(instances_lock) { - read_unlock_bh(&instances_lock); + spin_unlock(&instances_lock); } static int seq_show(struct seq_file *s, void *v) @@ -1038,11 +867,10 @@ static int seq_show(struct seq_file *s, void *v) inst->peer_pid, inst->queue_total, inst->copy_mode, inst->copy_range, inst->queue_dropped, inst->queue_user_dropped, - atomic_read(&inst->id_sequence), - atomic_read(&inst->use)); + inst->id_sequence, 1); } -static struct seq_operations nfqnl_seq_ops = { +static const struct seq_operations nfqnl_seq_ops = { .start = seq_start, .next = seq_next, .stop = seq_stop, @@ -1051,25 +879,11 @@ static struct seq_operations nfqnl_seq_ops = { static int nfqnl_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - struct iter_state *is; - int ret; - - is = kzalloc(sizeof(*is), GFP_KERNEL); - if (!is) - return -ENOMEM; - ret = seq_open(file, &nfqnl_seq_ops); - if (ret < 0) - goto out_free; - seq = file->private_data; - seq->private = is; - return ret; -out_free: - kfree(is); - return ret; + return seq_open_private(file, &nfqnl_seq_ops, + sizeof(struct iter_state)); } -static struct file_operations nfqnl_file_ops = { +static const struct file_operations nfqnl_file_ops = { .owner = THIS_MODULE, .open = nfqnl_open, .read = seq_read, @@ -1082,10 +896,7 @@ static struct file_operations nfqnl_file_ops = { static int __init nfnetlink_queue_init(void) { int i, status = -ENOMEM; -#ifdef CONFIG_PROC_FS - struct proc_dir_entry *proc_nfqueue; -#endif - + for (i = 0; i < INSTANCE_BUCKETS; i++) INIT_HLIST_HEAD(&instance_table[i]); @@ -1097,11 +908,9 @@ static int __init nfnetlink_queue_init(void) } #ifdef CONFIG_PROC_FS - proc_nfqueue = create_proc_entry("nfnetlink_queue", 0440, - proc_net_netfilter); - if (!proc_nfqueue) + if (!proc_create("nfnetlink_queue", 0440, + proc_net_netfilter, &nfqnl_file_ops)) goto cleanup_subsys; - proc_nfqueue->proc_fops = &nfqnl_file_ops; #endif register_netdevice_notifier(&nfqnl_dev_notifier); @@ -1125,6 +934,8 @@ static void __exit nfnetlink_queue_fini(void) #endif nfnetlink_subsys_unregister(&nfqnl_subsys); netlink_unregister_notifier(&nfqnl_rtnl_notifier); + + rcu_barrier(); /* Wait for completion of call_rcu()'s */ } MODULE_DESCRIPTION("netfilter packet queue handler");