X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=net%2Fnetfilter%2Fnfnetlink_queue.c;h=10522c04ed248fe3c8998661ca61393a3f5db970;hb=8eeee8b152ae6bbe181518efaf62ba8e9c613693;hp=052fc97dae985e9661a329c7f77e00adedabcd62;hpb=395dde20fb06153feb65d79d7fe83eda41bf50e7;p=safe%2Fjmp%2Flinux-2.6 diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 052fc97..10522c0 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -3,6 +3,7 @@ * userspace via nfetlink. * * (C) 2005 by Harald Welte + * (C) 2007 by Patrick McHardy * * Based on the old ipv4-only ip_queue.c: * (C) 2000-2002 James Morris @@ -27,6 +28,7 @@ #include #include #include +#include #include @@ -36,24 +38,9 @@ #define NFQNL_QMAX_DEFAULT 1024 -#if 0 -#define QDEBUG(x, args ...) printk(KERN_DEBUG "%s(%d):%s(): " x, \ - __FILE__, __LINE__, __FUNCTION__, \ - ## args) -#else -#define QDEBUG(x, ...) -#endif - -struct nfqnl_queue_entry { - struct list_head list; - struct nf_info *info; - struct sk_buff *skb; - unsigned int id; -}; - struct nfqnl_instance { struct hlist_node hlist; /* global list of queues */ - atomic_t use; + struct rcu_head rcu; int peer_pid; unsigned int queue_maxlen; @@ -62,7 +49,7 @@ struct nfqnl_instance { unsigned int queue_dropped; unsigned int queue_user_dropped; - atomic_t id_sequence; /* 'sequence' of pkt ids */ + unsigned int id_sequence; /* 'sequence' of pkt ids */ u_int16_t queue_num; /* number of this queue */ u_int8_t copy_mode; @@ -72,23 +59,12 @@ struct nfqnl_instance { struct list_head queue_list; /* packets in queue */ }; -typedef int (*nfqnl_cmpfn)(struct nfqnl_queue_entry *, unsigned long); +typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long); -static DEFINE_RWLOCK(instances_lock); - -static u_int64_t htonll(u_int64_t in) -{ - u_int64_t out; - int i; - - for (i = 0; i < sizeof(u_int64_t); i++) - ((u_int8_t *)&out)[sizeof(u_int64_t)-1] = ((u_int8_t *)&in)[i]; - - return out; -} +static DEFINE_SPINLOCK(instances_lock); #define INSTANCE_BUCKETS 16 -static struct hlist_head instance_table[INSTANCE_BUCKETS]; +static struct hlist_head instance_table[INSTANCE_BUCKETS] __read_mostly; static inline u_int8_t instance_hashfn(u_int16_t queue_num) { @@ -96,14 +72,14 @@ static inline u_int8_t instance_hashfn(u_int16_t queue_num) } static struct nfqnl_instance * -__instance_lookup(u_int16_t queue_num) +instance_lookup(u_int16_t queue_num) { struct hlist_head *head; struct hlist_node *pos; struct nfqnl_instance *inst; head = &instance_table[instance_hashfn(queue_num)]; - hlist_for_each_entry(inst, pos, head, hlist) { + hlist_for_each_entry_rcu(inst, pos, head, hlist) { if (inst->queue_num == queue_num) return inst; } @@ -111,576 +87,444 @@ __instance_lookup(u_int16_t queue_num) } static struct nfqnl_instance * -instance_lookup_get(u_int16_t queue_num) -{ - struct nfqnl_instance *inst; - - read_lock_bh(&instances_lock); - inst = __instance_lookup(queue_num); - if (inst) - atomic_inc(&inst->use); - read_unlock_bh(&instances_lock); - - return inst; -} - -static void -instance_put(struct nfqnl_instance *inst) -{ - if (inst && atomic_dec_and_test(&inst->use)) { - QDEBUG("kfree(inst=%p)\n", inst); - kfree(inst); - } -} - -static struct nfqnl_instance * instance_create(u_int16_t queue_num, int pid) { struct nfqnl_instance *inst; + unsigned int h; + int err; - QDEBUG("entering for queue_num=%u, pid=%d\n", queue_num, pid); - - write_lock_bh(&instances_lock); - if (__instance_lookup(queue_num)) { - inst = NULL; - QDEBUG("aborting, instance already exists\n"); + spin_lock(&instances_lock); + if (instance_lookup(queue_num)) { + err = -EEXIST; goto out_unlock; } - inst = kmalloc(sizeof(*inst), GFP_ATOMIC); - if (!inst) + inst = kzalloc(sizeof(*inst), GFP_ATOMIC); + if (!inst) { + err = -ENOMEM; goto out_unlock; + } - memset(inst, 0, sizeof(*inst)); inst->queue_num = queue_num; inst->peer_pid = pid; inst->queue_maxlen = NFQNL_QMAX_DEFAULT; inst->copy_range = 0xfffff; inst->copy_mode = NFQNL_COPY_NONE; - atomic_set(&inst->id_sequence, 0); - /* needs to be two, since we _put() after creation */ - atomic_set(&inst->use, 2); - inst->lock = SPIN_LOCK_UNLOCKED; + spin_lock_init(&inst->lock); INIT_LIST_HEAD(&inst->queue_list); + INIT_RCU_HEAD(&inst->rcu); - if (!try_module_get(THIS_MODULE)) + if (!try_module_get(THIS_MODULE)) { + err = -EAGAIN; goto out_free; + } - hlist_add_head(&inst->hlist, - &instance_table[instance_hashfn(queue_num)]); - - write_unlock_bh(&instances_lock); + h = instance_hashfn(queue_num); + hlist_add_head_rcu(&inst->hlist, &instance_table[h]); - QDEBUG("successfully created new instance\n"); + spin_unlock(&instances_lock); return inst; out_free: kfree(inst); out_unlock: - write_unlock_bh(&instances_lock); - return NULL; + spin_unlock(&instances_lock); + return ERR_PTR(err); } -static void nfqnl_flush(struct nfqnl_instance *queue, int verdict); +static void nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, + unsigned long data); static void -_instance_destroy2(struct nfqnl_instance *inst, int lock) +instance_destroy_rcu(struct rcu_head *head) { - /* first pull it out of the global list */ - if (lock) - write_lock_bh(&instances_lock); - - QDEBUG("removing instance %p (queuenum=%u) from hash\n", - inst, inst->queue_num); - hlist_del(&inst->hlist); - - if (lock) - write_unlock_bh(&instances_lock); - - /* then flush all pending skbs from the queue */ - nfqnl_flush(inst, NF_DROP); - - /* and finally put the refcount */ - instance_put(inst); + struct nfqnl_instance *inst = container_of(head, struct nfqnl_instance, + rcu); + nfqnl_flush(inst, NULL, 0); + kfree(inst); module_put(THIS_MODULE); } -static inline void +static void __instance_destroy(struct nfqnl_instance *inst) { - _instance_destroy2(inst, 0); + hlist_del_rcu(&inst->hlist); + call_rcu(&inst->rcu, instance_destroy_rcu); } -static inline void -instance_destroy(struct nfqnl_instance *inst) -{ - _instance_destroy2(inst, 1); -} - - - static void -issue_verdict(struct nfqnl_queue_entry *entry, int verdict) +instance_destroy(struct nfqnl_instance *inst) { - QDEBUG("entering for entry %p, verdict %u\n", entry, verdict); - - /* TCP input path (and probably other bits) assume to be called - * from softirq context, not from syscall, like issue_verdict is - * called. TCP input path deadlocks with locks taken from timer - * softirq, e.g. We therefore emulate this by local_bh_disable() */ - - local_bh_disable(); - nf_reinject(entry->skb, entry->info, verdict); - local_bh_enable(); - - kfree(entry); + spin_lock(&instances_lock); + __instance_destroy(inst); + spin_unlock(&instances_lock); } static inline void -__enqueue_entry(struct nfqnl_instance *queue, - struct nfqnl_queue_entry *entry) +__enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry) { - list_add(&entry->list, &queue->queue_list); + list_add_tail(&entry->list, &queue->queue_list); queue->queue_total++; } -/* - * Find and return a queued entry matched by cmpfn, or return the last - * entry if cmpfn is NULL. - */ -static inline struct nfqnl_queue_entry * -__find_entry(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, - unsigned long data) +static struct nf_queue_entry * +find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id) { - struct list_head *p; + struct nf_queue_entry *entry = NULL, *i; - list_for_each_prev(p, &queue->queue_list) { - struct nfqnl_queue_entry *entry = (struct nfqnl_queue_entry *)p; - - if (!cmpfn || cmpfn(entry, data)) - return entry; - } - return NULL; -} - -static inline void -__dequeue_entry(struct nfqnl_instance *q, struct nfqnl_queue_entry *entry) -{ - list_del(&entry->list); - q->queue_total--; -} - -static inline struct nfqnl_queue_entry * -__find_dequeue_entry(struct nfqnl_instance *queue, - nfqnl_cmpfn cmpfn, unsigned long data) -{ - struct nfqnl_queue_entry *entry; - - entry = __find_entry(queue, cmpfn, data); - if (entry == NULL) - return NULL; - - __dequeue_entry(queue, entry); - return entry; -} - - -static inline void -__nfqnl_flush(struct nfqnl_instance *queue, int verdict) -{ - struct nfqnl_queue_entry *entry; - - while ((entry = __find_dequeue_entry(queue, NULL, 0))) - issue_verdict(entry, verdict); -} + spin_lock_bh(&queue->lock); -static inline int -__nfqnl_set_mode(struct nfqnl_instance *queue, - unsigned char mode, unsigned int range) -{ - int status = 0; - - switch (mode) { - case NFQNL_COPY_NONE: - case NFQNL_COPY_META: - queue->copy_mode = mode; - queue->copy_range = 0; - break; - - case NFQNL_COPY_PACKET: - queue->copy_mode = mode; - /* we're using struct nfattr which has 16bit nfa_len */ - if (range > 0xffff) - queue->copy_range = 0xffff; - else - queue->copy_range = range; - break; - - default: - status = -EINVAL; + list_for_each_entry(i, &queue->queue_list, list) { + if (i->id == id) { + entry = i; + break; + } + } + if (entry) { + list_del(&entry->list); + queue->queue_total--; } - return status; -} -static struct nfqnl_queue_entry * -find_dequeue_entry(struct nfqnl_instance *queue, - nfqnl_cmpfn cmpfn, unsigned long data) -{ - struct nfqnl_queue_entry *entry; - - spin_lock_bh(&queue->lock); - entry = __find_dequeue_entry(queue, cmpfn, data); spin_unlock_bh(&queue->lock); return entry; } static void -nfqnl_flush(struct nfqnl_instance *queue, int verdict) +nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data) { + struct nf_queue_entry *entry, *next; + spin_lock_bh(&queue->lock); - __nfqnl_flush(queue, verdict); + list_for_each_entry_safe(entry, next, &queue->queue_list, list) { + if (!cmpfn || cmpfn(entry, data)) { + list_del(&entry->list); + queue->queue_total--; + nf_reinject(entry, NF_DROP); + } + } spin_unlock_bh(&queue->lock); } static struct sk_buff * nfqnl_build_packet_message(struct nfqnl_instance *queue, - struct nfqnl_queue_entry *entry, int *errp) + struct nf_queue_entry *entry) { - unsigned char *old_tail; + sk_buff_data_t old_tail; size_t size; size_t data_len = 0; struct sk_buff *skb; struct nfqnl_msg_packet_hdr pmsg; struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - unsigned int tmp_uint; - - QDEBUG("entered\n"); - - /* all macros expand to constant values at compile time */ - size = NLMSG_SPACE(sizeof(struct nfqnl_msg_packet_hdr)) - + NLMSG_SPACE(sizeof(u_int32_t)) /* ifindex */ - + NLMSG_SPACE(sizeof(u_int32_t)) /* ifindex */ + struct sk_buff *entskb = entry->skb; + struct net_device *indev; + struct net_device *outdev; + + size = NLMSG_SPACE(sizeof(struct nfgenmsg)) + + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr)) + + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + + nla_total_size(sizeof(u_int32_t)) /* ifindex */ #ifdef CONFIG_BRIDGE_NETFILTER - + NLMSG_SPACE(sizeof(u_int32_t)) /* ifindex */ - + NLMSG_SPACE(sizeof(u_int32_t)) /* ifindex */ + + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + + nla_total_size(sizeof(u_int32_t)) /* ifindex */ #endif - + NLMSG_SPACE(sizeof(u_int32_t)) /* mark */ - + NLMSG_SPACE(sizeof(struct nfqnl_msg_packet_hw)) - + NLMSG_SPACE(sizeof(struct nfqnl_msg_packet_timestamp)); + + nla_total_size(sizeof(u_int32_t)) /* mark */ + + nla_total_size(sizeof(struct nfqnl_msg_packet_hw)) + + nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp)); + + outdev = entry->outdev; spin_lock_bh(&queue->lock); - - switch (queue->copy_mode) { + + switch ((enum nfqnl_config_mode)queue->copy_mode) { case NFQNL_COPY_META: case NFQNL_COPY_NONE: data_len = 0; break; - + case NFQNL_COPY_PACKET: - if (queue->copy_range == 0 - || queue->copy_range > entry->skb->len) - data_len = entry->skb->len; + if ((entskb->ip_summed == CHECKSUM_PARTIAL || + entskb->ip_summed == CHECKSUM_COMPLETE) && + skb_checksum_help(entskb)) { + spin_unlock_bh(&queue->lock); + return NULL; + } + if (queue->copy_range == 0 + || queue->copy_range > entskb->len) + data_len = entskb->len; else data_len = queue->copy_range; - - size += NLMSG_SPACE(data_len); + + size += nla_total_size(data_len); break; - - default: - *errp = -EINVAL; - spin_unlock_bh(&queue->lock); - return NULL; } + entry->id = queue->id_sequence++; + spin_unlock_bh(&queue->lock); skb = alloc_skb(size, GFP_ATOMIC); if (!skb) goto nlmsg_failure; - - old_tail= skb->tail; - nlh = NLMSG_PUT(skb, 0, 0, + + old_tail = skb->tail; + nlh = NLMSG_PUT(skb, 0, 0, NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET, sizeof(struct nfgenmsg)); nfmsg = NLMSG_DATA(nlh); - nfmsg->nfgen_family = entry->info->pf; + nfmsg->nfgen_family = entry->pf; nfmsg->version = NFNETLINK_V0; nfmsg->res_id = htons(queue->queue_num); pmsg.packet_id = htonl(entry->id); - pmsg.hw_protocol = htons(entry->skb->protocol); - pmsg.hook = entry->info->hook; + pmsg.hw_protocol = entskb->protocol; + pmsg.hook = entry->hook; - NFA_PUT(skb, NFQA_PACKET_HDR, sizeof(pmsg), &pmsg); + NLA_PUT(skb, NFQA_PACKET_HDR, sizeof(pmsg), &pmsg); - if (entry->info->indev) { - tmp_uint = htonl(entry->info->indev->ifindex); + indev = entry->indev; + if (indev) { #ifndef CONFIG_BRIDGE_NETFILTER - NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), &tmp_uint); + NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV, htonl(indev->ifindex)); #else - if (entry->info->pf == PF_BRIDGE) { + if (entry->pf == PF_BRIDGE) { /* Case 1: indev is physical input device, we need to - * look for bridge group (when called from + * look for bridge group (when called from * netfilter_bridge) */ - NFA_PUT(skb, NFQA_IFINDEX_PHYSINDEV, sizeof(tmp_uint), - &tmp_uint); + NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSINDEV, + htonl(indev->ifindex)); /* this is the bridge group "brX" */ - tmp_uint = htonl(entry->info->indev->br_port->br->dev->ifindex); - NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), - &tmp_uint); + NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV, + htonl(indev->br_port->br->dev->ifindex)); } else { /* Case 2: indev is bridge group, we need to look for * physical device (when called from ipv4) */ - NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), - &tmp_uint); - if (entry->skb->nf_bridge - && entry->skb->nf_bridge->physindev) { - tmp_uint = htonl(entry->skb->nf_bridge->physindev->ifindex); - NFA_PUT(skb, NFQA_IFINDEX_PHYSINDEV, - sizeof(tmp_uint), &tmp_uint); - } + NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV, + htonl(indev->ifindex)); + if (entskb->nf_bridge && entskb->nf_bridge->physindev) + NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSINDEV, + htonl(entskb->nf_bridge->physindev->ifindex)); } #endif } - if (entry->info->outdev) { - tmp_uint = htonl(entry->info->outdev->ifindex); + if (outdev) { #ifndef CONFIG_BRIDGE_NETFILTER - NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), &tmp_uint); + NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV, htonl(outdev->ifindex)); #else - if (entry->info->pf == PF_BRIDGE) { + if (entry->pf == PF_BRIDGE) { /* Case 1: outdev is physical output device, we need to - * look for bridge group (when called from + * look for bridge group (when called from * netfilter_bridge) */ - NFA_PUT(skb, NFQA_IFINDEX_PHYSOUTDEV, sizeof(tmp_uint), - &tmp_uint); + NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSOUTDEV, + htonl(outdev->ifindex)); /* this is the bridge group "brX" */ - tmp_uint = htonl(entry->info->outdev->br_port->br->dev->ifindex); - NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), - &tmp_uint); + NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV, + htonl(outdev->br_port->br->dev->ifindex)); } else { /* Case 2: outdev is bridge group, we need to look for * physical output device (when called from ipv4) */ - NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), - &tmp_uint); - if (entry->skb->nf_bridge - && entry->skb->nf_bridge->physoutdev) { - tmp_uint = htonl(entry->skb->nf_bridge->physoutdev->ifindex); - NFA_PUT(skb, NFQA_IFINDEX_PHYSOUTDEV, - sizeof(tmp_uint), &tmp_uint); - } + NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV, + htonl(outdev->ifindex)); + if (entskb->nf_bridge && entskb->nf_bridge->physoutdev) + NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSOUTDEV, + htonl(entskb->nf_bridge->physoutdev->ifindex)); } #endif } - if (entry->skb->nfmark) { - tmp_uint = htonl(entry->skb->nfmark); - NFA_PUT(skb, NFQA_MARK, sizeof(u_int32_t), &tmp_uint); - } + if (entskb->mark) + NLA_PUT_BE32(skb, NFQA_MARK, htonl(entskb->mark)); - if (entry->info->indev && entry->skb->dev - && entry->skb->dev->hard_header_parse) { + if (indev && entskb->dev) { struct nfqnl_msg_packet_hw phw; - - phw.hw_addrlen = - entry->skb->dev->hard_header_parse(entry->skb, - phw.hw_addr); - phw.hw_addrlen = htons(phw.hw_addrlen); - NFA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw); + int len = dev_parse_header(entskb, phw.hw_addr); + if (len) { + phw.hw_addrlen = htons(len); + NLA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw); + } } - if (entry->skb->tstamp.off_sec) { + if (entskb->tstamp.tv64) { struct nfqnl_msg_packet_timestamp ts; + struct timeval tv = ktime_to_timeval(entskb->tstamp); + ts.sec = cpu_to_be64(tv.tv_sec); + ts.usec = cpu_to_be64(tv.tv_usec); - ts.sec = htonll(skb_tv_base.tv_sec + entry->skb->tstamp.off_sec); - ts.usec = htonll(skb_tv_base.tv_usec + entry->skb->tstamp.off_usec); - - NFA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts); + NLA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts); } if (data_len) { - struct nfattr *nfa; - int size = NFA_LENGTH(data_len); + struct nlattr *nla; + int sz = nla_attr_size(data_len); - if (skb_tailroom(skb) < (int)NFA_SPACE(data_len)) { + if (skb_tailroom(skb) < nla_total_size(data_len)) { printk(KERN_WARNING "nf_queue: no tailroom!\n"); goto nlmsg_failure; } - nfa = (struct nfattr *)skb_put(skb, NFA_ALIGN(size)); - nfa->nfa_type = NFQA_PAYLOAD; - nfa->nfa_len = size; + nla = (struct nlattr *)skb_put(skb, nla_total_size(data_len)); + nla->nla_type = NFQA_PAYLOAD; + nla->nla_len = sz; - if (skb_copy_bits(entry->skb, 0, NFA_DATA(nfa), data_len)) + if (skb_copy_bits(entskb, 0, nla_data(nla), data_len)) BUG(); } - + nlh->nlmsg_len = skb->tail - old_tail; return skb; nlmsg_failure: -nfattr_failure: +nla_put_failure: if (skb) kfree_skb(skb); - *errp = -EINVAL; if (net_ratelimit()) printk(KERN_ERR "nf_queue: error creating packet message\n"); return NULL; } static int -nfqnl_enqueue_packet(struct sk_buff *skb, struct nf_info *info, - unsigned int queuenum, void *data) +nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) { - int status = -EINVAL; struct sk_buff *nskb; struct nfqnl_instance *queue; - struct nfqnl_queue_entry *entry; - - QDEBUG("entered\n"); - - queue = instance_lookup_get(queuenum); - if (!queue) { - QDEBUG("no queue instance matching\n"); - return -EINVAL; - } - - if (queue->copy_mode == NFQNL_COPY_NONE) { - QDEBUG("mode COPY_NONE, aborting\n"); - status = -EAGAIN; - goto err_out_put; - } + int err; - entry = kmalloc(sizeof(*entry), GFP_ATOMIC); - if (entry == NULL) { - if (net_ratelimit()) - printk(KERN_ERR - "nf_queue: OOM in nfqnl_enqueue_packet()\n"); - status = -ENOMEM; - goto err_out_put; - } + /* rcu_read_lock()ed by nf_hook_slow() */ + queue = instance_lookup(queuenum); + if (!queue) + goto err_out; - entry->info = info; - entry->skb = skb; - entry->id = atomic_inc_return(&queue->id_sequence); + if (queue->copy_mode == NFQNL_COPY_NONE) + goto err_out; - nskb = nfqnl_build_packet_message(queue, entry, &status); + nskb = nfqnl_build_packet_message(queue, entry); if (nskb == NULL) - goto err_out_free; - + goto err_out; + spin_lock_bh(&queue->lock); - + if (!queue->peer_pid) - goto err_out_free_nskb; + goto err_out_free_nskb; if (queue->queue_total >= queue->queue_maxlen) { - queue->queue_dropped++; - status = -ENOSPC; + queue->queue_dropped++; if (net_ratelimit()) - printk(KERN_WARNING "ip_queue: full at %d entries, " - "dropping packets(s). Dropped: %d\n", + printk(KERN_WARNING "nf_queue: full at %d entries, " + "dropping packets(s). Dropped: %d\n", queue->queue_total, queue->queue_dropped); goto err_out_free_nskb; } /* nfnetlink_unicast will either free the nskb or add it to a socket */ - status = nfnetlink_unicast(nskb, queue->peer_pid, MSG_DONTWAIT); - if (status < 0) { - queue->queue_user_dropped++; + err = nfnetlink_unicast(nskb, queue->peer_pid, MSG_DONTWAIT); + if (err < 0) { + queue->queue_user_dropped++; goto err_out_unlock; } __enqueue_entry(queue, entry); spin_unlock_bh(&queue->lock); - instance_put(queue); - return status; + return 0; err_out_free_nskb: - kfree_skb(nskb); - + kfree_skb(nskb); err_out_unlock: spin_unlock_bh(&queue->lock); - -err_out_free: - kfree(entry); -err_out_put: - instance_put(queue); - return status; +err_out: + return -1; } static int -nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e) +nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e) { + struct sk_buff *nskb; int diff; diff = data_len - e->skb->len; - if (diff < 0) - skb_trim(e->skb, data_len); - else if (diff > 0) { + if (diff < 0) { + if (pskb_trim(e->skb, data_len)) + return -ENOMEM; + } else if (diff > 0) { if (data_len > 0xFFFF) return -EINVAL; if (diff > skb_tailroom(e->skb)) { - struct sk_buff *newskb; - - newskb = skb_copy_expand(e->skb, - skb_headroom(e->skb), - diff, - GFP_ATOMIC); - if (newskb == NULL) { - printk(KERN_WARNING "ip_queue: OOM " + nskb = skb_copy_expand(e->skb, 0, + diff - skb_tailroom(e->skb), + GFP_ATOMIC); + if (!nskb) { + printk(KERN_WARNING "nf_queue: OOM " "in mangle, dropping packet\n"); return -ENOMEM; } - if (e->skb->sk) - skb_set_owner_w(newskb, e->skb->sk); kfree_skb(e->skb); - e->skb = newskb; + e->skb = nskb; } skb_put(e->skb, diff); } - if (!skb_make_writable(&e->skb, data_len)) + if (!skb_make_writable(e->skb, data_len)) return -ENOMEM; - memcpy(e->skb->data, data, data_len); - + skb_copy_to_linear_data(e->skb, data, data_len); + e->skb->ip_summed = CHECKSUM_NONE; return 0; } -static inline int -id_cmp(struct nfqnl_queue_entry *e, unsigned long id) -{ - return (id == e->id); -} - static int nfqnl_set_mode(struct nfqnl_instance *queue, unsigned char mode, unsigned int range) { - int status; + int status = 0; spin_lock_bh(&queue->lock); - status = __nfqnl_set_mode(queue, mode, range); + switch (mode) { + case NFQNL_COPY_NONE: + case NFQNL_COPY_META: + queue->copy_mode = mode; + queue->copy_range = 0; + break; + + case NFQNL_COPY_PACKET: + queue->copy_mode = mode; + /* we're using struct nlattr which has 16bit nla_len */ + if (range > 0xffff) + queue->copy_range = 0xffff; + else + queue->copy_range = range; + break; + + default: + status = -EINVAL; + + } spin_unlock_bh(&queue->lock); return status; } static int -dev_cmp(struct nfqnl_queue_entry *entry, unsigned long ifindex) +dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex) { - if (entry->info->indev) - if (entry->info->indev->ifindex == ifindex) + if (entry->indev) + if (entry->indev->ifindex == ifindex) return 1; - - if (entry->info->outdev) - if (entry->info->outdev->ifindex == ifindex) + if (entry->outdev) + if (entry->outdev->ifindex == ifindex) return 1; - +#ifdef CONFIG_BRIDGE_NETFILTER + if (entry->skb->nf_bridge) { + if (entry->skb->nf_bridge->physindev && + entry->skb->nf_bridge->physindev->ifindex == ifindex) + return 1; + if (entry->skb->nf_bridge->physoutdev && + entry->skb->nf_bridge->physoutdev->ifindex == ifindex) + return 1; + } +#endif return 0; } @@ -690,28 +534,19 @@ static void nfqnl_dev_drop(int ifindex) { int i; - - QDEBUG("entering for ifindex %u\n", ifindex); - /* this only looks like we have to hold the readlock for a way too long - * time, issue_verdict(), nf_reinject(), ... - but we always only - * issue NF_DROP, which is processed directly in nf_reinject() */ - read_lock_bh(&instances_lock); + rcu_read_lock(); - for (i = 0; i < INSTANCE_BUCKETS; i++) { + for (i = 0; i < INSTANCE_BUCKETS; i++) { struct hlist_node *tmp; struct nfqnl_instance *inst; struct hlist_head *head = &instance_table[i]; - hlist_for_each_entry(inst, tmp, head, hlist) { - struct nfqnl_queue_entry *entry; - while ((entry = find_dequeue_entry(inst, dev_cmp, - ifindex)) != NULL) - issue_verdict(entry, NF_DROP); - } + hlist_for_each_entry_rcu(inst, tmp, head, hlist) + nfqnl_flush(inst, dev_cmp, ifindex); } - read_unlock_bh(&instances_lock); + rcu_read_unlock(); } #define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) @@ -722,6 +557,9 @@ nfqnl_rcv_dev_event(struct notifier_block *this, { struct net_device *dev = ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + /* Drop any packets associated with the downed device */ if (event == NETDEV_DOWN) nfqnl_dev_drop(dev->ifindex); @@ -743,18 +581,19 @@ nfqnl_rcv_nl_event(struct notifier_block *this, int i; /* destroy all instances for this pid */ - write_lock_bh(&instances_lock); - for (i = 0; i < INSTANCE_BUCKETS; i++) { + spin_lock(&instances_lock); + for (i = 0; i < INSTANCE_BUCKETS; i++) { struct hlist_node *tmp, *t2; struct nfqnl_instance *inst; struct hlist_head *head = &instance_table[i]; hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { - if (n->pid == inst->peer_pid) + if ((n->net == &init_net) && + (n->pid == inst->peer_pid)) __instance_destroy(inst); } } - write_unlock_bh(&instances_lock); + spin_unlock(&instances_lock); } return NOTIFY_DONE; } @@ -763,15 +602,15 @@ static struct notifier_block nfqnl_rtnl_notifier = { .notifier_call = nfqnl_rcv_nl_event, }; -static const int nfqa_verdict_min[NFQA_MAX] = { - [NFQA_VERDICT_HDR-1] = sizeof(struct nfqnl_msg_verdict_hdr), - [NFQA_MARK-1] = sizeof(u_int32_t), - [NFQA_PAYLOAD-1] = 0, +static const struct nla_policy nfqa_verdict_policy[NFQA_MAX+1] = { + [NFQA_VERDICT_HDR] = { .len = sizeof(struct nfqnl_msg_verdict_hdr) }, + [NFQA_MARK] = { .type = NLA_U32 }, + [NFQA_PAYLOAD] = { .type = NLA_UNSPEC }, }; static int nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp) + struct nlmsghdr *nlh, struct nlattr *nfqa[]) { struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); u_int16_t queue_num = ntohs(nfmsg->res_id); @@ -779,175 +618,177 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, struct nfqnl_msg_verdict_hdr *vhdr; struct nfqnl_instance *queue; unsigned int verdict; - struct nfqnl_queue_entry *entry; + struct nf_queue_entry *entry; int err; - if (nfattr_bad_size(nfqa, NFQA_MAX, nfqa_verdict_min)) { - QDEBUG("bad attribute size\n"); - return -EINVAL; + rcu_read_lock(); + queue = instance_lookup(queue_num); + if (!queue) { + err = -ENODEV; + goto err_out_unlock; } - queue = instance_lookup_get(queue_num); - if (!queue) - return -ENODEV; - if (queue->peer_pid != NETLINK_CB(skb).pid) { err = -EPERM; - goto err_out_put; + goto err_out_unlock; } - if (!nfqa[NFQA_VERDICT_HDR-1]) { + if (!nfqa[NFQA_VERDICT_HDR]) { err = -EINVAL; - goto err_out_put; + goto err_out_unlock; } - vhdr = NFA_DATA(nfqa[NFQA_VERDICT_HDR-1]); + vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]); verdict = ntohl(vhdr->verdict); if ((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT) { err = -EINVAL; - goto err_out_put; + goto err_out_unlock; } - entry = find_dequeue_entry(queue, id_cmp, ntohl(vhdr->id)); + entry = find_dequeue_entry(queue, ntohl(vhdr->id)); if (entry == NULL) { err = -ENOENT; - goto err_out_put; + goto err_out_unlock; } + rcu_read_unlock(); - if (nfqa[NFQA_PAYLOAD-1]) { - if (nfqnl_mangle(NFA_DATA(nfqa[NFQA_PAYLOAD-1]), - NFA_PAYLOAD(nfqa[NFQA_PAYLOAD-1]), entry) < 0) + if (nfqa[NFQA_PAYLOAD]) { + if (nfqnl_mangle(nla_data(nfqa[NFQA_PAYLOAD]), + nla_len(nfqa[NFQA_PAYLOAD]), entry) < 0) verdict = NF_DROP; } - if (nfqa[NFQA_MARK-1]) - skb->nfmark = ntohl(*(u_int32_t *)NFA_DATA(nfqa[NFQA_MARK-1])); - - issue_verdict(entry, verdict); - instance_put(queue); + if (nfqa[NFQA_MARK]) + entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK])); + + nf_reinject(entry, verdict); return 0; -err_out_put: - instance_put(queue); +err_out_unlock: + rcu_read_unlock(); return err; } static int nfqnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp) + struct nlmsghdr *nlh, struct nlattr *nfqa[]) { return -ENOTSUPP; } -static const int nfqa_cfg_min[NFQA_CFG_MAX] = { - [NFQA_CFG_CMD-1] = sizeof(struct nfqnl_msg_config_cmd), - [NFQA_CFG_PARAMS-1] = sizeof(struct nfqnl_msg_config_params), +static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = { + [NFQA_CFG_CMD] = { .len = sizeof(struct nfqnl_msg_config_cmd) }, + [NFQA_CFG_PARAMS] = { .len = sizeof(struct nfqnl_msg_config_params) }, }; -static struct nf_queue_handler nfqh = { +static const struct nf_queue_handler nfqh = { .name = "nf_queue", .outfn = &nfqnl_enqueue_packet, }; static int nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp) + struct nlmsghdr *nlh, struct nlattr *nfqa[]) { struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); u_int16_t queue_num = ntohs(nfmsg->res_id); struct nfqnl_instance *queue; + struct nfqnl_msg_config_cmd *cmd = NULL; int ret = 0; - QDEBUG("entering for msg %u\n", NFNL_MSG_TYPE(nlh->nlmsg_type)); + if (nfqa[NFQA_CFG_CMD]) { + cmd = nla_data(nfqa[NFQA_CFG_CMD]); - if (nfattr_bad_size(nfqa, NFQA_CFG_MAX, nfqa_cfg_min)) { - QDEBUG("bad attribute size\n"); - return -EINVAL; + /* Commands without queue context - might sleep */ + switch (cmd->command) { + case NFQNL_CFG_CMD_PF_BIND: + return nf_register_queue_handler(ntohs(cmd->pf), + &nfqh); + case NFQNL_CFG_CMD_PF_UNBIND: + return nf_unregister_queue_handler(ntohs(cmd->pf), + &nfqh); + } } - queue = instance_lookup_get(queue_num); - if (nfqa[NFQA_CFG_CMD-1]) { - struct nfqnl_msg_config_cmd *cmd; - cmd = NFA_DATA(nfqa[NFQA_CFG_CMD-1]); - QDEBUG("found CFG_CMD\n"); + rcu_read_lock(); + queue = instance_lookup(queue_num); + if (queue && queue->peer_pid != NETLINK_CB(skb).pid) { + ret = -EPERM; + goto err_out_unlock; + } + if (cmd != NULL) { switch (cmd->command) { case NFQNL_CFG_CMD_BIND: - if (queue) - return -EBUSY; - + if (queue) { + ret = -EBUSY; + goto err_out_unlock; + } queue = instance_create(queue_num, NETLINK_CB(skb).pid); - if (!queue) - return -EINVAL; + if (IS_ERR(queue)) { + ret = PTR_ERR(queue); + goto err_out_unlock; + } break; case NFQNL_CFG_CMD_UNBIND: - if (!queue) - return -ENODEV; - - if (queue->peer_pid != NETLINK_CB(skb).pid) { - ret = -EPERM; - goto out_put; + if (!queue) { + ret = -ENODEV; + goto err_out_unlock; } - instance_destroy(queue); break; case NFQNL_CFG_CMD_PF_BIND: - QDEBUG("registering queue handler for pf=%u\n", - ntohs(cmd->pf)); - ret = nf_register_queue_handler(ntohs(cmd->pf), &nfqh); - break; case NFQNL_CFG_CMD_PF_UNBIND: - QDEBUG("unregistering queue handler for pf=%u\n", - ntohs(cmd->pf)); - /* This is a bug and a feature. We can unregister - * other handlers(!) */ - ret = nf_unregister_queue_handler(ntohs(cmd->pf)); break; default: - ret = -EINVAL; + ret = -ENOTSUPP; break; } - } else { - if (!queue) { - QDEBUG("no config command, and no instance ENOENT\n"); - ret = -ENOENT; - goto out_put; - } - - if (queue->peer_pid != NETLINK_CB(skb).pid) { - QDEBUG("no config command, and wrong pid\n"); - ret = -EPERM; - goto out_put; - } } - if (nfqa[NFQA_CFG_PARAMS-1]) { + if (nfqa[NFQA_CFG_PARAMS]) { struct nfqnl_msg_config_params *params; - params = NFA_DATA(nfqa[NFQA_CFG_PARAMS-1]); + if (!queue) { + ret = -ENODEV; + goto err_out_unlock; + } + params = nla_data(nfqa[NFQA_CFG_PARAMS]); nfqnl_set_mode(queue, params->copy_mode, ntohl(params->copy_range)); } -out_put: - instance_put(queue); + if (nfqa[NFQA_CFG_QUEUE_MAXLEN]) { + __be32 *queue_maxlen; + + if (!queue) { + ret = -ENODEV; + goto err_out_unlock; + } + queue_maxlen = nla_data(nfqa[NFQA_CFG_QUEUE_MAXLEN]); + spin_lock_bh(&queue->lock); + queue->queue_maxlen = ntohl(*queue_maxlen); + spin_unlock_bh(&queue->lock); + } + +err_out_unlock: + rcu_read_unlock(); return ret; } -static struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = { +static const struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = { [NFQNL_MSG_PACKET] = { .call = nfqnl_recv_unsupp, - .attr_count = NFQA_MAX, - .cap_required = CAP_NET_ADMIN }, + .attr_count = NFQA_MAX, }, [NFQNL_MSG_VERDICT] = { .call = nfqnl_recv_verdict, .attr_count = NFQA_MAX, - .cap_required = CAP_NET_ADMIN }, + .policy = nfqa_verdict_policy }, [NFQNL_MSG_CONFIG] = { .call = nfqnl_recv_config, .attr_count = NFQA_CFG_MAX, - .cap_required = CAP_NET_ADMIN }, + .policy = nfqa_cfg_policy }, }; -static struct nfnetlink_subsystem nfqnl_subsys = { +static const struct nfnetlink_subsystem nfqnl_subsys = { .name = "nf_queue", .subsys_id = NFNL_SUBSYS_QUEUE, .cb_count = NFQNL_MSG_MAX, @@ -999,8 +840,9 @@ static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos) } static void *seq_start(struct seq_file *seq, loff_t *pos) + __acquires(instances_lock) { - read_lock_bh(&instances_lock); + spin_lock(&instances_lock); return get_idx(seq, *pos); } @@ -1011,8 +853,9 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos) } static void seq_stop(struct seq_file *s, void *v) + __releases(instances_lock) { - read_unlock_bh(&instances_lock); + spin_unlock(&instances_lock); } static int seq_show(struct seq_file *s, void *v) @@ -1024,11 +867,10 @@ static int seq_show(struct seq_file *s, void *v) inst->peer_pid, inst->queue_total, inst->copy_mode, inst->copy_range, inst->queue_dropped, inst->queue_user_dropped, - atomic_read(&inst->id_sequence), - atomic_read(&inst->use)); + inst->id_sequence, 1); } -static struct seq_operations nfqnl_seq_ops = { +static const struct seq_operations nfqnl_seq_ops = { .start = seq_start, .next = seq_next, .stop = seq_stop, @@ -1037,26 +879,11 @@ static struct seq_operations nfqnl_seq_ops = { static int nfqnl_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - struct iter_state *is; - int ret; - - is = kmalloc(sizeof(*is), GFP_KERNEL); - if (!is) - return -ENOMEM; - memset(is, 0, sizeof(*is)); - ret = seq_open(file, &nfqnl_seq_ops); - if (ret < 0) - goto out_free; - seq = file->private_data; - seq->private = is; - return ret; -out_free: - kfree(is); - return ret; + return seq_open_private(file, &nfqnl_seq_ops, + sizeof(struct iter_state)); } -static struct file_operations nfqnl_file_ops = { +static const struct file_operations nfqnl_file_ops = { .owner = THIS_MODULE, .open = nfqnl_open, .read = seq_read, @@ -1066,16 +893,9 @@ static struct file_operations nfqnl_file_ops = { #endif /* PROC_FS */ -static int -init_or_cleanup(int init) +static int __init nfnetlink_queue_init(void) { int i, status = -ENOMEM; -#ifdef CONFIG_PROC_FS - struct proc_dir_entry *proc_nfqueue; -#endif - - if (!init) - goto cleanup; for (i = 0; i < INSTANCE_BUCKETS; i++) INIT_HLIST_HEAD(&instance_table[i]); @@ -1088,39 +908,32 @@ init_or_cleanup(int init) } #ifdef CONFIG_PROC_FS - proc_nfqueue = create_proc_entry("nfnetlink_queue", 0440, - proc_net_netfilter); - if (!proc_nfqueue) + if (!proc_create("nfnetlink_queue", 0440, + proc_net_netfilter, &nfqnl_file_ops)) goto cleanup_subsys; - proc_nfqueue->proc_fops = &nfqnl_file_ops; #endif register_netdevice_notifier(&nfqnl_dev_notifier); - return status; -cleanup: - nf_unregister_queue_handlers(&nfqh); - unregister_netdevice_notifier(&nfqnl_dev_notifier); #ifdef CONFIG_PROC_FS - remove_proc_entry("nfnetlink_queue", proc_net_netfilter); cleanup_subsys: -#endif nfnetlink_subsys_unregister(&nfqnl_subsys); +#endif cleanup_netlink_notifier: netlink_unregister_notifier(&nfqnl_rtnl_notifier); return status; } -static int __init init(void) -{ - - return init_or_cleanup(1); -} - -static void __exit fini(void) +static void __exit nfnetlink_queue_fini(void) { - init_or_cleanup(0); + nf_unregister_queue_handlers(&nfqh); + unregister_netdevice_notifier(&nfqnl_dev_notifier); +#ifdef CONFIG_PROC_FS + remove_proc_entry("nfnetlink_queue", proc_net_netfilter); +#endif + nfnetlink_subsys_unregister(&nfqnl_subsys); + netlink_unregister_notifier(&nfqnl_rtnl_notifier); } MODULE_DESCRIPTION("netfilter packet queue handler"); @@ -1128,5 +941,5 @@ MODULE_AUTHOR("Harald Welte "); MODULE_LICENSE("GPL"); MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_QUEUE); -module_init(init); -module_exit(fini); +module_init(nfnetlink_queue_init); +module_exit(nfnetlink_queue_fini);