diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index c5006b0..2f25ff6 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -20,6 +20,7 @@
 #include
 #include
 #include
+#include

 #include
 #include
@@ -27,38 +28,32 @@
 #include
 #include

-LIST_HEAD(nf_ct_expect_list);
-EXPORT_SYMBOL_GPL(nf_ct_expect_list);
-
-struct hlist_head *nf_ct_expect_hash __read_mostly;
-EXPORT_SYMBOL_GPL(nf_ct_expect_hash);
-
 unsigned int nf_ct_expect_hsize __read_mostly;
 EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);

 static unsigned int nf_ct_expect_hash_rnd __read_mostly;
-static unsigned int nf_ct_expect_count;
+unsigned int nf_ct_expect_max __read_mostly;
 static int nf_ct_expect_hash_rnd_initted __read_mostly;
-static int nf_ct_expect_vmalloc;

 static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
-static unsigned int nf_ct_expect_next_id;

 /* nf_conntrack_expect helper functions */
 void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
 {
 	struct nf_conn_help *master_help = nfct_help(exp->master);
+	struct net *net = nf_ct_exp_net(exp);

 	NF_CT_ASSERT(master_help);
 	NF_CT_ASSERT(!timer_pending(&exp->timeout));

-	list_del(&exp->list);
-	hlist_del(&exp->hnode);
-	nf_ct_expect_count--;
+	hlist_del_rcu(&exp->hnode);
+	net->ct.expect_count--;

-	NF_CT_STAT_INC(expect_delete);
-	master_help->expecting--;
+	hlist_del(&exp->lnode);
+	master_help->expecting[exp->class]--;

 	nf_ct_expect_put(exp);
+
+	NF_CT_STAT_INC(net, expect_delete);
 }
 EXPORT_SYMBOL_GPL(nf_ct_unlink_expect);
@@ -66,37 +61,40 @@ static void nf_ct_expectation_timed_out(unsigned long ul_expect)
 {
 	struct nf_conntrack_expect *exp = (void *)ul_expect;

-	write_lock_bh(&nf_conntrack_lock);
+	spin_lock_bh(&nf_conntrack_lock);
 	nf_ct_unlink_expect(exp);
-	write_unlock_bh(&nf_conntrack_lock);
+	spin_unlock_bh(&nf_conntrack_lock);
 	nf_ct_expect_put(exp);
 }

 static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple)
 {
+	unsigned int hash;
+
 	if (unlikely(!nf_ct_expect_hash_rnd_initted)) {
-		get_random_bytes(&nf_ct_expect_hash_rnd, 4);
+		get_random_bytes(&nf_ct_expect_hash_rnd,
+				 sizeof(nf_ct_expect_hash_rnd));
 		nf_ct_expect_hash_rnd_initted = 1;
 	}

-	return jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
+	hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
 		      (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
-		       tuple->dst.u.all) ^ nf_ct_expect_hash_rnd) %
-	       nf_ct_expect_hsize;
+		       (__force __u16)tuple->dst.u.all) ^ nf_ct_expect_hash_rnd);
+	return ((u64)hash * nf_ct_expect_hsize) >> 32;
 }

 struct nf_conntrack_expect *
-__nf_ct_expect_find(const struct nf_conntrack_tuple *tuple)
+__nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple)
 {
 	struct nf_conntrack_expect *i;
 	struct hlist_node *n;
 	unsigned int h;

-	if (!nf_ct_expect_count)
+	if (!net->ct.expect_count)
 		return NULL;

 	h = nf_ct_expect_dst_hash(tuple);
-	hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) {
+	hlist_for_each_entry_rcu(i, n, &net->ct.expect_hash[h], hnode) {
 		if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
 			return i;
 	}
@@ -106,15 +104,15 @@ EXPORT_SYMBOL_GPL(__nf_ct_expect_find);

 /* Just find a expectation corresponding to a tuple. */
 struct nf_conntrack_expect *
-nf_ct_expect_find_get(const struct nf_conntrack_tuple *tuple)
+nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple)
 {
 	struct nf_conntrack_expect *i;

-	read_lock_bh(&nf_conntrack_lock);
-	i = __nf_ct_expect_find(tuple);
-	if (i)
-		atomic_inc(&i->use);
-	read_unlock_bh(&nf_conntrack_lock);
+	rcu_read_lock();
+	i = __nf_ct_expect_find(net, tuple);
+	if (i && !atomic_inc_not_zero(&i->use))
+		i = NULL;
+	rcu_read_unlock();

 	return i;
 }
@@ -123,11 +121,23 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);

 /* If an expectation for this connection is found, it gets delete from
  * global list then returned. */
 struct nf_conntrack_expect *
-nf_ct_find_expectation(const struct nf_conntrack_tuple *tuple)
+nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple)
 {
-	struct nf_conntrack_expect *exp;
+	struct nf_conntrack_expect *i, *exp = NULL;
+	struct hlist_node *n;
+	unsigned int h;
+
+	if (!net->ct.expect_count)
+		return NULL;

-	exp = __nf_ct_expect_find(tuple);
+	h = nf_ct_expect_dst_hash(tuple);
+	hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
+		if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
+		    nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
+			exp = i;
+			break;
+		}
+	}
 	if (!exp)
 		return NULL;
@@ -153,17 +163,18 @@ nf_ct_find_expectation(const struct nf_conntrack_tuple *tuple)
 /* delete all expectations for this conntrack */
 void nf_ct_remove_expectations(struct nf_conn *ct)
 {
-	struct nf_conntrack_expect *i, *tmp;
 	struct nf_conn_help *help = nfct_help(ct);
+	struct nf_conntrack_expect *exp;
+	struct hlist_node *n, *next;

 	/* Optimization: most connection never expect any others. */
-	if (!help || help->expecting == 0)
+	if (!help)
 		return;

-	list_for_each_entry_safe(i, tmp, &nf_ct_expect_list, list) {
-		if (i->master == ct && del_timer(&i->timeout)) {
-			nf_ct_unlink_expect(i);
-			nf_ct_expect_put(i);
+	hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) {
+		if (del_timer(&exp->timeout)) {
+			nf_ct_unlink_expect(exp);
+			nf_ct_expect_put(exp);
 		}
 	}
 }
@@ -191,20 +202,20 @@ static inline int expect_clash(const struct nf_conntrack_expect *a,
 static inline int expect_matches(const struct nf_conntrack_expect *a,
 				 const struct nf_conntrack_expect *b)
 {
-	return a->master == b->master
-	       && nf_ct_tuple_equal(&a->tuple, &b->tuple)
-	       && nf_ct_tuple_mask_equal(&a->mask, &b->mask);
+	return a->master == b->master && a->class == b->class &&
+	       nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
+	       nf_ct_tuple_mask_equal(&a->mask, &b->mask);
 }

 /* Generally a bad idea to call this: could have matched already. */
 void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
 {
-	write_lock_bh(&nf_conntrack_lock);
+	spin_lock_bh(&nf_conntrack_lock);
 	if (del_timer(&exp->timeout)) {
 		nf_ct_unlink_expect(exp);
 		nf_ct_expect_put(exp);
 	}
-	write_unlock_bh(&nf_conntrack_lock);
+	spin_unlock_bh(&nf_conntrack_lock);
 }
 EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);

@@ -221,14 +232,16 @@ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)

 	new->master = me;
 	atomic_set(&new->use, 1);
+	INIT_RCU_HEAD(&new->rcu);
 	return new;
 }
 EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);

-void nf_ct_expect_init(struct nf_conntrack_expect *exp, int family,
-		       union nf_conntrack_address *saddr,
-		       union nf_conntrack_address *daddr,
-		       u_int8_t proto, __be16 *src, __be16 *dst)
+void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
+		       u_int8_t family,
+		       const union nf_inet_addr *saddr,
+		       const union nf_inet_addr *daddr,
+		       u_int8_t proto, const __be16 *src, const __be16 *dst)
 {
 	int len;

@@ -238,6 +251,7 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, int family,
 		len = 16;

 	exp->flags = 0;
+	exp->class = class;
 	exp->expectfn = NULL;
 	exp->helper = NULL;
 	exp->tuple.src.l3num = family;
@@ -259,8 +273,8 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, int family,
 	}

 	if (src) {
-		exp->tuple.src.u.all = (__force u16)*src;
-		exp->mask.src.u.all = 0xFFFF;
+		exp->tuple.src.u.all = *src;
+		exp->mask.src.u.all = htons(0xFFFF);
 	} else {
 		exp->tuple.src.u.all = 0;
 		exp->mask.src.u.all = 0;
@@ -272,85 +286,100 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, int family,
 	memset((void *)&exp->tuple.dst.u3 + len, 0x00,
 	       sizeof(exp->tuple.dst.u3) - len);

-	exp->tuple.dst.u.all = (__force u16)*dst;
+	exp->tuple.dst.u.all = *dst;
 }
 EXPORT_SYMBOL_GPL(nf_ct_expect_init);

+static void nf_ct_expect_free_rcu(struct rcu_head *head)
+{
+	struct nf_conntrack_expect *exp;
+
+	exp = container_of(head, struct nf_conntrack_expect, rcu);
+	kmem_cache_free(nf_ct_expect_cachep, exp);
+}
+
 void nf_ct_expect_put(struct nf_conntrack_expect *exp)
 {
 	if (atomic_dec_and_test(&exp->use))
-		kmem_cache_free(nf_ct_expect_cachep, exp);
+		call_rcu(&exp->rcu, nf_ct_expect_free_rcu);
 }
 EXPORT_SYMBOL_GPL(nf_ct_expect_put);

 static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
 {
 	struct nf_conn_help *master_help = nfct_help(exp->master);
+	struct net *net = nf_ct_exp_net(exp);
+	const struct nf_conntrack_expect_policy *p;
 	unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);

 	atomic_inc(&exp->use);
-	master_help->expecting++;
-	list_add(&exp->list, &nf_ct_expect_list);
-	hlist_add_head(&exp->hnode, &nf_ct_expect_hash[h]);
-	nf_ct_expect_count++;
+	hlist_add_head(&exp->lnode, &master_help->expectations);
+	master_help->expecting[exp->class]++;
+
+	hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]);
+	net->ct.expect_count++;

 	setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
 		    (unsigned long)exp);
-	exp->timeout.expires = jiffies + master_help->helper->timeout * HZ;
+	p = &master_help->helper->expect_policy[exp->class];
+	exp->timeout.expires = jiffies + p->timeout * HZ;
 	add_timer(&exp->timeout);

-	exp->id = ++nf_ct_expect_next_id;
 	atomic_inc(&exp->use);
-	NF_CT_STAT_INC(expect_create);
+	NF_CT_STAT_INC(net, expect_create);
 }

 /* Race with expectations being used means we could have none to find; OK. */
-static void evict_oldest_expect(struct nf_conn *master)
+static void evict_oldest_expect(struct nf_conn *master,
+				struct nf_conntrack_expect *new)
 {
-	struct nf_conntrack_expect *i;
+	struct nf_conn_help *master_help = nfct_help(master);
+	struct nf_conntrack_expect *exp, *last = NULL;
+	struct hlist_node *n;

-	list_for_each_entry_reverse(i, &nf_ct_expect_list, list) {
-		if (i->master == master) {
-			if (del_timer(&i->timeout)) {
-				nf_ct_unlink_expect(i);
-				nf_ct_expect_put(i);
-			}
-			break;
-		}
+	hlist_for_each_entry(exp, n, &master_help->expectations, lnode) {
+		if (exp->class == new->class)
+			last = exp;
+	}
+
+	if (last && del_timer(&last->timeout)) {
+		nf_ct_unlink_expect(last);
+		nf_ct_expect_put(last);
 	}
 }

 static inline int refresh_timer(struct nf_conntrack_expect *i)
 {
 	struct nf_conn_help *master_help = nfct_help(i->master);
+	const struct nf_conntrack_expect_policy *p;

 	if (!del_timer(&i->timeout))
 		return 0;

-	i->timeout.expires = jiffies + master_help->helper->timeout*HZ;
+	p = &master_help->helper->expect_policy[i->class];
+	i->timeout.expires = jiffies + p->timeout * HZ;
 	add_timer(&i->timeout);
 	return 1;
 }

-int nf_ct_expect_related(struct nf_conntrack_expect *expect)
+static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
 {
+	const struct nf_conntrack_expect_policy *p;
 	struct nf_conntrack_expect *i;
 	struct nf_conn *master = expect->master;
 	struct nf_conn_help *master_help = nfct_help(master);
+	struct net *net = nf_ct_exp_net(expect);
 	struct hlist_node *n;
 	unsigned int h;
-	int ret;
-
-	NF_CT_ASSERT(master_help);
+	int ret = 1;

-	write_lock_bh(&nf_conntrack_lock);
 	if (!master_help->helper) {
 		ret = -ESHUTDOWN;
 		goto out;
 	}
 	h = nf_ct_expect_dst_hash(&expect->tuple);
-	hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) {
+	hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
 		if (expect_matches(i, expect)) {
 			/* Refresh timer: if it's dying, ignore.. */
 			if (refresh_timer(i)) {
@@ -363,31 +392,63 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect)
 		}
 	}
 	/* Will be over limit? */
-	if (master_help->helper->max_expected &&
-	    master_help->expecting >= master_help->helper->max_expected)
-		evict_oldest_expect(master);
+	p = &master_help->helper->expect_policy[expect->class];
+	if (p->max_expected &&
+	    master_help->expecting[expect->class] >= p->max_expected) {
+		evict_oldest_expect(master, expect);
+		if (master_help->expecting[expect->class] >= p->max_expected) {
+			ret = -EMFILE;
+			goto out;
+		}
+	}
+
+	if (net->ct.expect_count >= nf_ct_expect_max) {
+		if (net_ratelimit())
+			printk(KERN_WARNING
+			       "nf_conntrack: expectation table full\n");
+		ret = -EMFILE;
+	}
+out:
+	return ret;
+}
+
+int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
+				u32 pid, int report)
+{
+	int ret;
+
+	spin_lock_bh(&nf_conntrack_lock);
+	ret = __nf_ct_expect_check(expect);
+	if (ret <= 0)
+		goto out;

-	nf_ct_expect_insert(expect);
-	nf_ct_expect_event(IPEXP_NEW, expect);
 	ret = 0;
+	nf_ct_expect_insert(expect);
+	spin_unlock_bh(&nf_conntrack_lock);
+	nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report);
+	return ret;
 out:
-	write_unlock_bh(&nf_conntrack_lock);
+	spin_unlock_bh(&nf_conntrack_lock);
 	return ret;
 }
-EXPORT_SYMBOL_GPL(nf_ct_expect_related);
+EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);

 #ifdef CONFIG_PROC_FS
 struct ct_expect_iter_state {
+	struct seq_net_private p;
 	unsigned int bucket;
 };

 static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
 {
+	struct net *net = seq_file_net(seq);
 	struct ct_expect_iter_state *st = seq->private;
+	struct hlist_node *n;

 	for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
-		if (!hlist_empty(&nf_ct_expect_hash[st->bucket]))
-			return nf_ct_expect_hash[st->bucket].first;
+		n = rcu_dereference(net->ct.expect_hash[st->bucket].first);
+		if (n)
+			return n;
 	}
 	return NULL;
 }
@@ -395,13 +456,14 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
 static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
 					     struct hlist_node *head)
 {
+	struct net *net = seq_file_net(seq);
 	struct ct_expect_iter_state *st = seq->private;

-	head = head->next;
+	head = rcu_dereference(head->next);
 	while (head == NULL) {
 		if (++st->bucket >= nf_ct_expect_hsize)
 			return NULL;
-		head = nf_ct_expect_hash[st->bucket].first;
+		head = rcu_dereference(net->ct.expect_hash[st->bucket].first);
 	}
 	return head;
 }
@@ -417,8 +479,9 @@ static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
 }

 static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(RCU)
 {
-	read_lock_bh(&nf_conntrack_lock);
+	rcu_read_lock();
 	return ct_expect_get_idx(seq, *pos);
 }

@@ -429,14 +492,16 @@ static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }

 static void exp_seq_stop(struct seq_file *seq, void *v)
+	__releases(RCU)
 {
-	read_unlock_bh(&nf_conntrack_lock);
+	rcu_read_unlock();
 }

 static int exp_seq_show(struct seq_file *s, void *v)
 {
 	struct nf_conntrack_expect *expect;
 	struct hlist_node *n = v;
+	char *delim = "";

 	expect = hlist_entry(n, struct nf_conntrack_expect, hnode);

@@ -452,10 +517,18 @@ static int exp_seq_show(struct seq_file *s, void *v)
 		    __nf_ct_l3proto_find(expect->tuple.src.l3num),
 		    __nf_ct_l4proto_find(expect->tuple.src.l3num,
 					 expect->tuple.dst.protonum));
+
+	if (expect->flags & NF_CT_EXPECT_PERMANENT) {
+		seq_printf(s, "PERMANENT");
+		delim = ",";
+	}
+	if (expect->flags & NF_CT_EXPECT_INACTIVE)
+		seq_printf(s, "%sINACTIVE", delim);
+
 	return seq_putc(s, '\n');
 }

-static struct seq_operations exp_seq_ops = {
+static const struct seq_operations exp_seq_ops = {
 	.start = exp_seq_start,
 	.next = exp_seq_next,
 	.stop = exp_seq_stop,
@@ -464,23 +537,8 @@ static struct seq_operations exp_seq_ops = {

 static int exp_open(struct inode *inode, struct file *file)
 {
-	struct seq_file *seq;
-	struct ct_expect_iter_state *st;
-	int ret;
-
-	st = kmalloc(sizeof(struct ct_expect_iter_state), GFP_KERNEL);
-	if (st == NULL)
-		return -ENOMEM;
-	ret = seq_open(file, &exp_seq_ops);
-	if (ret)
-		goto out_free;
-	seq = file->private_data;
-	seq->private = st;
-	memset(st, 0, sizeof(struct ct_expect_iter_state));
-	return ret;
-out_free:
-	kfree(st);
-	return ret;
+	return seq_open_net(inode, file, &exp_seq_ops,
+			sizeof(struct ct_expect_iter_state));
 }

 static const struct file_operations exp_file_ops = {
@@ -488,71 +546,81 @@ static int exp_open(struct inode *inode, struct file *file)
 	.open = exp_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
-	.release = seq_release_private,
+	.release = seq_release_net,
 };
 #endif /* CONFIG_PROC_FS */

-static int __init exp_proc_init(void)
+static int exp_proc_init(struct net *net)
 {
 #ifdef CONFIG_PROC_FS
 	struct proc_dir_entry *proc;

-	proc = proc_net_fops_create("nf_conntrack_expect", 0440, &exp_file_ops);
+	proc = proc_net_fops_create(net, "nf_conntrack_expect", 0440, &exp_file_ops);
 	if (!proc)
 		return -ENOMEM;
 #endif /* CONFIG_PROC_FS */
 	return 0;
 }

-static void exp_proc_remove(void)
+static void exp_proc_remove(struct net *net)
 {
 #ifdef CONFIG_PROC_FS
-	proc_net_remove("nf_conntrack_expect");
+	proc_net_remove(net, "nf_conntrack_expect");
 #endif /* CONFIG_PROC_FS */
 }

-module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0600);
+module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400);

-int __init nf_conntrack_expect_init(void)
+int nf_conntrack_expect_init(struct net *net)
 {
 	int err = -ENOMEM;

-	if (!nf_ct_expect_hsize) {
-		nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
-		if (!nf_ct_expect_hsize)
-			nf_ct_expect_hsize = 1;
+	if (net_eq(net, &init_net)) {
+		if (!nf_ct_expect_hsize) {
+			nf_ct_expect_hsize = net->ct.htable_size / 256;
+			if (!nf_ct_expect_hsize)
+				nf_ct_expect_hsize = 1;
+		}
+		nf_ct_expect_max = nf_ct_expect_hsize * 4;
 	}

-	nf_ct_expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
-						  &nf_ct_expect_vmalloc);
-	if (nf_ct_expect_hash == NULL)
+	net->ct.expect_count = 0;
+	net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
+						  &net->ct.expect_vmalloc, 0);
+	if (net->ct.expect_hash == NULL)
 		goto err1;

-	nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
+	if (net_eq(net, &init_net)) {
+		nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
 					sizeof(struct nf_conntrack_expect),
-					0, 0, NULL, NULL);
-	if (!nf_ct_expect_cachep)
-		goto err2;
+					0, 0, NULL);
+		if (!nf_ct_expect_cachep)
+			goto err2;
+	}

-	err = exp_proc_init();
+	err = exp_proc_init(net);
 	if (err < 0)
 		goto err3;

 	return 0;

 err3:
-	nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc,
-			     nf_ct_expect_hsize);
+	if (net_eq(net, &init_net))
+		kmem_cache_destroy(nf_ct_expect_cachep);
 err2:
-	kmem_cache_destroy(nf_ct_expect_cachep);
+	nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
+			     nf_ct_expect_hsize);
 err1:
 	return err;
 }

-void nf_conntrack_expect_fini(void)
+void nf_conntrack_expect_fini(struct net *net)
 {
-	exp_proc_remove();
-	kmem_cache_destroy(nf_ct_expect_cachep);
-	nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc,
+	exp_proc_remove(net);
+	if (net_eq(net, &init_net)) {
+		rcu_barrier(); /* Wait for call_rcu() before destroy */
+		kmem_cache_destroy(nf_ct_expect_cachep);
+	}
+	nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
 			     nf_ct_expect_hsize);
 }