#include <linux/percpu.h>
#include <linux/kernel.h>
#include <linux/jhash.h>
+#include <net/net_namespace.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_tuple.h>
/*
 * File-scope expectation state.  The global hash table, entry counter,
 * vmalloc flag and id counter marked '-' below are dropped by this change;
 * the rest of the file uses net->ct.expect_hash, net->ct.expect_count and
 * net->ct.expect_vmalloc instead.
 */
-struct hlist_head *nf_ct_expect_hash __read_mostly;
-EXPORT_SYMBOL_GPL(nf_ct_expect_hash);
-
/* Number of buckets in an expectation hash table (also the expect_hashsize module parameter). */
unsigned int nf_ct_expect_hsize __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);
/* Random value mixed into the bucket hash; filled in on first use. */
static unsigned int nf_ct_expect_hash_rnd __read_mostly;
-static unsigned int nf_ct_expect_count;
/* Limit that net->ct.expect_count is compared against before a new insert. */
unsigned int nf_ct_expect_max __read_mostly;
/* Non-zero once nf_ct_expect_hash_rnd has been seeded. */
static int nf_ct_expect_hash_rnd_initted __read_mostly;
-static int nf_ct_expect_vmalloc;
/* Slab cache that struct nf_conntrack_expect objects are released to. */
static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
-static unsigned int nf_ct_expect_next_id;
/* nf_conntrack_expect helper functions */
/*
 * Unlink an expectation: remove it from the per-namespace hash (RCU-safe
 * delete) and from its master's expectation list, decrement the namespace
 * and per-class counters, drop one reference and bump the expect_delete
 * statistic.  The assertions require the master to have a help extension
 * and the expectation's timer to be no longer pending.
 */
void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
{
struct nf_conn_help *master_help = nfct_help(exp->master);
+ struct net *net = nf_ct_exp_net(exp);
NF_CT_ASSERT(master_help);
NF_CT_ASSERT(!timer_pending(&exp->timeout));
/* hnode is the hash-table linkage; it is now removed with the RCU variant. */
- hlist_del(&exp->hnode);
- nf_ct_expect_count--;
+ hlist_del_rcu(&exp->hnode);
+ net->ct.expect_count--;
/* lnode is the linkage on the master's own expectation list. */
hlist_del(&exp->lnode);
/* The outstanding-expectation counter is now kept per class. */
- master_help->expecting--;
+ master_help->expecting[exp->class]--;
nf_ct_expect_put(exp);
- NF_CT_STAT_INC(expect_delete);
+ NF_CT_STAT_INC(net, expect_delete);
}
EXPORT_SYMBOL_GPL(nf_ct_unlink_expect);
{
struct nf_conntrack_expect *exp = (void *)ul_expect;
- write_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_lock);
nf_ct_unlink_expect(exp);
- write_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_lock);
nf_ct_expect_put(exp);
}
/*
 * Map a tuple to a bucket index in [0, nf_ct_expect_hsize).
 * The hash covers the destination address words (dst.u3.all) and is keyed
 * with the destination protocol number, the layer-3 number, the low 16 bits
 * of dst.u.all and a random value that is generated on the first call.
 */
static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple)
{
+ unsigned int hash;
+
/* Lazily seed the random component the first time a hash is needed. */
if (unlikely(!nf_ct_expect_hash_rnd_initted)) {
get_random_bytes(&nf_ct_expect_hash_rnd, 4);
nf_ct_expect_hash_rnd_initted = 1;
}
- return jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
+ hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
(((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
- (__force __u16)tuple->dst.u.all) ^ nf_ct_expect_hash_rnd) %
- nf_ct_expect_hsize;
+ (__force __u16)tuple->dst.u.all) ^ nf_ct_expect_hash_rnd);
/*
 * Scale the 32-bit hash into the table range with a 64-bit multiply and
 * shift; this replaces the former modulo by nf_ct_expect_hsize.
 */
+ return ((u64)hash * nf_ct_expect_hsize) >> 32;
}
/*
 * Find an expectation in @net whose masked tuple matches @tuple.
 * Returns NULL straight away when the namespace holds no expectations.
 * The bucket is walked with the RCU list iterator and the matching entry
 * is returned without taking a reference on it.
 */
struct nf_conntrack_expect *
-__nf_ct_expect_find(const struct nf_conntrack_tuple *tuple)
+__nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple)
{
struct nf_conntrack_expect *i;
struct hlist_node *n;
unsigned int h;
/* Cheap early exit: nothing can match in an empty table. */
- if (!nf_ct_expect_count)
+ if (!net->ct.expect_count)
return NULL;
h = nf_ct_expect_dst_hash(tuple);
- hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) {
+ hlist_for_each_entry_rcu(i, n, &net->ct.expect_hash[h], hnode) {
if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
return i;
}
/*
 * Just find an expectation corresponding to a tuple and take a reference
 * on it.  Returns NULL when nothing matches or when the matching entry's
 * use count has already dropped to zero.
 */
struct nf_conntrack_expect *
-nf_ct_expect_find_get(const struct nf_conntrack_tuple *tuple)
+nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple)
{
struct nf_conntrack_expect *i;
/* The lookup is now done in an RCU read-side section instead of under the read lock. */
- read_lock_bh(&nf_conntrack_lock);
- i = __nf_ct_expect_find(tuple);
- if (i)
- atomic_inc(&i->use);
- read_unlock_bh(&nf_conntrack_lock);
+ rcu_read_lock();
+ i = __nf_ct_expect_find(net, tuple);
/* Only hand the entry out if a reference could still be obtained. */
+ if (i && !atomic_inc_not_zero(&i->use))
+ i = NULL;
+ rcu_read_unlock();
return i;
}
/* If an expectation for this connection is found, it gets deleted from
 * the global list and is then returned. */
struct nf_conntrack_expect *
-nf_ct_find_expectation(const struct nf_conntrack_tuple *tuple)
+nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple)
{
- struct nf_conntrack_expect *exp;
+ struct nf_conntrack_expect *i, *exp = NULL;
+ struct hlist_node *n;
+ unsigned int h;
+
/* Nothing to search when the namespace has no expectations. */
+ if (!net->ct.expect_count)
+ return NULL;
- exp = __nf_ct_expect_find(tuple);
/*
 * The bucket walk is open-coded here (rather than calling
 * __nf_ct_expect_find()) so that entries flagged NF_CT_EXPECT_INACTIVE
 * are skipped.
 * NOTE(review): the plain (non-RCU) iterator is used; presumably the
 * caller holds nf_conntrack_lock -- confirm.
 */
+ h = nf_ct_expect_dst_hash(tuple);
+ hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
+ if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
+ nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
+ exp = i;
+ break;
+ }
+ }
if (!exp)
return NULL;
struct hlist_node *n, *next;
/* Optimization: most connection never expect any others. */
- if (!help || help->expecting == 0)
+ if (!help)
return;
hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) {
/*
 * Two expectations match when they have the same master connection, the
 * same class (newly added to the comparison), equal tuples and equal masks.
 * Returns non-zero on a match.
 */
static inline int expect_matches(const struct nf_conntrack_expect *a,
const struct nf_conntrack_expect *b)
{
- return a->master == b->master
+ return a->master == b->master && a->class == b->class
&& nf_ct_tuple_equal(&a->tuple, &b->tuple)
&& nf_ct_tuple_mask_equal(&a->mask, &b->mask);
}
/*
 * Generally a bad idea to call this: could have matched already.
 *
 * Removes @exp under nf_conntrack_lock, but only if its timer could still
 * be cancelled; otherwise nothing is done.
 */
void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
{
- write_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_lock);
if (del_timer(&exp->timeout)) {
nf_ct_unlink_expect(exp);
/* NOTE(review): presumably releases the reference held for the timer -- confirm. */
nf_ct_expect_put(exp);
}
- write_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);
new->master = me;
atomic_set(&new->use, 1);
+ INIT_RCU_HEAD(&new->rcu);
return new;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);
/*
 * Initialise the fields of an expectation.  The new signature adds the
 * expectation @class, takes the addresses as const union nf_inet_addr
 * pointers and makes the port pointers const.  Only part of the body is
 * visible in this excerpt.
 */
-void nf_ct_expect_init(struct nf_conntrack_expect *exp, int family,
- union nf_conntrack_address *saddr,
- union nf_conntrack_address *daddr,
- u_int8_t proto, __be16 *src, __be16 *dst)
+void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
+ u_int8_t family,
+ const union nf_inet_addr *saddr,
+ const union nf_inet_addr *daddr,
+ u_int8_t proto, const __be16 *src, const __be16 *dst)
{
int len;
len = 16;
exp->flags = 0;
/* Record which class of the helper's policy this expectation belongs to. */
+ exp->class = class;
exp->expectfn = NULL;
exp->helper = NULL;
exp->tuple.src.l3num = family;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_init);
/*
 * RCU callback: recover the expectation that embeds @head and return it
 * to the expectation slab cache.
 */
+static void nf_ct_expect_free_rcu(struct rcu_head *head)
+{
+ struct nf_conntrack_expect *exp;
+
+ exp = container_of(head, struct nf_conntrack_expect, rcu);
+ kmem_cache_free(nf_ct_expect_cachep, exp);
+}
+
/*
 * Drop one reference on @exp.  When the count reaches zero the object is
 * no longer freed immediately; the free is deferred through call_rcu().
 */
void nf_ct_expect_put(struct nf_conntrack_expect *exp)
{
if (atomic_dec_and_test(&exp->use))
- kmem_cache_free(nf_ct_expect_cachep, exp);
+ call_rcu(&exp->rcu, nf_ct_expect_free_rcu);
}
EXPORT_SYMBOL_GPL(nf_ct_expect_put);
/*
 * Link @exp into its master's expectation list and into the per-namespace
 * hash, update the counters, arm its timeout timer and bump the
 * expect_create statistic.  Two references are taken along the way.
 */
static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
{
struct nf_conn_help *master_help = nfct_help(exp->master);
+ struct net *net = nf_ct_exp_net(exp);
+ const struct nf_conntrack_expect_policy *p;
unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);
/* First reference. NOTE(review): presumably for the list/hash linkage -- confirm. */
atomic_inc(&exp->use);
/* New entries go to the head of the master's list. */
hlist_add_head(&exp->lnode, &master_help->expectations);
- master_help->expecting++;
+ master_help->expecting[exp->class]++;
- hlist_add_head(&exp->hnode, &nf_ct_expect_hash[h]);
- nf_ct_expect_count++;
+ hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]);
+ net->ct.expect_count++;
setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
(unsigned long)exp);
/* The timeout now comes from the helper's policy entry for this class. */
- exp->timeout.expires = jiffies + master_help->helper->timeout * HZ;
+ p = &master_help->helper->expect_policy[exp->class];
+ exp->timeout.expires = jiffies + p->timeout * HZ;
add_timer(&exp->timeout);
- exp->id = ++nf_ct_expect_next_id;
/* Second reference. NOTE(review): presumably owned by the timer -- confirm. */
atomic_inc(&exp->use);
- NF_CT_STAT_INC(expect_create);
+ NF_CT_STAT_INC(net, expect_create);
}
/*
 * Race with expectations being used means we could have none to find; OK.
 *
 * Evict the last entry on the master's list that has the same class as
 * @new.  nf_ct_expect_insert() adds at the head of that list, so the last
 * match is the oldest one of that class.  The entry is only removed if its
 * timer could still be cancelled.
 */
-static void evict_oldest_expect(struct nf_conn *master)
+static void evict_oldest_expect(struct nf_conn *master,
+ struct nf_conntrack_expect *new)
{
struct nf_conn_help *master_help = nfct_help(master);
- struct nf_conntrack_expect *exp = NULL;
+ struct nf_conntrack_expect *exp, *last = NULL;
struct hlist_node *n;
- hlist_for_each_entry(exp, n, &master_help->expectations, lnode)
- ; /* nothing */
/* Walk the whole list, remembering the most recently seen same-class entry. */
+ hlist_for_each_entry(exp, n, &master_help->expectations, lnode) {
+ if (exp->class == new->class)
+ last = exp;
+ }
- if (exp && del_timer(&exp->timeout)) {
- nf_ct_unlink_expect(exp);
- nf_ct_expect_put(exp);
+ if (last && del_timer(&last->timeout)) {
+ nf_ct_unlink_expect(last);
+ nf_ct_expect_put(last);
}
}
/*
 * Restart the timeout of expectation @i.
 * Returns 0 without re-arming when del_timer() reports that the timer could
 * not be cancelled; otherwise re-arms it with the timeout taken from the
 * helper's policy for the expectation's class and returns 1.
 */
static inline int refresh_timer(struct nf_conntrack_expect *i)
{
struct nf_conn_help *master_help = nfct_help(i->master);
+ const struct nf_conntrack_expect_policy *p;
if (!del_timer(&i->timeout))
return 0;
- i->timeout.expires = jiffies + master_help->helper->timeout*HZ;
+ p = &master_help->helper->expect_policy[i->class];
+ i->timeout.expires = jiffies + p->timeout * HZ;
add_timer(&i->timeout);
return 1;
}
/*
 * Checks performed before an expectation is inserted; split out of the old
 * nf_ct_expect_related().  Locking is no longer done here: both callers in
 * this file take nf_conntrack_lock before calling.
 *
 * Visible results: -ESHUTDOWN when the master has no helper, -EMFILE when
 * the per-class limit is still reached after trying to evict the oldest
 * entry or when the namespace table is full, otherwise the initial 0.
 * Part of the duplicate-detection loop is not visible in this excerpt.
 */
-int nf_ct_expect_related(struct nf_conntrack_expect *expect)
+static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
{
+ const struct nf_conntrack_expect_policy *p;
struct nf_conntrack_expect *i;
struct nf_conn *master = expect->master;
struct nf_conn_help *master_help = nfct_help(master);
+ struct net *net = nf_ct_exp_net(expect);
struct hlist_node *n;
unsigned int h;
- int ret;
-
- NF_CT_ASSERT(master_help);
+ int ret = 0;
- write_lock_bh(&nf_conntrack_lock);
if (!master_help->helper) {
ret = -ESHUTDOWN;
goto out;
}
h = nf_ct_expect_dst_hash(&expect->tuple);
- hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) {
+ hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
if (expect_matches(i, expect)) {
/* Refresh timer: if it's dying, ignore.. */
if (refresh_timer(i)) {
}
}
/* Will be over limit? */
- if (master_help->helper->max_expected &&
- master_help->expecting >= master_help->helper->max_expected)
- evict_oldest_expect(master);
/* The limit is now per class; a max_expected of 0 disables the check. */
+ p = &master_help->helper->expect_policy[expect->class];
+ if (p->max_expected &&
+ master_help->expecting[expect->class] >= p->max_expected) {
+ evict_oldest_expect(master, expect);
/* Eviction may not have freed a slot; refuse the insert in that case. */
+ if (master_help->expecting[expect->class] >= p->max_expected) {
+ ret = -EMFILE;
+ goto out;
+ }
+ }
- if (nf_ct_expect_count >= nf_ct_expect_max) {
+ if (net->ct.expect_count >= nf_ct_expect_max) {
if (net_ratelimit())
printk(KERN_WARNING
- "nf_conntrack: expectation table full");
+ "nf_conntrack: expectation table full\n");
ret = -EMFILE;
- goto out;
}
+out:
+ return ret;
+}
+
/*
 * Register @expect: run the pre-insert checks and insert it while holding
 * nf_conntrack_lock, then emit an IPEXP_NEW event after the lock has been
 * released.  Returns the check result; a negative value means nothing was
 * inserted.
 */
+int nf_ct_expect_related(struct nf_conntrack_expect *expect)
+{
+ int ret;
+
+ spin_lock_bh(&nf_conntrack_lock);
+ ret = __nf_ct_expect_check(expect);
+ if (ret < 0)
+ goto out;
nf_ct_expect_insert(expect);
/*
 * Extra reference taken before unlocking and dropped after the event.
 * NOTE(review): presumably keeps @expect alive for the event call -- confirm.
 */
+ atomic_inc(&expect->use);
+ spin_unlock_bh(&nf_conntrack_lock);
nf_ct_expect_event(IPEXP_NEW, expect);
- ret = 0;
+ nf_ct_expect_put(expect);
+ return ret;
out:
- write_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_lock);
return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_related);
/*
 * Variant of nf_ct_expect_related() that passes @pid and @report on to the
 * event call.  The check and the insert run under nf_conntrack_lock; the
 * IPEXP_NEW event is reported after unlocking and only when the result is 0.
 */
+int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
+ u32 pid, int report)
+{
+ int ret;
+
+ spin_lock_bh(&nf_conntrack_lock);
+ ret = __nf_ct_expect_check(expect);
+ if (ret < 0)
+ goto out;
+ nf_ct_expect_insert(expect);
+out:
+ spin_unlock_bh(&nf_conntrack_lock);
+ if (ret == 0)
+ nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);
+
#ifdef CONFIG_PROC_FS
/* Iterator state kept in seq->private while the expectation table is listed. */
struct ct_expect_iter_state {
/* NOTE(review): presumably has to stay the first member for seq_open_net()/seq_file_net() -- confirm. */
+ struct seq_net_private p;
/* Index of the hash bucket currently being walked. */
unsigned int bucket;
};
/*
 * Return the first node of the first non-empty bucket in the hash table of
 * the seq_file's namespace, leaving st->bucket at that bucket's index.
 * Returns NULL when every bucket is empty.
 */
static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
{
+ struct net *net = seq_file_net(seq);
struct ct_expect_iter_state *st = seq->private;
+ struct hlist_node *n;
for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
/* Bucket heads are now read through rcu_dereference(). */
- if (!hlist_empty(&nf_ct_expect_hash[st->bucket]))
- return nf_ct_expect_hash[st->bucket].first;
+ n = rcu_dereference(net->ct.expect_hash[st->bucket].first);
+ if (n)
+ return n;
}
return NULL;
}
/*
 * Advance from @head to the next node, moving on to following buckets when
 * the current chain ends.  Returns NULL after the last bucket.
 */
static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
struct hlist_node *head)
{
+ struct net *net = seq_file_net(seq);
struct ct_expect_iter_state *st = seq->private;
- head = head->next;
+ head = rcu_dereference(head->next);
/* End of this chain: keep trying buckets until one is non-empty. */
while (head == NULL) {
if (++st->bucket >= nf_ct_expect_hsize)
return NULL;
- head = nf_ct_expect_hash[st->bucket].first;
+ head = rcu_dereference(net->ct.expect_hash[st->bucket].first);
}
return head;
}
}
/*
 * seq_file start callback: enter an RCU read-side section (formerly the
 * conntrack read lock was taken) and return the entry at position *pos.
 * The matching rcu_read_unlock() is in exp_seq_stop().
 */
static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(RCU)
{
- read_lock_bh(&nf_conntrack_lock);
+ rcu_read_lock();
return ct_expect_get_idx(seq, *pos);
}
}
/* seq_file stop callback: leave the RCU read-side section entered in exp_seq_start(). */
static void exp_seq_stop(struct seq_file *seq, void *v)
+ __releases(RCU)
{
- read_unlock_bh(&nf_conntrack_lock);
+ rcu_read_unlock();
}
/*
 * seq_file show callback: print one expectation (@v is its hash node).
 * Part of the body is not visible in this excerpt.
 */
static int exp_seq_show(struct seq_file *s, void *v)
{
struct nf_conntrack_expect *expect;
struct hlist_node *n = v;
+ char *delim = "";
expect = hlist_entry(n, struct nf_conntrack_expect, hnode);
__nf_ct_l3proto_find(expect->tuple.src.l3num),
__nf_ct_l4proto_find(expect->tuple.src.l3num,
expect->tuple.dst.protonum));
+
/* Append the flag names; a comma separates them when both are set. */
+ if (expect->flags & NF_CT_EXPECT_PERMANENT) {
+ seq_printf(s, "PERMANENT");
+ delim = ",";
+ }
+ if (expect->flags & NF_CT_EXPECT_INACTIVE)
+ seq_printf(s, "%sINACTIVE", delim);
+
return seq_putc(s, '\n');
}
/*
 * open() handler for the expectation listing.  The hand-rolled allocation
 * of the iterator state plus seq_open() is replaced by a single
 * seq_open_net() call that is given the size of the iterator state.
 */
static int exp_open(struct inode *inode, struct file *file)
{
- struct seq_file *seq;
- struct ct_expect_iter_state *st;
- int ret;
-
- st = kmalloc(sizeof(struct ct_expect_iter_state), GFP_KERNEL);
- if (st == NULL)
- return -ENOMEM;
- ret = seq_open(file, &exp_seq_ops);
- if (ret)
- goto out_free;
- seq = file->private_data;
- seq->private = st;
- memset(st, 0, sizeof(struct ct_expect_iter_state));
- return ret;
-out_free:
- kfree(st);
- return ret;
+ return seq_open_net(inode, file, &exp_seq_ops,
+ sizeof(struct ct_expect_iter_state));
}
/* File operations for the expectation listing; release now pairs with seq_open_net() in exp_open(). */
static const struct file_operations exp_file_ops = {
.open = exp_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release_private,
+ .release = seq_release_net,
};
#endif /* CONFIG_PROC_FS */
/*
 * Create the "nf_conntrack_expect" proc entry (mode 0440) for @net.
 * Returns -ENOMEM if the entry cannot be created, 0 otherwise; without
 * CONFIG_PROC_FS it does nothing and returns 0.
 */
-static int __init exp_proc_init(void)
+static int exp_proc_init(struct net *net)
{
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *proc;
- proc = proc_net_fops_create("nf_conntrack_expect", 0440, &exp_file_ops);
+ proc = proc_net_fops_create(net, "nf_conntrack_expect", 0440, &exp_file_ops);
if (!proc)
return -ENOMEM;
#endif /* CONFIG_PROC_FS */
return 0;
}
/* Remove the "nf_conntrack_expect" proc entry of @net; a no-op without CONFIG_PROC_FS. */
-static void exp_proc_remove(void)
+static void exp_proc_remove(struct net *net)
{
#ifdef CONFIG_PROC_FS
- proc_net_remove("nf_conntrack_expect");
+ proc_net_remove(net, "nf_conntrack_expect");
#endif /* CONFIG_PROC_FS */
}
/* Exposes nf_ct_expect_hsize as the "expect_hashsize" module parameter. */
module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0600);
/*
 * Set up expectation tracking for @net.
 * Global state (default table size, nf_ct_expect_max, the slab cache) is
 * only initialised for init_net; the hash table, its counter and the proc
 * entry are created for every namespace.
 * Returns 0 on success, -ENOMEM when an allocation fails, or the error
 * from exp_proc_init().
 */
-int __init nf_conntrack_expect_init(void)
+int nf_conntrack_expect_init(struct net *net)
{
int err = -ENOMEM;
- if (!nf_ct_expect_hsize) {
- nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
- if (!nf_ct_expect_hsize)
- nf_ct_expect_hsize = 1;
/* Default size: 1/256 of the conntrack table, at least one bucket. */
+ if (net_eq(net, &init_net)) {
+ if (!nf_ct_expect_hsize) {
+ nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
+ if (!nf_ct_expect_hsize)
+ nf_ct_expect_hsize = 1;
+ }
+ nf_ct_expect_max = nf_ct_expect_hsize * 4;
}
- nf_ct_expect_max = nf_ct_expect_hsize * 4;
- nf_ct_expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
- &nf_ct_expect_vmalloc);
- if (nf_ct_expect_hash == NULL)
+ net->ct.expect_count = 0;
+ net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
+ &net->ct.expect_vmalloc);
+ if (net->ct.expect_hash == NULL)
goto err1;
- nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
+ if (net_eq(net, &init_net)) {
+ nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
sizeof(struct nf_conntrack_expect),
0, 0, NULL);
- if (!nf_ct_expect_cachep)
- goto err2;
+ if (!nf_ct_expect_cachep)
+ goto err2;
+ }
- err = exp_proc_init();
+ err = exp_proc_init(net);
if (err < 0)
goto err3;
return 0;
/*
 * Unwind in reverse order of setup: the cache (created second, init_net
 * only) is destroyed at err3, the hash table (allocated first) is freed
 * at err2.  The two cleanup actions swap labels relative to the old code.
 */
err3:
- nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc,
- nf_ct_expect_hsize);
+ if (net_eq(net, &init_net))
+ kmem_cache_destroy(nf_ct_expect_cachep);
err2:
- kmem_cache_destroy(nf_ct_expect_cachep);
+ nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
+ nf_ct_expect_hsize);
err1:
return err;
}
/*
 * Tear down expectation tracking for @net: remove the proc entry, destroy
 * the shared slab cache (only when @net is init_net) and free the
 * namespace's hash table.
 */
-void nf_conntrack_expect_fini(void)
+void nf_conntrack_expect_fini(struct net *net)
{
- exp_proc_remove();
- kmem_cache_destroy(nf_ct_expect_cachep);
- nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc,
+ exp_proc_remove(net);
+ if (net_eq(net, &init_net))
+ kmem_cache_destroy(nf_ct_expect_cachep);
+ nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
nf_ct_expect_hsize);
}