/* Expectation handling for nf_conntrack. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/kernel.h>
#include <linux/jhash.h>
#include <linux/random.h>	/* get_random_bytes() */
#include <linux/moduleparam.h>	/* module_param_named() */
#include <net/net_namespace.h>

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_tuple.h>

unsigned int nf_ct_expect_hsize __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);

static unsigned int nf_ct_expect_hash_rnd __read_mostly;
unsigned int nf_ct_expect_max __read_mostly;
static int nf_ct_expect_hash_rnd_initted __read_mostly;

static struct kmem_cache *nf_ct_expect_cachep __read_mostly;

/* nf_conntrack_expect helper functions */
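/*
 * Unlink an expectation from the global hash and from its master's list,
 * and drop the reference those lists held.  The caller must hold
 * nf_conntrack_lock and must already have stopped the timeout timer.
 */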
void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
{
        struct nf_conn_help *master_help = nfct_help(exp->master);
        struct net *net = nf_ct_exp_net(exp);

        NF_CT_ASSERT(master_help);
        NF_CT_ASSERT(!timer_pending(&exp->timeout));

        hlist_del_rcu(&exp->hnode);
        net->ct.expect_count--;

        hlist_del(&exp->lnode);
        master_help->expecting[exp->class]--;
        nf_ct_expect_put(exp);

        NF_CT_STAT_INC(net, expect_delete);
}
EXPORT_SYMBOL_GPL(nf_ct_unlink_expect);

static void nf_ct_expectation_timed_out(unsigned long ul_expect)
{
        struct nf_conntrack_expect *exp = (void *)ul_expect;

        spin_lock_bh(&nf_conntrack_lock);
        nf_ct_unlink_expect(exp);
        spin_unlock_bh(&nf_conntrack_lock);
        nf_ct_expect_put(exp);
}

static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple)
{
        unsigned int hash;

        if (unlikely(!nf_ct_expect_hash_rnd_initted)) {
                get_random_bytes(&nf_ct_expect_hash_rnd,
                                 sizeof(nf_ct_expect_hash_rnd));
                nf_ct_expect_hash_rnd_initted = 1;
        }

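        /* Hash the destination part of the tuple, then scale the 32-bit
         * jhash result onto [0, nf_ct_expect_hsize) with a multiply and
         * shift instead of a more expensive modulo. */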
        hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
                      (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
                       (__force __u16)tuple->dst.u.all) ^ nf_ct_expect_hash_rnd);
        return ((u64)hash * nf_ct_expect_hsize) >> 32;
}

struct nf_conntrack_expect *
__nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple)
{
        struct nf_conntrack_expect *i;
        struct hlist_node *n;
        unsigned int h;

        if (!net->ct.expect_count)
                return NULL;

        h = nf_ct_expect_dst_hash(tuple);
        hlist_for_each_entry_rcu(i, n, &net->ct.expect_hash[h], hnode) {
                if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
                        return i;
        }
        return NULL;
}
EXPORT_SYMBOL_GPL(__nf_ct_expect_find);

/* Just find an expectation corresponding to a tuple. */
struct nf_conntrack_expect *
nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple)
{
        struct nf_conntrack_expect *i;

        rcu_read_lock();
        i = __nf_ct_expect_find(net, tuple);
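        /* The lookup ran under RCU, so the expectation may be on its way
         * to being freed; atomic_inc_not_zero() only takes a reference if
         * the final nf_ct_expect_put() has not happened yet. */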
        if (i && !atomic_inc_not_zero(&i->use))
                i = NULL;
        rcu_read_unlock();

        return i;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);

/* If an expectation for this connection is found, it is deleted from the
 * global list and returned. */
struct nf_conntrack_expect *
nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple)
{
        struct nf_conntrack_expect *i, *exp = NULL;
        struct hlist_node *n;
        unsigned int h;

        if (!net->ct.expect_count)
                return NULL;

        h = nf_ct_expect_dst_hash(tuple);
        hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
                if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
                    nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
                        exp = i;
                        break;
                }
        }
        if (!exp)
                return NULL;

        /* If the master is not in the hash table yet (ie. the packet
           hasn't left this machine yet), how can the other end know about
           the expectation?  Hence these are not the droids you are looking
           for (if the master ct never got confirmed, we'd hold a reference
           to it and weird things would happen to future packets). */
        if (!nf_ct_is_confirmed(exp->master))
                return NULL;

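        /* Reference semantics of the two success paths below: a PERMANENT
         * expectation stays in the table, so the caller gets its own new
         * reference; otherwise a successful del_timer() transfers the
         * timer's reference to the caller and the expectation is unlinked. */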
        if (exp->flags & NF_CT_EXPECT_PERMANENT) {
                atomic_inc(&exp->use);
                return exp;
        } else if (del_timer(&exp->timeout)) {
                nf_ct_unlink_expect(exp);
                return exp;
        }

        return NULL;
}

/* delete all expectations for this conntrack */
void nf_ct_remove_expectations(struct nf_conn *ct)
{
        struct nf_conn_help *help = nfct_help(ct);
        struct nf_conntrack_expect *exp;
        struct hlist_node *n, *next;

        /* Optimization: most connections never expect any others. */
        if (!help)
                return;

        hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) {
                if (del_timer(&exp->timeout)) {
                        nf_ct_unlink_expect(exp);
                        nf_ct_expect_put(exp);
                }
        }
}
EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);

/* Would two expected things clash? */
static inline int expect_clash(const struct nf_conntrack_expect *a,
                               const struct nf_conntrack_expect *b)
{
        /* The parts of the tuples covered by the intersection of the two
           masks must differ, otherwise the expectations clash.  E.g. two
           expectations that put no constraint on the source clash whenever
           their destination tuples are equal, however different their
           source masks are. */
        struct nf_conntrack_tuple_mask intersect_mask;
        int count;

        intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;

        for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++) {
                intersect_mask.src.u3.all[count] =
                        a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
        }

        return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
}

static inline int expect_matches(const struct nf_conntrack_expect *a,
                                 const struct nf_conntrack_expect *b)
{
        return a->master == b->master && a->class == b->class &&
                nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
                nf_ct_tuple_mask_equal(&a->mask, &b->mask);
}

/* Generally a bad idea to call this: could have matched already. */
void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
{
        spin_lock_bh(&nf_conntrack_lock);
        if (del_timer(&exp->timeout)) {
                nf_ct_unlink_expect(exp);
                nf_ct_expect_put(exp);
        }
        spin_unlock_bh(&nf_conntrack_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);

/* We don't increase the master conntrack refcount for non-fulfilled
 * expectations.  During conntrack destruction, the expectations are
 * always killed before the conntrack itself. */
struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
{
        struct nf_conntrack_expect *new;

        new = kmem_cache_alloc(nf_ct_expect_cachep, GFP_ATOMIC);
        if (!new)
                return NULL;

        new->master = me;
        atomic_set(&new->use, 1);
        return new;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);

void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
                       u_int8_t family,
                       const union nf_inet_addr *saddr,
                       const union nf_inet_addr *daddr,
                       u_int8_t proto, const __be16 *src, const __be16 *dst)
{
        int len;

        if (family == AF_INET)
                len = 4;
        else
                len = 16;

        exp->flags = 0;
        exp->class = class;
        exp->expectfn = NULL;
        exp->helper = NULL;
        exp->tuple.src.l3num = family;
        exp->tuple.dst.protonum = proto;

        if (saddr) {
                memcpy(&exp->tuple.src.u3, saddr, len);
                if (sizeof(exp->tuple.src.u3) > len)
                        /* address needs to be cleared for nf_ct_tuple_equal */
                        memset((void *)&exp->tuple.src.u3 + len, 0x00,
                               sizeof(exp->tuple.src.u3) - len);
                memset(&exp->mask.src.u3, 0xFF, len);
                if (sizeof(exp->mask.src.u3) > len)
                        memset((void *)&exp->mask.src.u3 + len, 0x00,
                               sizeof(exp->mask.src.u3) - len);
        } else {
                memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3));
                memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
        }

        if (src) {
                exp->tuple.src.u.all = *src;
                exp->mask.src.u.all = htons(0xFFFF);
        } else {
                exp->tuple.src.u.all = 0;
                exp->mask.src.u.all = 0;
        }

        memcpy(&exp->tuple.dst.u3, daddr, len);
        if (sizeof(exp->tuple.dst.u3) > len)
                /* address needs to be cleared for nf_ct_tuple_equal */
                memset((void *)&exp->tuple.dst.u3 + len, 0x00,
                       sizeof(exp->tuple.dst.u3) - len);

        exp->tuple.dst.u.all = *dst;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_init);
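
/*
 * Typical caller pattern, sketched from how the conntrack helpers use
 * this API (see e.g. nf_conntrack_ftp.c); "ct", "daddr" and "port" stand
 * in for values a helper has parsed out of the control connection:
 *
 *      struct nf_conntrack_expect *exp;
 *      int ret;
 *
 *      exp = nf_ct_expect_alloc(ct);
 *      if (exp == NULL)
 *              return NF_DROP;
 *      nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
 *                        NULL, daddr, IPPROTO_TCP, NULL, &port);
 *      ret = nf_ct_expect_related(exp);  // insertion takes its own refs
 *      nf_ct_expect_put(exp);            // drop the allocation reference
 */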

static void nf_ct_expect_free_rcu(struct rcu_head *head)
{
        struct nf_conntrack_expect *exp;

        exp = container_of(head, struct nf_conntrack_expect, rcu);
        kmem_cache_free(nf_ct_expect_cachep, exp);
}

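/* Drop a reference.  The final put frees the expectation through RCU so
 * that lockless readers in __nf_ct_expect_find() never see freed memory.
 * Note that the embedded rcu_head needs no initialization before being
 * handed to call_rcu(). */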
void nf_ct_expect_put(struct nf_conntrack_expect *exp)
{
        if (atomic_dec_and_test(&exp->use))
                call_rcu(&exp->rcu, nf_ct_expect_free_rcu);
}
EXPORT_SYMBOL_GPL(nf_ct_expect_put);

static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
{
        struct nf_conn_help *master_help = nfct_help(exp->master);
        struct net *net = nf_ct_exp_net(exp);
        const struct nf_conntrack_expect_policy *p;
        unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);

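        /* One reference for the hash table and the master's list; it is
         * dropped again in nf_ct_unlink_expect(). */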
        atomic_inc(&exp->use);

        hlist_add_head(&exp->lnode, &master_help->expectations);
        master_help->expecting[exp->class]++;

        hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]);
        net->ct.expect_count++;

        setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
                    (unsigned long)exp);
        p = &master_help->helper->expect_policy[exp->class];
        exp->timeout.expires = jiffies + p->timeout * HZ;
        add_timer(&exp->timeout);

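        /* A second reference for the timeout timer; it is dropped by
         * nf_ct_expectation_timed_out(), or transferred to whoever wins
         * the del_timer() race. */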
        atomic_inc(&exp->use);
        NF_CT_STAT_INC(net, expect_create);
}

/* Race with expectations being used means we could have none to find; OK. */
static void evict_oldest_expect(struct nf_conn *master,
                                struct nf_conntrack_expect *new)
{
        struct nf_conn_help *master_help = nfct_help(master);
        struct nf_conntrack_expect *exp, *last = NULL;
        struct hlist_node *n;

        hlist_for_each_entry(exp, n, &master_help->expectations, lnode) {
                if (exp->class == new->class)
                        last = exp;
        }

        if (last && del_timer(&last->timeout)) {
                nf_ct_unlink_expect(last);
                nf_ct_expect_put(last);
        }
}

static inline int refresh_timer(struct nf_conntrack_expect *i)
{
        struct nf_conn_help *master_help = nfct_help(i->master);
        const struct nf_conntrack_expect_policy *p;

        if (!del_timer(&i->timeout))
                return 0;

        p = &master_help->helper->expect_policy[i->class];
        i->timeout.expires = jiffies + p->timeout * HZ;
        add_timer(&i->timeout);
        return 1;
}

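/* Check whether an expectation may be inserted.  Called with
 * nf_conntrack_lock held.  Returns 1 if the caller should go ahead and
 * insert, 0 if an identical expectation already existed and merely had
 * its timer refreshed, or a negative error code. */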
static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
{
        const struct nf_conntrack_expect_policy *p;
        struct nf_conntrack_expect *i;
        struct nf_conn *master = expect->master;
        struct nf_conn_help *master_help = nfct_help(master);
        struct net *net = nf_ct_exp_net(expect);
        struct hlist_node *n;
        unsigned int h;
        int ret = 1;

        if (!master_help->helper) {
                ret = -ESHUTDOWN;
                goto out;
        }
        h = nf_ct_expect_dst_hash(&expect->tuple);
        hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
                if (expect_matches(i, expect)) {
                        /* Refresh timer: if it's dying, ignore. */
                        if (refresh_timer(i)) {
                                ret = 0;
                                goto out;
                        }
                } else if (expect_clash(i, expect)) {
                        ret = -EBUSY;
                        goto out;
                }
        }
        /* Will be over limit? */
        p = &master_help->helper->expect_policy[expect->class];
        if (p->max_expected &&
            master_help->expecting[expect->class] >= p->max_expected) {
                evict_oldest_expect(master, expect);
                if (master_help->expecting[expect->class] >= p->max_expected) {
                        ret = -EMFILE;
                        goto out;
                }
        }

        if (net->ct.expect_count >= nf_ct_expect_max) {
                if (net_ratelimit())
                        printk(KERN_WARNING
                               "nf_conntrack: expectation table full\n");
                ret = -EMFILE;
        }
out:
        return ret;
}

int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
                                u32 pid, int report)
{
        int ret;

        spin_lock_bh(&nf_conntrack_lock);
        ret = __nf_ct_expect_check(expect);
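        /* ret == 0 means an identical expectation was refreshed, ret < 0
         * is an error; in both cases there is nothing to insert. */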
        if (ret <= 0)
                goto out;

        ret = 0;
        nf_ct_expect_insert(expect);
        spin_unlock_bh(&nf_conntrack_lock);
        nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report);
        return ret;
out:
        spin_unlock_bh(&nf_conntrack_lock);
        return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);

#ifdef CONFIG_PROC_FS
struct ct_expect_iter_state {
        struct seq_net_private p;
        unsigned int bucket;
};

static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
{
        struct net *net = seq_file_net(seq);
        struct ct_expect_iter_state *st = seq->private;
        struct hlist_node *n;

        for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
                n = rcu_dereference(net->ct.expect_hash[st->bucket].first);
                if (n)
                        return n;
        }
        return NULL;
}

static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
                                             struct hlist_node *head)
{
        struct net *net = seq_file_net(seq);
        struct ct_expect_iter_state *st = seq->private;

        head = rcu_dereference(head->next);
        while (head == NULL) {
                if (++st->bucket >= nf_ct_expect_hsize)
                        return NULL;
                head = rcu_dereference(net->ct.expect_hash[st->bucket].first);
        }
        return head;
}

static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
{
        struct hlist_node *head = ct_expect_get_first(seq);

        if (head)
                while (pos && (head = ct_expect_get_next(seq, head)))
                        pos--;
        return pos ? NULL : head;
}

static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
        __acquires(RCU)
{
        rcu_read_lock();
        return ct_expect_get_idx(seq, *pos);
}

static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        (*pos)++;
        return ct_expect_get_next(seq, v);
}

static void exp_seq_stop(struct seq_file *seq, void *v)
        __releases(RCU)
{
        rcu_read_unlock();
}

static int exp_seq_show(struct seq_file *s, void *v)
{
        struct nf_conntrack_expect *expect;
        struct nf_conntrack_helper *helper;
        struct hlist_node *n = v;
        char *delim = "";

        expect = hlist_entry(n, struct nf_conntrack_expect, hnode);

        if (expect->timeout.function)
                seq_printf(s, "%ld ", timer_pending(&expect->timeout)
                           ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
        else
                seq_printf(s, "- ");
        seq_printf(s, "l3proto = %u proto=%u ",
                   expect->tuple.src.l3num,
                   expect->tuple.dst.protonum);
        print_tuple(s, &expect->tuple,
                    __nf_ct_l3proto_find(expect->tuple.src.l3num),
                    __nf_ct_l4proto_find(expect->tuple.src.l3num,
                                         expect->tuple.dst.protonum));

        if (expect->flags & NF_CT_EXPECT_PERMANENT) {
                seq_printf(s, "PERMANENT");
                delim = ",";
        }
        if (expect->flags & NF_CT_EXPECT_INACTIVE)
                seq_printf(s, "%sINACTIVE", delim);

        helper = rcu_dereference(nfct_help(expect->master)->helper);
        if (helper) {
                seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name);
                if (helper->expect_policy[expect->class].name)
                        seq_printf(s, "/%s",
                                   helper->expect_policy[expect->class].name);
        }

        return seq_putc(s, '\n');
}

static const struct seq_operations exp_seq_ops = {
        .start = exp_seq_start,
        .next = exp_seq_next,
        .stop = exp_seq_stop,
        .show = exp_seq_show
};

static int exp_open(struct inode *inode, struct file *file)
{
        return seq_open_net(inode, file, &exp_seq_ops,
                            sizeof(struct ct_expect_iter_state));
}

static const struct file_operations exp_file_ops = {
        .owner   = THIS_MODULE,
        .open    = exp_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release_net,
};
#endif /* CONFIG_PROC_FS */

static int exp_proc_init(struct net *net)
{
#ifdef CONFIG_PROC_FS
        struct proc_dir_entry *proc;

        proc = proc_net_fops_create(net, "nf_conntrack_expect", 0440, &exp_file_ops);
        if (!proc)
                return -ENOMEM;
#endif /* CONFIG_PROC_FS */
        return 0;
}

static void exp_proc_remove(struct net *net)
{
#ifdef CONFIG_PROC_FS
        proc_net_remove(net, "nf_conntrack_expect");
#endif /* CONFIG_PROC_FS */
}

module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400);

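/* The expectation hash is sized relative to the main conntrack table:
 * one bucket per 256 conntrack buckets (minimum 1), and at most four
 * expectations per bucket on average before new ones are refused. */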
int nf_conntrack_expect_init(struct net *net)
{
        int err = -ENOMEM;

        if (net_eq(net, &init_net)) {
                if (!nf_ct_expect_hsize) {
                        nf_ct_expect_hsize = net->ct.htable_size / 256;
                        if (!nf_ct_expect_hsize)
                                nf_ct_expect_hsize = 1;
                }
                nf_ct_expect_max = nf_ct_expect_hsize * 4;
        }

        net->ct.expect_count = 0;
        net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
                                                    &net->ct.expect_vmalloc, 0);
        if (net->ct.expect_hash == NULL)
                goto err1;

        if (net_eq(net, &init_net)) {
                nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
                                        sizeof(struct nf_conntrack_expect),
                                        0, 0, NULL);
                if (!nf_ct_expect_cachep)
                        goto err2;
        }

        err = exp_proc_init(net);
        if (err < 0)
                goto err3;

        return 0;

err3:
        if (net_eq(net, &init_net))
                kmem_cache_destroy(nf_ct_expect_cachep);
err2:
        nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
                             nf_ct_expect_hsize);
err1:
        return err;
}

void nf_conntrack_expect_fini(struct net *net)
{
        exp_proc_remove(net);
        if (net_eq(net, &init_net)) {
                rcu_barrier(); /* Wait for call_rcu() before destroy */
                kmem_cache_destroy(nf_ct_expect_cachep);
        }
        nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
                             nf_ct_expect_hsize);
}