/* Connection state tracking for netfilter.  This is separated from,
   but required by, the NAT layer; it can also be used by an iptables
   connection tracking module.
*/
/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
 *	- new API and handling of conntrack/nat helpers
 *	- now capable of multiple expectations for one master
 * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
 *	- add usage/reference counts to ip_conntrack_expect
 *	- export ip_conntrack[_expect]_{find_get,put} functions
 * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
 *	- generalize L3 protocol dependent part.
 * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
 *	- add support for various sizes of conntrack structures.
 * 26 Jan 2006: Harald Welte <laforge@netfilter.org>
 *	- restructure nf_conn (introduce nf_conn_help)
 *	- redesign 'features' the way they were originally intended
 * 26 Feb 2006: Pablo Neira Ayuso <pablo@eurodev.net>
 *	- add support for L3 protocol module load on demand.
 *
 * Derived from net/ipv4/netfilter/ip_conntrack_core.c
 */
#include <linux/config.h>
#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/jhash.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/moduleparam.h>
#include <linux/notifier.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/socket.h>
/* This rwlock protects the main hash table, protocol/helper/expected
   registrations, conntrack timers. */
#define ASSERT_READ_LOCK(x)
#define ASSERT_WRITE_LOCK(x)
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_protocol.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <linux/netfilter_ipv4/listhelp.h>

#define NF_CONNTRACK_VERSION	"0.5.0"
#define DEBUGP(format, args...)

DEFINE_RWLOCK(nf_conntrack_lock);

/* nf_conntrack_standalone needs this */
atomic_t nf_conntrack_count = ATOMIC_INIT(0);

void (*nf_conntrack_destroyed)(struct nf_conn *conntrack) = NULL;
LIST_HEAD(nf_conntrack_expect_list);
struct nf_conntrack_protocol **nf_ct_protos[PF_MAX];
struct nf_conntrack_l3proto *nf_ct_l3protos[PF_MAX];
static LIST_HEAD(helpers);
unsigned int nf_conntrack_htable_size = 0;
int nf_conntrack_max;
struct list_head *nf_conntrack_hash;
static kmem_cache_t *nf_conntrack_expect_cachep;
struct nf_conn nf_conntrack_untracked;
unsigned int nf_ct_log_invalid;
static LIST_HEAD(unconfirmed);
static int nf_conntrack_vmalloc;

static unsigned int nf_conntrack_next_id;
static unsigned int nf_conntrack_expect_next_id;

#ifdef CONFIG_NF_CONNTRACK_EVENTS
ATOMIC_NOTIFIER_HEAD(nf_conntrack_chain);
ATOMIC_NOTIFIER_HEAD(nf_conntrack_expect_chain);

DEFINE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache);
/* deliver cached events and clear cache entry - must be called with locally
 * disabled softirqs */
static inline void
__nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
{
	DEBUGP("ecache: delivering events for %p\n", ecache->ct);
	if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct)
	    && ecache->events)
		atomic_notifier_call_chain(&nf_conntrack_chain, ecache->events,
					   ecache->ct);

	ecache->events = 0;
	nf_ct_put(ecache->ct);
	ecache->ct = NULL;
}
/* Deliver all cached events for a particular conntrack. This is called
 * by code prior to async packet handling for freeing the skb */
void nf_ct_deliver_cached_events(const struct nf_conn *ct)
{
	struct nf_conntrack_ecache *ecache;

	local_bh_disable();
	ecache = &__get_cpu_var(nf_conntrack_ecache);
	if (ecache->ct == ct)
		__nf_ct_deliver_cached_events(ecache);
	local_bh_enable();
}
/* Deliver cached events for old pending events, if current conntrack != old */
void __nf_ct_event_cache_init(struct nf_conn *ct)
{
	struct nf_conntrack_ecache *ecache;

	/* take care of delivering potentially old events */
	ecache = &__get_cpu_var(nf_conntrack_ecache);
	BUG_ON(ecache->ct == ct);
	if (ecache->ct)
		__nf_ct_deliver_cached_events(ecache);
	/* initialize for this conntrack/packet */
	ecache->ct = ct;
	nf_conntrack_get(&ct->ct_general);
}
/* flush the event cache - touches other CPU's data and must not be called
 * while packets are still passing through the code */
static void nf_ct_event_cache_flush(void)
{
	struct nf_conntrack_ecache *ecache;
	int cpu;

	for_each_possible_cpu(cpu) {
		ecache = &per_cpu(nf_conntrack_ecache, cpu);
		if (ecache->ct)
			nf_ct_put(ecache->ct);
	}
}
#else
static inline void nf_ct_event_cache_flush(void) {}
#endif /* CONFIG_NF_CONNTRACK_EVENTS */
DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
EXPORT_PER_CPU_SYMBOL(nf_conntrack_stat);

/*
 * This scheme offers various sizes of "struct nf_conn" depending on
 * features (helper, nat, ...)
 */

#define NF_CT_FEATURES_NAMELEN	256
static struct {
	/* name of slab cache. printed in /proc/slabinfo */
	char *name;

	/* size of slab cache */
	size_t size;

	/* slab cache pointer */
	kmem_cache_t *cachep;

	/* allocated slab cache + modules which use this slab cache */
	int use;

	/* Initialization */
	int (*init_conntrack)(struct nf_conn *, u_int32_t);

} nf_ct_cache[NF_CT_F_NUM];
/* protect members of nf_ct_cache except "use" */
DEFINE_RWLOCK(nf_ct_cache_lock);

/* This avoids calling kmem_cache_create() with same name simultaneously */
static DEFINE_MUTEX(nf_ct_cache_mutex);

extern struct nf_conntrack_protocol nf_conntrack_generic_protocol;
struct nf_conntrack_protocol *
__nf_ct_proto_find(u_int16_t l3proto, u_int8_t protocol)
{
	if (unlikely(l3proto >= AF_MAX || nf_ct_protos[l3proto] == NULL))
		return &nf_conntrack_generic_protocol;

	return nf_ct_protos[l3proto][protocol];
}

/* this is guaranteed to always return a valid protocol helper, since
 * it falls back to generic_protocol */
struct nf_conntrack_protocol *
nf_ct_proto_find_get(u_int16_t l3proto, u_int8_t protocol)
{
	struct nf_conntrack_protocol *p;

	preempt_disable();
	p = __nf_ct_proto_find(l3proto, protocol);
	if (!try_module_get(p->me))
		p = &nf_conntrack_generic_protocol;
	preempt_enable();

	return p;
}
void nf_ct_proto_put(struct nf_conntrack_protocol *p)
{
	module_put(p->me);
}
struct nf_conntrack_l3proto *
nf_ct_l3proto_find_get(u_int16_t l3proto)
{
	struct nf_conntrack_l3proto *p;

	preempt_disable();
	p = __nf_ct_l3proto_find(l3proto);
	if (!try_module_get(p->me))
		p = &nf_conntrack_generic_l3proto;
	preempt_enable();

	return p;
}

void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p)
{
	module_put(p->me);
}
int
nf_ct_l3proto_try_module_get(unsigned short l3proto)
{
	int ret;
	struct nf_conntrack_l3proto *p;

retry:	p = nf_ct_l3proto_find_get(l3proto);
	if (p == &nf_conntrack_generic_l3proto) {
		ret = request_module("nf_conntrack-%d", l3proto);
		if (!ret)
			goto retry;

		return -EPROTOTYPE;
	}

	return 0;
}

void nf_ct_l3proto_module_put(unsigned short l3proto)
{
	struct nf_conntrack_l3proto *p;

	preempt_disable();
	p = __nf_ct_l3proto_find(l3proto);
	preempt_enable();

	module_put(p->me);
}
static int nf_conntrack_hash_rnd_initted;
static unsigned int nf_conntrack_hash_rnd;

static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
				  unsigned int size, unsigned int rnd)
{
	unsigned int a, b;

	a = jhash((void *)tuple->src.u3.all, sizeof(tuple->src.u3.all),
		  ((tuple->src.l3num) << 16) | tuple->dst.protonum);
	b = jhash((void *)tuple->dst.u3.all, sizeof(tuple->dst.u3.all),
		  (tuple->src.u.all << 16) | tuple->dst.u.all);

	return jhash_2words(a, b, rnd) % size;
}

static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple)
{
	return __hash_conntrack(tuple, nf_conntrack_htable_size,
				nf_conntrack_hash_rnd);
}
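/*
 * Illustrative sketch (not part of the module): for a TCP/IPv4 tuple
 * such as 192.168.0.1:1025 -> 10.0.0.1:80, the addresses are mixed by
 * jhash keyed with (l3num << 16 | protonum), the ports with
 * (sport << 16 | dport), and both halves are folded by jhash_2words()
 * before the modulo reduction, e.g.
 *
 *	unsigned int bucket = hash_conntrack(&tuple);
 *	list_for_each_entry(h, &nf_conntrack_hash[bucket], list)
 *		...
 *
 * The original and reply tuples of one connection normally land in
 * different buckets, which is why insertion and deletion below always
 * compute two hashes.
 */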
int nf_conntrack_register_cache(u_int32_t features, const char *name,
				size_t size)
{
	int ret = 0;
	char *cache_name;
	kmem_cache_t *cachep;

	DEBUGP("nf_conntrack_register_cache: features=0x%x, name=%s, size=%d\n",
	       features, name, size);

	if (features < NF_CT_F_BASIC || features >= NF_CT_F_NUM) {
		DEBUGP("nf_conntrack_register_cache: invalid features: 0x%x\n",
			features);
		return -EINVAL;
	}

	mutex_lock(&nf_ct_cache_mutex);

	write_lock_bh(&nf_ct_cache_lock);
	/* e.g: multiple helpers are loaded */
	if (nf_ct_cache[features].use > 0) {
		DEBUGP("nf_conntrack_register_cache: already registered.\n");
		if ((!strncmp(nf_ct_cache[features].name, name,
			      NF_CT_FEATURES_NAMELEN))
		    && nf_ct_cache[features].size == size) {
			DEBUGP("nf_conntrack_register_cache: reusing.\n");
			nf_ct_cache[features].use++;
			ret = 0;
		} else
			ret = -EBUSY;

		write_unlock_bh(&nf_ct_cache_lock);
		mutex_unlock(&nf_ct_cache_mutex);
		return ret;
	}
	write_unlock_bh(&nf_ct_cache_lock);

	/*
	 * The memory space for name of slab cache must be alive until
	 * cache is destroyed.
	 */
	cache_name = kmalloc(sizeof(char)*NF_CT_FEATURES_NAMELEN, GFP_ATOMIC);
	if (cache_name == NULL) {
		DEBUGP("nf_conntrack_register_cache: can't alloc cache_name\n");
		ret = -ENOMEM;
		goto out_up_mutex;
	}

	if (strlcpy(cache_name, name, NF_CT_FEATURES_NAMELEN)
						>= NF_CT_FEATURES_NAMELEN) {
		printk("nf_conntrack_register_cache: name too long\n");
		ret = -EINVAL;
		goto out_free_name;
	}

	cachep = kmem_cache_create(cache_name, size, 0, 0,
				   NULL, NULL);
	if (!cachep) {
		printk("nf_conntrack_register_cache: Can't create slab cache "
		       "for the features = 0x%x\n", features);
		ret = -ENOMEM;
		goto out_free_name;
	}

	write_lock_bh(&nf_ct_cache_lock);
	nf_ct_cache[features].use = 1;
	nf_ct_cache[features].size = size;
	nf_ct_cache[features].cachep = cachep;
	nf_ct_cache[features].name = cache_name;
	write_unlock_bh(&nf_ct_cache_lock);

	goto out_up_mutex;

out_free_name:
	kfree(cache_name);
out_up_mutex:
	mutex_unlock(&nf_ct_cache_mutex);
	return ret;
}
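/*
 * Illustrative use (sketch): nf_conntrack_helper_register() below grows
 * the conntrack slab so that an nf_conn_help area fits behind nf_conn:
 *
 *	ret = nf_conntrack_register_cache(NF_CT_F_HELP, "nf_conntrack:help",
 *					  sizeof(struct nf_conn)
 *					  + sizeof(struct nf_conn_help)
 *					  + __alignof__(struct nf_conn_help));
 *
 * A matching nf_conntrack_unregister_cache(NF_CT_F_HELP) drops the
 * reference again when the user goes away.
 */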
/* FIXME: At present, only nf_conntrack_cleanup() calls this function. */
void nf_conntrack_unregister_cache(u_int32_t features)
{
	kmem_cache_t *cachep;
	char *name;

	/*
	 * This assures that kmem_cache_create() isn't called before destroying
	 * slab cache.
	 */
	DEBUGP("nf_conntrack_unregister_cache: 0x%04x\n", features);
	mutex_lock(&nf_ct_cache_mutex);

	write_lock_bh(&nf_ct_cache_lock);
	if (--nf_ct_cache[features].use > 0) {
		write_unlock_bh(&nf_ct_cache_lock);
		mutex_unlock(&nf_ct_cache_mutex);
		return;
	}
	cachep = nf_ct_cache[features].cachep;
	name = nf_ct_cache[features].name;
	nf_ct_cache[features].cachep = NULL;
	nf_ct_cache[features].name = NULL;
	nf_ct_cache[features].size = 0;
	write_unlock_bh(&nf_ct_cache_lock);

	synchronize_net();

	kmem_cache_destroy(cachep);
	kfree(name);

	mutex_unlock(&nf_ct_cache_mutex);
}
int
nf_ct_get_tuple(const struct sk_buff *skb,
		unsigned int nhoff,
		unsigned int dataoff,
		u_int16_t l3num,
		u_int8_t protonum,
		struct nf_conntrack_tuple *tuple,
		const struct nf_conntrack_l3proto *l3proto,
		const struct nf_conntrack_protocol *protocol)
{
	NF_CT_TUPLE_U_BLANK(tuple);

	tuple->src.l3num = l3num;
	if (l3proto->pkt_to_tuple(skb, nhoff, tuple) == 0)
		return 0;

	tuple->dst.protonum = protonum;
	tuple->dst.dir = IP_CT_DIR_ORIGINAL;

	return protocol->pkt_to_tuple(skb, dataoff, tuple);
}
int
nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
		   const struct nf_conntrack_tuple *orig,
		   const struct nf_conntrack_l3proto *l3proto,
		   const struct nf_conntrack_protocol *protocol)
{
	NF_CT_TUPLE_U_BLANK(inverse);

	inverse->src.l3num = orig->src.l3num;
	if (l3proto->invert_tuple(inverse, orig) == 0)
		return 0;

	inverse->dst.dir = !orig->dst.dir;

	inverse->dst.protonum = orig->dst.protonum;
	return protocol->invert_tuple(inverse, orig);
}
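/*
 * For illustration: inverting the original tuple
 * 192.168.0.1:1025 -> 10.0.0.1:80/TCP yields the reply tuple
 * 10.0.0.1:80 -> 192.168.0.1:1025/TCP.  The L3 and L4 callbacks swap
 * the address and port halves respectively; direction and protocol
 * number are handled here.
 */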
/* nf_conntrack_expect helper functions */
void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
{
	struct nf_conn_help *master_help = nfct_help(exp->master);

	NF_CT_ASSERT(master_help);
	ASSERT_WRITE_LOCK(&nf_conntrack_lock);
	NF_CT_ASSERT(!timer_pending(&exp->timeout));

	list_del(&exp->list);
	NF_CT_STAT_INC(expect_delete);
	master_help->expecting--;
	nf_conntrack_expect_put(exp);
}
static void expectation_timed_out(unsigned long ul_expect)
{
	struct nf_conntrack_expect *exp = (void *)ul_expect;

	write_lock_bh(&nf_conntrack_lock);
	nf_ct_unlink_expect(exp);
	write_unlock_bh(&nf_conntrack_lock);
	nf_conntrack_expect_put(exp);
}
struct nf_conntrack_expect *
__nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_expect *i;

	list_for_each_entry(i, &nf_conntrack_expect_list, list) {
		if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
			atomic_inc(&i->use);
			return i;
		}
	}
	return NULL;
}

/* Just find an expectation corresponding to a tuple. */
struct nf_conntrack_expect *
nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_expect *i;

	read_lock_bh(&nf_conntrack_lock);
	i = __nf_conntrack_expect_find(tuple);
	read_unlock_bh(&nf_conntrack_lock);

	return i;
}
/* If an expectation for this connection is found, it is deleted from
 * the global list and then returned. */
static struct nf_conntrack_expect *
find_expectation(const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_expect *i;

	list_for_each_entry(i, &nf_conntrack_expect_list, list) {
		/* If master is not in hash table yet (ie. packet hasn't left
		   this machine yet), how can other end know about expected?
		   Hence these are not the droids you are looking for (if
		   master ct never got confirmed, we'd hold a reference to it
		   and weird things would happen to future packets). */
		if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)
		    && nf_ct_is_confirmed(i->master)) {
			if (i->flags & NF_CT_EXPECT_PERMANENT) {
				atomic_inc(&i->use);
				return i;
			} else if (del_timer(&i->timeout)) {
				nf_ct_unlink_expect(i);
				return i;
			}
		}
	}
	return NULL;
}
/* delete all expectations for this conntrack */
void nf_ct_remove_expectations(struct nf_conn *ct)
{
	struct nf_conntrack_expect *i, *tmp;
	struct nf_conn_help *help = nfct_help(ct);

	/* Optimization: most connections never expect any others. */
	if (!help || help->expecting == 0)
		return;

	list_for_each_entry_safe(i, tmp, &nf_conntrack_expect_list, list) {
		if (i->master == ct && del_timer(&i->timeout)) {
			nf_ct_unlink_expect(i);
			nf_conntrack_expect_put(i);
		}
	}
}
static void
clean_from_lists(struct nf_conn *ct)
{
	unsigned int ho, hr;

	DEBUGP("clean_from_lists(%p)\n", ct);
	ASSERT_WRITE_LOCK(&nf_conntrack_lock);

	ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
	hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
	LIST_DELETE(&nf_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
	LIST_DELETE(&nf_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);

	/* Destroy all pending expectations */
	nf_ct_remove_expectations(ct);
}
static void
destroy_conntrack(struct nf_conntrack *nfct)
{
	struct nf_conn *ct = (struct nf_conn *)nfct;
	struct nf_conntrack_l3proto *l3proto;
	struct nf_conntrack_protocol *proto;

	DEBUGP("destroy_conntrack(%p)\n", ct);
	NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
	NF_CT_ASSERT(!timer_pending(&ct->timeout));

	nf_conntrack_event(IPCT_DESTROY, ct);
	set_bit(IPS_DYING_BIT, &ct->status);

	/* To make sure we don't get any weird locking issues here:
	 * destroy_conntrack() MUST NOT be called with a write lock
	 * to nf_conntrack_lock!!! -HW */
	l3proto = __nf_ct_l3proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num);
	if (l3proto && l3proto->destroy)
		l3proto->destroy(ct);

	proto = __nf_ct_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num,
				   ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
	if (proto && proto->destroy)
		proto->destroy(ct);

	if (nf_conntrack_destroyed)
		nf_conntrack_destroyed(ct);

	write_lock_bh(&nf_conntrack_lock);
	/* Expectations will have been removed in clean_from_lists,
	 * except TFTP can create an expectation on the first packet,
	 * before connection is in the list, so we need to clean here,
	 * too. */
	nf_ct_remove_expectations(ct);

	/* We overload first tuple to link into unconfirmed list. */
	if (!nf_ct_is_confirmed(ct)) {
		BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list));
		list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
	}

	NF_CT_STAT_INC(delete);
	write_unlock_bh(&nf_conntrack_lock);

	if (ct->master)
		nf_ct_put(ct->master);

	DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
	nf_conntrack_free(ct);
}
static void death_by_timeout(unsigned long ul_conntrack)
{
	struct nf_conn *ct = (void *)ul_conntrack;

	write_lock_bh(&nf_conntrack_lock);
	/* Inside lock so preempt is disabled on module removal path.
	 * Otherwise we can get spurious warnings. */
	NF_CT_STAT_INC(delete_list);
	clean_from_lists(ct);
	write_unlock_bh(&nf_conntrack_lock);
	nf_ct_put(ct);
}
static inline int
conntrack_tuple_cmp(const struct nf_conntrack_tuple_hash *i,
		    const struct nf_conntrack_tuple *tuple,
		    const struct nf_conn *ignored_conntrack)
{
	ASSERT_READ_LOCK(&nf_conntrack_lock);
	return nf_ct_tuplehash_to_ctrack(i) != ignored_conntrack
	       && nf_ct_tuple_equal(tuple, &i->tuple);
}

struct nf_conntrack_tuple_hash *
__nf_conntrack_find(const struct nf_conntrack_tuple *tuple,
		    const struct nf_conn *ignored_conntrack)
{
	struct nf_conntrack_tuple_hash *h;
	unsigned int hash = hash_conntrack(tuple);

	ASSERT_READ_LOCK(&nf_conntrack_lock);
	list_for_each_entry(h, &nf_conntrack_hash[hash], list) {
		if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) {
			NF_CT_STAT_INC(found);
			return h;
		}
		NF_CT_STAT_INC(searched);
	}

	return NULL;
}
/* Find a connection corresponding to a tuple. */
struct nf_conntrack_tuple_hash *
nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple,
		      const struct nf_conn *ignored_conntrack)
{
	struct nf_conntrack_tuple_hash *h;

	read_lock_bh(&nf_conntrack_lock);
	h = __nf_conntrack_find(tuple, ignored_conntrack);
	if (h)
		atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use);
	read_unlock_bh(&nf_conntrack_lock);

	return h;
}
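/*
 * Caller pattern (illustrative sketch): the lookup returns with the
 * refcount raised, so the reference must be dropped again:
 *
 *	h = nf_conntrack_find_get(&tuple, NULL);
 *	if (h) {
 *		struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
 *		... use ct ...
 *		nf_ct_put(ct);
 *	}
 */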
static void __nf_conntrack_hash_insert(struct nf_conn *ct,
				       unsigned int hash,
				       unsigned int repl_hash)
{
	ct->id = ++nf_conntrack_next_id;
	list_prepend(&nf_conntrack_hash[hash],
		     &ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
	list_prepend(&nf_conntrack_hash[repl_hash],
		     &ct->tuplehash[IP_CT_DIR_REPLY].list);
}

void nf_conntrack_hash_insert(struct nf_conn *ct)
{
	unsigned int hash, repl_hash;

	hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
	repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);

	write_lock_bh(&nf_conntrack_lock);
	__nf_conntrack_hash_insert(ct, hash, repl_hash);
	write_unlock_bh(&nf_conntrack_lock);
}
/* Confirm a connection given skb; places it in hash table */
int
__nf_conntrack_confirm(struct sk_buff **pskb)
{
	unsigned int hash, repl_hash;
	struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;

	ct = nf_ct_get(*pskb, &ctinfo);

	/* ipt_REJECT uses nf_conntrack_attach to attach related
	   ICMP/TCP RST packets in other direction. Actual packet
	   which created connection will be IP_CT_NEW or for an
	   expected connection, IP_CT_RELATED. */
	if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
		return NF_ACCEPT;

	hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
	repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);

	/* We're not in hash table, and we refuse to set up related
	   connections for unconfirmed conns. But packet copies and
	   REJECT will give spurious warnings here. */
	/* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */

	/* No external references means no one else could have
	   confirmed us. */
	NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
	DEBUGP("Confirming conntrack %p\n", ct);

	write_lock_bh(&nf_conntrack_lock);

	/* See if there's one in the list already, including reverse:
	   NAT could have grabbed it without realizing, since we're
	   not in the hash. If there is, we lost race. */
	if (!LIST_FIND(&nf_conntrack_hash[hash],
		       conntrack_tuple_cmp,
		       struct nf_conntrack_tuple_hash *,
		       &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
	    && !LIST_FIND(&nf_conntrack_hash[repl_hash],
			  conntrack_tuple_cmp,
			  struct nf_conntrack_tuple_hash *,
			  &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
		struct nf_conn_help *help;

		/* Remove from unconfirmed list */
		list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);

		__nf_conntrack_hash_insert(ct, hash, repl_hash);
		/* Timer relative to confirmation time, not original
		   setting time, otherwise we'd get timer wrap in
		   weird delay cases. */
		ct->timeout.expires += jiffies;
		add_timer(&ct->timeout);
		atomic_inc(&ct->ct_general.use);
		set_bit(IPS_CONFIRMED_BIT, &ct->status);
		NF_CT_STAT_INC(insert);
		write_unlock_bh(&nf_conntrack_lock);
		help = nfct_help(ct);
		if (help && help->helper)
			nf_conntrack_event_cache(IPCT_HELPER, *pskb);
#ifdef CONFIG_NF_NAT_NEEDED
		if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
		    test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
			nf_conntrack_event_cache(IPCT_NATINFO, *pskb);
#endif
		nf_conntrack_event_cache(master_ct(ct) ?
					 IPCT_RELATED : IPCT_NEW, *pskb);
		return NF_ACCEPT;
	}

	NF_CT_STAT_INC(insert_failed);
	write_unlock_bh(&nf_conntrack_lock);
	return NF_DROP;
}
/* Returns true if a connection corresponds to the tuple (required
   for NAT). */
int
nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
			 const struct nf_conn *ignored_conntrack)
{
	struct nf_conntrack_tuple_hash *h;

	read_lock_bh(&nf_conntrack_lock);
	h = __nf_conntrack_find(tuple, ignored_conntrack);
	read_unlock_bh(&nf_conntrack_lock);

	return h != NULL;
}
/* There's a small race here where we may free a just-assured
   connection. Too bad: we're in trouble anyway. */
static inline int unreplied(const struct nf_conntrack_tuple_hash *i)
{
	return !(test_bit(IPS_ASSURED_BIT,
			  &nf_ct_tuplehash_to_ctrack(i)->status));
}

static int early_drop(struct list_head *chain)
{
	/* Traverse backwards: gives us oldest, which is roughly LRU */
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct = NULL;
	int dropped = 0;

	read_lock_bh(&nf_conntrack_lock);
	h = LIST_FIND_B(chain, unreplied, struct nf_conntrack_tuple_hash *);
	if (h) {
		ct = nf_ct_tuplehash_to_ctrack(h);
		atomic_inc(&ct->ct_general.use);
	}
	read_unlock_bh(&nf_conntrack_lock);

	if (!ct)
		return dropped;

	if (del_timer(&ct->timeout)) {
		death_by_timeout((unsigned long)ct);
		dropped = 1;
		NF_CT_STAT_INC(early_drop);
	}
	nf_ct_put(ct);
	return dropped;
}
static inline int helper_cmp(const struct nf_conntrack_helper *i,
			     const struct nf_conntrack_tuple *rtuple)
{
	return nf_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
}

static struct nf_conntrack_helper *
__nf_ct_helper_find(const struct nf_conntrack_tuple *tuple)
{
	return LIST_FIND(&helpers, helper_cmp,
			 struct nf_conntrack_helper *,
			 tuple);
}

struct nf_conntrack_helper *
nf_ct_helper_find_get(const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_helper *helper;

	/* need nf_conntrack_lock to assure that helper exists until
	 * try_module_get() is called */
	read_lock_bh(&nf_conntrack_lock);

	helper = __nf_ct_helper_find(tuple);
	if (helper) {
		/* need to increase module usage count to assure helper will
		 * not go away while the caller is e.g. busy putting a
		 * conntrack in the hash that uses the helper */
		if (!try_module_get(helper->me))
			helper = NULL;
	}

	read_unlock_bh(&nf_conntrack_lock);

	return helper;
}

void nf_ct_helper_put(struct nf_conntrack_helper *helper)
{
	module_put(helper->me);
}
static struct nf_conn *
__nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
		     const struct nf_conntrack_tuple *repl,
		     const struct nf_conntrack_l3proto *l3proto)
{
	struct nf_conn *conntrack = NULL;
	u_int32_t features = 0;
	struct nf_conntrack_helper *helper;

	if (unlikely(!nf_conntrack_hash_rnd_initted)) {
		get_random_bytes(&nf_conntrack_hash_rnd, 4);
		nf_conntrack_hash_rnd_initted = 1;
	}

	if (nf_conntrack_max
	    && atomic_read(&nf_conntrack_count) >= nf_conntrack_max) {
		unsigned int hash = hash_conntrack(orig);
		/* Try dropping from this hash chain. */
		if (!early_drop(&nf_conntrack_hash[hash])) {
			if (net_ratelimit())
				printk(KERN_WARNING
				       "nf_conntrack: table full, dropping"
				       " packet.\n");
			return ERR_PTR(-ENOMEM);
		}
	}

	/* find features needed by this conntrack. */
	features = l3proto->get_features(orig);

	/* FIXME: protect helper list per RCU */
	read_lock_bh(&nf_conntrack_lock);
	helper = __nf_ct_helper_find(repl);
	if (helper)
		features |= NF_CT_F_HELP;
	read_unlock_bh(&nf_conntrack_lock);

	DEBUGP("nf_conntrack_alloc: features=0x%x\n", features);

	read_lock_bh(&nf_ct_cache_lock);

	if (unlikely(!nf_ct_cache[features].use)) {
		DEBUGP("nf_conntrack_alloc: not supported features = 0x%x\n",
			features);
		goto out;
	}

	conntrack = kmem_cache_alloc(nf_ct_cache[features].cachep, GFP_ATOMIC);
	if (conntrack == NULL) {
		DEBUGP("nf_conntrack_alloc: Can't alloc conntrack from cache\n");
		goto out;
	}

	memset(conntrack, 0, nf_ct_cache[features].size);
	conntrack->features = features;
	if (helper) {
		struct nf_conn_help *help = nfct_help(conntrack);
		NF_CT_ASSERT(help);
		help->helper = helper;
	}

	atomic_set(&conntrack->ct_general.use, 1);
	conntrack->ct_general.destroy = destroy_conntrack;
	conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
	conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
	/* Don't set timer yet: wait for confirmation */
	init_timer(&conntrack->timeout);
	conntrack->timeout.data = (unsigned long)conntrack;
	conntrack->timeout.function = death_by_timeout;

	atomic_inc(&nf_conntrack_count);
out:
	read_unlock_bh(&nf_ct_cache_lock);
	return conntrack;
}
struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
				   const struct nf_conntrack_tuple *repl)
{
	struct nf_conntrack_l3proto *l3proto;

	l3proto = __nf_ct_l3proto_find(orig->src.l3num);
	return __nf_conntrack_alloc(orig, repl, l3proto);
}

void nf_conntrack_free(struct nf_conn *conntrack)
{
	u_int32_t features = conntrack->features;
	NF_CT_ASSERT(features >= NF_CT_F_BASIC && features < NF_CT_F_NUM);
	DEBUGP("nf_conntrack_free: features = 0x%x, conntrack=%p\n", features,
	       conntrack);
	kmem_cache_free(nf_ct_cache[features].cachep, conntrack);
	atomic_dec(&nf_conntrack_count);
}
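/*
 * Lifetime sketch (illustrative): __nf_conntrack_alloc() hands out an
 * entry with use == 1; init_conntrack() links it into the unconfirmed
 * list; __nf_conntrack_confirm() moves it into the hash and takes the
 * reference owned by the timeout timer; the final nf_ct_put() ends up
 * in destroy_conntrack(), which returns the entry to its feature cache
 * via nf_conntrack_free().
 */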
/* Allocate a new conntrack: we return -ENOMEM if classification
   failed due to stress. Otherwise it really is unclassifiable. */
static struct nf_conntrack_tuple_hash *
init_conntrack(const struct nf_conntrack_tuple *tuple,
	       struct nf_conntrack_l3proto *l3proto,
	       struct nf_conntrack_protocol *protocol,
	       struct sk_buff *skb,
	       unsigned int dataoff)
{
	struct nf_conn *conntrack;
	struct nf_conntrack_tuple repl_tuple;
	struct nf_conntrack_expect *exp;

	if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, protocol)) {
		DEBUGP("Can't invert tuple.\n");
		return NULL;
	}

	conntrack = __nf_conntrack_alloc(tuple, &repl_tuple, l3proto);
	if (conntrack == NULL || IS_ERR(conntrack)) {
		DEBUGP("Can't allocate conntrack.\n");
		return (struct nf_conntrack_tuple_hash *)conntrack;
	}

	if (!protocol->new(conntrack, skb, dataoff)) {
		nf_conntrack_free(conntrack);
		DEBUGP("init conntrack: can't track with proto module\n");
		return NULL;
	}

	write_lock_bh(&nf_conntrack_lock);
	exp = find_expectation(tuple);

	if (exp) {
		DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
			conntrack, exp);
		/* Welcome, Mr. Bond. We've been expecting you... */
		__set_bit(IPS_EXPECTED_BIT, &conntrack->status);
		conntrack->master = exp->master;
#ifdef CONFIG_NF_CONNTRACK_MARK
		conntrack->mark = exp->master->mark;
#endif
		nf_conntrack_get(&conntrack->master->ct_general);
		NF_CT_STAT_INC(expect_new);
	} else
		NF_CT_STAT_INC(new);

	/* Overload tuple linked list to put us in unconfirmed list. */
	list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);

	write_unlock_bh(&nf_conntrack_lock);

	if (exp) {
		if (exp->expectfn)
			exp->expectfn(conntrack, exp);
		nf_conntrack_expect_put(exp);
	}

	return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
}
/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
static inline struct nf_conn *
resolve_normal_ct(struct sk_buff *skb,
		  unsigned int dataoff,
		  u_int16_t l3num,
		  u_int8_t protonum,
		  struct nf_conntrack_l3proto *l3proto,
		  struct nf_conntrack_protocol *proto,
		  int *set_reply,
		  enum ip_conntrack_info *ctinfo)
{
	struct nf_conntrack_tuple tuple;
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;

	if (!nf_ct_get_tuple(skb, (unsigned int)(skb->nh.raw - skb->data),
			     dataoff, l3num, protonum, &tuple, l3proto,
			     proto)) {
		DEBUGP("resolve_normal_ct: Can't get tuple\n");
		return NULL;
	}

	/* look for tuple match */
	h = nf_conntrack_find_get(&tuple, NULL);
	if (!h) {
		h = init_conntrack(&tuple, l3proto, proto, skb, dataoff);
		if (!h)
			return NULL;
		if (IS_ERR(h))
			return (void *)h;
	}
	ct = nf_ct_tuplehash_to_ctrack(h);

	/* It exists; we have (non-exclusive) reference. */
	if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
		*ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
		/* Please set reply bit if this packet OK */
		*set_reply = 1;
	} else {
		/* Once we've had two way comms, always ESTABLISHED. */
		if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
			DEBUGP("nf_conntrack_in: normal packet for %p\n", ct);
			*ctinfo = IP_CT_ESTABLISHED;
		} else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
			DEBUGP("nf_conntrack_in: related packet for %p\n", ct);
			*ctinfo = IP_CT_RELATED;
		} else {
			DEBUGP("nf_conntrack_in: new packet for %p\n", ct);
			*ctinfo = IP_CT_NEW;
		}
		*set_reply = 0;
	}
	skb->nfct = &ct->ct_general;
	skb->nfctinfo = *ctinfo;
	return ct;
}
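/*
 * Summary of the classification above:
 *
 *	direction	conntrack state		resulting ctinfo
 *	---------	---------------		----------------
 *	REPLY		any			IP_CT_ESTABLISHED + IP_CT_IS_REPLY
 *	ORIGINAL	reply already seen	IP_CT_ESTABLISHED
 *	ORIGINAL	created by expectation	IP_CT_RELATED
 *	ORIGINAL	brand new		IP_CT_NEW
 */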
unsigned int
nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb)
{
	struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	struct nf_conntrack_l3proto *l3proto;
	struct nf_conntrack_protocol *proto;
	unsigned int dataoff;
	u_int8_t protonum;
	int set_reply = 0;
	int ret;

	/* Previously seen (loopback or untracked)? Ignore. */
	if ((*pskb)->nfct) {
		NF_CT_STAT_INC(ignore);
		return NF_ACCEPT;
	}

	l3proto = __nf_ct_l3proto_find((u_int16_t)pf);
	if ((ret = l3proto->prepare(pskb, hooknum, &dataoff, &protonum)) <= 0) {
		DEBUGP("not prepared to track yet or error occurred\n");
		return -ret;
	}

	proto = __nf_ct_proto_find((u_int16_t)pf, protonum);

	/* It may be a special packet, error, unclean...
	 * inverse of the return code tells to the netfilter
	 * core what to do with the packet. */
	if (proto->error != NULL &&
	    (ret = proto->error(*pskb, dataoff, &ctinfo, pf, hooknum)) <= 0) {
		NF_CT_STAT_INC(error);
		NF_CT_STAT_INC(invalid);
		return -ret;
	}

	ct = resolve_normal_ct(*pskb, dataoff, pf, protonum, l3proto, proto,
			       &set_reply, &ctinfo);
	if (!ct) {
		/* Not valid part of a connection */
		NF_CT_STAT_INC(invalid);
		return NF_ACCEPT;
	}

	if (IS_ERR(ct)) {
		/* Too stressed to deal. */
		NF_CT_STAT_INC(drop);
		return NF_DROP;
	}

	NF_CT_ASSERT((*pskb)->nfct);

	ret = proto->packet(ct, *pskb, dataoff, ctinfo, pf, hooknum);
	if (ret < 0) {
		/* Invalid: inverse of the return code tells
		 * the netfilter core what to do */
		DEBUGP("nf_conntrack_in: Can't track with proto module\n");
		nf_conntrack_put((*pskb)->nfct);
		(*pskb)->nfct = NULL;
		NF_CT_STAT_INC(invalid);
		return -ret;
	}

	if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
		nf_conntrack_event_cache(IPCT_STATUS, *pskb);

	return ret;
}
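/*
 * Illustrative sketch (lives in the per-family glue, not here): the L3
 * modules register nf_conntrack_in() as a netfilter hook, roughly
 *
 *	static struct nf_hook_ops conntrack_in_ops = {
 *		.hook     = conntrack_in,	(thin wrapper around
 *		.pf       = PF_INET,		 nf_conntrack_in)
 *		.hooknum  = NF_IP_PRE_ROUTING,
 *		.priority = NF_IP_PRI_CONNTRACK,
 *	};
 *
 * Names above are assumptions modelled on nf_conntrack_l3proto_ipv4.c.
 */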
int nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
			 const struct nf_conntrack_tuple *orig)
{
	return nf_ct_invert_tuple(inverse, orig,
				  __nf_ct_l3proto_find(orig->src.l3num),
				  __nf_ct_proto_find(orig->src.l3num,
						     orig->dst.protonum));
}
/* Would two expected things clash? */
static inline int expect_clash(const struct nf_conntrack_expect *a,
			       const struct nf_conntrack_expect *b)
{
	/* Part covered by intersection of masks must be unequal,
	   otherwise they clash */
	struct nf_conntrack_tuple intersect_mask;
	int count;

	intersect_mask.src.l3num = a->mask.src.l3num & b->mask.src.l3num;
	intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;
	intersect_mask.dst.u.all = a->mask.dst.u.all & b->mask.dst.u.all;
	intersect_mask.dst.protonum = a->mask.dst.protonum
				      & b->mask.dst.protonum;

	for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++) {
		intersect_mask.src.u3.all[count] =
			a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
	}

	for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++) {
		intersect_mask.dst.u3.all[count] =
			a->mask.dst.u3.all[count] & b->mask.dst.u3.all[count];
	}

	return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
}

static inline int expect_matches(const struct nf_conntrack_expect *a,
				 const struct nf_conntrack_expect *b)
{
	return a->master == b->master
	       && nf_ct_tuple_equal(&a->tuple, &b->tuple)
	       && nf_ct_tuple_equal(&a->mask, &b->mask);
}
/* Generally a bad idea to call this: could have matched already. */
void nf_conntrack_unexpect_related(struct nf_conntrack_expect *exp)
{
	struct nf_conntrack_expect *i;

	write_lock_bh(&nf_conntrack_lock);
	/* choose the oldest expectation to evict */
	list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
		if (expect_matches(i, exp) && del_timer(&i->timeout)) {
			nf_ct_unlink_expect(i);
			write_unlock_bh(&nf_conntrack_lock);
			nf_conntrack_expect_put(i);
			return;
		}
	}
	write_unlock_bh(&nf_conntrack_lock);
}
/* We don't increase the master conntrack refcount for non-fulfilled
 * conntracks. During the conntrack destruction, the expectations are
 * always killed before the conntrack itself */
struct nf_conntrack_expect *nf_conntrack_expect_alloc(struct nf_conn *me)
{
	struct nf_conntrack_expect *new;

	new = kmem_cache_alloc(nf_conntrack_expect_cachep, GFP_ATOMIC);
	if (!new) {
		DEBUGP("expect_related: OOM allocating expect\n");
		return NULL;
	}

	new->master = me;
	atomic_set(&new->use, 1);
	return new;
}

void nf_conntrack_expect_put(struct nf_conntrack_expect *exp)
{
	if (atomic_dec_and_test(&exp->use))
		kmem_cache_free(nf_conntrack_expect_cachep, exp);
}
static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp)
{
	struct nf_conn_help *master_help = nfct_help(exp->master);

	atomic_inc(&exp->use);
	master_help->expecting++;
	list_add(&exp->list, &nf_conntrack_expect_list);

	init_timer(&exp->timeout);
	exp->timeout.data = (unsigned long)exp;
	exp->timeout.function = expectation_timed_out;
	exp->timeout.expires = jiffies + master_help->helper->timeout * HZ;
	add_timer(&exp->timeout);

	exp->id = ++nf_conntrack_expect_next_id;
	atomic_inc(&exp->use);
	NF_CT_STAT_INC(expect_create);
}
/* Race with expectations being used means we could have none to find; OK. */
static void evict_oldest_expect(struct nf_conn *master)
{
	struct nf_conntrack_expect *i;

	list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
		if (i->master == master) {
			if (del_timer(&i->timeout)) {
				nf_ct_unlink_expect(i);
				nf_conntrack_expect_put(i);
			}
			break;
		}
	}
}

static inline int refresh_timer(struct nf_conntrack_expect *i)
{
	struct nf_conn_help *master_help = nfct_help(i->master);

	if (!del_timer(&i->timeout))
		return 0;

	i->timeout.expires = jiffies + master_help->helper->timeout*HZ;
	add_timer(&i->timeout);
	return 1;
}
int nf_conntrack_expect_related(struct nf_conntrack_expect *expect)
{
	struct nf_conntrack_expect *i;
	struct nf_conn *master = expect->master;
	struct nf_conn_help *master_help = nfct_help(master);
	int ret;

	NF_CT_ASSERT(master_help);

	DEBUGP("nf_conntrack_expect_related %p\n", expect);
	DEBUGP("tuple: "); NF_CT_DUMP_TUPLE(&expect->tuple);
	DEBUGP("mask:  "); NF_CT_DUMP_TUPLE(&expect->mask);

	write_lock_bh(&nf_conntrack_lock);
	list_for_each_entry(i, &nf_conntrack_expect_list, list) {
		if (expect_matches(i, expect)) {
			/* Refresh timer: if it's dying, ignore.. */
			if (refresh_timer(i)) {
				ret = 0;
				goto out;
			}
		} else if (expect_clash(i, expect)) {
			ret = -EBUSY;
			goto out;
		}
	}
	/* Will be over limit? */
	if (master_help->helper->max_expected &&
	    master_help->expecting >= master_help->helper->max_expected)
		evict_oldest_expect(master);

	nf_conntrack_expect_insert(expect);
	nf_conntrack_expect_event(IPEXP_NEW, expect);
	ret = 0;
out:
	write_unlock_bh(&nf_conntrack_lock);
	return ret;
}
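/*
 * Illustrative use by a helper (sketch, modelled on the FTP helper):
 *
 *	exp = nf_conntrack_expect_alloc(ct);
 *	if (exp == NULL)
 *		return NF_DROP;
 *	... fill exp->tuple and exp->mask with the expected data
 *	    connection, e.g. the port announced in a PORT command ...
 *	ret = nf_conntrack_expect_related(exp);
 *	nf_conntrack_expect_put(exp);
 */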
int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
{
	int ret;
	BUG_ON(me->timeout == 0);

	ret = nf_conntrack_register_cache(NF_CT_F_HELP, "nf_conntrack:help",
					  sizeof(struct nf_conn)
					  + sizeof(struct nf_conn_help)
					  + __alignof__(struct nf_conn_help));
	if (ret < 0) {
		printk(KERN_ERR "nf_conntrack_helper_register: Unable to create slab cache for conntracks\n");
		return ret;
	}
	write_lock_bh(&nf_conntrack_lock);
	list_prepend(&helpers, me);
	write_unlock_bh(&nf_conntrack_lock);

	return 0;
}
struct nf_conntrack_helper *
__nf_conntrack_helper_find_byname(const char *name)
{
	struct nf_conntrack_helper *h;

	list_for_each_entry(h, &helpers, list) {
		if (!strcmp(h->name, name))
			return h;
	}

	return NULL;
}

static inline int unhelp(struct nf_conntrack_tuple_hash *i,
			 const struct nf_conntrack_helper *me)
{
	struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
	struct nf_conn_help *help = nfct_help(ct);

	if (help && help->helper == me) {
		nf_conntrack_event(IPCT_HELPER, ct);
		help->helper = NULL;
	}
	return 0;
}
void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
{
	unsigned int i;
	struct nf_conntrack_expect *exp, *tmp;

	/* Need write lock here, to delete helper. */
	write_lock_bh(&nf_conntrack_lock);
	LIST_DELETE(&helpers, me);

	/* Get rid of expectations */
	list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, list) {
		struct nf_conn_help *help = nfct_help(exp->master);
		if (help->helper == me && del_timer(&exp->timeout)) {
			nf_ct_unlink_expect(exp);
			nf_conntrack_expect_put(exp);
		}
	}

	/* Get rid of expecteds, set helpers to NULL. */
	LIST_FIND_W(&unconfirmed, unhelp, struct nf_conntrack_tuple_hash*, me);
	for (i = 0; i < nf_conntrack_htable_size; i++)
		LIST_FIND_W(&nf_conntrack_hash[i], unhelp,
			    struct nf_conntrack_tuple_hash *, me);
	write_unlock_bh(&nf_conntrack_lock);

	/* Someone could be still looking at the helper in a bh. */
	synchronize_net();
}
/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
void __nf_ct_refresh_acct(struct nf_conn *ct,
			  enum ip_conntrack_info ctinfo,
			  const struct sk_buff *skb,
			  unsigned long extra_jiffies,
			  int do_acct)
{
	int event = 0;

	NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
	NF_CT_ASSERT(skb);

	write_lock_bh(&nf_conntrack_lock);

	/* If not in hash table, timer will not be active yet */
	if (!nf_ct_is_confirmed(ct)) {
		ct->timeout.expires = extra_jiffies;
		event = IPCT_REFRESH;
	} else {
		/* Need del_timer for race avoidance (may already be dying). */
		if (del_timer(&ct->timeout)) {
			ct->timeout.expires = jiffies + extra_jiffies;
			add_timer(&ct->timeout);
			event = IPCT_REFRESH;
		}
	}

#ifdef CONFIG_NF_CT_ACCT
	if (do_acct) {
		ct->counters[CTINFO2DIR(ctinfo)].packets++;
		ct->counters[CTINFO2DIR(ctinfo)].bytes +=
			skb->len - (unsigned int)(skb->nh.raw - skb->data);
		if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
		    || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
			event |= IPCT_COUNTER_FILLING;
	}
#endif

	write_unlock_bh(&nf_conntrack_lock);

	/* must be unlocked when calling event cache */
	if (event)
		nf_conntrack_event_cache(event, skb);
}
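/*
 * Illustrative call from an L4 protocol module (sketch): on each packet
 * of an established UDP flow the handler extends the timeout and does
 * the accounting in one step, e.g.
 *
 *	nf_ct_refresh_acct(ct, ctinfo, skb, 180 * HZ);
 *
 * assuming nf_ct_refresh_acct() is the wrapper that passes do_acct = 1
 * to __nf_ct_refresh_acct().
 */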
#if defined(CONFIG_NF_CT_NETLINK) || \
    defined(CONFIG_NF_CT_NETLINK_MODULE)

#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
#include <linux/mutex.h>

/* Generic function for tcp/udp/sctp/dccp and alike. This needs to be
 * in ip_conntrack_core, since we don't want the protocols to autoload
 * or depend on ctnetlink */
int nf_ct_port_tuple_to_nfattr(struct sk_buff *skb,
			       const struct nf_conntrack_tuple *tuple)
{
	NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t),
		&tuple->src.u.tcp.port);
	NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t),
		&tuple->dst.u.tcp.port);
	return 0;

nfattr_failure:
	return -1;
}

static const size_t cta_min_proto[CTA_PROTO_MAX] = {
	[CTA_PROTO_SRC_PORT-1] = sizeof(u_int16_t),
	[CTA_PROTO_DST_PORT-1] = sizeof(u_int16_t)
};

int nf_ct_port_nfattr_to_tuple(struct nfattr *tb[],
			       struct nf_conntrack_tuple *t)
{
	if (!tb[CTA_PROTO_SRC_PORT-1] || !tb[CTA_PROTO_DST_PORT-1])
		return -EINVAL;

	if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))
		return -EINVAL;

	t->src.u.tcp.port =
		*(u_int16_t *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]);
	t->dst.u.tcp.port =
		*(u_int16_t *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]);

	return 0;
}
#endif
/* Used by ipt_REJECT and ip6t_REJECT. */
void __nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
{
	struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;

	/* This ICMP is in reverse direction to the packet which caused it */
	ct = nf_ct_get(skb, &ctinfo);
	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
		ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
	else
		ctinfo = IP_CT_RELATED;

	/* Attach to new skbuff, and increment count */
	nskb->nfct = &ct->ct_general;
	nskb->nfctinfo = ctinfo;
	nf_conntrack_get(nskb->nfct);
}
static int
do_iter(const struct nf_conntrack_tuple_hash *i,
	int (*iter)(struct nf_conn *i, void *data),
	void *data)
{
	return iter(nf_ct_tuplehash_to_ctrack(i), data);
}

/* Bring out ya dead! */
static struct nf_conntrack_tuple_hash *
get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
		void *data, unsigned int *bucket)
{
	struct nf_conntrack_tuple_hash *h = NULL;

	write_lock_bh(&nf_conntrack_lock);
	for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
		h = LIST_FIND_W(&nf_conntrack_hash[*bucket], do_iter,
				struct nf_conntrack_tuple_hash *, iter, data);
		if (h)
			break;
	}
	if (!h)
		h = LIST_FIND_W(&unconfirmed, do_iter,
				struct nf_conntrack_tuple_hash *, iter, data);
	if (h)
		atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use);
	write_unlock_bh(&nf_conntrack_lock);

	return h;
}
void
nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data)
{
	struct nf_conntrack_tuple_hash *h;
	unsigned int bucket = 0;

	while ((h = get_next_corpse(iter, data, &bucket)) != NULL) {
		struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
		/* Time to push up daisies... */
		if (del_timer(&ct->timeout))
			death_by_timeout((unsigned long)ct);
		/* ... else the timer will get him soon. */

		nf_ct_put(ct);
	}
}

static int kill_all(struct nf_conn *i, void *data)
{
	return 1;
}
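/*
 * Illustrative use (sketch): a caller that must flush a subset of
 * entries supplies a predicate returning nonzero for victims, e.g. a
 * hypothetical
 *
 *	static int kill_by_l3num(struct nf_conn *i, void *data)
 *	{
 *		return i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num
 *		       == *(u_int16_t *)data;
 *	}
 *
 *	nf_ct_iterate_cleanup(kill_by_l3num, &l3num);
 *
 * nf_conntrack_flush() below is this pattern with kill_all().
 */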
static void free_conntrack_hash(struct list_head *hash, int vmalloced, int size)
{
	if (vmalloced)
		vfree(hash);
	else
		free_pages((unsigned long)hash,
			   get_order(sizeof(struct list_head) * size));
}

void nf_conntrack_flush(void)
{
	nf_ct_iterate_cleanup(kill_all, NULL);
}
/* Mishearing the voices in his head, our hero wonders how he's
   supposed to kill the mall. */
void nf_conntrack_cleanup(void)
{
	int i;

	ip_ct_attach = NULL;

	/* This makes sure all current packets have passed through
	   netfilter framework.  Roll on, two-stage module
	   delete... */
	synchronize_net();

	nf_ct_event_cache_flush();
 i_see_dead_people:
	nf_conntrack_flush();
	if (atomic_read(&nf_conntrack_count) != 0) {
		schedule();
		goto i_see_dead_people;
	}
	/* wait until all references to nf_conntrack_untracked are dropped */
	while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
		schedule();

	for (i = 0; i < NF_CT_F_NUM; i++) {
		if (nf_ct_cache[i].use == 0)
			continue;

		NF_CT_ASSERT(nf_ct_cache[i].use == 1);
		nf_ct_cache[i].use = 1;
		nf_conntrack_unregister_cache(i);
	}
	kmem_cache_destroy(nf_conntrack_expect_cachep);
	free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc,
			    nf_conntrack_htable_size);

	/* free l3proto protocol tables */
	for (i = 0; i < PF_MAX; i++)
		if (nf_ct_protos[i]) {
			kfree(nf_ct_protos[i]);
			nf_ct_protos[i] = NULL;
		}
}
static struct list_head *alloc_hashtable(int size, int *vmalloced)
{
	struct list_head *hash;
	unsigned int i;

	*vmalloced = 0;
	hash = (void *)__get_free_pages(GFP_KERNEL,
					get_order(sizeof(struct list_head)
						  * size));
	if (!hash) {
		*vmalloced = 1;
		printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
		hash = vmalloc(sizeof(struct list_head) * size);
	}

	if (hash)
		for (i = 0; i < size; i++)
			INIT_LIST_HEAD(&hash[i]);

	return hash;
}
int set_hashsize(const char *val, struct kernel_param *kp)
{
	int i, bucket, hashsize, vmalloced;
	int old_vmalloced, old_size;
	int rnd;
	struct list_head *hash, *old_hash;
	struct nf_conntrack_tuple_hash *h;

	/* On boot, we can set this without any fancy locking. */
	if (!nf_conntrack_htable_size)
		return param_set_uint(val, kp);

	hashsize = simple_strtol(val, NULL, 0);
	if (!hashsize)
		return -EINVAL;

	hash = alloc_hashtable(hashsize, &vmalloced);
	if (!hash)
		return -ENOMEM;

	/* We have to rehash for the new table anyway, so we also can
	 * use a new random seed */
	get_random_bytes(&rnd, 4);

	write_lock_bh(&nf_conntrack_lock);
	for (i = 0; i < nf_conntrack_htable_size; i++) {
		while (!list_empty(&nf_conntrack_hash[i])) {
			h = list_entry(nf_conntrack_hash[i].next,
				       struct nf_conntrack_tuple_hash, list);
			list_del(&h->list);
			bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
			list_add_tail(&h->list, &hash[bucket]);
		}
	}
	old_size = nf_conntrack_htable_size;
	old_vmalloced = nf_conntrack_vmalloc;
	old_hash = nf_conntrack_hash;

	nf_conntrack_htable_size = hashsize;
	nf_conntrack_vmalloc = vmalloced;
	nf_conntrack_hash = hash;
	nf_conntrack_hash_rnd = rnd;
	write_unlock_bh(&nf_conntrack_lock);

	free_conntrack_hash(old_hash, old_vmalloced, old_size);
	return 0;
}

module_param_call(hashsize, set_hashsize, param_get_uint,
		  &nf_conntrack_htable_size, 0600);
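/*
 * Illustrative use from userspace (sketch): with the 0600 mode above,
 * the table can be resized at runtime through the module parameter, e.g.
 *
 *	echo 16384 > /sys/module/nf_conntrack/parameters/hashsize
 *
 * which lands in set_hashsize() and rehashes every entry into the new
 * table under nf_conntrack_lock.
 */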
int __init nf_conntrack_init(void)
{
	unsigned int i;
	int ret;

	/* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
	 * machine has 256 buckets.  >= 1GB machines have 8192 buckets. */
	if (!nf_conntrack_htable_size) {
		nf_conntrack_htable_size
			= (((num_physpages << PAGE_SHIFT) / 16384)
			   / sizeof(struct list_head));
		if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
			nf_conntrack_htable_size = 8192;
		if (nf_conntrack_htable_size < 16)
			nf_conntrack_htable_size = 16;
	}
	nf_conntrack_max = 8 * nf_conntrack_htable_size;

	printk("nf_conntrack version %s (%u buckets, %d max)\n",
	       NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
	       nf_conntrack_max);

	nf_conntrack_hash = alloc_hashtable(nf_conntrack_htable_size,
					    &nf_conntrack_vmalloc);
	if (!nf_conntrack_hash) {
		printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
		goto err_out;
	}

	ret = nf_conntrack_register_cache(NF_CT_F_BASIC, "nf_conntrack:basic",
					  sizeof(struct nf_conn));
	if (ret < 0) {
		printk(KERN_ERR "Unable to create nf_conn slab cache\n");
		goto err_free_hash;
	}

	nf_conntrack_expect_cachep = kmem_cache_create("nf_conntrack_expect",
					sizeof(struct nf_conntrack_expect),
					0, 0, NULL, NULL);
	if (!nf_conntrack_expect_cachep) {
		printk(KERN_ERR "Unable to create nf_expect slab cache\n");
		goto err_free_conntrack_slab;
	}

	/* Don't NEED lock here, but good form anyway. */
	write_lock_bh(&nf_conntrack_lock);
	for (i = 0; i < PF_MAX; i++)
		nf_ct_l3protos[i] = &nf_conntrack_generic_l3proto;
	write_unlock_bh(&nf_conntrack_lock);

	/* For use by REJECT target */
	ip_ct_attach = __nf_conntrack_attach;

	/* Set up fake conntrack:
	    - to never be deleted, not in any hashes */
	atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
	/*  - and to look like a confirmed connection */
	set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status);

	return ret;

err_free_conntrack_slab:
	nf_conntrack_unregister_cache(NF_CT_F_BASIC);
err_free_hash:
	free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc,
			    nf_conntrack_htable_size);
err_out:
	return -ENOMEM;
}