/* Connection state tracking for netfilter.  This is separated from,
   but required by, the NAT layer; it can also be used by an iptables
   connection tracking module. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
 *	- new API and handling of conntrack/nat helpers
 *	- now capable of multiple expectations for one master
 * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
 *	- add usage/reference counts to ip_conntrack_expect
 *	- export ip_conntrack[_expect]_{find_get,put} functions
 */

#include <linux/types.h>
#include <linux/icmp.h>
#include <linux/ip.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <net/checksum.h>
#include <net/ip.h>
#include <linux/stddef.h>
#include <linux/sysctl.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/jhash.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/moduleparam.h>
#include <linux/notifier.h>

/* ip_conntrack_lock protects the main hash table, protocol/helper/expected
   registrations, conntrack timers */
#define ASSERT_READ_LOCK(x)
#define ASSERT_WRITE_LOCK(x)

#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
#include <linux/netfilter_ipv4/ip_conntrack_core.h>

#define IP_CONNTRACK_VERSION	"2.4"

#define DEBUGP(format, args...)

DEFINE_RWLOCK(ip_conntrack_lock);

/* ip_conntrack_standalone needs this */
atomic_t ip_conntrack_count = ATOMIC_INIT(0);

void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
LIST_HEAD(ip_conntrack_expect_list);
struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
static LIST_HEAD(helpers);
unsigned int ip_conntrack_htable_size __read_mostly = 0;
int ip_conntrack_max __read_mostly;
struct list_head *ip_conntrack_hash;
static kmem_cache_t *ip_conntrack_cachep __read_mostly;
static kmem_cache_t *ip_conntrack_expect_cachep __read_mostly;
struct ip_conntrack ip_conntrack_untracked;
unsigned int ip_ct_log_invalid __read_mostly;
static LIST_HEAD(unconfirmed);
static int ip_conntrack_vmalloc;

static unsigned int ip_conntrack_next_id;
static unsigned int ip_conntrack_expect_next_id;

#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
ATOMIC_NOTIFIER_HEAD(ip_conntrack_chain);
ATOMIC_NOTIFIER_HEAD(ip_conntrack_expect_chain);

DEFINE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache);

/* deliver cached events and clear cache entry - must be called with locally
 * disabled softirqs */
static inline void
__ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ecache)
{
	DEBUGP("ecache: delivering events for %p\n", ecache->ct);
	if (is_confirmed(ecache->ct) && !is_dying(ecache->ct) && ecache->events)
		atomic_notifier_call_chain(&ip_conntrack_chain, ecache->events,
					   ecache->ct);
	ecache->events = 0;
	ip_conntrack_put(ecache->ct);
	ecache->ct = NULL;
}

/* Deliver all cached events for a particular conntrack. This is called
 * by code prior to async packet handling or freeing the skb */
void ip_ct_deliver_cached_events(const struct ip_conntrack *ct)
{
	struct ip_conntrack_ecache *ecache;

	local_bh_disable();
	ecache = &__get_cpu_var(ip_conntrack_ecache);
	if (ecache->ct == ct)
		__ip_ct_deliver_cached_events(ecache);
	local_bh_enable();
}

void __ip_ct_event_cache_init(struct ip_conntrack *ct)
{
	struct ip_conntrack_ecache *ecache;

	/* take care of delivering potentially old events */
	ecache = &__get_cpu_var(ip_conntrack_ecache);
	BUG_ON(ecache->ct == ct);
	if (ecache->ct)
		__ip_ct_deliver_cached_events(ecache);
	/* initialize for this conntrack/packet */
	ecache->ct = ct;
	nf_conntrack_get(&ct->ct_general);
}

/* flush the event cache - touches other CPU's data and must not be called
 * while packets are still passing through the code */
static void ip_ct_event_cache_flush(void)
{
	struct ip_conntrack_ecache *ecache;
	int cpu;

	for_each_possible_cpu(cpu) {
		ecache = &per_cpu(ip_conntrack_ecache, cpu);
		if (ecache->ct)
			ip_conntrack_put(ecache->ct);
	}
}
#else
static inline void ip_ct_event_cache_flush(void) {}
#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */

DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);

static int ip_conntrack_hash_rnd_initted;
static unsigned int ip_conntrack_hash_rnd;

static u_int32_t __hash_conntrack(const struct ip_conntrack_tuple *tuple,
				  unsigned int size, unsigned int rnd)
{
	return (jhash_3words(tuple->src.ip,
			     (tuple->dst.ip ^ tuple->dst.protonum),
			     (tuple->src.u.all | (tuple->dst.u.all << 16)),
			     rnd) % size);
}

static u_int32_t
hash_conntrack(const struct ip_conntrack_tuple *tuple)
{
	return __hash_conntrack(tuple, ip_conntrack_htable_size,
				ip_conntrack_hash_rnd);
}
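
/* Illustration (commentary, not original code): the hash mixes addresses,
 * ports and the protocol number, so the ORIGINAL and REPLY tuples of one
 * connection normally land in different buckets.  The "% size" keeps the
 * result valid for any (runtime-configurable) table size, and "rnd" makes
 * bucket placement unpredictable, so remote hosts cannot deliberately
 * degenerate the hash chains. */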

int
ip_ct_get_tuple(const struct iphdr *iph,
		const struct sk_buff *skb,
		unsigned int dataoff,
		struct ip_conntrack_tuple *tuple,
		const struct ip_conntrack_protocol *protocol)
{
	/* Never happen */
	if (iph->frag_off & htons(IP_OFFSET)) {
		printk("ip_conntrack_core: Frag of proto %u.\n",
		       iph->protocol);
		return 0;
	}

	tuple->src.ip = iph->saddr;
	tuple->dst.ip = iph->daddr;
	tuple->dst.protonum = iph->protocol;
	tuple->dst.dir = IP_CT_DIR_ORIGINAL;

	return protocol->pkt_to_tuple(skb, dataoff, tuple);
}

int
ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse,
		   const struct ip_conntrack_tuple *orig,
		   const struct ip_conntrack_protocol *protocol)
{
	inverse->src.ip = orig->dst.ip;
	inverse->dst.ip = orig->src.ip;
	inverse->dst.protonum = orig->dst.protonum;
	inverse->dst.dir = !orig->dst.dir;

	return protocol->invert_tuple(inverse, orig);
}
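
/* Worked example (illustrative): for an ORIGINAL tuple
 *	src = 192.168.0.2:1025, dst = 10.0.0.1:80, proto = TCP
 * the inverted REPLY tuple becomes
 *	src = 10.0.0.1:80, dst = 192.168.0.2:1025, proto = TCP
 * with the direction flag flipped; the per-protocol invert_tuple()
 * handles the layer-4 part (here, swapping the port numbers). */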

/* ip_conntrack_expect helper functions */
void ip_ct_unlink_expect(struct ip_conntrack_expect *exp)
{
	ASSERT_WRITE_LOCK(&ip_conntrack_lock);
	IP_NF_ASSERT(!timer_pending(&exp->timeout));
	list_del(&exp->list);
	CONNTRACK_STAT_INC(expect_delete);
	exp->master->expecting--;
	ip_conntrack_expect_put(exp);
}

static void expectation_timed_out(unsigned long ul_expect)
{
	struct ip_conntrack_expect *exp = (void *)ul_expect;

	write_lock_bh(&ip_conntrack_lock);
	ip_ct_unlink_expect(exp);
	write_unlock_bh(&ip_conntrack_lock);
	ip_conntrack_expect_put(exp);
}

struct ip_conntrack_expect *
__ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple)
{
	struct ip_conntrack_expect *i;

	list_for_each_entry(i, &ip_conntrack_expect_list, list) {
		if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
			atomic_inc(&i->use);
			return i;
		}
	}
	return NULL;
}

/* Just find an expectation corresponding to a tuple. */
struct ip_conntrack_expect *
ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple)
{
	struct ip_conntrack_expect *i;

	read_lock_bh(&ip_conntrack_lock);
	i = __ip_conntrack_expect_find(tuple);
	read_unlock_bh(&ip_conntrack_lock);
	return i;
}

/* If an expectation for this connection is found, it is deleted from
 * the global list and returned. */
static struct ip_conntrack_expect *
find_expectation(const struct ip_conntrack_tuple *tuple)
{
	struct ip_conntrack_expect *i;

	list_for_each_entry(i, &ip_conntrack_expect_list, list) {
		/* If master is not in hash table yet (ie. packet hasn't left
		   this machine yet), how can other end know about expected?
		   Hence these are not the droids you are looking for (if
		   master ct never got confirmed, we'd hold a reference to it
		   and weird things would happen to future packets). */
		if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)
		    && is_confirmed(i->master)) {
			if (i->flags & IP_CT_EXPECT_PERMANENT) {
				atomic_inc(&i->use);
				return i;
			} else if (del_timer(&i->timeout)) {
				ip_ct_unlink_expect(i);
				return i;
			}
		}
	}
	return NULL;
}

/* delete all expectations for this conntrack */
void ip_ct_remove_expectations(struct ip_conntrack *ct)
{
	struct ip_conntrack_expect *i, *tmp;

	/* Optimization: most connections never expect any others. */
	if (ct->expecting == 0)
		return;

	list_for_each_entry_safe(i, tmp, &ip_conntrack_expect_list, list) {
		if (i->master == ct && del_timer(&i->timeout)) {
			ip_ct_unlink_expect(i);
			ip_conntrack_expect_put(i);
		}
	}
}

static void
clean_from_lists(struct ip_conntrack *ct)
{
	DEBUGP("clean_from_lists(%p)\n", ct);
	ASSERT_WRITE_LOCK(&ip_conntrack_lock);
	list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
	list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list);

	/* Destroy all pending expectations */
	ip_ct_remove_expectations(ct);
}

static void
destroy_conntrack(struct nf_conntrack *nfct)
{
	struct ip_conntrack *ct = (struct ip_conntrack *)nfct;
	struct ip_conntrack_protocol *proto;

	DEBUGP("destroy_conntrack(%p)\n", ct);
	IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
	IP_NF_ASSERT(!timer_pending(&ct->timeout));

	ip_conntrack_event(IPCT_DESTROY, ct);
	set_bit(IPS_DYING_BIT, &ct->status);

	/* To make sure we don't get any weird locking issues here:
	 * destroy_conntrack() MUST NOT be called with a write lock
	 * to ip_conntrack_lock!!! -HW */
	proto = __ip_conntrack_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
	if (proto && proto->destroy)
		proto->destroy(ct);

	if (ip_conntrack_destroyed)
		ip_conntrack_destroyed(ct);

	write_lock_bh(&ip_conntrack_lock);
	/* Expectations will have been removed in clean_from_lists,
	 * except TFTP can create an expectation on the first packet,
	 * before connection is in the list, so we need to clean here,
	 * too. */
	ip_ct_remove_expectations(ct);

	/* We overload first tuple to link into unconfirmed list. */
	if (!is_confirmed(ct)) {
		BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list));
		list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
	}

	CONNTRACK_STAT_INC(delete);
	write_unlock_bh(&ip_conntrack_lock);

	if (ct->master)
		ip_conntrack_put(ct->master);

	DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
	ip_conntrack_free(ct);
}

static void death_by_timeout(unsigned long ul_conntrack)
{
	struct ip_conntrack *ct = (void *)ul_conntrack;

	write_lock_bh(&ip_conntrack_lock);
	/* Inside lock so preempt is disabled on module removal path.
	 * Otherwise we can get spurious warnings. */
	CONNTRACK_STAT_INC(delete_list);
	clean_from_lists(ct);
	write_unlock_bh(&ip_conntrack_lock);
	ip_conntrack_put(ct);
}

struct ip_conntrack_tuple_hash *
__ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
		    const struct ip_conntrack *ignored_conntrack)
{
	struct ip_conntrack_tuple_hash *h;
	unsigned int hash = hash_conntrack(tuple);

	ASSERT_READ_LOCK(&ip_conntrack_lock);
	list_for_each_entry(h, &ip_conntrack_hash[hash], list) {
		if (tuplehash_to_ctrack(h) != ignored_conntrack &&
		    ip_ct_tuple_equal(tuple, &h->tuple)) {
			CONNTRACK_STAT_INC(found);
			return h;
		}
		CONNTRACK_STAT_INC(searched);
	}
	return NULL;
}
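
/* Commentary: "ignored_conntrack" exists for NAT's benefit - when testing
 * whether a candidate tuple is free, the conntrack currently being NATed
 * must not match itself; plain lookups pass NULL.  The found/searched
 * counters feed the per-cpu statistics visible in
 * /proc/net/stat/ip_conntrack, making average chain length observable. */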

/* Find a connection corresponding to a tuple. */
struct ip_conntrack_tuple_hash *
ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
		      const struct ip_conntrack *ignored_conntrack)
{
	struct ip_conntrack_tuple_hash *h;

	read_lock_bh(&ip_conntrack_lock);
	h = __ip_conntrack_find(tuple, ignored_conntrack);
	if (h)
		atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
	read_unlock_bh(&ip_conntrack_lock);
	return h;
}

static void __ip_conntrack_hash_insert(struct ip_conntrack *ct,
				       unsigned int hash,
				       unsigned int repl_hash)
{
	ct->id = ++ip_conntrack_next_id;
	list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list,
		 &ip_conntrack_hash[hash]);
	list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list,
		 &ip_conntrack_hash[repl_hash]);
}

void ip_conntrack_hash_insert(struct ip_conntrack *ct)
{
	unsigned int hash, repl_hash;

	hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
	repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);

	write_lock_bh(&ip_conntrack_lock);
	__ip_conntrack_hash_insert(ct, hash, repl_hash);
	write_unlock_bh(&ip_conntrack_lock);
}

/* Confirm a connection given skb; places it in hash table */
int
__ip_conntrack_confirm(struct sk_buff **pskb)
{
	unsigned int hash, repl_hash;
	struct ip_conntrack_tuple_hash *h;
	struct ip_conntrack *ct;
	enum ip_conntrack_info ctinfo;

	ct = ip_conntrack_get(*pskb, &ctinfo);

	/* ipt_REJECT uses ip_conntrack_attach to attach related
	   ICMP/TCP RST packets in other direction.  Actual packet
	   which created connection will be IP_CT_NEW or for an
	   expected connection, IP_CT_RELATED. */
	if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
		return NF_ACCEPT;

	hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
	repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);

	/* We're not in hash table, and we refuse to set up related
	   connections for unconfirmed conns.  But packet copies and
	   REJECT will give spurious warnings here. */
	/* IP_NF_ASSERT(atomic_read(&ct->ct_general.use) == 1); */

	/* No external references means noone else could have
	   confirmed us. */
	IP_NF_ASSERT(!is_confirmed(ct));
	DEBUGP("Confirming conntrack %p\n", ct);

	write_lock_bh(&ip_conntrack_lock);

	/* See if there's one in the list already, including reverse:
	   NAT could have grabbed it without realizing, since we're
	   not in the hash.  If there is, we lost race. */
	list_for_each_entry(h, &ip_conntrack_hash[hash], list)
		if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
				      &h->tuple))
			goto out;
	list_for_each_entry(h, &ip_conntrack_hash[repl_hash], list)
		if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
				      &h->tuple))
			goto out;

	/* Remove from unconfirmed list */
	list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);

	__ip_conntrack_hash_insert(ct, hash, repl_hash);
	/* Timer relative to confirmation time, not original
	   setting time, otherwise we'd get timer wrap in
	   weird delay cases. */
	ct->timeout.expires += jiffies;
	add_timer(&ct->timeout);
	atomic_inc(&ct->ct_general.use);
	set_bit(IPS_CONFIRMED_BIT, &ct->status);
	CONNTRACK_STAT_INC(insert);
	write_unlock_bh(&ip_conntrack_lock);

	if (ct->helper)
		ip_conntrack_event_cache(IPCT_HELPER, *pskb);
#ifdef CONFIG_IP_NF_NAT_NEEDED
	if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
	    test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
		ip_conntrack_event_cache(IPCT_NATINFO, *pskb);
#endif
	ip_conntrack_event_cache(master_ct(ct) ?
				 IPCT_RELATED : IPCT_NEW, *pskb);
	return NF_ACCEPT;

out:
	CONNTRACK_STAT_INC(insert_failed);
	write_unlock_bh(&ip_conntrack_lock);
	return NF_DROP;
}
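
/* Commentary: two packets of the same new connection can race through
 * resolve_normal_ct() on different CPUs, each allocating an unconfirmed
 * conntrack.  Only the first to reach __ip_conntrack_confirm() survives
 * the duplicate scan above; the loser takes the "out:" path, its packet
 * is dropped, and insert_failed records the event. */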

/* Returns true if a connection corresponds to the tuple (required
   for NAT). */
int
ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
			 const struct ip_conntrack *ignored_conntrack)
{
	struct ip_conntrack_tuple_hash *h;

	read_lock_bh(&ip_conntrack_lock);
	h = __ip_conntrack_find(tuple, ignored_conntrack);
	read_unlock_bh(&ip_conntrack_lock);
	return h != NULL;
}

/* There's a small race here where we may free a just-assured
   connection.  Too bad: we're in trouble anyway. */
static int early_drop(struct list_head *chain)
{
	/* Traverse backwards: gives us oldest, which is roughly LRU */
	struct ip_conntrack_tuple_hash *h;
	struct ip_conntrack *ct = NULL, *tmp;
	int dropped = 0;

	read_lock_bh(&ip_conntrack_lock);
	list_for_each_entry_reverse(h, chain, list) {
		tmp = tuplehash_to_ctrack(h);
		if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) {
			ct = tmp;
			atomic_inc(&ct->ct_general.use);
			break;
		}
	}
	read_unlock_bh(&ip_conntrack_lock);

	if (!ct)
		return dropped;

	if (del_timer(&ct->timeout)) {
		death_by_timeout((unsigned long)ct);
		dropped = 1;
		CONNTRACK_STAT_INC(early_drop);
	}
	ip_conntrack_put(ct);
	return dropped;
}
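
/* Design note: eviction only scans the one hash chain the new connection
 * would occupy, and only victims without IPS_ASSURED set (e.g. TCP
 * connections that never finished the three-way handshake) are eligible.
 * Walking the chain backwards approximates oldest-first, since
 * list_add() prepends new entries. */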

static struct ip_conntrack_helper *
__ip_conntrack_helper_find(const struct ip_conntrack_tuple *tuple)
{
	struct ip_conntrack_helper *h;

	list_for_each_entry(h, &helpers, list) {
		if (ip_ct_tuple_mask_cmp(tuple, &h->tuple, &h->mask))
			return h;
	}
	return NULL;
}

struct ip_conntrack_helper *
ip_conntrack_helper_find_get(const struct ip_conntrack_tuple *tuple)
{
	struct ip_conntrack_helper *helper;

	/* need ip_conntrack_lock to assure that helper exists until
	 * try_module_get() is called */
	read_lock_bh(&ip_conntrack_lock);

	helper = __ip_conntrack_helper_find(tuple);
	if (helper) {
		/* need to increase module usage count to assure helper will
		 * not go away while the caller is e.g. busy putting a
		 * conntrack in the hash that uses the helper */
		if (!try_module_get(helper->me))
			helper = NULL;
	}

	read_unlock_bh(&ip_conntrack_lock);
	return helper;
}

void ip_conntrack_helper_put(struct ip_conntrack_helper *helper)
{
	module_put(helper->me);
}

struct ip_conntrack_protocol *
__ip_conntrack_proto_find(u_int8_t protocol)
{
	return ip_ct_protos[protocol];
}

/* this is guaranteed to always return a valid protocol helper, since
 * it falls back to generic_protocol */
struct ip_conntrack_protocol *
ip_conntrack_proto_find_get(u_int8_t protocol)
{
	struct ip_conntrack_protocol *p;

	preempt_disable();
	p = __ip_conntrack_proto_find(protocol);
	if (p && !try_module_get(p->me))
		p = &ip_conntrack_generic_protocol;
	preempt_enable();
	return p;
}

void ip_conntrack_proto_put(struct ip_conntrack_protocol *p)
{
	module_put(p->me);
}

struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig,
					struct ip_conntrack_tuple *repl)
{
	struct ip_conntrack *conntrack;

	if (!ip_conntrack_hash_rnd_initted) {
		get_random_bytes(&ip_conntrack_hash_rnd, 4);
		ip_conntrack_hash_rnd_initted = 1;
	}

	if (ip_conntrack_max
	    && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
		unsigned int hash = hash_conntrack(orig);
		/* Try dropping from this hash chain. */
		if (!early_drop(&ip_conntrack_hash[hash])) {
			if (net_ratelimit())
				printk(KERN_WARNING
				       "ip_conntrack: table full, dropping"
				       " packet.\n");
			return ERR_PTR(-ENOMEM);
		}
	}

	conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
	if (!conntrack) {
		DEBUGP("Can't allocate conntrack.\n");
		return ERR_PTR(-ENOMEM);
	}

	memset(conntrack, 0, sizeof(*conntrack));
	atomic_set(&conntrack->ct_general.use, 1);
	conntrack->ct_general.destroy = destroy_conntrack;
	conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
	conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
	/* Don't set timer yet: wait for confirmation */
	init_timer(&conntrack->timeout);
	conntrack->timeout.data = (unsigned long)conntrack;
	conntrack->timeout.function = death_by_timeout;

	atomic_inc(&ip_conntrack_count);
	return conntrack;
}

void
ip_conntrack_free(struct ip_conntrack *conntrack)
{
	atomic_dec(&ip_conntrack_count);
	kmem_cache_free(ip_conntrack_cachep, conntrack);
}

/* Allocate a new conntrack: we return -ENOMEM if classification
 * failed due to stress.  Otherwise it really is unclassifiable */
static struct ip_conntrack_tuple_hash *
init_conntrack(struct ip_conntrack_tuple *tuple,
	       struct ip_conntrack_protocol *protocol,
	       struct sk_buff *skb)
{
	struct ip_conntrack *conntrack;
	struct ip_conntrack_tuple repl_tuple;
	struct ip_conntrack_expect *exp;

	if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) {
		DEBUGP("Can't invert tuple.\n");
		return NULL;
	}

	conntrack = ip_conntrack_alloc(tuple, &repl_tuple);
	if (conntrack == NULL || IS_ERR(conntrack))
		return (struct ip_conntrack_tuple_hash *)conntrack;

	if (!protocol->new(conntrack, skb)) {
		ip_conntrack_free(conntrack);
		return NULL;
	}

	write_lock_bh(&ip_conntrack_lock);
	exp = find_expectation(tuple);

	if (exp) {
		DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
			conntrack, exp);
		/* Welcome, Mr. Bond.  We've been expecting you... */
		__set_bit(IPS_EXPECTED_BIT, &conntrack->status);
		conntrack->master = exp->master;
#ifdef CONFIG_IP_NF_CONNTRACK_MARK
		conntrack->mark = exp->master->mark;
#endif
#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
    defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
		/* this is ugly, but there is no other place to put it */
		conntrack->nat.masq_index = exp->master->nat.masq_index;
#endif
#ifdef CONFIG_IP_NF_CONNTRACK_SECMARK
		conntrack->secmark = exp->master->secmark;
#endif
		nf_conntrack_get(&conntrack->master->ct_general);
		CONNTRACK_STAT_INC(expect_new);
	} else {
		conntrack->helper = __ip_conntrack_helper_find(&repl_tuple);
		CONNTRACK_STAT_INC(new);
	}

	/* Overload tuple linked list to put us in unconfirmed list. */
	list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);

	write_unlock_bh(&ip_conntrack_lock);

	if (exp) {
		if (exp->expectfn)
			exp->expectfn(conntrack, exp);
		ip_conntrack_expect_put(exp);
	}

	return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
}
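
/* Commentary: this is where "related" connections inherit state from
 * their master.  E.g. an FTP data connection picks up the mark/secmark
 * of its control connection, so one iptables rule on the control
 * channel effectively classifies the whole session. */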

/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
static inline struct ip_conntrack *
resolve_normal_ct(struct sk_buff *skb,
		  struct ip_conntrack_protocol *proto,
		  int *set_reply,
		  unsigned int hooknum,
		  enum ip_conntrack_info *ctinfo)
{
	struct ip_conntrack_tuple tuple;
	struct ip_conntrack_tuple_hash *h;
	struct ip_conntrack *ct;

	IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0);

	if (!ip_ct_get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4,
			     &tuple, proto))
		return NULL;

	/* look for tuple match */
	h = ip_conntrack_find_get(&tuple, NULL);
	if (!h) {
		h = init_conntrack(&tuple, proto, skb);
		if (!h)
			return NULL;
		if (IS_ERR(h))
			return (void *)h;
	}
	ct = tuplehash_to_ctrack(h);

	/* It exists; we have (non-exclusive) reference. */
	if (DIRECTION(h) == IP_CT_DIR_REPLY) {
		*ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
		/* Please set reply bit if this packet OK */
		*set_reply = 1;
	} else {
		/* Once we've had two way comms, always ESTABLISHED. */
		if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
			DEBUGP("ip_conntrack_in: normal packet for %p\n",
			       ct);
			*ctinfo = IP_CT_ESTABLISHED;
		} else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
			DEBUGP("ip_conntrack_in: related packet for %p\n",
			       ct);
			*ctinfo = IP_CT_RELATED;
		} else {
			DEBUGP("ip_conntrack_in: new packet for %p\n",
			       ct);
			*ctinfo = IP_CT_NEW;
		}
		*set_reply = 0;
	}
	skb->nfct = &ct->ct_general;
	skb->nfctinfo = *ctinfo;
	return ct;
}
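
/* Summary (commentary): the resulting ctinfo classifies the packet as
 *
 *	IP_CT_NEW				first packet, ORIGINAL direction
 *	IP_CT_RELATED				ORIGINAL direction, expected conn
 *	IP_CT_ESTABLISHED			ORIGINAL direction, reply seen
 *	IP_CT_ESTABLISHED + IP_CT_IS_REPLY	any REPLY-direction packet
 *
 * which is the state that iptables' "-m state --state ..." matches. */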

/* Netfilter hook itself. */
unsigned int ip_conntrack_in(unsigned int hooknum,
			     struct sk_buff **pskb,
			     const struct net_device *in,
			     const struct net_device *out,
			     int (*okfn)(struct sk_buff *))
{
	struct ip_conntrack *ct;
	enum ip_conntrack_info ctinfo;
	struct ip_conntrack_protocol *proto;
	int set_reply = 0;
	int ret;

	/* Previously seen (loopback or untracked)?  Ignore. */
	if ((*pskb)->nfct) {
		CONNTRACK_STAT_INC(ignore);
		return NF_ACCEPT;
	}

	/* Never happen */
	if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
		if (net_ratelimit()) {
			printk(KERN_ERR "ip_conntrack_in: Frag of proto %u (hook=%u)\n",
			       (*pskb)->nh.iph->protocol, hooknum);
		}
		return NF_DROP;
	}

/* Doesn't cover locally-generated broadcast, so not worth it. */
#if 0
	/* Ignore broadcast: no `connection'. */
	if ((*pskb)->pkt_type == PACKET_BROADCAST) {
		printk("Broadcast packet!\n");
		return NF_ACCEPT;
	} else if (((*pskb)->nh.iph->daddr & htonl(0x000000FF))
		   == htonl(0x000000FF)) {
		printk("Should bcast: %u.%u.%u.%u->%u.%u.%u.%u (sk=%p, ptype=%u)\n",
		       NIPQUAD((*pskb)->nh.iph->saddr),
		       NIPQUAD((*pskb)->nh.iph->daddr),
		       (*pskb)->sk, (*pskb)->pkt_type);
	}
#endif

	proto = __ip_conntrack_proto_find((*pskb)->nh.iph->protocol);

	/* It may be a special packet: error, unclean...  The inverse of
	 * the return code tells the netfilter core what to do with the
	 * packet. */
	if (proto->error != NULL
	    && (ret = proto->error(*pskb, &ctinfo, hooknum)) <= 0) {
		CONNTRACK_STAT_INC(error);
		CONNTRACK_STAT_INC(invalid);
		return -ret;
	}

	if (!(ct = resolve_normal_ct(*pskb, proto, &set_reply, hooknum,
				     &ctinfo))) {
		/* Not valid part of a connection */
		CONNTRACK_STAT_INC(invalid);
		return NF_ACCEPT;
	}

	if (IS_ERR(ct)) {
		/* Too stressed to deal. */
		CONNTRACK_STAT_INC(drop);
		return NF_DROP;
	}

	IP_NF_ASSERT((*pskb)->nfct);

	ret = proto->packet(ct, *pskb, ctinfo);
	if (ret < 0) {
		/* Invalid: inverse of the return code tells
		 * the netfilter core what to do */
		nf_conntrack_put((*pskb)->nfct);
		(*pskb)->nfct = NULL;
		CONNTRACK_STAT_INC(invalid);
		return -ret;
	}

	if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
		ip_conntrack_event_cache(IPCT_STATUS, *pskb);

	return ret;
}
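
/* Commentary: protocol handlers signal "invalid, but with a verdict" by
 * returning the negated netfilter verdict (e.g. -NF_ACCEPT); the
 * "return -ret" above turns that back into the positive NF_* code
 * handed to the netfilter core. */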

int invert_tuplepr(struct ip_conntrack_tuple *inverse,
		   const struct ip_conntrack_tuple *orig)
{
	return ip_ct_invert_tuple(inverse, orig,
				  __ip_conntrack_proto_find(orig->dst.protonum));
}

/* Would two expected things clash? */
static inline int expect_clash(const struct ip_conntrack_expect *a,
			       const struct ip_conntrack_expect *b)
{
	/* Part covered by intersection of masks must be unequal,
	   otherwise they clash */
	struct ip_conntrack_tuple intersect_mask
		= { { a->mask.src.ip & b->mask.src.ip,
		      { a->mask.src.u.all & b->mask.src.u.all } },
		    { a->mask.dst.ip & b->mask.dst.ip,
		      { a->mask.dst.u.all & b->mask.dst.u.all },
		      a->mask.dst.protonum & b->mask.dst.protonum } };

	return ip_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
}
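
/* Worked example (illustrative): expectation A covers
 * "10.0.0.2:any -> 10.0.0.1:20" (source port wildcarded), expectation B
 * covers "10.0.0.2:any -> 10.0.0.1:any".  The intersected mask wildcards
 * both ports; the remaining fields (addresses, protocol) compare equal,
 * so the two expectations clash. */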

static inline int expect_matches(const struct ip_conntrack_expect *a,
				 const struct ip_conntrack_expect *b)
{
	return a->master == b->master
		&& ip_ct_tuple_equal(&a->tuple, &b->tuple)
		&& ip_ct_tuple_equal(&a->mask, &b->mask);
}

/* Generally a bad idea to call this: could have matched already. */
void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp)
{
	struct ip_conntrack_expect *i;

	write_lock_bh(&ip_conntrack_lock);
	/* choose the oldest expectation to evict */
	list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
		if (expect_matches(i, exp) && del_timer(&i->timeout)) {
			ip_ct_unlink_expect(i);
			write_unlock_bh(&ip_conntrack_lock);
			ip_conntrack_expect_put(i);
			return;
		}
	}
	write_unlock_bh(&ip_conntrack_lock);
}

/* We don't increase the master conntrack refcount for non-fulfilled
 * expectations. During the conntrack destruction, the expectations are
 * always killed before the conntrack itself */
struct ip_conntrack_expect *ip_conntrack_expect_alloc(struct ip_conntrack *me)
{
	struct ip_conntrack_expect *new;

	new = kmem_cache_alloc(ip_conntrack_expect_cachep, GFP_ATOMIC);
	if (!new) {
		DEBUGP("expect_related: OOM allocating expect\n");
		return NULL;
	}
	new->master = me;
	atomic_set(&new->use, 1);
	return new;
}

void ip_conntrack_expect_put(struct ip_conntrack_expect *exp)
{
	if (atomic_dec_and_test(&exp->use))
		kmem_cache_free(ip_conntrack_expect_cachep, exp);
}

static void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp)
{
	atomic_inc(&exp->use);
	exp->master->expecting++;
	list_add(&exp->list, &ip_conntrack_expect_list);

	init_timer(&exp->timeout);
	exp->timeout.data = (unsigned long)exp;
	exp->timeout.function = expectation_timed_out;
	exp->timeout.expires = jiffies + exp->master->helper->timeout * HZ;
	add_timer(&exp->timeout);

	exp->id = ++ip_conntrack_expect_next_id;
	atomic_inc(&exp->use);
	CONNTRACK_STAT_INC(expect_create);
}
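
/* Commentary: the two atomic_inc() calls above take one reference for
 * the global list and one for the running timer, on top of the caller's
 * initial reference from ip_conntrack_expect_alloc(); each unlink and
 * timer-expiry path drops exactly one via ip_conntrack_expect_put(). */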

/* Race with expectations being used means we could have none to find; OK. */
static void evict_oldest_expect(struct ip_conntrack *master)
{
	struct ip_conntrack_expect *i;

	list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
		if (i->master == master) {
			if (del_timer(&i->timeout)) {
				ip_ct_unlink_expect(i);
				ip_conntrack_expect_put(i);
			}
			break;
		}
	}
}

static inline int refresh_timer(struct ip_conntrack_expect *i)
{
	if (!del_timer(&i->timeout))
		return 0;

	i->timeout.expires = jiffies + i->master->helper->timeout * HZ;
	add_timer(&i->timeout);
	return 1;
}

int ip_conntrack_expect_related(struct ip_conntrack_expect *expect)
{
	struct ip_conntrack_expect *i;
	int ret;

	DEBUGP("ip_conntrack_expect_related %p\n", expect);
	DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple);
	DEBUGP("mask:  "); DUMP_TUPLE(&expect->mask);

	write_lock_bh(&ip_conntrack_lock);
	list_for_each_entry(i, &ip_conntrack_expect_list, list) {
		if (expect_matches(i, expect)) {
			/* Refresh timer: if it's dying, ignore.. */
			if (refresh_timer(i)) {
				ret = 0;
				goto out;
			}
		} else if (expect_clash(i, expect)) {
			ret = -EBUSY;
			goto out;
		}
	}

	/* Will be over limit? */
	if (expect->master->helper->max_expected &&
	    expect->master->expecting >= expect->master->helper->max_expected)
		evict_oldest_expect(expect->master);

	ip_conntrack_expect_insert(expect);
	ip_conntrack_expect_event(IPEXP_NEW, expect);
	ret = 0;
out:
	write_unlock_bh(&ip_conntrack_lock);
	return ret;
}

/* Alter reply tuple (maybe alter helper).  This is for NAT, and is
   implicitly racy: see __ip_conntrack_confirm */
void ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
			      const struct ip_conntrack_tuple *newreply)
{
	write_lock_bh(&ip_conntrack_lock);
	/* Should be unconfirmed, so not in hash table yet */
	IP_NF_ASSERT(!is_confirmed(conntrack));

	DEBUGP("Altering reply tuple of %p to ", conntrack);
	DUMP_TUPLE(newreply);

	conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
	if (!conntrack->master && conntrack->expecting == 0)
		conntrack->helper = __ip_conntrack_helper_find(newreply);
	write_unlock_bh(&ip_conntrack_lock);
}

int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
{
	BUG_ON(me->timeout == 0);
	write_lock_bh(&ip_conntrack_lock);
	list_add(&me->list, &helpers);
	write_unlock_bh(&ip_conntrack_lock);
	return 0;
}

struct ip_conntrack_helper *
__ip_conntrack_helper_find_byname(const char *name)
{
	struct ip_conntrack_helper *h;

	list_for_each_entry(h, &helpers, list) {
		if (!strcmp(h->name, name))
			return h;
	}
	return NULL;
}

static inline void unhelp(struct ip_conntrack_tuple_hash *i,
			  const struct ip_conntrack_helper *me)
{
	if (tuplehash_to_ctrack(i)->helper == me) {
		ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i));
		tuplehash_to_ctrack(i)->helper = NULL;
	}
}

void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
{
	unsigned int i;
	struct ip_conntrack_tuple_hash *h;
	struct ip_conntrack_expect *exp, *tmp;

	/* Need write lock here, to delete helper. */
	write_lock_bh(&ip_conntrack_lock);
	list_del(&me->list);

	/* Get rid of expectations */
	list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) {
		if (exp->master->helper == me && del_timer(&exp->timeout)) {
			ip_ct_unlink_expect(exp);
			ip_conntrack_expect_put(exp);
		}
	}
	/* Get rid of expecteds, set helpers to NULL. */
	list_for_each_entry(h, &unconfirmed, list)
		unhelp(h, me);
	for (i = 0; i < ip_conntrack_htable_size; i++) {
		list_for_each_entry(h, &ip_conntrack_hash[i], list)
			unhelp(h, me);
	}
	write_unlock_bh(&ip_conntrack_lock);

	/* Someone could be still looking at the helper in a bh. */
	synchronize_net();
}

/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
void __ip_ct_refresh_acct(struct ip_conntrack *ct,
			  enum ip_conntrack_info ctinfo,
			  const struct sk_buff *skb,
			  unsigned long extra_jiffies,
			  int do_acct)
{
	int event = 0;

	IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);
	IP_NF_ASSERT(skb);

	write_lock_bh(&ip_conntrack_lock);

	/* Only update if this is not a fixed timeout */
	if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) {
		write_unlock_bh(&ip_conntrack_lock);
		return;
	}

	/* If not in hash table, timer will not be active yet */
	if (!is_confirmed(ct)) {
		ct->timeout.expires = extra_jiffies;
		event = IPCT_REFRESH;
	} else {
		/* Need del_timer for race avoidance (may already be dying). */
		if (del_timer(&ct->timeout)) {
			ct->timeout.expires = jiffies + extra_jiffies;
			add_timer(&ct->timeout);
			event = IPCT_REFRESH;
		}
	}

#ifdef CONFIG_IP_NF_CT_ACCT
	if (do_acct) {
		ct->counters[CTINFO2DIR(ctinfo)].packets++;
		ct->counters[CTINFO2DIR(ctinfo)].bytes +=
						ntohs(skb->nh.iph->tot_len);
		if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
		    || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
			event |= IPCT_COUNTER_FILLING;
	}
#endif

	write_unlock_bh(&ip_conntrack_lock);

	/* must be unlocked when calling event cache */
	if (event)
		ip_conntrack_event_cache(event, skb);
}
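
/* Commentary: the 0x80000000 test fires once a 32-bit packet or byte
 * counter has its top bit set, i.e. is halfway to wrapping; the
 * IPCT_COUNTER_FILLING event gives ctnetlink listeners a chance to read
 * the counters before precision is lost. */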

#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
/* Generic function for tcp/udp/sctp/dccp and alike. This needs to be
 * in ip_conntrack_core, since we don't want the protocols to autoload
 * or depend on ctnetlink */
int ip_ct_port_tuple_to_nfattr(struct sk_buff *skb,
			       const struct ip_conntrack_tuple *tuple)
{
	NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t),
		&tuple->src.u.tcp.port);
	NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t),
		&tuple->dst.u.tcp.port);
	return 0;

nfattr_failure:
	return -1;
}

int ip_ct_port_nfattr_to_tuple(struct nfattr *tb[],
			       struct ip_conntrack_tuple *t)
{
	if (!tb[CTA_PROTO_SRC_PORT-1] || !tb[CTA_PROTO_DST_PORT-1])
		return -EINVAL;

	t->src.u.tcp.port =
		*(u_int16_t *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]);
	t->dst.u.tcp.port =
		*(u_int16_t *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]);

	return 0;
}
#endif

/* Returns new sk_buff, or NULL */
struct sk_buff *
ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user)
{
	skb_orphan(skb);

	local_bh_disable();
	skb = ip_defrag(skb, user);
	local_bh_enable();

	if (skb)
		ip_send_check(skb->nh.iph);
	return skb;
}

/* Used by ipt_REJECT. */
static void ip_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
{
	struct ip_conntrack *ct;
	enum ip_conntrack_info ctinfo;

	/* This ICMP is in reverse direction to the packet which caused it */
	ct = ip_conntrack_get(skb, &ctinfo);
	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
		ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
	else
		ctinfo = IP_CT_RELATED;

	/* Attach to new skbuff, and increment count */
	nskb->nfct = &ct->ct_general;
	nskb->nfctinfo = ctinfo;
	nf_conntrack_get(nskb->nfct);
}

/* Bring out ya dead! */
static struct ip_conntrack *
get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data),
		void *data, unsigned int *bucket)
{
	struct ip_conntrack_tuple_hash *h;
	struct ip_conntrack *ct;

	write_lock_bh(&ip_conntrack_lock);
	for (; *bucket < ip_conntrack_htable_size; (*bucket)++) {
		list_for_each_entry(h, &ip_conntrack_hash[*bucket], list) {
			ct = tuplehash_to_ctrack(h);
			if (iter(ct, data))
				goto found;
		}
	}
	list_for_each_entry(h, &unconfirmed, list) {
		ct = tuplehash_to_ctrack(h);
		if (iter(ct, data))
			goto found;
	}
	write_unlock_bh(&ip_conntrack_lock);
	return NULL;

found:
	atomic_inc(&ct->ct_general.use);
	write_unlock_bh(&ip_conntrack_lock);
	return ct;
}

void
ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data)
{
	struct ip_conntrack *ct;
	unsigned int bucket = 0;

	while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
		/* Time to push up daisies... */
		if (del_timer(&ct->timeout))
			death_by_timeout((unsigned long)ct);
		/* ... else the timer will get him soon. */
		ip_conntrack_put(ct);
	}
}

/* Fast function for those who don't want to parse /proc (and I don't
   blame them). */
/* Reversing the socket's dst/src point of view gives us the reply
   mapping. */
static int
getorigdst(struct sock *sk, int optval, void __user *user, int *len)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ip_conntrack_tuple_hash *h;
	struct ip_conntrack_tuple tuple;

	IP_CT_TUPLE_U_BLANK(&tuple);
	tuple.src.ip = inet->rcv_saddr;
	tuple.src.u.tcp.port = inet->sport;
	tuple.dst.ip = inet->daddr;
	tuple.dst.u.tcp.port = inet->dport;
	tuple.dst.protonum = IPPROTO_TCP;

	/* We only do TCP at the moment: is there a better way? */
	if (strcmp(sk->sk_prot->name, "TCP")) {
		DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n");
		return -ENOPROTOOPT;
	}

	if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
		DEBUGP("SO_ORIGINAL_DST: len %u not %u\n",
		       *len, sizeof(struct sockaddr_in));
		return -EINVAL;
	}

	h = ip_conntrack_find_get(&tuple, NULL);
	if (h) {
		struct sockaddr_in sin;
		struct ip_conntrack *ct = tuplehash_to_ctrack(h);

		sin.sin_family = AF_INET;
		sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
			.tuple.dst.u.tcp.port;
		sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
			.tuple.dst.ip;
		memset(sin.sin_zero, 0, sizeof(sin.sin_zero));

		DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
		       NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
		ip_conntrack_put(ct);
		if (copy_to_user(user, &sin, sizeof(sin)) != 0)
			return -EFAULT;
		else
			return 0;
	}
	DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
	       NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port),
	       NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port));
	return -ENOENT;
}
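
/* Usage sketch (illustrative, from userspace): a transparent proxy that
 * accepted a REDIRECTed connection can recover the pre-NAT destination:
 *
 *	struct sockaddr_in dst;
 *	socklen_t len = sizeof(dst);
 *	getsockopt(fd, SOL_IP, SO_ORIGINAL_DST, &dst, &len);
 */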

static struct nf_sockopt_ops so_getorigdst = {
	.pf		= PF_INET,
	.get_optmin	= SO_ORIGINAL_DST,
	.get_optmax	= SO_ORIGINAL_DST+1,
	.get		= &getorigdst,
};

static int kill_all(struct ip_conntrack *i, void *data)
{
	return 1;
}

void ip_conntrack_flush(void)
{
	ip_ct_iterate_cleanup(kill_all, NULL);
}

static void free_conntrack_hash(struct list_head *hash, int vmalloced,
				int size)
{
	if (vmalloced)
		vfree(hash);
	else
		free_pages((unsigned long)hash,
			   get_order(sizeof(struct list_head) * size));
}

/* Mishearing the voices in his head, our hero wonders how he's
   supposed to kill the mall. */
void ip_conntrack_cleanup(void)
{
	ip_ct_attach = NULL;

	/* This makes sure all current packets have passed through
	   netfilter framework.  Roll on, two-stage module
	   delete... */
	synchronize_net();

	ip_ct_event_cache_flush();
 i_see_dead_people:
	ip_conntrack_flush();
	if (atomic_read(&ip_conntrack_count) != 0) {
		schedule();
		goto i_see_dead_people;
	}
	/* wait until all references to ip_conntrack_untracked are dropped */
	while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1)
		schedule();

	kmem_cache_destroy(ip_conntrack_cachep);
	kmem_cache_destroy(ip_conntrack_expect_cachep);
	free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
			    ip_conntrack_htable_size);
	nf_unregister_sockopt(&so_getorigdst);
}

static struct list_head *alloc_hashtable(int size, int *vmalloced)
{
	struct list_head *hash;
	unsigned int i;

	*vmalloced = 0;
	hash = (void *)__get_free_pages(GFP_KERNEL,
					get_order(sizeof(struct list_head)
						  * size));
	if (!hash) {
		*vmalloced = 1;
		printk(KERN_WARNING "ip_conntrack: falling back to vmalloc.\n");
		hash = vmalloc(sizeof(struct list_head) * size);
	}

	if (hash)
		for (i = 0; i < size; i++)
			INIT_LIST_HEAD(&hash[i]);

	return hash;
}
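
/* Design note: the table is first requested as physically contiguous
 * pages, which is cheapest for the hot lookup path; only if that
 * high-order allocation fails does it fall back to vmalloc(), which can
 * satisfy large requests from scattered pages at some mapping cost. */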

static int set_hashsize(const char *val, struct kernel_param *kp)
{
	int i, bucket, hashsize, vmalloced;
	int old_vmalloced, old_size;
	int rnd;
	struct list_head *hash, *old_hash;
	struct ip_conntrack_tuple_hash *h;

	/* On boot, we can set this without any fancy locking. */
	if (!ip_conntrack_htable_size)
		return param_set_int(val, kp);

	hashsize = simple_strtol(val, NULL, 0);
	if (!hashsize)
		return -EINVAL;

	hash = alloc_hashtable(hashsize, &vmalloced);
	if (!hash)
		return -ENOMEM;

	/* We have to rehash for the new table anyway, so we also can
	 * use a new random seed */
	get_random_bytes(&rnd, 4);

	write_lock_bh(&ip_conntrack_lock);
	for (i = 0; i < ip_conntrack_htable_size; i++) {
		while (!list_empty(&ip_conntrack_hash[i])) {
			h = list_entry(ip_conntrack_hash[i].next,
				       struct ip_conntrack_tuple_hash, list);
			list_del(&h->list);
			bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
			list_add_tail(&h->list, &hash[bucket]);
		}
	}
	old_size = ip_conntrack_htable_size;
	old_vmalloced = ip_conntrack_vmalloc;
	old_hash = ip_conntrack_hash;

	ip_conntrack_htable_size = hashsize;
	ip_conntrack_vmalloc = vmalloced;
	ip_conntrack_hash = hash;
	ip_conntrack_hash_rnd = rnd;
	write_unlock_bh(&ip_conntrack_lock);

	free_conntrack_hash(old_hash, old_vmalloced, old_size);
	return 0;
}
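
/* Usage (illustrative): the hash size can be changed at runtime, e.g.
 *
 *	echo 16384 > /sys/module/ip_conntrack/parameters/hashsize
 *
 * which reaches set_hashsize() via module_param_call() below; every
 * existing entry is rehashed into the new table under
 * ip_conntrack_lock, then the old table is freed. */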

module_param_call(hashsize, set_hashsize, param_get_uint,
		  &ip_conntrack_htable_size, 0600);

int __init ip_conntrack_init(void)
{
	unsigned int i;
	int ret;

	/* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
	 * machine has 256 buckets.  >= 1GB machines have 8192 buckets. */
	if (!ip_conntrack_htable_size) {
		ip_conntrack_htable_size
			= (((num_physpages << PAGE_SHIFT) / 16384)
			   / sizeof(struct list_head));
		if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
			ip_conntrack_htable_size = 8192;
		if (ip_conntrack_htable_size < 16)
			ip_conntrack_htable_size = 16;
	}
	ip_conntrack_max = 8 * ip_conntrack_htable_size;
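
	/* Worked example (illustrative): on i386 with 512MB of RAM,
	 * (512MB / 16384) / sizeof(struct list_head) = 32768 / 8 = 4096
	 * buckets, so ip_conntrack_max defaults to 8 * 4096 = 32768
	 * tracked connections. */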

	printk("ip_conntrack version %s (%u buckets, %d max)"
	       " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION,
	       ip_conntrack_htable_size, ip_conntrack_max,
	       sizeof(struct ip_conntrack));

	ret = nf_register_sockopt(&so_getorigdst);
	if (ret != 0) {
		printk(KERN_ERR "Unable to register netfilter socket option\n");
		return ret;
	}

	ip_conntrack_hash = alloc_hashtable(ip_conntrack_htable_size,
					    &ip_conntrack_vmalloc);
	if (!ip_conntrack_hash) {
		printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
		goto err_unreg_sockopt;
	}

	ip_conntrack_cachep = kmem_cache_create("ip_conntrack",
						sizeof(struct ip_conntrack), 0,
						0, NULL, NULL);
	if (!ip_conntrack_cachep) {
		printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
		goto err_free_hash;
	}

	ip_conntrack_expect_cachep = kmem_cache_create("ip_conntrack_expect",
					sizeof(struct ip_conntrack_expect),
					0, 0, NULL, NULL);
	if (!ip_conntrack_expect_cachep) {
		printk(KERN_ERR "Unable to create ip_expect slab cache\n");
		goto err_free_conntrack_slab;
	}

	/* Don't NEED lock here, but good form anyway. */
	write_lock_bh(&ip_conntrack_lock);
	for (i = 0; i < MAX_IP_CT_PROTO; i++)
		ip_ct_protos[i] = &ip_conntrack_generic_protocol;
	/* Sew in builtin protocols. */
	ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp;
	ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp;
	ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
	write_unlock_bh(&ip_conntrack_lock);

	/* For use by ipt_REJECT */
	ip_ct_attach = ip_conntrack_attach;

	/* Set up fake conntrack:
	    - to never be deleted, not in any hashes */
	atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
	/*  - and make it look like a confirmed connection */
	set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);

	return ret;

err_free_conntrack_slab:
	kmem_cache_destroy(ip_conntrack_cachep);
err_free_hash:
	free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
			    ip_conntrack_htable_size);
err_unreg_sockopt:
	nf_unregister_sockopt(&so_getorigdst);

	return -ENOMEM;
}