[NETFILTER]: Kill lockhelp.h
[safe/jmp/linux-2.6] / net / ipv4 / netfilter / ip_tables.c
1 /*
2  * Packet matching code.
3  *
4  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5  * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  *
11  * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
12  *      - increase module usage count as soon as we have rules inside
13  *        a table
14  */
15 #include <linux/config.h>
16 #include <linux/cache.h>
17 #include <linux/skbuff.h>
18 #include <linux/kmod.h>
19 #include <linux/vmalloc.h>
20 #include <linux/netdevice.h>
21 #include <linux/module.h>
22 #include <linux/tcp.h>
23 #include <linux/udp.h>
24 #include <linux/icmp.h>
25 #include <net/ip.h>
26 #include <asm/uaccess.h>
27 #include <asm/semaphore.h>
28 #include <linux/proc_fs.h>
29 #include <linux/err.h>
30
31 #include <linux/netfilter_ipv4/ip_tables.h>
32
33 MODULE_LICENSE("GPL");
34 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
35 MODULE_DESCRIPTION("IPv4 packet filter");
36
37 /*#define DEBUG_IP_FIREWALL*/
38 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
39 /*#define DEBUG_IP_FIREWALL_USER*/
40
/* Packet-path debug tracing: dprintf() forwards to printk() only when
 * DEBUG_IP_FIREWALL is defined; otherwise it expands to nothing, so its
 * arguments are not evaluated. */
41 #ifdef DEBUG_IP_FIREWALL
42 #define dprintf(format, args...)  printk(format , ## args)
43 #else
44 #define dprintf(format, args...)
45 #endif
46
/* Same as dprintf(), but switched by DEBUG_IP_FIREWALL_USER.  In this
 * file it is used by the rule-checking code rather than the packet path. */
47 #ifdef DEBUG_IP_FIREWALL_USER
48 #define duprintf(format, args...) printk(format , ## args)
49 #else
50 #define duprintf(format, args...)
51 #endif
52
/* Soft assertion: when CONFIG_NETFILTER_DEBUG is set and (x) is false it
 * only logs function, file and line; execution continues.  Without
 * CONFIG_NETFILTER_DEBUG the macro is empty and (x) is never evaluated,
 * so (x) must not be relied on for side effects. */
53 #ifdef CONFIG_NETFILTER_DEBUG
54 #define IP_NF_ASSERT(x)                                         \
55 do {                                                            \
56         if (!(x))                                               \
57                 printk("IP_NF_ASSERT: %s:%s:%u\n",              \
58                        __FUNCTION__, __FILE__, __LINE__);       \
59 } while(0)
60 #else
61 #define IP_NF_ASSERT(x)
62 #endif
/* Round x up to a multiple of SMP_CACHE_BYTES.  The mask arithmetic
 * assumes SMP_CACHE_BYTES is a power of two -- TODO confirm. */
63 #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
64
/* Serialises user-context access to the ipt_tables, ipt_match and
 * ipt_target lists: every lookup helper in this file takes it with
 * down_interruptible() before walking a list. */
65 static DECLARE_MUTEX(ipt_mutex);
66
67 /* Must have mutex */
/* Both assertions ignore their argument and test ipt_mutex itself: they
 * expect down_trylock() to return non-zero.
 * NOTE(review): presumably non-zero means "already held", in which case a
 * failed assertion would leave the mutex acquired by the trylock -- confirm
 * against the semaphore API.  The macros are defined before listhelp.h is
 * included, presumably because that header uses them -- confirm. */
68 #define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
69 #define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
70 #include <linux/netfilter_ipv4/listhelp.h>
71
72 #if 0
73 /* All the better to debug you with... */
74 #define static
75 #define inline
76 #endif
77
78 /*
79    We keep a set of rules for each CPU, so we can avoid write-locking
80    them in the softirq when updating the counters and therefore
81    only need to read-lock in the softirq; doing a write_lock_bh() in user
82    context stops packets coming through and allows user context to read
83    the counters or update the rules.
84
85    To be cache friendly on SMP, we arrange them like so:
86    [ n-entries ]
87    ... cache-align padding ...
88    [ n-entries ]
89
90    Hence the start of any table is given by get_table() below.  */
91
92 /* The table itself */
/* Header that precedes a table's rule data.  The rules themselves live in
 * the trailing `entries` array; ipt_do_table() locates the copy for the
 * running CPU at entries + TABLE_OFFSET(info, cpu). */
93 struct ipt_table_info
94 {
95         /* Size per table */
        /* (bytes of one copy of the rules; TABLE_OFFSET() rounds this up
         * with SMP_ALIGN() to step between per-CPU copies) */
96         unsigned int size;
97         /* Number of entries: FIXME. --RR */
98         unsigned int number;
99         /* Initial number of entries. Needed for module usage count */
100         unsigned int initial_entries;
101
102         /* Entry points and underflows */
        /* Both arrays hold byte offsets from the start of a copy of the
         * rules, indexed by hook number; they are filled in by
         * check_entry_size_and_hooks() and consumed via get_entry(). */
103         unsigned int hook_entry[NF_IP_NUMHOOKS];
104         unsigned int underflow[NF_IP_NUMHOOKS];
105
106         /* ipt_entry tables: one per CPU */
107         char entries[0] ____cacheline_aligned;
108 };
109
/* Registries of known targets, matches and tables.  They are walked under
 * ipt_mutex by find_target(), find_match() and find_table_lock(). */
110 static LIST_HEAD(ipt_target);
111 static LIST_HEAD(ipt_match);
112 static LIST_HEAD(ipt_tables);
/* Add b bytes and p packets to counter c. */
113 #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
114
/* Byte offset, from t->entries, of the copy of the rules belonging to CPU p.
 * Copies are spaced SMP_ALIGN(t->size) bytes apart; without CONFIG_SMP there
 * is a single copy at offset 0. */
115 #ifdef CONFIG_SMP
116 #define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
117 #else
118 #define TABLE_OFFSET(t,p) 0
119 #endif
120
121 #if 0
122 #define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
123 #define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
124 #define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
125 #endif
126
127 /* Returns whether matches rule or not. */
128 static inline int
129 ip_packet_match(const struct iphdr *ip,
130                 const char *indev,
131                 const char *outdev,
132                 const struct ipt_ip *ipinfo,
133                 int isfrag)
134 {
135         size_t i;
136         unsigned long ret;
137
138 #define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))
139
140         if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
141                   IPT_INV_SRCIP)
142             || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
143                      IPT_INV_DSTIP)) {
144                 dprintf("Source or dest mismatch.\n");
145
146                 dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
147                         NIPQUAD(ip->saddr),
148                         NIPQUAD(ipinfo->smsk.s_addr),
149                         NIPQUAD(ipinfo->src.s_addr),
150                         ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
151                 dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
152                         NIPQUAD(ip->daddr),
153                         NIPQUAD(ipinfo->dmsk.s_addr),
154                         NIPQUAD(ipinfo->dst.s_addr),
155                         ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
156                 return 0;
157         }
158
159         /* Look for ifname matches; this should unroll nicely. */
160         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
161                 ret |= (((const unsigned long *)indev)[i]
162                         ^ ((const unsigned long *)ipinfo->iniface)[i])
163                         & ((const unsigned long *)ipinfo->iniface_mask)[i];
164         }
165
166         if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
167                 dprintf("VIA in mismatch (%s vs %s).%s\n",
168                         indev, ipinfo->iniface,
169                         ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
170                 return 0;
171         }
172
173         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
174                 ret |= (((const unsigned long *)outdev)[i]
175                         ^ ((const unsigned long *)ipinfo->outiface)[i])
176                         & ((const unsigned long *)ipinfo->outiface_mask)[i];
177         }
178
179         if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
180                 dprintf("VIA out mismatch (%s vs %s).%s\n",
181                         outdev, ipinfo->outiface,
182                         ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
183                 return 0;
184         }
185
186         /* Check specific protocol */
187         if (ipinfo->proto
188             && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
189                 dprintf("Packet protocol %hi does not match %hi.%s\n",
190                         ip->protocol, ipinfo->proto,
191                         ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
192                 return 0;
193         }
194
195         /* If we have a fragment rule but the packet is not a fragment
196          * then we return zero */
197         if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
198                 dprintf("Fragment rule but not fragment.%s\n",
199                         ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
200                 return 0;
201         }
202
203         return 1;
204 }
205
206 static inline int
207 ip_checkentry(const struct ipt_ip *ip)
208 {
209         if (ip->flags & ~IPT_F_MASK) {
210                 duprintf("Unknown flag bits set: %08X\n",
211                          ip->flags & ~IPT_F_MASK);
212                 return 0;
213         }
214         if (ip->invflags & ~IPT_INV_MASK) {
215                 duprintf("Unknown invflag bits set: %08X\n",
216                          ip->invflags & ~IPT_INV_MASK);
217                 return 0;
218         }
219         return 1;
220 }
221
222 static unsigned int
223 ipt_error(struct sk_buff **pskb,
224           const struct net_device *in,
225           const struct net_device *out,
226           unsigned int hooknum,
227           const void *targinfo,
228           void *userinfo)
229 {
230         if (net_ratelimit())
231                 printk("ip_tables: error: `%s'\n", (char *)targinfo);
232
233         return NF_DROP;
234 }
235
236 static inline
237 int do_match(struct ipt_entry_match *m,
238              const struct sk_buff *skb,
239              const struct net_device *in,
240              const struct net_device *out,
241              int offset,
242              int *hotdrop)
243 {
244         /* Stop iteration if it doesn't match */
245         if (!m->u.kernel.match->match(skb, in, out, m->data, offset, hotdrop))
246                 return 1;
247         else
248                 return 0;
249 }
250
/* Return the rule located `offset` bytes after `base`. */
static inline struct ipt_entry *
get_entry(void *base, unsigned int offset)
{
        char *start = base;

        return (struct ipt_entry *)(start + offset);
}
256
257 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
258 unsigned int
259 ipt_do_table(struct sk_buff **pskb,
260              unsigned int hook,
261              const struct net_device *in,
262              const struct net_device *out,
263              struct ipt_table *table,
264              void *userdata)
265 {
266         static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
267         u_int16_t offset;
268         struct iphdr *ip;
269         u_int16_t datalen;
270         int hotdrop = 0;
271         /* Initializing verdict to NF_DROP keeps gcc happy. */
272         unsigned int verdict = NF_DROP;
273         const char *indev, *outdev;
274         void *table_base;
275         struct ipt_entry *e, *back;
276
277         /* Initialization */
278         ip = (*pskb)->nh.iph;
279         datalen = (*pskb)->len - ip->ihl * 4;
280         indev = in ? in->name : nulldevname;
281         outdev = out ? out->name : nulldevname;
282         /* We handle fragments by dealing with the first fragment as
283          * if it was a normal packet.  All other fragments are treated
284          * normally, except that they will NEVER match rules that ask
285          * things we don't know, ie. tcp syn flag or ports).  If the
286          * rule is also a fragment-specific rule, non-fragments won't
287          * match it. */
288         offset = ntohs(ip->frag_off) & IP_OFFSET;
289
290         read_lock_bh(&table->lock);
291         IP_NF_ASSERT(table->valid_hooks & (1 << hook));
292         table_base = (void *)table->private->entries
293                 + TABLE_OFFSET(table->private, smp_processor_id());
294         e = get_entry(table_base, table->private->hook_entry[hook]);
295
296 #ifdef CONFIG_NETFILTER_DEBUG
297         /* Check noone else using our table */
298         if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac
299             && ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) {
300                 printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
301                        smp_processor_id(),
302                        table->name,
303                        &((struct ipt_entry *)table_base)->comefrom,
304                        ((struct ipt_entry *)table_base)->comefrom);
305         }
306         ((struct ipt_entry *)table_base)->comefrom = 0x57acc001;
307 #endif
308
309         /* For return from builtin chain */
310         back = get_entry(table_base, table->private->underflow[hook]);
311
312         do {
313                 IP_NF_ASSERT(e);
314                 IP_NF_ASSERT(back);
315                 (*pskb)->nfcache |= e->nfcache;
316                 if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
317                         struct ipt_entry_target *t;
318
319                         if (IPT_MATCH_ITERATE(e, do_match,
320                                               *pskb, in, out,
321                                               offset, &hotdrop) != 0)
322                                 goto no_match;
323
324                         ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
325
326                         t = ipt_get_target(e);
327                         IP_NF_ASSERT(t->u.kernel.target);
328                         /* Standard target? */
329                         if (!t->u.kernel.target->target) {
330                                 int v;
331
332                                 v = ((struct ipt_standard_target *)t)->verdict;
333                                 if (v < 0) {
334                                         /* Pop from stack? */
335                                         if (v != IPT_RETURN) {
336                                                 verdict = (unsigned)(-v) - 1;
337                                                 break;
338                                         }
339                                         e = back;
340                                         back = get_entry(table_base,
341                                                          back->comefrom);
342                                         continue;
343                                 }
344                                 if (table_base + v
345                                     != (void *)e + e->next_offset) {
346                                         /* Save old back ptr in next entry */
347                                         struct ipt_entry *next
348                                                 = (void *)e + e->next_offset;
349                                         next->comefrom
350                                                 = (void *)back - table_base;
351                                         /* set back pointer to next entry */
352                                         back = next;
353                                 }
354
355                                 e = get_entry(table_base, v);
356                         } else {
357                                 /* Targets which reenter must return
358                                    abs. verdicts */
359 #ifdef CONFIG_NETFILTER_DEBUG
360                                 ((struct ipt_entry *)table_base)->comefrom
361                                         = 0xeeeeeeec;
362 #endif
363                                 verdict = t->u.kernel.target->target(pskb,
364                                                                      in, out,
365                                                                      hook,
366                                                                      t->data,
367                                                                      userdata);
368
369 #ifdef CONFIG_NETFILTER_DEBUG
370                                 if (((struct ipt_entry *)table_base)->comefrom
371                                     != 0xeeeeeeec
372                                     && verdict == IPT_CONTINUE) {
373                                         printk("Target %s reentered!\n",
374                                                t->u.kernel.target->name);
375                                         verdict = NF_DROP;
376                                 }
377                                 ((struct ipt_entry *)table_base)->comefrom
378                                         = 0x57acc001;
379 #endif
380                                 /* Target might have changed stuff. */
381                                 ip = (*pskb)->nh.iph;
382                                 datalen = (*pskb)->len - ip->ihl * 4;
383
384                                 if (verdict == IPT_CONTINUE)
385                                         e = (void *)e + e->next_offset;
386                                 else
387                                         /* Verdict */
388                                         break;
389                         }
390                 } else {
391
392                 no_match:
393                         e = (void *)e + e->next_offset;
394                 }
395         } while (!hotdrop);
396
397 #ifdef CONFIG_NETFILTER_DEBUG
398         ((struct ipt_entry *)table_base)->comefrom = 0xdead57ac;
399 #endif
400         read_unlock_bh(&table->lock);
401
402 #ifdef DEBUG_ALLOW_ALL
403         return NF_ACCEPT;
404 #else
405         if (hotdrop)
406                 return NF_DROP;
407         else return verdict;
408 #endif
409 }
410
411 /*
412  * These are weird, but module loading must not be done with mutex
413  * held (since they will register), and we have to have a single
414  * function to use try_then_request_module().
415  */
416
417 /* Find table by name, grabs mutex & ref.  Returns ERR_PTR() on error. */
418 static inline struct ipt_table *find_table_lock(const char *name)
419 {
420         struct ipt_table *t;
421
422         if (down_interruptible(&ipt_mutex) != 0)
423                 return ERR_PTR(-EINTR);
424
425         list_for_each_entry(t, &ipt_tables, list)
426                 if (strcmp(t->name, name) == 0 && try_module_get(t->me))
427                         return t;
428         up(&ipt_mutex);
429         return NULL;
430 }
431
432 /* Find match, grabs ref.  Returns ERR_PTR() on error. */
433 static inline struct ipt_match *find_match(const char *name, u8 revision)
434 {
435         struct ipt_match *m;
436         int err = 0;
437
438         if (down_interruptible(&ipt_mutex) != 0)
439                 return ERR_PTR(-EINTR);
440
441         list_for_each_entry(m, &ipt_match, list) {
442                 if (strcmp(m->name, name) == 0) {
443                         if (m->revision == revision) {
444                                 if (try_module_get(m->me)) {
445                                         up(&ipt_mutex);
446                                         return m;
447                                 }
448                         } else
449                                 err = -EPROTOTYPE; /* Found something. */
450                 }
451         }
452         up(&ipt_mutex);
453         return ERR_PTR(err);
454 }
455
456 /* Find target, grabs ref.  Returns ERR_PTR() on error. */
457 static inline struct ipt_target *find_target(const char *name, u8 revision)
458 {
459         struct ipt_target *t;
460         int err = 0;
461
462         if (down_interruptible(&ipt_mutex) != 0)
463                 return ERR_PTR(-EINTR);
464
465         list_for_each_entry(t, &ipt_target, list) {
466                 if (strcmp(t->name, name) == 0) {
467                         if (t->revision == revision) {
468                                 if (try_module_get(t->me)) {
469                                         up(&ipt_mutex);
470                                         return t;
471                                 }
472                         } else
473                                 err = -EPROTOTYPE; /* Found something. */
474                 }
475         }
476         up(&ipt_mutex);
477         return ERR_PTR(err);
478 }
479
480 struct ipt_target *ipt_find_target(const char *name, u8 revision)
481 {
482         struct ipt_target *target;
483
484         target = try_then_request_module(find_target(name, revision),
485                                          "ipt_%s", name);
486         if (IS_ERR(target) || !target)
487                 return NULL;
488         return target;
489 }
490
491 static int match_revfn(const char *name, u8 revision, int *bestp)
492 {
493         struct ipt_match *m;
494         int have_rev = 0;
495
496         list_for_each_entry(m, &ipt_match, list) {
497                 if (strcmp(m->name, name) == 0) {
498                         if (m->revision > *bestp)
499                                 *bestp = m->revision;
500                         if (m->revision == revision)
501                                 have_rev = 1;
502                 }
503         }
504         return have_rev;
505 }
506
507 static int target_revfn(const char *name, u8 revision, int *bestp)
508 {
509         struct ipt_target *t;
510         int have_rev = 0;
511
512         list_for_each_entry(t, &ipt_target, list) {
513                 if (strcmp(t->name, name) == 0) {
514                         if (t->revision > *bestp)
515                                 *bestp = t->revision;
516                         if (t->revision == revision)
517                                 have_rev = 1;
518                 }
519         }
520         return have_rev;
521 }
522
523 /* Returns true or false (if no such extension at all) */
524 static inline int find_revision(const char *name, u8 revision,
525                                 int (*revfn)(const char *, u8, int *),
526                                 int *err)
527 {
528         int have_rev, best = -1;
529
530         if (down_interruptible(&ipt_mutex) != 0) {
531                 *err = -EINTR;
532                 return 1;
533         }
534         have_rev = revfn(name, revision, &best);
535         up(&ipt_mutex);
536
537         /* Nothing at all?  Return 0 to try loading module. */
538         if (best == -1) {
539                 *err = -ENOENT;
540                 return 0;
541         }
542
543         *err = best;
544         if (!have_rev)
545                 *err = -EPROTONOSUPPORT;
546         return 1;
547 }
548
549
550 /* All zeroes == unconditional rule. */
551 static inline int
552 unconditional(const struct ipt_ip *ip)
553 {
554         unsigned int i;
555
556         for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
557                 if (((__u32 *)ip)[i])
558                         return 0;
559
560         return 1;
561 }
562
563 /* Figures out from what hook each rule can be called: returns 0 if
564    there are loops.  Puts hook bitmask in comefrom. */
565 static int
566 mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
567 {
568         unsigned int hook;
569
570         /* No recursion; use packet counter to save back ptrs (reset
571            to 0 as we leave), and comefrom to save source hook bitmask */
572         for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
573                 unsigned int pos = newinfo->hook_entry[hook];
574                 struct ipt_entry *e
575                         = (struct ipt_entry *)(newinfo->entries + pos);
576
577                 if (!(valid_hooks & (1 << hook)))
578                         continue;
579
580                 /* Set initial back pointer. */
581                 e->counters.pcnt = pos;
582
583                 for (;;) {
584                         struct ipt_standard_target *t
585                                 = (void *)ipt_get_target(e);
586
587                         if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
588                                 printk("iptables: loop hook %u pos %u %08X.\n",
589                                        hook, pos, e->comefrom);
590                                 return 0;
591                         }
592                         e->comefrom
593                                 |= ((1 << hook) | (1 << NF_IP_NUMHOOKS));
594
595                         /* Unconditional return/END. */
596                         if (e->target_offset == sizeof(struct ipt_entry)
597                             && (strcmp(t->target.u.user.name,
598                                        IPT_STANDARD_TARGET) == 0)
599                             && t->verdict < 0
600                             && unconditional(&e->ip)) {
601                                 unsigned int oldpos, size;
602
603                                 /* Return: backtrack through the last
604                                    big jump. */
605                                 do {
606                                         e->comefrom ^= (1<<NF_IP_NUMHOOKS);
607 #ifdef DEBUG_IP_FIREWALL_USER
608                                         if (e->comefrom
609                                             & (1 << NF_IP_NUMHOOKS)) {
610                                                 duprintf("Back unset "
611                                                          "on hook %u "
612                                                          "rule %u\n",
613                                                          hook, pos);
614                                         }
615 #endif
616                                         oldpos = pos;
617                                         pos = e->counters.pcnt;
618                                         e->counters.pcnt = 0;
619
620                                         /* We're at the start. */
621                                         if (pos == oldpos)
622                                                 goto next;
623
624                                         e = (struct ipt_entry *)
625                                                 (newinfo->entries + pos);
626                                 } while (oldpos == pos + e->next_offset);
627
628                                 /* Move along one */
629                                 size = e->next_offset;
630                                 e = (struct ipt_entry *)
631                                         (newinfo->entries + pos + size);
632                                 e->counters.pcnt = pos;
633                                 pos += size;
634                         } else {
635                                 int newpos = t->verdict;
636
637                                 if (strcmp(t->target.u.user.name,
638                                            IPT_STANDARD_TARGET) == 0
639                                     && newpos >= 0) {
640                                         /* This a jump; chase it. */
641                                         duprintf("Jump rule %u -> %u\n",
642                                                  pos, newpos);
643                                 } else {
644                                         /* ... this is a fallthru */
645                                         newpos = pos + e->next_offset;
646                                 }
647                                 e = (struct ipt_entry *)
648                                         (newinfo->entries + newpos);
649                                 e->counters.pcnt = pos;
650                                 pos = newpos;
651                         }
652                 }
653                 next:
654                 duprintf("Finished chain %u\n", hook);
655         }
656         return 1;
657 }
658
659 static inline int
660 cleanup_match(struct ipt_entry_match *m, unsigned int *i)
661 {
662         if (i && (*i)-- == 0)
663                 return 1;
664
665         if (m->u.kernel.match->destroy)
666                 m->u.kernel.match->destroy(m->data,
667                                            m->u.match_size - sizeof(*m));
668         module_put(m->u.kernel.match->me);
669         return 0;
670 }
671
672 static inline int
673 standard_check(const struct ipt_entry_target *t,
674                unsigned int max_offset)
675 {
676         struct ipt_standard_target *targ = (void *)t;
677
678         /* Check standard info. */
679         if (t->u.target_size
680             != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
681                 duprintf("standard_check: target size %u != %u\n",
682                          t->u.target_size,
683                          IPT_ALIGN(sizeof(struct ipt_standard_target)));
684                 return 0;
685         }
686
687         if (targ->verdict >= 0
688             && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
689                 duprintf("ipt_standard_check: bad verdict (%i)\n",
690                          targ->verdict);
691                 return 0;
692         }
693
694         if (targ->verdict < -NF_MAX_VERDICT - 1) {
695                 duprintf("ipt_standard_check: bad negative verdict (%i)\n",
696                          targ->verdict);
697                 return 0;
698         }
699         return 1;
700 }
701
702 static inline int
703 check_match(struct ipt_entry_match *m,
704             const char *name,
705             const struct ipt_ip *ip,
706             unsigned int hookmask,
707             unsigned int *i)
708 {
709         struct ipt_match *match;
710
711         match = try_then_request_module(find_match(m->u.user.name,
712                                                    m->u.user.revision),
713                                         "ipt_%s", m->u.user.name);
714         if (IS_ERR(match) || !match) {
715                 duprintf("check_match: `%s' not found\n", m->u.user.name);
716                 return match ? PTR_ERR(match) : -ENOENT;
717         }
718         m->u.kernel.match = match;
719
720         if (m->u.kernel.match->checkentry
721             && !m->u.kernel.match->checkentry(name, ip, m->data,
722                                               m->u.match_size - sizeof(*m),
723                                               hookmask)) {
724                 module_put(m->u.kernel.match->me);
725                 duprintf("ip_tables: check failed for `%s'.\n",
726                          m->u.kernel.match->name);
727                 return -EINVAL;
728         }
729
730         (*i)++;
731         return 0;
732 }
733
734 static struct ipt_target ipt_standard_target;
735
736 static inline int
737 check_entry(struct ipt_entry *e, const char *name, unsigned int size,
738             unsigned int *i)
739 {
740         struct ipt_entry_target *t;
741         struct ipt_target *target;
742         int ret;
743         unsigned int j;
744
745         if (!ip_checkentry(&e->ip)) {
746                 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
747                 return -EINVAL;
748         }
749
750         j = 0;
751         ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
752         if (ret != 0)
753                 goto cleanup_matches;
754
755         t = ipt_get_target(e);
756         target = try_then_request_module(find_target(t->u.user.name,
757                                                      t->u.user.revision),
758                                          "ipt_%s", t->u.user.name);
759         if (IS_ERR(target) || !target) {
760                 duprintf("check_entry: `%s' not found\n", t->u.user.name);
761                 ret = target ? PTR_ERR(target) : -ENOENT;
762                 goto cleanup_matches;
763         }
764         t->u.kernel.target = target;
765
766         if (t->u.kernel.target == &ipt_standard_target) {
767                 if (!standard_check(t, size)) {
768                         ret = -EINVAL;
769                         goto cleanup_matches;
770                 }
771         } else if (t->u.kernel.target->checkentry
772                    && !t->u.kernel.target->checkentry(name, e, t->data,
773                                                       t->u.target_size
774                                                       - sizeof(*t),
775                                                       e->comefrom)) {
776                 module_put(t->u.kernel.target->me);
777                 duprintf("ip_tables: check failed for `%s'.\n",
778                          t->u.kernel.target->name);
779                 ret = -EINVAL;
780                 goto cleanup_matches;
781         }
782
783         (*i)++;
784         return 0;
785
786  cleanup_matches:
787         IPT_MATCH_ITERATE(e, cleanup_match, &j);
788         return ret;
789 }
790
791 static inline int
792 check_entry_size_and_hooks(struct ipt_entry *e,
793                            struct ipt_table_info *newinfo,
794                            unsigned char *base,
795                            unsigned char *limit,
796                            const unsigned int *hook_entries,
797                            const unsigned int *underflows,
798                            unsigned int *i)
799 {
800         unsigned int h;
801
802         if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
803             || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
804                 duprintf("Bad offset %p\n", e);
805                 return -EINVAL;
806         }
807
808         if (e->next_offset
809             < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
810                 duprintf("checking: element %p size %u\n",
811                          e, e->next_offset);
812                 return -EINVAL;
813         }
814
815         /* Check hooks & underflows */
816         for (h = 0; h < NF_IP_NUMHOOKS; h++) {
817                 if ((unsigned char *)e - base == hook_entries[h])
818                         newinfo->hook_entry[h] = hook_entries[h];
819                 if ((unsigned char *)e - base == underflows[h])
820                         newinfo->underflow[h] = underflows[h];
821         }
822
823         /* FIXME: underflows must be unconditional, standard verdicts
824            < 0 (not IPT_RETURN). --RR */
825
826         /* Clear counters and comefrom */
827         e->counters = ((struct ipt_counters) { 0, 0 });
828         e->comefrom = 0;
829
830         (*i)++;
831         return 0;
832 }
833
834 static inline int
835 cleanup_entry(struct ipt_entry *e, unsigned int *i)
836 {
837         struct ipt_entry_target *t;
838
839         if (i && (*i)-- == 0)
840                 return 1;
841
842         /* Cleanup all matches */
843         IPT_MATCH_ITERATE(e, cleanup_match, NULL);
844         t = ipt_get_target(e);
845         if (t->u.kernel.target->destroy)
846                 t->u.kernel.target->destroy(t->data,
847                                             t->u.target_size - sizeof(*t));
848         module_put(t->u.kernel.target->me);
849         return 0;
850 }
851
852 /* Checks and translates the user-supplied table segment (held in
853    newinfo) */
854 static int
855 translate_table(const char *name,
856                 unsigned int valid_hooks,
857                 struct ipt_table_info *newinfo,
858                 unsigned int size,
859                 unsigned int number,
860                 const unsigned int *hook_entries,
861                 const unsigned int *underflows)
862 {
863         unsigned int i;
864         int ret;
865
866         newinfo->size = size;
867         newinfo->number = number;
868
869         /* Init all hooks to impossible value. */
870         for (i = 0; i < NF_IP_NUMHOOKS; i++) {
871                 newinfo->hook_entry[i] = 0xFFFFFFFF;
872                 newinfo->underflow[i] = 0xFFFFFFFF;
873         }
874
875         duprintf("translate_table: size %u\n", newinfo->size);
876         i = 0;
877         /* Walk through entries, checking offsets. */
878         ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
879                                 check_entry_size_and_hooks,
880                                 newinfo,
881                                 newinfo->entries,
882                                 newinfo->entries + size,
883                                 hook_entries, underflows, &i);
884         if (ret != 0)
885                 return ret;
886
887         if (i != number) {
888                 duprintf("translate_table: %u not %u entries\n",
889                          i, number);
890                 return -EINVAL;
891         }
892
893         /* Check hooks all assigned */
894         for (i = 0; i < NF_IP_NUMHOOKS; i++) {
895                 /* Only hooks which are valid */
896                 if (!(valid_hooks & (1 << i)))
897                         continue;
898                 if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
899                         duprintf("Invalid hook entry %u %u\n",
900                                  i, hook_entries[i]);
901                         return -EINVAL;
902                 }
903                 if (newinfo->underflow[i] == 0xFFFFFFFF) {
904                         duprintf("Invalid underflow %u %u\n",
905                                  i, underflows[i]);
906                         return -EINVAL;
907                 }
908         }
909
910         if (!mark_source_chains(newinfo, valid_hooks))
911                 return -ELOOP;
912
913         /* Finally, each sanity check must pass */
914         i = 0;
915         ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
916                                 check_entry, name, size, &i);
917
918         if (ret != 0) {
919                 IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
920                                   cleanup_entry, &i);
921                 return ret;
922         }
923
924         /* And one copy for every other CPU */
925         for (i = 1; i < num_possible_cpus(); i++) {
926                 memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
927                        newinfo->entries,
928                        SMP_ALIGN(newinfo->size));
929         }
930
931         return ret;
932 }
933
934 static struct ipt_table_info *
935 replace_table(struct ipt_table *table,
936               unsigned int num_counters,
937               struct ipt_table_info *newinfo,
938               int *error)
939 {
940         struct ipt_table_info *oldinfo;
941
942 #ifdef CONFIG_NETFILTER_DEBUG
943         {
944                 struct ipt_entry *table_base;
945                 unsigned int i;
946
947                 for (i = 0; i < num_possible_cpus(); i++) {
948                         table_base =
949                                 (void *)newinfo->entries
950                                 + TABLE_OFFSET(newinfo, i);
951
952                         table_base->comefrom = 0xdead57ac;
953                 }
954         }
955 #endif
956
957         /* Do the substitution. */
958         write_lock_bh(&table->lock);
959         /* Check inside lock: is the old number correct? */
960         if (num_counters != table->private->number) {
961                 duprintf("num_counters != table->private->number (%u/%u)\n",
962                          num_counters, table->private->number);
963                 write_unlock_bh(&table->lock);
964                 *error = -EAGAIN;
965                 return NULL;
966         }
967         oldinfo = table->private;
968         table->private = newinfo;
969         newinfo->initial_entries = oldinfo->initial_entries;
970         write_unlock_bh(&table->lock);
971
972         return oldinfo;
973 }
974
975 /* Gets counters. */
976 static inline int
977 add_entry_to_counter(const struct ipt_entry *e,
978                      struct ipt_counters total[],
979                      unsigned int *i)
980 {
981         ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
982
983         (*i)++;
984         return 0;
985 }
986
987 static void
988 get_counters(const struct ipt_table_info *t,
989              struct ipt_counters counters[])
990 {
991         unsigned int cpu;
992         unsigned int i;
993
994         for (cpu = 0; cpu < num_possible_cpus(); cpu++) {
995                 i = 0;
996                 IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
997                                   t->size,
998                                   add_entry_to_counter,
999                                   counters,
1000                                   &i);
1001         }
1002 }
1003
1004 static int
1005 copy_entries_to_user(unsigned int total_size,
1006                      struct ipt_table *table,
1007                      void __user *userptr)
1008 {
1009         unsigned int off, num, countersize;
1010         struct ipt_entry *e;
1011         struct ipt_counters *counters;
1012         int ret = 0;
1013
1014         /* We need atomic snapshot of counters: rest doesn't change
1015            (other than comefrom, which userspace doesn't care
1016            about). */
1017         countersize = sizeof(struct ipt_counters) * table->private->number;
1018         counters = vmalloc(countersize);
1019
1020         if (counters == NULL)
1021                 return -ENOMEM;
1022
1023         /* First, sum counters... */
1024         memset(counters, 0, countersize);
1025         write_lock_bh(&table->lock);
1026         get_counters(table->private, counters);
1027         write_unlock_bh(&table->lock);
1028
1029         /* ... then copy entire thing from CPU 0... */
1030         if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
1031                 ret = -EFAULT;
1032                 goto free_counters;
1033         }
1034
1035         /* FIXME: use iterator macros --RR */
1036         /* ... then go back and fix counters and names */
1037         for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
1038                 unsigned int i;
1039                 struct ipt_entry_match *m;
1040                 struct ipt_entry_target *t;
1041
1042                 e = (struct ipt_entry *)(table->private->entries + off);
1043                 if (copy_to_user(userptr + off
1044                                  + offsetof(struct ipt_entry, counters),
1045                                  &counters[num],
1046                                  sizeof(counters[num])) != 0) {
1047                         ret = -EFAULT;
1048                         goto free_counters;
1049                 }
1050
1051                 for (i = sizeof(struct ipt_entry);
1052                      i < e->target_offset;
1053                      i += m->u.match_size) {
1054                         m = (void *)e + i;
1055
1056                         if (copy_to_user(userptr + off + i
1057                                          + offsetof(struct ipt_entry_match,
1058                                                     u.user.name),
1059                                          m->u.kernel.match->name,
1060                                          strlen(m->u.kernel.match->name)+1)
1061                             != 0) {
1062                                 ret = -EFAULT;
1063                                 goto free_counters;
1064                         }
1065                 }
1066
1067                 t = ipt_get_target(e);
1068                 if (copy_to_user(userptr + off + e->target_offset
1069                                  + offsetof(struct ipt_entry_target,
1070                                             u.user.name),
1071                                  t->u.kernel.target->name,
1072                                  strlen(t->u.kernel.target->name)+1) != 0) {
1073                         ret = -EFAULT;
1074                         goto free_counters;
1075                 }
1076         }
1077
1078  free_counters:
1079         vfree(counters);
1080         return ret;
1081 }
1082
1083 static int
1084 get_entries(const struct ipt_get_entries *entries,
1085             struct ipt_get_entries __user *uptr)
1086 {
1087         int ret;
1088         struct ipt_table *t;
1089
1090         t = find_table_lock(entries->name);
1091         if (t && !IS_ERR(t)) {
1092                 duprintf("t->private->number = %u\n",
1093                          t->private->number);
1094                 if (entries->size == t->private->size)
1095                         ret = copy_entries_to_user(t->private->size,
1096                                                    t, uptr->entrytable);
1097                 else {
1098                         duprintf("get_entries: I've got %u not %u!\n",
1099                                  t->private->size,
1100                                  entries->size);
1101                         ret = -EINVAL;
1102                 }
1103                 module_put(t->me);
1104                 up(&ipt_mutex);
1105         } else
1106                 ret = t ? PTR_ERR(t) : -ENOENT;
1107
1108         return ret;
1109 }
1110
1111 static int
1112 do_replace(void __user *user, unsigned int len)
1113 {
1114         int ret;
1115         struct ipt_replace tmp;
1116         struct ipt_table *t;
1117         struct ipt_table_info *newinfo, *oldinfo;
1118         struct ipt_counters *counters;
1119
1120         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1121                 return -EFAULT;
1122
1123         /* Hack: Causes ipchains to give correct error msg --RR */
1124         if (len != sizeof(tmp) + tmp.size)
1125                 return -ENOPROTOOPT;
1126
1127         /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
1128         if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
1129                 return -ENOMEM;
1130
1131         newinfo = vmalloc(sizeof(struct ipt_table_info)
1132                           + SMP_ALIGN(tmp.size) * num_possible_cpus());
1133         if (!newinfo)
1134                 return -ENOMEM;
1135
1136         if (copy_from_user(newinfo->entries, user + sizeof(tmp),
1137                            tmp.size) != 0) {
1138                 ret = -EFAULT;
1139                 goto free_newinfo;
1140         }
1141
1142         counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters));
1143         if (!counters) {
1144                 ret = -ENOMEM;
1145                 goto free_newinfo;
1146         }
1147         memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters));
1148
1149         ret = translate_table(tmp.name, tmp.valid_hooks,
1150                               newinfo, tmp.size, tmp.num_entries,
1151                               tmp.hook_entry, tmp.underflow);
1152         if (ret != 0)
1153                 goto free_newinfo_counters;
1154
1155         duprintf("ip_tables: Translated table\n");
1156
1157         t = try_then_request_module(find_table_lock(tmp.name),
1158                                     "iptable_%s", tmp.name);
1159         if (!t || IS_ERR(t)) {
1160                 ret = t ? PTR_ERR(t) : -ENOENT;
1161                 goto free_newinfo_counters_untrans;
1162         }
1163
1164         /* You lied! */
1165         if (tmp.valid_hooks != t->valid_hooks) {
1166                 duprintf("Valid hook crap: %08X vs %08X\n",
1167                          tmp.valid_hooks, t->valid_hooks);
1168                 ret = -EINVAL;
1169                 goto put_module;
1170         }
1171
1172         oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
1173         if (!oldinfo)
1174                 goto put_module;
1175
1176         /* Update module usage count based on number of rules */
1177         duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
1178                 oldinfo->number, oldinfo->initial_entries, newinfo->number);
1179         if ((oldinfo->number > oldinfo->initial_entries) || 
1180             (newinfo->number <= oldinfo->initial_entries)) 
1181                 module_put(t->me);
1182         if ((oldinfo->number > oldinfo->initial_entries) &&
1183             (newinfo->number <= oldinfo->initial_entries))
1184                 module_put(t->me);
1185
1186         /* Get the old counters. */
1187         get_counters(oldinfo, counters);
1188         /* Decrease module usage counts and free resource */
1189         IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
1190         vfree(oldinfo);
1191         if (copy_to_user(tmp.counters, counters,
1192                          sizeof(struct ipt_counters) * tmp.num_counters) != 0)
1193                 ret = -EFAULT;
1194         vfree(counters);
1195         up(&ipt_mutex);
1196         return ret;
1197
1198  put_module:
1199         module_put(t->me);
1200         up(&ipt_mutex);
1201  free_newinfo_counters_untrans:
1202         IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
1203  free_newinfo_counters:
1204         vfree(counters);
1205  free_newinfo:
1206         vfree(newinfo);
1207         return ret;
1208 }
1209
1210 /* We're lazy, and add to the first CPU; overflow works its fey magic
1211  * and everything is OK. */
1212 static inline int
1213 add_counter_to_entry(struct ipt_entry *e,
1214                      const struct ipt_counters addme[],
1215                      unsigned int *i)
1216 {
1217 #if 0
1218         duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
1219                  *i,
1220                  (long unsigned int)e->counters.pcnt,
1221                  (long unsigned int)e->counters.bcnt,
1222                  (long unsigned int)addme[*i].pcnt,
1223                  (long unsigned int)addme[*i].bcnt);
1224 #endif
1225
1226         ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
1227
1228         (*i)++;
1229         return 0;
1230 }
1231
1232 static int
1233 do_add_counters(void __user *user, unsigned int len)
1234 {
1235         unsigned int i;
1236         struct ipt_counters_info tmp, *paddc;
1237         struct ipt_table *t;
1238         int ret = 0;
1239
1240         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1241                 return -EFAULT;
1242
1243         if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters))
1244                 return -EINVAL;
1245
1246         paddc = vmalloc(len);
1247         if (!paddc)
1248                 return -ENOMEM;
1249
1250         if (copy_from_user(paddc, user, len) != 0) {
1251                 ret = -EFAULT;
1252                 goto free;
1253         }
1254
1255         t = find_table_lock(tmp.name);
1256         if (!t || IS_ERR(t)) {
1257                 ret = t ? PTR_ERR(t) : -ENOENT;
1258                 goto free;
1259         }
1260
1261         write_lock_bh(&t->lock);
1262         if (t->private->number != paddc->num_counters) {
1263                 ret = -EINVAL;
1264                 goto unlock_up_free;
1265         }
1266
1267         i = 0;
1268         IPT_ENTRY_ITERATE(t->private->entries,
1269                           t->private->size,
1270                           add_counter_to_entry,
1271                           paddc->counters,
1272                           &i);
1273  unlock_up_free:
1274         write_unlock_bh(&t->lock);
1275         up(&ipt_mutex);
1276         module_put(t->me);
1277  free:
1278         vfree(paddc);
1279
1280         return ret;
1281 }
1282
1283 static int
1284 do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1285 {
1286         int ret;
1287
1288         if (!capable(CAP_NET_ADMIN))
1289                 return -EPERM;
1290
1291         switch (cmd) {
1292         case IPT_SO_SET_REPLACE:
1293                 ret = do_replace(user, len);
1294                 break;
1295
1296         case IPT_SO_SET_ADD_COUNTERS:
1297                 ret = do_add_counters(user, len);
1298                 break;
1299
1300         default:
1301                 duprintf("do_ipt_set_ctl:  unknown request %i\n", cmd);
1302                 ret = -EINVAL;
1303         }
1304
1305         return ret;
1306 }
1307
1308 static int
1309 do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1310 {
1311         int ret;
1312
1313         if (!capable(CAP_NET_ADMIN))
1314                 return -EPERM;
1315
1316         switch (cmd) {
1317         case IPT_SO_GET_INFO: {
1318                 char name[IPT_TABLE_MAXNAMELEN];
1319                 struct ipt_table *t;
1320
1321                 if (*len != sizeof(struct ipt_getinfo)) {
1322                         duprintf("length %u != %u\n", *len,
1323                                  sizeof(struct ipt_getinfo));
1324                         ret = -EINVAL;
1325                         break;
1326                 }
1327
1328                 if (copy_from_user(name, user, sizeof(name)) != 0) {
1329                         ret = -EFAULT;
1330                         break;
1331                 }
1332                 name[IPT_TABLE_MAXNAMELEN-1] = '\0';
1333
1334                 t = try_then_request_module(find_table_lock(name),
1335                                             "iptable_%s", name);
1336                 if (t && !IS_ERR(t)) {
1337                         struct ipt_getinfo info;
1338
1339                         info.valid_hooks = t->valid_hooks;
1340                         memcpy(info.hook_entry, t->private->hook_entry,
1341                                sizeof(info.hook_entry));
1342                         memcpy(info.underflow, t->private->underflow,
1343                                sizeof(info.underflow));
1344                         info.num_entries = t->private->number;
1345                         info.size = t->private->size;
1346                         memcpy(info.name, name, sizeof(info.name));
1347
1348                         if (copy_to_user(user, &info, *len) != 0)
1349                                 ret = -EFAULT;
1350                         else
1351                                 ret = 0;
1352                         up(&ipt_mutex);
1353                         module_put(t->me);
1354                 } else
1355                         ret = t ? PTR_ERR(t) : -ENOENT;
1356         }
1357         break;
1358
1359         case IPT_SO_GET_ENTRIES: {
1360                 struct ipt_get_entries get;
1361
1362                 if (*len < sizeof(get)) {
1363                         duprintf("get_entries: %u < %u\n", *len, sizeof(get));
1364                         ret = -EINVAL;
1365                 } else if (copy_from_user(&get, user, sizeof(get)) != 0) {
1366                         ret = -EFAULT;
1367                 } else if (*len != sizeof(struct ipt_get_entries) + get.size) {
1368                         duprintf("get_entries: %u != %u\n", *len,
1369                                  sizeof(struct ipt_get_entries) + get.size);
1370                         ret = -EINVAL;
1371                 } else
1372                         ret = get_entries(&get, user);
1373                 break;
1374         }
1375
1376         case IPT_SO_GET_REVISION_MATCH:
1377         case IPT_SO_GET_REVISION_TARGET: {
1378                 struct ipt_get_revision rev;
1379                 int (*revfn)(const char *, u8, int *);
1380
1381                 if (*len != sizeof(rev)) {
1382                         ret = -EINVAL;
1383                         break;
1384                 }
1385                 if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
1386                         ret = -EFAULT;
1387                         break;
1388                 }
1389
1390                 if (cmd == IPT_SO_GET_REVISION_TARGET)
1391                         revfn = target_revfn;
1392                 else
1393                         revfn = match_revfn;
1394
1395                 try_then_request_module(find_revision(rev.name, rev.revision,
1396                                                       revfn, &ret),
1397                                         "ipt_%s", rev.name);
1398                 break;
1399         }
1400
1401         default:
1402                 duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
1403                 ret = -EINVAL;
1404         }
1405
1406         return ret;
1407 }
1408
1409 /* Registration hooks for targets. */
1410 int
1411 ipt_register_target(struct ipt_target *target)
1412 {
1413         int ret;
1414
1415         ret = down_interruptible(&ipt_mutex);
1416         if (ret != 0)
1417                 return ret;
1418         list_add(&target->list, &ipt_target);
1419         up(&ipt_mutex);
1420         return ret;
1421 }
1422
1423 void
1424 ipt_unregister_target(struct ipt_target *target)
1425 {
1426         down(&ipt_mutex);
1427         LIST_DELETE(&ipt_target, target);
1428         up(&ipt_mutex);
1429 }
1430
1431 int
1432 ipt_register_match(struct ipt_match *match)
1433 {
1434         int ret;
1435
1436         ret = down_interruptible(&ipt_mutex);
1437         if (ret != 0)
1438                 return ret;
1439
1440         list_add(&match->list, &ipt_match);
1441         up(&ipt_mutex);
1442
1443         return ret;
1444 }
1445
1446 void
1447 ipt_unregister_match(struct ipt_match *match)
1448 {
1449         down(&ipt_mutex);
1450         LIST_DELETE(&ipt_match, match);
1451         up(&ipt_mutex);
1452 }
1453
1454 int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl)
1455 {
1456         int ret;
1457         struct ipt_table_info *newinfo;
1458         static struct ipt_table_info bootstrap
1459                 = { 0, 0, 0, { 0 }, { 0 }, { } };
1460
1461         newinfo = vmalloc(sizeof(struct ipt_table_info)
1462                           + SMP_ALIGN(repl->size) * num_possible_cpus());
1463         if (!newinfo)
1464                 return -ENOMEM;
1465
1466         memcpy(newinfo->entries, repl->entries, repl->size);
1467
1468         ret = translate_table(table->name, table->valid_hooks,
1469                               newinfo, repl->size,
1470                               repl->num_entries,
1471                               repl->hook_entry,
1472                               repl->underflow);
1473         if (ret != 0) {
1474                 vfree(newinfo);
1475                 return ret;
1476         }
1477
1478         ret = down_interruptible(&ipt_mutex);
1479         if (ret != 0) {
1480                 vfree(newinfo);
1481                 return ret;
1482         }
1483
1484         /* Don't autoload: we'd eat our tail... */
1485         if (list_named_find(&ipt_tables, table->name)) {
1486                 ret = -EEXIST;
1487                 goto free_unlock;
1488         }
1489
1490         /* Simplifies replace_table code. */
1491         table->private = &bootstrap;
1492         if (!replace_table(table, 0, newinfo, &ret))
1493                 goto free_unlock;
1494
1495         duprintf("table->private->number = %u\n",
1496                  table->private->number);
1497         
1498         /* save number of initial entries */
1499         table->private->initial_entries = table->private->number;
1500
1501         rwlock_init(&table->lock);
1502         list_prepend(&ipt_tables, table);
1503
1504  unlock:
1505         up(&ipt_mutex);
1506         return ret;
1507
1508  free_unlock:
1509         vfree(newinfo);
1510         goto unlock;
1511 }
1512
1513 void ipt_unregister_table(struct ipt_table *table)
1514 {
1515         down(&ipt_mutex);
1516         LIST_DELETE(&ipt_tables, table);
1517         up(&ipt_mutex);
1518
1519         /* Decrease module usage counts and free resources */
1520         IPT_ENTRY_ITERATE(table->private->entries, table->private->size,
1521                           cleanup_entry, NULL);
1522         vfree(table->private);
1523 }
1524
/*
 * Range test for a port number.
 * Yields (min <= port <= max) XOR invert; with invert being 0 or 1 the
 * result is 1 for a match and 0 otherwise.
 */
static inline int
port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
{
	int in_range = (min <= port) && (port <= max);

	return in_range ^ invert;
}
1534
1535 static int
1536 tcp_find_option(u_int8_t option,
1537                 const struct sk_buff *skb,
1538                 unsigned int optlen,
1539                 int invert,
1540                 int *hotdrop)
1541 {
1542         /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
1543         u_int8_t _opt[60 - sizeof(struct tcphdr)], *op;
1544         unsigned int i;
1545
1546         duprintf("tcp_match: finding option\n");
1547
1548         if (!optlen)
1549                 return invert;
1550
1551         /* If we don't have the whole header, drop packet. */
1552         op = skb_header_pointer(skb,
1553                                 skb->nh.iph->ihl*4 + sizeof(struct tcphdr),
1554                                 optlen, _opt);
1555         if (op == NULL) {
1556                 *hotdrop = 1;
1557                 return 0;
1558         }
1559
1560         for (i = 0; i < optlen; ) {
1561                 if (op[i] == option) return !invert;
1562                 if (op[i] < 2) i++;
1563                 else i += op[i+1]?:1;
1564         }
1565
1566         return invert;
1567 }
1568
1569 static int
1570 tcp_match(const struct sk_buff *skb,
1571           const struct net_device *in,
1572           const struct net_device *out,
1573           const void *matchinfo,
1574           int offset,
1575           int *hotdrop)
1576 {
1577         struct tcphdr _tcph, *th;
1578         const struct ipt_tcp *tcpinfo = matchinfo;
1579
1580         if (offset) {
1581                 /* To quote Alan:
1582
1583                    Don't allow a fragment of TCP 8 bytes in. Nobody normal
1584                    causes this. Its a cracker trying to break in by doing a
1585                    flag overwrite to pass the direction checks.
1586                 */
1587                 if (offset == 1) {
1588                         duprintf("Dropping evil TCP offset=1 frag.\n");
1589                         *hotdrop = 1;
1590                 }
1591                 /* Must not be a fragment. */
1592                 return 0;
1593         }
1594
1595 #define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
1596
1597         th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
1598                                 sizeof(_tcph), &_tcph);
1599         if (th == NULL) {
1600                 /* We've been asked to examine this packet, and we
1601                    can't.  Hence, no choice but to drop. */
1602                 duprintf("Dropping evil TCP offset=0 tinygram.\n");
1603                 *hotdrop = 1;
1604                 return 0;
1605         }
1606
1607         if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
1608                         ntohs(th->source),
1609                         !!(tcpinfo->invflags & IPT_TCP_INV_SRCPT)))
1610                 return 0;
1611         if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
1612                         ntohs(th->dest),
1613                         !!(tcpinfo->invflags & IPT_TCP_INV_DSTPT)))
1614                 return 0;
1615         if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask)
1616                       == tcpinfo->flg_cmp,
1617                       IPT_TCP_INV_FLAGS))
1618                 return 0;
1619         if (tcpinfo->option) {
1620                 if (th->doff * 4 < sizeof(_tcph)) {
1621                         *hotdrop = 1;
1622                         return 0;
1623                 }
1624                 if (!tcp_find_option(tcpinfo->option, skb,
1625                                      th->doff*4 - sizeof(_tcph),
1626                                      tcpinfo->invflags & IPT_TCP_INV_OPTION,
1627                                      hotdrop))
1628                         return 0;
1629         }
1630         return 1;
1631 }
1632
1633 /* Called when user tries to insert an entry of this type. */
1634 static int
1635 tcp_checkentry(const char *tablename,
1636                const struct ipt_ip *ip,
1637                void *matchinfo,
1638                unsigned int matchsize,
1639                unsigned int hook_mask)
1640 {
1641         const struct ipt_tcp *tcpinfo = matchinfo;
1642
1643         /* Must specify proto == TCP, and no unknown invflags */
1644         return ip->proto == IPPROTO_TCP
1645                 && !(ip->invflags & IPT_INV_PROTO)
1646                 && matchsize == IPT_ALIGN(sizeof(struct ipt_tcp))
1647                 && !(tcpinfo->invflags & ~IPT_TCP_INV_MASK);
1648 }
1649
1650 static int
1651 udp_match(const struct sk_buff *skb,
1652           const struct net_device *in,
1653           const struct net_device *out,
1654           const void *matchinfo,
1655           int offset,
1656           int *hotdrop)
1657 {
1658         struct udphdr _udph, *uh;
1659         const struct ipt_udp *udpinfo = matchinfo;
1660
1661         /* Must not be a fragment. */
1662         if (offset)
1663                 return 0;
1664
1665         uh = skb_header_pointer(skb, skb->nh.iph->ihl*4,
1666                                 sizeof(_udph), &_udph);
1667         if (uh == NULL) {
1668                 /* We've been asked to examine this packet, and we
1669                    can't.  Hence, no choice but to drop. */
1670                 duprintf("Dropping evil UDP tinygram.\n");
1671                 *hotdrop = 1;
1672                 return 0;
1673         }
1674
1675         return port_match(udpinfo->spts[0], udpinfo->spts[1],
1676                           ntohs(uh->source),
1677                           !!(udpinfo->invflags & IPT_UDP_INV_SRCPT))
1678                 && port_match(udpinfo->dpts[0], udpinfo->dpts[1],
1679                               ntohs(uh->dest),
1680                               !!(udpinfo->invflags & IPT_UDP_INV_DSTPT));
1681 }
1682
1683 /* Called when user tries to insert an entry of this type. */
1684 static int
1685 udp_checkentry(const char *tablename,
1686                const struct ipt_ip *ip,
1687                void *matchinfo,
1688                unsigned int matchinfosize,
1689                unsigned int hook_mask)
1690 {
1691         const struct ipt_udp *udpinfo = matchinfo;
1692
1693         /* Must specify proto == UDP, and no unknown invflags */
1694         if (ip->proto != IPPROTO_UDP || (ip->invflags & IPT_INV_PROTO)) {
1695                 duprintf("ipt_udp: Protocol %u != %u\n", ip->proto,
1696                          IPPROTO_UDP);
1697                 return 0;
1698         }
1699         if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) {
1700                 duprintf("ipt_udp: matchsize %u != %u\n",
1701                          matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp)));
1702                 return 0;
1703         }
1704         if (udpinfo->invflags & ~IPT_UDP_INV_MASK) {
1705                 duprintf("ipt_udp: unknown flags %X\n",
1706                          udpinfo->invflags);
1707                 return 0;
1708         }
1709
1710         return 1;
1711 }
1712
/*
 * Test an ICMP type/code pair against a rule.
 *
 * A test_type of 0xFF accepts every packet; otherwise the packet's type
 * must equal test_type and its code must lie in [min_code, max_code].
 * The outcome is XORed with invert, so with invert being 0 or 1 the
 * function returns 1 on a (possibly inverted) match and 0 otherwise.
 */
static inline int
icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
                     u_int8_t type, u_int8_t code,
                     int invert)
{
        int hit;

        if (test_type == 0xFF)
                hit = 1;
        else
                hit = (type == test_type)
                        && (code >= min_code)
                        && (code <= max_code);

        return hit ^ invert;
}
1722
1723 static int
1724 icmp_match(const struct sk_buff *skb,
1725            const struct net_device *in,
1726            const struct net_device *out,
1727            const void *matchinfo,
1728            int offset,
1729            int *hotdrop)
1730 {
1731         struct icmphdr _icmph, *ic;
1732         const struct ipt_icmp *icmpinfo = matchinfo;
1733
1734         /* Must not be a fragment. */
1735         if (offset)
1736                 return 0;
1737
1738         ic = skb_header_pointer(skb, skb->nh.iph->ihl*4,
1739                                 sizeof(_icmph), &_icmph);
1740         if (ic == NULL) {
1741                 /* We've been asked to examine this packet, and we
1742                  * can't.  Hence, no choice but to drop.
1743                  */
1744                 duprintf("Dropping evil ICMP tinygram.\n");
1745                 *hotdrop = 1;
1746                 return 0;
1747         }
1748
1749         return icmp_type_code_match(icmpinfo->type,
1750                                     icmpinfo->code[0],
1751                                     icmpinfo->code[1],
1752                                     ic->type, ic->code,
1753                                     !!(icmpinfo->invflags&IPT_ICMP_INV));
1754 }
1755
1756 /* Called when user tries to insert an entry of this type. */
1757 static int
1758 icmp_checkentry(const char *tablename,
1759            const struct ipt_ip *ip,
1760            void *matchinfo,
1761            unsigned int matchsize,
1762            unsigned int hook_mask)
1763 {
1764         const struct ipt_icmp *icmpinfo = matchinfo;
1765
1766         /* Must specify proto == ICMP, and no unknown invflags */
1767         return ip->proto == IPPROTO_ICMP
1768                 && !(ip->invflags & IPT_INV_PROTO)
1769                 && matchsize == IPT_ALIGN(sizeof(struct ipt_icmp))
1770                 && !(icmpinfo->invflags & ~IPT_ICMP_INV);
1771 }
1772
1773 /* The built-in targets: standard (NULL) and error. */
1774 static struct ipt_target ipt_standard_target = {
1775         .name           = IPT_STANDARD_TARGET,
1776 };
1777
1778 static struct ipt_target ipt_error_target = {
1779         .name           = IPT_ERROR_TARGET,
1780         .target         = ipt_error,
1781 };
1782
1783 static struct nf_sockopt_ops ipt_sockopts = {
1784         .pf             = PF_INET,
1785         .set_optmin     = IPT_BASE_CTL,
1786         .set_optmax     = IPT_SO_SET_MAX+1,
1787         .set            = do_ipt_set_ctl,
1788         .get_optmin     = IPT_BASE_CTL,
1789         .get_optmax     = IPT_SO_GET_MAX+1,
1790         .get            = do_ipt_get_ctl,
1791 };
1792
1793 static struct ipt_match tcp_matchstruct = {
1794         .name           = "tcp",
1795         .match          = &tcp_match,
1796         .checkentry     = &tcp_checkentry,
1797 };
1798
1799 static struct ipt_match udp_matchstruct = {
1800         .name           = "udp",
1801         .match          = &udp_match,
1802         .checkentry     = &udp_checkentry,
1803 };
1804
1805 static struct ipt_match icmp_matchstruct = {
1806         .name           = "icmp",
1807         .match          = &icmp_match,
1808         .checkentry     = &icmp_checkentry,
1809 };
1810
1811 #ifdef CONFIG_PROC_FS
1812 static inline int print_name(const char *i,
1813                              off_t start_offset, char *buffer, int length,
1814                              off_t *pos, unsigned int *count)
1815 {
1816         if ((*count)++ >= start_offset) {
1817                 unsigned int namelen;
1818
1819                 namelen = sprintf(buffer + *pos, "%s\n",
1820                                   i + sizeof(struct list_head));
1821                 if (*pos + namelen > length) {
1822                         /* Stop iterating */
1823                         return 1;
1824                 }
1825                 *pos += namelen;
1826         }
1827         return 0;
1828 }
1829
1830 static inline int print_target(const struct ipt_target *t,
1831                                off_t start_offset, char *buffer, int length,
1832                                off_t *pos, unsigned int *count)
1833 {
1834         if (t == &ipt_standard_target || t == &ipt_error_target)
1835                 return 0;
1836         return print_name((char *)t, start_offset, buffer, length, pos, count);
1837 }
1838
1839 static int ipt_get_tables(char *buffer, char **start, off_t offset, int length)
1840 {
1841         off_t pos = 0;
1842         unsigned int count = 0;
1843
1844         if (down_interruptible(&ipt_mutex) != 0)
1845                 return 0;
1846
1847         LIST_FIND(&ipt_tables, print_name, void *,
1848                   offset, buffer, length, &pos, &count);
1849
1850         up(&ipt_mutex);
1851
1852         /* `start' hack - see fs/proc/generic.c line ~105 */
1853         *start=(char *)((unsigned long)count-offset);
1854         return pos;
1855 }
1856
1857 static int ipt_get_targets(char *buffer, char **start, off_t offset, int length)
1858 {
1859         off_t pos = 0;
1860         unsigned int count = 0;
1861
1862         if (down_interruptible(&ipt_mutex) != 0)
1863                 return 0;
1864
1865         LIST_FIND(&ipt_target, print_target, struct ipt_target *,
1866                   offset, buffer, length, &pos, &count);
1867         
1868         up(&ipt_mutex);
1869
1870         *start = (char *)((unsigned long)count - offset);
1871         return pos;
1872 }
1873
1874 static int ipt_get_matches(char *buffer, char **start, off_t offset, int length)
1875 {
1876         off_t pos = 0;
1877         unsigned int count = 0;
1878
1879         if (down_interruptible(&ipt_mutex) != 0)
1880                 return 0;
1881         
1882         LIST_FIND(&ipt_match, print_name, void *,
1883                   offset, buffer, length, &pos, &count);
1884
1885         up(&ipt_mutex);
1886
1887         *start = (char *)((unsigned long)count - offset);
1888         return pos;
1889 }
1890
1891 static struct { char *name; get_info_t *get_info; } ipt_proc_entry[] =
1892 { { "ip_tables_names", ipt_get_tables },
1893   { "ip_tables_targets", ipt_get_targets },
1894   { "ip_tables_matches", ipt_get_matches },
1895   { NULL, NULL} };
1896 #endif /*CONFIG_PROC_FS*/
1897
1898 static int __init init(void)
1899 {
1900         int ret;
1901
1902         /* Noone else will be downing sem now, so we won't sleep */
1903         down(&ipt_mutex);
1904         list_append(&ipt_target, &ipt_standard_target);
1905         list_append(&ipt_target, &ipt_error_target);
1906         list_append(&ipt_match, &tcp_matchstruct);
1907         list_append(&ipt_match, &udp_matchstruct);
1908         list_append(&ipt_match, &icmp_matchstruct);
1909         up(&ipt_mutex);
1910
1911         /* Register setsockopt */
1912         ret = nf_register_sockopt(&ipt_sockopts);
1913         if (ret < 0) {
1914                 duprintf("Unable to register sockopts.\n");
1915                 return ret;
1916         }
1917
1918 #ifdef CONFIG_PROC_FS
1919         {
1920         struct proc_dir_entry *proc;
1921         int i;
1922
1923         for (i = 0; ipt_proc_entry[i].name; i++) {
1924                 proc = proc_net_create(ipt_proc_entry[i].name, 0,
1925                                        ipt_proc_entry[i].get_info);
1926                 if (!proc) {
1927                         while (--i >= 0)
1928                                 proc_net_remove(ipt_proc_entry[i].name);
1929                         nf_unregister_sockopt(&ipt_sockopts);
1930                         return -ENOMEM;
1931                 }
1932                 proc->owner = THIS_MODULE;
1933         }
1934         }
1935 #endif
1936
1937         printk("ip_tables: (C) 2000-2002 Netfilter core team\n");
1938         return 0;
1939 }
1940
1941 static void __exit fini(void)
1942 {
1943         nf_unregister_sockopt(&ipt_sockopts);
1944 #ifdef CONFIG_PROC_FS
1945         {
1946         int i;
1947         for (i = 0; ipt_proc_entry[i].name; i++)
1948                 proc_net_remove(ipt_proc_entry[i].name);
1949         }
1950 #endif
1951 }
1952
/* Symbols exported to other modules: table handling ... */
EXPORT_SYMBOL(ipt_register_table);
EXPORT_SYMBOL(ipt_unregister_table);
EXPORT_SYMBOL(ipt_do_table);

/* ... match registration ... */
EXPORT_SYMBOL(ipt_register_match);
EXPORT_SYMBOL(ipt_unregister_match);

/* ... and target registration / lookup. */
EXPORT_SYMBOL(ipt_register_target);
EXPORT_SYMBOL(ipt_unregister_target);
EXPORT_SYMBOL(ipt_find_target);

/* Module entry and exit points. */
module_init(init);
module_exit(fini);