[NETFILTER]: Convert ip_tables matches/targets to centralized error checking
net/ipv4/netfilter/ip_tables.c
1 /*
2  * Packet matching code.
3  *
4  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5  * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  *
11  * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
12  *      - increase module usage count as soon as we have rules inside
13  *        a table
14  * 08 Oct 2005 Harald Welte <laforge@netfilter.org>
15  *      - Generalize into "x_tables" layer and "{ip,ip6,arp}_tables"
16  */
17 #include <linux/config.h>
18 #include <linux/cache.h>
19 #include <linux/capability.h>
20 #include <linux/skbuff.h>
21 #include <linux/kmod.h>
22 #include <linux/vmalloc.h>
23 #include <linux/netdevice.h>
24 #include <linux/module.h>
25 #include <linux/icmp.h>
26 #include <net/ip.h>
27 #include <asm/uaccess.h>
28 #include <asm/semaphore.h>
29 #include <linux/proc_fs.h>
30 #include <linux/err.h>
31 #include <linux/cpumask.h>
32
33 #include <linux/netfilter/x_tables.h>
34 #include <linux/netfilter_ipv4/ip_tables.h>
35
36 MODULE_LICENSE("GPL");
37 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
38 MODULE_DESCRIPTION("IPv4 packet filter");
39
40 /*#define DEBUG_IP_FIREWALL*/
41 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
42 /*#define DEBUG_IP_FIREWALL_USER*/
43
44 #ifdef DEBUG_IP_FIREWALL
45 #define dprintf(format, args...)  printk(format , ## args)
46 #else
47 #define dprintf(format, args...)
48 #endif
49
50 #ifdef DEBUG_IP_FIREWALL_USER
51 #define duprintf(format, args...) printk(format , ## args)
52 #else
53 #define duprintf(format, args...)
54 #endif
55
56 #ifdef CONFIG_NETFILTER_DEBUG
57 #define IP_NF_ASSERT(x)                                         \
58 do {                                                            \
59         if (!(x))                                               \
60                 printk("IP_NF_ASSERT: %s:%s:%u\n",              \
61                        __FUNCTION__, __FILE__, __LINE__);       \
62 } while(0)
63 #else
64 #define IP_NF_ASSERT(x)
65 #endif
66
67 #if 0
68 /* All the better to debug you with... */
69 #define static
70 #define inline
71 #endif
72
73 /*
74    We keep a set of rules for each CPU, so we can avoid write-locking
75    them in the softirq when updating the counters and therefore
76    only need to read-lock in the softirq; doing a write_lock_bh() in user
77    context stops packets coming through and allows user context to read
78    the counters or update the rules.
79
80    Hence the start of any table is given by get_entry() on the per-cpu copy below.  */
81
82 /* Returns whether matches rule or not. */
83 static inline int
84 ip_packet_match(const struct iphdr *ip,
85                 const char *indev,
86                 const char *outdev,
87                 const struct ipt_ip *ipinfo,
88                 int isfrag)
89 {
90         size_t i;
91         unsigned long ret;
92
93 #define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))
94
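        /*
         * Illustrative note (not in the original source): FWINV() XORs the
         * raw test result with the corresponding inversion flag, so with
         * e.g. IPT_INV_SRCIP set it is a source-address *mismatch* that
         * makes the rule match:
         *
         *     FWINV(x, IPT_INV_SRCIP)
         *         == x ^ !!(ipinfo->invflags & IPT_INV_SRCIP)
         */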
95         if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
96                   IPT_INV_SRCIP)
97             || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
98                      IPT_INV_DSTIP)) {
99                 dprintf("Source or dest mismatch.\n");
100
101                 dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
102                         NIPQUAD(ip->saddr),
103                         NIPQUAD(ipinfo->smsk.s_addr),
104                         NIPQUAD(ipinfo->src.s_addr),
105                         ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
106                 dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
107                         NIPQUAD(ip->daddr),
108                         NIPQUAD(ipinfo->dmsk.s_addr),
109                         NIPQUAD(ipinfo->dst.s_addr),
110                         ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
111                 return 0;
112         }
113
114         /* Look for ifname matches; this should unroll nicely. */
115         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
116                 ret |= (((const unsigned long *)indev)[i]
117                         ^ ((const unsigned long *)ipinfo->iniface)[i])
118                         & ((const unsigned long *)ipinfo->iniface_mask)[i];
119         }
120
121         if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
122                 dprintf("VIA in mismatch (%s vs %s).%s\n",
123                         indev, ipinfo->iniface,
124                         ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
125                 return 0;
126         }
127
128         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
129                 ret |= (((const unsigned long *)outdev)[i]
130                         ^ ((const unsigned long *)ipinfo->outiface)[i])
131                         & ((const unsigned long *)ipinfo->outiface_mask)[i];
132         }
133
134         if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
135                 dprintf("VIA out mismatch (%s vs %s).%s\n",
136                         outdev, ipinfo->outiface,
137                         ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
138                 return 0;
139         }
140
141         /* Check specific protocol */
142         if (ipinfo->proto
143             && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
144                 dprintf("Packet protocol %hi does not match %hi.%s\n",
145                         ip->protocol, ipinfo->proto,
146                         ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
147                 return 0;
148         }
149
150         /* If we have a fragment rule but the packet is not a fragment
151          * then we return zero */
152         if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
153                 dprintf("Fragment rule but not fragment.%s\n",
154                         ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
155                 return 0;
156         }
157
158         return 1;
159 }
160
161 static inline int
162 ip_checkentry(const struct ipt_ip *ip)
163 {
164         if (ip->flags & ~IPT_F_MASK) {
165                 duprintf("Unknown flag bits set: %08X\n",
166                          ip->flags & ~IPT_F_MASK);
167                 return 0;
168         }
169         if (ip->invflags & ~IPT_INV_MASK) {
170                 duprintf("Unknown invflag bits set: %08X\n",
171                          ip->invflags & ~IPT_INV_MASK);
172                 return 0;
173         }
174         return 1;
175 }
176
177 static unsigned int
178 ipt_error(struct sk_buff **pskb,
179           const struct net_device *in,
180           const struct net_device *out,
181           unsigned int hooknum,
182           const void *targinfo,
183           void *userinfo)
184 {
185         if (net_ratelimit())
186                 printk("ip_tables: error: `%s'\n", (char *)targinfo);
187
188         return NF_DROP;
189 }
190
191 static inline
192 int do_match(struct ipt_entry_match *m,
193              const struct sk_buff *skb,
194              const struct net_device *in,
195              const struct net_device *out,
196              int offset,
197              int *hotdrop)
198 {
199         /* Stop iteration if it doesn't match */
200         if (!m->u.kernel.match->match(skb, in, out, m->data, offset, 
201             skb->nh.iph->ihl*4, hotdrop))
202                 return 1;
203         else
204                 return 0;
205 }
206
207 static inline struct ipt_entry *
208 get_entry(void *base, unsigned int offset)
209 {
210         return (struct ipt_entry *)(base + offset);
211 }
212
213 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
214 unsigned int
215 ipt_do_table(struct sk_buff **pskb,
216              unsigned int hook,
217              const struct net_device *in,
218              const struct net_device *out,
219              struct ipt_table *table,
220              void *userdata)
221 {
222         static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
223         u_int16_t offset;
224         struct iphdr *ip;
225         u_int16_t datalen;
226         int hotdrop = 0;
227         /* Initializing verdict to NF_DROP keeps gcc happy. */
228         unsigned int verdict = NF_DROP;
229         const char *indev, *outdev;
230         void *table_base;
231         struct ipt_entry *e, *back;
232         struct xt_table_info *private = table->private;
233
234         /* Initialization */
235         ip = (*pskb)->nh.iph;
236         datalen = (*pskb)->len - ip->ihl * 4;
237         indev = in ? in->name : nulldevname;
238         outdev = out ? out->name : nulldevname;
239         /* We handle fragments by dealing with the first fragment as
240          * if it was a normal packet.  All other fragments are treated
241          * normally, except that they will NEVER match rules that ask
242          * things we don't know (ie. tcp syn flag or ports).  If the
243          * rule is also a fragment-specific rule, non-fragments won't
244          * match it. */
245         offset = ntohs(ip->frag_off) & IP_OFFSET;
246
247         read_lock_bh(&table->lock);
248         IP_NF_ASSERT(table->valid_hooks & (1 << hook));
249         table_base = (void *)private->entries[smp_processor_id()];
250         e = get_entry(table_base, private->hook_entry[hook]);
251
252         /* For return from builtin chain */
253         back = get_entry(table_base, private->underflow[hook]);
254
255         do {
256                 IP_NF_ASSERT(e);
257                 IP_NF_ASSERT(back);
258                 if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
259                         struct ipt_entry_target *t;
260
261                         if (IPT_MATCH_ITERATE(e, do_match,
262                                               *pskb, in, out,
263                                               offset, &hotdrop) != 0)
264                                 goto no_match;
265
266                         ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
267
268                         t = ipt_get_target(e);
269                         IP_NF_ASSERT(t->u.kernel.target);
270                         /* Standard target? */
271                         if (!t->u.kernel.target->target) {
272                                 int v;
273
274                                 v = ((struct ipt_standard_target *)t)->verdict;
275                                 if (v < 0) {
276                                         /* Pop from stack? */
277                                         if (v != IPT_RETURN) {
278                                                 verdict = (unsigned)(-v) - 1;
279                                                 break;
280                                         }
281                                         e = back;
282                                         back = get_entry(table_base,
283                                                          back->comefrom);
284                                         continue;
285                                 }
286                                 if (table_base + v != (void *)e + e->next_offset
287                                     && !(e->ip.flags & IPT_F_GOTO)) {
288                                         /* Save old back ptr in next entry */
289                                         struct ipt_entry *next
290                                                 = (void *)e + e->next_offset;
291                                         next->comefrom
292                                                 = (void *)back - table_base;
293                                         /* set back pointer to next entry */
294                                         back = next;
295                                 }
296
297                                 e = get_entry(table_base, v);
298                         } else {
299                                 /* Targets which reenter must return
300                                    abs. verdicts */
301 #ifdef CONFIG_NETFILTER_DEBUG
302                                 ((struct ipt_entry *)table_base)->comefrom
303                                         = 0xeeeeeeec;
304 #endif
305                                 verdict = t->u.kernel.target->target(pskb,
306                                                                      in, out,
307                                                                      hook,
308                                                                      t->data,
309                                                                      userdata);
310
311 #ifdef CONFIG_NETFILTER_DEBUG
312                                 if (((struct ipt_entry *)table_base)->comefrom
313                                     != 0xeeeeeeec
314                                     && verdict == IPT_CONTINUE) {
315                                         printk("Target %s reentered!\n",
316                                                t->u.kernel.target->name);
317                                         verdict = NF_DROP;
318                                 }
319                                 ((struct ipt_entry *)table_base)->comefrom
320                                         = 0x57acc001;
321 #endif
322                                 /* Target might have changed stuff. */
323                                 ip = (*pskb)->nh.iph;
324                                 datalen = (*pskb)->len - ip->ihl * 4;
325
326                                 if (verdict == IPT_CONTINUE)
327                                         e = (void *)e + e->next_offset;
328                                 else
329                                         /* Verdict */
330                                         break;
331                         }
332                 } else {
333
334                 no_match:
335                         e = (void *)e + e->next_offset;
336                 }
337         } while (!hotdrop);
338
339         read_unlock_bh(&table->lock);
340
341 #ifdef DEBUG_ALLOW_ALL
342         return NF_ACCEPT;
343 #else
344         if (hotdrop)
345                 return NF_DROP;
346         else return verdict;
347 #endif
348 }
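/*
 * Illustrative note (not in the original source): a standard target carries
 * only an int verdict.  Non-negative values are byte offsets into the table
 * blob (jumps to user-defined chains); negative values encode a final
 * verdict as -verdict - 1, so "-j DROP" is stored as -1 and "-j ACCEPT" as
 * -2 (hence the "(unsigned)(-v) - 1" decoding above), while IPT_RETURN pops
 * back to the entry saved in 'back'.
 */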
349
350 /* All zeroes == unconditional rule. */
351 static inline int
352 unconditional(const struct ipt_ip *ip)
353 {
354         unsigned int i;
355
356         for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
357                 if (((__u32 *)ip)[i])
358                         return 0;
359
360         return 1;
361 }
362
363 /* Figures out from what hook each rule can be called: returns 0 if
364    there are loops.  Puts hook bitmask in comefrom. */
365 static int
366 mark_source_chains(struct xt_table_info *newinfo,
367                    unsigned int valid_hooks, void *entry0)
368 {
369         unsigned int hook;
370
371         /* No recursion; use packet counter to save back ptrs (reset
372            to 0 as we leave), and comefrom to save source hook bitmask */
373         for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
374                 unsigned int pos = newinfo->hook_entry[hook];
375                 struct ipt_entry *e
376                         = (struct ipt_entry *)(entry0 + pos);
377
378                 if (!(valid_hooks & (1 << hook)))
379                         continue;
380
381                 /* Set initial back pointer. */
382                 e->counters.pcnt = pos;
383
384                 for (;;) {
385                         struct ipt_standard_target *t
386                                 = (void *)ipt_get_target(e);
387
388                         if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
389                                 printk("iptables: loop hook %u pos %u %08X.\n",
390                                        hook, pos, e->comefrom);
391                                 return 0;
392                         }
393                         e->comefrom
394                                 |= ((1 << hook) | (1 << NF_IP_NUMHOOKS));
395
396                         /* Unconditional return/END. */
397                         if (e->target_offset == sizeof(struct ipt_entry)
398                             && (strcmp(t->target.u.user.name,
399                                        IPT_STANDARD_TARGET) == 0)
400                             && t->verdict < 0
401                             && unconditional(&e->ip)) {
402                                 unsigned int oldpos, size;
403
404                                 /* Return: backtrack through the last
405                                    big jump. */
406                                 do {
407                                         e->comefrom ^= (1<<NF_IP_NUMHOOKS);
408 #ifdef DEBUG_IP_FIREWALL_USER
409                                         if (e->comefrom
410                                             & (1 << NF_IP_NUMHOOKS)) {
411                                                 duprintf("Back unset "
412                                                          "on hook %u "
413                                                          "rule %u\n",
414                                                          hook, pos);
415                                         }
416 #endif
417                                         oldpos = pos;
418                                         pos = e->counters.pcnt;
419                                         e->counters.pcnt = 0;
420
421                                         /* We're at the start. */
422                                         if (pos == oldpos)
423                                                 goto next;
424
425                                         e = (struct ipt_entry *)
426                                                 (entry0 + pos);
427                                 } while (oldpos == pos + e->next_offset);
428
429                                 /* Move along one */
430                                 size = e->next_offset;
431                                 e = (struct ipt_entry *)
432                                         (entry0 + pos + size);
433                                 e->counters.pcnt = pos;
434                                 pos += size;
435                         } else {
436                                 int newpos = t->verdict;
437
438                                 if (strcmp(t->target.u.user.name,
439                                            IPT_STANDARD_TARGET) == 0
440                                     && newpos >= 0) {
441                                         /* This is a jump; chase it. */
442                                         duprintf("Jump rule %u -> %u\n",
443                                                  pos, newpos);
444                                 } else {
445                                         /* ... this is a fallthru */
446                                         newpos = pos + e->next_offset;
447                                 }
448                                 e = (struct ipt_entry *)
449                                         (entry0 + newpos);
450                                 e->counters.pcnt = pos;
451                                 pos = newpos;
452                         }
453                 }
454                 next:
455                 duprintf("Finished chain %u\n", hook);
456         }
457         return 1;
458 }
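/*
 * Illustrative note (not in the original source): mark_source_chains() walks
 * every path reachable from each hook entry point without recursion.  An
 * entry's counters.pcnt temporarily holds the position it was jumped from
 * (reset to 0 while backtracking), and bit NF_IP_NUMHOOKS in comefrom marks
 * entries on the current path, so meeting that bit again means the ruleset
 * contains a chain loop and translate_table() fails with -ELOOP.
 */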
459
460 static inline int
461 cleanup_match(struct ipt_entry_match *m, unsigned int *i)
462 {
463         if (i && (*i)-- == 0)
464                 return 1;
465
466         if (m->u.kernel.match->destroy)
467                 m->u.kernel.match->destroy(m->data,
468                                            m->u.match_size - sizeof(*m));
469         module_put(m->u.kernel.match->me);
470         return 0;
471 }
472
473 static inline int
474 standard_check(const struct ipt_entry_target *t,
475                unsigned int max_offset)
476 {
477         struct ipt_standard_target *targ = (void *)t;
478
479         /* Check standard info. */
480         if (targ->verdict >= 0
481             && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
482                 duprintf("ipt_standard_check: bad verdict (%i)\n",
483                          targ->verdict);
484                 return 0;
485         }
486         if (targ->verdict < -NF_MAX_VERDICT - 1) {
487                 duprintf("ipt_standard_check: bad negative verdict (%i)\n",
488                          targ->verdict);
489                 return 0;
490         }
491         return 1;
492 }
493
494 static inline int
495 check_match(struct ipt_entry_match *m,
496             const char *name,
497             const struct ipt_ip *ip,
498             unsigned int hookmask,
499             unsigned int *i)
500 {
501         struct ipt_match *match;
502         int ret;
503
504         match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
505                                                    m->u.user.revision),
506                                         "ipt_%s", m->u.user.name);
507         if (IS_ERR(match) || !match) {
508                 duprintf("check_match: `%s' not found\n", m->u.user.name);
509                 return match ? PTR_ERR(match) : -ENOENT;
510         }
511         m->u.kernel.match = match;
512
513         ret = xt_check_match(match, AF_INET, m->u.match_size - sizeof(*m),
514                              name, hookmask, ip->proto,
515                              ip->invflags & IPT_INV_PROTO);
516         if (ret)
517                 goto err;
518
519         if (m->u.kernel.match->checkentry
520             && !m->u.kernel.match->checkentry(name, ip, m->data,
521                                               m->u.match_size - sizeof(*m),
522                                               hookmask)) {
523                 duprintf("ip_tables: check failed for `%s'.\n",
524                          m->u.kernel.match->name);
525                 ret = -EINVAL;
526                 goto err;
527         }
528
529         (*i)++;
530         return 0;
531 err:
532         module_put(m->u.kernel.match->me);
533         return ret;
534 }
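/*
 * Illustrative note (not in the original source): the xt_check_match() call
 * above is the "centralized error checking" this revision introduces -- the
 * match's declared size and, where specified, table, hook mask and protocol
 * are validated in one place in x_tables before the match's own
 * checkentry() hook (if any) runs.
 */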
535
536 static struct ipt_target ipt_standard_target;
537
538 static inline int
539 check_entry(struct ipt_entry *e, const char *name, unsigned int size,
540             unsigned int *i)
541 {
542         struct ipt_entry_target *t;
543         struct ipt_target *target;
544         int ret;
545         unsigned int j;
546
547         if (!ip_checkentry(&e->ip)) {
548                 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
549                 return -EINVAL;
550         }
551
552         j = 0;
553         ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
554         if (ret != 0)
555                 goto cleanup_matches;
556
557         t = ipt_get_target(e);
558         target = try_then_request_module(xt_find_target(AF_INET,
559                                                      t->u.user.name,
560                                                      t->u.user.revision),
561                                          "ipt_%s", t->u.user.name);
562         if (IS_ERR(target) || !target) {
563                 duprintf("check_entry: `%s' not found\n", t->u.user.name);
564                 ret = target ? PTR_ERR(target) : -ENOENT;
565                 goto cleanup_matches;
566         }
567         t->u.kernel.target = target;
568
569         ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t),
570                               name, e->comefrom, e->ip.proto,
571                               e->ip.invflags & IPT_INV_PROTO);
572         if (ret)
573                 goto err;
574
575         if (t->u.kernel.target == &ipt_standard_target) {
576                 if (!standard_check(t, size)) {
577                         ret = -EINVAL;
578                         goto cleanup_matches;
579                 }
580         } else if (t->u.kernel.target->checkentry
581                    && !t->u.kernel.target->checkentry(name, e, t->data,
582                                                       t->u.target_size
583                                                       - sizeof(*t),
584                                                       e->comefrom)) {
585                 duprintf("ip_tables: check failed for `%s'.\n",
586                          t->u.kernel.target->name);
587                 ret = -EINVAL;
588                 goto err;
589         }
590
591         (*i)++;
592         return 0;
593  err:
594         module_put(t->u.kernel.target->me);
595  cleanup_matches:
596         IPT_MATCH_ITERATE(e, cleanup_match, &j);
597         return ret;
598 }
599
600 static inline int
601 check_entry_size_and_hooks(struct ipt_entry *e,
602                            struct xt_table_info *newinfo,
603                            unsigned char *base,
604                            unsigned char *limit,
605                            const unsigned int *hook_entries,
606                            const unsigned int *underflows,
607                            unsigned int *i)
608 {
609         unsigned int h;
610
611         if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
612             || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
613                 duprintf("Bad offset %p\n", e);
614                 return -EINVAL;
615         }
616
617         if (e->next_offset
618             < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
619                 duprintf("checking: element %p size %u\n",
620                          e, e->next_offset);
621                 return -EINVAL;
622         }
623
624         /* Check hooks & underflows */
625         for (h = 0; h < NF_IP_NUMHOOKS; h++) {
626                 if ((unsigned char *)e - base == hook_entries[h])
627                         newinfo->hook_entry[h] = hook_entries[h];
628                 if ((unsigned char *)e - base == underflows[h])
629                         newinfo->underflow[h] = underflows[h];
630         }
631
632         /* FIXME: underflows must be unconditional, standard verdicts
633            < 0 (not IPT_RETURN). --RR */
634
635         /* Clear counters and comefrom */
636         e->counters = ((struct xt_counters) { 0, 0 });
637         e->comefrom = 0;
638
639         (*i)++;
640         return 0;
641 }
642
643 static inline int
644 cleanup_entry(struct ipt_entry *e, unsigned int *i)
645 {
646         struct ipt_entry_target *t;
647
648         if (i && (*i)-- == 0)
649                 return 1;
650
651         /* Cleanup all matches */
652         IPT_MATCH_ITERATE(e, cleanup_match, NULL);
653         t = ipt_get_target(e);
654         if (t->u.kernel.target->destroy)
655                 t->u.kernel.target->destroy(t->data,
656                                             t->u.target_size - sizeof(*t));
657         module_put(t->u.kernel.target->me);
658         return 0;
659 }
660
661 /* Checks and translates the user-supplied table segment (held in
662    newinfo) */
663 static int
664 translate_table(const char *name,
665                 unsigned int valid_hooks,
666                 struct xt_table_info *newinfo,
667                 void *entry0,
668                 unsigned int size,
669                 unsigned int number,
670                 const unsigned int *hook_entries,
671                 const unsigned int *underflows)
672 {
673         unsigned int i;
674         int ret;
675
676         newinfo->size = size;
677         newinfo->number = number;
678
679         /* Init all hooks to impossible value. */
680         for (i = 0; i < NF_IP_NUMHOOKS; i++) {
681                 newinfo->hook_entry[i] = 0xFFFFFFFF;
682                 newinfo->underflow[i] = 0xFFFFFFFF;
683         }
684
685         duprintf("translate_table: size %u\n", newinfo->size);
686         i = 0;
687         /* Walk through entries, checking offsets. */
688         ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
689                                 check_entry_size_and_hooks,
690                                 newinfo,
691                                 entry0,
692                                 entry0 + size,
693                                 hook_entries, underflows, &i);
694         if (ret != 0)
695                 return ret;
696
697         if (i != number) {
698                 duprintf("translate_table: %u not %u entries\n",
699                          i, number);
700                 return -EINVAL;
701         }
702
703         /* Check hooks all assigned */
704         for (i = 0; i < NF_IP_NUMHOOKS; i++) {
705                 /* Only hooks which are valid */
706                 if (!(valid_hooks & (1 << i)))
707                         continue;
708                 if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
709                         duprintf("Invalid hook entry %u %u\n",
710                                  i, hook_entries[i]);
711                         return -EINVAL;
712                 }
713                 if (newinfo->underflow[i] == 0xFFFFFFFF) {
714                         duprintf("Invalid underflow %u %u\n",
715                                  i, underflows[i]);
716                         return -EINVAL;
717                 }
718         }
719
720         if (!mark_source_chains(newinfo, valid_hooks, entry0))
721                 return -ELOOP;
722
723         /* Finally, each sanity check must pass */
724         i = 0;
725         ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
726                                 check_entry, name, size, &i);
727
728         if (ret != 0) {
729                 IPT_ENTRY_ITERATE(entry0, newinfo->size,
730                                   cleanup_entry, &i);
731                 return ret;
732         }
733
734         /* And one copy for every other CPU */
735         for_each_cpu(i) {
736                 if (newinfo->entries[i] && newinfo->entries[i] != entry0)
737                         memcpy(newinfo->entries[i], entry0, newinfo->size);
738         }
739
740         return ret;
741 }
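/*
 * Illustrative note (not in the original source): translate_table() applies
 * its checks in a fixed order -- entry alignment/offsets and hook
 * boundaries, total entry count, hook/underflow assignment, loop detection
 * via mark_source_chains(), then the per-entry match/target checks -- and
 * only after everything passes is the validated blob replicated to every
 * other CPU's private copy.
 */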
742
743 /* Gets counters. */
744 static inline int
745 add_entry_to_counter(const struct ipt_entry *e,
746                      struct xt_counters total[],
747                      unsigned int *i)
748 {
749         ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
750
751         (*i)++;
752         return 0;
753 }
754
755 static inline int
756 set_entry_to_counter(const struct ipt_entry *e,
757                      struct ipt_counters total[],
758                      unsigned int *i)
759 {
760         SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
761
762         (*i)++;
763         return 0;
764 }
765
766 static void
767 get_counters(const struct xt_table_info *t,
768              struct xt_counters counters[])
769 {
770         unsigned int cpu;
771         unsigned int i;
772         unsigned int curcpu;
773
774         /* Instead of clearing (by a previous call to memset())
775          * the counters and using adds, we set the counters
776          * with the data from the 'current' CPU.
777          * We don't care about preemption here.
778          */
779         curcpu = raw_smp_processor_id();
780
781         i = 0;
782         IPT_ENTRY_ITERATE(t->entries[curcpu],
783                           t->size,
784                           set_entry_to_counter,
785                           counters,
786                           &i);
787
788         for_each_cpu(cpu) {
789                 if (cpu == curcpu)
790                         continue;
791                 i = 0;
792                 IPT_ENTRY_ITERATE(t->entries[cpu],
793                                   t->size,
794                                   add_entry_to_counter,
795                                   counters,
796                                   &i);
797         }
798 }
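/*
 * Illustrative note (not in the original source): the snapshot is seeded
 * with SET_COUNTER() from whichever CPU the caller happens to run on, and
 * every other CPU's copy is then folded in with ADD_COUNTER(), which is why
 * no memset() of the result array is needed first.
 */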
799
800 static int
801 copy_entries_to_user(unsigned int total_size,
802                      struct ipt_table *table,
803                      void __user *userptr)
804 {
805         unsigned int off, num, countersize;
806         struct ipt_entry *e;
807         struct xt_counters *counters;
808         struct xt_table_info *private = table->private;
809         int ret = 0;
810         void *loc_cpu_entry;
811
812         /* We need an atomic snapshot of the counters: the rest doesn't change
813            (other than comefrom, which userspace doesn't care
814            about). */
815         countersize = sizeof(struct xt_counters) * private->number;
816         counters = vmalloc_node(countersize, numa_node_id());
817
818         if (counters == NULL)
819                 return -ENOMEM;
820
821         /* First, sum counters... */
822         write_lock_bh(&table->lock);
823         get_counters(private, counters);
824         write_unlock_bh(&table->lock);
825
826         /* choose the copy that is on our node/cpu, ...
827          * This choice is lazy (because the current thread is
828          * allowed to migrate to another cpu)
829          */
830         loc_cpu_entry = private->entries[raw_smp_processor_id()];
831         /* ... then copy entire thing ... */
832         if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
833                 ret = -EFAULT;
834                 goto free_counters;
835         }
836
837         /* FIXME: use iterator macros --RR */
838         /* ... then go back and fix counters and names */
839         for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
840                 unsigned int i;
841                 struct ipt_entry_match *m;
842                 struct ipt_entry_target *t;
843
844                 e = (struct ipt_entry *)(loc_cpu_entry + off);
845                 if (copy_to_user(userptr + off
846                                  + offsetof(struct ipt_entry, counters),
847                                  &counters[num],
848                                  sizeof(counters[num])) != 0) {
849                         ret = -EFAULT;
850                         goto free_counters;
851                 }
852
853                 for (i = sizeof(struct ipt_entry);
854                      i < e->target_offset;
855                      i += m->u.match_size) {
856                         m = (void *)e + i;
857
858                         if (copy_to_user(userptr + off + i
859                                          + offsetof(struct ipt_entry_match,
860                                                     u.user.name),
861                                          m->u.kernel.match->name,
862                                          strlen(m->u.kernel.match->name)+1)
863                             != 0) {
864                                 ret = -EFAULT;
865                                 goto free_counters;
866                         }
867                 }
868
869                 t = ipt_get_target(e);
870                 if (copy_to_user(userptr + off + e->target_offset
871                                  + offsetof(struct ipt_entry_target,
872                                             u.user.name),
873                                  t->u.kernel.target->name,
874                                  strlen(t->u.kernel.target->name)+1) != 0) {
875                         ret = -EFAULT;
876                         goto free_counters;
877                 }
878         }
879
880  free_counters:
881         vfree(counters);
882         return ret;
883 }
884
885 static int
886 get_entries(const struct ipt_get_entries *entries,
887             struct ipt_get_entries __user *uptr)
888 {
889         int ret;
890         struct ipt_table *t;
891
892         t = xt_find_table_lock(AF_INET, entries->name);
893         if (t && !IS_ERR(t)) {
894                 struct xt_table_info *private = t->private;
895                 duprintf("t->private->number = %u\n",
896                          private->number);
897                 if (entries->size == private->size)
898                         ret = copy_entries_to_user(private->size,
899                                                    t, uptr->entrytable);
900                 else {
901                         duprintf("get_entries: I've got %u not %u!\n",
902                                  private->size,
903                                  entries->size);
904                         ret = -EINVAL;
905                 }
906                 module_put(t->me);
907                 xt_table_unlock(t);
908         } else
909                 ret = t ? PTR_ERR(t) : -ENOENT;
910
911         return ret;
912 }
913
914 static int
915 do_replace(void __user *user, unsigned int len)
916 {
917         int ret;
918         struct ipt_replace tmp;
919         struct ipt_table *t;
920         struct xt_table_info *newinfo, *oldinfo;
921         struct xt_counters *counters;
922         void *loc_cpu_entry, *loc_cpu_old_entry;
923
924         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
925                 return -EFAULT;
926
927         /* Hack: Causes ipchains to give correct error msg --RR */
928         if (len != sizeof(tmp) + tmp.size)
929                 return -ENOPROTOOPT;
930
931         /* overflow check */
932         if (tmp.size >= (INT_MAX - sizeof(struct xt_table_info)) / NR_CPUS -
933                         SMP_CACHE_BYTES)
934                 return -ENOMEM;
935         if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
936                 return -ENOMEM;
937
938         newinfo = xt_alloc_table_info(tmp.size);
939         if (!newinfo)
940                 return -ENOMEM;
941
942         /* choose the copy that is on our node/cpu */
943         loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
944         if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
945                            tmp.size) != 0) {
946                 ret = -EFAULT;
947                 goto free_newinfo;
948         }
949
950         counters = vmalloc(tmp.num_counters * sizeof(struct xt_counters));
951         if (!counters) {
952                 ret = -ENOMEM;
953                 goto free_newinfo;
954         }
955
956         ret = translate_table(tmp.name, tmp.valid_hooks,
957                               newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
958                               tmp.hook_entry, tmp.underflow);
959         if (ret != 0)
960                 goto free_newinfo_counters;
961
962         duprintf("ip_tables: Translated table\n");
963
964         t = try_then_request_module(xt_find_table_lock(AF_INET, tmp.name),
965                                     "iptable_%s", tmp.name);
966         if (!t || IS_ERR(t)) {
967                 ret = t ? PTR_ERR(t) : -ENOENT;
968                 goto free_newinfo_counters_untrans;
969         }
970
971         /* You lied! */
972         if (tmp.valid_hooks != t->valid_hooks) {
973                 duprintf("Valid hook crap: %08X vs %08X\n",
974                          tmp.valid_hooks, t->valid_hooks);
975                 ret = -EINVAL;
976                 goto put_module;
977         }
978
979         oldinfo = xt_replace_table(t, tmp.num_counters, newinfo, &ret);
980         if (!oldinfo)
981                 goto put_module;
982
983         /* Update module usage count based on number of rules */
984         duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
985                 oldinfo->number, oldinfo->initial_entries, newinfo->number);
986         if ((oldinfo->number > oldinfo->initial_entries) || 
987             (newinfo->number <= oldinfo->initial_entries)) 
988                 module_put(t->me);
989         if ((oldinfo->number > oldinfo->initial_entries) &&
990             (newinfo->number <= oldinfo->initial_entries))
991                 module_put(t->me);
992
993         /* Get the old counters. */
994         get_counters(oldinfo, counters);
995         /* Decrease module usage counts and free resources */
996         loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
997         IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL);
998         xt_free_table_info(oldinfo);
999         if (copy_to_user(tmp.counters, counters,
1000                          sizeof(struct xt_counters) * tmp.num_counters) != 0)
1001                 ret = -EFAULT;
1002         vfree(counters);
1003         xt_table_unlock(t);
1004         return ret;
1005
1006  put_module:
1007         module_put(t->me);
1008         xt_table_unlock(t);
1009  free_newinfo_counters_untrans:
1010         IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL);
1011  free_newinfo_counters:
1012         vfree(counters);
1013  free_newinfo:
1014         xt_free_table_info(newinfo);
1015         return ret;
1016 }
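/*
 * Illustrative note (not in the original source): the replace sequence is
 * copy in and translate the new blob, swap it in under the table lock with
 * xt_replace_table(), hand the old table's counters back to user space, and
 * finally run cleanup_entry() over the old entries (dropping match/target
 * module references) before freeing the old per-cpu copies.
 */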
1017
1018 /* We're lazy, and add to the current CPU's copy only; the per-cpu
1019  * copies are summed whenever the counters are read, so everything is OK. */
1020 static inline int
1021 add_counter_to_entry(struct ipt_entry *e,
1022                      const struct xt_counters addme[],
1023                      unsigned int *i)
1024 {
1025 #if 0
1026         duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
1027                  *i,
1028                  (long unsigned int)e->counters.pcnt,
1029                  (long unsigned int)e->counters.bcnt,
1030                  (long unsigned int)addme[*i].pcnt,
1031                  (long unsigned int)addme[*i].bcnt);
1032 #endif
1033
1034         ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
1035
1036         (*i)++;
1037         return 0;
1038 }
1039
1040 static int
1041 do_add_counters(void __user *user, unsigned int len)
1042 {
1043         unsigned int i;
1044         struct xt_counters_info tmp, *paddc;
1045         struct ipt_table *t;
1046         struct xt_table_info *private;
1047         int ret = 0;
1048         void *loc_cpu_entry;
1049
1050         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1051                 return -EFAULT;
1052
1053         if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct xt_counters))
1054                 return -EINVAL;
1055
1056         paddc = vmalloc_node(len, numa_node_id());
1057         if (!paddc)
1058                 return -ENOMEM;
1059
1060         if (copy_from_user(paddc, user, len) != 0) {
1061                 ret = -EFAULT;
1062                 goto free;
1063         }
1064
1065         t = xt_find_table_lock(AF_INET, tmp.name);
1066         if (!t || IS_ERR(t)) {
1067                 ret = t ? PTR_ERR(t) : -ENOENT;
1068                 goto free;
1069         }
1070
1071         write_lock_bh(&t->lock);
1072         private = t->private;
1073         if (private->number != paddc->num_counters) {
1074                 ret = -EINVAL;
1075                 goto unlock_up_free;
1076         }
1077
1078         i = 0;
1079         /* Choose the copy that is on our node */
1080         loc_cpu_entry = private->entries[raw_smp_processor_id()];
1081         IPT_ENTRY_ITERATE(loc_cpu_entry,
1082                           private->size,
1083                           add_counter_to_entry,
1084                           paddc->counters,
1085                           &i);
1086  unlock_up_free:
1087         write_unlock_bh(&t->lock);
1088         xt_table_unlock(t);
1089         module_put(t->me);
1090  free:
1091         vfree(paddc);
1092
1093         return ret;
1094 }
1095
1096 static int
1097 do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1098 {
1099         int ret;
1100
1101         if (!capable(CAP_NET_ADMIN))
1102                 return -EPERM;
1103
1104         switch (cmd) {
1105         case IPT_SO_SET_REPLACE:
1106                 ret = do_replace(user, len);
1107                 break;
1108
1109         case IPT_SO_SET_ADD_COUNTERS:
1110                 ret = do_add_counters(user, len);
1111                 break;
1112
1113         default:
1114                 duprintf("do_ipt_set_ctl:  unknown request %i\n", cmd);
1115                 ret = -EINVAL;
1116         }
1117
1118         return ret;
1119 }
1120
1121 static int
1122 do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1123 {
1124         int ret;
1125
1126         if (!capable(CAP_NET_ADMIN))
1127                 return -EPERM;
1128
1129         switch (cmd) {
1130         case IPT_SO_GET_INFO: {
1131                 char name[IPT_TABLE_MAXNAMELEN];
1132                 struct ipt_table *t;
1133
1134                 if (*len != sizeof(struct ipt_getinfo)) {
1135                         duprintf("length %u != %u\n", *len,
1136                                  sizeof(struct ipt_getinfo));
1137                         ret = -EINVAL;
1138                         break;
1139                 }
1140
1141                 if (copy_from_user(name, user, sizeof(name)) != 0) {
1142                         ret = -EFAULT;
1143                         break;
1144                 }
1145                 name[IPT_TABLE_MAXNAMELEN-1] = '\0';
1146
1147                 t = try_then_request_module(xt_find_table_lock(AF_INET, name),
1148                                             "iptable_%s", name);
1149                 if (t && !IS_ERR(t)) {
1150                         struct ipt_getinfo info;
1151                         struct xt_table_info *private = t->private;
1152
1153                         info.valid_hooks = t->valid_hooks;
1154                         memcpy(info.hook_entry, private->hook_entry,
1155                                sizeof(info.hook_entry));
1156                         memcpy(info.underflow, private->underflow,
1157                                sizeof(info.underflow));
1158                         info.num_entries = private->number;
1159                         info.size = private->size;
1160                         memcpy(info.name, name, sizeof(info.name));
1161
1162                         if (copy_to_user(user, &info, *len) != 0)
1163                                 ret = -EFAULT;
1164                         else
1165                                 ret = 0;
1166                         xt_table_unlock(t);
1167                         module_put(t->me);
1168                 } else
1169                         ret = t ? PTR_ERR(t) : -ENOENT;
1170         }
1171         break;
1172
1173         case IPT_SO_GET_ENTRIES: {
1174                 struct ipt_get_entries get;
1175
1176                 if (*len < sizeof(get)) {
1177                         duprintf("get_entries: %u < %u\n", *len, sizeof(get));
1178                         ret = -EINVAL;
1179                 } else if (copy_from_user(&get, user, sizeof(get)) != 0) {
1180                         ret = -EFAULT;
1181                 } else if (*len != sizeof(struct ipt_get_entries) + get.size) {
1182                         duprintf("get_entries: %u != %u\n", *len,
1183                                  sizeof(struct ipt_get_entries) + get.size);
1184                         ret = -EINVAL;
1185                 } else
1186                         ret = get_entries(&get, user);
1187                 break;
1188         }
1189
1190         case IPT_SO_GET_REVISION_MATCH:
1191         case IPT_SO_GET_REVISION_TARGET: {
1192                 struct ipt_get_revision rev;
1193                 int target;
1194
1195                 if (*len != sizeof(rev)) {
1196                         ret = -EINVAL;
1197                         break;
1198                 }
1199                 if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
1200                         ret = -EFAULT;
1201                         break;
1202                 }
1203
1204                 if (cmd == IPT_SO_GET_REVISION_TARGET)
1205                         target = 1;
1206                 else
1207                         target = 0;
1208
1209                 try_then_request_module(xt_find_revision(AF_INET, rev.name,
1210                                                          rev.revision,
1211                                                          target, &ret),
1212                                         "ipt_%s", rev.name);
1213                 break;
1214         }
1215
1216         default:
1217                 duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
1218                 ret = -EINVAL;
1219         }
1220
1221         return ret;
1222 }
1223
1224 int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl)
1225 {
1226         int ret;
1227         struct xt_table_info *newinfo;
1228         static struct xt_table_info bootstrap
1229                 = { 0, 0, 0, { 0 }, { 0 }, { } };
1230         void *loc_cpu_entry;
1231
1232         newinfo = xt_alloc_table_info(repl->size);
1233         if (!newinfo)
1234                 return -ENOMEM;
1235
1236         /* choose the copy on our node/cpu
1237          * but don't care about preemption
1238          */
1239         loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1240         memcpy(loc_cpu_entry, repl->entries, repl->size);
1241
1242         ret = translate_table(table->name, table->valid_hooks,
1243                               newinfo, loc_cpu_entry, repl->size,
1244                               repl->num_entries,
1245                               repl->hook_entry,
1246                               repl->underflow);
1247         if (ret != 0) {
1248                 xt_free_table_info(newinfo);
1249                 return ret;
1250         }
1251
1252         if ((ret = xt_register_table(table, &bootstrap, newinfo)) != 0) {
1253                 xt_free_table_info(newinfo);
1254                 return ret;
1255         }
1256
1257         return 0;
1258 }
1259
1260 void ipt_unregister_table(struct ipt_table *table)
1261 {
1262         struct xt_table_info *private;
1263         void *loc_cpu_entry;
1264
1265         private = xt_unregister_table(table);
1266
1267         /* Decrease module usage counts and free resources */
1268         loc_cpu_entry = private->entries[raw_smp_processor_id()];
1269         IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL);
1270         xt_free_table_info(private);
1271 }
1272
1273 /* Returns 1 if the type and code is matched by the range, 0 otherwise */
1274 static inline int
1275 icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
1276                      u_int8_t type, u_int8_t code,
1277                      int invert)
1278 {
1279         return ((test_type == 0xFF) || (type == test_type && code >= min_code && code <= max_code))
1280                 ^ invert;
1281 }
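/*
 * Illustrative note (not in the original source): a test_type of 0xFF is
 * treated as the "match any ICMP type" wildcard.  For example, a rule built
 * for "--icmp-type 8" is loaded as test_type == 8 with a code range of
 * 0..0xFF, so every echo request matches regardless of its code field.
 */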
1282
1283 static int
1284 icmp_match(const struct sk_buff *skb,
1285            const struct net_device *in,
1286            const struct net_device *out,
1287            const void *matchinfo,
1288            int offset,
1289            unsigned int protoff,
1290            int *hotdrop)
1291 {
1292         struct icmphdr _icmph, *ic;
1293         const struct ipt_icmp *icmpinfo = matchinfo;
1294
1295         /* Must not be a fragment. */
1296         if (offset)
1297                 return 0;
1298
1299         ic = skb_header_pointer(skb, protoff, sizeof(_icmph), &_icmph);
1300         if (ic == NULL) {
1301                 /* We've been asked to examine this packet, and we
1302                  * can't.  Hence, no choice but to drop.
1303                  */
1304                 duprintf("Dropping evil ICMP tinygram.\n");
1305                 *hotdrop = 1;
1306                 return 0;
1307         }
1308
1309         return icmp_type_code_match(icmpinfo->type,
1310                                     icmpinfo->code[0],
1311                                     icmpinfo->code[1],
1312                                     ic->type, ic->code,
1313                                     !!(icmpinfo->invflags&IPT_ICMP_INV));
1314 }
1315
1316 /* Called when user tries to insert an entry of this type. */
1317 static int
1318 icmp_checkentry(const char *tablename,
1319            const void *info,
1320            void *matchinfo,
1321            unsigned int matchsize,
1322            unsigned int hook_mask)
1323 {
1324         const struct ipt_icmp *icmpinfo = matchinfo;
1325
1326         /* Must specify no unknown invflags */
1327         return !(icmpinfo->invflags & ~IPT_ICMP_INV);
1328 }
1329
1330 /* The built-in targets: standard (NULL) and error. */
1331 static struct ipt_target ipt_standard_target = {
1332         .name           = IPT_STANDARD_TARGET,
1333         .targetsize     = sizeof(int),
1334 };
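/*
 * Illustrative note (not in the original source): the standard target
 * deliberately has no ->target hook; ipt_do_table() treats a NULL target
 * function as "interpret the verdict field directly" (the "Standard
 * target?" branch above), so only its size needs to be registered here.
 */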
1335
1336 static struct ipt_target ipt_error_target = {
1337         .name           = IPT_ERROR_TARGET,
1338         .target         = ipt_error,
1339         .targetsize     = IPT_FUNCTION_MAXNAMELEN,
1340 };
1341
1342 static struct nf_sockopt_ops ipt_sockopts = {
1343         .pf             = PF_INET,
1344         .set_optmin     = IPT_BASE_CTL,
1345         .set_optmax     = IPT_SO_SET_MAX+1,
1346         .set            = do_ipt_set_ctl,
1347         .get_optmin     = IPT_BASE_CTL,
1348         .get_optmax     = IPT_SO_GET_MAX+1,
1349         .get            = do_ipt_get_ctl,
1350 };
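/*
 * Illustrative sketch (not part of the original file): how user space
 * reaches these handlers.  Assuming the usual iptables headers and
 * CAP_NET_ADMIN, something like
 *
 *     int s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
 *     struct ipt_getinfo info = { .name = "filter" };
 *     socklen_t len = sizeof(info);
 *
 *     getsockopt(s, IPPROTO_IP, IPT_SO_GET_INFO, &info, &len);
 *
 * ends up in do_ipt_get_ctl() above via this nf_sockopt registration.
 */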
1351
1352 static struct ipt_match icmp_matchstruct = {
1353         .name           = "icmp",
1354         .match          = icmp_match,
1355         .matchsize      = sizeof(struct ipt_icmp),
1356         .proto          = IPPROTO_ICMP,
1357         .checkentry     = icmp_checkentry,
1358 };
1359
1360 static int __init init(void)
1361 {
1362         int ret;
1363
1364         xt_proto_init(AF_INET);
1365
1366         /* No one else will be downing the sem now, so we won't sleep */
1367         xt_register_target(AF_INET, &ipt_standard_target);
1368         xt_register_target(AF_INET, &ipt_error_target);
1369         xt_register_match(AF_INET, &icmp_matchstruct);
1370
1371         /* Register setsockopt */
1372         ret = nf_register_sockopt(&ipt_sockopts);
1373         if (ret < 0) {
1374                 duprintf("Unable to register sockopts.\n");
1375                 return ret;
1376         }
1377
1378         printk("ip_tables: (C) 2000-2006 Netfilter Core Team\n");
1379         return 0;
1380 }
1381
1382 static void __exit fini(void)
1383 {
1384         nf_unregister_sockopt(&ipt_sockopts);
1385
1386         xt_unregister_match(AF_INET, &icmp_matchstruct);
1387         xt_unregister_target(AF_INET, &ipt_error_target);
1388         xt_unregister_target(AF_INET, &ipt_standard_target);
1389
1390         xt_proto_fini(AF_INET);
1391 }
1392
1393 EXPORT_SYMBOL(ipt_register_table);
1394 EXPORT_SYMBOL(ipt_unregister_table);
1395 EXPORT_SYMBOL(ipt_do_table);
1396 module_init(init);
1397 module_exit(fini);