netfilter: xtables: move extension arguments into compound structure (1/6)
[safe/jmp/linux-2.6] / net / ipv4 / netfilter / ip_tables.c
1 /*
2  * Packet matching code.
3  *
4  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5  * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11 #include <linux/cache.h>
12 #include <linux/capability.h>
13 #include <linux/skbuff.h>
14 #include <linux/kmod.h>
15 #include <linux/vmalloc.h>
16 #include <linux/netdevice.h>
17 #include <linux/module.h>
18 #include <linux/icmp.h>
19 #include <net/ip.h>
20 #include <net/compat.h>
21 #include <asm/uaccess.h>
22 #include <linux/mutex.h>
23 #include <linux/proc_fs.h>
24 #include <linux/err.h>
25 #include <linux/cpumask.h>
26
27 #include <linux/netfilter/x_tables.h>
28 #include <linux/netfilter_ipv4/ip_tables.h>
29 #include <net/netfilter/nf_log.h>
30
/* Module metadata: license, author and one-line description strings. */
31 MODULE_LICENSE("GPL");
32 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
33 MODULE_DESCRIPTION("IPv4 packet filter");
34
35 /*#define DEBUG_IP_FIREWALL*/
36 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
37 /*#define DEBUG_IP_FIREWALL_USER*/
38
/*
 * dprintf(): debug output used by the packet-matching code.  Forwards to
 * printk() when DEBUG_IP_FIREWALL is defined; otherwise expands to nothing,
 * so its arguments are not evaluated.
 */
39 #ifdef DEBUG_IP_FIREWALL
40 #define dprintf(format, args...)  printk(format , ## args)
41 #else
42 #define dprintf(format, args...)
43 #endif
44
/*
 * duprintf(): debug output used by the table checking/translation code.
 * Forwards to printk() when DEBUG_IP_FIREWALL_USER is defined; otherwise
 * expands to nothing.
 */
45 #ifdef DEBUG_IP_FIREWALL_USER
46 #define duprintf(format, args...) printk(format , ## args)
47 #else
48 #define duprintf(format, args...)
49 #endif
50
/*
 * IP_NF_ASSERT(x): soft assertion.  With CONFIG_NETFILTER_DEBUG it prints
 * the function, file and line through printk() when x is false and then
 * carries on; it never stops execution.  Without CONFIG_NETFILTER_DEBUG it
 * expands to nothing, so x is not evaluated at all.
 */
51 #ifdef CONFIG_NETFILTER_DEBUG
52 #define IP_NF_ASSERT(x)                                         \
53 do {                                                            \
54         if (!(x))                                               \
55                 printk("IP_NF_ASSERT: %s:%s:%u\n",              \
56                        __func__, __FILE__, __LINE__);   \
57 } while(0)
58 #else
59 #define IP_NF_ASSERT(x)
60 #endif
61
62 #if 0
63 /* All the better to debug you with... */
64 #define static
65 #define inline
66 #endif
67
68 /*
69    We keep a set of rules for each CPU, so we can avoid write-locking
70    them in the softirq when updating the counters and therefore
71    only need to read-lock in the softirq; doing a write_lock_bh() in user
72    context stops packets coming through and allows user context to read
73    the counters or update the rules.
74
75    Hence the start of any table is given by get_table() below.  */
76
77 /* Returns whether matches rule or not. */
78 /* Performance critical - called for every packet */
79 static inline bool
80 ip_packet_match(const struct iphdr *ip,
81                 const char *indev,
82                 const char *outdev,
83                 const struct ipt_ip *ipinfo,
84                 int isfrag)
85 {
86         size_t i;
87         unsigned long ret;
88
89 #define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg)))
90
91         if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
92                   IPT_INV_SRCIP)
93             || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
94                      IPT_INV_DSTIP)) {
95                 dprintf("Source or dest mismatch.\n");
96
97                 dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
98                         NIPQUAD(ip->saddr),
99                         NIPQUAD(ipinfo->smsk.s_addr),
100                         NIPQUAD(ipinfo->src.s_addr),
101                         ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
102                 dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
103                         NIPQUAD(ip->daddr),
104                         NIPQUAD(ipinfo->dmsk.s_addr),
105                         NIPQUAD(ipinfo->dst.s_addr),
106                         ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
107                 return false;
108         }
109
110         /* Look for ifname matches; this should unroll nicely. */
111         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
112                 ret |= (((const unsigned long *)indev)[i]
113                         ^ ((const unsigned long *)ipinfo->iniface)[i])
114                         & ((const unsigned long *)ipinfo->iniface_mask)[i];
115         }
116
117         if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
118                 dprintf("VIA in mismatch (%s vs %s).%s\n",
119                         indev, ipinfo->iniface,
120                         ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
121                 return false;
122         }
123
124         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
125                 ret |= (((const unsigned long *)outdev)[i]
126                         ^ ((const unsigned long *)ipinfo->outiface)[i])
127                         & ((const unsigned long *)ipinfo->outiface_mask)[i];
128         }
129
130         if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
131                 dprintf("VIA out mismatch (%s vs %s).%s\n",
132                         outdev, ipinfo->outiface,
133                         ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
134                 return false;
135         }
136
137         /* Check specific protocol */
138         if (ipinfo->proto
139             && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
140                 dprintf("Packet protocol %hi does not match %hi.%s\n",
141                         ip->protocol, ipinfo->proto,
142                         ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
143                 return false;
144         }
145
146         /* If we have a fragment rule but the packet is not a fragment
147          * then we return zero */
148         if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
149                 dprintf("Fragment rule but not fragment.%s\n",
150                         ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
151                 return false;
152         }
153
154         return true;
155 }
156
157 static bool
158 ip_checkentry(const struct ipt_ip *ip)
159 {
160         if (ip->flags & ~IPT_F_MASK) {
161                 duprintf("Unknown flag bits set: %08X\n",
162                          ip->flags & ~IPT_F_MASK);
163                 return false;
164         }
165         if (ip->invflags & ~IPT_INV_MASK) {
166                 duprintf("Unknown invflag bits set: %08X\n",
167                          ip->invflags & ~IPT_INV_MASK);
168                 return false;
169         }
170         return true;
171 }
172
173 static unsigned int
174 ipt_error(struct sk_buff *skb,
175           const struct net_device *in,
176           const struct net_device *out,
177           unsigned int hooknum,
178           const struct xt_target *target,
179           const void *targinfo)
180 {
181         if (net_ratelimit())
182                 printk("ip_tables: error: `%s'\n", (char *)targinfo);
183
184         return NF_DROP;
185 }
186
187 /* Performance critical - called for every packet */
188 static inline bool
189 do_match(struct ipt_entry_match *m, const struct sk_buff *skb,
190          struct xt_match_param *par)
191 {
192         par->match     = m->u.kernel.match;
193         par->matchinfo = m->data;
194
195         /* Stop iteration if it doesn't match */
196         if (!m->u.kernel.match->match(skb, par))
197                 return true;
198         else
199                 return false;
200 }
201
/*
 * Return the rule located @offset bytes past @base.
 * Performance critical.
 */
static inline struct ipt_entry *
get_entry(void *base, unsigned int offset)
{
	char *addr = (char *)base + offset;

	return (struct ipt_entry *)addr;
}
208
209 /* All zeroes == unconditional rule. */
210 /* Mildly perf critical (only if packet tracing is on) */
211 static inline int
212 unconditional(const struct ipt_ip *ip)
213 {
214         unsigned int i;
215
216         for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
217                 if (((__u32 *)ip)[i])
218                         return 0;
219
220         return 1;
221 #undef FWINV
222 }
223
224 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
225     defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
226 static const char *const hooknames[] = {
227         [NF_INET_PRE_ROUTING]           = "PREROUTING",
228         [NF_INET_LOCAL_IN]              = "INPUT",
229         [NF_INET_FORWARD]               = "FORWARD",
230         [NF_INET_LOCAL_OUT]             = "OUTPUT",
231         [NF_INET_POST_ROUTING]          = "POSTROUTING",
232 };
233
/* Kind of position a traced packet is reported at. */
enum nf_ip_trace_comments {
	NF_IP_TRACE_COMMENT_RULE   = 0,
	NF_IP_TRACE_COMMENT_RETURN = 1,
	NF_IP_TRACE_COMMENT_POLICY = 2,
};

/* Text for each nf_ip_trace_comments value, as used in the TRACE line. */
static const char *const comments[] = {
	[NF_IP_TRACE_COMMENT_POLICY] = "policy",
	[NF_IP_TRACE_COMMENT_RETURN] = "return",
	[NF_IP_TRACE_COMMENT_RULE]   = "rule",
};
245
246 static struct nf_loginfo trace_loginfo = {
247         .type = NF_LOG_TYPE_LOG,
248         .u = {
249                 .log = {
250                         .level = 4,
251                         .logflags = NF_LOG_MASK,
252                 },
253         },
254 };
255
256 /* Mildly perf critical (only if packet tracing is on) */
257 static inline int
258 get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e,
259                       char *hookname, char **chainname,
260                       char **comment, unsigned int *rulenum)
261 {
262         struct ipt_standard_target *t = (void *)ipt_get_target(s);
263
264         if (strcmp(t->target.u.kernel.target->name, IPT_ERROR_TARGET) == 0) {
265                 /* Head of user chain: ERROR target with chainname */
266                 *chainname = t->target.data;
267                 (*rulenum) = 0;
268         } else if (s == e) {
269                 (*rulenum)++;
270
271                 if (s->target_offset == sizeof(struct ipt_entry)
272                    && strcmp(t->target.u.kernel.target->name,
273                              IPT_STANDARD_TARGET) == 0
274                    && t->verdict < 0
275                    && unconditional(&s->ip)) {
276                         /* Tail of chains: STANDARD target (return/policy) */
277                         *comment = *chainname == hookname
278                                 ? (char *)comments[NF_IP_TRACE_COMMENT_POLICY]
279                                 : (char *)comments[NF_IP_TRACE_COMMENT_RETURN];
280                 }
281                 return 1;
282         } else
283                 (*rulenum)++;
284
285         return 0;
286 }
287
288 static void trace_packet(struct sk_buff *skb,
289                          unsigned int hook,
290                          const struct net_device *in,
291                          const struct net_device *out,
292                          const char *tablename,
293                          struct xt_table_info *private,
294                          struct ipt_entry *e)
295 {
296         void *table_base;
297         const struct ipt_entry *root;
298         char *hookname, *chainname, *comment;
299         unsigned int rulenum = 0;
300
301         table_base = (void *)private->entries[smp_processor_id()];
302         root = get_entry(table_base, private->hook_entry[hook]);
303
304         hookname = chainname = (char *)hooknames[hook];
305         comment = (char *)comments[NF_IP_TRACE_COMMENT_RULE];
306
307         IPT_ENTRY_ITERATE(root,
308                           private->size - private->hook_entry[hook],
309                           get_chainname_rulenum,
310                           e, hookname, &chainname, &comment, &rulenum);
311
312         nf_log_packet(AF_INET, hook, skb, in, out, &trace_loginfo,
313                       "TRACE: %s:%s:%s:%u ",
314                       tablename, chainname, comment, rulenum);
315 }
316 #endif
317
318 /*
 * ipt_do_table - run a packet through the rules of one hook of a table.
 *
 * skb:   packet being examined
 * hook:  hook number; indexes private->hook_entry[] and ->underflow[]
 * in:    input device, or NULL (nulldevname is matched instead)
 * out:   output device, or NULL (nulldevname is matched instead)
 * table: table to traverse; table->lock is read-held (read_lock_bh) for
 *        the whole walk
 *
 * The walk uses this CPU's copy of the rules
 * (private->entries[smp_processor_id()]).  For every entry whose IP part
 * and matches all accept the packet, the entry's counters are bumped and
 * its target is acted on:
 *   - standard target, negative verdict other than IPT_RETURN: final
 *     verdict, decoded as (unsigned)(-v) - 1;
 *   - standard target, IPT_RETURN: resume at 'back' and reload 'back' from
 *     back->comefrom;
 *   - standard target, non-negative verdict: offset of the next entry to
 *     visit; unless it is a plain fall-through or IPT_F_GOTO is set, the
 *     current 'back' is first saved in the following entry's comefrom;
 *   - extension target: its handler is called; IPT_CONTINUE moves to the
 *     next entry, anything else is the final verdict.
 *
 * Returns one of the generic firewall policies, like NF_ACCEPT.  NF_DROP
 * is returned if a match set hotdrop.  With DEBUG_ALLOW_ALL defined the
 * result is always NF_ACCEPT.
 */
319 unsigned int
320 ipt_do_table(struct sk_buff *skb,
321              unsigned int hook,
322              const struct net_device *in,
323              const struct net_device *out,
324              struct xt_table *table)
325 {
326         static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
327         const struct iphdr *ip;
328         u_int16_t datalen;
329         bool hotdrop = false;
330         /* Initializing verdict to NF_DROP keeps gcc happy. */
331         unsigned int verdict = NF_DROP;
332         const char *indev, *outdev;
333         void *table_base;
334         struct ipt_entry *e, *back;
335         struct xt_table_info *private;
336         struct xt_match_param mtpar;
337
338         /* Initialization */
339         ip = ip_hdr(skb);
        /* NOTE(review): datalen is assigned here and again after a target
         * runs, but nothing in this function reads it - confirm whether it
         * can be dropped. */
340         datalen = skb->len - ip->ihl * 4;
341         indev = in ? in->name : nulldevname;
342         outdev = out ? out->name : nulldevname;
343         /* We handle fragments by dealing with the first fragment as
344          * if it was a normal packet.  All other fragments are treated
345          * normally, except that they will NEVER match rules that ask
346          * things we don't know, ie. tcp syn flag or ports).  If the
347          * rule is also a fragment-specific rule, non-fragments won't
348          * match it. */
349         mtpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
350         mtpar.thoff   = ip_hdrlen(skb);
351         mtpar.hotdrop = &hotdrop;
352         mtpar.in      = in;
353         mtpar.out     = out;
354
355         read_lock_bh(&table->lock);
356         IP_NF_ASSERT(table->valid_hooks & (1 << hook));
357         private = table->private;
358         table_base = (void *)private->entries[smp_processor_id()];
359         e = get_entry(table_base, private->hook_entry[hook]);
360
361         /* For return from builtin chain */
362         back = get_entry(table_base, private->underflow[hook]);
363
        /* Loop until a verdict is reached (break) or a match requests a
         * hot drop. */
364         do {
365                 IP_NF_ASSERT(e);
366                 IP_NF_ASSERT(back);
367                 if (ip_packet_match(ip, indev, outdev,
368                     &e->ip, mtpar.fragoff)) {
369                         struct ipt_entry_target *t;
370
                        /* do_match() returns non-zero when a match
                         * extension rejects the packet. */
371                         if (IPT_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0)
372                                 goto no_match;
373
                        /* Byte count comes from the IP header's tot_len;
                         * packet count goes up by one. */
374                         ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
375
376                         t = ipt_get_target(e);
377                         IP_NF_ASSERT(t->u.kernel.target);
378
379 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
380     defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
381                         /* The packet is traced: log it */
382                         if (unlikely(skb->nf_trace))
383                                 trace_packet(skb, hook, in, out,
384                                              table->name, private, e);
385 #endif
386                         /* Standard target? */
387                         if (!t->u.kernel.target->target) {
388                                 int v;
389
390                                 v = ((struct ipt_standard_target *)t)->verdict;
391                                 if (v < 0) {
392                                         /* Pop from stack? */
393                                         if (v != IPT_RETURN) {
                                                /* Negative verdicts encode
                                                 * the result as -(result) - 1. */
394                                                 verdict = (unsigned)(-v) - 1;
395                                                 break;
396                                         }
397                                         e = back;
398                                         back = get_entry(table_base,
399                                                          back->comefrom);
400                                         continue;
401                                 }
                                /* v >= 0 is an offset into the table.  Only
                                 * a real jump (not a fall-through to the
                                 * next entry, not a GOTO) records where to
                                 * return to. */
402                                 if (table_base + v != (void *)e + e->next_offset
403                                     && !(e->ip.flags & IPT_F_GOTO)) {
404                                         /* Save old back ptr in next entry */
405                                         struct ipt_entry *next
406                                                 = (void *)e + e->next_offset;
407                                         next->comefrom
408                                                 = (void *)back - table_base;
409                                         /* set back pointer to next entry */
410                                         back = next;
411                                 }
412
413                                 e = get_entry(table_base, v);
414                         } else {
415                                 /* Targets which reenter must return
416                                    abs. verdicts */
417 #ifdef CONFIG_NETFILTER_DEBUG
                                /* Debug canary in the first entry's comefrom:
                                 * if it changed while the target ran and the
                                 * target returned IPT_CONTINUE, the target
                                 * is reported as having re-entered. */
418                                 ((struct ipt_entry *)table_base)->comefrom
419                                         = 0xeeeeeeec;
420 #endif
421                                 verdict = t->u.kernel.target->target(skb,
422                                                                      in, out,
423                                                                      hook,
424                                                                      t->u.kernel.target,
425                                                                      t->data);
426
427 #ifdef CONFIG_NETFILTER_DEBUG
428                                 if (((struct ipt_entry *)table_base)->comefrom
429                                     != 0xeeeeeeec
430                                     && verdict == IPT_CONTINUE) {
431                                         printk("Target %s reentered!\n",
432                                                t->u.kernel.target->name);
433                                         verdict = NF_DROP;
434                                 }
435                                 ((struct ipt_entry *)table_base)->comefrom
436                                         = 0x57acc001;
437 #endif
438                                 /* Target might have changed stuff. */
439                                 ip = ip_hdr(skb);
440                                 datalen = skb->len - ip->ihl * 4;
441
442                                 if (verdict == IPT_CONTINUE)
443                                         e = (void *)e + e->next_offset;
444                                 else
445                                         /* Verdict */
446                                         break;
447                         }
448                 } else {
449
450                 no_match:
                        /* Rule did not apply: step to the following entry. */
451                         e = (void *)e + e->next_offset;
452                 }
453         } while (!hotdrop);
454
455         read_unlock_bh(&table->lock);
456
457 #ifdef DEBUG_ALLOW_ALL
458         return NF_ACCEPT;
459 #else
460         if (hotdrop)
461                 return NF_DROP;
462         else return verdict;
463 #endif
464 }
465
466 /* Figures out from what hook each rule can be called: returns 0 if
467    there are loops.  Puts hook bitmask in comefrom.

   Also returns 0 when a standard verdict is out of range (a negative
   verdict below -NF_MAX_VERDICT - 1, or a jump offset beyond
   newinfo->size - sizeof(struct ipt_entry)).  Returns 1 on success.

   newinfo:     supplies hook_entry[] start offsets and the blob size
   valid_hooks: bitmask of hooks to walk; other hooks are skipped
   entry0:      base address of the rule blob

   Two fields of each entry are borrowed as scratch space during the walk:
   counters.pcnt holds the offset the walk arrived from (cleared again when
   backtracking), and bit NF_INET_NUMHOOKS of comefrom marks an entry as
   being on the path currently followed - meeting an entry with that bit
   already set is reported as a loop. */
468 static int
469 mark_source_chains(struct xt_table_info *newinfo,
470                    unsigned int valid_hooks, void *entry0)
471 {
472         unsigned int hook;
473
474         /* No recursion; use packet counter to save back ptrs (reset
475            to 0 as we leave), and comefrom to save source hook bitmask */
476         for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) {
477                 unsigned int pos = newinfo->hook_entry[hook];
478                 struct ipt_entry *e = (struct ipt_entry *)(entry0 + pos);
479
480                 if (!(valid_hooks & (1 << hook)))
481                         continue;
482
483                 /* Set initial back pointer. */
484                 e->counters.pcnt = pos;
485
486                 for (;;) {
487                         struct ipt_standard_target *t
488                                 = (void *)ipt_get_target(e);
                        /* Already reached from this hook on an earlier path? */
489                         int visited = e->comefrom & (1 << hook);
490
491                         if (e->comefrom & (1 << NF_INET_NUMHOOKS)) {
492                                 printk("iptables: loop hook %u pos %u %08X.\n",
493                                        hook, pos, e->comefrom);
494                                 return 0;
495                         }
                        /* Record the hook and mark the entry as on the
                         * current path. */
496                         e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
497
498                         /* Unconditional return/END. */
499                         if ((e->target_offset == sizeof(struct ipt_entry)
500                             && (strcmp(t->target.u.user.name,
501                                        IPT_STANDARD_TARGET) == 0)
502                             && t->verdict < 0
503                             && unconditional(&e->ip)) || visited) {
504                                 unsigned int oldpos, size;
505
506                                 if (t->verdict < -NF_MAX_VERDICT - 1) {
507                                         duprintf("mark_source_chains: bad "
508                                                 "negative verdict (%i)\n",
509                                                                 t->verdict);
510                                         return 0;
511                                 }
512
513                                 /* Return: backtrack through the last
514                                    big jump. */
515                                 do {
                                        /* Toggle the on-path marker (it was
                                         * set above or when the entry was
                                         * first walked). */
516                                         e->comefrom ^= (1<<NF_INET_NUMHOOKS);
517 #ifdef DEBUG_IP_FIREWALL_USER
518                                         if (e->comefrom
519                                             & (1 << NF_INET_NUMHOOKS)) {
520                                                 duprintf("Back unset "
521                                                          "on hook %u "
522                                                          "rule %u\n",
523                                                          hook, pos);
524                                         }
525 #endif
526                                         oldpos = pos;
527                                         pos = e->counters.pcnt;
528                                         e->counters.pcnt = 0;
529
530                                         /* We're at the start. */
531                                         if (pos == oldpos)
532                                                 goto next;
533
534                                         e = (struct ipt_entry *)
535                                                 (entry0 + pos);
                                /* Keep unwinding while the step just undone
                                 * was a plain fall-through (previous entry
                                 * directly precedes oldpos); stop at the
                                 * entry that made a jump. */
536                                 } while (oldpos == pos + e->next_offset);
537
538                                 /* Move along one */
539                                 size = e->next_offset;
540                                 e = (struct ipt_entry *)
541                                         (entry0 + pos + size);
542                                 e->counters.pcnt = pos;
543                                 pos += size;
544                         } else {
545                                 int newpos = t->verdict;
546
547                                 if (strcmp(t->target.u.user.name,
548                                            IPT_STANDARD_TARGET) == 0
549                                     && newpos >= 0) {
550                                         if (newpos > newinfo->size -
551                                                 sizeof(struct ipt_entry)) {
552                                                 duprintf("mark_source_chains: "
553                                                         "bad verdict (%i)\n",
554                                                                 newpos);
555                                                 return 0;
556                                         }
557                                         /* This a jump; chase it. */
558                                         duprintf("Jump rule %u -> %u\n",
559                                                  pos, newpos);
560                                 } else {
561                                         /* ... this is a fallthru */
562                                         newpos = pos + e->next_offset;
563                                 }
                                /* Step to the new entry, remembering where
                                 * we came from. */
564                                 e = (struct ipt_entry *)
565                                         (entry0 + newpos);
566                                 e->counters.pcnt = pos;
567                                 pos = newpos;
568                         }
569                 }
570                 next:
571                 duprintf("Finished chain %u\n", hook);
572         }
573         return 1;
574 }
575
576 static int
577 cleanup_match(struct ipt_entry_match *m, unsigned int *i)
578 {
579         if (i && (*i)-- == 0)
580                 return 1;
581
582         if (m->u.kernel.match->destroy)
583                 m->u.kernel.match->destroy(m->u.kernel.match, m->data);
584         module_put(m->u.kernel.match->me);
585         return 0;
586 }
587
588 static int
589 check_entry(struct ipt_entry *e, const char *name)
590 {
591         struct ipt_entry_target *t;
592
593         if (!ip_checkentry(&e->ip)) {
594                 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
595                 return -EINVAL;
596         }
597
598         if (e->target_offset + sizeof(struct ipt_entry_target) >
599             e->next_offset)
600                 return -EINVAL;
601
602         t = ipt_get_target(e);
603         if (e->target_offset + t->u.target_size > e->next_offset)
604                 return -EINVAL;
605
606         return 0;
607 }
608
609 static int
610 check_match(struct ipt_entry_match *m, const char *name,
611                               const struct ipt_ip *ip,
612                               unsigned int hookmask, unsigned int *i)
613 {
614         struct xt_match *match;
615         int ret;
616
617         match = m->u.kernel.match;
618         ret = xt_check_match(match, AF_INET, m->u.match_size - sizeof(*m),
619                              name, hookmask, ip->proto,
620                              ip->invflags & IPT_INV_PROTO, ip, m->data);
621         if (ret < 0) {
622                 duprintf("ip_tables: check failed for `%s'.\n",
623                          m->u.kernel.match->name);
624                 return ret;
625         }
626         ++*i;
627         return 0;
628 }
629
630 static int
631 find_check_match(struct ipt_entry_match *m,
632                  const char *name,
633                  const struct ipt_ip *ip,
634                  unsigned int hookmask,
635                  unsigned int *i)
636 {
637         struct xt_match *match;
638         int ret;
639
640         match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
641                                                       m->u.user.revision),
642                                         "ipt_%s", m->u.user.name);
643         if (IS_ERR(match) || !match) {
644                 duprintf("find_check_match: `%s' not found\n", m->u.user.name);
645                 return match ? PTR_ERR(match) : -ENOENT;
646         }
647         m->u.kernel.match = match;
648
649         ret = check_match(m, name, ip, hookmask, i);
650         if (ret)
651                 goto err;
652
653         return 0;
654 err:
655         module_put(m->u.kernel.match->me);
656         return ret;
657 }
658
659 static int check_target(struct ipt_entry *e, const char *name)
660 {
661         struct ipt_entry_target *t;
662         struct xt_target *target;
663         int ret;
664
665         t = ipt_get_target(e);
666         target = t->u.kernel.target;
667         ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t),
668                               name, e->comefrom, e->ip.proto,
669                               e->ip.invflags & IPT_INV_PROTO, e, t->data);
670         if (ret < 0) {
671                 duprintf("ip_tables: check failed for `%s'.\n",
672                          t->u.kernel.target->name);
673                 return ret;
674         }
675         return 0;
676 }
677
678 static int
679 find_check_entry(struct ipt_entry *e, const char *name, unsigned int size,
680                  unsigned int *i)
681 {
682         struct ipt_entry_target *t;
683         struct xt_target *target;
684         int ret;
685         unsigned int j;
686
687         ret = check_entry(e, name);
688         if (ret)
689                 return ret;
690
691         j = 0;
692         ret = IPT_MATCH_ITERATE(e, find_check_match, name, &e->ip,
693                                 e->comefrom, &j);
694         if (ret != 0)
695                 goto cleanup_matches;
696
697         t = ipt_get_target(e);
698         target = try_then_request_module(xt_find_target(AF_INET,
699                                                         t->u.user.name,
700                                                         t->u.user.revision),
701                                          "ipt_%s", t->u.user.name);
702         if (IS_ERR(target) || !target) {
703                 duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
704                 ret = target ? PTR_ERR(target) : -ENOENT;
705                 goto cleanup_matches;
706         }
707         t->u.kernel.target = target;
708
709         ret = check_target(e, name);
710         if (ret)
711                 goto err;
712
713         (*i)++;
714         return 0;
715  err:
716         module_put(t->u.kernel.target->me);
717  cleanup_matches:
718         IPT_MATCH_ITERATE(e, cleanup_match, &j);
719         return ret;
720 }
721
722 static int
723 check_entry_size_and_hooks(struct ipt_entry *e,
724                            struct xt_table_info *newinfo,
725                            unsigned char *base,
726                            unsigned char *limit,
727                            const unsigned int *hook_entries,
728                            const unsigned int *underflows,
729                            unsigned int *i)
730 {
731         unsigned int h;
732
733         if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
734             || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
735                 duprintf("Bad offset %p\n", e);
736                 return -EINVAL;
737         }
738
739         if (e->next_offset
740             < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
741                 duprintf("checking: element %p size %u\n",
742                          e, e->next_offset);
743                 return -EINVAL;
744         }
745
746         /* Check hooks & underflows */
747         for (h = 0; h < NF_INET_NUMHOOKS; h++) {
748                 if ((unsigned char *)e - base == hook_entries[h])
749                         newinfo->hook_entry[h] = hook_entries[h];
750                 if ((unsigned char *)e - base == underflows[h])
751                         newinfo->underflow[h] = underflows[h];
752         }
753
754         /* FIXME: underflows must be unconditional, standard verdicts
755            < 0 (not IPT_RETURN). --RR */
756
757         /* Clear counters and comefrom */
758         e->counters = ((struct xt_counters) { 0, 0 });
759         e->comefrom = 0;
760
761         (*i)++;
762         return 0;
763 }
764
765 static int
766 cleanup_entry(struct ipt_entry *e, unsigned int *i)
767 {
768         struct ipt_entry_target *t;
769
770         if (i && (*i)-- == 0)
771                 return 1;
772
773         /* Cleanup all matches */
774         IPT_MATCH_ITERATE(e, cleanup_match, NULL);
775         t = ipt_get_target(e);
776         if (t->u.kernel.target->destroy)
777                 t->u.kernel.target->destroy(t->u.kernel.target, t->data);
778         module_put(t->u.kernel.target->me);
779         return 0;
780 }
781
782 /* Checks and translates the user-supplied table segment (held in
783    newinfo) */
784 static int
785 translate_table(const char *name,
786                 unsigned int valid_hooks,
787                 struct xt_table_info *newinfo,
788                 void *entry0,
789                 unsigned int size,
790                 unsigned int number,
791                 const unsigned int *hook_entries,
792                 const unsigned int *underflows)
793 {
794         unsigned int i;
795         int ret;
796
797         newinfo->size = size;
798         newinfo->number = number;
799
800         /* Init all hooks to impossible value. */
801         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
802                 newinfo->hook_entry[i] = 0xFFFFFFFF;
803                 newinfo->underflow[i] = 0xFFFFFFFF;
804         }
805
806         duprintf("translate_table: size %u\n", newinfo->size);
807         i = 0;
808         /* Walk through entries, checking offsets. */
809         ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
810                                 check_entry_size_and_hooks,
811                                 newinfo,
812                                 entry0,
813                                 entry0 + size,
814                                 hook_entries, underflows, &i);
815         if (ret != 0)
816                 return ret;
817
818         if (i != number) {
819                 duprintf("translate_table: %u not %u entries\n",
820                          i, number);
821                 return -EINVAL;
822         }
823
824         /* Check hooks all assigned */
825         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
826                 /* Only hooks which are valid */
827                 if (!(valid_hooks & (1 << i)))
828                         continue;
829                 if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
830                         duprintf("Invalid hook entry %u %u\n",
831                                  i, hook_entries[i]);
832                         return -EINVAL;
833                 }
834                 if (newinfo->underflow[i] == 0xFFFFFFFF) {
835                         duprintf("Invalid underflow %u %u\n",
836                                  i, underflows[i]);
837                         return -EINVAL;
838                 }
839         }
840
841         if (!mark_source_chains(newinfo, valid_hooks, entry0))
842                 return -ELOOP;
843
844         /* Finally, each sanity check must pass */
845         i = 0;
846         ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
847                                 find_check_entry, name, size, &i);
848
849         if (ret != 0) {
850                 IPT_ENTRY_ITERATE(entry0, newinfo->size,
851                                 cleanup_entry, &i);
852                 return ret;
853         }
854
855         /* And one copy for every other CPU */
856         for_each_possible_cpu(i) {
857                 if (newinfo->entries[i] && newinfo->entries[i] != entry0)
858                         memcpy(newinfo->entries[i], entry0, newinfo->size);
859         }
860
861         return ret;
862 }
863
864 /* Gets counters. */
865 static inline int
866 add_entry_to_counter(const struct ipt_entry *e,
867                      struct xt_counters total[],
868                      unsigned int *i)
869 {
870         ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
871
872         (*i)++;
873         return 0;
874 }
875
876 static inline int
877 set_entry_to_counter(const struct ipt_entry *e,
878                      struct ipt_counters total[],
879                      unsigned int *i)
880 {
881         SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
882
883         (*i)++;
884         return 0;
885 }
886
887 static void
888 get_counters(const struct xt_table_info *t,
889              struct xt_counters counters[])
890 {
891         unsigned int cpu;
892         unsigned int i;
893         unsigned int curcpu;
894
895         /* Instead of clearing (by a previous call to memset())
896          * the counters and using adds, we set the counters
897          * with data used by 'current' CPU
898          * We dont care about preemption here.
899          */
900         curcpu = raw_smp_processor_id();
901
902         i = 0;
903         IPT_ENTRY_ITERATE(t->entries[curcpu],
904                           t->size,
905                           set_entry_to_counter,
906                           counters,
907                           &i);
908
909         for_each_possible_cpu(cpu) {
910                 if (cpu == curcpu)
911                         continue;
912                 i = 0;
913                 IPT_ENTRY_ITERATE(t->entries[cpu],
914                                   t->size,
915                                   add_entry_to_counter,
916                                   counters,
917                                   &i);
918         }
919 }
920
921 static struct xt_counters * alloc_counters(struct xt_table *table)
922 {
923         unsigned int countersize;
924         struct xt_counters *counters;
925         const struct xt_table_info *private = table->private;
926
927         /* We need atomic snapshot of counters: rest doesn't change
928            (other than comefrom, which userspace doesn't care
929            about). */
930         countersize = sizeof(struct xt_counters) * private->number;
931         counters = vmalloc_node(countersize, numa_node_id());
932
933         if (counters == NULL)
934                 return ERR_PTR(-ENOMEM);
935
936         /* First, sum counters... */
937         write_lock_bh(&table->lock);
938         get_counters(private, counters);
939         write_unlock_bh(&table->lock);
940
941         return counters;
942 }
943
944 static int
945 copy_entries_to_user(unsigned int total_size,
946                      struct xt_table *table,
947                      void __user *userptr)
948 {
949         unsigned int off, num;
950         struct ipt_entry *e;
951         struct xt_counters *counters;
952         const struct xt_table_info *private = table->private;
953         int ret = 0;
954         const void *loc_cpu_entry;
955
956         counters = alloc_counters(table);
957         if (IS_ERR(counters))
958                 return PTR_ERR(counters);
959
960         /* choose the copy that is on our node/cpu, ...
961          * This choice is lazy (because current thread is
962          * allowed to migrate to another cpu)
963          */
964         loc_cpu_entry = private->entries[raw_smp_processor_id()];
965         if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
966                 ret = -EFAULT;
967                 goto free_counters;
968         }
969
970         /* FIXME: use iterator macros --RR */
971         /* ... then go back and fix counters and names */
972         for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
973                 unsigned int i;
974                 const struct ipt_entry_match *m;
975                 const struct ipt_entry_target *t;
976
977                 e = (struct ipt_entry *)(loc_cpu_entry + off);
978                 if (copy_to_user(userptr + off
979                                  + offsetof(struct ipt_entry, counters),
980                                  &counters[num],
981                                  sizeof(counters[num])) != 0) {
982                         ret = -EFAULT;
983                         goto free_counters;
984                 }
985
986                 for (i = sizeof(struct ipt_entry);
987                      i < e->target_offset;
988                      i += m->u.match_size) {
989                         m = (void *)e + i;
990
991                         if (copy_to_user(userptr + off + i
992                                          + offsetof(struct ipt_entry_match,
993                                                     u.user.name),
994                                          m->u.kernel.match->name,
995                                          strlen(m->u.kernel.match->name)+1)
996                             != 0) {
997                                 ret = -EFAULT;
998                                 goto free_counters;
999                         }
1000                 }
1001
1002                 t = ipt_get_target(e);
1003                 if (copy_to_user(userptr + off + e->target_offset
1004                                  + offsetof(struct ipt_entry_target,
1005                                             u.user.name),
1006                                  t->u.kernel.target->name,
1007                                  strlen(t->u.kernel.target->name)+1) != 0) {
1008                         ret = -EFAULT;
1009                         goto free_counters;
1010                 }
1011         }
1012
1013  free_counters:
1014         vfree(counters);
1015         return ret;
1016 }
1017
1018 #ifdef CONFIG_COMPAT
1019 static void compat_standard_from_user(void *dst, void *src)
1020 {
1021         int v = *(compat_int_t *)src;
1022
1023         if (v > 0)
1024                 v += xt_compat_calc_jump(AF_INET, v);
1025         memcpy(dst, &v, sizeof(v));
1026 }
1027
1028 static int compat_standard_to_user(void __user *dst, void *src)
1029 {
1030         compat_int_t cv = *(int *)src;
1031
1032         if (cv > 0)
1033                 cv -= xt_compat_calc_jump(AF_INET, cv);
1034         return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
1035 }
1036
1037 static inline int
1038 compat_calc_match(struct ipt_entry_match *m, int *size)
1039 {
1040         *size += xt_compat_match_offset(m->u.kernel.match);
1041         return 0;
1042 }
1043
1044 static int compat_calc_entry(struct ipt_entry *e,
1045                              const struct xt_table_info *info,
1046                              void *base, struct xt_table_info *newinfo)
1047 {
1048         struct ipt_entry_target *t;
1049         unsigned int entry_offset;
1050         int off, i, ret;
1051
1052         off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1053         entry_offset = (void *)e - base;
1054         IPT_MATCH_ITERATE(e, compat_calc_match, &off);
1055         t = ipt_get_target(e);
1056         off += xt_compat_target_offset(t->u.kernel.target);
1057         newinfo->size -= off;
1058         ret = xt_compat_add_offset(AF_INET, entry_offset, off);
1059         if (ret)
1060                 return ret;
1061
1062         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1063                 if (info->hook_entry[i] &&
1064                     (e < (struct ipt_entry *)(base + info->hook_entry[i])))
1065                         newinfo->hook_entry[i] -= off;
1066                 if (info->underflow[i] &&
1067                     (e < (struct ipt_entry *)(base + info->underflow[i])))
1068                         newinfo->underflow[i] -= off;
1069         }
1070         return 0;
1071 }
1072
1073 static int compat_table_info(const struct xt_table_info *info,
1074                              struct xt_table_info *newinfo)
1075 {
1076         void *loc_cpu_entry;
1077
1078         if (!newinfo || !info)
1079                 return -EINVAL;
1080
1081         /* we dont care about newinfo->entries[] */
1082         memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
1083         newinfo->initial_entries = 0;
1084         loc_cpu_entry = info->entries[raw_smp_processor_id()];
1085         return IPT_ENTRY_ITERATE(loc_cpu_entry, info->size,
1086                                  compat_calc_entry, info, loc_cpu_entry,
1087                                  newinfo);
1088 }
1089 #endif
1090
1091 static int get_info(struct net *net, void __user *user, int *len, int compat)
1092 {
1093         char name[IPT_TABLE_MAXNAMELEN];
1094         struct xt_table *t;
1095         int ret;
1096
1097         if (*len != sizeof(struct ipt_getinfo)) {
1098                 duprintf("length %u != %zu\n", *len,
1099                          sizeof(struct ipt_getinfo));
1100                 return -EINVAL;
1101         }
1102
1103         if (copy_from_user(name, user, sizeof(name)) != 0)
1104                 return -EFAULT;
1105
1106         name[IPT_TABLE_MAXNAMELEN-1] = '\0';
1107 #ifdef CONFIG_COMPAT
1108         if (compat)
1109                 xt_compat_lock(AF_INET);
1110 #endif
1111         t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
1112                                     "iptable_%s", name);
1113         if (t && !IS_ERR(t)) {
1114                 struct ipt_getinfo info;
1115                 const struct xt_table_info *private = t->private;
1116
1117 #ifdef CONFIG_COMPAT
1118                 if (compat) {
1119                         struct xt_table_info tmp;
1120                         ret = compat_table_info(private, &tmp);
1121                         xt_compat_flush_offsets(AF_INET);
1122                         private = &tmp;
1123                 }
1124 #endif
1125                 info.valid_hooks = t->valid_hooks;
1126                 memcpy(info.hook_entry, private->hook_entry,
1127                        sizeof(info.hook_entry));
1128                 memcpy(info.underflow, private->underflow,
1129                        sizeof(info.underflow));
1130                 info.num_entries = private->number;
1131                 info.size = private->size;
1132                 strcpy(info.name, name);
1133
1134                 if (copy_to_user(user, &info, *len) != 0)
1135                         ret = -EFAULT;
1136                 else
1137                         ret = 0;
1138
1139                 xt_table_unlock(t);
1140                 module_put(t->me);
1141         } else
1142                 ret = t ? PTR_ERR(t) : -ENOENT;
1143 #ifdef CONFIG_COMPAT
1144         if (compat)
1145                 xt_compat_unlock(AF_INET);
1146 #endif
1147         return ret;
1148 }
1149
1150 static int
1151 get_entries(struct net *net, struct ipt_get_entries __user *uptr, int *len)
1152 {
1153         int ret;
1154         struct ipt_get_entries get;
1155         struct xt_table *t;
1156
1157         if (*len < sizeof(get)) {
1158                 duprintf("get_entries: %u < %zu\n", *len, sizeof(get));
1159                 return -EINVAL;
1160         }
1161         if (copy_from_user(&get, uptr, sizeof(get)) != 0)
1162                 return -EFAULT;
1163         if (*len != sizeof(struct ipt_get_entries) + get.size) {
1164                 duprintf("get_entries: %u != %zu\n",
1165                          *len, sizeof(get) + get.size);
1166                 return -EINVAL;
1167         }
1168
1169         t = xt_find_table_lock(net, AF_INET, get.name);
1170         if (t && !IS_ERR(t)) {
1171                 const struct xt_table_info *private = t->private;
1172                 duprintf("t->private->number = %u\n", private->number);
1173                 if (get.size == private->size)
1174                         ret = copy_entries_to_user(private->size,
1175                                                    t, uptr->entrytable);
1176                 else {
1177                         duprintf("get_entries: I've got %u not %u!\n",
1178                                  private->size, get.size);
1179                         ret = -EAGAIN;
1180                 }
1181                 module_put(t->me);
1182                 xt_table_unlock(t);
1183         } else
1184                 ret = t ? PTR_ERR(t) : -ENOENT;
1185
1186         return ret;
1187 }
1188
1189 static int
1190 __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
1191              struct xt_table_info *newinfo, unsigned int num_counters,
1192              void __user *counters_ptr)
1193 {
1194         int ret;
1195         struct xt_table *t;
1196         struct xt_table_info *oldinfo;
1197         struct xt_counters *counters;
1198         void *loc_cpu_old_entry;
1199
1200         ret = 0;
1201         counters = vmalloc(num_counters * sizeof(struct xt_counters));
1202         if (!counters) {
1203                 ret = -ENOMEM;
1204                 goto out;
1205         }
1206
1207         t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
1208                                     "iptable_%s", name);
1209         if (!t || IS_ERR(t)) {
1210                 ret = t ? PTR_ERR(t) : -ENOENT;
1211                 goto free_newinfo_counters_untrans;
1212         }
1213
1214         /* You lied! */
1215         if (valid_hooks != t->valid_hooks) {
1216                 duprintf("Valid hook crap: %08X vs %08X\n",
1217                          valid_hooks, t->valid_hooks);
1218                 ret = -EINVAL;
1219                 goto put_module;
1220         }
1221
1222         oldinfo = xt_replace_table(t, num_counters, newinfo, &ret);
1223         if (!oldinfo)
1224                 goto put_module;
1225
1226         /* Update module usage count based on number of rules */
1227         duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
1228                 oldinfo->number, oldinfo->initial_entries, newinfo->number);
1229         if ((oldinfo->number > oldinfo->initial_entries) ||
1230             (newinfo->number <= oldinfo->initial_entries))
1231                 module_put(t->me);
1232         if ((oldinfo->number > oldinfo->initial_entries) &&
1233             (newinfo->number <= oldinfo->initial_entries))
1234                 module_put(t->me);
1235
1236         /* Get the old counters. */
1237         get_counters(oldinfo, counters);
1238         /* Decrease module usage counts and free resource */
1239         loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
1240         IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
1241                           NULL);
1242         xt_free_table_info(oldinfo);
1243         if (copy_to_user(counters_ptr, counters,
1244                          sizeof(struct xt_counters) * num_counters) != 0)
1245                 ret = -EFAULT;
1246         vfree(counters);
1247         xt_table_unlock(t);
1248         return ret;
1249
1250  put_module:
1251         module_put(t->me);
1252         xt_table_unlock(t);
1253  free_newinfo_counters_untrans:
1254         vfree(counters);
1255  out:
1256         return ret;
1257 }
1258
1259 static int
1260 do_replace(struct net *net, void __user *user, unsigned int len)
1261 {
1262         int ret;
1263         struct ipt_replace tmp;
1264         struct xt_table_info *newinfo;
1265         void *loc_cpu_entry;
1266
1267         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1268                 return -EFAULT;
1269
1270         /* overflow check */
1271         if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1272                 return -ENOMEM;
1273
1274         newinfo = xt_alloc_table_info(tmp.size);
1275         if (!newinfo)
1276                 return -ENOMEM;
1277
1278         /* choose the copy that is on our node/cpu */
1279         loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1280         if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1281                            tmp.size) != 0) {
1282                 ret = -EFAULT;
1283                 goto free_newinfo;
1284         }
1285
1286         ret = translate_table(tmp.name, tmp.valid_hooks,
1287                               newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
1288                               tmp.hook_entry, tmp.underflow);
1289         if (ret != 0)
1290                 goto free_newinfo;
1291
1292         duprintf("ip_tables: Translated table\n");
1293
1294         ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1295                            tmp.num_counters, tmp.counters);
1296         if (ret)
1297                 goto free_newinfo_untrans;
1298         return 0;
1299
1300  free_newinfo_untrans:
1301         IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
1302  free_newinfo:
1303         xt_free_table_info(newinfo);
1304         return ret;
1305 }
1306
1307 /* We're lazy, and add to the first CPU; overflow works its fey magic
1308  * and everything is OK. */
1309 static int
1310 add_counter_to_entry(struct ipt_entry *e,
1311                      const struct xt_counters addme[],
1312                      unsigned int *i)
1313 {
1314 #if 0
1315         duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
1316                  *i,
1317                  (long unsigned int)e->counters.pcnt,
1318                  (long unsigned int)e->counters.bcnt,
1319                  (long unsigned int)addme[*i].pcnt,
1320                  (long unsigned int)addme[*i].bcnt);
1321 #endif
1322
1323         ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
1324
1325         (*i)++;
1326         return 0;
1327 }
1328
1329 static int
1330 do_add_counters(struct net *net, void __user *user, unsigned int len, int compat)
1331 {
1332         unsigned int i;
1333         struct xt_counters_info tmp;
1334         struct xt_counters *paddc;
1335         unsigned int num_counters;
1336         const char *name;
1337         int size;
1338         void *ptmp;
1339         struct xt_table *t;
1340         const struct xt_table_info *private;
1341         int ret = 0;
1342         void *loc_cpu_entry;
1343 #ifdef CONFIG_COMPAT
1344         struct compat_xt_counters_info compat_tmp;
1345
1346         if (compat) {
1347                 ptmp = &compat_tmp;
1348                 size = sizeof(struct compat_xt_counters_info);
1349         } else
1350 #endif
1351         {
1352                 ptmp = &tmp;
1353                 size = sizeof(struct xt_counters_info);
1354         }
1355
1356         if (copy_from_user(ptmp, user, size) != 0)
1357                 return -EFAULT;
1358
1359 #ifdef CONFIG_COMPAT
1360         if (compat) {
1361                 num_counters = compat_tmp.num_counters;
1362                 name = compat_tmp.name;
1363         } else
1364 #endif
1365         {
1366                 num_counters = tmp.num_counters;
1367                 name = tmp.name;
1368         }
1369
1370         if (len != size + num_counters * sizeof(struct xt_counters))
1371                 return -EINVAL;
1372
1373         paddc = vmalloc_node(len - size, numa_node_id());
1374         if (!paddc)
1375                 return -ENOMEM;
1376
1377         if (copy_from_user(paddc, user + size, len - size) != 0) {
1378                 ret = -EFAULT;
1379                 goto free;
1380         }
1381
1382         t = xt_find_table_lock(net, AF_INET, name);
1383         if (!t || IS_ERR(t)) {
1384                 ret = t ? PTR_ERR(t) : -ENOENT;
1385                 goto free;
1386         }
1387
1388         write_lock_bh(&t->lock);
1389         private = t->private;
1390         if (private->number != num_counters) {
1391                 ret = -EINVAL;
1392                 goto unlock_up_free;
1393         }
1394
1395         i = 0;
1396         /* Choose the copy that is on our node */
1397         loc_cpu_entry = private->entries[raw_smp_processor_id()];
1398         IPT_ENTRY_ITERATE(loc_cpu_entry,
1399                           private->size,
1400                           add_counter_to_entry,
1401                           paddc,
1402                           &i);
1403  unlock_up_free:
1404         write_unlock_bh(&t->lock);
1405         xt_table_unlock(t);
1406         module_put(t->me);
1407  free:
1408         vfree(paddc);
1409
1410         return ret;
1411 }
1412
1413 #ifdef CONFIG_COMPAT
1414 struct compat_ipt_replace {
1415         char                    name[IPT_TABLE_MAXNAMELEN];
1416         u32                     valid_hooks;
1417         u32                     num_entries;
1418         u32                     size;
1419         u32                     hook_entry[NF_INET_NUMHOOKS];
1420         u32                     underflow[NF_INET_NUMHOOKS];
1421         u32                     num_counters;
1422         compat_uptr_t           counters;       /* struct ipt_counters * */
1423         struct compat_ipt_entry entries[0];
1424 };
1425
1426 static int
1427 compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
1428                           unsigned int *size, struct xt_counters *counters,
1429                           unsigned int *i)
1430 {
1431         struct ipt_entry_target *t;
1432         struct compat_ipt_entry __user *ce;
1433         u_int16_t target_offset, next_offset;
1434         compat_uint_t origsize;
1435         int ret;
1436
1437         ret = -EFAULT;
1438         origsize = *size;
1439         ce = (struct compat_ipt_entry __user *)*dstptr;
1440         if (copy_to_user(ce, e, sizeof(struct ipt_entry)))
1441                 goto out;
1442
1443         if (copy_to_user(&ce->counters, &counters[*i], sizeof(counters[*i])))
1444                 goto out;
1445
1446         *dstptr += sizeof(struct compat_ipt_entry);
1447         *size -= sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1448
1449         ret = IPT_MATCH_ITERATE(e, xt_compat_match_to_user, dstptr, size);
1450         target_offset = e->target_offset - (origsize - *size);
1451         if (ret)
1452                 goto out;
1453         t = ipt_get_target(e);
1454         ret = xt_compat_target_to_user(t, dstptr, size);
1455         if (ret)
1456                 goto out;
1457         ret = -EFAULT;
1458         next_offset = e->next_offset - (origsize - *size);
1459         if (put_user(target_offset, &ce->target_offset))
1460                 goto out;
1461         if (put_user(next_offset, &ce->next_offset))
1462                 goto out;
1463
1464         (*i)++;
1465         return 0;
1466 out:
1467         return ret;
1468 }
1469
1470 static int
1471 compat_find_calc_match(struct ipt_entry_match *m,
1472                        const char *name,
1473                        const struct ipt_ip *ip,
1474                        unsigned int hookmask,
1475                        int *size, unsigned int *i)
1476 {
1477         struct xt_match *match;
1478
1479         match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
1480                                                       m->u.user.revision),
1481                                         "ipt_%s", m->u.user.name);
1482         if (IS_ERR(match) || !match) {
1483                 duprintf("compat_check_calc_match: `%s' not found\n",
1484                          m->u.user.name);
1485                 return match ? PTR_ERR(match) : -ENOENT;
1486         }
1487         m->u.kernel.match = match;
1488         *size += xt_compat_match_offset(match);
1489
1490         (*i)++;
1491         return 0;
1492 }
1493
1494 static int
1495 compat_release_match(struct ipt_entry_match *m, unsigned int *i)
1496 {
1497         if (i && (*i)-- == 0)
1498                 return 1;
1499
1500         module_put(m->u.kernel.match->me);
1501         return 0;
1502 }
1503
1504 static int
1505 compat_release_entry(struct compat_ipt_entry *e, unsigned int *i)
1506 {
1507         struct ipt_entry_target *t;
1508
1509         if (i && (*i)-- == 0)
1510                 return 1;
1511
1512         /* Cleanup all matches */
1513         COMPAT_IPT_MATCH_ITERATE(e, compat_release_match, NULL);
1514         t = compat_ipt_get_target(e);
1515         module_put(t->u.kernel.target->me);
1516         return 0;
1517 }
1518
1519 static int
1520 check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
1521                                   struct xt_table_info *newinfo,
1522                                   unsigned int *size,
1523                                   unsigned char *base,
1524                                   unsigned char *limit,
1525                                   unsigned int *hook_entries,
1526                                   unsigned int *underflows,
1527                                   unsigned int *i,
1528                                   const char *name)
1529 {
1530         struct ipt_entry_target *t;
1531         struct xt_target *target;
1532         unsigned int entry_offset;
1533         unsigned int j;
1534         int ret, off, h;
1535
1536         duprintf("check_compat_entry_size_and_hooks %p\n", e);
1537         if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0
1538             || (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) {
1539                 duprintf("Bad offset %p, limit = %p\n", e, limit);
1540                 return -EINVAL;
1541         }
1542
1543         if (e->next_offset < sizeof(struct compat_ipt_entry) +
1544                              sizeof(struct compat_xt_entry_target)) {
1545                 duprintf("checking: element %p size %u\n",
1546                          e, e->next_offset);
1547                 return -EINVAL;
1548         }
1549
1550         /* For purposes of check_entry casting the compat entry is fine */
1551         ret = check_entry((struct ipt_entry *)e, name);
1552         if (ret)
1553                 return ret;
1554
1555         off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1556         entry_offset = (void *)e - (void *)base;
1557         j = 0;
1558         ret = COMPAT_IPT_MATCH_ITERATE(e, compat_find_calc_match, name,
1559                                        &e->ip, e->comefrom, &off, &j);
1560         if (ret != 0)
1561                 goto release_matches;
1562
1563         t = compat_ipt_get_target(e);
1564         target = try_then_request_module(xt_find_target(AF_INET,
1565                                                         t->u.user.name,
1566                                                         t->u.user.revision),
1567                                          "ipt_%s", t->u.user.name);
1568         if (IS_ERR(target) || !target) {
1569                 duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
1570                          t->u.user.name);
1571                 ret = target ? PTR_ERR(target) : -ENOENT;
1572                 goto release_matches;
1573         }
1574         t->u.kernel.target = target;
1575
1576         off += xt_compat_target_offset(target);
1577         *size += off;
1578         ret = xt_compat_add_offset(AF_INET, entry_offset, off);
1579         if (ret)
1580                 goto out;
1581
1582         /* Check hooks & underflows */
1583         for (h = 0; h < NF_INET_NUMHOOKS; h++) {
1584                 if ((unsigned char *)e - base == hook_entries[h])
1585                         newinfo->hook_entry[h] = hook_entries[h];
1586                 if ((unsigned char *)e - base == underflows[h])
1587                         newinfo->underflow[h] = underflows[h];
1588         }
1589
1590         /* Clear counters and comefrom */
1591         memset(&e->counters, 0, sizeof(e->counters));
1592         e->comefrom = 0;
1593
1594         (*i)++;
1595         return 0;
1596
1597 out:
1598         module_put(t->u.kernel.target->me);
1599 release_matches:
1600         IPT_MATCH_ITERATE(e, compat_release_match, &j);
1601         return ret;
1602 }
1603
/*
 * Convert one struct compat_ipt_entry at @e into a struct ipt_entry that
 * is written at *@dstptr.
 *
 * @e:       compat entry to convert
 * @dstptr:  in/out write cursor into the destination blob; advanced past
 *           the entry header here and handed on to the match and target
 *           converters
 * @size:    in/out running size; its change relative to the value it had
 *           on entry is used to fix up the offsets stored in the new entry
 * @name:    not referenced in this function
 * @newinfo: table info whose hook_entry[] and underflow[] are adjusted
 * @base:    start of the destination blob, used to compute the offset of
 *           the new entry
 *
 * Returns 0, or the non-zero value produced by the match iteration.
 */
1604 static int
1605 compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
1606                             unsigned int *size, const char *name,
1607                             struct xt_table_info *newinfo, unsigned char *base)
1608 {
1609         struct ipt_entry_target *t;
1610         struct xt_target *target;
1611         struct ipt_entry *de;
1612         unsigned int origsize;
1613         int ret, h;
1614
1615         ret = 0;
             /* Remember the starting size so the delta can be computed later. */
1616         origsize = *size;
1617         de = (struct ipt_entry *)*dstptr;
             /* Copy the header, then the counters from e->counters separately. */
1618         memcpy(de, e, sizeof(struct ipt_entry));
1619         memcpy(&de->counters, &e->counters, sizeof(e->counters));
1620
             /* Step over the new header and account for the header size difference. */
1621         *dstptr += sizeof(struct ipt_entry);
1622         *size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1623
             /* Convert every match; the callback receives the cursor and running size. */
1624         ret = COMPAT_IPT_MATCH_ITERATE(e, xt_compat_match_from_user,
1625                                        dstptr, size);
1626         if (ret)
1627                 return ret;
             /*
              * (origsize - *size) is computed in unsigned int arithmetic.
              * NOTE(review): presumably *size only grows, so the difference
              * wraps and the subtraction enlarges the offset -- confirm.
              */
1628         de->target_offset = e->target_offset - (origsize - *size);
1629         t = compat_ipt_get_target(e);
             /* NOTE(review): 'target' is assigned here but never read afterwards. */
1630         target = t->u.kernel.target;
1631         xt_compat_target_from_user(t, dstptr, size);
1632
1633         de->next_offset = e->next_offset - (origsize - *size);
             /*
              * Apply the same delta to every hook entry point and underflow
              * whose recorded offset lies beyond this entry's offset.
              */
1634         for (h = 0; h < NF_INET_NUMHOOKS; h++) {
1635                 if ((unsigned char *)de - base < newinfo->hook_entry[h])
1636                         newinfo->hook_entry[h] -= origsize - *size;
1637                 if ((unsigned char *)de - base < newinfo->underflow[h])
1638                         newinfo->underflow[h] -= origsize - *size;
1639         }
1640         return ret;
1641 }
1642
1643 static int
1644 compat_check_entry(struct ipt_entry *e, const char *name,
1645                                      unsigned int *i)
1646 {
1647         unsigned int j;
1648         int ret;
1649
1650         j = 0;
1651         ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip,
1652                                 e->comefrom, &j);
1653         if (ret)
1654                 goto cleanup_matches;
1655
1656         ret = check_target(e, name);
1657         if (ret)
1658                 goto cleanup_matches;
1659
1660         (*i)++;
1661         return 0;
1662
1663  cleanup_matches:
1664         IPT_MATCH_ITERATE(e, cleanup_match, &j);
1665         return ret;
1666 }
1667
/*
 * Convert a compat ruleset blob into a newly allocated table.
 *
 * @name:         table name, handed to the per-entry callbacks
 * @valid_hooks:  bitmask of hooks that must have an entry point and an
 *                underflow assigned
 * @pinfo:        in: table info holding the compat blob; on success it is
 *                replaced by the newly allocated table info and the old
 *                one is freed
 * @pentry0:      in: start of the compat entries; on success replaced by
 *                the start of the translated entries
 * @total_size:   size in bytes of the compat blob
 * @number:       number of entries the blob is expected to contain
 * @hook_entries: hook entry offsets supplied by the caller
 * @underflows:   underflow offsets supplied by the caller
 *
 * Returns 0 on success; otherwise -EINVAL, -ENOMEM, -ELOOP or the non-zero
 * value returned by one of the entry callbacks.  On failure the pointers
 * *pinfo and *pentry0 are not updated.
 */
1668 static int
1669 translate_compat_table(const char *name,
1670                        unsigned int valid_hooks,
1671                        struct xt_table_info **pinfo,
1672                        void **pentry0,
1673                        unsigned int total_size,
1674                        unsigned int number,
1675                        unsigned int *hook_entries,
1676                        unsigned int *underflows)
1677 {
1678         unsigned int i, j;
1679         struct xt_table_info *newinfo, *info;
1680         void *pos, *entry0, *entry1;
1681         unsigned int size;
1682         int ret;
1683
1684         info = *pinfo;
1685         entry0 = *pentry0;
1686         size = total_size;
1687         info->number = number;
1688
1689         /* Init all hooks to impossible value. */
1690         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1691                 info->hook_entry[i] = 0xFFFFFFFF;
1692                 info->underflow[i] = 0xFFFFFFFF;
1693         }
1694
1695         duprintf("translate_compat_table: size %u\n", info->size);
             /* j counts the entries accepted by the first pass. */
1696         j = 0;
1697         xt_compat_lock(AF_INET);
1698         /* Walk through entries, checking offsets. */
1699         ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size,
1700                                        check_compat_entry_size_and_hooks,
1701                                        info, &size, entry0,
1702                                        entry0 + total_size,
1703                                        hook_entries, underflows, &j, name);
1704         if (ret != 0)
1705                 goto out_unlock;
1706
             /* Default error for the consistency checks that follow. */
1707         ret = -EINVAL;
1708         if (j != number) {
1709                 duprintf("translate_compat_table: %u not %u entries\n",
1710                          j, number);
1711                 goto out_unlock;
1712         }
1713
1714         /* Check hooks all assigned */
1715         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1716                 /* Only hooks which are valid */
1717                 if (!(valid_hooks & (1 << i)))
1718                         continue;
1719                 if (info->hook_entry[i] == 0xFFFFFFFF) {
1720                         duprintf("Invalid hook entry %u %u\n",
1721                                  i, hook_entries[i]);
1722                         goto out_unlock;
1723                 }
1724                 if (info->underflow[i] == 0xFFFFFFFF) {
1725                         duprintf("Invalid underflow %u %u\n",
1726                                  i, underflows[i]);
1727                         goto out_unlock;
1728                 }
1729         }
1730
             /* Allocate the destination using the size left behind by the first pass. */
1731         ret = -ENOMEM;
1732         newinfo = xt_alloc_table_info(size);
1733         if (!newinfo)
1734                 goto out_unlock;
1735
1736         newinfo->number = number;
1737         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1738                 newinfo->hook_entry[i] = info->hook_entry[i];
1739                 newinfo->underflow[i] = info->underflow[i];
1740         }
1741         entry1 = newinfo->entries[raw_smp_processor_id()];
1742         pos = entry1;
             /* Restart the running size for the copy pass. */
1743         size = total_size;
1744         ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size,
1745                                        compat_copy_entry_from_user,
1746                                        &pos, &size, name, newinfo, entry1);
             /* Offsets are flushed and the compat lock dropped whatever the copy result. */
1747         xt_compat_flush_offsets(AF_INET);
1748         xt_compat_unlock(AF_INET);
1749         if (ret)
1750                 goto free_newinfo;
1751
             /* A zero return from mark_source_chains() is reported as -ELOOP. */
1752         ret = -ELOOP;
1753         if (!mark_source_chains(newinfo, valid_hooks, entry1))
1754                 goto free_newinfo;
1755
             /* i counts the translated entries that passed compat_check_entry(). */
1756         i = 0;
1757         ret = IPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry,
1758                                 name, &i);
1759         if (ret) {
                     /*
                      * Split the cleanup: j - i compat entries are handed to
                      * compat_release_entry(), i translated entries to
                      * cleanup_entry().
                      * NOTE(review): presumably the _CONTINUE variant skips the
                      * first i entries -- confirm against the macro definition.
                      */
1760                 j -= i;
1761                 COMPAT_IPT_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i,
1762                                                   compat_release_entry, &j);
1763                 IPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i);
1764                 xt_free_table_info(newinfo);
1765                 return ret;
1766         }
1767
1768         /* And one copy for every other CPU */
1769         for_each_possible_cpu(i)
1770                 if (newinfo->entries[i] && newinfo->entries[i] != entry1)
1771                         memcpy(newinfo->entries[i], entry1, newinfo->size);
1772
             /* Success: publish the new table and free the compat one. */
1773         *pinfo = newinfo;
1774         *pentry0 = entry1;
1775         xt_free_table_info(info);
1776         return 0;
1777
1778 free_newinfo:
1779         xt_free_table_info(newinfo);
1780 out:
             /* Hand the j compat entries counted by the first pass to compat_release_entry(). */
1781         COMPAT_IPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j);
1782         return ret;
1783 out_unlock:
             /* Failure while the compat lock is still held: flush, unlock, then release. */
1784         xt_compat_flush_offsets(AF_INET);
1785         xt_compat_unlock(AF_INET);
1786         goto out;
1787 }
1788
1789 static int
1790 compat_do_replace(struct net *net, void __user *user, unsigned int len)
1791 {
1792         int ret;
1793         struct compat_ipt_replace tmp;
1794         struct xt_table_info *newinfo;
1795         void *loc_cpu_entry;
1796
1797         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1798                 return -EFAULT;
1799
1800         /* overflow check */
1801         if (tmp.size >= INT_MAX / num_possible_cpus())
1802                 return -ENOMEM;
1803         if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1804                 return -ENOMEM;
1805
1806         newinfo = xt_alloc_table_info(tmp.size);
1807         if (!newinfo)
1808                 return -ENOMEM;
1809
1810         /* choose the copy that is on our node/cpu */
1811         loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1812         if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1813                            tmp.size) != 0) {
1814                 ret = -EFAULT;
1815                 goto free_newinfo;
1816         }
1817
1818         ret = translate_compat_table(tmp.name, tmp.valid_hooks,
1819                                      &newinfo, &loc_cpu_entry, tmp.size,
1820                                      tmp.num_entries, tmp.hook_entry,
1821                                      tmp.underflow);
1822         if (ret != 0)
1823                 goto free_newinfo;
1824
1825         duprintf("compat_do_replace: Translated table\n");
1826
1827         ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1828                            tmp.num_counters, compat_ptr(tmp.counters));
1829         if (ret)
1830                 goto free_newinfo_untrans;
1831         return 0;
1832
1833  free_newinfo_untrans:
1834         IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
1835  free_newinfo:
1836         xt_free_table_info(newinfo);
1837         return ret;
1838 }
1839
1840 static int
1841 compat_do_ipt_set_ctl(struct sock *sk,  int cmd, void __user *user,
1842                       unsigned int len)
1843 {
1844         int ret;
1845
1846         if (!capable(CAP_NET_ADMIN))
1847                 return -EPERM;
1848
1849         switch (cmd) {
1850         case IPT_SO_SET_REPLACE:
1851                 ret = compat_do_replace(sock_net(sk), user, len);
1852                 break;
1853
1854         case IPT_SO_SET_ADD_COUNTERS:
1855                 ret = do_add_counters(sock_net(sk), user, len, 1);
1856                 break;
1857
1858         default:
1859                 duprintf("do_ipt_set_ctl:  unknown request %i\n", cmd);
1860                 ret = -EINVAL;
1861         }
1862
1863         return ret;
1864 }
1865
/*
 * Header of the buffer exchanged with userspace by compat_get_entries():
 * a fixed part followed by the entry table.
 */
1866 struct compat_ipt_get_entries {
1867         char name[IPT_TABLE_MAXNAMELEN];   /* table name used for the lookup */
1868         compat_uint_t size;                /* size of the entry table, compared against the table's compat size */
1869         struct compat_ipt_entry entrytable[0];   /* destination of the copied entries */
1870 };
1871
1872 static int
1873 compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
1874                             void __user *userptr)
1875 {
1876         struct xt_counters *counters;
1877         const struct xt_table_info *private = table->private;
1878         void __user *pos;
1879         unsigned int size;
1880         int ret = 0;
1881         const void *loc_cpu_entry;
1882         unsigned int i = 0;
1883
1884         counters = alloc_counters(table);
1885         if (IS_ERR(counters))
1886                 return PTR_ERR(counters);
1887
1888         /* choose the copy that is on our node/cpu, ...
1889          * This choice is lazy (because current thread is
1890          * allowed to migrate to another cpu)
1891          */
1892         loc_cpu_entry = private->entries[raw_smp_processor_id()];
1893         pos = userptr;
1894         size = total_size;
1895         ret = IPT_ENTRY_ITERATE(loc_cpu_entry, total_size,
1896                                 compat_copy_entry_to_user,
1897                                 &pos, &size, counters, &i);
1898
1899         vfree(counters);
1900         return ret;
1901 }
1902
1903 static int
1904 compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
1905                    int *len)
1906 {
1907         int ret;
1908         struct compat_ipt_get_entries get;
1909         struct xt_table *t;
1910
1911         if (*len < sizeof(get)) {
1912                 duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
1913                 return -EINVAL;
1914         }
1915
1916         if (copy_from_user(&get, uptr, sizeof(get)) != 0)
1917                 return -EFAULT;
1918
1919         if (*len != sizeof(struct compat_ipt_get_entries) + get.size) {
1920                 duprintf("compat_get_entries: %u != %zu\n",
1921                          *len, sizeof(get) + get.size);
1922                 return -EINVAL;
1923         }
1924
1925         xt_compat_lock(AF_INET);
1926         t = xt_find_table_lock(net, AF_INET, get.name);
1927         if (t && !IS_ERR(t)) {
1928                 const struct xt_table_info *private = t->private;
1929                 struct xt_table_info info;
1930                 duprintf("t->private->number = %u\n", private->number);
1931                 ret = compat_table_info(private, &info);
1932                 if (!ret && get.size == info.size) {
1933                         ret = compat_copy_entries_to_user(private->size,
1934                                                           t, uptr->entrytable);
1935                 } else if (!ret) {
1936                         duprintf("compat_get_entries: I've got %u not %u!\n",
1937                                  private->size, get.size);
1938                         ret = -EAGAIN;
1939                 }
1940                 xt_compat_flush_offsets(AF_INET);
1941                 module_put(t->me);
1942                 xt_table_unlock(t);
1943         } else
1944                 ret = t ? PTR_ERR(t) : -ENOENT;
1945
1946         xt_compat_unlock(AF_INET);
1947         return ret;
1948 }
1949
1950 static int do_ipt_get_ctl(struct sock *, int, void __user *, int *);
1951
1952 static int
1953 compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1954 {
1955         int ret;
1956
1957         if (!capable(CAP_NET_ADMIN))
1958                 return -EPERM;
1959
1960         switch (cmd) {
1961         case IPT_SO_GET_INFO:
1962                 ret = get_info(sock_net(sk), user, len, 1);
1963                 break;
1964         case IPT_SO_GET_ENTRIES:
1965                 ret = compat_get_entries(sock_net(sk), user, len);
1966                 break;
1967         default:
1968                 ret = do_ipt_get_ctl(sk, cmd, user, len);
1969         }
1970         return ret;
1971 }
1972 #endif
1973
1974 static int
1975 do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1976 {
1977         int ret;
1978
1979         if (!capable(CAP_NET_ADMIN))
1980                 return -EPERM;
1981
1982         switch (cmd) {
1983         case IPT_SO_SET_REPLACE:
1984                 ret = do_replace(sock_net(sk), user, len);
1985                 break;
1986
1987         case IPT_SO_SET_ADD_COUNTERS:
1988                 ret = do_add_counters(sock_net(sk), user, len, 0);
1989                 break;
1990
1991         default:
1992                 duprintf("do_ipt_set_ctl:  unknown request %i\n", cmd);
1993                 ret = -EINVAL;
1994         }
1995
1996         return ret;
1997 }
1998
1999 static int
2000 do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2001 {
2002         int ret;
2003
2004         if (!capable(CAP_NET_ADMIN))
2005                 return -EPERM;
2006
2007         switch (cmd) {
2008         case IPT_SO_GET_INFO:
2009                 ret = get_info(sock_net(sk), user, len, 0);
2010                 break;
2011
2012         case IPT_SO_GET_ENTRIES:
2013                 ret = get_entries(sock_net(sk), user, len);
2014                 break;
2015
2016         case IPT_SO_GET_REVISION_MATCH:
2017         case IPT_SO_GET_REVISION_TARGET: {
2018                 struct ipt_get_revision rev;
2019                 int target;
2020
2021                 if (*len != sizeof(rev)) {
2022                         ret = -EINVAL;
2023                         break;
2024                 }
2025                 if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
2026                         ret = -EFAULT;
2027                         break;
2028                 }
2029
2030                 if (cmd == IPT_SO_GET_REVISION_TARGET)
2031                         target = 1;
2032                 else
2033                         target = 0;
2034
2035                 try_then_request_module(xt_find_revision(AF_INET, rev.name,
2036                                                          rev.revision,
2037                                                          target, &ret),
2038                                         "ipt_%s", rev.name);
2039                 break;
2040         }
2041
2042         default:
2043                 duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
2044                 ret = -EINVAL;
2045         }
2046
2047         return ret;
2048 }
2049
2050 struct xt_table *ipt_register_table(struct net *net, struct xt_table *table,
2051                                     const struct ipt_replace *repl)
2052 {
2053         int ret;
2054         struct xt_table_info *newinfo;
2055         struct xt_table_info bootstrap
2056                 = { 0, 0, 0, { 0 }, { 0 }, { } };
2057         void *loc_cpu_entry;
2058         struct xt_table *new_table;
2059
2060         newinfo = xt_alloc_table_info(repl->size);
2061         if (!newinfo) {
2062                 ret = -ENOMEM;
2063                 goto out;
2064         }
2065
2066         /* choose the copy on our node/cpu, but dont care about preemption */
2067         loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
2068         memcpy(loc_cpu_entry, repl->entries, repl->size);
2069
2070         ret = translate_table(table->name, table->valid_hooks,
2071                               newinfo, loc_cpu_entry, repl->size,
2072                               repl->num_entries,
2073                               repl->hook_entry,
2074                               repl->underflow);
2075         if (ret != 0)
2076                 goto out_free;
2077
2078         new_table = xt_register_table(net, table, &bootstrap, newinfo);
2079         if (IS_ERR(new_table)) {
2080                 ret = PTR_ERR(new_table);
2081                 goto out_free;
2082         }
2083
2084         return new_table;
2085
2086 out_free:
2087         xt_free_table_info(newinfo);
2088 out:
2089         return ERR_PTR(ret);
2090 }
2091
2092 void ipt_unregister_table(struct xt_table *table)
2093 {
2094         struct xt_table_info *private;
2095         void *loc_cpu_entry;
2096         struct module *table_owner = table->me;
2097
2098         private = xt_unregister_table(table);
2099
2100         /* Decrease module usage counts and free resources */
2101         loc_cpu_entry = private->entries[raw_smp_processor_id()];
2102         IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL);
2103         if (private->number > private->initial_entries)
2104                 module_put(table_owner);
2105         xt_free_table_info(private);
2106 }
2107
/*
 * Decide whether an ICMP type/code pair falls inside a rule's range.
 *
 * A @test_type of 0xFF matches any type.  Otherwise @type must equal
 * @test_type and @code must lie in [@min_code, @max_code].  The outcome
 * is flipped when @invert is set.
 */
static inline bool
icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
                     u_int8_t type, u_int8_t code,
                     bool invert)
{
        bool hit;

        if (test_type == 0xFF)
                hit = true;
        else
                hit = type == test_type &&
                      code >= min_code && code <= max_code;

        return hit != invert;
}
2118
2119 static bool
2120 icmp_match(const struct sk_buff *skb, const struct xt_match_param *par)
2121 {
2122         const struct icmphdr *ic;
2123         struct icmphdr _icmph;
2124         const struct ipt_icmp *icmpinfo = par->matchinfo;
2125
2126         /* Must not be a fragment. */
2127         if (par->fragoff != 0)
2128                 return false;
2129
2130         ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph);
2131         if (ic == NULL) {
2132                 /* We've been asked to examine this packet, and we
2133                  * can't.  Hence, no choice but to drop.
2134                  */
2135                 duprintf("Dropping evil ICMP tinygram.\n");
2136                 *par->hotdrop = true;
2137                 return false;
2138         }
2139
2140         return icmp_type_code_match(icmpinfo->type,
2141                                     icmpinfo->code[0],
2142                                     icmpinfo->code[1],
2143                                     ic->type, ic->code,
2144                                     !!(icmpinfo->invflags&IPT_ICMP_INV));
2145 }
2146
2147 /* Called when user tries to insert an entry of this type. */
2148 static bool
2149 icmp_checkentry(const char *tablename,
2150            const void *entry,
2151            const struct xt_match *match,
2152            void *matchinfo,
2153            unsigned int hook_mask)
2154 {
2155         const struct ipt_icmp *icmpinfo = matchinfo;
2156
2157         /* Must specify no unknown invflags */
2158         return !(icmpinfo->invflags & ~IPT_ICMP_INV);
2159 }
2160
2161 /* The built-in targets: standard (NULL) and error. */
/*
 * Standard target registered by ip_tables_init().  No .target handler is
 * set; the payload is a single int.
 */
2162 static struct xt_target ipt_standard_target __read_mostly = {
2163         .name           = IPT_STANDARD_TARGET,
2164         .targetsize     = sizeof(int),
2165         .family         = AF_INET,
2166 #ifdef CONFIG_COMPAT
             /* Compat layout uses a compat_int_t and dedicated converters. */
2167         .compatsize     = sizeof(compat_int_t),
2168         .compat_from_user = compat_standard_from_user,
2169         .compat_to_user = compat_standard_to_user,
2170 #endif
2171 };
2172
/*
 * Error target registered by ip_tables_init(); handled by ipt_error() and
 * carrying IPT_FUNCTION_MAXNAMELEN bytes of payload.
 */
2173 static struct xt_target ipt_error_target __read_mostly = {
2174         .name           = IPT_ERROR_TARGET,
2175         .target         = ipt_error,
2176         .targetsize     = IPT_FUNCTION_MAXNAMELEN,
2177         .family         = AF_INET,
2178 };
2179
/*
 * Socket option registration for PF_INET: wires the set/get option ranges
 * to the dispatchers in this file, plus the compat dispatchers when
 * CONFIG_COMPAT is enabled.
 */
2180 static struct nf_sockopt_ops ipt_sockopts = {
2181         .pf             = PF_INET,
             /* setsockopt range and handlers */
2182         .set_optmin     = IPT_BASE_CTL,
2183         .set_optmax     = IPT_SO_SET_MAX+1,
2184         .set            = do_ipt_set_ctl,
2185 #ifdef CONFIG_COMPAT
2186         .compat_set     = compat_do_ipt_set_ctl,
2187 #endif
             /* getsockopt range and handlers */
2188         .get_optmin     = IPT_BASE_CTL,
2189         .get_optmax     = IPT_SO_GET_MAX+1,
2190         .get            = do_ipt_get_ctl,
2191 #ifdef CONFIG_COMPAT
2192         .compat_get     = compat_do_ipt_get_ctl,
2193 #endif
2194         .owner          = THIS_MODULE,
2195 };
2196
/*
 * Built-in "icmp" match for AF_INET / IPPROTO_ICMP: icmp_match() evaluates
 * packets, icmp_checkentry() validates rules, and the per-rule data is a
 * struct ipt_icmp.
 */
2197 static struct xt_match icmp_matchstruct __read_mostly = {
2198         .name           = "icmp",
2199         .match          = icmp_match,
2200         .matchsize      = sizeof(struct ipt_icmp),
2201         .checkentry     = icmp_checkentry,
2202         .proto          = IPPROTO_ICMP,
2203         .family         = AF_INET,
2204 };
2205
/* Per-namespace init hook: forwards to xt_proto_init() for AF_INET and returns its result. */
2206 static int __net_init ip_tables_net_init(struct net *net)
2207 {
2208         return xt_proto_init(net, AF_INET);
2209 }
2210
/* Per-namespace exit hook: forwards to xt_proto_fini() for AF_INET. */
2211 static void __net_exit ip_tables_net_exit(struct net *net)
2212 {
2213         xt_proto_fini(net, AF_INET);
2214 }
2215
/* Per-namespace operations registered by ip_tables_init() and removed by ip_tables_fini(). */
2216 static struct pernet_operations ip_tables_net_ops = {
2217         .init = ip_tables_net_init,
2218         .exit = ip_tables_net_exit,
2219 };
2220
2221 static int __init ip_tables_init(void)
2222 {
2223         int ret;
2224
2225         ret = register_pernet_subsys(&ip_tables_net_ops);
2226         if (ret < 0)
2227                 goto err1;
2228
2229         /* Noone else will be downing sem now, so we won't sleep */
2230         ret = xt_register_target(&ipt_standard_target);
2231         if (ret < 0)
2232                 goto err2;
2233         ret = xt_register_target(&ipt_error_target);
2234         if (ret < 0)
2235                 goto err3;
2236         ret = xt_register_match(&icmp_matchstruct);
2237         if (ret < 0)
2238                 goto err4;
2239
2240         /* Register setsockopt */
2241         ret = nf_register_sockopt(&ipt_sockopts);
2242         if (ret < 0)
2243                 goto err5;
2244
2245         printk(KERN_INFO "ip_tables: (C) 2000-2006 Netfilter Core Team\n");
2246         return 0;
2247
2248 err5:
2249         xt_unregister_match(&icmp_matchstruct);
2250 err4:
2251         xt_unregister_target(&ipt_error_target);
2252 err3:
2253         xt_unregister_target(&ipt_standard_target);
2254 err2:
2255         unregister_pernet_subsys(&ip_tables_net_ops);
2256 err1:
2257         return ret;
2258 }
2259
/*
 * Module exit: unregister everything ip_tables_init() registered, in the
 * reverse order of registration.
 */
2260 static void __exit ip_tables_fini(void)
2261 {
2262         nf_unregister_sockopt(&ipt_sockopts);
2263
2264         xt_unregister_match(&icmp_matchstruct);
2265         xt_unregister_target(&ipt_error_target);
2266         xt_unregister_target(&ipt_standard_target);
2267
2268         unregister_pernet_subsys(&ip_tables_net_ops);
2269 }
2270
/* Symbols exported to other modules, followed by the module entry and exit hooks. */
2271 EXPORT_SYMBOL(ipt_register_table);
2272 EXPORT_SYMBOL(ipt_unregister_table);
2273 EXPORT_SYMBOL(ipt_do_table);
2274 module_init(ip_tables_init);
2275 module_exit(ip_tables_fini);