netfilter: iptables: lock free counters
[safe/jmp/linux-2.6] / net / ipv4 / netfilter / ip_tables.c
1 /*
2  * Packet matching code.
3  *
4  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5  * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11 #include <linux/cache.h>
12 #include <linux/capability.h>
13 #include <linux/skbuff.h>
14 #include <linux/kmod.h>
15 #include <linux/vmalloc.h>
16 #include <linux/netdevice.h>
17 #include <linux/module.h>
18 #include <linux/icmp.h>
19 #include <net/ip.h>
20 #include <net/compat.h>
21 #include <asm/uaccess.h>
22 #include <linux/mutex.h>
23 #include <linux/proc_fs.h>
24 #include <linux/err.h>
25 #include <linux/cpumask.h>
26
27 #include <linux/netfilter/x_tables.h>
28 #include <linux/netfilter_ipv4/ip_tables.h>
29 #include <net/netfilter/nf_log.h>
30
31 MODULE_LICENSE("GPL");
32 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
33 MODULE_DESCRIPTION("IPv4 packet filter");
34
35 /*#define DEBUG_IP_FIREWALL*/
36 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
37 /*#define DEBUG_IP_FIREWALL_USER*/
38
39 #ifdef DEBUG_IP_FIREWALL
40 #define dprintf(format, args...)  printk(format , ## args)
41 #else
42 #define dprintf(format, args...)
43 #endif
44
45 #ifdef DEBUG_IP_FIREWALL_USER
46 #define duprintf(format, args...) printk(format , ## args)
47 #else
48 #define duprintf(format, args...)
49 #endif
50
51 #ifdef CONFIG_NETFILTER_DEBUG
52 #define IP_NF_ASSERT(x)                                         \
53 do {                                                            \
54         if (!(x))                                               \
55                 printk("IP_NF_ASSERT: %s:%s:%u\n",              \
56                        __func__, __FILE__, __LINE__);   \
57 } while(0)
58 #else
59 #define IP_NF_ASSERT(x)
60 #endif
61
62 #if 0
63 /* All the better to debug you with... */
64 #define static
65 #define inline
66 #endif
67
/*
   We keep a set of rules (and counters) for each CPU, so we can avoid
   write-locking them in the softirq when updating the counters: the
   packet path (ipt_do_table below) walks its CPU's copy of the rules
   under rcu_read_lock() only, while user context publishes a new table
   when reading counters or updating rules.

   Hence the start of any table is given by get_entry() below.  */
76
77 /* Returns whether matches rule or not. */
78 /* Performance critical - called for every packet */
static inline bool
ip_packet_match(const struct iphdr *ip,
		const char *indev,
		const char *outdev,
		const struct ipt_ip *ipinfo,
		int isfrag)
{
	size_t i;
	unsigned long ret;

/* Evaluate the condition "bool", inverting the result when the
 * corresponding IPT_INV_* bit is set in the rule's invflags. */
#define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg)))

	/* Source/destination check: mask the packet address with the
	 * rule's netmask and compare against the rule's (pre-masked)
	 * address. */
	if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
		  IPT_INV_SRCIP)
	    || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
		     IPT_INV_DSTIP)) {
		dprintf("Source or dest mismatch.\n");

		dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n",
			&ip->saddr, &ipinfo->smsk.s_addr, &ipinfo->src.s_addr,
			ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
		dprintf("DST: %pI4 Mask: %pI4 Target: %pI4.%s\n",
			&ip->daddr, &ipinfo->dmsk.s_addr, &ipinfo->dst.s_addr,
			ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
		return false;
	}

	/* Look for ifname matches; this should unroll nicely.
	 * Names are compared one unsigned long at a time under the rule's
	 * byte mask (iniface_mask); ret accumulates any differing masked
	 * bits.  Assumes IFNAMSIZ is a multiple of sizeof(unsigned long)
	 * and the buffers are suitably aligned (nulldevname in
	 * ipt_do_table is explicitly long-aligned for this). */
	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
		ret |= (((const unsigned long *)indev)[i]
			^ ((const unsigned long *)ipinfo->iniface)[i])
			& ((const unsigned long *)ipinfo->iniface_mask)[i];
	}

	if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
		dprintf("VIA in mismatch (%s vs %s).%s\n",
			indev, ipinfo->iniface,
			ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
		return false;
	}

	/* Same word-at-a-time comparison for the output interface. */
	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
		ret |= (((const unsigned long *)outdev)[i]
			^ ((const unsigned long *)ipinfo->outiface)[i])
			& ((const unsigned long *)ipinfo->outiface_mask)[i];
	}

	if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
		dprintf("VIA out mismatch (%s vs %s).%s\n",
			outdev, ipinfo->outiface,
			ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
		return false;
	}

	/* Check specific protocol: proto == 0 means "any protocol". */
	if (ipinfo->proto
	    && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
		dprintf("Packet protocol %hi does not match %hi.%s\n",
			ip->protocol, ipinfo->proto,
			ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
		return false;
	}

	/* If we have a fragment rule but the packet is not a fragment
	 * then we return zero */
	if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
		dprintf("Fragment rule but not fragment.%s\n",
			ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
		return false;
	}

	return true;
}
152
153 static bool
154 ip_checkentry(const struct ipt_ip *ip)
155 {
156         if (ip->flags & ~IPT_F_MASK) {
157                 duprintf("Unknown flag bits set: %08X\n",
158                          ip->flags & ~IPT_F_MASK);
159                 return false;
160         }
161         if (ip->invflags & ~IPT_INV_MASK) {
162                 duprintf("Unknown invflag bits set: %08X\n",
163                          ip->invflags & ~IPT_INV_MASK);
164                 return false;
165         }
166         return true;
167 }
168
169 static unsigned int
170 ipt_error(struct sk_buff *skb, const struct xt_target_param *par)
171 {
172         if (net_ratelimit())
173                 printk("ip_tables: error: `%s'\n",
174                        (const char *)par->targinfo);
175
176         return NF_DROP;
177 }
178
179 /* Performance critical - called for every packet */
180 static inline bool
181 do_match(struct ipt_entry_match *m, const struct sk_buff *skb,
182          struct xt_match_param *par)
183 {
184         par->match     = m->u.kernel.match;
185         par->matchinfo = m->data;
186
187         /* Stop iteration if it doesn't match */
188         if (!m->u.kernel.match->match(skb, par))
189                 return true;
190         else
191                 return false;
192 }
193
194 /* Performance critical */
/* Return the rule entry located @offset bytes into the table blob @base. */
static inline struct ipt_entry *
get_entry(void *base, unsigned int offset)
{
	return (struct ipt_entry *)((char *)base + offset);
}
200
201 /* All zeroes == unconditional rule. */
202 /* Mildly perf critical (only if packet tracing is on) */
static inline int
unconditional(const struct ipt_ip *ip)
{
	unsigned int i;

	/* A rule matches unconditionally iff every word of its ipt_ip
	 * part (addresses, masks, interface names/masks, proto, flags,
	 * invflags) is zero. */
	for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
		if (((__u32 *)ip)[i])
			return 0;

	return 1;
/* NOTE(review): this #undef pairs with the #define FWINV inside
 * ip_packet_match() above and logically belongs at the end of that
 * function, not here.  Harmless as-is, but worth relocating. */
#undef FWINV
}
215
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
/* Packet-tracing support: only built when the TRACE target is enabled. */

/* Human-readable hook names, indexed by NF_INET_* hook number; used in
 * the "TRACE: table:chain:comment:rulenum" log line. */
static const char *const hooknames[] = {
	[NF_INET_PRE_ROUTING]		= "PREROUTING",
	[NF_INET_LOCAL_IN]		= "INPUT",
	[NF_INET_FORWARD]		= "FORWARD",
	[NF_INET_LOCAL_OUT]		= "OUTPUT",
	[NF_INET_POST_ROUTING]		= "POSTROUTING",
};

/* Indexes into comments[] below. */
enum nf_ip_trace_comments {
	NF_IP_TRACE_COMMENT_RULE,
	NF_IP_TRACE_COMMENT_RETURN,
	NF_IP_TRACE_COMMENT_POLICY,
};

static const char *const comments[] = {
	[NF_IP_TRACE_COMMENT_RULE]	= "rule",
	[NF_IP_TRACE_COMMENT_RETURN]	= "return",
	[NF_IP_TRACE_COMMENT_POLICY]	= "policy",
};

/* Fixed log parameters for trace messages (log level 4). */
static struct nf_loginfo trace_loginfo = {
	.type = NF_LOG_TYPE_LOG,
	.u = {
		.log = {
			.level = 4,
			.logflags = NF_LOG_MASK,
		},
	},
};
247
248 /* Mildly perf critical (only if packet tracing is on) */
/* IPT_ENTRY_ITERATE callback used by trace_packet(): walk entries from
 * the hook's start, tracking the current chain name and rule number,
 * until entry @e is reached (returns 1 to stop iteration there). */
static inline int
get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e,
		      char *hookname, char **chainname,
		      char **comment, unsigned int *rulenum)
{
	struct ipt_standard_target *t = (void *)ipt_get_target(s);

	if (strcmp(t->target.u.kernel.target->name, IPT_ERROR_TARGET) == 0) {
		/* Head of user chain: ERROR target with chainname */
		*chainname = t->target.data;
		(*rulenum) = 0;
	} else if (s == e) {
		(*rulenum)++;

		/* An unconditional STANDARD target with a negative verdict
		 * is a chain tail: report "policy" if we are still in the
		 * base chain (chainname == hookname), else "return". */
		if (s->target_offset == sizeof(struct ipt_entry)
		   && strcmp(t->target.u.kernel.target->name,
			     IPT_STANDARD_TARGET) == 0
		   && t->verdict < 0
		   && unconditional(&s->ip)) {
			/* Tail of chains: STANDARD target (return/policy) */
			*comment = *chainname == hookname
				? (char *)comments[NF_IP_TRACE_COMMENT_POLICY]
				: (char *)comments[NF_IP_TRACE_COMMENT_RETURN];
		}
		return 1;
	} else
		(*rulenum)++;

	return 0;
}
279
/* Emit a "TRACE: table:chain:comment:rulenum" log line for a traced
 * packet that matched rule @e on hook @hook. */
static void trace_packet(struct sk_buff *skb,
			 unsigned int hook,
			 const struct net_device *in,
			 const struct net_device *out,
			 const char *tablename,
			 struct xt_table_info *private,
			 struct ipt_entry *e)
{
	void *table_base;
	const struct ipt_entry *root;
	char *hookname, *chainname, *comment;
	unsigned int rulenum = 0;

	/* Walk this CPU's copy of the ruleset from the hook's entry
	 * point up to @e, computing chain name, rule number and the
	 * "rule"/"return"/"policy" comment. */
	table_base = (void *)private->entries[smp_processor_id()];
	root = get_entry(table_base, private->hook_entry[hook]);

	hookname = chainname = (char *)hooknames[hook];
	comment = (char *)comments[NF_IP_TRACE_COMMENT_RULE];

	IPT_ENTRY_ITERATE(root,
			  private->size - private->hook_entry[hook],
			  get_chainname_rulenum,
			  e, hookname, &chainname, &comment, &rulenum);

	nf_log_packet(AF_INET, hook, skb, in, out, &trace_loginfo,
		      "TRACE: %s:%s:%s:%u ",
		      tablename, chainname, comment, rulenum);
}
308 #endif
309
310 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
311 unsigned int
312 ipt_do_table(struct sk_buff *skb,
313              unsigned int hook,
314              const struct net_device *in,
315              const struct net_device *out,
316              struct xt_table *table)
317 {
318         static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
319         const struct iphdr *ip;
320         u_int16_t datalen;
321         bool hotdrop = false;
322         /* Initializing verdict to NF_DROP keeps gcc happy. */
323         unsigned int verdict = NF_DROP;
324         const char *indev, *outdev;
325         void *table_base;
326         struct ipt_entry *e, *back;
327         struct xt_table_info *private;
328         struct xt_match_param mtpar;
329         struct xt_target_param tgpar;
330
331         /* Initialization */
332         ip = ip_hdr(skb);
333         datalen = skb->len - ip->ihl * 4;
334         indev = in ? in->name : nulldevname;
335         outdev = out ? out->name : nulldevname;
336         /* We handle fragments by dealing with the first fragment as
337          * if it was a normal packet.  All other fragments are treated
338          * normally, except that they will NEVER match rules that ask
339          * things we don't know, ie. tcp syn flag or ports).  If the
340          * rule is also a fragment-specific rule, non-fragments won't
341          * match it. */
342         mtpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
343         mtpar.thoff   = ip_hdrlen(skb);
344         mtpar.hotdrop = &hotdrop;
345         mtpar.in      = tgpar.in  = in;
346         mtpar.out     = tgpar.out = out;
347         mtpar.family  = tgpar.family = NFPROTO_IPV4;
348         tgpar.hooknum = hook;
349
350         IP_NF_ASSERT(table->valid_hooks & (1 << hook));
351
352         rcu_read_lock();
353         private = rcu_dereference(table->private);
354         table_base = rcu_dereference(private->entries[smp_processor_id()]);
355
356         e = get_entry(table_base, private->hook_entry[hook]);
357
358         /* For return from builtin chain */
359         back = get_entry(table_base, private->underflow[hook]);
360
361         do {
362                 IP_NF_ASSERT(e);
363                 IP_NF_ASSERT(back);
364                 if (ip_packet_match(ip, indev, outdev,
365                     &e->ip, mtpar.fragoff)) {
366                         struct ipt_entry_target *t;
367
368                         if (IPT_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0)
369                                 goto no_match;
370
371                         ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
372
373                         t = ipt_get_target(e);
374                         IP_NF_ASSERT(t->u.kernel.target);
375
376 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
377     defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
378                         /* The packet is traced: log it */
379                         if (unlikely(skb->nf_trace))
380                                 trace_packet(skb, hook, in, out,
381                                              table->name, private, e);
382 #endif
383                         /* Standard target? */
384                         if (!t->u.kernel.target->target) {
385                                 int v;
386
387                                 v = ((struct ipt_standard_target *)t)->verdict;
388                                 if (v < 0) {
389                                         /* Pop from stack? */
390                                         if (v != IPT_RETURN) {
391                                                 verdict = (unsigned)(-v) - 1;
392                                                 break;
393                                         }
394                                         e = back;
395                                         back = get_entry(table_base,
396                                                          back->comefrom);
397                                         continue;
398                                 }
399                                 if (table_base + v != (void *)e + e->next_offset
400                                     && !(e->ip.flags & IPT_F_GOTO)) {
401                                         /* Save old back ptr in next entry */
402                                         struct ipt_entry *next
403                                                 = (void *)e + e->next_offset;
404                                         next->comefrom
405                                                 = (void *)back - table_base;
406                                         /* set back pointer to next entry */
407                                         back = next;
408                                 }
409
410                                 e = get_entry(table_base, v);
411                         } else {
412                                 /* Targets which reenter must return
413                                    abs. verdicts */
414                                 tgpar.target   = t->u.kernel.target;
415                                 tgpar.targinfo = t->data;
416 #ifdef CONFIG_NETFILTER_DEBUG
417                                 ((struct ipt_entry *)table_base)->comefrom
418                                         = 0xeeeeeeec;
419 #endif
420                                 verdict = t->u.kernel.target->target(skb,
421                                                                      &tgpar);
422 #ifdef CONFIG_NETFILTER_DEBUG
423                                 if (((struct ipt_entry *)table_base)->comefrom
424                                     != 0xeeeeeeec
425                                     && verdict == IPT_CONTINUE) {
426                                         printk("Target %s reentered!\n",
427                                                t->u.kernel.target->name);
428                                         verdict = NF_DROP;
429                                 }
430                                 ((struct ipt_entry *)table_base)->comefrom
431                                         = 0x57acc001;
432 #endif
433                                 /* Target might have changed stuff. */
434                                 ip = ip_hdr(skb);
435                                 datalen = skb->len - ip->ihl * 4;
436
437                                 if (verdict == IPT_CONTINUE)
438                                         e = (void *)e + e->next_offset;
439                                 else
440                                         /* Verdict */
441                                         break;
442                         }
443                 } else {
444
445                 no_match:
446                         e = (void *)e + e->next_offset;
447                 }
448         } while (!hotdrop);
449
450         rcu_read_unlock();
451
452 #ifdef DEBUG_ALLOW_ALL
453         return NF_ACCEPT;
454 #else
455         if (hotdrop)
456                 return NF_DROP;
457         else return verdict;
458 #endif
459 }
460
461 /* Figures out from what hook each rule can be called: returns 0 if
462    there are loops.  Puts hook bitmask in comefrom. */
static int
mark_source_chains(struct xt_table_info *newinfo,
		   unsigned int valid_hooks, void *entry0)
{
	unsigned int hook;

	/* No recursion; use packet counter to save back ptrs (reset
	   to 0 as we leave), and comefrom to save source hook bitmask */
	for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) {
		unsigned int pos = newinfo->hook_entry[hook];
		struct ipt_entry *e = (struct ipt_entry *)(entry0 + pos);

		if (!(valid_hooks & (1 << hook)))
			continue;

		/* Set initial back pointer. */
		e->counters.pcnt = pos;

		for (;;) {
			struct ipt_standard_target *t
				= (void *)ipt_get_target(e);
			int visited = e->comefrom & (1 << hook);

			/* Bit NF_INET_NUMHOOKS marks "on the current
			 * traversal path"; seeing it set again means the
			 * ruleset contains a loop. */
			if (e->comefrom & (1 << NF_INET_NUMHOOKS)) {
				printk("iptables: loop hook %u pos %u %08X.\n",
				       hook, pos, e->comefrom);
				return 0;
			}
			e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));

			/* Unconditional return/END. */
			if ((e->target_offset == sizeof(struct ipt_entry)
			    && (strcmp(t->target.u.user.name,
				       IPT_STANDARD_TARGET) == 0)
			    && t->verdict < 0
			    && unconditional(&e->ip)) || visited) {
				unsigned int oldpos, size;

				if (t->verdict < -NF_MAX_VERDICT - 1) {
					duprintf("mark_source_chains: bad "
						"negative verdict (%i)\n",
								t->verdict);
					return 0;
				}

				/* Return: backtrack through the last
				   big jump. */
				do {
					/* Leaving this entry: clear the
					 * "on path" bit set above. */
					e->comefrom ^= (1<<NF_INET_NUMHOOKS);
#ifdef DEBUG_IP_FIREWALL_USER
					if (e->comefrom
					    & (1 << NF_INET_NUMHOOKS)) {
						duprintf("Back unset "
							 "on hook %u "
							 "rule %u\n",
							 hook, pos);
					}
#endif
					oldpos = pos;
					/* pcnt temporarily held the saved
					 * back pointer; restore and reset. */
					pos = e->counters.pcnt;
					e->counters.pcnt = 0;

					/* We're at the start. */
					if (pos == oldpos)
						goto next;

					e = (struct ipt_entry *)
						(entry0 + pos);
				} while (oldpos == pos + e->next_offset);

				/* Move along one */
				size = e->next_offset;
				e = (struct ipt_entry *)
					(entry0 + pos + size);
				e->counters.pcnt = pos;
				pos += size;
			} else {
				int newpos = t->verdict;

				if (strcmp(t->target.u.user.name,
					   IPT_STANDARD_TARGET) == 0
				    && newpos >= 0) {
					/* NOTE(review): signed newpos is
					 * compared against an unsigned
					 * expression here — verify that a
					 * very large verdict cannot slip
					 * through the promotion. */
					if (newpos > newinfo->size -
						sizeof(struct ipt_entry)) {
						duprintf("mark_source_chains: "
							"bad verdict (%i)\n",
								newpos);
						return 0;
					}
					/* This a jump; chase it. */
					duprintf("Jump rule %u -> %u\n",
						 pos, newpos);
				} else {
					/* ... this is a fallthru */
					newpos = pos + e->next_offset;
				}
				e = (struct ipt_entry *)
					(entry0 + newpos);
				e->counters.pcnt = pos;
				pos = newpos;
			}
		}
		next:
		duprintf("Finished chain %u\n", hook);
	}
	return 1;
}
570
571 static int
572 cleanup_match(struct ipt_entry_match *m, unsigned int *i)
573 {
574         struct xt_mtdtor_param par;
575
576         if (i && (*i)-- == 0)
577                 return 1;
578
579         par.match     = m->u.kernel.match;
580         par.matchinfo = m->data;
581         par.family    = NFPROTO_IPV4;
582         if (par.match->destroy != NULL)
583                 par.match->destroy(&par);
584         module_put(par.match->me);
585         return 0;
586 }
587
588 static int
589 check_entry(struct ipt_entry *e, const char *name)
590 {
591         struct ipt_entry_target *t;
592
593         if (!ip_checkentry(&e->ip)) {
594                 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
595                 return -EINVAL;
596         }
597
598         if (e->target_offset + sizeof(struct ipt_entry_target) >
599             e->next_offset)
600                 return -EINVAL;
601
602         t = ipt_get_target(e);
603         if (e->target_offset + t->u.target_size > e->next_offset)
604                 return -EINVAL;
605
606         return 0;
607 }
608
609 static int
610 check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par,
611             unsigned int *i)
612 {
613         const struct ipt_ip *ip = par->entryinfo;
614         int ret;
615
616         par->match     = m->u.kernel.match;
617         par->matchinfo = m->data;
618
619         ret = xt_check_match(par, m->u.match_size - sizeof(*m),
620               ip->proto, ip->invflags & IPT_INV_PROTO);
621         if (ret < 0) {
622                 duprintf("ip_tables: check failed for `%s'.\n",
623                          par.match->name);
624                 return ret;
625         }
626         ++*i;
627         return 0;
628 }
629
630 static int
631 find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par,
632                  unsigned int *i)
633 {
634         struct xt_match *match;
635         int ret;
636
637         match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
638                                                       m->u.user.revision),
639                                         "ipt_%s", m->u.user.name);
640         if (IS_ERR(match) || !match) {
641                 duprintf("find_check_match: `%s' not found\n", m->u.user.name);
642                 return match ? PTR_ERR(match) : -ENOENT;
643         }
644         m->u.kernel.match = match;
645
646         ret = check_match(m, par, i);
647         if (ret)
648                 goto err;
649
650         return 0;
651 err:
652         module_put(m->u.kernel.match->me);
653         return ret;
654 }
655
656 static int check_target(struct ipt_entry *e, const char *name)
657 {
658         struct ipt_entry_target *t = ipt_get_target(e);
659         struct xt_tgchk_param par = {
660                 .table     = name,
661                 .entryinfo = e,
662                 .target    = t->u.kernel.target,
663                 .targinfo  = t->data,
664                 .hook_mask = e->comefrom,
665                 .family    = NFPROTO_IPV4,
666         };
667         int ret;
668
669         ret = xt_check_target(&par, t->u.target_size - sizeof(*t),
670               e->ip.proto, e->ip.invflags & IPT_INV_PROTO);
671         if (ret < 0) {
672                 duprintf("ip_tables: check failed for `%s'.\n",
673                          t->u.kernel.target->name);
674                 return ret;
675         }
676         return 0;
677 }
678
/* Fully resolve and validate one rule: layout checks, then all matches,
 * then the target.  On any failure, everything acquired so far (match
 * module refs / destructors, target module ref) is released.
 * NOTE(review): the "size" parameter is unused here — confirm whether it
 * can be dropped from callers or is kept for iterator-signature reasons. */
static int
find_check_entry(struct ipt_entry *e, const char *name, unsigned int size,
		 unsigned int *i)
{
	struct ipt_entry_target *t;
	struct xt_target *target;
	int ret;
	unsigned int j;
	struct xt_mtchk_param mtpar;

	ret = check_entry(e, name);
	if (ret)
		return ret;

	/* j counts matches that passed check_match(), so exactly those
	 * get cleaned up on the error path below. */
	j = 0;
	mtpar.table     = name;
	mtpar.entryinfo = &e->ip;
	mtpar.hook_mask = e->comefrom;
	mtpar.family    = NFPROTO_IPV4;
	ret = IPT_MATCH_ITERATE(e, find_check_match, &mtpar, &j);
	if (ret != 0)
		goto cleanup_matches;

	/* Resolve the target extension, auto-loading "ipt_<name>". */
	t = ipt_get_target(e);
	target = try_then_request_module(xt_find_target(AF_INET,
							t->u.user.name,
							t->u.user.revision),
					 "ipt_%s", t->u.user.name);
	if (IS_ERR(target) || !target) {
		duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
		ret = target ? PTR_ERR(target) : -ENOENT;
		goto cleanup_matches;
	}
	t->u.kernel.target = target;

	ret = check_target(e, name);
	if (ret)
		goto err;

	(*i)++;
	return 0;
 err:
	module_put(t->u.kernel.target->me);
 cleanup_matches:
	IPT_MATCH_ITERATE(e, cleanup_match, &j);
	return ret;
}
726
/* First-pass walk over a userspace table blob: verify each entry's
 * alignment and size bounds, record which entries the user-supplied
 * hook_entries/underflows offsets land on, and zero the kernel-owned
 * counters/comefrom fields.  @i counts entries seen. */
static int
check_entry_size_and_hooks(struct ipt_entry *e,
			   struct xt_table_info *newinfo,
			   unsigned char *base,
			   unsigned char *limit,
			   const unsigned int *hook_entries,
			   const unsigned int *underflows,
			   unsigned int *i)
{
	unsigned int h;

	/* Entry must be properly aligned and leave room for at least the
	 * fixed ipt_entry header before the end of the blob. */
	if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
	    || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
		duprintf("Bad offset %p\n", e);
		return -EINVAL;
	}

	/* next_offset must cover at least the entry header plus a target
	 * record header. */
	if (e->next_offset
	    < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
		duprintf("checking: element %p size %u\n",
			 e, e->next_offset);
		return -EINVAL;
	}

	/* Check hooks & underflows: only offsets that land exactly on an
	 * entry boundary are accepted. */
	for (h = 0; h < NF_INET_NUMHOOKS; h++) {
		if ((unsigned char *)e - base == hook_entries[h])
			newinfo->hook_entry[h] = hook_entries[h];
		if ((unsigned char *)e - base == underflows[h])
			newinfo->underflow[h] = underflows[h];
	}

	/* FIXME: underflows must be unconditional, standard verdicts
	   < 0 (not IPT_RETURN). --RR */

	/* Clear counters and comefrom */
	e->counters = ((struct xt_counters) { 0, 0 });
	e->comefrom = 0;

	(*i)++;
	return 0;
}
769
770 static int
771 cleanup_entry(struct ipt_entry *e, unsigned int *i)
772 {
773         struct xt_tgdtor_param par;
774         struct ipt_entry_target *t;
775
776         if (i && (*i)-- == 0)
777                 return 1;
778
779         /* Cleanup all matches */
780         IPT_MATCH_ITERATE(e, cleanup_match, NULL);
781         t = ipt_get_target(e);
782
783         par.target   = t->u.kernel.target;
784         par.targinfo = t->data;
785         par.family   = NFPROTO_IPV4;
786         if (par.target->destroy != NULL)
787                 par.target->destroy(&par);
788         module_put(par.target->me);
789         return 0;
790 }
791
/* Checks and translates the user-supplied table segment (held in
   newinfo).  Validates sizes/offsets, verifies every hook entry and
   underflow is assigned, detects rule-chain loops, runs each match's
   and target's checkentry hook, and finally replicates the validated
   blob to every other CPU's copy.  Returns 0 on success or a negative
   errno; on checkentry failure the already-checked entries are
   cleaned up before returning. */
static int
translate_table(const char *name,
                unsigned int valid_hooks,
                struct xt_table_info *newinfo,
                void *entry0,
                unsigned int size,
                unsigned int number,
                const unsigned int *hook_entries,
                const unsigned int *underflows)
{
        unsigned int i;
        int ret;

        newinfo->size = size;
        newinfo->number = number;

        /* Init all hooks to impossible value. */
        for (i = 0; i < NF_INET_NUMHOOKS; i++) {
                newinfo->hook_entry[i] = 0xFFFFFFFF;
                newinfo->underflow[i] = 0xFFFFFFFF;
        }

        duprintf("translate_table: size %u\n", newinfo->size);
        i = 0;
        /* Walk through entries, checking offsets. */
        ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
                                check_entry_size_and_hooks,
                                newinfo,
                                entry0,
                                entry0 + size,
                                hook_entries, underflows, &i);
        if (ret != 0)
                return ret;

        /* The entry count claimed by userspace must match what we walked. */
        if (i != number) {
                duprintf("translate_table: %u not %u entries\n",
                         i, number);
                return -EINVAL;
        }

        /* Check hooks all assigned */
        for (i = 0; i < NF_INET_NUMHOOKS; i++) {
                /* Only hooks which are valid */
                if (!(valid_hooks & (1 << i)))
                        continue;
                if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
                        duprintf("Invalid hook entry %u %u\n",
                                 i, hook_entries[i]);
                        return -EINVAL;
                }
                if (newinfo->underflow[i] == 0xFFFFFFFF) {
                        duprintf("Invalid underflow %u %u\n",
                                 i, underflows[i]);
                        return -EINVAL;
                }
        }

        /* Reject rule sets whose jump structure contains a loop. */
        if (!mark_source_chains(newinfo, valid_hooks, entry0))
                return -ELOOP;

        /* Finally, each sanity check must pass */
        i = 0;
        ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
                                find_check_entry, name, size, &i);

        if (ret != 0) {
                /* Unwind only the i entries that passed find_check_entry. */
                IPT_ENTRY_ITERATE(entry0, newinfo->size,
                                cleanup_entry, &i);
                return ret;
        }

        /* And one copy for every other CPU */
        for_each_possible_cpu(i) {
                if (newinfo->entries[i] && newinfo->entries[i] != entry0)
                        memcpy(newinfo->entries[i], entry0, newinfo->size);
        }

        return ret;
}
873
874 /* Gets counters. */
875 static inline int
876 add_entry_to_counter(const struct ipt_entry *e,
877                      struct xt_counters total[],
878                      unsigned int *i)
879 {
880         ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
881
882         (*i)++;
883         return 0;
884 }
885
886 static inline int
887 set_entry_to_counter(const struct ipt_entry *e,
888                      struct ipt_counters total[],
889                      unsigned int *i)
890 {
891         SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
892
893         (*i)++;
894         return 0;
895 }
896
static void
get_counters(const struct xt_table_info *t,
             struct xt_counters counters[])
{
        unsigned int cpu;
        unsigned int i;
        unsigned int curcpu;

        /* Instead of clearing (by a previous call to memset())
         * the counters and using adds, we set the counters
         * with data used by 'current' CPU
         * We dont care about preemption here.
         */
        curcpu = raw_smp_processor_id();

        /* First pass: SET from the local CPU's table copy, which
         * initializes every slot of counters[]. */
        i = 0;
        IPT_ENTRY_ITERATE(t->entries[curcpu],
                          t->size,
                          set_entry_to_counter,
                          counters,
                          &i);

        /* Second pass: ADD the per-rule counters from every other
         * possible CPU's copy on top of the local snapshot. */
        for_each_possible_cpu(cpu) {
                if (cpu == curcpu)
                        continue;
                i = 0;
                IPT_ENTRY_ITERATE(t->entries[cpu],
                                  t->size,
                                  add_entry_to_counter,
                                  counters,
                                  &i);
        }

}
931
932 /* We're lazy, and add to the first CPU; overflow works its fey magic
933  * and everything is OK. */
934 static int
935 add_counter_to_entry(struct ipt_entry *e,
936                      const struct xt_counters addme[],
937                      unsigned int *i)
938 {
939         ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
940
941         (*i)++;
942         return 0;
943 }
944
945 /* Take values from counters and add them back onto the current cpu */
946 static void put_counters(struct xt_table_info *t,
947                          const struct xt_counters counters[])
948 {
949         unsigned int i, cpu;
950
951         local_bh_disable();
952         cpu = smp_processor_id();
953         i = 0;
954         IPT_ENTRY_ITERATE(t->entries[cpu],
955                           t->size,
956                           add_counter_to_entry,
957                           counters,
958                           &i);
959         local_bh_enable();
960 }
961
962
963 static inline int
964 zero_entry_counter(struct ipt_entry *e, void *arg)
965 {
966         e->counters.bcnt = 0;
967         e->counters.pcnt = 0;
968         return 0;
969 }
970
/* Populate newinfo as a full copy of info (header fields plus every
 * per-CPU entry blob, duplicated from the local CPU's copy) but with
 * all rule counters zeroed — the fresh table that will be swapped in
 * while the old one's counters are harvested. */
static void
clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
{
        unsigned int cpu;
        const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];

        /* Copy everything up to (not including) the entries[] pointers;
         * newinfo keeps its own already-allocated per-CPU blobs. */
        memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
        for_each_possible_cpu(cpu) {
                memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
                IPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
                                  zero_entry_counter, NULL);
        }
}
984
/* Build an atomic snapshot of the table's counters for userspace.
 * A zeroed clone of the table is RCU-swapped in; after readers drain,
 * the old copy holds a stable counter set which is read out and then
 * folded back into the (now live) clone so no increments are lost.
 * Returns a vmalloc'd array (caller frees) or ERR_PTR(-ENOMEM). */
static struct xt_counters * alloc_counters(struct xt_table *table)
{
        unsigned int countersize;
        struct xt_counters *counters;
        struct xt_table_info *private = table->private;
        struct xt_table_info *info;

        /* We need atomic snapshot of counters: rest doesn't change
           (other than comefrom, which userspace doesn't care
           about). */
        countersize = sizeof(struct xt_counters) * private->number;
        counters = vmalloc_node(countersize, numa_node_id());

        if (counters == NULL)
                goto nomem;

        info = xt_alloc_table_info(private->size);
        if (!info)
                goto free_counters;

        /* Fresh copy of the ruleset with all counters zeroed. */
        clone_counters(info, private);

        /* table->lock serializes against other counter readers/writers;
         * the RCU swap makes packet-path readers move to the clone. */
        mutex_lock(&table->lock);
        xt_table_entry_swap_rcu(private, info);
        synchronize_net();      /* Wait until smoke has cleared */

        /* 'info' now holds the quiesced old entries: read the totals,
         * then add them back onto the live table so nothing is lost. */
        get_counters(info, counters);
        put_counters(private, counters);
        mutex_unlock(&table->lock);

        xt_free_table_info(info);

        return counters;

 free_counters:
        vfree(counters);
 nomem:
        return ERR_PTR(-ENOMEM);
}
1024
/* Copy the table's rule blob to userspace, then patch in the counter
 * snapshot and replace kernel match/target pointers with their
 * user-visible names.  Returns 0 or a negative errno. */
static int
copy_entries_to_user(unsigned int total_size,
                     struct xt_table *table,
                     void __user *userptr)
{
        unsigned int off, num;
        struct ipt_entry *e;
        struct xt_counters *counters;
        const struct xt_table_info *private = table->private;
        int ret = 0;
        const void *loc_cpu_entry;

        counters = alloc_counters(table);
        if (IS_ERR(counters))
                return PTR_ERR(counters);

        /* choose the copy that is on our node/cpu, ...
         * This choice is lazy (because current thread is
         * allowed to migrate to another cpu)
         */
        loc_cpu_entry = private->entries[raw_smp_processor_id()];
        if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
                ret = -EFAULT;
                goto free_counters;
        }

        /* FIXME: use iterator macros --RR */
        /* ... then go back and fix counters and names */
        for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
                unsigned int i;
                const struct ipt_entry_match *m;
                const struct ipt_entry_target *t;

                e = (struct ipt_entry *)(loc_cpu_entry + off);
                /* Overwrite the raw (kernel-side) counters in the user
                 * buffer with the consistent snapshot taken above. */
                if (copy_to_user(userptr + off
                                 + offsetof(struct ipt_entry, counters),
                                 &counters[num],
                                 sizeof(counters[num])) != 0) {
                        ret = -EFAULT;
                        goto free_counters;
                }

                /* Replace each match's kernel pointer union with the
                 * match's name string, which is what userspace expects. */
                for (i = sizeof(struct ipt_entry);
                     i < e->target_offset;
                     i += m->u.match_size) {
                        m = (void *)e + i;

                        if (copy_to_user(userptr + off + i
                                         + offsetof(struct ipt_entry_match,
                                                    u.user.name),
                                         m->u.kernel.match->name,
                                         strlen(m->u.kernel.match->name)+1)
                            != 0) {
                                ret = -EFAULT;
                                goto free_counters;
                        }
                }

                /* Same fixup for the entry's target. */
                t = ipt_get_target(e);
                if (copy_to_user(userptr + off + e->target_offset
                                 + offsetof(struct ipt_entry_target,
                                            u.user.name),
                                 t->u.kernel.target->name,
                                 strlen(t->u.kernel.target->name)+1) != 0) {
                        ret = -EFAULT;
                        goto free_counters;
                }
        }

 free_counters:
        vfree(counters);
        return ret;
}
1098
1099 #ifdef CONFIG_COMPAT
1100 static void compat_standard_from_user(void *dst, void *src)
1101 {
1102         int v = *(compat_int_t *)src;
1103
1104         if (v > 0)
1105                 v += xt_compat_calc_jump(AF_INET, v);
1106         memcpy(dst, &v, sizeof(v));
1107 }
1108
1109 static int compat_standard_to_user(void __user *dst, void *src)
1110 {
1111         compat_int_t cv = *(int *)src;
1112
1113         if (cv > 0)
1114                 cv -= xt_compat_calc_jump(AF_INET, cv);
1115         return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
1116 }
1117
/* Accumulate into *size the kernel-vs-compat layout difference for one
 * match; iterated over every match of an entry. */
static inline int
compat_calc_match(struct ipt_entry_match *m, int *size)
{
        *size += xt_compat_match_offset(m->u.kernel.match);
        return 0;
}
1124
/* Compute how much smaller one entry becomes in compat layout, record
 * that delta with the xt_compat offset machinery, shrink newinfo->size
 * accordingly, and pull every hook entry / underflow that lies after
 * this entry back by the same delta. */
static int compat_calc_entry(struct ipt_entry *e,
                             const struct xt_table_info *info,
                             void *base, struct xt_table_info *newinfo)
{
        struct ipt_entry_target *t;
        unsigned int entry_offset;
        int off, i, ret;

        /* Delta = header shrinkage + per-match + per-target shrinkage. */
        off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
        entry_offset = (void *)e - base;
        IPT_MATCH_ITERATE(e, compat_calc_match, &off);
        t = ipt_get_target(e);
        off += xt_compat_target_offset(t->u.kernel.target);
        newinfo->size -= off;
        ret = xt_compat_add_offset(AF_INET, entry_offset, off);
        if (ret)
                return ret;

        for (i = 0; i < NF_INET_NUMHOOKS; i++) {
                if (info->hook_entry[i] &&
                    (e < (struct ipt_entry *)(base + info->hook_entry[i])))
                        newinfo->hook_entry[i] -= off;
                if (info->underflow[i] &&
                    (e < (struct ipt_entry *)(base + info->underflow[i])))
                        newinfo->underflow[i] -= off;
        }
        return 0;
}
1153
/* Fill newinfo with the compat-layout view of info: same header data
 * but with size and hook/underflow offsets recomputed for the smaller
 * 32-bit entry layout.  Returns 0 or a negative errno. */
static int compat_table_info(const struct xt_table_info *info,
                             struct xt_table_info *newinfo)
{
        void *loc_cpu_entry;

        if (!newinfo || !info)
                return -EINVAL;

        /* we dont care about newinfo->entries[] */
        memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
        newinfo->initial_entries = 0;
        loc_cpu_entry = info->entries[raw_smp_processor_id()];
        return IPT_ENTRY_ITERATE(loc_cpu_entry, info->size,
                                 compat_calc_entry, info, loc_cpu_entry,
                                 newinfo);
}
1170 #endif
1171
1172 static int get_info(struct net *net, void __user *user, int *len, int compat)
1173 {
1174         char name[IPT_TABLE_MAXNAMELEN];
1175         struct xt_table *t;
1176         int ret;
1177
1178         if (*len != sizeof(struct ipt_getinfo)) {
1179                 duprintf("length %u != %zu\n", *len,
1180                          sizeof(struct ipt_getinfo));
1181                 return -EINVAL;
1182         }
1183
1184         if (copy_from_user(name, user, sizeof(name)) != 0)
1185                 return -EFAULT;
1186
1187         name[IPT_TABLE_MAXNAMELEN-1] = '\0';
1188 #ifdef CONFIG_COMPAT
1189         if (compat)
1190                 xt_compat_lock(AF_INET);
1191 #endif
1192         t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
1193                                     "iptable_%s", name);
1194         if (t && !IS_ERR(t)) {
1195                 struct ipt_getinfo info;
1196                 const struct xt_table_info *private = t->private;
1197
1198 #ifdef CONFIG_COMPAT
1199                 if (compat) {
1200                         struct xt_table_info tmp;
1201                         ret = compat_table_info(private, &tmp);
1202                         xt_compat_flush_offsets(AF_INET);
1203                         private = &tmp;
1204                 }
1205 #endif
1206                 info.valid_hooks = t->valid_hooks;
1207                 memcpy(info.hook_entry, private->hook_entry,
1208                        sizeof(info.hook_entry));
1209                 memcpy(info.underflow, private->underflow,
1210                        sizeof(info.underflow));
1211                 info.num_entries = private->number;
1212                 info.size = private->size;
1213                 strcpy(info.name, name);
1214
1215                 if (copy_to_user(user, &info, *len) != 0)
1216                         ret = -EFAULT;
1217                 else
1218                         ret = 0;
1219
1220                 xt_table_unlock(t);
1221                 module_put(t->me);
1222         } else
1223                 ret = t ? PTR_ERR(t) : -ENOENT;
1224 #ifdef CONFIG_COMPAT
1225         if (compat)
1226                 xt_compat_unlock(AF_INET);
1227 #endif
1228         return ret;
1229 }
1230
/* IPT_SO_GET_ENTRIES handler: validate the userspace request header
 * against *len, then copy the named table's rules (with fixed-up
 * counters and names) into uptr->entrytable.  -EAGAIN is returned when
 * userspace's size snapshot is stale. */
static int
get_entries(struct net *net, struct ipt_get_entries __user *uptr, int *len)
{
        int ret;
        struct ipt_get_entries get;
        struct xt_table *t;

        if (*len < sizeof(get)) {
                duprintf("get_entries: %u < %zu\n", *len, sizeof(get));
                return -EINVAL;
        }
        if (copy_from_user(&get, uptr, sizeof(get)) != 0)
                return -EFAULT;
        /* Total length must be exactly header + claimed blob size. */
        if (*len != sizeof(struct ipt_get_entries) + get.size) {
                duprintf("get_entries: %u != %zu\n",
                         *len, sizeof(get) + get.size);
                return -EINVAL;
        }

        t = xt_find_table_lock(net, AF_INET, get.name);
        if (t && !IS_ERR(t)) {
                const struct xt_table_info *private = t->private;
                duprintf("t->private->number = %u\n", private->number);
                if (get.size == private->size)
                        ret = copy_entries_to_user(private->size,
                                                   t, uptr->entrytable);
                else {
                        /* Table changed size since userspace asked. */
                        duprintf("get_entries: I've got %u not %u!\n",
                                 private->size, get.size);
                        ret = -EAGAIN;
                }
                module_put(t->me);
                xt_table_unlock(t);
        } else
                ret = t ? PTR_ERR(t) : -ENOENT;

        return ret;
}
1269
/* Core of table replacement, shared by native and compat paths: swap
 * the already-translated newinfo into the named table, hand the old
 * table's final counters back to userspace, and free the old table.
 * On success ownership of newinfo passes to the table; on failure the
 * caller still owns (and must free) newinfo. */
static int
__do_replace(struct net *net, const char *name, unsigned int valid_hooks,
             struct xt_table_info *newinfo, unsigned int num_counters,
             void __user *counters_ptr)
{
        int ret;
        struct xt_table *t;
        struct xt_table_info *oldinfo;
        struct xt_counters *counters;
        void *loc_cpu_old_entry;

        ret = 0;
        counters = vmalloc(num_counters * sizeof(struct xt_counters));
        if (!counters) {
                ret = -ENOMEM;
                goto out;
        }

        t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
                                    "iptable_%s", name);
        if (!t || IS_ERR(t)) {
                ret = t ? PTR_ERR(t) : -ENOENT;
                goto free_newinfo_counters_untrans;
        }

        /* You lied! */
        if (valid_hooks != t->valid_hooks) {
                duprintf("Valid hook crap: %08X vs %08X\n",
                         valid_hooks, t->valid_hooks);
                ret = -EINVAL;
                goto put_module;
        }

        oldinfo = xt_replace_table(t, num_counters, newinfo, &ret);
        if (!oldinfo)
                goto put_module;

        /* Update module usage count based on number of rules */
        duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
                oldinfo->number, oldinfo->initial_entries, newinfo->number);
        /* The table module is pinned while user rules are present; the
         * two conditional puts below adjust the refcount for the
         * old-rules-present -> new-rules-present transition. */
        if ((oldinfo->number > oldinfo->initial_entries) ||
            (newinfo->number <= oldinfo->initial_entries))
                module_put(t->me);
        if ((oldinfo->number > oldinfo->initial_entries) &&
            (newinfo->number <= oldinfo->initial_entries))
                module_put(t->me);

        /* Get the old counters. */
        get_counters(oldinfo, counters);
        /* Decrease module usage counts and free resource */
        loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
        IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
                          NULL);
        xt_free_table_info(oldinfo);
        /* The swap already succeeded; a copy failure is reported but
         * does not roll the table back. */
        if (copy_to_user(counters_ptr, counters,
                         sizeof(struct xt_counters) * num_counters) != 0)
                ret = -EFAULT;
        vfree(counters);
        xt_table_unlock(t);
        return ret;

 put_module:
        module_put(t->me);
        xt_table_unlock(t);
 free_newinfo_counters_untrans:
        vfree(counters);
 out:
        return ret;
}
1339
1340 static int
1341 do_replace(struct net *net, void __user *user, unsigned int len)
1342 {
1343         int ret;
1344         struct ipt_replace tmp;
1345         struct xt_table_info *newinfo;
1346         void *loc_cpu_entry;
1347
1348         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1349                 return -EFAULT;
1350
1351         /* overflow check */
1352         if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1353                 return -ENOMEM;
1354
1355         newinfo = xt_alloc_table_info(tmp.size);
1356         if (!newinfo)
1357                 return -ENOMEM;
1358
1359         /* choose the copy that is on our node/cpu */
1360         loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1361         if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1362                            tmp.size) != 0) {
1363                 ret = -EFAULT;
1364                 goto free_newinfo;
1365         }
1366
1367         ret = translate_table(tmp.name, tmp.valid_hooks,
1368                               newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
1369                               tmp.hook_entry, tmp.underflow);
1370         if (ret != 0)
1371                 goto free_newinfo;
1372
1373         duprintf("ip_tables: Translated table\n");
1374
1375         ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1376                            tmp.num_counters, tmp.counters);
1377         if (ret)
1378                 goto free_newinfo_untrans;
1379         return 0;
1380
1381  free_newinfo_untrans:
1382         IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
1383  free_newinfo:
1384         xt_free_table_info(newinfo);
1385         return ret;
1386 }
1387
1388
1389 static int
1390 do_add_counters(struct net *net, void __user *user, unsigned int len, int compat)
1391 {
1392         unsigned int i;
1393         struct xt_counters_info tmp;
1394         struct xt_counters *paddc;
1395         unsigned int num_counters;
1396         const char *name;
1397         int size;
1398         void *ptmp;
1399         struct xt_table *t;
1400         const struct xt_table_info *private;
1401         int ret = 0;
1402         void *loc_cpu_entry;
1403 #ifdef CONFIG_COMPAT
1404         struct compat_xt_counters_info compat_tmp;
1405
1406         if (compat) {
1407                 ptmp = &compat_tmp;
1408                 size = sizeof(struct compat_xt_counters_info);
1409         } else
1410 #endif
1411         {
1412                 ptmp = &tmp;
1413                 size = sizeof(struct xt_counters_info);
1414         }
1415
1416         if (copy_from_user(ptmp, user, size) != 0)
1417                 return -EFAULT;
1418
1419 #ifdef CONFIG_COMPAT
1420         if (compat) {
1421                 num_counters = compat_tmp.num_counters;
1422                 name = compat_tmp.name;
1423         } else
1424 #endif
1425         {
1426                 num_counters = tmp.num_counters;
1427                 name = tmp.name;
1428         }
1429
1430         if (len != size + num_counters * sizeof(struct xt_counters))
1431                 return -EINVAL;
1432
1433         paddc = vmalloc_node(len - size, numa_node_id());
1434         if (!paddc)
1435                 return -ENOMEM;
1436
1437         if (copy_from_user(paddc, user + size, len - size) != 0) {
1438                 ret = -EFAULT;
1439                 goto free;
1440         }
1441
1442         t = xt_find_table_lock(net, AF_INET, name);
1443         if (!t || IS_ERR(t)) {
1444                 ret = t ? PTR_ERR(t) : -ENOENT;
1445                 goto free;
1446         }
1447
1448         mutex_lock(&t->lock);
1449         private = t->private;
1450         if (private->number != num_counters) {
1451                 ret = -EINVAL;
1452                 goto unlock_up_free;
1453         }
1454
1455         preempt_disable();
1456         i = 0;
1457         /* Choose the copy that is on our node */
1458         loc_cpu_entry = private->entries[raw_smp_processor_id()];
1459         IPT_ENTRY_ITERATE(loc_cpu_entry,
1460                           private->size,
1461                           add_counter_to_entry,
1462                           paddc,
1463                           &i);
1464         preempt_enable();
1465  unlock_up_free:
1466         mutex_unlock(&t->lock);
1467         xt_table_unlock(t);
1468         module_put(t->me);
1469  free:
1470         vfree(paddc);
1471
1472         return ret;
1473 }
1474
1475 #ifdef CONFIG_COMPAT
/* 32-bit userspace layout of the replace request: fixed-width u32
 * fields and a compat pointer, so a 64-bit kernel can parse a table
 * installed by a 32-bit iptables binary. */
struct compat_ipt_replace {
        char                    name[IPT_TABLE_MAXNAMELEN];
        u32                     valid_hooks;
        u32                     num_entries;
        u32                     size;
        u32                     hook_entry[NF_INET_NUMHOOKS];
        u32                     underflow[NF_INET_NUMHOOKS];
        u32                     num_counters;
        compat_uptr_t           counters;       /* struct ipt_counters * */
        struct compat_ipt_entry entries[0];    /* flexible trailing blob */
};
1487
/* Emit one native entry to userspace in compat layout: the shrunken
 * entry header, the counter snapshot for this rule, all matches and
 * the target in their compat forms, with target_offset/next_offset
 * re-based to the compat sizes.  Advances *dstptr, shrinks *size by
 * the layout delta, and bumps *i. */
static int
compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
                          unsigned int *size, struct xt_counters *counters,
                          unsigned int *i)
{
        struct ipt_entry_target *t;
        struct compat_ipt_entry __user *ce;
        u_int16_t target_offset, next_offset;
        compat_uint_t origsize;
        int ret;

        ret = -EFAULT;
        origsize = *size;
        ce = (struct compat_ipt_entry __user *)*dstptr;
        if (copy_to_user(ce, e, sizeof(struct ipt_entry)))
                goto out;

        if (copy_to_user(&ce->counters, &counters[*i], sizeof(counters[*i])))
                goto out;

        *dstptr += sizeof(struct compat_ipt_entry);
        *size -= sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);

        ret = IPT_MATCH_ITERATE(e, xt_compat_match_to_user, dstptr, size);
        /* Offsets shrink by however much the conversion saved so far. */
        target_offset = e->target_offset - (origsize - *size);
        if (ret)
                goto out;
        t = ipt_get_target(e);
        ret = xt_compat_target_to_user(t, dstptr, size);
        if (ret)
                goto out;
        ret = -EFAULT;
        next_offset = e->next_offset - (origsize - *size);
        if (put_user(target_offset, &ce->target_offset))
                goto out;
        if (put_user(next_offset, &ce->next_offset))
                goto out;

        (*i)++;
        return 0;
out:
        return ret;
}
1531
/* Resolve one compat match by name/revision (auto-loading its module
 * if necessary), record the kernel match pointer, accumulate the
 * compat layout delta into *size, and count it in *j for error-path
 * unwinding. */
static int
compat_find_calc_match(struct ipt_entry_match *m,
                       const char *name,
                       const struct ipt_ip *ip,
                       unsigned int hookmask,
                       int *size, unsigned int *i)
{
        struct xt_match *match;

        match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
                                                      m->u.user.revision),
                                        "ipt_%s", m->u.user.name);
        if (IS_ERR(match) || !match) {
                duprintf("compat_check_calc_match: `%s' not found\n",
                         m->u.user.name);
                return match ? PTR_ERR(match) : -ENOENT;
        }
        m->u.kernel.match = match;
        *size += xt_compat_match_offset(match);

        (*i)++;
        return 0;
}
1555
/* Drop the module reference taken for one match.  When @i is non-NULL
 * this runs in countdown mode: stop (return 1) after releasing *i
 * matches, so only those resolved before a failure are released. */
static int
compat_release_match(struct ipt_entry_match *m, unsigned int *i)
{
        if (i && (*i)-- == 0)
                return 1;

        module_put(m->u.kernel.match->me);
        return 0;
}
1565
/* Release all module references held by one compat entry (every match
 * plus the target).  Countdown via @i as in compat_release_match. */
static int
compat_release_entry(struct compat_ipt_entry *e, unsigned int *i)
{
        struct ipt_entry_target *t;

        if (i && (*i)-- == 0)
                return 1;

        /* Cleanup all matches */
        COMPAT_IPT_MATCH_ITERATE(e, compat_release_match, NULL);
        t = compat_ipt_get_target(e);
        module_put(t->u.kernel.target->me);
        return 0;
}
1580
1581 static int
1582 check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
1583                                   struct xt_table_info *newinfo,
1584                                   unsigned int *size,
1585                                   unsigned char *base,
1586                                   unsigned char *limit,
1587                                   unsigned int *hook_entries,
1588                                   unsigned int *underflows,
1589                                   unsigned int *i,
1590                                   const char *name)
1591 {
1592         struct ipt_entry_target *t;
1593         struct xt_target *target;
1594         unsigned int entry_offset;
1595         unsigned int j;
1596         int ret, off, h;
1597
1598         duprintf("check_compat_entry_size_and_hooks %p\n", e);
1599         if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0
1600             || (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) {
1601                 duprintf("Bad offset %p, limit = %p\n", e, limit);
1602                 return -EINVAL;
1603         }
1604
1605         if (e->next_offset < sizeof(struct compat_ipt_entry) +
1606                              sizeof(struct compat_xt_entry_target)) {
1607                 duprintf("checking: element %p size %u\n",
1608                          e, e->next_offset);
1609                 return -EINVAL;
1610         }
1611
1612         /* For purposes of check_entry casting the compat entry is fine */
1613         ret = check_entry((struct ipt_entry *)e, name);
1614         if (ret)
1615                 return ret;
1616
1617         off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1618         entry_offset = (void *)e - (void *)base;
1619         j = 0;
1620         ret = COMPAT_IPT_MATCH_ITERATE(e, compat_find_calc_match, name,
1621                                        &e->ip, e->comefrom, &off, &j);
1622         if (ret != 0)
1623                 goto release_matches;
1624
1625         t = compat_ipt_get_target(e);
1626         target = try_then_request_module(xt_find_target(AF_INET,
1627                                                         t->u.user.name,
1628                                                         t->u.user.revision),
1629                                          "ipt_%s", t->u.user.name);
1630         if (IS_ERR(target) || !target) {
1631                 duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
1632                          t->u.user.name);
1633                 ret = target ? PTR_ERR(target) : -ENOENT;
1634                 goto release_matches;
1635         }
1636         t->u.kernel.target = target;
1637
1638         off += xt_compat_target_offset(target);
1639         *size += off;
1640         ret = xt_compat_add_offset(AF_INET, entry_offset, off);
1641         if (ret)
1642                 goto out;
1643
1644         /* Check hooks & underflows */
1645         for (h = 0; h < NF_INET_NUMHOOKS; h++) {
1646                 if ((unsigned char *)e - base == hook_entries[h])
1647                         newinfo->hook_entry[h] = hook_entries[h];
1648                 if ((unsigned char *)e - base == underflows[h])
1649                         newinfo->underflow[h] = underflows[h];
1650         }
1651
1652         /* Clear counters and comefrom */
1653         memset(&e->counters, 0, sizeof(e->counters));
1654         e->comefrom = 0;
1655
1656         (*i)++;
1657         return 0;
1658
1659 out:
1660         module_put(t->u.kernel.target->me);
1661 release_matches:
1662         IPT_MATCH_ITERATE(e, compat_release_match, &j);
1663         return ret;
1664 }
1665
1666 static int
1667 compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
1668                             unsigned int *size, const char *name,
1669                             struct xt_table_info *newinfo, unsigned char *base)
1670 {
1671         struct ipt_entry_target *t;
1672         struct xt_target *target;
1673         struct ipt_entry *de;
1674         unsigned int origsize;
1675         int ret, h;
1676
1677         ret = 0;
1678         origsize = *size;
1679         de = (struct ipt_entry *)*dstptr;
1680         memcpy(de, e, sizeof(struct ipt_entry));
1681         memcpy(&de->counters, &e->counters, sizeof(e->counters));
1682
1683         *dstptr += sizeof(struct ipt_entry);
1684         *size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1685
1686         ret = COMPAT_IPT_MATCH_ITERATE(e, xt_compat_match_from_user,
1687                                        dstptr, size);
1688         if (ret)
1689                 return ret;
1690         de->target_offset = e->target_offset - (origsize - *size);
1691         t = compat_ipt_get_target(e);
1692         target = t->u.kernel.target;
1693         xt_compat_target_from_user(t, dstptr, size);
1694
1695         de->next_offset = e->next_offset - (origsize - *size);
1696         for (h = 0; h < NF_INET_NUMHOOKS; h++) {
1697                 if ((unsigned char *)de - base < newinfo->hook_entry[h])
1698                         newinfo->hook_entry[h] -= origsize - *size;
1699                 if ((unsigned char *)de - base < newinfo->underflow[h])
1700                         newinfo->underflow[h] -= origsize - *size;
1701         }
1702         return ret;
1703 }
1704
/*
 * Run the native checkentry pass over an entry that has already been
 * translated from compat layout: validate every match against the
 * native xt_mtchk_param, then the target.  On failure, unwind only the
 * matches that were accepted (their count is carried in j through the
 * iterate callbacks).  *i counts fully-validated entries so the caller
 * can unwind partial progress.
 */
static int
compat_check_entry(struct ipt_entry *e, const char *name,
                                     unsigned int *i)
{
        struct xt_mtchk_param mtpar;
        unsigned int j;
        int ret;

        j = 0;
        mtpar.table     = name;
        mtpar.entryinfo = &e->ip;
        mtpar.hook_mask = e->comefrom;  /* hooks reaching this entry */
        mtpar.family    = NFPROTO_IPV4;
        ret = IPT_MATCH_ITERATE(e, check_match, &mtpar, &j);
        if (ret)
                goto cleanup_matches;

        ret = check_target(e, name);
        if (ret)
                goto cleanup_matches;

        (*i)++;
        return 0;

 cleanup_matches:
        /* Release only the j matches already checked. */
        IPT_MATCH_ITERATE(e, cleanup_match, &j);
        return ret;
}
1733
/*
 * translate_compat_table - convert a compat (32-bit) ruleset to native form.
 *
 * Walks the user-supplied compat blob in *pentry0 checking offsets and
 * hook positions, allocates a new xt_table_info sized for the native
 * layout, converts every entry, re-runs the native checkentry pass, and
 * duplicates the result to every other CPU's copy.  On success
 * *pinfo/*pentry0 are replaced (old info freed) and 0 returned; on
 * failure all match/target module references taken during the check
 * pass are dropped.
 *
 * The AF_INET xt_compat lock must span the check and copy phases: the
 * check pass records per-entry size deltas in the compat offset table
 * that the copy pass depends on.
 */
static int
translate_compat_table(const char *name,
                       unsigned int valid_hooks,
                       struct xt_table_info **pinfo,
                       void **pentry0,
                       unsigned int total_size,
                       unsigned int number,
                       unsigned int *hook_entries,
                       unsigned int *underflows)
{
        unsigned int i, j;
        struct xt_table_info *newinfo, *info;
        void *pos, *entry0, *entry1;
        unsigned int size;
        int ret;

        info = *pinfo;
        entry0 = *pentry0;
        size = total_size;
        info->number = number;

        /* Init all hooks to impossible value. */
        for (i = 0; i < NF_INET_NUMHOOKS; i++) {
                info->hook_entry[i] = 0xFFFFFFFF;
                info->underflow[i] = 0xFFFFFFFF;
        }

        duprintf("translate_compat_table: size %u\n", info->size);
        j = 0;  /* entries successfully checked (module refs held) */
        xt_compat_lock(AF_INET);
        /* Walk through entries, checking offsets.  'size' accumulates
         * the native total as each entry's growth is added. */
        ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size,
                                       check_compat_entry_size_and_hooks,
                                       info, &size, entry0,
                                       entry0 + total_size,
                                       hook_entries, underflows, &j, name);
        if (ret != 0)
                goto out_unlock;

        ret = -EINVAL;
        if (j != number) {
                duprintf("translate_compat_table: %u not %u entries\n",
                         j, number);
                goto out_unlock;
        }

        /* Check hooks all assigned */
        for (i = 0; i < NF_INET_NUMHOOKS; i++) {
                /* Only hooks which are valid */
                if (!(valid_hooks & (1 << i)))
                        continue;
                if (info->hook_entry[i] == 0xFFFFFFFF) {
                        duprintf("Invalid hook entry %u %u\n",
                                 i, hook_entries[i]);
                        goto out_unlock;
                }
                if (info->underflow[i] == 0xFFFFFFFF) {
                        duprintf("Invalid underflow %u %u\n",
                                 i, underflows[i]);
                        goto out_unlock;
                }
        }

        ret = -ENOMEM;
        newinfo = xt_alloc_table_info(size);    /* native-sized table */
        if (!newinfo)
                goto out_unlock;

        newinfo->number = number;
        for (i = 0; i < NF_INET_NUMHOOKS; i++) {
                newinfo->hook_entry[i] = info->hook_entry[i];
                newinfo->underflow[i] = info->underflow[i];
        }
        entry1 = newinfo->entries[raw_smp_processor_id()];
        pos = entry1;
        size = total_size;
        ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size,
                                       compat_copy_entry_from_user,
                                       &pos, &size, name, newinfo, entry1);
        /* Offset table no longer needed once everything is copied. */
        xt_compat_flush_offsets(AF_INET);
        xt_compat_unlock(AF_INET);
        if (ret)
                goto free_newinfo;

        ret = -ELOOP;
        if (!mark_source_chains(newinfo, valid_hooks, entry1))
                goto free_newinfo;

        i = 0;
        ret = IPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry,
                                name, &i);
        if (ret) {
                /* First i entries passed compat_check_entry and were
                 * cleaned by it on failure paths; release the compat
                 * refs of the remaining j-i, then clean the i native
                 * entries that were fully checked. */
                j -= i;
                /* NOTE(review): entry0 holds total_size bytes of compat
                 * entries but is iterated with newinfo->size here —
                 * verify this cannot walk past the compat blob. */
                COMPAT_IPT_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i,
                                                  compat_release_entry, &j);
                IPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i);
                xt_free_table_info(newinfo);
                return ret;
        }

        /* And one copy for every other CPU */
        for_each_possible_cpu(i)
                if (newinfo->entries[i] && newinfo->entries[i] != entry1)
                        memcpy(newinfo->entries[i], entry1, newinfo->size);

        *pinfo = newinfo;
        *pentry0 = entry1;
        xt_free_table_info(info);
        return 0;

free_newinfo:
        xt_free_table_info(newinfo);
out:
        /* Drop the module refs taken during the check pass. */
        COMPAT_IPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j);
        return ret;
out_unlock:
        xt_compat_flush_offsets(AF_INET);
        xt_compat_unlock(AF_INET);
        goto out;
}
1854
/*
 * Compat counterpart of do_replace: copy the 32-bit ipt_replace header
 * and ruleset blob from userland, translate it to the native layout,
 * then swap it into place through the shared __do_replace.
 */
static int
compat_do_replace(struct net *net, void __user *user, unsigned int len)
{
        int ret;
        struct compat_ipt_replace tmp;
        struct xt_table_info *newinfo;
        void *loc_cpu_entry;

        if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
                return -EFAULT;

        /* overflow check */
        if (tmp.size >= INT_MAX / num_possible_cpus())
                return -ENOMEM;
        if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
                return -ENOMEM;

        newinfo = xt_alloc_table_info(tmp.size);
        if (!newinfo)
                return -ENOMEM;

        /* choose the copy that is on our node/cpu */
        loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
        if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
                           tmp.size) != 0) {
                ret = -EFAULT;
                goto free_newinfo;
        }

        /* On success this replaces newinfo/loc_cpu_entry with a freshly
         * allocated, natively-sized table and frees the compat one. */
        ret = translate_compat_table(tmp.name, tmp.valid_hooks,
                                     &newinfo, &loc_cpu_entry, tmp.size,
                                     tmp.num_entries, tmp.hook_entry,
                                     tmp.underflow);
        if (ret != 0)
                goto free_newinfo;

        duprintf("compat_do_replace: Translated table\n");

        ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
                           tmp.num_counters, compat_ptr(tmp.counters));
        if (ret)
                goto free_newinfo_untrans;
        return 0;

 free_newinfo_untrans:
        IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
 free_newinfo:
        xt_free_table_info(newinfo);
        return ret;
}
1905
1906 static int
1907 compat_do_ipt_set_ctl(struct sock *sk,  int cmd, void __user *user,
1908                       unsigned int len)
1909 {
1910         int ret;
1911
1912         if (!capable(CAP_NET_ADMIN))
1913                 return -EPERM;
1914
1915         switch (cmd) {
1916         case IPT_SO_SET_REPLACE:
1917                 ret = compat_do_replace(sock_net(sk), user, len);
1918                 break;
1919
1920         case IPT_SO_SET_ADD_COUNTERS:
1921                 ret = do_add_counters(sock_net(sk), user, len, 1);
1922                 break;
1923
1924         default:
1925                 duprintf("do_ipt_set_ctl:  unknown request %i\n", cmd);
1926                 ret = -EINVAL;
1927         }
1928
1929         return ret;
1930 }
1931
/* Compat layout of the IPT_SO_GET_ENTRIES exchange: fixed header
 * followed by the translated entry table copied back to userland. */
struct compat_ipt_get_entries {
        char name[IPT_TABLE_MAXNAMELEN];        /* table to dump */
        compat_uint_t size;                     /* size of entrytable in bytes */
        struct compat_ipt_entry entrytable[0];  /* variable-length entries */
};
1937
/*
 * Dump a table's ruleset to 32-bit userland: snapshot the counters,
 * then convert each native entry via compat_copy_entry_to_user.
 * Caller (compat_get_entries) holds the AF_INET xt_compat lock so the
 * compat offset table is stable during the walk.
 */
static int
compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
                            void __user *userptr)
{
        struct xt_counters *counters;
        const struct xt_table_info *private = table->private;
        void __user *pos;
        unsigned int size;
        int ret = 0;
        const void *loc_cpu_entry;
        unsigned int i = 0;

        counters = alloc_counters(table);
        if (IS_ERR(counters))
                return PTR_ERR(counters);

        /* choose the copy that is on our node/cpu, ...
         * This choice is lazy (because current thread is
         * allowed to migrate to another cpu)
         */
        loc_cpu_entry = private->entries[raw_smp_processor_id()];
        pos = userptr;
        size = total_size;
        ret = IPT_ENTRY_ITERATE(loc_cpu_entry, total_size,
                                compat_copy_entry_to_user,
                                &pos, &size, counters, &i);

        vfree(counters);
        return ret;
}
1968
/*
 * IPT_SO_GET_ENTRIES (compat): validate the request header, compute
 * the compat-translated table size, and copy the ruleset out if the
 * caller's buffer matches it exactly (-EAGAIN lets userland retry with
 * the size reported by IPT_SO_GET_INFO).
 */
static int
compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
                   int *len)
{
        int ret;
        struct compat_ipt_get_entries get;
        struct xt_table *t;

        if (*len < sizeof(get)) {
                duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
                return -EINVAL;
        }

        if (copy_from_user(&get, uptr, sizeof(get)) != 0)
                return -EFAULT;

        /* NOTE(review): sizeof(...) + get.size is unchecked user
         * arithmetic; on a 32-bit size_t a huge get.size could wrap —
         * confirm against later hardening of this path. */
        if (*len != sizeof(struct compat_ipt_get_entries) + get.size) {
                duprintf("compat_get_entries: %u != %zu\n",
                         *len, sizeof(get) + get.size);
                return -EINVAL;
        }

        /* Lock spans compat_table_info (which records per-entry offset
         * deltas) and the copy-out that consumes them. */
        xt_compat_lock(AF_INET);
        t = xt_find_table_lock(net, AF_INET, get.name);
        if (t && !IS_ERR(t)) {
                const struct xt_table_info *private = t->private;
                struct xt_table_info info;
                duprintf("t->private->number = %u\n", private->number);
                ret = compat_table_info(private, &info);
                if (!ret && get.size == info.size) {
                        ret = compat_copy_entries_to_user(private->size,
                                                          t, uptr->entrytable);
                } else if (!ret) {
                        duprintf("compat_get_entries: I've got %u not %u!\n",
                                 private->size, get.size);
                        ret = -EAGAIN;
                }
                xt_compat_flush_offsets(AF_INET);
                module_put(t->me);
                xt_table_unlock(t);
        } else
                ret = t ? PTR_ERR(t) : -ENOENT;

        xt_compat_unlock(AF_INET);
        return ret;
}
2015
2016 static int do_ipt_get_ctl(struct sock *, int, void __user *, int *);
2017
/*
 * Compat getsockopt entry point.  INFO and ENTRIES need 32-bit layout
 * translation (get_info is passed 1 for its compat flag); everything
 * else — the revision queries — is layout-compatible and is forwarded
 * to the native do_ipt_get_ctl.
 */
static int
compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
        int ret;

        if (!capable(CAP_NET_ADMIN))
                return -EPERM;

        switch (cmd) {
        case IPT_SO_GET_INFO:
                ret = get_info(sock_net(sk), user, len, 1);
                break;
        case IPT_SO_GET_ENTRIES:
                ret = compat_get_entries(sock_net(sk), user, len);
                break;
        default:
                ret = do_ipt_get_ctl(sk, cmd, user, len);
        }
        return ret;
}
2038 #endif
2039
2040 static int
2041 do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2042 {
2043         int ret;
2044
2045         if (!capable(CAP_NET_ADMIN))
2046                 return -EPERM;
2047
2048         switch (cmd) {
2049         case IPT_SO_SET_REPLACE:
2050                 ret = do_replace(sock_net(sk), user, len);
2051                 break;
2052
2053         case IPT_SO_SET_ADD_COUNTERS:
2054                 ret = do_add_counters(sock_net(sk), user, len, 0);
2055                 break;
2056
2057         default:
2058                 duprintf("do_ipt_set_ctl:  unknown request %i\n", cmd);
2059                 ret = -EINVAL;
2060         }
2061
2062         return ret;
2063 }
2064
/*
 * Native getsockopt entry point for the IPT_SO_GET_* range: table
 * info, full entry dumps, and match/target revision queries (the
 * latter auto-load "ipt_<name>" modules on a miss).
 */
static int
do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
        int ret;

        if (!capable(CAP_NET_ADMIN))
                return -EPERM;

        switch (cmd) {
        case IPT_SO_GET_INFO:
                ret = get_info(sock_net(sk), user, len, 0);
                break;

        case IPT_SO_GET_ENTRIES:
                ret = get_entries(sock_net(sk), user, len);
                break;

        case IPT_SO_GET_REVISION_MATCH:
        case IPT_SO_GET_REVISION_TARGET: {
                struct ipt_get_revision rev;
                int target;    /* 1 = query a target, 0 = a match */

                if (*len != sizeof(rev)) {
                        ret = -EINVAL;
                        break;
                }
                if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
                        ret = -EFAULT;
                        break;
                }

                if (cmd == IPT_SO_GET_REVISION_TARGET)
                        target = 1;
                else
                        target = 0;

                /* xt_find_revision stores its result through &ret;
                 * on a miss, request_module("ipt_<name>") and retry. */
                try_then_request_module(xt_find_revision(AF_INET, rev.name,
                                                         rev.revision,
                                                         target, &ret),
                                        "ipt_%s", rev.name);
                break;
        }

        default:
                duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
                ret = -EINVAL;
        }

        return ret;
}
2115
/*
 * ipt_register_table - register an iptables table with the xtables core.
 * Allocates per-cpu rule storage, seeds it from the template ruleset in
 * @repl, validates/translates it, then hands it to xt_register_table.
 * Returns the live table, or ERR_PTR on failure (newinfo freed).
 */
struct xt_table *ipt_register_table(struct net *net, struct xt_table *table,
                                    const struct ipt_replace *repl)
{
        int ret;
        struct xt_table_info *newinfo;
        /* Empty placeholder info that xt_register_table swaps out. */
        struct xt_table_info bootstrap
                = { 0, 0, 0, { 0 }, { 0 }, { } };
        void *loc_cpu_entry;
        struct xt_table *new_table;

        newinfo = xt_alloc_table_info(repl->size);
        if (!newinfo) {
                ret = -ENOMEM;
                goto out;
        }

        /* choose the copy on our node/cpu, but dont care about preemption */
        loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
        memcpy(loc_cpu_entry, repl->entries, repl->size);

        ret = translate_table(table->name, table->valid_hooks,
                              newinfo, loc_cpu_entry, repl->size,
                              repl->num_entries,
                              repl->hook_entry,
                              repl->underflow);
        if (ret != 0)
                goto out_free;

        new_table = xt_register_table(net, table, &bootstrap, newinfo);
        if (IS_ERR(new_table)) {
                ret = PTR_ERR(new_table);
                goto out_free;
        }

        return new_table;

out_free:
        xt_free_table_info(newinfo);
out:
        return ERR_PTR(ret);
}
2157
/*
 * ipt_unregister_table - tear down a table registered above: unhook it
 * from the xtables core, run cleanup_entry over every rule (dropping
 * match/target module refs), then free the per-cpu rule storage.
 * The owner-module ref is dropped only when the ruleset grew beyond
 * its initial entries — presumably mirroring a ref taken on replace;
 * confirm against __do_replace.
 */
void ipt_unregister_table(struct xt_table *table)
{
        struct xt_table_info *private;
        void *loc_cpu_entry;
        /* Save before xt_unregister_table; table may not be touched after. */
        struct module *table_owner = table->me;

        private = xt_unregister_table(table);

        /* Decrease module usage counts and free resources */
        loc_cpu_entry = private->entries[raw_smp_processor_id()];
        IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL);
        if (private->number > private->initial_entries)
                module_put(table_owner);
        xt_free_table_info(private);
}
2173
/* Returns true if the given ICMP type/code is matched by the rule's
 * range (test_type 0xFF acts as a match-any wildcard), XOR-ed with
 * the rule's inversion flag. */
static inline bool
icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
                     u_int8_t type, u_int8_t code,
                     bool invert)
{
        bool hit;

        if (test_type == 0xFF)
                hit = true;
        else
                hit = type == test_type &&
                      code >= min_code && code <= max_code;

        return hit ^ invert;
}
2184
2185 static bool
2186 icmp_match(const struct sk_buff *skb, const struct xt_match_param *par)
2187 {
2188         const struct icmphdr *ic;
2189         struct icmphdr _icmph;
2190         const struct ipt_icmp *icmpinfo = par->matchinfo;
2191
2192         /* Must not be a fragment. */
2193         if (par->fragoff != 0)
2194                 return false;
2195
2196         ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph);
2197         if (ic == NULL) {
2198                 /* We've been asked to examine this packet, and we
2199                  * can't.  Hence, no choice but to drop.
2200                  */
2201                 duprintf("Dropping evil ICMP tinygram.\n");
2202                 *par->hotdrop = true;
2203                 return false;
2204         }
2205
2206         return icmp_type_code_match(icmpinfo->type,
2207                                     icmpinfo->code[0],
2208                                     icmpinfo->code[1],
2209                                     ic->type, ic->code,
2210                                     !!(icmpinfo->invflags&IPT_ICMP_INV));
2211 }
2212
2213 static bool icmp_checkentry(const struct xt_mtchk_param *par)
2214 {
2215         const struct ipt_icmp *icmpinfo = par->matchinfo;
2216
2217         /* Must specify no unknown invflags */
2218         return !(icmpinfo->invflags & ~IPT_ICMP_INV);
2219 }
2220
2221 /* The built-in targets: standard (NULL) and error. */
/* Built-in standard target: its payload is a single verdict int, so
 * only size and 32/64-bit translation hooks are declared here; no
 * .target handler is set (the verdict is presumably consumed by the
 * table traversal core — see ipt_do_table). */
static struct xt_target ipt_standard_target __read_mostly = {
        .name           = IPT_STANDARD_TARGET,
        .targetsize     = sizeof(int),
        .family         = AF_INET,
#ifdef CONFIG_COMPAT
        .compatsize     = sizeof(compat_int_t),
        .compat_from_user = compat_standard_from_user,
        .compat_to_user = compat_standard_to_user,
#endif
};
2232
/* Built-in error target: invokes ipt_error when hit; its payload is a
 * name buffer (sized IPT_FUNCTION_MAXNAMELEN). */
static struct xt_target ipt_error_target __read_mostly = {
        .name           = IPT_ERROR_TARGET,
        .target         = ipt_error,
        .targetsize     = IPT_FUNCTION_MAXNAMELEN,
        .family         = AF_INET,
};
2239
/* setsockopt/getsockopt registration for the IPT_SO_* control range,
 * with 32-bit compat entry points when CONFIG_COMPAT is enabled. */
static struct nf_sockopt_ops ipt_sockopts = {
        .pf             = PF_INET,
        .set_optmin     = IPT_BASE_CTL,
        .set_optmax     = IPT_SO_SET_MAX+1,
        .set            = do_ipt_set_ctl,
#ifdef CONFIG_COMPAT
        .compat_set     = compat_do_ipt_set_ctl,
#endif
        .get_optmin     = IPT_BASE_CTL,
        .get_optmax     = IPT_SO_GET_MAX+1,
        .get            = do_ipt_get_ctl,
#ifdef CONFIG_COMPAT
        .compat_get     = compat_do_ipt_get_ctl,
#endif
        .owner          = THIS_MODULE,
};
2256
/* Built-in "icmp" match (the -p icmp --icmp-type rule machinery). */
static struct xt_match icmp_matchstruct __read_mostly = {
        .name           = "icmp",
        .match          = icmp_match,
        .matchsize      = sizeof(struct ipt_icmp),
        .checkentry     = icmp_checkentry,
        .proto          = IPPROTO_ICMP,
        .family         = AF_INET,
};
2265
/* Per-network-namespace setup: delegate to the generic xtables core
 * for AF_INET. */
static int __net_init ip_tables_net_init(struct net *net)
{
        return xt_proto_init(net, AF_INET);
}
2270
/* Per-network-namespace teardown: mirror of ip_tables_net_init. */
static void __net_exit ip_tables_net_exit(struct net *net)
{
        xt_proto_fini(net, AF_INET);
}
2275
/* Hooks run for every network namespace as it comes and goes. */
static struct pernet_operations ip_tables_net_ops = {
        .init = ip_tables_net_init,
        .exit = ip_tables_net_exit,
};
2280
/*
 * Module init: register the per-netns subsystem, the built-in standard
 * and error targets, the icmp match, and finally the sockopt control
 * interface.  Each failure unwinds everything registered so far, in
 * reverse order.
 */
static int __init ip_tables_init(void)
{
        int ret;

        ret = register_pernet_subsys(&ip_tables_net_ops);
        if (ret < 0)
                goto err1;

        /* No one else will be downing sem now, so we won't sleep */
        ret = xt_register_target(&ipt_standard_target);
        if (ret < 0)
                goto err2;
        ret = xt_register_target(&ipt_error_target);
        if (ret < 0)
                goto err3;
        ret = xt_register_match(&icmp_matchstruct);
        if (ret < 0)
                goto err4;

        /* Register setsockopt */
        ret = nf_register_sockopt(&ipt_sockopts);
        if (ret < 0)
                goto err5;

        printk(KERN_INFO "ip_tables: (C) 2000-2006 Netfilter Core Team\n");
        return 0;

err5:
        xt_unregister_match(&icmp_matchstruct);
err4:
        xt_unregister_target(&ipt_error_target);
err3:
        xt_unregister_target(&ipt_standard_target);
err2:
        unregister_pernet_subsys(&ip_tables_net_ops);
err1:
        return ret;
}
2319
/* Module exit: unregister everything ip_tables_init set up, in the
 * reverse order it was registered. */
static void __exit ip_tables_fini(void)
{
        nf_unregister_sockopt(&ipt_sockopts);

        xt_unregister_match(&icmp_matchstruct);
        xt_unregister_target(&ipt_error_target);
        xt_unregister_target(&ipt_standard_target);

        unregister_pernet_subsys(&ip_tables_net_ops);
}
2330
2331 EXPORT_SYMBOL(ipt_register_table);
2332 EXPORT_SYMBOL(ipt_unregister_table);
2333 EXPORT_SYMBOL(ipt_do_table);
2334 module_init(ip_tables_init);
2335 module_exit(ip_tables_fini);