netfilter: ip_tables: unfold two critical loops in ip_packet_match()
[safe/jmp/linux-2.6] / net / ipv4 / netfilter / ip_tables.c
1 /*
2  * Packet matching code.
3  *
4  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5  * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11 #include <linux/cache.h>
12 #include <linux/capability.h>
13 #include <linux/skbuff.h>
14 #include <linux/kmod.h>
15 #include <linux/vmalloc.h>
16 #include <linux/netdevice.h>
17 #include <linux/module.h>
18 #include <linux/icmp.h>
19 #include <net/ip.h>
20 #include <net/compat.h>
21 #include <asm/uaccess.h>
22 #include <linux/mutex.h>
23 #include <linux/proc_fs.h>
24 #include <linux/err.h>
25 #include <linux/cpumask.h>
26
27 #include <linux/netfilter/x_tables.h>
28 #include <linux/netfilter_ipv4/ip_tables.h>
29 #include <net/netfilter/nf_log.h>
30
31 MODULE_LICENSE("GPL");
32 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
33 MODULE_DESCRIPTION("IPv4 packet filter");
34
/*#define DEBUG_IP_FIREWALL*/
/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
/*#define DEBUG_IP_FIREWALL_USER*/

/* dprintf: packet-path debug output; compiles to nothing unless
   DEBUG_IP_FIREWALL is defined above. */
#ifdef DEBUG_IP_FIREWALL
#define dprintf(format, args...)  printk(format , ## args)
#else
#define dprintf(format, args...)
#endif

/* duprintf: userspace/configuration-path debug output; compiles to
   nothing unless DEBUG_IP_FIREWALL_USER is defined above. */
#ifdef DEBUG_IP_FIREWALL_USER
#define duprintf(format, args...) printk(format , ## args)
#else
#define duprintf(format, args...)
#endif

/* IP_NF_ASSERT: log (but do not halt) when an invariant fails;
   a no-op without CONFIG_NETFILTER_DEBUG. */
#ifdef CONFIG_NETFILTER_DEBUG
#define IP_NF_ASSERT(x)                                         \
do {                                                            \
        if (!(x))                                               \
                printk("IP_NF_ASSERT: %s:%s:%u\n",              \
                       __func__, __FILE__, __LINE__);   \
} while(0)
#else
#define IP_NF_ASSERT(x)
#endif

/* Removing static/inline makes every symbol visible to a debugger. */
#if 0
/* All the better to debug you with... */
#define static
#define inline
#endif
67
68 /*
69    We keep a set of rules for each CPU, so we can avoid write-locking
70    them in the softirq when updating the counters and therefore
71    only need to read-lock in the softirq; doing a write_lock_bh() in user
72    context stops packets coming through and allows user context to read
73    the counters or update the rules.
74
75    Hence the start of any table is given by get_table() below.  */
76
/*
 * Compare interface names _a and _b under _mask, one unsigned long word
 * at a time.  Returns 0 on match, non-zero otherwise.  All three buffers
 * are IFNAMSIZ bytes; assumes they are suitably aligned for unsigned
 * long access (nulldevname in ipt_do_table is explicitly aligned —
 * NOTE(review): confirm alignment of ipt_ip's iniface/outiface fields).
 */
static unsigned long ifname_compare(const char *_a, const char *_b,
                                    const unsigned char *_mask)
{
        const unsigned long *a = (const unsigned long *)_a;
        const unsigned long *b = (const unsigned long *)_b;
        const unsigned long *mask = (const unsigned long *)_mask;
        unsigned long ret;

        /* The IFNAMSIZ comparisons are compile-time constants, so the
           compiler emits only the words that actually exist. */
        ret = (a[0] ^ b[0]) & mask[0];
        if (IFNAMSIZ > sizeof(unsigned long))
                ret |= (a[1] ^ b[1]) & mask[1];
        if (IFNAMSIZ > 2 * sizeof(unsigned long))
                ret |= (a[2] ^ b[2]) & mask[2];
        if (IFNAMSIZ > 3 * sizeof(unsigned long))
                ret |= (a[3] ^ b[3]) & mask[3];
        BUILD_BUG_ON(IFNAMSIZ > 4 * sizeof(unsigned long));
        return ret;
}
95
/* Returns whether matches rule or not. */
/* Performance critical - called for every packet */
static inline bool
ip_packet_match(const struct iphdr *ip,
                const char *indev,
                const char *outdev,
                const struct ipt_ip *ipinfo,
                int isfrag)
{
        unsigned long ret;

/* Evaluate "bool", inverting the result when the corresponding
   IPT_INV_* bit is set in the rule's invflags. */
#define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg)))

        /* Source and destination address, each masked before comparison. */
        if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
                  IPT_INV_SRCIP)
            || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
                     IPT_INV_DSTIP)) {
                dprintf("Source or dest mismatch.\n");

                dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n",
                        &ip->saddr, &ipinfo->smsk.s_addr, &ipinfo->src.s_addr,
                        ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
                dprintf("DST: %pI4 Mask: %pI4 Target: %pI4.%s\n",
                        &ip->daddr, &ipinfo->dmsk.s_addr, &ipinfo->dst.s_addr,
                        ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
                return false;
        }

        /* Incoming interface name, masked word-compare (0 == match). */
        ret = ifname_compare(indev, ipinfo->iniface, ipinfo->iniface_mask);

        if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
                dprintf("VIA in mismatch (%s vs %s).%s\n",
                        indev, ipinfo->iniface,
                        ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
                return false;
        }

        /* Outgoing interface name. */
        ret = ifname_compare(outdev, ipinfo->outiface, ipinfo->outiface_mask);

        if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
                dprintf("VIA out mismatch (%s vs %s).%s\n",
                        outdev, ipinfo->outiface,
                        ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
                return false;
        }

        /* Check specific protocol; proto == 0 means "any". */
        if (ipinfo->proto
            && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
                dprintf("Packet protocol %hi does not match %hi.%s\n",
                        ip->protocol, ipinfo->proto,
                        ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
                return false;
        }

        /* If we have a fragment rule but the packet is not a fragment
         * then we return zero */
        if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
                dprintf("Fragment rule but not fragment.%s\n",
                        ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
                return false;
        }

        return true;
}
161
162 static bool
163 ip_checkentry(const struct ipt_ip *ip)
164 {
165         if (ip->flags & ~IPT_F_MASK) {
166                 duprintf("Unknown flag bits set: %08X\n",
167                          ip->flags & ~IPT_F_MASK);
168                 return false;
169         }
170         if (ip->invflags & ~IPT_INV_MASK) {
171                 duprintf("Unknown invflag bits set: %08X\n",
172                          ip->invflags & ~IPT_INV_MASK);
173                 return false;
174         }
175         return true;
176 }
177
178 static unsigned int
179 ipt_error(struct sk_buff *skb, const struct xt_target_param *par)
180 {
181         if (net_ratelimit())
182                 printk("ip_tables: error: `%s'\n",
183                        (const char *)par->targinfo);
184
185         return NF_DROP;
186 }
187
188 /* Performance critical - called for every packet */
189 static inline bool
190 do_match(struct ipt_entry_match *m, const struct sk_buff *skb,
191          struct xt_match_param *par)
192 {
193         par->match     = m->u.kernel.match;
194         par->matchinfo = m->data;
195
196         /* Stop iteration if it doesn't match */
197         if (!m->u.kernel.match->match(skb, par))
198                 return true;
199         else
200                 return false;
201 }
202
/* Performance critical */
/* Rules live in a flat blob; offset is a byte offset from its base. */
static inline struct ipt_entry *
get_entry(void *base, unsigned int offset)
{
        return (struct ipt_entry *)((char *)base + offset);
}
209
210 /* All zeroes == unconditional rule. */
211 /* Mildly perf critical (only if packet tracing is on) */
212 static inline int
213 unconditional(const struct ipt_ip *ip)
214 {
215         unsigned int i;
216
217         for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
218                 if (((__u32 *)ip)[i])
219                         return 0;
220
221         return 1;
222 #undef FWINV
223 }
224
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
/* Hook names as printed in TRACE log lines, indexed by NF_INET_* hook. */
static const char *const hooknames[] = {
        [NF_INET_PRE_ROUTING]           = "PREROUTING",
        [NF_INET_LOCAL_IN]              = "INPUT",
        [NF_INET_FORWARD]               = "FORWARD",
        [NF_INET_LOCAL_OUT]             = "OUTPUT",
        [NF_INET_POST_ROUTING]          = "POSTROUTING",
};

/* What kind of rule a TRACE line refers to; indexes comments[] below. */
enum nf_ip_trace_comments {
        NF_IP_TRACE_COMMENT_RULE,
        NF_IP_TRACE_COMMENT_RETURN,
        NF_IP_TRACE_COMMENT_POLICY,
};

static const char *const comments[] = {
        [NF_IP_TRACE_COMMENT_RULE]      = "rule",
        [NF_IP_TRACE_COMMENT_RETURN]    = "return",
        [NF_IP_TRACE_COMMENT_POLICY]    = "policy",
};

/* Fixed logging parameters used for all TRACE output. */
static struct nf_loginfo trace_loginfo = {
        .type = NF_LOG_TYPE_LOG,
        .u = {
                .log = {
                        .level = 4,
                        .logflags = NF_LOG_MASK,
                },
        },
};
256
/* Mildly perf critical (only if packet tracing is on) */
/*
 * IPT_ENTRY_ITERATE callback used by trace_packet(): walk rules from
 * the hook's entry point towards the matched entry e, tracking the
 * current chain name (updated at each ERROR-target chain head) and the
 * rule number within it.  Returns 1 to stop iteration once s == e.
 */
static inline int
get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e,
                      char *hookname, char **chainname,
                      char **comment, unsigned int *rulenum)
{
        struct ipt_standard_target *t = (void *)ipt_get_target(s);

        if (strcmp(t->target.u.kernel.target->name, IPT_ERROR_TARGET) == 0) {
                /* Head of user chain: ERROR target with chainname */
                *chainname = t->target.data;
                (*rulenum) = 0;
        } else if (s == e) {
                (*rulenum)++;

                if (s->target_offset == sizeof(struct ipt_entry)
                   && strcmp(t->target.u.kernel.target->name,
                             IPT_STANDARD_TARGET) == 0
                   && t->verdict < 0
                   && unconditional(&s->ip)) {
                        /* Tail of chains: STANDARD target (return/policy) */
                        /* "policy" if still in the built-in chain,
                           "return" if inside a user-defined chain. */
                        *comment = *chainname == hookname
                                ? (char *)comments[NF_IP_TRACE_COMMENT_POLICY]
                                : (char *)comments[NF_IP_TRACE_COMMENT_RETURN];
                }
                return 1;
        } else
                (*rulenum)++;

        return 0;
}
288
/*
 * Emit a "TRACE: table:chain:comment:rulenum" log line for a packet
 * that matched entry e on the given hook.  Called from ipt_do_table()
 * only when skb->nf_trace is set.
 */
static void trace_packet(struct sk_buff *skb,
                         unsigned int hook,
                         const struct net_device *in,
                         const struct net_device *out,
                         const char *tablename,
                         struct xt_table_info *private,
                         struct ipt_entry *e)
{
        void *table_base;
        const struct ipt_entry *root;
        char *hookname, *chainname, *comment;
        unsigned int rulenum = 0;

        /* Walk this CPU's copy of the table from the hook entry point. */
        table_base = (void *)private->entries[smp_processor_id()];
        root = get_entry(table_base, private->hook_entry[hook]);

        hookname = chainname = (char *)hooknames[hook];
        comment = (char *)comments[NF_IP_TRACE_COMMENT_RULE];

        IPT_ENTRY_ITERATE(root,
                          private->size - private->hook_entry[hook],
                          get_chainname_rulenum,
                          e, hookname, &chainname, &comment, &rulenum);

        nf_log_packet(AF_INET, hook, skb, in, out, &trace_loginfo,
                      "TRACE: %s:%s:%s:%u ",
                      tablename, chainname, comment, rulenum);
}
317 #endif
318
/* Returns one of the generic firewall policies, like NF_ACCEPT. */
/*
 * Main rule-traversal loop: walk this CPU's copy of the table from the
 * hook's entry point, matching each rule and executing its target,
 * following jumps/returns via the "back" pointer, until a verdict is
 * reached or a match sets hotdrop.
 */
unsigned int
ipt_do_table(struct sk_buff *skb,
             unsigned int hook,
             const struct net_device *in,
             const struct net_device *out,
             struct xt_table *table)
{
        /* Aligned so ifname_compare() can read it as unsigned longs. */
        static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
        const struct iphdr *ip;
        u_int16_t datalen;
        bool hotdrop = false;
        /* Initializing verdict to NF_DROP keeps gcc happy. */
        unsigned int verdict = NF_DROP;
        const char *indev, *outdev;
        void *table_base;
        struct ipt_entry *e, *back;
        struct xt_table_info *private;
        struct xt_match_param mtpar;
        struct xt_target_param tgpar;

        /* Initialization */
        ip = ip_hdr(skb);
        datalen = skb->len - ip->ihl * 4;
        indev = in ? in->name : nulldevname;
        outdev = out ? out->name : nulldevname;
        /* We handle fragments by dealing with the first fragment as
         * if it was a normal packet.  All other fragments are treated
         * normally, except that they will NEVER match rules that ask
         * things we don't know, ie. tcp syn flag or ports).  If the
         * rule is also a fragment-specific rule, non-fragments won't
         * match it. */
        mtpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
        mtpar.thoff   = ip_hdrlen(skb);
        mtpar.hotdrop = &hotdrop;
        mtpar.in      = tgpar.in  = in;
        mtpar.out     = tgpar.out = out;
        mtpar.family  = tgpar.family = NFPROTO_IPV4;
        tgpar.hooknum = hook;

        IP_NF_ASSERT(table->valid_hooks & (1 << hook));

        /* Readers of the per-CPU rule blob are protected by RCU. */
        rcu_read_lock();
        private = rcu_dereference(table->private);
        table_base = rcu_dereference(private->entries[smp_processor_id()]);

        e = get_entry(table_base, private->hook_entry[hook]);

        /* For return from builtin chain */
        back = get_entry(table_base, private->underflow[hook]);

        do {
                IP_NF_ASSERT(e);
                IP_NF_ASSERT(back);
                if (ip_packet_match(ip, indev, outdev,
                    &e->ip, mtpar.fragoff)) {
                        struct ipt_entry_target *t;

                        /* Non-zero means some extension match failed. */
                        if (IPT_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0)
                                goto no_match;

                        ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);

                        t = ipt_get_target(e);
                        IP_NF_ASSERT(t->u.kernel.target);

#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
                        /* The packet is traced: log it */
                        if (unlikely(skb->nf_trace))
                                trace_packet(skb, hook, in, out,
                                             table->name, private, e);
#endif
                        /* Standard target? */
                        if (!t->u.kernel.target->target) {
                                int v;

                                v = ((struct ipt_standard_target *)t)->verdict;
                                if (v < 0) {
                                        /* Pop from stack? */
                                        if (v != IPT_RETURN) {
                                                verdict = (unsigned)(-v) - 1;
                                                break;
                                        }
                                        e = back;
                                        back = get_entry(table_base,
                                                         back->comefrom);
                                        continue;
                                }
                                if (table_base + v != (void *)e + e->next_offset
                                    && !(e->ip.flags & IPT_F_GOTO)) {
                                        /* Save old back ptr in next entry */
                                        struct ipt_entry *next
                                                = (void *)e + e->next_offset;
                                        next->comefrom
                                                = (void *)back - table_base;
                                        /* set back pointer to next entry */
                                        back = next;
                                }

                                e = get_entry(table_base, v);
                        } else {
                                /* Targets which reenter must return
                                   abs. verdicts */
                                tgpar.target   = t->u.kernel.target;
                                tgpar.targinfo = t->data;
#ifdef CONFIG_NETFILTER_DEBUG
                                /* Sentinel to detect illegal reentry. */
                                ((struct ipt_entry *)table_base)->comefrom
                                        = 0xeeeeeeec;
#endif
                                verdict = t->u.kernel.target->target(skb,
                                                                     &tgpar);
#ifdef CONFIG_NETFILTER_DEBUG
                                if (((struct ipt_entry *)table_base)->comefrom
                                    != 0xeeeeeeec
                                    && verdict == IPT_CONTINUE) {
                                        printk("Target %s reentered!\n",
                                               t->u.kernel.target->name);
                                        verdict = NF_DROP;
                                }
                                ((struct ipt_entry *)table_base)->comefrom
                                        = 0x57acc001;
#endif
                                /* Target might have changed stuff. */
                                ip = ip_hdr(skb);
                                datalen = skb->len - ip->ihl * 4;

                                if (verdict == IPT_CONTINUE)
                                        e = (void *)e + e->next_offset;
                                else
                                        /* Verdict */
                                        break;
                        }
                } else {

                no_match:
                        e = (void *)e + e->next_offset;
                }
        } while (!hotdrop);

        rcu_read_unlock();

#ifdef DEBUG_ALLOW_ALL
        return NF_ACCEPT;
#else
        if (hotdrop)
                return NF_DROP;
        else return verdict;
#endif
}
469
/* Figures out from what hook each rule can be called: returns 0 if
   there are loops.  Puts hook bitmask in comefrom. */
static int
mark_source_chains(struct xt_table_info *newinfo,
                   unsigned int valid_hooks, void *entry0)
{
        unsigned int hook;

        /* No recursion; use packet counter to save back ptrs (reset
           to 0 as we leave), and comefrom to save source hook bitmask */
        for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) {
                unsigned int pos = newinfo->hook_entry[hook];
                struct ipt_entry *e = (struct ipt_entry *)(entry0 + pos);

                if (!(valid_hooks & (1 << hook)))
                        continue;

                /* Set initial back pointer. */
                e->counters.pcnt = pos;

                for (;;) {
                        struct ipt_standard_target *t
                                = (void *)ipt_get_target(e);
                        int visited = e->comefrom & (1 << hook);

                        /* Bit NF_INET_NUMHOOKS marks "currently on the
                           path from this hook": seeing it again = loop. */
                        if (e->comefrom & (1 << NF_INET_NUMHOOKS)) {
                                printk("iptables: loop hook %u pos %u %08X.\n",
                                       hook, pos, e->comefrom);
                                return 0;
                        }
                        e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));

                        /* Unconditional return/END. */
                        if ((e->target_offset == sizeof(struct ipt_entry)
                            && (strcmp(t->target.u.user.name,
                                       IPT_STANDARD_TARGET) == 0)
                            && t->verdict < 0
                            && unconditional(&e->ip)) || visited) {
                                unsigned int oldpos, size;

                                if (t->verdict < -NF_MAX_VERDICT - 1) {
                                        duprintf("mark_source_chains: bad "
                                                "negative verdict (%i)\n",
                                                                t->verdict);
                                        return 0;
                                }

                                /* Return: backtrack through the last
                                   big jump. */
                                do {
                                        e->comefrom ^= (1<<NF_INET_NUMHOOKS);
#ifdef DEBUG_IP_FIREWALL_USER
                                        if (e->comefrom
                                            & (1 << NF_INET_NUMHOOKS)) {
                                                duprintf("Back unset "
                                                         "on hook %u "
                                                         "rule %u\n",
                                                         hook, pos);
                                        }
#endif
                                        /* Follow the back pointer saved in
                                           counters.pcnt, clearing it. */
                                        oldpos = pos;
                                        pos = e->counters.pcnt;
                                        e->counters.pcnt = 0;

                                        /* We're at the start. */
                                        if (pos == oldpos)
                                                goto next;

                                        e = (struct ipt_entry *)
                                                (entry0 + pos);
                                } while (oldpos == pos + e->next_offset);

                                /* Move along one */
                                size = e->next_offset;
                                e = (struct ipt_entry *)
                                        (entry0 + pos + size);
                                e->counters.pcnt = pos;
                                pos += size;
                        } else {
                                int newpos = t->verdict;

                                if (strcmp(t->target.u.user.name,
                                           IPT_STANDARD_TARGET) == 0
                                    && newpos >= 0) {
                                        if (newpos > newinfo->size -
                                                sizeof(struct ipt_entry)) {
                                                duprintf("mark_source_chains: "
                                                        "bad verdict (%i)\n",
                                                                newpos);
                                                return 0;
                                        }
                                        /* This a jump; chase it. */
                                        duprintf("Jump rule %u -> %u\n",
                                                 pos, newpos);
                                } else {
                                        /* ... this is a fallthru */
                                        newpos = pos + e->next_offset;
                                }
                                e = (struct ipt_entry *)
                                        (entry0 + newpos);
                                e->counters.pcnt = pos;
                                pos = newpos;
                        }
                }
                next:
                duprintf("Finished chain %u\n", hook);
        }
        return 1;
}
579
580 static int
581 cleanup_match(struct ipt_entry_match *m, unsigned int *i)
582 {
583         struct xt_mtdtor_param par;
584
585         if (i && (*i)-- == 0)
586                 return 1;
587
588         par.match     = m->u.kernel.match;
589         par.matchinfo = m->data;
590         par.family    = NFPROTO_IPV4;
591         if (par.match->destroy != NULL)
592                 par.match->destroy(&par);
593         module_put(par.match->me);
594         return 0;
595 }
596
597 static int
598 check_entry(struct ipt_entry *e, const char *name)
599 {
600         struct ipt_entry_target *t;
601
602         if (!ip_checkentry(&e->ip)) {
603                 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
604                 return -EINVAL;
605         }
606
607         if (e->target_offset + sizeof(struct ipt_entry_target) >
608             e->next_offset)
609                 return -EINVAL;
610
611         t = ipt_get_target(e);
612         if (e->target_offset + t->u.target_size > e->next_offset)
613                 return -EINVAL;
614
615         return 0;
616 }
617
618 static int
619 check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par,
620             unsigned int *i)
621 {
622         const struct ipt_ip *ip = par->entryinfo;
623         int ret;
624
625         par->match     = m->u.kernel.match;
626         par->matchinfo = m->data;
627
628         ret = xt_check_match(par, m->u.match_size - sizeof(*m),
629               ip->proto, ip->invflags & IPT_INV_PROTO);
630         if (ret < 0) {
631                 duprintf("ip_tables: check failed for `%s'.\n",
632                          par.match->name);
633                 return ret;
634         }
635         ++*i;
636         return 0;
637 }
638
639 static int
640 find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par,
641                  unsigned int *i)
642 {
643         struct xt_match *match;
644         int ret;
645
646         match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
647                                                       m->u.user.revision),
648                                         "ipt_%s", m->u.user.name);
649         if (IS_ERR(match) || !match) {
650                 duprintf("find_check_match: `%s' not found\n", m->u.user.name);
651                 return match ? PTR_ERR(match) : -ENOENT;
652         }
653         m->u.kernel.match = match;
654
655         ret = check_match(m, par, i);
656         if (ret)
657                 goto err;
658
659         return 0;
660 err:
661         module_put(m->u.kernel.match->me);
662         return ret;
663 }
664
/*
 * Run the target's checkentry hook (via xt_check_target) for entry e.
 * Returns 0 on success or a negative errno from xt_check_target().
 */
static int check_target(struct ipt_entry *e, const char *name)
{
        struct ipt_entry_target *t = ipt_get_target(e);
        /* Designated initializer: any other xt_tgchk_param fields are
           implicitly zeroed. */
        struct xt_tgchk_param par = {
                .table     = name,
                .entryinfo = e,
                .target    = t->u.kernel.target,
                .targinfo  = t->data,
                .hook_mask = e->comefrom,
                .family    = NFPROTO_IPV4,
        };
        int ret;

        ret = xt_check_target(&par, t->u.target_size - sizeof(*t),
              e->ip.proto, e->ip.invflags & IPT_INV_PROTO);
        if (ret < 0) {
                duprintf("ip_tables: check failed for `%s'.\n",
                         t->u.kernel.target->name);
                return ret;
        }
        return 0;
}
687
/*
 * Fully validate one rule: layout check, then look up + check every
 * match extension, then look up + check the target.  On any failure,
 * the j matches already referenced are cleaned up and their module
 * references dropped.  On success, (*i) counts the validated entry.
 */
static int
find_check_entry(struct ipt_entry *e, const char *name, unsigned int size,
                 unsigned int *i)
{
        struct ipt_entry_target *t;
        struct xt_target *target;
        int ret;
        unsigned int j;
        struct xt_mtchk_param mtpar;

        ret = check_entry(e, name);
        if (ret)
                return ret;

        /* j counts matches successfully set up, for partial cleanup. */
        j = 0;
        mtpar.table     = name;
        mtpar.entryinfo = &e->ip;
        mtpar.hook_mask = e->comefrom;
        mtpar.family    = NFPROTO_IPV4;
        ret = IPT_MATCH_ITERATE(e, find_check_match, &mtpar, &j);
        if (ret != 0)
                goto cleanup_matches;

        t = ipt_get_target(e);
        target = try_then_request_module(xt_find_target(AF_INET,
                                                        t->u.user.name,
                                                        t->u.user.revision),
                                         "ipt_%s", t->u.user.name);
        if (IS_ERR(target) || !target) {
                duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
                ret = target ? PTR_ERR(target) : -ENOENT;
                goto cleanup_matches;
        }
        t->u.kernel.target = target;

        ret = check_target(e, name);
        if (ret)
                goto err;

        (*i)++;
        return 0;
 err:
        module_put(t->u.kernel.target->me);
 cleanup_matches:
        IPT_MATCH_ITERATE(e, cleanup_match, &j);
        return ret;
}
735
/*
 * First-pass walk over the userspace blob: verify each entry's
 * alignment and minimum size, record which entries sit exactly at the
 * advertised hook entry/underflow offsets, and reset the kernel-owned
 * counters/comefrom fields.  (*i) counts entries seen.
 */
static int
check_entry_size_and_hooks(struct ipt_entry *e,
                           struct xt_table_info *newinfo,
                           unsigned char *base,
                           unsigned char *limit,
                           const unsigned int *hook_entries,
                           const unsigned int *underflows,
                           unsigned int *i)
{
        unsigned int h;

        if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
            || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
                duprintf("Bad offset %p\n", e);
                return -EINVAL;
        }

        /* Must at least hold the fixed header plus a target header. */
        if (e->next_offset
            < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
                duprintf("checking: element %p size %u\n",
                         e, e->next_offset);
                return -EINVAL;
        }

        /* Check hooks & underflows */
        for (h = 0; h < NF_INET_NUMHOOKS; h++) {
                if ((unsigned char *)e - base == hook_entries[h])
                        newinfo->hook_entry[h] = hook_entries[h];
                if ((unsigned char *)e - base == underflows[h])
                        newinfo->underflow[h] = underflows[h];
        }

        /* FIXME: underflows must be unconditional, standard verdicts
           < 0 (not IPT_RETURN). --RR */

        /* Clear counters and comefrom */
        e->counters = ((struct xt_counters) { 0, 0 });
        e->comefrom = 0;

        (*i)++;
        return 0;
}
778
779 static int
780 cleanup_entry(struct ipt_entry *e, unsigned int *i)
781 {
782         struct xt_tgdtor_param par;
783         struct ipt_entry_target *t;
784
785         if (i && (*i)-- == 0)
786                 return 1;
787
788         /* Cleanup all matches */
789         IPT_MATCH_ITERATE(e, cleanup_match, NULL);
790         t = ipt_get_target(e);
791
792         par.target   = t->u.kernel.target;
793         par.targinfo = t->data;
794         par.family   = NFPROTO_IPV4;
795         if (par.target->destroy != NULL)
796                 par.target->destroy(&par);
797         module_put(par.target->me);
798         return 0;
799 }
800
/* Checks and translates the user-supplied table segment (held in
   newinfo) */
static int
translate_table(const char *name,
		unsigned int valid_hooks,
		struct xt_table_info *newinfo,
		void *entry0,
		unsigned int size,
		unsigned int number,
		const unsigned int *hook_entries,
		const unsigned int *underflows)
{
	unsigned int i;
	int ret;

	newinfo->size = size;
	newinfo->number = number;

	/* Init all hooks to impossible value. */
	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
		newinfo->hook_entry[i] = 0xFFFFFFFF;
		newinfo->underflow[i] = 0xFFFFFFFF;
	}

	duprintf("translate_table: size %u\n", newinfo->size);
	i = 0;
	/* Walk through entries, checking offsets. */
	ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
				check_entry_size_and_hooks,
				newinfo,
				entry0,
				entry0 + size,
				hook_entries, underflows, &i);
	if (ret != 0)
		return ret;

	/* The offset walk must have seen exactly as many entries as
	 * userspace claimed to supply. */
	if (i != number) {
		duprintf("translate_table: %u not %u entries\n",
			 i, number);
		return -EINVAL;
	}

	/* Check hooks all assigned */
	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
		/* Only hooks which are valid */
		if (!(valid_hooks & (1 << i)))
			continue;
		/* 0xFFFFFFFF = sentinel set above, i.e. never assigned. */
		if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
			duprintf("Invalid hook entry %u %u\n",
				 i, hook_entries[i]);
			return -EINVAL;
		}
		if (newinfo->underflow[i] == 0xFFFFFFFF) {
			duprintf("Invalid underflow %u %u\n",
				 i, underflows[i]);
			return -EINVAL;
		}
	}

	/* Reject rulesets with loops between chains. */
	if (!mark_source_chains(newinfo, valid_hooks, entry0))
		return -ELOOP;

	/* Finally, each sanity check must pass */
	i = 0;
	ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
				find_check_entry, name, size, &i);

	if (ret != 0) {
		/* Unwind only the i entries that passed
		 * find_check_entry() before the failure. */
		IPT_ENTRY_ITERATE(entry0, newinfo->size,
				cleanup_entry, &i);
		return ret;
	}

	/* And one copy for every other CPU */
	for_each_possible_cpu(i) {
		if (newinfo->entries[i] && newinfo->entries[i] != entry0)
			memcpy(newinfo->entries[i], entry0, newinfo->size);
	}

	return ret;
}
882
883 /* Gets counters. */
884 static inline int
885 add_entry_to_counter(const struct ipt_entry *e,
886                      struct xt_counters total[],
887                      unsigned int *i)
888 {
889         ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
890
891         (*i)++;
892         return 0;
893 }
894
895 static inline int
896 set_entry_to_counter(const struct ipt_entry *e,
897                      struct ipt_counters total[],
898                      unsigned int *i)
899 {
900         SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
901
902         (*i)++;
903         return 0;
904 }
905
/* Collect the per-entry counters of table @t into @counters by
 * summing every possible CPU's private copy of the ruleset. */
static void
get_counters(const struct xt_table_info *t,
	     struct xt_counters counters[])
{
	unsigned int cpu;
	unsigned int i;
	unsigned int curcpu;

	/* Instead of clearing (by a previous call to memset())
	 * the counters and using adds, we set the counters
	 * with data used by 'current' CPU
	 * We dont care about preemption here.
	 */
	curcpu = raw_smp_processor_id();

	i = 0;
	/* First pass: SET counters[] from this CPU's copy. */
	IPT_ENTRY_ITERATE(t->entries[curcpu],
			  t->size,
			  set_entry_to_counter,
			  counters,
			  &i);

	/* Second pass: ADD in every other CPU's contribution. */
	for_each_possible_cpu(cpu) {
		if (cpu == curcpu)
			continue;
		i = 0;
		IPT_ENTRY_ITERATE(t->entries[cpu],
				  t->size,
				  add_entry_to_counter,
				  counters,
				  &i);
	}

}
940
941 /* We're lazy, and add to the first CPU; overflow works its fey magic
942  * and everything is OK. */
943 static int
944 add_counter_to_entry(struct ipt_entry *e,
945                      const struct xt_counters addme[],
946                      unsigned int *i)
947 {
948         ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
949
950         (*i)++;
951         return 0;
952 }
953
954 /* Take values from counters and add them back onto the current cpu */
955 static void put_counters(struct xt_table_info *t,
956                          const struct xt_counters counters[])
957 {
958         unsigned int i, cpu;
959
960         local_bh_disable();
961         cpu = smp_processor_id();
962         i = 0;
963         IPT_ENTRY_ITERATE(t->entries[cpu],
964                           t->size,
965                           add_counter_to_entry,
966                           counters,
967                           &i);
968         local_bh_enable();
969 }
970
971
972 static inline int
973 zero_entry_counter(struct ipt_entry *e, void *arg)
974 {
975         e->counters.bcnt = 0;
976         e->counters.pcnt = 0;
977         return 0;
978 }
979
/* Populate @newinfo as a zero-counter clone of @info: copy the table
 * header, then duplicate the current CPU's rule blob into every
 * per-cpu slot of @newinfo and zero the counters in each copy. */
static void
clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
{
	unsigned int cpu;
	const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];

	/* offsetof(..., entries): copy everything *except* the per-cpu
	 * entry pointers, which newinfo already owns. */
	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
	for_each_possible_cpu(cpu) {
		memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
		IPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
				  zero_entry_counter, NULL);
	}
}
993
/* Produce an atomic snapshot of all rule counters for @table.
 *
 * A zeroed clone of the ruleset is swapped in (RCU) so the old blobs
 * can be read race-free; the snapshot is then added back onto the
 * live clone so no counts are lost.  Returns a vmalloc'ed array the
 * caller must vfree(), or ERR_PTR(-ENOMEM). */
static struct xt_counters * alloc_counters(struct xt_table *table)
{
	unsigned int countersize;
	struct xt_counters *counters;
	struct xt_table_info *private = table->private;
	struct xt_table_info *info;

	/* We need atomic snapshot of counters: rest doesn't change
	   (other than comefrom, which userspace doesn't care
	   about). */
	countersize = sizeof(struct xt_counters) * private->number;
	counters = vmalloc_node(countersize, numa_node_id());

	if (counters == NULL)
		goto nomem;

	info = xt_alloc_table_info(private->size);
	if (!info)
		goto free_counters;

	/* Zero-counter copy that will briefly take the table's place. */
	clone_counters(info, private);

	mutex_lock(&table->lock);
	/* After the swap, @info holds the old (counted) blobs and the
	 * zeroed clone is live under @private. */
	xt_table_entry_swap_rcu(private, info);
	synchronize_net();	/* Wait until smoke has cleared */

	get_counters(info, counters);
	/* Fold the snapshot back onto the live copy. */
	put_counters(private, counters);
	mutex_unlock(&table->lock);

	xt_free_table_info(info);

	return counters;

 free_counters:
	vfree(counters);
 nomem:
	return ERR_PTR(-ENOMEM);
}
1033
/* Copy @total_size bytes of the table's rule blob to @userptr, then
 * patch in fresh counter values and replace kernel-internal match /
 * target pointers with their user-visible names.
 * Returns 0 or a negative errno. */
static int
copy_entries_to_user(unsigned int total_size,
		     struct xt_table *table,
		     void __user *userptr)
{
	unsigned int off, num;
	struct ipt_entry *e;
	struct xt_counters *counters;
	const struct xt_table_info *private = table->private;
	int ret = 0;
	const void *loc_cpu_entry;

	counters = alloc_counters(table);
	if (IS_ERR(counters))
		return PTR_ERR(counters);

	/* choose the copy that is on our node/cpu, ...
	 * This choice is lazy (because current thread is
	 * allowed to migrate to another cpu)
	 */
	loc_cpu_entry = private->entries[raw_smp_processor_id()];
	if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
		ret = -EFAULT;
		goto free_counters;
	}

	/* FIXME: use iterator macros --RR */
	/* ... then go back and fix counters and names */
	for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
		unsigned int i;
		const struct ipt_entry_match *m;
		const struct ipt_entry_target *t;

		e = (struct ipt_entry *)(loc_cpu_entry + off);
		/* Overwrite the copied-out counters with the atomic
		 * snapshot taken by alloc_counters(). */
		if (copy_to_user(userptr + off
				 + offsetof(struct ipt_entry, counters),
				 &counters[num],
				 sizeof(counters[num])) != 0) {
			ret = -EFAULT;
			goto free_counters;
		}

		/* Walk the matches between the entry header and the
		 * target, exporting each match's name in place of the
		 * kernel pointer union. */
		for (i = sizeof(struct ipt_entry);
		     i < e->target_offset;
		     i += m->u.match_size) {
			m = (void *)e + i;

			if (copy_to_user(userptr + off + i
					 + offsetof(struct ipt_entry_match,
						    u.user.name),
					 m->u.kernel.match->name,
					 strlen(m->u.kernel.match->name)+1)
			    != 0) {
				ret = -EFAULT;
				goto free_counters;
			}
		}

		/* Same for the target: export its name, not the pointer. */
		t = ipt_get_target(e);
		if (copy_to_user(userptr + off + e->target_offset
				 + offsetof(struct ipt_entry_target,
					    u.user.name),
				 t->u.kernel.target->name,
				 strlen(t->u.kernel.target->name)+1) != 0) {
			ret = -EFAULT;
			goto free_counters;
		}
	}

 free_counters:
	vfree(counters);
	return ret;
}
1107
1108 #ifdef CONFIG_COMPAT
1109 static void compat_standard_from_user(void *dst, void *src)
1110 {
1111         int v = *(compat_int_t *)src;
1112
1113         if (v > 0)
1114                 v += xt_compat_calc_jump(AF_INET, v);
1115         memcpy(dst, &v, sizeof(v));
1116 }
1117
1118 static int compat_standard_to_user(void __user *dst, void *src)
1119 {
1120         compat_int_t cv = *(int *)src;
1121
1122         if (cv > 0)
1123                 cv -= xt_compat_calc_jump(AF_INET, cv);
1124         return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
1125 }
1126
1127 static inline int
1128 compat_calc_match(struct ipt_entry_match *m, int *size)
1129 {
1130         *size += xt_compat_match_offset(m->u.kernel.match);
1131         return 0;
1132 }
1133
/* Compute how much smaller entry @e is in compat layout, record the
 * per-entry delta with the xtables compat core, shrink newinfo->size
 * accordingly, and pull any hook-entry/underflow offsets that lie
 * beyond @e back by the delta. */
static int compat_calc_entry(struct ipt_entry *e,
			     const struct xt_table_info *info,
			     void *base, struct xt_table_info *newinfo)
{
	struct ipt_entry_target *t;
	unsigned int entry_offset;
	int off, i, ret;

	/* Base delta: native vs compat entry-header size ... */
	off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
	entry_offset = (void *)e - base;
	/* ... plus each match's and the target's layout difference. */
	IPT_MATCH_ITERATE(e, compat_calc_match, &off);
	t = ipt_get_target(e);
	off += xt_compat_target_offset(t->u.kernel.target);
	newinfo->size -= off;
	ret = xt_compat_add_offset(AF_INET, entry_offset, off);
	if (ret)
		return ret;

	/* Shift hook offsets located after this entry. */
	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
		if (info->hook_entry[i] &&
		    (e < (struct ipt_entry *)(base + info->hook_entry[i])))
			newinfo->hook_entry[i] -= off;
		if (info->underflow[i] &&
		    (e < (struct ipt_entry *)(base + info->underflow[i])))
			newinfo->underflow[i] -= off;
	}
	return 0;
}
1162
1163 static int compat_table_info(const struct xt_table_info *info,
1164                              struct xt_table_info *newinfo)
1165 {
1166         void *loc_cpu_entry;
1167
1168         if (!newinfo || !info)
1169                 return -EINVAL;
1170
1171         /* we dont care about newinfo->entries[] */
1172         memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
1173         newinfo->initial_entries = 0;
1174         loc_cpu_entry = info->entries[raw_smp_processor_id()];
1175         return IPT_ENTRY_ITERATE(loc_cpu_entry, info->size,
1176                                  compat_calc_entry, info, loc_cpu_entry,
1177                                  newinfo);
1178 }
1179 #endif
1180
1181 static int get_info(struct net *net, void __user *user, int *len, int compat)
1182 {
1183         char name[IPT_TABLE_MAXNAMELEN];
1184         struct xt_table *t;
1185         int ret;
1186
1187         if (*len != sizeof(struct ipt_getinfo)) {
1188                 duprintf("length %u != %zu\n", *len,
1189                          sizeof(struct ipt_getinfo));
1190                 return -EINVAL;
1191         }
1192
1193         if (copy_from_user(name, user, sizeof(name)) != 0)
1194                 return -EFAULT;
1195
1196         name[IPT_TABLE_MAXNAMELEN-1] = '\0';
1197 #ifdef CONFIG_COMPAT
1198         if (compat)
1199                 xt_compat_lock(AF_INET);
1200 #endif
1201         t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
1202                                     "iptable_%s", name);
1203         if (t && !IS_ERR(t)) {
1204                 struct ipt_getinfo info;
1205                 const struct xt_table_info *private = t->private;
1206
1207 #ifdef CONFIG_COMPAT
1208                 if (compat) {
1209                         struct xt_table_info tmp;
1210                         ret = compat_table_info(private, &tmp);
1211                         xt_compat_flush_offsets(AF_INET);
1212                         private = &tmp;
1213                 }
1214 #endif
1215                 info.valid_hooks = t->valid_hooks;
1216                 memcpy(info.hook_entry, private->hook_entry,
1217                        sizeof(info.hook_entry));
1218                 memcpy(info.underflow, private->underflow,
1219                        sizeof(info.underflow));
1220                 info.num_entries = private->number;
1221                 info.size = private->size;
1222                 strcpy(info.name, name);
1223
1224                 if (copy_to_user(user, &info, *len) != 0)
1225                         ret = -EFAULT;
1226                 else
1227                         ret = 0;
1228
1229                 xt_table_unlock(t);
1230                 module_put(t->me);
1231         } else
1232                 ret = t ? PTR_ERR(t) : -ENOENT;
1233 #ifdef CONFIG_COMPAT
1234         if (compat)
1235                 xt_compat_unlock(AF_INET);
1236 #endif
1237         return ret;
1238 }
1239
/* IPT_SO_GET_ENTRIES handler: copy the named table's rule blob (with
 * counters and symbolic match/target names) back to userspace. */
static int
get_entries(struct net *net, struct ipt_get_entries __user *uptr, int *len)
{
	int ret;
	struct ipt_get_entries get;
	struct xt_table *t;

	if (*len < sizeof(get)) {
		duprintf("get_entries: %u < %zu\n", *len, sizeof(get));
		return -EINVAL;
	}
	if (copy_from_user(&get, uptr, sizeof(get)) != 0)
		return -EFAULT;
	/* The caller must size the buffer exactly: header + blob. */
	if (*len != sizeof(struct ipt_get_entries) + get.size) {
		duprintf("get_entries: %u != %zu\n",
			 *len, sizeof(get) + get.size);
		return -EINVAL;
	}

	t = xt_find_table_lock(net, AF_INET, get.name);
	if (t && !IS_ERR(t)) {
		const struct xt_table_info *private = t->private;
		duprintf("t->private->number = %u\n", private->number);
		if (get.size == private->size)
			ret = copy_entries_to_user(private->size,
						   t, uptr->entrytable);
		else {
			/* Table was replaced since GET_INFO; userspace
			 * should retry with the new size. */
			duprintf("get_entries: I've got %u not %u!\n",
				 private->size, get.size);
			ret = -EAGAIN;
		}
		module_put(t->me);
		xt_table_unlock(t);
	} else
		ret = t ? PTR_ERR(t) : -ENOENT;

	return ret;
}
1278
/* Swap a freshly translated @newinfo into the table named @name and
 * return the replaced ruleset's counters to userspace via
 * @counters_ptr.  On success @newinfo's ownership passes to the
 * xtables core; on failure the caller must free it. */
static int
__do_replace(struct net *net, const char *name, unsigned int valid_hooks,
	     struct xt_table_info *newinfo, unsigned int num_counters,
	     void __user *counters_ptr)
{
	int ret;
	struct xt_table *t;
	struct xt_table_info *oldinfo;
	struct xt_counters *counters;
	void *loc_cpu_old_entry;

	ret = 0;
	counters = vmalloc(num_counters * sizeof(struct xt_counters));
	if (!counters) {
		ret = -ENOMEM;
		goto out;
	}

	t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
				    "iptable_%s", name);
	if (!t || IS_ERR(t)) {
		ret = t ? PTR_ERR(t) : -ENOENT;
		goto free_newinfo_counters_untrans;
	}

	/* You lied! */
	if (valid_hooks != t->valid_hooks) {
		duprintf("Valid hook crap: %08X vs %08X\n",
			 valid_hooks, t->valid_hooks);
		ret = -EINVAL;
		goto put_module;
	}

	oldinfo = xt_replace_table(t, num_counters, newinfo, &ret);
	if (!oldinfo)
		goto put_module;

	/* Update module usage count based on number of rules */
	duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
		oldinfo->number, oldinfo->initial_entries, newinfo->number);
	/* NOTE(review): these two puts balance references taken while
	 * non-initial (user) rules exist — verify against the refs
	 * taken by xt_replace_table()/table registration. */
	if ((oldinfo->number > oldinfo->initial_entries) ||
	    (newinfo->number <= oldinfo->initial_entries))
		module_put(t->me);
	if ((oldinfo->number > oldinfo->initial_entries) &&
	    (newinfo->number <= oldinfo->initial_entries))
		module_put(t->me);

	/* Get the old counters. */
	get_counters(oldinfo, counters);
	/* Decrease module usage counts and free resource */
	loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
	IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
			  NULL);
	xt_free_table_info(oldinfo);
	if (copy_to_user(counters_ptr, counters,
			 sizeof(struct xt_counters) * num_counters) != 0)
		ret = -EFAULT;
	vfree(counters);
	xt_table_unlock(t);
	return ret;

 put_module:
	module_put(t->me);
	xt_table_unlock(t);
 free_newinfo_counters_untrans:
	vfree(counters);
 out:
	return ret;
}
1348
1349 static int
1350 do_replace(struct net *net, void __user *user, unsigned int len)
1351 {
1352         int ret;
1353         struct ipt_replace tmp;
1354         struct xt_table_info *newinfo;
1355         void *loc_cpu_entry;
1356
1357         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1358                 return -EFAULT;
1359
1360         /* overflow check */
1361         if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1362                 return -ENOMEM;
1363
1364         newinfo = xt_alloc_table_info(tmp.size);
1365         if (!newinfo)
1366                 return -ENOMEM;
1367
1368         /* choose the copy that is on our node/cpu */
1369         loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1370         if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1371                            tmp.size) != 0) {
1372                 ret = -EFAULT;
1373                 goto free_newinfo;
1374         }
1375
1376         ret = translate_table(tmp.name, tmp.valid_hooks,
1377                               newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
1378                               tmp.hook_entry, tmp.underflow);
1379         if (ret != 0)
1380                 goto free_newinfo;
1381
1382         duprintf("ip_tables: Translated table\n");
1383
1384         ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1385                            tmp.num_counters, tmp.counters);
1386         if (ret)
1387                 goto free_newinfo_untrans;
1388         return 0;
1389
1390  free_newinfo_untrans:
1391         IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
1392  free_newinfo:
1393         xt_free_table_info(newinfo);
1394         return ret;
1395 }
1396
1397
1398 static int
1399 do_add_counters(struct net *net, void __user *user, unsigned int len, int compat)
1400 {
1401         unsigned int i;
1402         struct xt_counters_info tmp;
1403         struct xt_counters *paddc;
1404         unsigned int num_counters;
1405         const char *name;
1406         int size;
1407         void *ptmp;
1408         struct xt_table *t;
1409         const struct xt_table_info *private;
1410         int ret = 0;
1411         void *loc_cpu_entry;
1412 #ifdef CONFIG_COMPAT
1413         struct compat_xt_counters_info compat_tmp;
1414
1415         if (compat) {
1416                 ptmp = &compat_tmp;
1417                 size = sizeof(struct compat_xt_counters_info);
1418         } else
1419 #endif
1420         {
1421                 ptmp = &tmp;
1422                 size = sizeof(struct xt_counters_info);
1423         }
1424
1425         if (copy_from_user(ptmp, user, size) != 0)
1426                 return -EFAULT;
1427
1428 #ifdef CONFIG_COMPAT
1429         if (compat) {
1430                 num_counters = compat_tmp.num_counters;
1431                 name = compat_tmp.name;
1432         } else
1433 #endif
1434         {
1435                 num_counters = tmp.num_counters;
1436                 name = tmp.name;
1437         }
1438
1439         if (len != size + num_counters * sizeof(struct xt_counters))
1440                 return -EINVAL;
1441
1442         paddc = vmalloc_node(len - size, numa_node_id());
1443         if (!paddc)
1444                 return -ENOMEM;
1445
1446         if (copy_from_user(paddc, user + size, len - size) != 0) {
1447                 ret = -EFAULT;
1448                 goto free;
1449         }
1450
1451         t = xt_find_table_lock(net, AF_INET, name);
1452         if (!t || IS_ERR(t)) {
1453                 ret = t ? PTR_ERR(t) : -ENOENT;
1454                 goto free;
1455         }
1456
1457         mutex_lock(&t->lock);
1458         private = t->private;
1459         if (private->number != num_counters) {
1460                 ret = -EINVAL;
1461                 goto unlock_up_free;
1462         }
1463
1464         preempt_disable();
1465         i = 0;
1466         /* Choose the copy that is on our node */
1467         loc_cpu_entry = private->entries[raw_smp_processor_id()];
1468         IPT_ENTRY_ITERATE(loc_cpu_entry,
1469                           private->size,
1470                           add_counter_to_entry,
1471                           paddc,
1472                           &i);
1473         preempt_enable();
1474  unlock_up_free:
1475         mutex_unlock(&t->lock);
1476         xt_table_unlock(t);
1477         module_put(t->me);
1478  free:
1479         vfree(paddc);
1480
1481         return ret;
1482 }
1483
1484 #ifdef CONFIG_COMPAT
/* 32-bit userland's view of struct ipt_replace: same fields, but
 * pointer-sized members use compat types so the layout matches what a
 * 32-bit iptables binary passes on a 64-bit kernel. */
struct compat_ipt_replace {
	char			name[IPT_TABLE_MAXNAMELEN];
	u32			valid_hooks;
	u32			num_entries;
	u32			size;		/* size of entries[] in bytes */
	u32			hook_entry[NF_INET_NUMHOOKS];
	u32			underflow[NF_INET_NUMHOOKS];
	u32			num_counters;
	compat_uptr_t		counters;	/* struct ipt_counters * */
	struct compat_ipt_entry entries[0];
};
1496
/* Copy one native entry @e out to userspace in compat (32-bit) layout
 * at *dstptr, substituting the counter snapshot slot *i and fixing
 * target_offset/next_offset for the shrunken layout.  Advances
 * *dstptr, shrinks *size by the layout delta and increments *i. */
static int
compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
			  unsigned int *size, struct xt_counters *counters,
			  unsigned int *i)
{
	struct ipt_entry_target *t;
	struct compat_ipt_entry __user *ce;
	u_int16_t target_offset, next_offset;
	compat_uint_t origsize;
	int ret;

	ret = -EFAULT;
	origsize = *size;
	ce = (struct compat_ipt_entry __user *)*dstptr;
	/* Copies the full native header; the excess beyond the compat
	 * header is overwritten by the match data written below. */
	if (copy_to_user(ce, e, sizeof(struct ipt_entry)))
		goto out;

	if (copy_to_user(&ce->counters, &counters[*i], sizeof(counters[*i])))
		goto out;

	*dstptr += sizeof(struct compat_ipt_entry);
	*size -= sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);

	/* Matches and target each shrink by their own compat delta ... */
	ret = IPT_MATCH_ITERATE(e, xt_compat_match_to_user, dstptr, size);
	target_offset = e->target_offset - (origsize - *size);
	if (ret)
		goto out;
	t = ipt_get_target(e);
	ret = xt_compat_target_to_user(t, dstptr, size);
	if (ret)
		goto out;
	ret = -EFAULT;
	/* ... so both offsets move back by the bytes saved so far. */
	next_offset = e->next_offset - (origsize - *size);
	if (put_user(target_offset, &ce->target_offset))
		goto out;
	if (put_user(next_offset, &ce->next_offset))
		goto out;

	(*i)++;
	return 0;
out:
	return ret;
}
1540
1541 static int
1542 compat_find_calc_match(struct ipt_entry_match *m,
1543                        const char *name,
1544                        const struct ipt_ip *ip,
1545                        unsigned int hookmask,
1546                        int *size, unsigned int *i)
1547 {
1548         struct xt_match *match;
1549
1550         match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
1551                                                       m->u.user.revision),
1552                                         "ipt_%s", m->u.user.name);
1553         if (IS_ERR(match) || !match) {
1554                 duprintf("compat_check_calc_match: `%s' not found\n",
1555                          m->u.user.name);
1556                 return match ? PTR_ERR(match) : -ENOENT;
1557         }
1558         m->u.kernel.match = match;
1559         *size += xt_compat_match_offset(match);
1560
1561         (*i)++;
1562         return 0;
1563 }
1564
1565 static int
1566 compat_release_match(struct ipt_entry_match *m, unsigned int *i)
1567 {
1568         if (i && (*i)-- == 0)
1569                 return 1;
1570
1571         module_put(m->u.kernel.match->me);
1572         return 0;
1573 }
1574
1575 static int
1576 compat_release_entry(struct compat_ipt_entry *e, unsigned int *i)
1577 {
1578         struct ipt_entry_target *t;
1579
1580         if (i && (*i)-- == 0)
1581                 return 1;
1582
1583         /* Cleanup all matches */
1584         COMPAT_IPT_MATCH_ITERATE(e, compat_release_match, NULL);
1585         t = compat_ipt_get_target(e);
1586         module_put(t->u.kernel.target->me);
1587         return 0;
1588 }
1589
1590 static int
1591 check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
1592                                   struct xt_table_info *newinfo,
1593                                   unsigned int *size,
1594                                   unsigned char *base,
1595                                   unsigned char *limit,
1596                                   unsigned int *hook_entries,
1597                                   unsigned int *underflows,
1598                                   unsigned int *i,
1599                                   const char *name)
1600 {
1601         struct ipt_entry_target *t;
1602         struct xt_target *target;
1603         unsigned int entry_offset;
1604         unsigned int j;
1605         int ret, off, h;
1606
1607         duprintf("check_compat_entry_size_and_hooks %p\n", e);
1608         if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0
1609             || (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) {
1610                 duprintf("Bad offset %p, limit = %p\n", e, limit);
1611                 return -EINVAL;
1612         }
1613
1614         if (e->next_offset < sizeof(struct compat_ipt_entry) +
1615                              sizeof(struct compat_xt_entry_target)) {
1616                 duprintf("checking: element %p size %u\n",
1617                          e, e->next_offset);
1618                 return -EINVAL;
1619         }
1620
1621         /* For purposes of check_entry casting the compat entry is fine */
1622         ret = check_entry((struct ipt_entry *)e, name);
1623         if (ret)
1624                 return ret;
1625
1626         off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1627         entry_offset = (void *)e - (void *)base;
1628         j = 0;
1629         ret = COMPAT_IPT_MATCH_ITERATE(e, compat_find_calc_match, name,
1630                                        &e->ip, e->comefrom, &off, &j);
1631         if (ret != 0)
1632                 goto release_matches;
1633
1634         t = compat_ipt_get_target(e);
1635         target = try_then_request_module(xt_find_target(AF_INET,
1636                                                         t->u.user.name,
1637                                                         t->u.user.revision),
1638                                          "ipt_%s", t->u.user.name);
1639         if (IS_ERR(target) || !target) {
1640                 duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
1641                          t->u.user.name);
1642                 ret = target ? PTR_ERR(target) : -ENOENT;
1643                 goto release_matches;
1644         }
1645         t->u.kernel.target = target;
1646
1647         off += xt_compat_target_offset(target);
1648         *size += off;
1649         ret = xt_compat_add_offset(AF_INET, entry_offset, off);
1650         if (ret)
1651                 goto out;
1652
1653         /* Check hooks & underflows */
1654         for (h = 0; h < NF_INET_NUMHOOKS; h++) {
1655                 if ((unsigned char *)e - base == hook_entries[h])
1656                         newinfo->hook_entry[h] = hook_entries[h];
1657                 if ((unsigned char *)e - base == underflows[h])
1658                         newinfo->underflow[h] = underflows[h];
1659         }
1660
1661         /* Clear counters and comefrom */
1662         memset(&e->counters, 0, sizeof(e->counters));
1663         e->comefrom = 0;
1664
1665         (*i)++;
1666         return 0;
1667
1668 out:
1669         module_put(t->u.kernel.target->me);
1670 release_matches:
1671         IPT_MATCH_ITERATE(e, compat_release_match, &j);
1672         return ret;
1673 }
1674
1675 static int
1676 compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
1677                             unsigned int *size, const char *name,
1678                             struct xt_table_info *newinfo, unsigned char *base)
1679 {
1680         struct ipt_entry_target *t;
1681         struct xt_target *target;
1682         struct ipt_entry *de;
1683         unsigned int origsize;
1684         int ret, h;
1685
1686         ret = 0;
1687         origsize = *size;
1688         de = (struct ipt_entry *)*dstptr;
1689         memcpy(de, e, sizeof(struct ipt_entry));
1690         memcpy(&de->counters, &e->counters, sizeof(e->counters));
1691
1692         *dstptr += sizeof(struct ipt_entry);
1693         *size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1694
1695         ret = COMPAT_IPT_MATCH_ITERATE(e, xt_compat_match_from_user,
1696                                        dstptr, size);
1697         if (ret)
1698                 return ret;
1699         de->target_offset = e->target_offset - (origsize - *size);
1700         t = compat_ipt_get_target(e);
1701         target = t->u.kernel.target;
1702         xt_compat_target_from_user(t, dstptr, size);
1703
1704         de->next_offset = e->next_offset - (origsize - *size);
1705         for (h = 0; h < NF_INET_NUMHOOKS; h++) {
1706                 if ((unsigned char *)de - base < newinfo->hook_entry[h])
1707                         newinfo->hook_entry[h] -= origsize - *size;
1708                 if ((unsigned char *)de - base < newinfo->underflow[h])
1709                         newinfo->underflow[h] -= origsize - *size;
1710         }
1711         return ret;
1712 }
1713
1714 static int
1715 compat_check_entry(struct ipt_entry *e, const char *name,
1716                                      unsigned int *i)
1717 {
1718         struct xt_mtchk_param mtpar;
1719         unsigned int j;
1720         int ret;
1721
1722         j = 0;
1723         mtpar.table     = name;
1724         mtpar.entryinfo = &e->ip;
1725         mtpar.hook_mask = e->comefrom;
1726         mtpar.family    = NFPROTO_IPV4;
1727         ret = IPT_MATCH_ITERATE(e, check_match, &mtpar, &j);
1728         if (ret)
1729                 goto cleanup_matches;
1730
1731         ret = check_target(e, name);
1732         if (ret)
1733                 goto cleanup_matches;
1734
1735         (*i)++;
1736         return 0;
1737
1738  cleanup_matches:
1739         IPT_MATCH_ITERATE(e, cleanup_match, &j);
1740         return ret;
1741 }
1742
/*
 * Build a native xt_table_info from a blob of compat (32-bit layout)
 * entries in *pentry0.  Two passes over the compat blob: the first
 * validates every entry and records its size delta (via
 * xt_compat_add_offset() inside check_compat_entry_size_and_hooks,
 * under xt_compat_lock); the second copies each entry into a freshly
 * allocated native table, translating as it goes.  On success,
 * *pinfo/*pentry0 are replaced with the native table and the old info
 * is freed; on failure, module references taken on the compat entries
 * are dropped again.
 */
static int
translate_compat_table(const char *name,
		       unsigned int valid_hooks,
		       struct xt_table_info **pinfo,
		       void **pentry0,
		       unsigned int total_size,
		       unsigned int number,
		       unsigned int *hook_entries,
		       unsigned int *underflows)
{
	unsigned int i, j;
	struct xt_table_info *newinfo, *info;
	void *pos, *entry0, *entry1;
	unsigned int size;
	int ret;

	info = *pinfo;
	entry0 = *pentry0;	/* compat-layout entries */
	size = total_size;
	info->number = number;

	/* Init all hooks to impossible value. */
	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
		info->hook_entry[i] = 0xFFFFFFFF;
		info->underflow[i] = 0xFFFFFFFF;
	}

	duprintf("translate_compat_table: size %u\n", info->size);
	j = 0;	/* counts entries successfully checked (for partial unwind) */
	xt_compat_lock(AF_INET);
	/* Walk through entries, checking offsets. */
	ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size,
				       check_compat_entry_size_and_hooks,
				       info, &size, entry0,
				       entry0 + total_size,
				       hook_entries, underflows, &j, name);
	if (ret != 0)
		goto out_unlock;

	ret = -EINVAL;
	if (j != number) {
		duprintf("translate_compat_table: %u not %u entries\n",
			 j, number);
		goto out_unlock;
	}

	/* Check hooks all assigned */
	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
		/* Only hooks which are valid */
		if (!(valid_hooks & (1 << i)))
			continue;
		if (info->hook_entry[i] == 0xFFFFFFFF) {
			duprintf("Invalid hook entry %u %u\n",
				 i, hook_entries[i]);
			goto out_unlock;
		}
		if (info->underflow[i] == 0xFFFFFFFF) {
			duprintf("Invalid underflow %u %u\n",
				 i, underflows[i]);
			goto out_unlock;
		}
	}

	/* 'size' now includes the per-entry growth accumulated during
	 * pass one, i.e. the native-layout total. */
	ret = -ENOMEM;
	newinfo = xt_alloc_table_info(size);
	if (!newinfo)
		goto out_unlock;

	newinfo->number = number;
	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
		newinfo->hook_entry[i] = info->hook_entry[i];
		newinfo->underflow[i] = info->underflow[i];
	}
	/* Second pass: translate each compat entry into entry1; this also
	 * shifts the hook entry/underflow offsets as entries grow. */
	entry1 = newinfo->entries[raw_smp_processor_id()];
	pos = entry1;
	size = total_size;
	ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size,
				       compat_copy_entry_from_user,
				       &pos, &size, name, newinfo, entry1);
	xt_compat_flush_offsets(AF_INET);
	xt_compat_unlock(AF_INET);
	if (ret)
		goto free_newinfo;

	ret = -ELOOP;
	if (!mark_source_chains(newinfo, valid_hooks, entry1))
		goto free_newinfo;

	i = 0;	/* counts native entries that passed compat_check_entry */
	ret = IPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry,
				name, &i);
	if (ret) {
		/* Partial failure at entry i: release compat refs for the
		 * j-i entries not yet checked (walking entry0 from i), and
		 * clean up the i already-checked native entries in entry1.
		 * NOTE(review): the entry0 walk is bounded by newinfo->size
		 * (the native size) although entry0 holds compat-sized
		 * entries — looks suspect; verify against upstream, which
		 * later rewrote this unwind path.
		 */
		j -= i;
		COMPAT_IPT_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i,
						  compat_release_entry, &j);
		IPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i);
		xt_free_table_info(newinfo);
		return ret;
	}

	/* And one copy for every other CPU */
	for_each_possible_cpu(i)
		if (newinfo->entries[i] && newinfo->entries[i] != entry1)
			memcpy(newinfo->entries[i], entry1, newinfo->size);

	*pinfo = newinfo;
	*pentry0 = entry1;
	xt_free_table_info(info);
	return 0;

free_newinfo:
	xt_free_table_info(newinfo);
out:
	/* Drop the module references taken during pass one. */
	COMPAT_IPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j);
	return ret;
out_unlock:
	xt_compat_flush_offsets(AF_INET);
	xt_compat_unlock(AF_INET);
	goto out;
}
1863
1864 static int
1865 compat_do_replace(struct net *net, void __user *user, unsigned int len)
1866 {
1867         int ret;
1868         struct compat_ipt_replace tmp;
1869         struct xt_table_info *newinfo;
1870         void *loc_cpu_entry;
1871
1872         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1873                 return -EFAULT;
1874
1875         /* overflow check */
1876         if (tmp.size >= INT_MAX / num_possible_cpus())
1877                 return -ENOMEM;
1878         if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1879                 return -ENOMEM;
1880
1881         newinfo = xt_alloc_table_info(tmp.size);
1882         if (!newinfo)
1883                 return -ENOMEM;
1884
1885         /* choose the copy that is on our node/cpu */
1886         loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1887         if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1888                            tmp.size) != 0) {
1889                 ret = -EFAULT;
1890                 goto free_newinfo;
1891         }
1892
1893         ret = translate_compat_table(tmp.name, tmp.valid_hooks,
1894                                      &newinfo, &loc_cpu_entry, tmp.size,
1895                                      tmp.num_entries, tmp.hook_entry,
1896                                      tmp.underflow);
1897         if (ret != 0)
1898                 goto free_newinfo;
1899
1900         duprintf("compat_do_replace: Translated table\n");
1901
1902         ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1903                            tmp.num_counters, compat_ptr(tmp.counters));
1904         if (ret)
1905                 goto free_newinfo_untrans;
1906         return 0;
1907
1908  free_newinfo_untrans:
1909         IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
1910  free_newinfo:
1911         xt_free_table_info(newinfo);
1912         return ret;
1913 }
1914
1915 static int
1916 compat_do_ipt_set_ctl(struct sock *sk,  int cmd, void __user *user,
1917                       unsigned int len)
1918 {
1919         int ret;
1920
1921         if (!capable(CAP_NET_ADMIN))
1922                 return -EPERM;
1923
1924         switch (cmd) {
1925         case IPT_SO_SET_REPLACE:
1926                 ret = compat_do_replace(sock_net(sk), user, len);
1927                 break;
1928
1929         case IPT_SO_SET_ADD_COUNTERS:
1930                 ret = do_add_counters(sock_net(sk), user, len, 1);
1931                 break;
1932
1933         default:
1934                 duprintf("do_ipt_set_ctl:  unknown request %i\n", cmd);
1935                 ret = -EINVAL;
1936         }
1937
1938         return ret;
1939 }
1940
/* Userspace view of the IPT_SO_GET_ENTRIES reply in 32-bit (compat)
 * layout: table name and total blob size, followed by the packed
 * entry table (flexible trailing array). */
struct compat_ipt_get_entries {
	char name[IPT_TABLE_MAXNAMELEN];
	compat_uint_t size;
	struct compat_ipt_entry entrytable[0];
};
1946
1947 static int
1948 compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
1949                             void __user *userptr)
1950 {
1951         struct xt_counters *counters;
1952         const struct xt_table_info *private = table->private;
1953         void __user *pos;
1954         unsigned int size;
1955         int ret = 0;
1956         const void *loc_cpu_entry;
1957         unsigned int i = 0;
1958
1959         counters = alloc_counters(table);
1960         if (IS_ERR(counters))
1961                 return PTR_ERR(counters);
1962
1963         /* choose the copy that is on our node/cpu, ...
1964          * This choice is lazy (because current thread is
1965          * allowed to migrate to another cpu)
1966          */
1967         loc_cpu_entry = private->entries[raw_smp_processor_id()];
1968         pos = userptr;
1969         size = total_size;
1970         ret = IPT_ENTRY_ITERATE(loc_cpu_entry, total_size,
1971                                 compat_copy_entry_to_user,
1972                                 &pos, &size, counters, &i);
1973
1974         vfree(counters);
1975         return ret;
1976 }
1977
1978 static int
1979 compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
1980                    int *len)
1981 {
1982         int ret;
1983         struct compat_ipt_get_entries get;
1984         struct xt_table *t;
1985
1986         if (*len < sizeof(get)) {
1987                 duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
1988                 return -EINVAL;
1989         }
1990
1991         if (copy_from_user(&get, uptr, sizeof(get)) != 0)
1992                 return -EFAULT;
1993
1994         if (*len != sizeof(struct compat_ipt_get_entries) + get.size) {
1995                 duprintf("compat_get_entries: %u != %zu\n",
1996                          *len, sizeof(get) + get.size);
1997                 return -EINVAL;
1998         }
1999
2000         xt_compat_lock(AF_INET);
2001         t = xt_find_table_lock(net, AF_INET, get.name);
2002         if (t && !IS_ERR(t)) {
2003                 const struct xt_table_info *private = t->private;
2004                 struct xt_table_info info;
2005                 duprintf("t->private->number = %u\n", private->number);
2006                 ret = compat_table_info(private, &info);
2007                 if (!ret && get.size == info.size) {
2008                         ret = compat_copy_entries_to_user(private->size,
2009                                                           t, uptr->entrytable);
2010                 } else if (!ret) {
2011                         duprintf("compat_get_entries: I've got %u not %u!\n",
2012                                  private->size, get.size);
2013                         ret = -EAGAIN;
2014                 }
2015                 xt_compat_flush_offsets(AF_INET);
2016                 module_put(t->me);
2017                 xt_table_unlock(t);
2018         } else
2019                 ret = t ? PTR_ERR(t) : -ENOENT;
2020
2021         xt_compat_unlock(AF_INET);
2022         return ret;
2023 }
2024
2025 static int do_ipt_get_ctl(struct sock *, int, void __user *, int *);
2026
2027 static int
2028 compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2029 {
2030         int ret;
2031
2032         if (!capable(CAP_NET_ADMIN))
2033                 return -EPERM;
2034
2035         switch (cmd) {
2036         case IPT_SO_GET_INFO:
2037                 ret = get_info(sock_net(sk), user, len, 1);
2038                 break;
2039         case IPT_SO_GET_ENTRIES:
2040                 ret = compat_get_entries(sock_net(sk), user, len);
2041                 break;
2042         default:
2043                 ret = do_ipt_get_ctl(sk, cmd, user, len);
2044         }
2045         return ret;
2046 }
2047 #endif
2048
2049 static int
2050 do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2051 {
2052         int ret;
2053
2054         if (!capable(CAP_NET_ADMIN))
2055                 return -EPERM;
2056
2057         switch (cmd) {
2058         case IPT_SO_SET_REPLACE:
2059                 ret = do_replace(sock_net(sk), user, len);
2060                 break;
2061
2062         case IPT_SO_SET_ADD_COUNTERS:
2063                 ret = do_add_counters(sock_net(sk), user, len, 0);
2064                 break;
2065
2066         default:
2067                 duprintf("do_ipt_set_ctl:  unknown request %i\n", cmd);
2068                 ret = -EINVAL;
2069         }
2070
2071         return ret;
2072 }
2073
2074 static int
2075 do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2076 {
2077         int ret;
2078
2079         if (!capable(CAP_NET_ADMIN))
2080                 return -EPERM;
2081
2082         switch (cmd) {
2083         case IPT_SO_GET_INFO:
2084                 ret = get_info(sock_net(sk), user, len, 0);
2085                 break;
2086
2087         case IPT_SO_GET_ENTRIES:
2088                 ret = get_entries(sock_net(sk), user, len);
2089                 break;
2090
2091         case IPT_SO_GET_REVISION_MATCH:
2092         case IPT_SO_GET_REVISION_TARGET: {
2093                 struct ipt_get_revision rev;
2094                 int target;
2095
2096                 if (*len != sizeof(rev)) {
2097                         ret = -EINVAL;
2098                         break;
2099                 }
2100                 if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
2101                         ret = -EFAULT;
2102                         break;
2103                 }
2104
2105                 if (cmd == IPT_SO_GET_REVISION_TARGET)
2106                         target = 1;
2107                 else
2108                         target = 0;
2109
2110                 try_then_request_module(xt_find_revision(AF_INET, rev.name,
2111                                                          rev.revision,
2112                                                          target, &ret),
2113                                         "ipt_%s", rev.name);
2114                 break;
2115         }
2116
2117         default:
2118                 duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
2119                 ret = -EINVAL;
2120         }
2121
2122         return ret;
2123 }
2124
/*
 * Allocate, translate and register a new table whose initial ruleset is
 * described by @repl.  Returns the registered xt_table on success or an
 * ERR_PTR() on failure.
 */
struct xt_table *ipt_register_table(struct net *net, struct xt_table *table,
				    const struct ipt_replace *repl)
{
	int ret;
	struct xt_table_info *newinfo;
	/* Zeroed placeholder info passed to xt_register_table along with
	 * newinfo (positional initializer: size/number/initial_entries,
	 * hook_entry[], underflow[], entries[]). */
	struct xt_table_info bootstrap
		= { 0, 0, 0, { 0 }, { 0 }, { } };
	void *loc_cpu_entry;
	struct xt_table *new_table;

	newinfo = xt_alloc_table_info(repl->size);
	if (!newinfo) {
		ret = -ENOMEM;
		goto out;
	}

	/* choose the copy on our node/cpu, but dont care about preemption */
	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
	memcpy(loc_cpu_entry, repl->entries, repl->size);

	/* Validate the ruleset and resolve its matches/targets. */
	ret = translate_table(table->name, table->valid_hooks,
			      newinfo, loc_cpu_entry, repl->size,
			      repl->num_entries,
			      repl->hook_entry,
			      repl->underflow);
	if (ret != 0)
		goto out_free;

	new_table = xt_register_table(net, table, &bootstrap, newinfo);
	if (IS_ERR(new_table)) {
		/* Registration failed: newinfo was not taken over, so it
		 * is still ours to free. */
		ret = PTR_ERR(new_table);
		goto out_free;
	}

	return new_table;

out_free:
	xt_free_table_info(newinfo);
out:
	return ERR_PTR(ret);
}
2166
2167 void ipt_unregister_table(struct xt_table *table)
2168 {
2169         struct xt_table_info *private;
2170         void *loc_cpu_entry;
2171         struct module *table_owner = table->me;
2172
2173         private = xt_unregister_table(table);
2174
2175         /* Decrease module usage counts and free resources */
2176         loc_cpu_entry = private->entries[raw_smp_processor_id()];
2177         IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL);
2178         if (private->number > private->initial_entries)
2179                 module_put(table_owner);
2180         xt_free_table_info(private);
2181 }
2182
/* Returns 1 if the type and code is matched by the range, 0 otherwise */
static inline bool
icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
		     u_int8_t type, u_int8_t code,
		     bool invert)
{
	bool hit;

	/* test_type 0xFF acts as a wildcard: any type/code matches. */
	if (test_type == 0xFF)
		hit = true;
	else
		hit = type == test_type &&
		      code >= min_code && code <= max_code;

	return hit ^ invert;
}
2193
/* ->match hook of the built-in "icmp" match: test the packet's ICMP
 * type/code against the range configured in the rule, honouring the
 * IPT_ICMP_INV inversion flag. */
static bool
icmp_match(const struct sk_buff *skb, const struct xt_match_param *par)
{
	const struct icmphdr *ic;
	struct icmphdr _icmph;	/* stack copy for skb_header_pointer if needed */
	const struct ipt_icmp *icmpinfo = par->matchinfo;

	/* Must not be a fragment. */
	if (par->fragoff != 0)
		return false;

	ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph);
	if (ic == NULL) {
		/* We've been asked to examine this packet, and we
		 * can't.  Hence, no choice but to drop.
		 */
		duprintf("Dropping evil ICMP tinygram.\n");
		*par->hotdrop = true;
		return false;
	}

	return icmp_type_code_match(icmpinfo->type,
				    icmpinfo->code[0],	/* min code */
				    icmpinfo->code[1],	/* max code */
				    ic->type, ic->code,
				    !!(icmpinfo->invflags&IPT_ICMP_INV));
}
2221
2222 static bool icmp_checkentry(const struct xt_mtchk_param *par)
2223 {
2224         const struct ipt_icmp *icmpinfo = par->matchinfo;
2225
2226         /* Must specify no unknown invflags */
2227         return !(icmpinfo->invflags & ~IPT_ICMP_INV);
2228 }
2229
/* The built-in targets: standard (NULL) and error. */
/* Standard target: the per-rule data is just an int verdict; no
 * ->target function is provided.  Compat hooks convert the verdict
 * between 32-bit and native int layouts. */
static struct xt_target ipt_standard_target __read_mostly = {
	.name		= IPT_STANDARD_TARGET,
	.targetsize	= sizeof(int),
	.family		= AF_INET,
#ifdef CONFIG_COMPAT
	.compatsize	= sizeof(compat_int_t),
	.compat_from_user = compat_standard_from_user,
	.compat_to_user	= compat_standard_to_user,
#endif
};
2241
/* Error target: handled by ipt_error(); its per-rule data is the
 * error-name string. */
static struct xt_target ipt_error_target __read_mostly = {
	.name		= IPT_ERROR_TARGET,
	.target		= ipt_error,
	.targetsize	= IPT_FUNCTION_MAXNAMELEN,
	.family		= AF_INET,
};
2248
/* Hooks the ip_tables {set,get}sockopt command ranges into the PF_INET
 * socket-option dispatch, with 32-bit compat entry points. */
static struct nf_sockopt_ops ipt_sockopts = {
	.pf		= PF_INET,
	.set_optmin	= IPT_BASE_CTL,
	.set_optmax	= IPT_SO_SET_MAX+1,
	.set		= do_ipt_set_ctl,
#ifdef CONFIG_COMPAT
	.compat_set	= compat_do_ipt_set_ctl,
#endif
	.get_optmin	= IPT_BASE_CTL,
	.get_optmax	= IPT_SO_GET_MAX+1,
	.get		= do_ipt_get_ctl,
#ifdef CONFIG_COMPAT
	.compat_get	= compat_do_ipt_get_ctl,
#endif
	.owner		= THIS_MODULE,
};
2265
/* The built-in "icmp" match (.proto restricts it to IPPROTO_ICMP
 * rules). */
static struct xt_match icmp_matchstruct __read_mostly = {
	.name		= "icmp",
	.match		= icmp_match,
	.matchsize	= sizeof(struct ipt_icmp),
	.checkentry	= icmp_checkentry,
	.proto		= IPPROTO_ICMP,
	.family		= AF_INET,
};
2274
/* Per-network-namespace init: set up the AF_INET xtables state. */
static int __net_init ip_tables_net_init(struct net *net)
{
	return xt_proto_init(net, AF_INET);
}
2279
/* Per-network-namespace teardown: release the AF_INET xtables state. */
static void __net_exit ip_tables_net_exit(struct net *net)
{
	xt_proto_fini(net, AF_INET);
}
2284
/* Registered with the pernet subsystem so every network namespace gets
 * its own AF_INET xtables state. */
static struct pernet_operations ip_tables_net_ops = {
	.init = ip_tables_net_init,
	.exit = ip_tables_net_exit,
};
2289
/* Module init: register the pernet ops, the two built-in targets, the
 * built-in icmp match, and the sockopt interface — in that order.  The
 * err1..err5 labels unwind in exact reverse registration order. */
static int __init ip_tables_init(void)
{
	int ret;

	ret = register_pernet_subsys(&ip_tables_net_ops);
	if (ret < 0)
		goto err1;

	/* Noone else will be downing sem now, so we won't sleep */
	ret = xt_register_target(&ipt_standard_target);
	if (ret < 0)
		goto err2;
	ret = xt_register_target(&ipt_error_target);
	if (ret < 0)
		goto err3;
	ret = xt_register_match(&icmp_matchstruct);
	if (ret < 0)
		goto err4;

	/* Register setsockopt */
	ret = nf_register_sockopt(&ipt_sockopts);
	if (ret < 0)
		goto err5;

	printk(KERN_INFO "ip_tables: (C) 2000-2006 Netfilter Core Team\n");
	return 0;

	/* Unwind: each label undoes the registrations that succeeded
	 * before the failing step. */
err5:
	xt_unregister_match(&icmp_matchstruct);
err4:
	xt_unregister_target(&ipt_error_target);
err3:
	xt_unregister_target(&ipt_standard_target);
err2:
	unregister_pernet_subsys(&ip_tables_net_ops);
err1:
	return ret;
}
2328
/* Module exit: unregister everything in the reverse order of
 * ip_tables_init(). */
static void __exit ip_tables_fini(void)
{
	nf_unregister_sockopt(&ipt_sockopts);

	xt_unregister_match(&icmp_matchstruct);
	xt_unregister_target(&ipt_error_target);
	xt_unregister_target(&ipt_standard_target);

	unregister_pernet_subsys(&ip_tables_net_ops);
}
2339
/* Public API used by the individual iptables table modules. */
EXPORT_SYMBOL(ipt_register_table);
EXPORT_SYMBOL(ipt_unregister_table);
EXPORT_SYMBOL(ipt_do_table);
module_init(ip_tables_init);
module_exit(ip_tables_fini);