Linux-2.6.12-rc2
[safe/jmp/linux-2.6] / net / ipv4 / netfilter / arp_tables.c
1 /*
2  * Packet matching code for ARP packets.
3  *
4  * Based heavily, if not almost entirely, upon ip_tables.c framework.
5  *
6  * Some ARP specific bits are:
7  *
8  * Copyright (C) 2002 David S. Miller (davem@redhat.com)
9  *
10  */
11
12 #include <linux/config.h>
13 #include <linux/kernel.h>
14 #include <linux/skbuff.h>
15 #include <linux/netdevice.h>
16 #include <linux/if_arp.h>
17 #include <linux/kmod.h>
18 #include <linux/vmalloc.h>
19 #include <linux/proc_fs.h>
20 #include <linux/module.h>
21 #include <linux/init.h>
22
23 #include <asm/uaccess.h>
24 #include <asm/semaphore.h>
25
26 #include <linux/netfilter_arp/arp_tables.h>
27
/* Module metadata: license, author and a one-line description. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("David S. Miller <davem@redhat.com>");
MODULE_DESCRIPTION("arptables core");
31
/* Debug switches.  Both are commented out here, so dprintf() and duprintf()
 * below expand to nothing unless the corresponding symbol gets defined.
 */
/*#define DEBUG_ARP_TABLES*/
/*#define DEBUG_ARP_TABLES_USER*/

/* dprintf(): forwards to printk() when DEBUG_ARP_TABLES is defined.
 * In this file it is used by the per-packet matching code.
 */
#ifdef DEBUG_ARP_TABLES
#define dprintf(format, args...)  printk(format , ## args)
#else
#define dprintf(format, args...)
#endif

/* duprintf(): forwards to printk() when DEBUG_ARP_TABLES_USER is defined.
 * In this file it is used by the table checking/translation code.
 */
#ifdef DEBUG_ARP_TABLES_USER
#define duprintf(format, args...) printk(format , ## args)
#else
#define duprintf(format, args...)
#endif
46
/* ARP_NF_ASSERT(x): with CONFIG_NETFILTER_DEBUG set, print the function,
 * file and line through printk() when x evaluates to false; nothing else
 * happens on failure.  Without CONFIG_NETFILTER_DEBUG the macro expands to
 * nothing, so x is not evaluated at all.
 */
#ifdef CONFIG_NETFILTER_DEBUG
#define ARP_NF_ASSERT(x)                                        \
do {                                                            \
        if (!(x))                                               \
                printk("ARP_NF_ASSERT: %s:%s:%u\n",             \
                       __FUNCTION__, __FILE__, __LINE__);       \
} while(0)
#else
#define ARP_NF_ASSERT(x)
#endif
/* Round x up to a multiple of SMP_CACHE_BYTES.  The add-and-mask form is
 * only correct when SMP_CACHE_BYTES is a power of two (assumed; it is
 * defined outside this file -- TODO confirm).
 */
#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
58
/* Mutex taken (via down_interruptible()) by the named-list lookups in this
 * file, e.g. the target lookup done from check_entry().
 */
static DECLARE_MUTEX(arpt_mutex);

/* Lock assertions, defined before lockhelp.h/listhelp.h are included
 * (presumably those headers expand them -- TODO confirm).  Both check that
 * down_trylock(&arpt_mutex) returns non-zero; the macro argument is ignored.
 * NOTE(review): non-zero presumably means "could not be taken, i.e. already
 * held"; if so, a failing assertion has itself acquired the mutex -- confirm.
 */
#define ASSERT_READ_LOCK(x) ARP_NF_ASSERT(down_trylock(&arpt_mutex) != 0)
#define ASSERT_WRITE_LOCK(x) ARP_NF_ASSERT(down_trylock(&arpt_mutex) != 0)
#include <linux/netfilter_ipv4/lockhelp.h>
#include <linux/netfilter_ipv4/listhelp.h>
65
/* Kernel-side description of one table's rule set. */
struct arpt_table_info {
        /* Size in bytes of one copy of the rule blob held in entries[]. */
        unsigned int size;
        /* Number of entries in the blob (translate_table() verifies it). */
        unsigned int number;
        /* Carried over from the previous table info by replace_table();
         * its meaning is not visible in this part of the file.
         */
        unsigned int initial_entries;
        /* Per hook: byte offset into entries[] where traversal starts. */
        unsigned int hook_entry[NF_ARP_NUMHOOKS];
        /* Per hook: byte offset into entries[] of the underflow entry;
         * arpt_do_table() uses it as the initial "back" entry.
         */
        unsigned int underflow[NF_ARP_NUMHOOKS];
        /* The rule blob, cache-line aligned.  On SMP, translate_table()
         * places one copy per CPU, SMP_ALIGN(size) bytes apart.
         */
        char entries[0] __attribute__((aligned(SMP_CACHE_BYTES)));
};
74
/* Lists searched by name through find_inlist_lock(): targets and tables. */
static LIST_HEAD(arpt_target);
static LIST_HEAD(arpt_tables);
/* Add b bytes and p packets to counter c. */
#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)

/* Byte offset, inside t->entries, of the rule copy used for CPU p.
 * Without CONFIG_SMP every caller gets offset 0.
 */
#ifdef CONFIG_SMP
#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
#else
#define TABLE_OFFSET(t,p) 0
#endif
84
85 static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap,
86                                       char *hdr_addr, int len)
87 {
88         int i, ret;
89
90         if (len > ARPT_DEV_ADDR_LEN_MAX)
91                 len = ARPT_DEV_ADDR_LEN_MAX;
92
93         ret = 0;
94         for (i = 0; i < len; i++)
95                 ret |= (hdr_addr[i] ^ ap->addr[i]) & ap->mask[i];
96
97         return (ret != 0);
98 }
99
100 /* Returns whether packet matches rule or not. */
101 static inline int arp_packet_match(const struct arphdr *arphdr,
102                                    struct net_device *dev,
103                                    const char *indev,
104                                    const char *outdev,
105                                    const struct arpt_arp *arpinfo)
106 {
107         char *arpptr = (char *)(arphdr + 1);
108         char *src_devaddr, *tgt_devaddr;
109         u32 src_ipaddr, tgt_ipaddr;
110         int i, ret;
111
112 #define FWINV(bool,invflg) ((bool) ^ !!(arpinfo->invflags & invflg))
113
114         if (FWINV((arphdr->ar_op & arpinfo->arpop_mask) != arpinfo->arpop,
115                   ARPT_INV_ARPOP)) {
116                 dprintf("ARP operation field mismatch.\n");
117                 dprintf("ar_op: %04x info->arpop: %04x info->arpop_mask: %04x\n",
118                         arphdr->ar_op, arpinfo->arpop, arpinfo->arpop_mask);
119                 return 0;
120         }
121
122         if (FWINV((arphdr->ar_hrd & arpinfo->arhrd_mask) != arpinfo->arhrd,
123                   ARPT_INV_ARPHRD)) {
124                 dprintf("ARP hardware address format mismatch.\n");
125                 dprintf("ar_hrd: %04x info->arhrd: %04x info->arhrd_mask: %04x\n",
126                         arphdr->ar_hrd, arpinfo->arhrd, arpinfo->arhrd_mask);
127                 return 0;
128         }
129
130         if (FWINV((arphdr->ar_pro & arpinfo->arpro_mask) != arpinfo->arpro,
131                   ARPT_INV_ARPPRO)) {
132                 dprintf("ARP protocol address format mismatch.\n");
133                 dprintf("ar_pro: %04x info->arpro: %04x info->arpro_mask: %04x\n",
134                         arphdr->ar_pro, arpinfo->arpro, arpinfo->arpro_mask);
135                 return 0;
136         }
137
138         if (FWINV((arphdr->ar_hln & arpinfo->arhln_mask) != arpinfo->arhln,
139                   ARPT_INV_ARPHLN)) {
140                 dprintf("ARP hardware address length mismatch.\n");
141                 dprintf("ar_hln: %02x info->arhln: %02x info->arhln_mask: %02x\n",
142                         arphdr->ar_hln, arpinfo->arhln, arpinfo->arhln_mask);
143                 return 0;
144         }
145
146         src_devaddr = arpptr;
147         arpptr += dev->addr_len;
148         memcpy(&src_ipaddr, arpptr, sizeof(u32));
149         arpptr += sizeof(u32);
150         tgt_devaddr = arpptr;
151         arpptr += dev->addr_len;
152         memcpy(&tgt_ipaddr, arpptr, sizeof(u32));
153
154         if (FWINV(arp_devaddr_compare(&arpinfo->src_devaddr, src_devaddr, dev->addr_len),
155                   ARPT_INV_SRCDEVADDR) ||
156             FWINV(arp_devaddr_compare(&arpinfo->tgt_devaddr, tgt_devaddr, dev->addr_len),
157                   ARPT_INV_TGTDEVADDR)) {
158                 dprintf("Source or target device address mismatch.\n");
159
160                 return 0;
161         }
162
163         if (FWINV((src_ipaddr & arpinfo->smsk.s_addr) != arpinfo->src.s_addr,
164                   ARPT_INV_SRCIP) ||
165             FWINV(((tgt_ipaddr & arpinfo->tmsk.s_addr) != arpinfo->tgt.s_addr),
166                   ARPT_INV_TGTIP)) {
167                 dprintf("Source or target IP address mismatch.\n");
168
169                 dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
170                         NIPQUAD(src_ipaddr),
171                         NIPQUAD(arpinfo->smsk.s_addr),
172                         NIPQUAD(arpinfo->src.s_addr),
173                         arpinfo->invflags & ARPT_INV_SRCIP ? " (INV)" : "");
174                 dprintf("TGT: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
175                         NIPQUAD(tgt_ipaddr),
176                         NIPQUAD(arpinfo->tmsk.s_addr),
177                         NIPQUAD(arpinfo->tgt.s_addr),
178                         arpinfo->invflags & ARPT_INV_TGTIP ? " (INV)" : "");
179                 return 0;
180         }
181
182         /* Look for ifname matches.  */
183         for (i = 0, ret = 0; i < IFNAMSIZ; i++) {
184                 ret |= (indev[i] ^ arpinfo->iniface[i])
185                         & arpinfo->iniface_mask[i];
186         }
187
188         if (FWINV(ret != 0, ARPT_INV_VIA_IN)) {
189                 dprintf("VIA in mismatch (%s vs %s).%s\n",
190                         indev, arpinfo->iniface,
191                         arpinfo->invflags&ARPT_INV_VIA_IN ?" (INV)":"");
192                 return 0;
193         }
194
195         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
196                 unsigned long odev;
197                 memcpy(&odev, outdev + i*sizeof(unsigned long),
198                        sizeof(unsigned long));
199                 ret |= (odev
200                         ^ ((const unsigned long *)arpinfo->outiface)[i])
201                         & ((const unsigned long *)arpinfo->outiface_mask)[i];
202         }
203
204         if (FWINV(ret != 0, ARPT_INV_VIA_OUT)) {
205                 dprintf("VIA out mismatch (%s vs %s).%s\n",
206                         outdev, arpinfo->outiface,
207                         arpinfo->invflags&ARPT_INV_VIA_OUT ?" (INV)":"");
208                 return 0;
209         }
210
211         return 1;
212 }
213
214 static inline int arp_checkentry(const struct arpt_arp *arp)
215 {
216         if (arp->flags & ~ARPT_F_MASK) {
217                 duprintf("Unknown flag bits set: %08X\n",
218                          arp->flags & ~ARPT_F_MASK);
219                 return 0;
220         }
221         if (arp->invflags & ~ARPT_INV_MASK) {
222                 duprintf("Unknown invflag bits set: %08X\n",
223                          arp->invflags & ~ARPT_INV_MASK);
224                 return 0;
225         }
226
227         return 1;
228 }
229
230 static unsigned int arpt_error(struct sk_buff **pskb,
231                                unsigned int hooknum,
232                                const struct net_device *in,
233                                const struct net_device *out,
234                                const void *targinfo,
235                                void *userinfo)
236 {
237         if (net_ratelimit())
238                 printk("arp_tables: error: '%s'\n", (char *)targinfo);
239
240         return NF_DROP;
241 }
242
/* Return the entry located @offset bytes past @base. */
static inline struct arpt_entry *get_entry(void *base, unsigned int offset)
{
        char *start = base;

        return (struct arpt_entry *)(start + offset);
}
247
/* Run one ARP packet through the rules of @table attached to @hook.
 *
 * @pskb:     packet; handed to target functions, which may modify it.
 * @hook:     index into the table's hook_entry[] / underflow[] arrays.
 * @in, @out: input/output devices; either may be NULL, in which case an
 *            all-zero interface name is used for matching.
 * @table:    table to traverse; table->lock is read-locked for the walk.
 * @userdata: passed through unchanged to target functions.
 *
 * Returns the resulting verdict.  NF_DROP is returned right away when the
 * ARP header plus both hardware and both IPv4 addresses cannot be pulled.
 */
unsigned int arpt_do_table(struct sk_buff **pskb,
                           unsigned int hook,
                           const struct net_device *in,
                           const struct net_device *out,
                           struct arpt_table *table,
                           void *userdata)
{
        static const char nulldevname[IFNAMSIZ];
        unsigned int verdict = NF_DROP;
        struct arphdr *arp;
        /* Never set in this function, so the loop below only ends through
         * one of its break statements.
         */
        int hotdrop = 0;
        struct arpt_entry *e, *back;
        const char *indev, *outdev;
        void *table_base;

        /* ARP header, plus 2 device addresses, plus 2 IP addresses.  */
        if (!pskb_may_pull((*pskb), (sizeof(struct arphdr) +
                                     (2 * (*pskb)->dev->addr_len) +
                                     (2 * sizeof(u32)))))
                return NF_DROP;

        indev = in ? in->name : nulldevname;
        outdev = out ? out->name : nulldevname;

        read_lock_bh(&table->lock);
        /* Use the copy of the rules that belongs to the current CPU. */
        table_base = (void *)table->private->entries
                + TABLE_OFFSET(table->private,
                               smp_processor_id());
        e = get_entry(table_base, table->private->hook_entry[hook]);
        /* "back" is where a RETURN goes; it starts at the underflow entry. */
        back = get_entry(table_base, table->private->underflow[hook]);

        arp = (*pskb)->nh.arph;
        do {
                if (arp_packet_match(arp, (*pskb)->dev, indev, outdev, &e->arp)) {
                        struct arpt_entry_target *t;
                        int hdr_len;

                        /* Account the ARP header and address bytes to the
                         * matching rule.
                         */
                        hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) +
                                (2 * (*pskb)->dev->addr_len);
                        ADD_COUNTER(e->counters, hdr_len, 1);

                        t = arpt_get_target(e);

                        /* Standard target? */
                        if (!t->u.kernel.target->target) {
                                int v;

                                v = ((struct arpt_standard_target *)t)->verdict;
                                if (v < 0) {
                                        /* Pop from stack? */
                                        if (v != ARPT_RETURN) {
                                                /* Final verdict, stored
                                                 * as -(verdict) - 1.
                                                 */
                                                verdict = (unsigned)(-v) - 1;
                                                break;
                                        }
                                        /* RETURN: resume at the saved entry
                                         * and restore the back pointer that
                                         * was stashed in its comefrom.
                                         */
                                        e = back;
                                        back = get_entry(table_base,
                                                         back->comefrom);
                                        continue;
                                }
                                /* v >= 0 is an offset to continue at.  If it
                                 * is not simply the next entry this is a
                                 * jump, so remember where to come back to.
                                 */
                                if (table_base + v
                                    != (void *)e + e->next_offset) {
                                        /* Save old back ptr in next entry */
                                        struct arpt_entry *next
                                                = (void *)e + e->next_offset;
                                        next->comefrom =
                                                (void *)back - table_base;

                                        /* set back pointer to next entry */
                                        back = next;
                                }

                                e = get_entry(table_base, v);
                        } else {
                                /* Targets which reenter must return
                                 * abs. verdicts
                                 */
                                verdict = t->u.kernel.target->target(pskb,
                                                                     hook,
                                                                     in, out,
                                                                     t->data,
                                                                     userdata);

                                /* Target might have changed stuff. */
                                arp = (*pskb)->nh.arph;

                                if (verdict == ARPT_CONTINUE)
                                        e = (void *)e + e->next_offset;
                                else
                                        /* Verdict */
                                        break;
                        }
                } else {
                        /* No match: try the next rule. */
                        e = (void *)e + e->next_offset;
                }
        } while (!hotdrop);
        read_unlock_bh(&table->lock);

        if (hotdrop)
                return NF_DROP;
        else
                return verdict;
}
350
351 static inline void *find_inlist_lock_noload(struct list_head *head,
352                                             const char *name,
353                                             int *error,
354                                             struct semaphore *mutex)
355 {
356         void *ret;
357
358         *error = down_interruptible(mutex);
359         if (*error != 0)
360                 return NULL;
361
362         ret = list_named_find(head, name);
363         if (!ret) {
364                 *error = -ENOENT;
365                 up(mutex);
366         }
367         return ret;
368 }
369
/* find_inlist_lock(): like find_inlist_lock_noload(), but with CONFIG_KMOD
 * a failed lookup triggers request_module("<prefix><name>") followed by one
 * more lookup.  Without CONFIG_KMOD the prefix is ignored and this is just
 * the plain lookup.
 */
#ifdef CONFIG_KMOD
static void *
find_inlist_lock(struct list_head *head,
                 const char *name,
                 const char *prefix,
                 int *error,
                 struct semaphore *mutex)
{
        void *found = find_inlist_lock_noload(head, name, error, mutex);

        if (found)
                return found;

        duprintf("find_inlist: loading `%s%s'.\n", prefix, name);
        request_module("%s%s", prefix, name);

        return find_inlist_lock_noload(head, name, error, mutex);
}
#else
#define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m))
#endif
392
393 static inline struct arpt_table *arpt_find_table_lock(const char *name, int *error, struct semaphore *mutex)
394 {
395         return find_inlist_lock(&arpt_tables, name, "arptable_", error, mutex);
396 }
397
398 static struct arpt_target *arpt_find_target_lock(const char *name, int *error, struct semaphore *mutex)
399 {
400         return find_inlist_lock(&arpt_target, name, "arpt_", error, mutex);
401 }
402
403 /* All zeroes == unconditional rule. */
404 static inline int unconditional(const struct arpt_arp *arp)
405 {
406         unsigned int i;
407
408         for (i = 0; i < sizeof(*arp)/sizeof(__u32); i++)
409                 if (((__u32 *)arp)[i])
410                         return 0;
411
412         return 1;
413 }
414
/* Figures out from what hook each rule can be called: returns 0 if
 * there are loops.  Puts hook bitmask in comefrom.
 *
 * @newinfo:     table being validated; hook_entry[] must already be set.
 * @valid_hooks: bitmask of the hooks to walk; others are skipped.
 *
 * While a walk is in progress, bit NF_ARP_NUMHOOKS of comefrom marks the
 * entries on the current path; meeting an entry with that bit already set
 * means the rules loop.
 */
static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int valid_hooks)
{
        unsigned int hook;

        /* No recursion; use packet counter to save back ptrs (reset
         * to 0 as we leave), and comefrom to save source hook bitmask.
         */
        for (hook = 0; hook < NF_ARP_NUMHOOKS; hook++) {
                unsigned int pos = newinfo->hook_entry[hook];
                struct arpt_entry *e
                        = (struct arpt_entry *)(newinfo->entries + pos);

                if (!(valid_hooks & (1 << hook)))
                        continue;

                /* Set initial back pointer. */
                /* It points at the entry itself, which is how the
                 * backtracking below recognises the start of the chain.
                 */
                e->counters.pcnt = pos;

                for (;;) {
                        struct arpt_standard_target *t
                                = (void *)arpt_get_target(e);

                        /* Already on the current path: a loop. */
                        if (e->comefrom & (1 << NF_ARP_NUMHOOKS)) {
                                printk("arptables: loop hook %u pos %u %08X.\n",
                                       hook, pos, e->comefrom);
                                return 0;
                        }
                        /* Record the hook and mark the entry as on-path. */
                        e->comefrom
                                |= ((1 << hook) | (1 << NF_ARP_NUMHOOKS));

                        /* Unconditional return/END. */
                        /* That is: no data between the entry header and the
                         * target, a standard target with a negative verdict,
                         * and an all-zero match description.
                         */
                        if (e->target_offset == sizeof(struct arpt_entry)
                            && (strcmp(t->target.u.user.name,
                                       ARPT_STANDARD_TARGET) == 0)
                            && t->verdict < 0
                            && unconditional(&e->arp)) {
                                unsigned int oldpos, size;

                                /* Return: backtrack through the last
                                 * big jump.
                                 */
                                do {
                                        /* Leaving this entry: clear the
                                         * on-path bit and the back pointer.
                                         */
                                        e->comefrom ^= (1<<NF_ARP_NUMHOOKS);
                                        oldpos = pos;
                                        pos = e->counters.pcnt;
                                        e->counters.pcnt = 0;

                                        /* We're at the start. */
                                        if (pos == oldpos)
                                                goto next;

                                        e = (struct arpt_entry *)
                                                (newinfo->entries + pos);
                                /* Keep unwinding while the entry we left was
                                 * reached by falling through from this one.
                                 */
                                } while (oldpos == pos + e->next_offset);

                                /* Move along one */
                                size = e->next_offset;
                                e = (struct arpt_entry *)
                                        (newinfo->entries + pos + size);
                                e->counters.pcnt = pos;
                                pos += size;
                        } else {
                                int newpos = t->verdict;

                                if (strcmp(t->target.u.user.name,
                                           ARPT_STANDARD_TARGET) == 0
                                    && newpos >= 0) {
                                        /* This a jump; chase it. */
                                        duprintf("Jump rule %u -> %u\n",
                                                 pos, newpos);
                                } else {
                                        /* ... this is a fallthru */
                                        newpos = pos + e->next_offset;
                                }
                                /* Step to the next entry and store where we
                                 * came from as its back pointer.
                                 */
                                e = (struct arpt_entry *)
                                        (newinfo->entries + newpos);
                                e->counters.pcnt = pos;
                                pos = newpos;
                        }
                }
                next:
                duprintf("Finished chain %u\n", hook);
        }
        return 1;
}
503
504 static inline int standard_check(const struct arpt_entry_target *t,
505                                  unsigned int max_offset)
506 {
507         struct arpt_standard_target *targ = (void *)t;
508
509         /* Check standard info. */
510         if (t->u.target_size
511             != ARPT_ALIGN(sizeof(struct arpt_standard_target))) {
512                 duprintf("arpt_standard_check: target size %u != %Zu\n",
513                          t->u.target_size,
514                          ARPT_ALIGN(sizeof(struct arpt_standard_target)));
515                 return 0;
516         }
517
518         if (targ->verdict >= 0
519             && targ->verdict > max_offset - sizeof(struct arpt_entry)) {
520                 duprintf("arpt_standard_check: bad verdict (%i)\n",
521                          targ->verdict);
522                 return 0;
523         }
524
525         if (targ->verdict < -NF_MAX_VERDICT - 1) {
526                 duprintf("arpt_standard_check: bad negative verdict (%i)\n",
527                          targ->verdict);
528                 return 0;
529         }
530         return 1;
531 }
532
/* Declared ahead of its use: check_entry() compares the looked-up target
 * against this object to recognise the standard target.
 */
static struct arpt_target arpt_standard_target;
534
535 static inline int check_entry(struct arpt_entry *e, const char *name, unsigned int size,
536                               unsigned int *i)
537 {
538         struct arpt_entry_target *t;
539         struct arpt_target *target;
540         int ret;
541
542         if (!arp_checkentry(&e->arp)) {
543                 duprintf("arp_tables: arp check failed %p %s.\n", e, name);
544                 return -EINVAL;
545         }
546
547         t = arpt_get_target(e);
548         target = arpt_find_target_lock(t->u.user.name, &ret, &arpt_mutex);
549         if (!target) {
550                 duprintf("check_entry: `%s' not found\n", t->u.user.name);
551                 goto out;
552         }
553         if (!try_module_get((target->me))) {
554                 ret = -ENOENT;
555                 goto out_unlock;
556         }
557         t->u.kernel.target = target;
558         up(&arpt_mutex);
559
560         if (t->u.kernel.target == &arpt_standard_target) {
561                 if (!standard_check(t, size)) {
562                         ret = -EINVAL;
563                         goto out;
564                 }
565         } else if (t->u.kernel.target->checkentry
566                    && !t->u.kernel.target->checkentry(name, e, t->data,
567                                                       t->u.target_size
568                                                       - sizeof(*t),
569                                                       e->comefrom)) {
570                 module_put(t->u.kernel.target->me);
571                 duprintf("arp_tables: check failed for `%s'.\n",
572                          t->u.kernel.target->name);
573                 ret = -EINVAL;
574                 goto out;
575         }
576
577         (*i)++;
578         return 0;
579
580 out_unlock:
581         up(&arpt_mutex);
582 out:
583         return ret;
584 }
585
586 static inline int check_entry_size_and_hooks(struct arpt_entry *e,
587                                              struct arpt_table_info *newinfo,
588                                              unsigned char *base,
589                                              unsigned char *limit,
590                                              const unsigned int *hook_entries,
591                                              const unsigned int *underflows,
592                                              unsigned int *i)
593 {
594         unsigned int h;
595
596         if ((unsigned long)e % __alignof__(struct arpt_entry) != 0
597             || (unsigned char *)e + sizeof(struct arpt_entry) >= limit) {
598                 duprintf("Bad offset %p\n", e);
599                 return -EINVAL;
600         }
601
602         if (e->next_offset
603             < sizeof(struct arpt_entry) + sizeof(struct arpt_entry_target)) {
604                 duprintf("checking: element %p size %u\n",
605                          e, e->next_offset);
606                 return -EINVAL;
607         }
608
609         /* Check hooks & underflows */
610         for (h = 0; h < NF_ARP_NUMHOOKS; h++) {
611                 if ((unsigned char *)e - base == hook_entries[h])
612                         newinfo->hook_entry[h] = hook_entries[h];
613                 if ((unsigned char *)e - base == underflows[h])
614                         newinfo->underflow[h] = underflows[h];
615         }
616
617         /* FIXME: underflows must be unconditional, standard verdicts
618            < 0 (not ARPT_RETURN). --RR */
619
620         /* Clear counters and comefrom */
621         e->counters = ((struct arpt_counters) { 0, 0 });
622         e->comefrom = 0;
623
624         (*i)++;
625         return 0;
626 }
627
628 static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i)
629 {
630         struct arpt_entry_target *t;
631
632         if (i && (*i)-- == 0)
633                 return 1;
634
635         t = arpt_get_target(e);
636         if (t->u.kernel.target->destroy)
637                 t->u.kernel.target->destroy(t->data,
638                                             t->u.target_size - sizeof(*t));
639         module_put(t->u.kernel.target->me);
640         return 0;
641 }
642
643 /* Checks and translates the user-supplied table segment (held in
644  * newinfo).
645  */
646 static int translate_table(const char *name,
647                            unsigned int valid_hooks,
648                            struct arpt_table_info *newinfo,
649                            unsigned int size,
650                            unsigned int number,
651                            const unsigned int *hook_entries,
652                            const unsigned int *underflows)
653 {
654         unsigned int i;
655         int ret;
656
657         newinfo->size = size;
658         newinfo->number = number;
659
660         /* Init all hooks to impossible value. */
661         for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
662                 newinfo->hook_entry[i] = 0xFFFFFFFF;
663                 newinfo->underflow[i] = 0xFFFFFFFF;
664         }
665
666         duprintf("translate_table: size %u\n", newinfo->size);
667         i = 0;
668
669         /* Walk through entries, checking offsets. */
670         ret = ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
671                                  check_entry_size_and_hooks,
672                                  newinfo,
673                                  newinfo->entries,
674                                  newinfo->entries + size,
675                                  hook_entries, underflows, &i);
676         duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
677         if (ret != 0)
678                 return ret;
679
680         if (i != number) {
681                 duprintf("translate_table: %u not %u entries\n",
682                          i, number);
683                 return -EINVAL;
684         }
685
686         /* Check hooks all assigned */
687         for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
688                 /* Only hooks which are valid */
689                 if (!(valid_hooks & (1 << i)))
690                         continue;
691                 if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
692                         duprintf("Invalid hook entry %u %u\n",
693                                  i, hook_entries[i]);
694                         return -EINVAL;
695                 }
696                 if (newinfo->underflow[i] == 0xFFFFFFFF) {
697                         duprintf("Invalid underflow %u %u\n",
698                                  i, underflows[i]);
699                         return -EINVAL;
700                 }
701         }
702
703         if (!mark_source_chains(newinfo, valid_hooks)) {
704                 duprintf("Looping hook\n");
705                 return -ELOOP;
706         }
707
708         /* Finally, each sanity check must pass */
709         i = 0;
710         ret = ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
711                                  check_entry, name, size, &i);
712
713         if (ret != 0) {
714                 ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
715                                    cleanup_entry, &i);
716                 return ret;
717         }
718
719         /* And one copy for every other CPU */
720         for (i = 1; i < num_possible_cpus(); i++) {
721                 memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
722                        newinfo->entries,
723                        SMP_ALIGN(newinfo->size));
724         }
725
726         return ret;
727 }
728
729 static struct arpt_table_info *replace_table(struct arpt_table *table,
730                                              unsigned int num_counters,
731                                              struct arpt_table_info *newinfo,
732                                              int *error)
733 {
734         struct arpt_table_info *oldinfo;
735
736         /* Do the substitution. */
737         write_lock_bh(&table->lock);
738         /* Check inside lock: is the old number correct? */
739         if (num_counters != table->private->number) {
740                 duprintf("num_counters != table->private->number (%u/%u)\n",
741                          num_counters, table->private->number);
742                 write_unlock_bh(&table->lock);
743                 *error = -EAGAIN;
744                 return NULL;
745         }
746         oldinfo = table->private;
747         table->private = newinfo;
748         newinfo->initial_entries = oldinfo->initial_entries;
749         write_unlock_bh(&table->lock);
750
751         return oldinfo;
752 }
753
754 /* Gets counters. */
755 static inline int add_entry_to_counter(const struct arpt_entry *e,
756                                        struct arpt_counters total[],
757                                        unsigned int *i)
758 {
759         ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
760
761         (*i)++;
762         return 0;
763 }
764
765 static void get_counters(const struct arpt_table_info *t,
766                          struct arpt_counters counters[])
767 {
768         unsigned int cpu;
769         unsigned int i;
770
771         for (cpu = 0; cpu < num_possible_cpus(); cpu++) {
772                 i = 0;
773                 ARPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
774                                    t->size,
775                                    add_entry_to_counter,
776                                    counters,
777                                    &i);
778         }
779 }
780
781 static int copy_entries_to_user(unsigned int total_size,
782                                 struct arpt_table *table,
783                                 void __user *userptr)
784 {
785         unsigned int off, num, countersize;
786         struct arpt_entry *e;
787         struct arpt_counters *counters;
788         int ret = 0;
789
790         /* We need atomic snapshot of counters: rest doesn't change
791          * (other than comefrom, which userspace doesn't care
792          * about).
793          */
794         countersize = sizeof(struct arpt_counters) * table->private->number;
795         counters = vmalloc(countersize);
796
797         if (counters == NULL)
798                 return -ENOMEM;
799
800         /* First, sum counters... */
801         memset(counters, 0, countersize);
802         write_lock_bh(&table->lock);
803         get_counters(table->private, counters);
804         write_unlock_bh(&table->lock);
805
806         /* ... then copy entire thing from CPU 0... */
807         if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
808                 ret = -EFAULT;
809                 goto free_counters;
810         }
811
812         /* FIXME: use iterator macros --RR */
813         /* ... then go back and fix counters and names */
814         for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
815                 struct arpt_entry_target *t;
816
817                 e = (struct arpt_entry *)(table->private->entries + off);
818                 if (copy_to_user(userptr + off
819                                  + offsetof(struct arpt_entry, counters),
820                                  &counters[num],
821                                  sizeof(counters[num])) != 0) {
822                         ret = -EFAULT;
823                         goto free_counters;
824                 }
825
826                 t = arpt_get_target(e);
827                 if (copy_to_user(userptr + off + e->target_offset
828                                  + offsetof(struct arpt_entry_target,
829                                             u.user.name),
830                                  t->u.kernel.target->name,
831                                  strlen(t->u.kernel.target->name)+1) != 0) {
832                         ret = -EFAULT;
833                         goto free_counters;
834                 }
835         }
836
837  free_counters:
838         vfree(counters);
839         return ret;
840 }
841
842 static int get_entries(const struct arpt_get_entries *entries,
843                        struct arpt_get_entries __user *uptr)
844 {
845         int ret;
846         struct arpt_table *t;
847
848         t = arpt_find_table_lock(entries->name, &ret, &arpt_mutex);
849         if (t) {
850                 duprintf("t->private->number = %u\n",
851                          t->private->number);
852                 if (entries->size == t->private->size)
853                         ret = copy_entries_to_user(t->private->size,
854                                                    t, uptr->entrytable);
855                 else {
856                         duprintf("get_entries: I've got %u not %u!\n",
857                                  t->private->size,
858                                  entries->size);
859                         ret = -EINVAL;
860                 }
861                 up(&arpt_mutex);
862         } else
863                 duprintf("get_entries: Can't find %s!\n",
864                          entries->name);
865
866         return ret;
867 }
868
869 static int do_replace(void __user *user, unsigned int len)
870 {
871         int ret;
872         struct arpt_replace tmp;
873         struct arpt_table *t;
874         struct arpt_table_info *newinfo, *oldinfo;
875         struct arpt_counters *counters;
876
877         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
878                 return -EFAULT;
879
880         /* Hack: Causes ipchains to give correct error msg --RR */
881         if (len != sizeof(tmp) + tmp.size)
882                 return -ENOPROTOOPT;
883
884         /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
885         if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
886                 return -ENOMEM;
887
888         newinfo = vmalloc(sizeof(struct arpt_table_info)
889                           + SMP_ALIGN(tmp.size) * num_possible_cpus());
890         if (!newinfo)
891                 return -ENOMEM;
892
893         if (copy_from_user(newinfo->entries, user + sizeof(tmp),
894                            tmp.size) != 0) {
895                 ret = -EFAULT;
896                 goto free_newinfo;
897         }
898
899         counters = vmalloc(tmp.num_counters * sizeof(struct arpt_counters));
900         if (!counters) {
901                 ret = -ENOMEM;
902                 goto free_newinfo;
903         }
904         memset(counters, 0, tmp.num_counters * sizeof(struct arpt_counters));
905
906         ret = translate_table(tmp.name, tmp.valid_hooks,
907                               newinfo, tmp.size, tmp.num_entries,
908                               tmp.hook_entry, tmp.underflow);
909         if (ret != 0)
910                 goto free_newinfo_counters;
911
912         duprintf("arp_tables: Translated table\n");
913
914         t = arpt_find_table_lock(tmp.name, &ret, &arpt_mutex);
915         if (!t)
916                 goto free_newinfo_counters_untrans;
917
918         /* You lied! */
919         if (tmp.valid_hooks != t->valid_hooks) {
920                 duprintf("Valid hook crap: %08X vs %08X\n",
921                          tmp.valid_hooks, t->valid_hooks);
922                 ret = -EINVAL;
923                 goto free_newinfo_counters_untrans_unlock;
924         }
925
926         /* Get a reference in advance, we're not allowed fail later */
927         if (!try_module_get(t->me)) {
928                 ret = -EBUSY;
929                 goto free_newinfo_counters_untrans_unlock;
930         }
931
932         oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
933         if (!oldinfo)
934                 goto put_module;
935
936         /* Update module usage count based on number of rules */
937         duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
938                 oldinfo->number, oldinfo->initial_entries, newinfo->number);
939         if ((oldinfo->number > oldinfo->initial_entries) || 
940             (newinfo->number <= oldinfo->initial_entries)) 
941                 module_put(t->me);
942         if ((oldinfo->number > oldinfo->initial_entries) &&
943             (newinfo->number <= oldinfo->initial_entries))
944                 module_put(t->me);
945
946         /* Get the old counters. */
947         get_counters(oldinfo, counters);
948         /* Decrease module usage counts and free resource */
949         ARPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
950         vfree(oldinfo);
951         if (copy_to_user(tmp.counters, counters,
952                          sizeof(struct arpt_counters) * tmp.num_counters) != 0)
953                 ret = -EFAULT;
954         vfree(counters);
955         up(&arpt_mutex);
956         return ret;
957
958  put_module:
959         module_put(t->me);
960  free_newinfo_counters_untrans_unlock:
961         up(&arpt_mutex);
962  free_newinfo_counters_untrans:
963         ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry, NULL);
964  free_newinfo_counters:
965         vfree(counters);
966  free_newinfo:
967         vfree(newinfo);
968         return ret;
969 }
970
971 /* We're lazy, and add to the first CPU; overflow works its fey magic
972  * and everything is OK.
973  */
974 static inline int add_counter_to_entry(struct arpt_entry *e,
975                                        const struct arpt_counters addme[],
976                                        unsigned int *i)
977 {
978
979         ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
980
981         (*i)++;
982         return 0;
983 }
984
985 static int do_add_counters(void __user *user, unsigned int len)
986 {
987         unsigned int i;
988         struct arpt_counters_info tmp, *paddc;
989         struct arpt_table *t;
990         int ret;
991
992         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
993                 return -EFAULT;
994
995         if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct arpt_counters))
996                 return -EINVAL;
997
998         paddc = vmalloc(len);
999         if (!paddc)
1000                 return -ENOMEM;
1001
1002         if (copy_from_user(paddc, user, len) != 0) {
1003                 ret = -EFAULT;
1004                 goto free;
1005         }
1006
1007         t = arpt_find_table_lock(tmp.name, &ret, &arpt_mutex);
1008         if (!t)
1009                 goto free;
1010
1011         write_lock_bh(&t->lock);
1012         if (t->private->number != paddc->num_counters) {
1013                 ret = -EINVAL;
1014                 goto unlock_up_free;
1015         }
1016
1017         i = 0;
1018         ARPT_ENTRY_ITERATE(t->private->entries,
1019                            t->private->size,
1020                            add_counter_to_entry,
1021                            paddc->counters,
1022                            &i);
1023  unlock_up_free:
1024         write_unlock_bh(&t->lock);
1025         up(&arpt_mutex);
1026  free:
1027         vfree(paddc);
1028
1029         return ret;
1030 }
1031
1032 static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1033 {
1034         int ret;
1035
1036         if (!capable(CAP_NET_ADMIN))
1037                 return -EPERM;
1038
1039         switch (cmd) {
1040         case ARPT_SO_SET_REPLACE:
1041                 ret = do_replace(user, len);
1042                 break;
1043
1044         case ARPT_SO_SET_ADD_COUNTERS:
1045                 ret = do_add_counters(user, len);
1046                 break;
1047
1048         default:
1049                 duprintf("do_arpt_set_ctl:  unknown request %i\n", cmd);
1050                 ret = -EINVAL;
1051         }
1052
1053         return ret;
1054 }
1055
1056 static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1057 {
1058         int ret;
1059
1060         if (!capable(CAP_NET_ADMIN))
1061                 return -EPERM;
1062
1063         switch (cmd) {
1064         case ARPT_SO_GET_INFO: {
1065                 char name[ARPT_TABLE_MAXNAMELEN];
1066                 struct arpt_table *t;
1067
1068                 if (*len != sizeof(struct arpt_getinfo)) {
1069                         duprintf("length %u != %Zu\n", *len,
1070                                  sizeof(struct arpt_getinfo));
1071                         ret = -EINVAL;
1072                         break;
1073                 }
1074
1075                 if (copy_from_user(name, user, sizeof(name)) != 0) {
1076                         ret = -EFAULT;
1077                         break;
1078                 }
1079                 name[ARPT_TABLE_MAXNAMELEN-1] = '\0';
1080                 t = arpt_find_table_lock(name, &ret, &arpt_mutex);
1081                 if (t) {
1082                         struct arpt_getinfo info;
1083
1084                         info.valid_hooks = t->valid_hooks;
1085                         memcpy(info.hook_entry, t->private->hook_entry,
1086                                sizeof(info.hook_entry));
1087                         memcpy(info.underflow, t->private->underflow,
1088                                sizeof(info.underflow));
1089                         info.num_entries = t->private->number;
1090                         info.size = t->private->size;
1091                         strcpy(info.name, name);
1092
1093                         if (copy_to_user(user, &info, *len) != 0)
1094                                 ret = -EFAULT;
1095                         else
1096                                 ret = 0;
1097
1098                         up(&arpt_mutex);
1099                 }
1100         }
1101         break;
1102
1103         case ARPT_SO_GET_ENTRIES: {
1104                 struct arpt_get_entries get;
1105
1106                 if (*len < sizeof(get)) {
1107                         duprintf("get_entries: %u < %Zu\n", *len, sizeof(get));
1108                         ret = -EINVAL;
1109                 } else if (copy_from_user(&get, user, sizeof(get)) != 0) {
1110                         ret = -EFAULT;
1111                 } else if (*len != sizeof(struct arpt_get_entries) + get.size) {
1112                         duprintf("get_entries: %u != %Zu\n", *len,
1113                                  sizeof(struct arpt_get_entries) + get.size);
1114                         ret = -EINVAL;
1115                 } else
1116                         ret = get_entries(&get, user);
1117                 break;
1118         }
1119
1120         default:
1121                 duprintf("do_arpt_get_ctl: unknown request %i\n", cmd);
1122                 ret = -EINVAL;
1123         }
1124
1125         return ret;
1126 }
1127
1128 /* Registration hooks for targets. */
1129 int arpt_register_target(struct arpt_target *target)
1130 {
1131         int ret;
1132
1133         ret = down_interruptible(&arpt_mutex);
1134         if (ret != 0)
1135                 return ret;
1136
1137         if (!list_named_insert(&arpt_target, target)) {
1138                 duprintf("arpt_register_target: `%s' already in list!\n",
1139                          target->name);
1140                 ret = -EINVAL;
1141         }
1142         up(&arpt_mutex);
1143         return ret;
1144 }
1145
1146 void arpt_unregister_target(struct arpt_target *target)
1147 {
1148         down(&arpt_mutex);
1149         LIST_DELETE(&arpt_target, target);
1150         up(&arpt_mutex);
1151 }
1152
1153 int arpt_register_table(struct arpt_table *table,
1154                         const struct arpt_replace *repl)
1155 {
1156         int ret;
1157         struct arpt_table_info *newinfo;
1158         static struct arpt_table_info bootstrap
1159                 = { 0, 0, 0, { 0 }, { 0 }, { } };
1160
1161         newinfo = vmalloc(sizeof(struct arpt_table_info)
1162                           + SMP_ALIGN(repl->size) * num_possible_cpus());
1163         if (!newinfo) {
1164                 ret = -ENOMEM;
1165                 return ret;
1166         }
1167         memcpy(newinfo->entries, repl->entries, repl->size);
1168
1169         ret = translate_table(table->name, table->valid_hooks,
1170                               newinfo, repl->size,
1171                               repl->num_entries,
1172                               repl->hook_entry,
1173                               repl->underflow);
1174         duprintf("arpt_register_table: translate table gives %d\n", ret);
1175         if (ret != 0) {
1176                 vfree(newinfo);
1177                 return ret;
1178         }
1179
1180         ret = down_interruptible(&arpt_mutex);
1181         if (ret != 0) {
1182                 vfree(newinfo);
1183                 return ret;
1184         }
1185
1186         /* Don't autoload: we'd eat our tail... */
1187         if (list_named_find(&arpt_tables, table->name)) {
1188                 ret = -EEXIST;
1189                 goto free_unlock;
1190         }
1191
1192         /* Simplifies replace_table code. */
1193         table->private = &bootstrap;
1194         if (!replace_table(table, 0, newinfo, &ret))
1195                 goto free_unlock;
1196
1197         duprintf("table->private->number = %u\n",
1198                  table->private->number);
1199         
1200         /* save number of initial entries */
1201         table->private->initial_entries = table->private->number;
1202
1203         rwlock_init(&table->lock);
1204         list_prepend(&arpt_tables, table);
1205
1206  unlock:
1207         up(&arpt_mutex);
1208         return ret;
1209
1210  free_unlock:
1211         vfree(newinfo);
1212         goto unlock;
1213 }
1214
1215 void arpt_unregister_table(struct arpt_table *table)
1216 {
1217         down(&arpt_mutex);
1218         LIST_DELETE(&arpt_tables, table);
1219         up(&arpt_mutex);
1220
1221         /* Decrease module usage counts and free resources */
1222         ARPT_ENTRY_ITERATE(table->private->entries, table->private->size,
1223                            cleanup_entry, NULL);
1224         vfree(table->private);
1225 }
1226
1227 /* The built-in targets: standard (NULL) and error. */
1228 static struct arpt_target arpt_standard_target = {
1229         .name           = ARPT_STANDARD_TARGET,
1230 };
1231
1232 static struct arpt_target arpt_error_target = {
1233         .name           = ARPT_ERROR_TARGET,
1234         .target         = arpt_error,
1235 };
1236
1237 static struct nf_sockopt_ops arpt_sockopts = {
1238         .pf             = PF_INET,
1239         .set_optmin     = ARPT_BASE_CTL,
1240         .set_optmax     = ARPT_SO_SET_MAX+1,
1241         .set            = do_arpt_set_ctl,
1242         .get_optmin     = ARPT_BASE_CTL,
1243         .get_optmax     = ARPT_SO_GET_MAX+1,
1244         .get            = do_arpt_get_ctl,
1245 };
1246
1247 #ifdef CONFIG_PROC_FS
1248 static inline int print_name(const struct arpt_table *t,
1249                              off_t start_offset, char *buffer, int length,
1250                              off_t *pos, unsigned int *count)
1251 {
1252         if ((*count)++ >= start_offset) {
1253                 unsigned int namelen;
1254
1255                 namelen = sprintf(buffer + *pos, "%s\n", t->name);
1256                 if (*pos + namelen > length) {
1257                         /* Stop iterating */
1258                         return 1;
1259                 }
1260                 *pos += namelen;
1261         }
1262         return 0;
1263 }
1264
1265 static int arpt_get_tables(char *buffer, char **start, off_t offset, int length)
1266 {
1267         off_t pos = 0;
1268         unsigned int count = 0;
1269
1270         if (down_interruptible(&arpt_mutex) != 0)
1271                 return 0;
1272
1273         LIST_FIND(&arpt_tables, print_name, struct arpt_table *,
1274                   offset, buffer, length, &pos, &count);
1275
1276         up(&arpt_mutex);
1277
1278         /* `start' hack - see fs/proc/generic.c line ~105 */
1279         *start=(char *)((unsigned long)count-offset);
1280         return pos;
1281 }
1282 #endif /*CONFIG_PROC_FS*/
1283
1284 static int __init init(void)
1285 {
1286         int ret;
1287
1288         /* Noone else will be downing sem now, so we won't sleep */
1289         down(&arpt_mutex);
1290         list_append(&arpt_target, &arpt_standard_target);
1291         list_append(&arpt_target, &arpt_error_target);
1292         up(&arpt_mutex);
1293
1294         /* Register setsockopt */
1295         ret = nf_register_sockopt(&arpt_sockopts);
1296         if (ret < 0) {
1297                 duprintf("Unable to register sockopts.\n");
1298                 return ret;
1299         }
1300
1301 #ifdef CONFIG_PROC_FS
1302         {
1303                 struct proc_dir_entry *proc;
1304
1305                 proc = proc_net_create("arp_tables_names", 0, arpt_get_tables);
1306                 if (!proc) {
1307                         nf_unregister_sockopt(&arpt_sockopts);
1308                         return -ENOMEM;
1309                 }
1310                 proc->owner = THIS_MODULE;
1311         }
1312 #endif
1313
1314         printk("arp_tables: (C) 2002 David S. Miller\n");
1315         return 0;
1316 }
1317
1318 static void __exit fini(void)
1319 {
1320         nf_unregister_sockopt(&arpt_sockopts);
1321 #ifdef CONFIG_PROC_FS
1322         proc_net_remove("arp_tables_names");
1323 #endif
1324 }
1325
1326 EXPORT_SYMBOL(arpt_register_table);
1327 EXPORT_SYMBOL(arpt_unregister_table);
1328 EXPORT_SYMBOL(arpt_do_table);
1329 EXPORT_SYMBOL(arpt_register_target);
1330 EXPORT_SYMBOL(arpt_unregister_target);
1331
1332 module_init(init);
1333 module_exit(fini);