ipv4: Check return of dev_set_allmulti
[safe/jmp/linux-2.6] / net / ipv4 / ipmr.c
1 /*
2  *      IP multicast routing support for mrouted 3.6/3.8
3  *
4  *              (c) 1995 Alan Cox, <alan@redhat.com>
5  *        Linux Consultancy and Custom Driver Development
6  *
7  *      This program is free software; you can redistribute it and/or
8  *      modify it under the terms of the GNU General Public License
9  *      as published by the Free Software Foundation; either version
10  *      2 of the License, or (at your option) any later version.
11  *
12  *      Fixes:
13  *      Michael Chastain        :       Incorrect size of copying.
14  *      Alan Cox                :       Added the cache manager code
15  *      Alan Cox                :       Fixed the clone/copy bug and device race.
16  *      Mike McLagan            :       Routing by source
17  *      Malcolm Beattie         :       Buffer handling fixes.
18  *      Alexey Kuznetsov        :       Double buffer free and other fixes.
19  *      SVR Anand               :       Fixed several multicast bugs and problems.
20  *      Alexey Kuznetsov        :       Status, optimisations and more.
21  *      Brad Parker             :       Better behaviour on mrouted upcall
22  *                                      overflow.
23  *      Carlos Picoto           :       PIMv1 Support
24  *      Pavlin Ivanov Radoslavov:       PIMv2 Registers must checksum only PIM header
25  *                                      Relax this requirement to work with older peers.
26  *
27  */
28
29 #include <asm/system.h>
30 #include <asm/uaccess.h>
31 #include <linux/types.h>
32 #include <linux/capability.h>
33 #include <linux/errno.h>
34 #include <linux/timer.h>
35 #include <linux/mm.h>
36 #include <linux/kernel.h>
37 #include <linux/fcntl.h>
38 #include <linux/stat.h>
39 #include <linux/socket.h>
40 #include <linux/in.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/inetdevice.h>
44 #include <linux/igmp.h>
45 #include <linux/proc_fs.h>
46 #include <linux/seq_file.h>
47 #include <linux/mroute.h>
48 #include <linux/init.h>
49 #include <linux/if_ether.h>
50 #include <net/net_namespace.h>
51 #include <net/ip.h>
52 #include <net/protocol.h>
53 #include <linux/skbuff.h>
54 #include <net/route.h>
55 #include <net/sock.h>
56 #include <net/icmp.h>
57 #include <net/udp.h>
58 #include <net/raw.h>
59 #include <linux/notifier.h>
60 #include <linux/if_arp.h>
61 #include <linux/netfilter_ipv4.h>
62 #include <net/ipip.h>
63 #include <net/checksum.h>
64 #include <net/netlink.h>
65
66 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
67 #define CONFIG_IP_PIMSM 1
68 #endif
69
/* Raw IGMP socket owned by the user-space multicast routing daemon
 * (mrouted/pimd); NULL while no daemon is attached.
 */
static struct sock *mroute_socket;


/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

static struct vif_device vif_table[MAXVIFS];		/* Devices		*/
static int maxvif;					/* Highest used slot + 1 */

#define VIF_EXISTS(idx) (vif_table[idx].dev != NULL)

static int mroute_do_assert;				/* Set in PIM assert	*/
static int mroute_do_pim;				/* presumably toggled by an MRT_PIM setsockopt; not visible in this chunk */

static struct mfc_cache *mfc_cache_array[MFC_LINES];	/* Forwarding cache	*/

static struct mfc_cache *mfc_unres_queue;		/* Queue of unresolved entries */
static atomic_t cache_resolve_queue_len;		/* Size of unresolved	*/

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */

/* Slab cache backing struct mfc_cache allocations */
static struct kmem_cache *mrt_cachep __read_mostly;

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);

#ifdef CONFIG_IP_PIMSM_V2
static struct net_protocol pim_protocol;
#endif

/* Timer driving ipmr_expire_process() for the unresolved queue */
static struct timer_list ipmr_expire_timer;
118
119 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
120
/*
 *	Tear down the DVMRP tunnel backing a vif: close the device, then
 *	ask the "tunl0" master device to delete the "dvmrp%d" sub-tunnel
 *	via its SIOCDELTUNNEL ioctl.
 */
static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
	dev_close(dev);

	dev = __dev_get_by_name(&init_net, "tunl0");
	if (dev) {
		struct ifreq ifr;
		mm_segment_t	oldfs;
		struct ip_tunnel_parm p;

		/* Rebuild the same tunnel parameters vif_add() used so the
		 * ioctl can find the tunnel by name/addresses.
		 */
		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		/* The ioctl handler expects a user-space pointer; widen the
		 * address limit temporarily so a kernel pointer is accepted.
		 */
		oldfs = get_fs(); set_fs(KERNEL_DS);
		dev->do_ioctl(dev, &ifr, SIOCDELTUNNEL);
		set_fs(oldfs);
	}
}
145
/*
 *	Create a DVMRP tunnel for a vif: ask the "tunl0" master device to
 *	create a "dvmrp%d" IPIP sub-tunnel via SIOCADDTUNNEL, then mark it
 *	multicast-capable, relax rp_filter on it and bring it up.
 *	Returns the new device, or NULL on failure.
 */
static
struct net_device *ipmr_new_tunnel(struct vifctl *v)
{
	struct net_device  *dev;

	dev = __dev_get_by_name(&init_net, "tunl0");

	if (dev) {
		int err;
		struct ifreq ifr;
		mm_segment_t	oldfs;
		struct ip_tunnel_parm p;
		struct in_device  *in_dev;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		/* The ioctl handler expects a user-space pointer; widen the
		 * address limit temporarily so a kernel pointer is accepted.
		 */
		oldfs = get_fs(); set_fs(KERNEL_DS);
		err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
		set_fs(oldfs);

		dev = NULL;

		/* Look up the freshly created tunnel by the name we chose */
		if (err == 0 && (dev = __dev_get_by_name(&init_net, p.name)) != NULL) {
			dev->flags |= IFF_MULTICAST;

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev == NULL)
				goto failure;

			/* Disable reverse-path filtering on the tunnel */
			ipv4_devconf_setall(in_dev);
			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

			if (dev_open(dev))
				goto failure;
		}
	}
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
199
200 #ifdef CONFIG_IP_PIMSM
201
/* vif index of the PIM register pseudo-device; -1 while unallocated */
static int reg_vif_num = -1;
203
/*
 *	Transmit handler for the PIM register pseudo-device.  Instead of
 *	sending the packet anywhere, account it, bounce the whole packet
 *	up to the multicast routing daemon, and drop it.
 */
static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	/* mrt_lock guards reg_vif_num against concurrent vif changes */
	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return 0;
}
214
215 static void reg_vif_setup(struct net_device *dev)
216 {
217         dev->type               = ARPHRD_PIMREG;
218         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
219         dev->flags              = IFF_NOARP;
220         dev->hard_start_xmit    = reg_vif_xmit;
221         dev->destructor         = free_netdev;
222 }
223
/*
 *	Allocate and register the "pimreg" pseudo-device that backs the
 *	PIM register vif; its traffic is bounced to the daemon by
 *	reg_vif_xmit().  Called under RTNL (see the unlock/lock dance on
 *	the failure path).  Returns the device or NULL.
 */
static struct net_device *ipmr_reg_vif(void)
{
	struct net_device *dev;
	struct in_device *in_dev;

	dev = alloc_netdev(0, "pimreg", reg_vif_setup);

	if (dev == NULL)
		return NULL;

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	rcu_read_lock();
	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
		rcu_read_unlock();
		goto failure;
	}

	/* Disable reverse-path filtering on the register device */
	ipv4_devconf_setall(in_dev);
	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
	rcu_read_unlock();

	if (dev_open(dev))
		goto failure;

	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
263 #endif
264
/*
 *	Delete a VIF entry
 *
 *	Clears vif_table[vifi] under write-locked mrt_lock, shrinks
 *	maxvif when the highest slot was freed, then (outside the lock)
 *	drops the device's allmulti count, updates devconf/route state
 *	and releases the device reference taken in vif_add().
 *	Returns 0, or -EADDRNOTAVAIL if the slot is empty/out of range.
 */

static int vif_delete(int vifi)
{
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= maxvif)
		return -EADDRNOTAVAIL;

	v = &vif_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IP_PIMSM
	if (vifi == reg_vif_num)
		reg_vif_num = -1;
#endif

	/* Freed the highest slot: scan down for the new maxvif */
	if (vifi+1 == maxvif) {
		int tmp;
		for (tmp=vifi-1; tmp>=0; tmp--) {
			if (VIF_EXISTS(tmp))
				break;
		}
		maxvif = tmp+1;
	}

	write_unlock_bh(&mrt_lock);

	/* Undo the dev_set_allmulti(dev, 1) done in vif_add() */
	dev_set_allmulti(dev, -1);

	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
		ip_rt_multicast_event(in_dev);
	}

	/* Tunnel and register devices were created by us; destroy them */
	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		unregister_netdevice(dev);

	dev_put(dev);
	return 0;
}
318
319 /* Destroy an unresolved cache entry, killing queued skbs
320    and reporting error to netlink readers.
321  */
322
static void ipmr_destroy_unres(struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	atomic_dec(&cache_resolve_queue_len);

	while ((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			/* Queued netlink request (marked by version 0):
			 * turn it into an NLMSG_ERROR reply carrying
			 * -ETIMEDOUT and send it back to the requester.
			 */
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = NLMSG_DATA(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));

			rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
		} else
			/* Ordinary queued datagram: just drop it */
			kfree_skb(skb);
	}

	kmem_cache_free(mrt_cachep, c);
}
347
348
349 /* Single timer process for all the unresolved queue. */
350
static void ipmr_expire_process(unsigned long dummy)
{
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, **cp;

	/* Timer context: if someone holds the lock, retry in HZ/10
	 * instead of spinning.
	 */
	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
		return;
	}

	if (atomic_read(&cache_resolve_queue_len) == 0)
		goto out;

	now = jiffies;
	expires = 10*HZ;	/* upper bound for the re-arm interval */
	cp = &mfc_unres_queue;

	while ((c=*cp) != NULL) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* Not yet expired: remember the soonest deadline */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			cp = &c->next;
			continue;
		}

		/* Expired: unlink and destroy (reports -ETIMEDOUT) */
		*cp = c->next;

		ipmr_destroy_unres(c);
	}

	/* Re-arm for the next earliest expiry if anything is left */
	if (atomic_read(&cache_resolve_queue_len))
		mod_timer(&ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}
389
390 /* Fill oifs list. It is called under write locked mrt_lock. */
391
392 static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
393 {
394         int vifi;
395
396         cache->mfc_un.res.minvif = MAXVIFS;
397         cache->mfc_un.res.maxvif = 0;
398         memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
399
400         for (vifi=0; vifi<maxvif; vifi++) {
401                 if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
402                         cache->mfc_un.res.ttls[vifi] = ttls[vifi];
403                         if (cache->mfc_un.res.minvif > vifi)
404                                 cache->mfc_un.res.minvif = vifi;
405                         if (cache->mfc_un.res.maxvif <= vifi)
406                                 cache->mfc_un.res.maxvif = vifi + 1;
407                 }
408         }
409 }
410
/*
 *	Add a VIF entry at index vifc->vifc_vifi.
 *
 *	Depending on vifc_flags, the backing device is the PIM register
 *	pseudo-device, a freshly created DVMRP tunnel, or the existing
 *	device owning vifc_lcl_addr.  The device is put into allmulti
 *	mode; a dev_set_allmulti() failure rolls the setup back and is
 *	propagated to the caller.  The vif_table slot itself is filled
 *	under write-locked mrt_lock.
 *	@mrtsock: nonzero when invoked by the mroute socket; otherwise
 *	the vif is marked VIFF_STATIC.
 *	Returns 0 or a negative errno.
 */
static int vif_add(struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif();
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			return err;
		}
		break;
#endif
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(vifc);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			ipmr_del_tunnel(dev, vifc);
			return err;
		}
		break;
	case 0:
		dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr);
		if (!dev)
			return -EADDRNOTAVAIL;
		/* NOTE(review): the reference taken by ip_dev_find() is
		 * dropped here although dev is still used below; this
		 * presumably relies on RTNL keeping the device alive
		 * until dev_hold() later — confirm.
		 */
		dev_put(dev);
		err = dev_set_allmulti(dev, 1);
		if (err)
			return err;
		break;
	default:
		return -EINVAL;
	}

	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
		return -EADDRNOTAVAIL;
	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
	ip_rt_multicast_event(in_dev);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit=vifc->vifc_rate_limit;
	v->local=vifc->vifc_lcl_addr.s_addr;
	v->remote=vifc->vifc_rmt_addr.s_addr;
	v->flags=vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold=vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	/* Report tunnel/register vifs against their underlying link */
	v->link = dev->ifindex;
	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	dev_hold(dev);
	v->dev=dev;
#ifdef CONFIG_IP_PIMSM
	if (v->flags&VIFF_REGISTER)
		reg_vif_num = vifi;
#endif
	if (vifi+1 > maxvif)
		maxvif = vifi+1;
	write_unlock_bh(&mrt_lock);
	return 0;
}
501
502 static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp)
503 {
504         int line=MFC_HASH(mcastgrp,origin);
505         struct mfc_cache *c;
506
507         for (c=mfc_cache_array[line]; c; c = c->next) {
508                 if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
509                         break;
510         }
511         return c;
512 }
513
514 /*
515  *      Allocate a multicast cache entry
516  */
517 static struct mfc_cache *ipmr_cache_alloc(void)
518 {
519         struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
520         if (c==NULL)
521                 return NULL;
522         c->mfc_un.res.minvif = MAXVIFS;
523         return c;
524 }
525
526 static struct mfc_cache *ipmr_cache_alloc_unres(void)
527 {
528         struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
529         if (c==NULL)
530                 return NULL;
531         skb_queue_head_init(&c->mfc_un.unres.unresolved);
532         c->mfc_un.unres.expires = jiffies + 10*HZ;
533         return c;
534 }
535
536 /*
537  *      A cache entry has gone into a resolved state from queued
538  */
539
static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			/* Queued netlink request (marked by version 0):
			 * answer it with the now-resolved route, or with
			 * NLMSG_ERROR/-EMSGSIZE if it does not fit.
			 */
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = (skb_tail_pointer(skb) -
						  (u8 *)nlh);
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = NLMSG_DATA(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}

			rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
		} else
			/* Ordinary queued datagram: forward it now */
			ip_mr_forward(skb, c, 0);
	}
}
570
571 /*
572  *      Bounce a cache query up to mrouted. We could use netlink for this but mrouted
573  *      expects the following bizarre scheme.
574  *
575  *      Called under mrt_lock.
576  */
577
static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
{
	struct sk_buff *skb;
	const int ihl = ip_hdrlen(pkt);
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	int ret;

#ifdef CONFIG_IP_PIMSM
	/* WHOLEPKT passes the entire packet up: copy it with headroom
	 * for the extra IP header prepended below.
	 */
	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix ihl, length etc.
		   And all this only to mangle msg->im_msgtype and
		   to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = reg_vif_num;
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
	} else
#endif
	{

	/*
	 *	Copy the IP header
	 */

	skb->network_header = skb->tail;
	skb_put(skb, ihl);
	skb_copy_to_linear_data(skb, pkt->data, ihl);
	ip_hdr(skb)->protocol = 0;			/* Flag to the kernel this is a route add */
	msg = (struct igmpmsg *)skb_network_header(skb);
	msg->im_vif = vifi;
	skb->dst = dst_clone(pkt->dst);

	/*
	 *	Add our header
	 */

	igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr));
	igmp->type	=
	msg->im_msgtype = assert;
	igmp->code	=	0;
	ip_hdr(skb)->tot_len = htons(skb->len);			/* Fix the length */
	skb->transport_header = skb->network_header;
	}

	/* No daemon attached: nobody to report to */
	if (mroute_socket == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to mrouted
	 */
	if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}
658
/*
 *	Queue a packet for resolution. It gets locked cache entry!
 *
 *	Finds or creates an unresolved-queue entry for (saddr, daddr);
 *	creating one also reports IGMPMSG_NOCACHE to mrouted and arms
 *	the expiry timer.  At most 4 packets are queued per entry.
 *	Consumes the skb in every case; returns 0 or a negative errno.
 */

static int
ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
{
	int err;
	struct mfc_cache *c;
	const struct iphdr *iph = ip_hdr(skb);

	spin_lock_bh(&mfc_unres_lock);
	for (c=mfc_unres_queue; c; c=c->next) {
		if (c->mfc_mcastgrp == iph->daddr &&
		    c->mfc_origin == iph->saddr)
			break;
	}

	if (c == NULL) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&cache_resolve_queue_len)>=10 ||
		    (c=ipmr_cache_alloc_unres())==NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mfc_parent	= -1;
		c->mfc_origin	= iph->saddr;
		c->mfc_mcastgrp	= iph->daddr;

		/*
		 *	Reflect first query at mrouted.
		 */
		if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			kmem_cache_free(mrt_cachep, c);
			kfree_skb(skb);
			return err;
		}

		/* Link the new entry at the head of the unresolved queue
		 * and make sure the expiry timer will visit it.
		 */
		atomic_inc(&cache_resolve_queue_len);
		c->next = mfc_unres_queue;
		mfc_unres_queue = c;

		mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen>3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved,skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}
732
733 /*
734  *      MFC cache manipulation by user space mroute daemon
735  */
736
/*
 *	Delete the (origin, group) entry from the resolved MFC cache.
 *	The entry is unlinked under write-locked mrt_lock and freed
 *	after the unlock.  Returns 0, or -ENOENT if no entry matches.
 */
static int ipmr_mfc_delete(struct mfcctl *mfc)
{
	int line;
	struct mfc_cache *c, **cp;

	line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			kmem_cache_free(mrt_cachep, c);
			return 0;
		}
	}
	return -ENOENT;
}
757
/*
 *	Add or update a resolved MFC cache entry from user space.
 *	An existing (origin, group) entry only has its parent and TTL
 *	thresholds refreshed.  Otherwise a new entry is created, hashed
 *	in, and any matching unresolved-queue entry is taken over: its
 *	queued packets are replayed via ipmr_cache_resolve().
 *	@mrtsock: zero marks the entry MFC_STATIC.
 *	Returns 0 or a negative errno.
 */
static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
{
	int line;
	struct mfc_cache *uc, *c, **cp;

	line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
			break;
	}

	if (c != NULL) {
		/* Entry already resolved: update it in place */
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c=ipmr_cache_alloc();
	if (c==NULL)
		return -ENOMEM;

	c->mfc_origin=mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent=mfc->mfcc_parent;
	ipmr_update_thresholds(c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	c->next = mfc_cache_array[line];
	mfc_cache_array[line] = c;
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	spin_lock_bh(&mfc_unres_lock);
	for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
	     cp = &uc->next) {
		if (uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			*cp = uc->next;
			if (atomic_dec_and_test(&cache_resolve_queue_len))
				del_timer(&ipmr_expire_timer);
			break;
		}
	}
	spin_unlock_bh(&mfc_unres_lock);

	/* uc is non-NULL only when the loop above matched and broke */
	if (uc) {
		ipmr_cache_resolve(uc, c);
		kmem_cache_free(mrt_cachep, uc);
	}
	return 0;
}
823
/*
 *	Close the multicast socket, and clear the vif tables etc
 *
 *	Deletes every non-VIFF_STATIC vif, drops every non-MFC_STATIC
 *	cache entry and flushes the unresolved queue.  Called from
 *	mrtsock_destruct() under RTNL.
 */

static void mroute_clean_tables(struct sock *sk)
{
	int i;

	/*
	 *	Shut down all active vif entries
	 */
	for (i=0; i<maxvif; i++) {
		if (!(vif_table[i].flags&VIFF_STATIC))
			vif_delete(i);
	}

	/*
	 *	Wipe the cache
	 */
	for (i=0;i<MFC_LINES;i++) {
		struct mfc_cache *c, **cp;

		cp = &mfc_cache_array[i];
		while ((c = *cp) != NULL) {
			if (c->mfc_flags&MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			/* Unlink under the lock, free after the unlock */
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			kmem_cache_free(mrt_cachep, c);
		}
	}

	if (atomic_read(&cache_resolve_queue_len) != 0) {
		struct mfc_cache *c;

		/* mfc_unres_lock is released around each
		 * ipmr_destroy_unres() call (which sends netlink
		 * replies via rtnl_unicast()), then re-taken.
		 */
		spin_lock_bh(&mfc_unres_lock);
		while (mfc_unres_queue != NULL) {
			c = mfc_unres_queue;
			mfc_unres_queue = c->next;
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_destroy_unres(c);

			spin_lock_bh(&mfc_unres_lock);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}
876
/*
 *	Destructor for the mroute raw socket: if this is the active
 *	mroute socket, decrement global mc_forwarding, clear the socket
 *	pointer under mrt_lock and tear down all non-static state.
 */
static void mrtsock_destruct(struct sock *sk)
{
	rtnl_lock();
	if (sk == mroute_socket) {
		IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)--;

		write_lock_bh(&mrt_lock);
		mroute_socket=NULL;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(sk);
	}
	rtnl_unlock();
}
891
892 /*
893  *      Socket options and virtual interface manipulation. The whole
894  *      virtual interface system is a complete heap, but unfortunately
895  *      that's how BSD mrouted happens to think. Maybe one day with a proper
896  *      MOSPF/PIM router set up we can clean this up.
897  */
898
/*
 * setsockopt() handler for the MRT_* multicast-routing options.
 * Everything except MRT_INIT requires the caller to either be the
 * registered mroute socket or have CAP_NET_ADMIN.  Table-mutating
 * options take RTNL; mroute_socket itself is updated under mrt_lock.
 */
int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int optlen)
{
        int ret;
        struct vifctl vif;
        struct mfcctl mfc;

        if (optname != MRT_INIT) {
                if (sk != mroute_socket && !capable(CAP_NET_ADMIN))
                        return -EACCES;
        }

        switch (optname) {
        case MRT_INIT:
                /* Only a raw IGMP socket may become the mroute socket */
                if (sk->sk_type != SOCK_RAW ||
                    inet_sk(sk)->num != IPPROTO_IGMP)
                        return -EOPNOTSUPP;
                if (optlen!=sizeof(int))
                        return -ENOPROTOOPT;

                rtnl_lock();
                /* Only one mrouted instance at a time */
                if (mroute_socket) {
                        rtnl_unlock();
                        return -EADDRINUSE;
                }

                /* Register for router-alert delivery; mrtsock_destruct
                 * will undo everything when the socket goes away. */
                ret = ip_ra_control(sk, 1, mrtsock_destruct);
                if (ret == 0) {
                        write_lock_bh(&mrt_lock);
                        mroute_socket=sk;
                        write_unlock_bh(&mrt_lock);

                        IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)++;
                }
                rtnl_unlock();
                return ret;
        case MRT_DONE:
                if (sk!=mroute_socket)
                        return -EACCES;
                /* Deregistering triggers mrtsock_destruct via ip_ra_control */
                return ip_ra_control(sk, 0, NULL);
        case MRT_ADD_VIF:
        case MRT_DEL_VIF:
                if (optlen!=sizeof(vif))
                        return -EINVAL;
                if (copy_from_user(&vif,optval,sizeof(vif)))
                        return -EFAULT;
                if (vif.vifc_vifi >= MAXVIFS)
                        return -ENFILE;
                rtnl_lock();
                if (optname==MRT_ADD_VIF) {
                        ret = vif_add(&vif, sk==mroute_socket);
                } else {
                        ret = vif_delete(vif.vifc_vifi);
                }
                rtnl_unlock();
                return ret;

                /*
                 *      Manipulate the forwarding caches. These live
                 *      in a sort of kernel/user symbiosis.
                 */
        case MRT_ADD_MFC:
        case MRT_DEL_MFC:
                if (optlen!=sizeof(mfc))
                        return -EINVAL;
                if (copy_from_user(&mfc,optval, sizeof(mfc)))
                        return -EFAULT;
                rtnl_lock();
                if (optname==MRT_DEL_MFC)
                        ret = ipmr_mfc_delete(&mfc);
                else
                        ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
                rtnl_unlock();
                return ret;
                /*
                 *      Control PIM assert.
                 */
        case MRT_ASSERT:
        {
                int v;
                if (get_user(v,(int __user *)optval))
                        return -EFAULT;
                mroute_do_assert=(v)?1:0;
                return 0;
        }
#ifdef CONFIG_IP_PIMSM
        case MRT_PIM:
        {
                int v;

                if (get_user(v,(int __user *)optval))
                        return -EFAULT;
                v = (v) ? 1 : 0;

                rtnl_lock();
                ret = 0;
                if (v != mroute_do_pim) {
                        /* Enabling PIM implies assert mode as well */
                        mroute_do_pim = v;
                        mroute_do_assert = v;
#ifdef CONFIG_IP_PIMSM_V2
                        if (mroute_do_pim)
                                ret = inet_add_protocol(&pim_protocol,
                                                        IPPROTO_PIM);
                        else
                                ret = inet_del_protocol(&pim_protocol,
                                                        IPPROTO_PIM);
                        if (ret < 0)
                                ret = -EAGAIN;
#endif
                }
                rtnl_unlock();
                return ret;
        }
#endif
        /*
         *      Spurious command, or MRT_VERSION which you cannot
         *      set.
         */
        default:
                return -ENOPROTOOPT;
        }
}
1020
1021 /*
1022  *      Getsock opt support for the multicast routing system.
1023  */
1024
1025 int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __user *optlen)
1026 {
1027         int olr;
1028         int val;
1029
1030         if (optname!=MRT_VERSION &&
1031 #ifdef CONFIG_IP_PIMSM
1032            optname!=MRT_PIM &&
1033 #endif
1034            optname!=MRT_ASSERT)
1035                 return -ENOPROTOOPT;
1036
1037         if (get_user(olr, optlen))
1038                 return -EFAULT;
1039
1040         olr = min_t(unsigned int, olr, sizeof(int));
1041         if (olr < 0)
1042                 return -EINVAL;
1043
1044         if (put_user(olr,optlen))
1045                 return -EFAULT;
1046         if (optname==MRT_VERSION)
1047                 val=0x0305;
1048 #ifdef CONFIG_IP_PIMSM
1049         else if (optname==MRT_PIM)
1050                 val=mroute_do_pim;
1051 #endif
1052         else
1053                 val=mroute_do_assert;
1054         if (copy_to_user(optval,&val,olr))
1055                 return -EFAULT;
1056         return 0;
1057 }
1058
1059 /*
1060  *      The IP multicast ioctl support routines.
1061  */
1062
/*
 * ioctl() handler for multicast routing statistics.
 * SIOCGETVIFCNT returns per-VIF packet/byte counters, SIOCGETSGCNT
 * returns per-(S,G) cache counters.  Counters are read under mrt_lock.
 */
int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
        struct sioc_sg_req sr;
        struct sioc_vif_req vr;
        struct vif_device *vif;
        struct mfc_cache *c;

        switch (cmd) {
        case SIOCGETVIFCNT:
                if (copy_from_user(&vr,arg,sizeof(vr)))
                        return -EFAULT;
                if (vr.vifi>=maxvif)
                        return -EINVAL;
                read_lock(&mrt_lock);
                vif=&vif_table[vr.vifi];
                if (VIF_EXISTS(vr.vifi))        {
                        /* Snapshot the counters under the lock, drop it
                         * before copying to (possibly faulting) userspace */
                        vr.icount=vif->pkt_in;
                        vr.ocount=vif->pkt_out;
                        vr.ibytes=vif->bytes_in;
                        vr.obytes=vif->bytes_out;
                        read_unlock(&mrt_lock);

                        if (copy_to_user(arg,&vr,sizeof(vr)))
                                return -EFAULT;
                        return 0;
                }
                read_unlock(&mrt_lock);
                return -EADDRNOTAVAIL;
        case SIOCGETSGCNT:
                if (copy_from_user(&sr,arg,sizeof(sr)))
                        return -EFAULT;

                read_lock(&mrt_lock);
                c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
                if (c) {
                        sr.pktcnt = c->mfc_un.res.pkt;
                        sr.bytecnt = c->mfc_un.res.bytes;
                        sr.wrong_if = c->mfc_un.res.wrong_if;
                        read_unlock(&mrt_lock);

                        if (copy_to_user(arg,&sr,sizeof(sr)))
                                return -EFAULT;
                        return 0;
                }
                read_unlock(&mrt_lock);
                return -EADDRNOTAVAIL;
        default:
                return -ENOIOCTLCMD;
        }
}
1113
1114
1115 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1116 {
1117         struct net_device *dev = ptr;
1118         struct vif_device *v;
1119         int ct;
1120
1121         if (dev_net(dev) != &init_net)
1122                 return NOTIFY_DONE;
1123
1124         if (event != NETDEV_UNREGISTER)
1125                 return NOTIFY_DONE;
1126         v=&vif_table[0];
1127         for (ct=0;ct<maxvif;ct++,v++) {
1128                 if (v->dev==dev)
1129                         vif_delete(ct);
1130         }
1131         return NOTIFY_DONE;
1132 }
1133
1134
/* Hook ipmr_device_event() into netdevice notifications */
static struct notifier_block ip_mr_notifier={
        .notifier_call = ipmr_device_event,
};
1138
1139 /*
1140  *      Encapsulate a packet by attaching a valid IPIP header to it.
1141  *      This avoids tunnel drivers and other mess and gives us the speed so
1142  *      important for multicast video.
1143  */
1144
/*
 * Prepend an IPIP header (protocol 4, no options) to @skb for tunnel
 * VIFs.  TOS and TTL are inherited from the inner header; the old
 * network header becomes the transport header of the new packet.
 */
static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
        struct iphdr *iph;
        struct iphdr *old_iph = ip_hdr(skb);

        /* Make room for the outer header; caller guarantees headroom
         * via skb_cow() in ipmr_queue_xmit() */
        skb_push(skb, sizeof(struct iphdr));
        skb->transport_header = skb->network_header;
        skb_reset_network_header(skb);
        iph = ip_hdr(skb);

        iph->version    =       4;
        iph->tos        =       old_iph->tos;
        iph->ttl        =       old_iph->ttl;
        iph->frag_off   =       0;
        iph->daddr      =       daddr;
        iph->saddr      =       saddr;
        iph->protocol   =       IPPROTO_IPIP;
        iph->ihl        =       5;      /* 20 bytes, no options */
        iph->tot_len    =       htons(skb->len);
        ip_select_ident(iph, skb->dst, NULL);
        ip_send_check(iph);

        /* Inner IP options and conntrack state do not apply to the
         * newly built outer packet */
        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
        nf_reset(skb);
}
1170
1171 static inline int ipmr_forward_finish(struct sk_buff *skb)
1172 {
1173         struct ip_options * opt = &(IPCB(skb)->opt);
1174
1175         IP_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);
1176
1177         if (unlikely(opt->optlen))
1178                 ip_forward_options(skb);
1179
1180         return dst_output(skb);
1181 }
1182
1183 /*
1184  *      Processing handlers for ipmr_forward
1185  */
1186
/*
 * Transmit @skb out of VIF @vifi.  Consumes the skb on every path:
 * it is either handed to NF_HOOK/the output path or freed.  Register
 * VIFs punt the whole packet to the user-space daemon; tunnel VIFs
 * get an IPIP header prepended via ip_encap().
 */
static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
        const struct iphdr *iph = ip_hdr(skb);
        struct vif_device *vif = &vif_table[vifi];
        struct net_device *dev;
        struct rtable *rt;
        int    encap = 0;

        if (vif->dev == NULL)
                goto out_free;

#ifdef CONFIG_IP_PIMSM
        if (vif->flags & VIFF_REGISTER) {
                /* PIM register VIF: account the packet and deliver it
                 * whole to mrouted instead of transmitting it */
                vif->pkt_out++;
                vif->bytes_out+=skb->len;
                vif->dev->stats.tx_bytes += skb->len;
                vif->dev->stats.tx_packets++;
                ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
                kfree_skb(skb);
                return;
        }
#endif

        /* Route lookup: tunnels route towards the tunnel endpoint,
         * plain VIFs towards the packet's own destination */
        if (vif->flags&VIFF_TUNNEL) {
                struct flowi fl = { .oif = vif->link,
                                    .nl_u = { .ip4_u =
                                              { .daddr = vif->remote,
                                                .saddr = vif->local,
                                                .tos = RT_TOS(iph->tos) } },
                                    .proto = IPPROTO_IPIP };
                if (ip_route_output_key(&init_net, &rt, &fl))
                        goto out_free;
                encap = sizeof(struct iphdr);
        } else {
                struct flowi fl = { .oif = vif->link,
                                    .nl_u = { .ip4_u =
                                              { .daddr = iph->daddr,
                                                .tos = RT_TOS(iph->tos) } },
                                    .proto = IPPROTO_IPIP };
                if (ip_route_output_key(&init_net, &rt, &fl))
                        goto out_free;
        }

        dev = rt->u.dst.dev;

        if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
                /* Do not fragment multicasts. Alas, IPv4 does not
                   allow to send ICMP, so that packets will disappear
                   to blackhole.
                 */

                IP_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS);
                ip_rt_put(rt);
                goto out_free;
        }

        /* Total headroom needed: link layer + dst header + tunnel hdr */
        encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;

        if (skb_cow(skb, encap)) {
                ip_rt_put(rt);
                goto out_free;
        }

        vif->pkt_out++;
        vif->bytes_out+=skb->len;

        /* Swap the skb's route for the freshly looked-up one; the rt
         * reference is now owned by the skb */
        dst_release(skb->dst);
        skb->dst = &rt->u.dst;
        ip_decrease_ttl(ip_hdr(skb));

        /* FIXME: forward and output firewalls used to be called here.
         * What do we do with netfilter? -- RR */
        if (vif->flags & VIFF_TUNNEL) {
                ip_encap(skb, vif->local, vif->remote);
                /* FIXME: extra output firewall step used to be here. --RR */
                vif->dev->stats.tx_packets++;
                vif->dev->stats.tx_bytes += skb->len;
        }

        IPCB(skb)->flags |= IPSKB_FORWARDED;

        /*
         * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
         * not only before forwarding, but after forwarding on all output
         * interfaces. It is clear, if mrouter runs a multicasting
         * program, it should receive packets not depending to what interface
         * program is joined.
         * If we will not make it, the program will have to join on all
         * interfaces. On the other hand, multihoming host (or router, but
         * not mrouter) cannot join to more than one interface - it will
         * result in receiving multiple packets.
         */
        NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
                ipmr_forward_finish);
        return;

out_free:
        kfree_skb(skb);
        return;
}
1287
1288 static int ipmr_find_vif(struct net_device *dev)
1289 {
1290         int ct;
1291         for (ct=maxvif-1; ct>=0; ct--) {
1292                 if (vif_table[ct].dev == dev)
1293                         break;
1294         }
1295         return ct;
1296 }
1297
1298 /* "local" means that we should preserve one skb (for local delivery) */
1299
/* "local" means that we should preserve one skb (for local delivery) */

/*
 * Forward @skb along every output VIF of @cache whose TTL threshold
 * the packet satisfies.  When @local is set the caller keeps the
 * original skb, so only clones are transmitted; otherwise the last
 * transmission consumes @skb itself.  Called with mrt_lock held for
 * reading (see ip_mr_input).
 */
static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
{
        int psend = -1;
        int vif, ct;

        vif = cache->mfc_parent;
        cache->mfc_un.res.pkt++;
        cache->mfc_un.res.bytes += skb->len;

        /*
         * Wrong interface: drop packet and (maybe) send PIM assert.
         */
        if (vif_table[vif].dev != skb->dev) {
                int true_vifi;

                if (skb->rtable->fl.iif == 0) {
                        /* It is our own packet, looped back.
                           Very complicated situation...

                           The best workaround until routing daemons will be
                           fixed is not to redistribute packet, if it was
                           send through wrong interface. It means, that
                           multicast applications WILL NOT work for
                           (S,G), which have default multicast route pointing
                           to wrong oif. In any case, it is not a good
                           idea to use multicasting applications on router.
                         */
                        goto dont_forward;
                }

                cache->mfc_un.res.wrong_if++;
                true_vifi = ipmr_find_vif(skb->dev);

                /* Rate-limit WRONGVIF reports via last_assert */
                if (true_vifi >= 0 && mroute_do_assert &&
                    /* pimsm uses asserts, when switching from RPT to SPT,
                       so that we cannot check that packet arrived on an oif.
                       It is bad, but otherwise we would need to move pretty
                       large chunk of pimd to kernel. Ough... --ANK
                     */
                    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
                    time_after(jiffies,
                               cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
                        cache->mfc_un.res.last_assert = jiffies;
                        ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
                }
                goto dont_forward;
        }

        vif_table[vif].pkt_in++;
        vif_table[vif].bytes_in+=skb->len;

        /*
         *      Forward the frame
         */
        /* Delay each transmission by one slot (psend) so the final
         * destination can take ownership of skb instead of a clone */
        for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
                if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
                        if (psend != -1) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                                if (skb2)
                                        ipmr_queue_xmit(skb2, cache, psend);
                        }
                        psend=ct;
                }
        }
        if (psend != -1) {
                if (local) {
                        /* Caller still needs skb for local delivery */
                        struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                        if (skb2)
                                ipmr_queue_xmit(skb2, cache, psend);
                } else {
                        ipmr_queue_xmit(skb, cache, psend);
                        return 0;
                }
        }

dont_forward:
        if (!local)
                kfree_skb(skb);
        return 0;
}
1380
1381
1382 /*
1383  *      Multicast packets for forwarding arrive here
1384  */
1385
/*
 * Entry point for multicast packets that may need forwarding.
 * Looks up the (S,G) cache under mrt_lock: hits are forwarded via
 * ip_mr_forward(), misses are queued as unresolved (after punting a
 * copy to mrouted).  RTCF_LOCAL packets are additionally delivered
 * to the local stack.
 */
int ip_mr_input(struct sk_buff *skb)
{
        struct mfc_cache *cache;
        int local = skb->rtable->rt_flags&RTCF_LOCAL;

        /* Packet is looped back after forward, it should not be
           forwarded second time, but still can be delivered locally.
         */
        if (IPCB(skb)->flags&IPSKB_FORWARDED)
                goto dont_forward;

        if (!local) {
                    if (IPCB(skb)->opt.router_alert) {
                            if (ip_call_ra_chain(skb))
                                    return 0;
                    } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
                            /* IGMPv1 (and broken IGMPv2 implementations sort of
                               Cisco IOS <= 11.2(8)) do not put router alert
                               option to IGMP packets destined to routable
                               groups. It is very bad, because it means
                               that we can forward NO IGMP messages.
                             */
                            read_lock(&mrt_lock);
                            if (mroute_socket) {
                                    nf_reset(skb);
                                    raw_rcv(mroute_socket, skb);
                                    read_unlock(&mrt_lock);
                                    return 0;
                            }
                            read_unlock(&mrt_lock);
                    }
        }

        read_lock(&mrt_lock);
        cache = ipmr_cache_find(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

        /*
         *      No usable cache entry
         */
        if (cache==NULL) {
                int vif;

                if (local) {
                        /* Deliver the original locally, keep a clone
                         * for the unresolved queue */
                        struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                        ip_local_deliver(skb);
                        if (skb2 == NULL) {
                                read_unlock(&mrt_lock);
                                return -ENOBUFS;
                        }
                        skb = skb2;
                }

                vif = ipmr_find_vif(skb->dev);
                if (vif >= 0) {
                        int err = ipmr_cache_unresolved(vif, skb);
                        read_unlock(&mrt_lock);

                        return err;
                }
                read_unlock(&mrt_lock);
                kfree_skb(skb);
                return -ENODEV;
        }

        ip_mr_forward(skb, cache, local);

        read_unlock(&mrt_lock);

        if (local)
                return ip_local_deliver(skb);

        return 0;

dont_forward:
        if (local)
                return ip_local_deliver(skb);
        kfree_skb(skb);
        return 0;
}
1465
1466 #ifdef CONFIG_IP_PIMSM_V1
1467 /*
1468  * Handle IGMP messages of PIMv1
1469  */
1470
/*
 * Receive a PIMv1 REGISTER carried inside an IGMP message: validate
 * the header and the encapsulated IP packet, then strip the outer
 * headers and re-inject the inner packet through the register VIF so
 * it looks as if it arrived there.  Always consumes @skb.
 */
int pim_rcv_v1(struct sk_buff * skb)
{
        struct igmphdr *pim;
        struct iphdr   *encap;
        struct net_device  *reg_dev = NULL;

        if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
                goto drop;

        pim = igmp_hdr(skb);

        /* Only meaningful while PIM mode is enabled */
        if (!mroute_do_pim ||
            skb->len < sizeof(*pim) + sizeof(*encap) ||
            pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
                goto drop;

        encap = (struct iphdr *)(skb_transport_header(skb) +
                                 sizeof(struct igmphdr));
        /*
           Check that:
           a. packet is really destinted to a multicast group
           b. packet is not a NULL-REGISTER
           c. packet is not truncated
         */
        if (!ipv4_is_multicast(encap->daddr) ||
            encap->tot_len == 0 ||
            ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
                goto drop;

        /* Take a reference on the register device under mrt_lock so
         * it cannot vanish while we re-inject */
        read_lock(&mrt_lock);
        if (reg_vif_num >= 0)
                reg_dev = vif_table[reg_vif_num].dev;
        if (reg_dev)
                dev_hold(reg_dev);
        read_unlock(&mrt_lock);

        if (reg_dev == NULL)
                goto drop;

        /* Strip outer IP+IGMP headers; inner packet becomes the new
         * network-layer payload arriving on reg_dev */
        skb->mac_header = skb->network_header;
        skb_pull(skb, (u8*)encap - skb->data);
        skb_reset_network_header(skb);
        skb->dev = reg_dev;
        skb->protocol = htons(ETH_P_IP);
        skb->ip_summed = 0;
        skb->pkt_type = PACKET_HOST;
        dst_release(skb->dst);
        skb->dst = NULL;
        reg_dev->stats.rx_bytes += skb->len;
        reg_dev->stats.rx_packets++;
        nf_reset(skb);
        netif_rx(skb);
        dev_put(reg_dev);
        return 0;
 drop:
        kfree_skb(skb);
        return 0;
}
1529 #endif
1530
1531 #ifdef CONFIG_IP_PIMSM_V2
/*
 * Receive a PIMv2 REGISTER: verify type/flags and checksum, validate
 * the encapsulated IP packet, then strip the outer headers and
 * re-inject the inner packet through the register VIF (same scheme
 * as pim_rcv_v1).  Always consumes @skb.
 */
static int pim_rcv(struct sk_buff * skb)
{
        struct pimreghdr *pim;
        struct iphdr   *encap;
        struct net_device  *reg_dev = NULL;

        if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
                goto drop;

        pim = (struct pimreghdr *)skb_transport_header(skb);
        /* Accept either a header-only checksum (relaxed, for older
         * peers) or a full-packet checksum */
        if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
            (pim->flags&PIM_NULL_REGISTER) ||
            (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
             csum_fold(skb_checksum(skb, 0, skb->len, 0))))
                goto drop;

        /* check if the inner packet is destined to mcast group */
        encap = (struct iphdr *)(skb_transport_header(skb) +
                                 sizeof(struct pimreghdr));
        if (!ipv4_is_multicast(encap->daddr) ||
            encap->tot_len == 0 ||
            ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
                goto drop;

        /* Pin the register device while re-injecting */
        read_lock(&mrt_lock);
        if (reg_vif_num >= 0)
                reg_dev = vif_table[reg_vif_num].dev;
        if (reg_dev)
                dev_hold(reg_dev);
        read_unlock(&mrt_lock);

        if (reg_dev == NULL)
                goto drop;

        skb->mac_header = skb->network_header;
        skb_pull(skb, (u8*)encap - skb->data);
        skb_reset_network_header(skb);
        skb->dev = reg_dev;
        skb->protocol = htons(ETH_P_IP);
        skb->ip_summed = 0;
        skb->pkt_type = PACKET_HOST;
        dst_release(skb->dst);
        reg_dev->stats.rx_bytes += skb->len;
        reg_dev->stats.rx_packets++;
        /* NOTE(review): skb->dst is cleared after the stats updates
         * here, unlike pim_rcv_v1 which clears it right after
         * dst_release — behaviourally equivalent but inconsistent */
        skb->dst = NULL;
        nf_reset(skb);
        netif_rx(skb);
        dev_put(reg_dev);
        return 0;
 drop:
        kfree_skb(skb);
        return 0;
}
1585 #endif
1586
/*
 * Fill @skb with the routing attributes for cache entry @c: the input
 * interface (RTA_IIF) and an RTA_MULTIPATH nexthop list of all output
 * VIFs with a TTL threshold below 255.  Returns 1 on success or
 * -EMSGSIZE after trimming @skb back if it ran out of room (RTA_PUT
 * jumps to rtattr_failure on overflow).
 */
static int
ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
{
        int ct;
        struct rtnexthop *nhp;
        struct net_device *dev = vif_table[c->mfc_parent].dev;
        u8 *b = skb_tail_pointer(skb);
        struct rtattr *mp_head;

        if (dev)
                RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);

        /* Header length is patched up after the nexthops are added */
        mp_head = (struct rtattr*)skb_put(skb, RTA_LENGTH(0));

        for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
                if (c->mfc_un.res.ttls[ct] < 255) {
                        if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
                                goto rtattr_failure;
                        nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
                        nhp->rtnh_flags = 0;
                        nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
                        nhp->rtnh_ifindex = vif_table[ct].dev->ifindex;
                        nhp->rtnh_len = sizeof(*nhp);
                }
        }
        mp_head->rta_type = RTA_MULTIPATH;
        mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
        rtm->rtm_type = RTN_MULTICAST;
        return 1;

rtattr_failure:
        nlmsg_trim(skb, b);
        return -EMSGSIZE;
}
1621
/*
 * rtnetlink route-get helper for multicast routes.  On a cache hit the
 * route attributes are filled via ipmr_fill_mroute(); on a miss (and
 * !nowait) a minimal IP header is synthesised on a clone and queued as
 * unresolved so mrouted can resolve it.
 */
int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
        int err;
        struct mfc_cache *cache;
        struct rtable *rt = skb->rtable;

        read_lock(&mrt_lock);
        cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);

        if (cache==NULL) {
                struct sk_buff *skb2;
                struct iphdr *iph;
                struct net_device *dev;
                int vif;

                if (nowait) {
                        read_unlock(&mrt_lock);
                        return -EAGAIN;
                }

                dev = skb->dev;
                if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
                        read_unlock(&mrt_lock);
                        return -ENODEV;
                }
                skb2 = skb_clone(skb, GFP_ATOMIC);
                if (!skb2) {
                        read_unlock(&mrt_lock);
                        return -ENOMEM;
                }

                /* Build a minimal fake header carrying just src/dst.
                 * NOTE(review): version is set to 0, presumably to
                 * mark this as a synthetic cache-miss probe rather
                 * than a real packet — confirm against the upcall
                 * handling in ipmr_cache_report/unresolved. */
                skb_push(skb2, sizeof(struct iphdr));
                skb_reset_network_header(skb2);
                iph = ip_hdr(skb2);
                iph->ihl = sizeof(struct iphdr) >> 2;
                iph->saddr = rt->rt_src;
                iph->daddr = rt->rt_dst;
                iph->version = 0;
                err = ipmr_cache_unresolved(vif, skb2);
                read_unlock(&mrt_lock);
                return err;
        }

        if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
                cache->mfc_flags |= MFC_NOTIFY;
        err = ipmr_fill_mroute(skb, cache, rtm);
        read_unlock(&mrt_lock);
        return err;
}
1671
1672 #ifdef CONFIG_PROC_FS
1673 /*
1674  *      The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
1675  */
/* Iterator state for /proc vif dumps: current index into vif_table */
struct ipmr_vif_iter {
        int ct;
};
1679
1680 static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter,
1681                                            loff_t pos)
1682 {
1683         for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) {
1684                 if (!VIF_EXISTS(iter->ct))
1685                         continue;
1686                 if (pos-- == 0)
1687                         return &vif_table[iter->ct];
1688         }
1689         return NULL;
1690 }
1691
/* seq_file start: take mrt_lock for the whole walk (dropped in stop) */
static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
        __acquires(mrt_lock)
{
        read_lock(&mrt_lock);
        return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1)
                : SEQ_START_TOKEN;
}
1699
1700 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1701 {
1702         struct ipmr_vif_iter *iter = seq->private;
1703
1704         ++*pos;
1705         if (v == SEQ_START_TOKEN)
1706                 return ipmr_vif_seq_idx(iter, 0);
1707
1708         while (++iter->ct < maxvif) {
1709                 if (!VIF_EXISTS(iter->ct))
1710                         continue;
1711                 return &vif_table[iter->ct];
1712         }
1713         return NULL;
1714 }
1715
/* seq_file stop: release the lock taken in ipmr_vif_seq_start */
static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
        __releases(mrt_lock)
{
        read_unlock(&mrt_lock);
}
1721
1722 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1723 {
1724         if (v == SEQ_START_TOKEN) {
1725                 seq_puts(seq,
1726                          "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
1727         } else {
1728                 const struct vif_device *vif = v;
1729                 const char *name =  vif->dev ? vif->dev->name : "none";
1730
1731                 seq_printf(seq,
1732                            "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
1733                            vif - vif_table,
1734                            name, vif->bytes_in, vif->pkt_in,
1735                            vif->bytes_out, vif->pkt_out,
1736                            vif->flags, vif->local, vif->remote);
1737         }
1738         return 0;
1739 }
1740
/* seq_file iterator for /proc/net/ip_mr_vif */
static const struct seq_operations ipmr_vif_seq_ops = {
        .start = ipmr_vif_seq_start,
        .next  = ipmr_vif_seq_next,
        .stop  = ipmr_vif_seq_stop,
        .show  = ipmr_vif_seq_show,
};
1747
/* open() for the vif proc file: allocate per-reader iterator state */
static int ipmr_vif_open(struct inode *inode, struct file *file)
{
        return seq_open_private(file, &ipmr_vif_seq_ops,
                        sizeof(struct ipmr_vif_iter));
}
1753
/* File operations for /proc/net/ip_mr_vif */
static const struct file_operations ipmr_vif_fops = {
        .owner   = THIS_MODULE,
        .open    = ipmr_vif_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release_private,
};
1761
/* Iterator state for /proc mfc dumps: which table we are walking
 * (mfc_cache_array or &mfc_unres_queue) and the current hash line */
struct ipmr_mfc_iter {
        struct mfc_cache **cache;
        int ct;
};
1766
1767
/*
 * Position the MFC iterator at entry @pos: first the resolved hash
 * table, then the unresolved queue.  On a non-NULL return the matching
 * lock (mrt_lock or mfc_unres_lock, as recorded in it->cache) is still
 * held; ipmr_mfc_seq_stop() drops it.
 */
static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
{
        struct mfc_cache *mfc;

        it->cache = mfc_cache_array;
        read_lock(&mrt_lock);
        for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
                for (mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next)
                        if (pos-- == 0)
                                return mfc;
        read_unlock(&mrt_lock);

        /* Not in the resolved table: continue into the unresolved queue */
        it->cache = &mfc_unres_queue;
        spin_lock_bh(&mfc_unres_lock);
        for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
                if (pos-- == 0)
                        return mfc;
        spin_unlock_bh(&mfc_unres_lock);

        it->cache = NULL;
        return NULL;
}
1790
1791
/* seq_file start: reset iterator state and seek to *pos (locking is
 * done inside ipmr_mfc_seq_idx and tracked via it->cache) */
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
        struct ipmr_mfc_iter *it = seq->private;
        it->cache = NULL;
        it->ct = 0;
        return *pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
                : SEQ_START_TOKEN;
}
1800
/*
 * Advance to the next MFC entry.
 *
 * Locking mirrors ipmr_mfc_seq_idx(): while walking mfc_cache_array
 * we hold mrt_lock (reader); when the hash table is exhausted we drop
 * mrt_lock and take mfc_unres_lock for the unresolved queue.  When
 * both are exhausted, all locks are released and it->cache is NULLed
 * so ipmr_mfc_seq_stop() knows nothing is held.
 */
static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(seq->private, 0);

	/* More entries chained in the current bucket/queue? */
	if (mfc->next)
		return mfc->next;

	if (it->cache == &mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != mfc_cache_array);

	/* Try the remaining hash buckets. */
	while (++it->ct < MFC_LINES) {
		mfc = mfc_cache_array[it->ct];
		if (mfc)
			return mfc;
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	mfc = mfc_unres_queue;
	if (mfc)
		return mfc;

 end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}
1841
1842 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1843 {
1844         struct ipmr_mfc_iter *it = seq->private;
1845
1846         if (it->cache == &mfc_unres_queue)
1847                 spin_unlock_bh(&mfc_unres_lock);
1848         else if (it->cache == mfc_cache_array)
1849                 read_unlock(&mrt_lock);
1850 }
1851
1852 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1853 {
1854         int n;
1855
1856         if (v == SEQ_START_TOKEN) {
1857                 seq_puts(seq,
1858                  "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
1859         } else {
1860                 const struct mfc_cache *mfc = v;
1861                 const struct ipmr_mfc_iter *it = seq->private;
1862
1863                 seq_printf(seq, "%08lX %08lX %-3d %8ld %8ld %8ld",
1864                            (unsigned long) mfc->mfc_mcastgrp,
1865                            (unsigned long) mfc->mfc_origin,
1866                            mfc->mfc_parent,
1867                            mfc->mfc_un.res.pkt,
1868                            mfc->mfc_un.res.bytes,
1869                            mfc->mfc_un.res.wrong_if);
1870
1871                 if (it->cache != &mfc_unres_queue) {
1872                         for (n = mfc->mfc_un.res.minvif;
1873                              n < mfc->mfc_un.res.maxvif; n++ ) {
1874                                 if (VIF_EXISTS(n)
1875                                    && mfc->mfc_un.res.ttls[n] < 255)
1876                                 seq_printf(seq,
1877                                            " %2d:%-3d",
1878                                            n, mfc->mfc_un.res.ttls[n]);
1879                         }
1880                 }
1881                 seq_putc(seq, '\n');
1882         }
1883         return 0;
1884 }
1885
/* seq_file iteration ops for /proc/net/ip_mr_cache (forwarding cache). */
static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};
1892
1893 static int ipmr_mfc_open(struct inode *inode, struct file *file)
1894 {
1895         return seq_open_private(file, &ipmr_mfc_seq_ops,
1896                         sizeof(struct ipmr_mfc_iter));
1897 }
1898
/* file_operations for /proc/net/ip_mr_cache; release frees the iterator. */
static const struct file_operations ipmr_mfc_fops = {
	.owner   = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_private,
};
1906 #endif
1907
1908 #ifdef CONFIG_IP_PIMSM_V2
/* IP protocol handler for PIMv2 (protocol 103); receives PIM register packets. */
static struct net_protocol pim_protocol = {
	.handler	=	pim_rcv,
};
1912 #endif
1913
1914
1915 /*
1916  *      Setup for IP multicast routing
1917  */
1918
1919 int __init ip_mr_init(void)
1920 {
1921         int err;
1922
1923         mrt_cachep = kmem_cache_create("ip_mrt_cache",
1924                                        sizeof(struct mfc_cache),
1925                                        0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1926                                        NULL);
1927         if (!mrt_cachep)
1928                 return -ENOMEM;
1929
1930         setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
1931         err = register_netdevice_notifier(&ip_mr_notifier);
1932         if (err)
1933                 goto reg_notif_fail;
1934 #ifdef CONFIG_PROC_FS
1935         err = -ENOMEM;
1936         if (!proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops))
1937                 goto proc_vif_fail;
1938         if (!proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops))
1939                 goto proc_cache_fail;
1940 #endif
1941         return 0;
1942 reg_notif_fail:
1943         kmem_cache_destroy(mrt_cachep);
1944 #ifdef CONFIG_PROC_FS
1945 proc_vif_fail:
1946         unregister_netdevice_notifier(&ip_mr_notifier);
1947 proc_cache_fail:
1948         proc_net_remove(&init_net, "ip_mr_vif");
1949 #endif
1950         return err;
1951 }