ipv4: Don't drop redirected route cache entry unless PTMU actually expired
[safe/jmp/linux-2.6] / net / ipv4 / ipmr.c
1 /*
2  *      IP multicast routing support for mrouted 3.6/3.8
3  *
4  *              (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
5  *        Linux Consultancy and Custom Driver Development
6  *
7  *      This program is free software; you can redistribute it and/or
8  *      modify it under the terms of the GNU General Public License
9  *      as published by the Free Software Foundation; either version
10  *      2 of the License, or (at your option) any later version.
11  *
12  *      Fixes:
13  *      Michael Chastain        :       Incorrect size of copying.
14  *      Alan Cox                :       Added the cache manager code
15  *      Alan Cox                :       Fixed the clone/copy bug and device race.
16  *      Mike McLagan            :       Routing by source
17  *      Malcolm Beattie         :       Buffer handling fixes.
18  *      Alexey Kuznetsov        :       Double buffer free and other fixes.
19  *      SVR Anand               :       Fixed several multicast bugs and problems.
20  *      Alexey Kuznetsov        :       Status, optimisations and more.
21  *      Brad Parker             :       Better behaviour on mrouted upcall
22  *                                      overflow.
23  *      Carlos Picoto           :       PIMv1 Support
24  *      Pavlin Ivanov Radoslavov:       PIMv2 Registers must checksum only PIM header
25  *                                      Relax this requirement to work with older peers.
26  *
27  */
28
29 #include <asm/system.h>
30 #include <asm/uaccess.h>
31 #include <linux/types.h>
32 #include <linux/capability.h>
33 #include <linux/errno.h>
34 #include <linux/timer.h>
35 #include <linux/mm.h>
36 #include <linux/kernel.h>
37 #include <linux/fcntl.h>
38 #include <linux/stat.h>
39 #include <linux/socket.h>
40 #include <linux/in.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/inetdevice.h>
44 #include <linux/igmp.h>
45 #include <linux/proc_fs.h>
46 #include <linux/seq_file.h>
47 #include <linux/mroute.h>
48 #include <linux/init.h>
49 #include <linux/if_ether.h>
50 #include <net/net_namespace.h>
51 #include <net/ip.h>
52 #include <net/protocol.h>
53 #include <linux/skbuff.h>
54 #include <net/route.h>
55 #include <net/sock.h>
56 #include <net/icmp.h>
57 #include <net/udp.h>
58 #include <net/raw.h>
59 #include <linux/notifier.h>
60 #include <linux/if_arp.h>
61 #include <linux/netfilter_ipv4.h>
62 #include <net/ipip.h>
63 #include <net/checksum.h>
64 #include <net/netlink.h>
65
66 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
67 #define CONFIG_IP_PIMSM 1
68 #endif
69
70 /* Big lock protecting the vif table, the mrt cache and the mroute socket
71    state. Note that changes are additionally serialized via rtnl_lock.
72  */
73
74 static DEFINE_RWLOCK(mrt_lock);
75
76 /*
77  *      Multicast router control variables
78  */
79
80 #define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)
81
82 static struct mfc_cache *mfc_unres_queue;               /* Queue of unresolved entries */
83
84 /* Special spinlock for queue of unresolved entries */
85 static DEFINE_SPINLOCK(mfc_unres_lock);
86
87 /* We return to Alan's original scheme. The hash table of resolved
88    entries is changed only in process context and is protected
89    by the weak lock mrt_lock. The queue of unresolved entries is
90    protected by the strong spinlock mfc_unres_lock.
91
92    With this scheme the data path takes no exclusive locks at all.
93  */
94
95 static struct kmem_cache *mrt_cachep __read_mostly;
96
97 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
98 static int ipmr_cache_report(struct net *net,
99                              struct sk_buff *pkt, vifi_t vifi, int assert);
100 static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
101
102 static struct timer_list ipmr_expire_timer;
103
104 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
105
106 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
107 {
108         struct net *net = dev_net(dev);
109
110         dev_close(dev);
111
112         dev = __dev_get_by_name(net, "tunl0");
113         if (dev) {
114                 const struct net_device_ops *ops = dev->netdev_ops;
115                 struct ifreq ifr;
116                 struct ip_tunnel_parm p;
117
118                 memset(&p, 0, sizeof(p));
119                 p.iph.daddr = v->vifc_rmt_addr.s_addr;
120                 p.iph.saddr = v->vifc_lcl_addr.s_addr;
121                 p.iph.version = 4;
122                 p.iph.ihl = 5;
123                 p.iph.protocol = IPPROTO_IPIP;
124                 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
125                 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
126
127                 if (ops->ndo_do_ioctl) {
128                         mm_segment_t oldfs = get_fs();
129
130                         set_fs(KERNEL_DS);
131                         ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
132                         set_fs(oldfs);
133                 }
134         }
135 }
136
137 static
138 struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
139 {
140         struct net_device  *dev;
141
142         dev = __dev_get_by_name(net, "tunl0");
143
144         if (dev) {
145                 const struct net_device_ops *ops = dev->netdev_ops;
146                 int err;
147                 struct ifreq ifr;
148                 struct ip_tunnel_parm p;
149                 struct in_device  *in_dev;
150
151                 memset(&p, 0, sizeof(p));
152                 p.iph.daddr = v->vifc_rmt_addr.s_addr;
153                 p.iph.saddr = v->vifc_lcl_addr.s_addr;
154                 p.iph.version = 4;
155                 p.iph.ihl = 5;
156                 p.iph.protocol = IPPROTO_IPIP;
157                 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
158                 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
159
160                 if (ops->ndo_do_ioctl) {
161                         mm_segment_t oldfs = get_fs();
162
163                         set_fs(KERNEL_DS);
164                         err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
165                         set_fs(oldfs);
166                 } else
167                         err = -EOPNOTSUPP;
168
169                 dev = NULL;
170
171                 if (err == 0 &&
172                     (dev = __dev_get_by_name(net, p.name)) != NULL) {
173                         dev->flags |= IFF_MULTICAST;
174
175                         in_dev = __in_dev_get_rtnl(dev);
176                         if (in_dev == NULL)
177                                 goto failure;
178
179                         ipv4_devconf_setall(in_dev);
180                         IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
181
182                         if (dev_open(dev))
183                                 goto failure;
184                         dev_hold(dev);
185                 }
186         }
187         return dev;
188
189 failure:
190         /* allow the register to be completed before unregistering. */
191         rtnl_unlock();
192         rtnl_lock();
193
194         unregister_netdevice(dev);
195         return NULL;
196 }
197
198 #ifdef CONFIG_IP_PIMSM
199
200 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
201 {
202         struct net *net = dev_net(dev);
203
204         read_lock(&mrt_lock);
205         dev->stats.tx_bytes += skb->len;
206         dev->stats.tx_packets++;
207         ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num,
208                           IGMPMSG_WHOLEPKT);
209         read_unlock(&mrt_lock);
210         kfree_skb(skb);
211         return NETDEV_TX_OK;
212 }
213
/* Device operations for the PIM register vif; only the transmit hook is set. */
214 static const struct net_device_ops reg_vif_netdev_ops = {
215         .ndo_start_xmit = reg_vif_xmit,
216 };
217
218 static void reg_vif_setup(struct net_device *dev)
219 {
220         dev->type               = ARPHRD_PIMREG;
221         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
222         dev->flags              = IFF_NOARP;
223         dev->netdev_ops         = &reg_vif_netdev_ops,
224         dev->destructor         = free_netdev;
225         dev->features           |= NETIF_F_NETNS_LOCAL;
226 }
227
228 static struct net_device *ipmr_reg_vif(struct net *net)
229 {
230         struct net_device *dev;
231         struct in_device *in_dev;
232
233         dev = alloc_netdev(0, "pimreg", reg_vif_setup);
234
235         if (dev == NULL)
236                 return NULL;
237
238         dev_net_set(dev, net);
239
240         if (register_netdevice(dev)) {
241                 free_netdev(dev);
242                 return NULL;
243         }
244         dev->iflink = 0;
245
246         rcu_read_lock();
247         if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
248                 rcu_read_unlock();
249                 goto failure;
250         }
251
252         ipv4_devconf_setall(in_dev);
253         IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
254         rcu_read_unlock();
255
256         if (dev_open(dev))
257                 goto failure;
258
259         dev_hold(dev);
260
261         return dev;
262
263 failure:
264         /* allow the register to be completed before unregistering. */
265         rtnl_unlock();
266         rtnl_lock();
267
268         unregister_netdevice(dev);
269         return NULL;
270 }
271 #endif
272
273 /*
274  *      Delete a VIF entry
275  *      @notify: Set to 1, if the caller is a notifier_call
276  */
277
278 static int vif_delete(struct net *net, int vifi, int notify,
279                       struct list_head *head)
280 {
281         struct vif_device *v;
282         struct net_device *dev;
283         struct in_device *in_dev;
284
285         if (vifi < 0 || vifi >= net->ipv4.maxvif)
286                 return -EADDRNOTAVAIL;
287
288         v = &net->ipv4.vif_table[vifi];
289
290         write_lock_bh(&mrt_lock);
291         dev = v->dev;
292         v->dev = NULL;
293
294         if (!dev) {
295                 write_unlock_bh(&mrt_lock);
296                 return -EADDRNOTAVAIL;
297         }
298
299 #ifdef CONFIG_IP_PIMSM
300         if (vifi == net->ipv4.mroute_reg_vif_num)
301                 net->ipv4.mroute_reg_vif_num = -1;
302 #endif
303
304         if (vifi+1 == net->ipv4.maxvif) {
305                 int tmp;
306                 for (tmp=vifi-1; tmp>=0; tmp--) {
307                         if (VIF_EXISTS(net, tmp))
308                                 break;
309                 }
310                 net->ipv4.maxvif = tmp+1;
311         }
312
313         write_unlock_bh(&mrt_lock);
314
315         dev_set_allmulti(dev, -1);
316
317         if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
318                 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
319                 ip_rt_multicast_event(in_dev);
320         }
321
322         if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
323                 unregister_netdevice_queue(dev, head);
324
325         dev_put(dev);
326         return 0;
327 }
328
329 static inline void ipmr_cache_free(struct mfc_cache *c)
330 {
331         release_net(mfc_net(c));
332         kmem_cache_free(mrt_cachep, c);
333 }
334
335 /* Destroy an unresolved cache entry, killing queued skbs
336    and reporting error to netlink readers.
337  */
338
339 static void ipmr_destroy_unres(struct mfc_cache *c)
340 {
341         struct sk_buff *skb;
342         struct nlmsgerr *e;
343         struct net *net = mfc_net(c);
344
345         atomic_dec(&net->ipv4.cache_resolve_queue_len);
346
347         while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
348                 if (ip_hdr(skb)->version == 0) {
349                         struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
350                         nlh->nlmsg_type = NLMSG_ERROR;
351                         nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
352                         skb_trim(skb, nlh->nlmsg_len);
353                         e = NLMSG_DATA(nlh);
354                         e->error = -ETIMEDOUT;
355                         memset(&e->msg, 0, sizeof(e->msg));
356
357                         rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
358                 } else
359                         kfree_skb(skb);
360         }
361
362         ipmr_cache_free(c);
363 }
364
365
366 /* Single timer process for all the unresolved queue. */
367
368 static void ipmr_expire_process(unsigned long dummy)
369 {
370         unsigned long now;
371         unsigned long expires;
372         struct mfc_cache *c, **cp;
373
374         if (!spin_trylock(&mfc_unres_lock)) {
375                 mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
376                 return;
377         }
378
379         if (mfc_unres_queue == NULL)
380                 goto out;
381
382         now = jiffies;
383         expires = 10*HZ;
384         cp = &mfc_unres_queue;
385
386         while ((c=*cp) != NULL) {
387                 if (time_after(c->mfc_un.unres.expires, now)) {
388                         unsigned long interval = c->mfc_un.unres.expires - now;
389                         if (interval < expires)
390                                 expires = interval;
391                         cp = &c->next;
392                         continue;
393                 }
394
395                 *cp = c->next;
396
397                 ipmr_destroy_unres(c);
398         }
399
400         if (mfc_unres_queue != NULL)
401                 mod_timer(&ipmr_expire_timer, jiffies + expires);
402
403 out:
404         spin_unlock(&mfc_unres_lock);
405 }
406
407 /* Fill oifs list. It is called under write locked mrt_lock. */
408
409 static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
410 {
411         int vifi;
412         struct net *net = mfc_net(cache);
413
414         cache->mfc_un.res.minvif = MAXVIFS;
415         cache->mfc_un.res.maxvif = 0;
416         memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
417
418         for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) {
419                 if (VIF_EXISTS(net, vifi) &&
420                     ttls[vifi] && ttls[vifi] < 255) {
421                         cache->mfc_un.res.ttls[vifi] = ttls[vifi];
422                         if (cache->mfc_un.res.minvif > vifi)
423                                 cache->mfc_un.res.minvif = vifi;
424                         if (cache->mfc_un.res.maxvif <= vifi)
425                                 cache->mfc_un.res.maxvif = vifi + 1;
426                 }
427         }
428 }
429
430 static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
431 {
432         int vifi = vifc->vifc_vifi;
433         struct vif_device *v = &net->ipv4.vif_table[vifi];
434         struct net_device *dev;
435         struct in_device *in_dev;
436         int err;
437
438         /* Is vif busy ? */
439         if (VIF_EXISTS(net, vifi))
440                 return -EADDRINUSE;
441
442         switch (vifc->vifc_flags) {
443 #ifdef CONFIG_IP_PIMSM
444         case VIFF_REGISTER:
445                 /*
446                  * Special Purpose VIF in PIM
447                  * All the packets will be sent to the daemon
448                  */
449                 if (net->ipv4.mroute_reg_vif_num >= 0)
450                         return -EADDRINUSE;
451                 dev = ipmr_reg_vif(net);
452                 if (!dev)
453                         return -ENOBUFS;
454                 err = dev_set_allmulti(dev, 1);
455                 if (err) {
456                         unregister_netdevice(dev);
457                         dev_put(dev);
458                         return err;
459                 }
460                 break;
461 #endif
462         case VIFF_TUNNEL:
463                 dev = ipmr_new_tunnel(net, vifc);
464                 if (!dev)
465                         return -ENOBUFS;
466                 err = dev_set_allmulti(dev, 1);
467                 if (err) {
468                         ipmr_del_tunnel(dev, vifc);
469                         dev_put(dev);
470                         return err;
471                 }
472                 break;
473
474         case VIFF_USE_IFINDEX:
475         case 0:
476                 if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
477                         dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
478                         if (dev && dev->ip_ptr == NULL) {
479                                 dev_put(dev);
480                                 return -EADDRNOTAVAIL;
481                         }
482                 } else
483                         dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
484
485                 if (!dev)
486                         return -EADDRNOTAVAIL;
487                 err = dev_set_allmulti(dev, 1);
488                 if (err) {
489                         dev_put(dev);
490                         return err;
491                 }
492                 break;
493         default:
494                 return -EINVAL;
495         }
496
497         if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
498                 dev_put(dev);
499                 return -EADDRNOTAVAIL;
500         }
501         IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
502         ip_rt_multicast_event(in_dev);
503
504         /*
505          *      Fill in the VIF structures
506          */
507         v->rate_limit = vifc->vifc_rate_limit;
508         v->local = vifc->vifc_lcl_addr.s_addr;
509         v->remote = vifc->vifc_rmt_addr.s_addr;
510         v->flags = vifc->vifc_flags;
511         if (!mrtsock)
512                 v->flags |= VIFF_STATIC;
513         v->threshold = vifc->vifc_threshold;
514         v->bytes_in = 0;
515         v->bytes_out = 0;
516         v->pkt_in = 0;
517         v->pkt_out = 0;
518         v->link = dev->ifindex;
519         if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
520                 v->link = dev->iflink;
521
522         /* And finish update writing critical data */
523         write_lock_bh(&mrt_lock);
524         v->dev = dev;
525 #ifdef CONFIG_IP_PIMSM
526         if (v->flags&VIFF_REGISTER)
527                 net->ipv4.mroute_reg_vif_num = vifi;
528 #endif
529         if (vifi+1 > net->ipv4.maxvif)
530                 net->ipv4.maxvif = vifi+1;
531         write_unlock_bh(&mrt_lock);
532         return 0;
533 }
534
535 static struct mfc_cache *ipmr_cache_find(struct net *net,
536                                          __be32 origin,
537                                          __be32 mcastgrp)
538 {
539         int line = MFC_HASH(mcastgrp, origin);
540         struct mfc_cache *c;
541
542         for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) {
543                 if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
544                         break;
545         }
546         return c;
547 }
548
549 /*
550  *      Allocate a multicast cache entry
551  */
552 static struct mfc_cache *ipmr_cache_alloc(struct net *net)
553 {
554         struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
555         if (c == NULL)
556                 return NULL;
557         c->mfc_un.res.minvif = MAXVIFS;
558         mfc_net_set(c, net);
559         return c;
560 }
561
562 static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
563 {
564         struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
565         if (c == NULL)
566                 return NULL;
567         skb_queue_head_init(&c->mfc_un.unres.unresolved);
568         c->mfc_un.unres.expires = jiffies + 10*HZ;
569         mfc_net_set(c, net);
570         return c;
571 }
572
573 /*
574  *      A cache entry has gone into a resolved state from queued
575  */
576
577 static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
578 {
579         struct sk_buff *skb;
580         struct nlmsgerr *e;
581
582         /*
583          *      Play the pending entries through our router
584          */
585
586         while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
587                 if (ip_hdr(skb)->version == 0) {
588                         struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
589
590                         if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
591                                 nlh->nlmsg_len = (skb_tail_pointer(skb) -
592                                                   (u8 *)nlh);
593                         } else {
594                                 nlh->nlmsg_type = NLMSG_ERROR;
595                                 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
596                                 skb_trim(skb, nlh->nlmsg_len);
597                                 e = NLMSG_DATA(nlh);
598                                 e->error = -EMSGSIZE;
599                                 memset(&e->msg, 0, sizeof(e->msg));
600                         }
601
602                         rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid);
603                 } else
604                         ip_mr_forward(skb, c, 0);
605         }
606 }
607
608 /*
609  *      Bounce a cache query up to mrouted. We could use netlink for this but mrouted
610  *      expects the following bizarre scheme.
611  *
612  *      Called under mrt_lock.
613  */
614
615 static int ipmr_cache_report(struct net *net,
616                              struct sk_buff *pkt, vifi_t vifi, int assert)
617 {
618         struct sk_buff *skb;
619         const int ihl = ip_hdrlen(pkt);
620         struct igmphdr *igmp;
621         struct igmpmsg *msg;
622         int ret;
623
624 #ifdef CONFIG_IP_PIMSM
625         if (assert == IGMPMSG_WHOLEPKT)
626                 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
627         else
628 #endif
629                 skb = alloc_skb(128, GFP_ATOMIC);
630
631         if (!skb)
632                 return -ENOBUFS;
633
634 #ifdef CONFIG_IP_PIMSM
635         if (assert == IGMPMSG_WHOLEPKT) {
636                 /* Ugly, but we have no choice with this interface.
637                    Duplicate old header, fix ihl, length etc.
638                    And all this only to mangle msg->im_msgtype and
639                    to set msg->im_mbz to "mbz" :-)
640                  */
641                 skb_push(skb, sizeof(struct iphdr));
642                 skb_reset_network_header(skb);
643                 skb_reset_transport_header(skb);
644                 msg = (struct igmpmsg *)skb_network_header(skb);
645                 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
646                 msg->im_msgtype = IGMPMSG_WHOLEPKT;
647                 msg->im_mbz = 0;
648                 msg->im_vif = net->ipv4.mroute_reg_vif_num;
649                 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
650                 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
651                                              sizeof(struct iphdr));
652         } else
653 #endif
654         {
655
656         /*
657          *      Copy the IP header
658          */
659
660         skb->network_header = skb->tail;
661         skb_put(skb, ihl);
662         skb_copy_to_linear_data(skb, pkt->data, ihl);
663         ip_hdr(skb)->protocol = 0;                      /* Flag to the kernel this is a route add */
664         msg = (struct igmpmsg *)skb_network_header(skb);
665         msg->im_vif = vifi;
666         skb_dst_set(skb, dst_clone(skb_dst(pkt)));
667
668         /*
669          *      Add our header
670          */
671
672         igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
673         igmp->type      =
674         msg->im_msgtype = assert;
675         igmp->code      =       0;
676         ip_hdr(skb)->tot_len = htons(skb->len);                 /* Fix the length */
677         skb->transport_header = skb->network_header;
678         }
679
680         if (net->ipv4.mroute_sk == NULL) {
681                 kfree_skb(skb);
682                 return -EINVAL;
683         }
684
685         /*
686          *      Deliver to mrouted
687          */
688         ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb);
689         if (ret < 0) {
690                 if (net_ratelimit())
691                         printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
692                 kfree_skb(skb);
693         }
694
695         return ret;
696 }
697
698 /*
699  *      Queue a packet for resolution. It gets locked cache entry!
700  */
701
702 static int
703 ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
704 {
705         int err;
706         struct mfc_cache *c;
707         const struct iphdr *iph = ip_hdr(skb);
708
709         spin_lock_bh(&mfc_unres_lock);
710         for (c=mfc_unres_queue; c; c=c->next) {
711                 if (net_eq(mfc_net(c), net) &&
712                     c->mfc_mcastgrp == iph->daddr &&
713                     c->mfc_origin == iph->saddr)
714                         break;
715         }
716
717         if (c == NULL) {
718                 /*
719                  *      Create a new entry if allowable
720                  */
721
722                 if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
723                     (c = ipmr_cache_alloc_unres(net)) == NULL) {
724                         spin_unlock_bh(&mfc_unres_lock);
725
726                         kfree_skb(skb);
727                         return -ENOBUFS;
728                 }
729
730                 /*
731                  *      Fill in the new cache entry
732                  */
733                 c->mfc_parent   = -1;
734                 c->mfc_origin   = iph->saddr;
735                 c->mfc_mcastgrp = iph->daddr;
736
737                 /*
738                  *      Reflect first query at mrouted.
739                  */
740                 err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE);
741                 if (err < 0) {
742                         /* If the report failed throw the cache entry
743                            out - Brad Parker
744                          */
745                         spin_unlock_bh(&mfc_unres_lock);
746
747                         ipmr_cache_free(c);
748                         kfree_skb(skb);
749                         return err;
750                 }
751
752                 atomic_inc(&net->ipv4.cache_resolve_queue_len);
753                 c->next = mfc_unres_queue;
754                 mfc_unres_queue = c;
755
756                 mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
757         }
758
759         /*
760          *      See if we can append the packet
761          */
762         if (c->mfc_un.unres.unresolved.qlen>3) {
763                 kfree_skb(skb);
764                 err = -ENOBUFS;
765         } else {
766                 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
767                 err = 0;
768         }
769
770         spin_unlock_bh(&mfc_unres_lock);
771         return err;
772 }
773
774 /*
775  *      MFC cache manipulation by user space mroute daemon
776  */
777
778 static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
779 {
780         int line;
781         struct mfc_cache *c, **cp;
782
783         line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
784
785         for (cp = &net->ipv4.mfc_cache_array[line];
786              (c = *cp) != NULL; cp = &c->next) {
787                 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
788                     c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
789                         write_lock_bh(&mrt_lock);
790                         *cp = c->next;
791                         write_unlock_bh(&mrt_lock);
792
793                         ipmr_cache_free(c);
794                         return 0;
795                 }
796         }
797         return -ENOENT;
798 }
799
800 static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
801 {
802         int line;
803         struct mfc_cache *uc, *c, **cp;
804
805         if (mfc->mfcc_parent >= MAXVIFS)
806                 return -ENFILE;
807
808         line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
809
810         for (cp = &net->ipv4.mfc_cache_array[line];
811              (c = *cp) != NULL; cp = &c->next) {
812                 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
813                     c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
814                         break;
815         }
816
817         if (c != NULL) {
818                 write_lock_bh(&mrt_lock);
819                 c->mfc_parent = mfc->mfcc_parent;
820                 ipmr_update_thresholds(c, mfc->mfcc_ttls);
821                 if (!mrtsock)
822                         c->mfc_flags |= MFC_STATIC;
823                 write_unlock_bh(&mrt_lock);
824                 return 0;
825         }
826
827         if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
828                 return -EINVAL;
829
830         c = ipmr_cache_alloc(net);
831         if (c == NULL)
832                 return -ENOMEM;
833
834         c->mfc_origin = mfc->mfcc_origin.s_addr;
835         c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
836         c->mfc_parent = mfc->mfcc_parent;
837         ipmr_update_thresholds(c, mfc->mfcc_ttls);
838         if (!mrtsock)
839                 c->mfc_flags |= MFC_STATIC;
840
841         write_lock_bh(&mrt_lock);
842         c->next = net->ipv4.mfc_cache_array[line];
843         net->ipv4.mfc_cache_array[line] = c;
844         write_unlock_bh(&mrt_lock);
845
846         /*
847          *      Check to see if we resolved a queued list. If so we
848          *      need to send on the frames and tidy up.
849          */
850         spin_lock_bh(&mfc_unres_lock);
851         for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
852              cp = &uc->next) {
853                 if (net_eq(mfc_net(uc), net) &&
854                     uc->mfc_origin == c->mfc_origin &&
855                     uc->mfc_mcastgrp == c->mfc_mcastgrp) {
856                         *cp = uc->next;
857                         atomic_dec(&net->ipv4.cache_resolve_queue_len);
858                         break;
859                 }
860         }
861         if (mfc_unres_queue == NULL)
862                 del_timer(&ipmr_expire_timer);
863         spin_unlock_bh(&mfc_unres_lock);
864
865         if (uc) {
866                 ipmr_cache_resolve(uc, c);
867                 ipmr_cache_free(uc);
868         }
869         return 0;
870 }
871
872 /*
873  *      Close the multicast socket, and clear the vif tables etc
874  */
875
876 static void mroute_clean_tables(struct net *net)
877 {
878         int i;
879         LIST_HEAD(list);
880
881         /*
882          *      Shut down all active vif entries
883          */
884         for (i = 0; i < net->ipv4.maxvif; i++) {
885                 if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC))
886                         vif_delete(net, i, 0, &list);
887         }
888         unregister_netdevice_many(&list);
889
890         /*
891          *      Wipe the cache
892          */
893         for (i=0; i<MFC_LINES; i++) {
894                 struct mfc_cache *c, **cp;
895
896                 cp = &net->ipv4.mfc_cache_array[i];
897                 while ((c = *cp) != NULL) {
898                         if (c->mfc_flags&MFC_STATIC) {
899                                 cp = &c->next;
900                                 continue;
901                         }
902                         write_lock_bh(&mrt_lock);
903                         *cp = c->next;
904                         write_unlock_bh(&mrt_lock);
905
906                         ipmr_cache_free(c);
907                 }
908         }
909
910         if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
911                 struct mfc_cache *c, **cp;
912
913                 spin_lock_bh(&mfc_unres_lock);
914                 cp = &mfc_unres_queue;
915                 while ((c = *cp) != NULL) {
916                         if (!net_eq(mfc_net(c), net)) {
917                                 cp = &c->next;
918                                 continue;
919                         }
920                         *cp = c->next;
921
922                         ipmr_destroy_unres(c);
923                 }
924                 spin_unlock_bh(&mfc_unres_lock);
925         }
926 }
927
928 static void mrtsock_destruct(struct sock *sk)
929 {
930         struct net *net = sock_net(sk);
931
932         rtnl_lock();
933         if (sk == net->ipv4.mroute_sk) {
934                 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
935
936                 write_lock_bh(&mrt_lock);
937                 net->ipv4.mroute_sk = NULL;
938                 write_unlock_bh(&mrt_lock);
939
940                 mroute_clean_tables(net);
941         }
942         rtnl_unlock();
943 }
944
945 /*
946  *      Socket options and virtual interface manipulation. The whole
947  *      virtual interface system is a complete heap, but unfortunately
948  *      that's how BSD mrouted happens to think. Maybe one day with a proper
949  *      MOSPF/PIM router set up we can clean this up.
950  */
951
952 int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
953 {
954         int ret;
955         struct vifctl vif;
956         struct mfcctl mfc;
957         struct net *net = sock_net(sk);
958
959         if (optname != MRT_INIT) {
960                 if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
961                         return -EACCES;
962         }
963
964         switch (optname) {
965         case MRT_INIT:
966                 if (sk->sk_type != SOCK_RAW ||
967                     inet_sk(sk)->inet_num != IPPROTO_IGMP)
968                         return -EOPNOTSUPP;
969                 if (optlen != sizeof(int))
970                         return -ENOPROTOOPT;
971
972                 rtnl_lock();
973                 if (net->ipv4.mroute_sk) {
974                         rtnl_unlock();
975                         return -EADDRINUSE;
976                 }
977
978                 ret = ip_ra_control(sk, 1, mrtsock_destruct);
979                 if (ret == 0) {
980                         write_lock_bh(&mrt_lock);
981                         net->ipv4.mroute_sk = sk;
982                         write_unlock_bh(&mrt_lock);
983
984                         IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
985                 }
986                 rtnl_unlock();
987                 return ret;
988         case MRT_DONE:
989                 if (sk != net->ipv4.mroute_sk)
990                         return -EACCES;
991                 return ip_ra_control(sk, 0, NULL);
992         case MRT_ADD_VIF:
993         case MRT_DEL_VIF:
994                 if (optlen != sizeof(vif))
995                         return -EINVAL;
996                 if (copy_from_user(&vif, optval, sizeof(vif)))
997                         return -EFAULT;
998                 if (vif.vifc_vifi >= MAXVIFS)
999                         return -ENFILE;
1000                 rtnl_lock();
1001                 if (optname == MRT_ADD_VIF) {
1002                         ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
1003                 } else {
1004                         ret = vif_delete(net, vif.vifc_vifi, 0, NULL);
1005                 }
1006                 rtnl_unlock();
1007                 return ret;
1008
1009                 /*
1010                  *      Manipulate the forwarding caches. These live
1011                  *      in a sort of kernel/user symbiosis.
1012                  */
1013         case MRT_ADD_MFC:
1014         case MRT_DEL_MFC:
1015                 if (optlen != sizeof(mfc))
1016                         return -EINVAL;
1017                 if (copy_from_user(&mfc, optval, sizeof(mfc)))
1018                         return -EFAULT;
1019                 rtnl_lock();
1020                 if (optname == MRT_DEL_MFC)
1021                         ret = ipmr_mfc_delete(net, &mfc);
1022                 else
1023                         ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk);
1024                 rtnl_unlock();
1025                 return ret;
1026                 /*
1027                  *      Control PIM assert.
1028                  */
1029         case MRT_ASSERT:
1030         {
1031                 int v;
1032                 if (get_user(v,(int __user *)optval))
1033                         return -EFAULT;
1034                 net->ipv4.mroute_do_assert = (v) ? 1 : 0;
1035                 return 0;
1036         }
1037 #ifdef CONFIG_IP_PIMSM
1038         case MRT_PIM:
1039         {
1040                 int v;
1041
1042                 if (get_user(v,(int __user *)optval))
1043                         return -EFAULT;
1044                 v = (v) ? 1 : 0;
1045
1046                 rtnl_lock();
1047                 ret = 0;
1048                 if (v != net->ipv4.mroute_do_pim) {
1049                         net->ipv4.mroute_do_pim = v;
1050                         net->ipv4.mroute_do_assert = v;
1051                 }
1052                 rtnl_unlock();
1053                 return ret;
1054         }
1055 #endif
1056         /*
1057          *      Spurious command, or MRT_VERSION which you cannot
1058          *      set.
1059          */
1060         default:
1061                 return -ENOPROTOOPT;
1062         }
1063 }
1064
1065 /*
1066  *      Getsock opt support for the multicast routing system.
1067  */
1068
1069 int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
1070 {
1071         int olr;
1072         int val;
1073         struct net *net = sock_net(sk);
1074
1075         if (optname != MRT_VERSION &&
1076 #ifdef CONFIG_IP_PIMSM
1077            optname!=MRT_PIM &&
1078 #endif
1079            optname!=MRT_ASSERT)
1080                 return -ENOPROTOOPT;
1081
1082         if (get_user(olr, optlen))
1083                 return -EFAULT;
1084
1085         olr = min_t(unsigned int, olr, sizeof(int));
1086         if (olr < 0)
1087                 return -EINVAL;
1088
1089         if (put_user(olr, optlen))
1090                 return -EFAULT;
1091         if (optname == MRT_VERSION)
1092                 val = 0x0305;
1093 #ifdef CONFIG_IP_PIMSM
1094         else if (optname == MRT_PIM)
1095                 val = net->ipv4.mroute_do_pim;
1096 #endif
1097         else
1098                 val = net->ipv4.mroute_do_assert;
1099         if (copy_to_user(optval, &val, olr))
1100                 return -EFAULT;
1101         return 0;
1102 }
1103
1104 /*
1105  *      The IP multicast ioctl support routines.
1106  */
1107
1108 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1109 {
1110         struct sioc_sg_req sr;
1111         struct sioc_vif_req vr;
1112         struct vif_device *vif;
1113         struct mfc_cache *c;
1114         struct net *net = sock_net(sk);
1115
1116         switch (cmd) {
1117         case SIOCGETVIFCNT:
1118                 if (copy_from_user(&vr, arg, sizeof(vr)))
1119                         return -EFAULT;
1120                 if (vr.vifi >= net->ipv4.maxvif)
1121                         return -EINVAL;
1122                 read_lock(&mrt_lock);
1123                 vif = &net->ipv4.vif_table[vr.vifi];
1124                 if (VIF_EXISTS(net, vr.vifi)) {
1125                         vr.icount = vif->pkt_in;
1126                         vr.ocount = vif->pkt_out;
1127                         vr.ibytes = vif->bytes_in;
1128                         vr.obytes = vif->bytes_out;
1129                         read_unlock(&mrt_lock);
1130
1131                         if (copy_to_user(arg, &vr, sizeof(vr)))
1132                                 return -EFAULT;
1133                         return 0;
1134                 }
1135                 read_unlock(&mrt_lock);
1136                 return -EADDRNOTAVAIL;
1137         case SIOCGETSGCNT:
1138                 if (copy_from_user(&sr, arg, sizeof(sr)))
1139                         return -EFAULT;
1140
1141                 read_lock(&mrt_lock);
1142                 c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr);
1143                 if (c) {
1144                         sr.pktcnt = c->mfc_un.res.pkt;
1145                         sr.bytecnt = c->mfc_un.res.bytes;
1146                         sr.wrong_if = c->mfc_un.res.wrong_if;
1147                         read_unlock(&mrt_lock);
1148
1149                         if (copy_to_user(arg, &sr, sizeof(sr)))
1150                                 return -EFAULT;
1151                         return 0;
1152                 }
1153                 read_unlock(&mrt_lock);
1154                 return -EADDRNOTAVAIL;
1155         default:
1156                 return -ENOIOCTLCMD;
1157         }
1158 }
1159
1160
1161 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1162 {
1163         struct net_device *dev = ptr;
1164         struct net *net = dev_net(dev);
1165         struct vif_device *v;
1166         int ct;
1167         LIST_HEAD(list);
1168
1169         if (event != NETDEV_UNREGISTER)
1170                 return NOTIFY_DONE;
1171         v = &net->ipv4.vif_table[0];
1172         for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
1173                 if (v->dev == dev)
1174                         vif_delete(net, ct, 1, &list);
1175         }
1176         unregister_netdevice_many(&list);
1177         return NOTIFY_DONE;
1178 }
1179
1180
1181 static struct notifier_block ip_mr_notifier = {
1182         .notifier_call = ipmr_device_event,
1183 };
1184
1185 /*
1186  *      Encapsulate a packet by attaching a valid IPIP header to it.
1187  *      This avoids tunnel drivers and other mess and gives us the speed so
1188  *      important for multicast video.
1189  */
1190
1191 static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1192 {
1193         struct iphdr *iph;
1194         struct iphdr *old_iph = ip_hdr(skb);
1195
1196         skb_push(skb, sizeof(struct iphdr));
1197         skb->transport_header = skb->network_header;
1198         skb_reset_network_header(skb);
1199         iph = ip_hdr(skb);
1200
1201         iph->version    =       4;
1202         iph->tos        =       old_iph->tos;
1203         iph->ttl        =       old_iph->ttl;
1204         iph->frag_off   =       0;
1205         iph->daddr      =       daddr;
1206         iph->saddr      =       saddr;
1207         iph->protocol   =       IPPROTO_IPIP;
1208         iph->ihl        =       5;
1209         iph->tot_len    =       htons(skb->len);
1210         ip_select_ident(iph, skb_dst(skb), NULL);
1211         ip_send_check(iph);
1212
1213         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1214         nf_reset(skb);
1215 }
1216
1217 static inline int ipmr_forward_finish(struct sk_buff *skb)
1218 {
1219         struct ip_options * opt = &(IPCB(skb)->opt);
1220
1221         IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1222
1223         if (unlikely(opt->optlen))
1224                 ip_forward_options(skb);
1225
1226         return dst_output(skb);
1227 }
1228
1229 /*
1230  *      Processing handlers for ipmr_forward
1231  */
1232
1233 static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1234 {
1235         struct net *net = mfc_net(c);
1236         const struct iphdr *iph = ip_hdr(skb);
1237         struct vif_device *vif = &net->ipv4.vif_table[vifi];
1238         struct net_device *dev;
1239         struct rtable *rt;
1240         int    encap = 0;
1241
1242         if (vif->dev == NULL)
1243                 goto out_free;
1244
1245 #ifdef CONFIG_IP_PIMSM
1246         if (vif->flags & VIFF_REGISTER) {
1247                 vif->pkt_out++;
1248                 vif->bytes_out += skb->len;
1249                 vif->dev->stats.tx_bytes += skb->len;
1250                 vif->dev->stats.tx_packets++;
1251                 ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT);
1252                 goto out_free;
1253         }
1254 #endif
1255
1256         if (vif->flags&VIFF_TUNNEL) {
1257                 struct flowi fl = { .oif = vif->link,
1258                                     .nl_u = { .ip4_u =
1259                                               { .daddr = vif->remote,
1260                                                 .saddr = vif->local,
1261                                                 .tos = RT_TOS(iph->tos) } },
1262                                     .proto = IPPROTO_IPIP };
1263                 if (ip_route_output_key(net, &rt, &fl))
1264                         goto out_free;
1265                 encap = sizeof(struct iphdr);
1266         } else {
1267                 struct flowi fl = { .oif = vif->link,
1268                                     .nl_u = { .ip4_u =
1269                                               { .daddr = iph->daddr,
1270                                                 .tos = RT_TOS(iph->tos) } },
1271                                     .proto = IPPROTO_IPIP };
1272                 if (ip_route_output_key(net, &rt, &fl))
1273                         goto out_free;
1274         }
1275
1276         dev = rt->u.dst.dev;
1277
1278         if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1279                 /* Do not fragment multicasts. Alas, IPv4 does not
1280                    allow to send ICMP, so that packets will disappear
1281                    to blackhole.
1282                  */
1283
1284                 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
1285                 ip_rt_put(rt);
1286                 goto out_free;
1287         }
1288
1289         encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1290
1291         if (skb_cow(skb, encap)) {
1292                 ip_rt_put(rt);
1293                 goto out_free;
1294         }
1295
1296         vif->pkt_out++;
1297         vif->bytes_out += skb->len;
1298
1299         skb_dst_drop(skb);
1300         skb_dst_set(skb, &rt->u.dst);
1301         ip_decrease_ttl(ip_hdr(skb));
1302
1303         /* FIXME: forward and output firewalls used to be called here.
1304          * What do we do with netfilter? -- RR */
1305         if (vif->flags & VIFF_TUNNEL) {
1306                 ip_encap(skb, vif->local, vif->remote);
1307                 /* FIXME: extra output firewall step used to be here. --RR */
1308                 vif->dev->stats.tx_packets++;
1309                 vif->dev->stats.tx_bytes += skb->len;
1310         }
1311
1312         IPCB(skb)->flags |= IPSKB_FORWARDED;
1313
1314         /*
1315          * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1316          * not only before forwarding, but after forwarding on all output
1317          * interfaces. It is clear, if mrouter runs a multicasting
1318          * program, it should receive packets not depending to what interface
1319          * program is joined.
1320          * If we will not make it, the program will have to join on all
1321          * interfaces. On the other hand, multihoming host (or router, but
1322          * not mrouter) cannot join to more than one interface - it will
1323          * result in receiving multiple packets.
1324          */
1325         NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
1326                 ipmr_forward_finish);
1327         return;
1328
1329 out_free:
1330         kfree_skb(skb);
1331         return;
1332 }
1333
1334 static int ipmr_find_vif(struct net_device *dev)
1335 {
1336         struct net *net = dev_net(dev);
1337         int ct;
1338         for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) {
1339                 if (net->ipv4.vif_table[ct].dev == dev)
1340                         break;
1341         }
1342         return ct;
1343 }
1344
1345 /* "local" means that we should preserve one skb (for local delivery) */
1346
1347 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
1348 {
1349         int psend = -1;
1350         int vif, ct;
1351         struct net *net = mfc_net(cache);
1352
1353         vif = cache->mfc_parent;
1354         cache->mfc_un.res.pkt++;
1355         cache->mfc_un.res.bytes += skb->len;
1356
1357         /*
1358          * Wrong interface: drop packet and (maybe) send PIM assert.
1359          */
1360         if (net->ipv4.vif_table[vif].dev != skb->dev) {
1361                 int true_vifi;
1362
1363                 if (skb_rtable(skb)->fl.iif == 0) {
1364                         /* It is our own packet, looped back.
1365                            Very complicated situation...
1366
1367                            The best workaround until routing daemons will be
1368                            fixed is not to redistribute packet, if it was
1369                            send through wrong interface. It means, that
1370                            multicast applications WILL NOT work for
1371                            (S,G), which have default multicast route pointing
1372                            to wrong oif. In any case, it is not a good
1373                            idea to use multicasting applications on router.
1374                          */
1375                         goto dont_forward;
1376                 }
1377
1378                 cache->mfc_un.res.wrong_if++;
1379                 true_vifi = ipmr_find_vif(skb->dev);
1380
1381                 if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
1382                     /* pimsm uses asserts, when switching from RPT to SPT,
1383                        so that we cannot check that packet arrived on an oif.
1384                        It is bad, but otherwise we would need to move pretty
1385                        large chunk of pimd to kernel. Ough... --ANK
1386                      */
1387                     (net->ipv4.mroute_do_pim ||
1388                      cache->mfc_un.res.ttls[true_vifi] < 255) &&
1389                     time_after(jiffies,
1390                                cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1391                         cache->mfc_un.res.last_assert = jiffies;
1392                         ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF);
1393                 }
1394                 goto dont_forward;
1395         }
1396
1397         net->ipv4.vif_table[vif].pkt_in++;
1398         net->ipv4.vif_table[vif].bytes_in += skb->len;
1399
1400         /*
1401          *      Forward the frame
1402          */
1403         for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1404                 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1405                         if (psend != -1) {
1406                                 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1407                                 if (skb2)
1408                                         ipmr_queue_xmit(skb2, cache, psend);
1409                         }
1410                         psend = ct;
1411                 }
1412         }
1413         if (psend != -1) {
1414                 if (local) {
1415                         struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1416                         if (skb2)
1417                                 ipmr_queue_xmit(skb2, cache, psend);
1418                 } else {
1419                         ipmr_queue_xmit(skb, cache, psend);
1420                         return 0;
1421                 }
1422         }
1423
1424 dont_forward:
1425         if (!local)
1426                 kfree_skb(skb);
1427         return 0;
1428 }
1429
1430
1431 /*
1432  *      Multicast packets for forwarding arrive here
1433  */
1434
1435 int ip_mr_input(struct sk_buff *skb)
1436 {
1437         struct mfc_cache *cache;
1438         struct net *net = dev_net(skb->dev);
1439         int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
1440
1441         /* Packet is looped back after forward, it should not be
1442            forwarded second time, but still can be delivered locally.
1443          */
1444         if (IPCB(skb)->flags&IPSKB_FORWARDED)
1445                 goto dont_forward;
1446
1447         if (!local) {
1448                     if (IPCB(skb)->opt.router_alert) {
1449                             if (ip_call_ra_chain(skb))
1450                                     return 0;
1451                     } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
1452                             /* IGMPv1 (and broken IGMPv2 implementations sort of
1453                                Cisco IOS <= 11.2(8)) do not put router alert
1454                                option to IGMP packets destined to routable
1455                                groups. It is very bad, because it means
1456                                that we can forward NO IGMP messages.
1457                              */
1458                             read_lock(&mrt_lock);
1459                             if (net->ipv4.mroute_sk) {
1460                                     nf_reset(skb);
1461                                     raw_rcv(net->ipv4.mroute_sk, skb);
1462                                     read_unlock(&mrt_lock);
1463                                     return 0;
1464                             }
1465                             read_unlock(&mrt_lock);
1466                     }
1467         }
1468
1469         read_lock(&mrt_lock);
1470         cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1471
1472         /*
1473          *      No usable cache entry
1474          */
1475         if (cache == NULL) {
1476                 int vif;
1477
1478                 if (local) {
1479                         struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1480                         ip_local_deliver(skb);
1481                         if (skb2 == NULL) {
1482                                 read_unlock(&mrt_lock);
1483                                 return -ENOBUFS;
1484                         }
1485                         skb = skb2;
1486                 }
1487
1488                 vif = ipmr_find_vif(skb->dev);
1489                 if (vif >= 0) {
1490                         int err = ipmr_cache_unresolved(net, vif, skb);
1491                         read_unlock(&mrt_lock);
1492
1493                         return err;
1494                 }
1495                 read_unlock(&mrt_lock);
1496                 kfree_skb(skb);
1497                 return -ENODEV;
1498         }
1499
1500         ip_mr_forward(skb, cache, local);
1501
1502         read_unlock(&mrt_lock);
1503
1504         if (local)
1505                 return ip_local_deliver(skb);
1506
1507         return 0;
1508
1509 dont_forward:
1510         if (local)
1511                 return ip_local_deliver(skb);
1512         kfree_skb(skb);
1513         return 0;
1514 }
1515
1516 #ifdef CONFIG_IP_PIMSM
1517 static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
1518 {
1519         struct net_device *reg_dev = NULL;
1520         struct iphdr *encap;
1521         struct net *net = dev_net(skb->dev);
1522
1523         encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
1524         /*
1525            Check that:
1526            a. packet is really destinted to a multicast group
1527            b. packet is not a NULL-REGISTER
1528            c. packet is not truncated
1529          */
1530         if (!ipv4_is_multicast(encap->daddr) ||
1531             encap->tot_len == 0 ||
1532             ntohs(encap->tot_len) + pimlen > skb->len)
1533                 return 1;
1534
1535         read_lock(&mrt_lock);
1536         if (net->ipv4.mroute_reg_vif_num >= 0)
1537                 reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev;
1538         if (reg_dev)
1539                 dev_hold(reg_dev);
1540         read_unlock(&mrt_lock);
1541
1542         if (reg_dev == NULL)
1543                 return 1;
1544
1545         skb->mac_header = skb->network_header;
1546         skb_pull(skb, (u8*)encap - skb->data);
1547         skb_reset_network_header(skb);
1548         skb->dev = reg_dev;
1549         skb->protocol = htons(ETH_P_IP);
1550         skb->ip_summed = 0;
1551         skb->pkt_type = PACKET_HOST;
1552         skb_dst_drop(skb);
1553         reg_dev->stats.rx_bytes += skb->len;
1554         reg_dev->stats.rx_packets++;
1555         nf_reset(skb);
1556         netif_rx(skb);
1557         dev_put(reg_dev);
1558
1559         return 0;
1560 }
1561 #endif
1562
1563 #ifdef CONFIG_IP_PIMSM_V1
1564 /*
1565  * Handle IGMP messages of PIMv1
1566  */
1567
1568 int pim_rcv_v1(struct sk_buff * skb)
1569 {
1570         struct igmphdr *pim;
1571         struct net *net = dev_net(skb->dev);
1572
1573         if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1574                 goto drop;
1575
1576         pim = igmp_hdr(skb);
1577
1578         if (!net->ipv4.mroute_do_pim ||
1579             pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1580                 goto drop;
1581
1582         if (__pim_rcv(skb, sizeof(*pim))) {
1583 drop:
1584                 kfree_skb(skb);
1585         }
1586         return 0;
1587 }
1588 #endif
1589
1590 #ifdef CONFIG_IP_PIMSM_V2
1591 static int pim_rcv(struct sk_buff * skb)
1592 {
1593         struct pimreghdr *pim;
1594
1595         if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1596                 goto drop;
1597
1598         pim = (struct pimreghdr *)skb_transport_header(skb);
1599         if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1600             (pim->flags&PIM_NULL_REGISTER) ||
1601             (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1602              csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1603                 goto drop;
1604
1605         if (__pim_rcv(skb, sizeof(*pim))) {
1606 drop:
1607                 kfree_skb(skb);
1608         }
1609         return 0;
1610 }
1611 #endif
1612
1613 static int
1614 ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1615 {
1616         int ct;
1617         struct rtnexthop *nhp;
1618         struct net *net = mfc_net(c);
1619         struct net_device *dev = net->ipv4.vif_table[c->mfc_parent].dev;
1620         u8 *b = skb_tail_pointer(skb);
1621         struct rtattr *mp_head;
1622
1623         if (dev)
1624                 RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1625
1626         mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1627
1628         for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1629                 if (c->mfc_un.res.ttls[ct] < 255) {
1630                         if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1631                                 goto rtattr_failure;
1632                         nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1633                         nhp->rtnh_flags = 0;
1634                         nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1635                         nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex;
1636                         nhp->rtnh_len = sizeof(*nhp);
1637                 }
1638         }
1639         mp_head->rta_type = RTA_MULTIPATH;
1640         mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1641         rtm->rtm_type = RTN_MULTICAST;
1642         return 1;
1643
1644 rtattr_failure:
1645         nlmsg_trim(skb, b);
1646         return -EMSGSIZE;
1647 }
1648
1649 int ipmr_get_route(struct net *net,
1650                    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1651 {
1652         int err;
1653         struct mfc_cache *cache;
1654         struct rtable *rt = skb_rtable(skb);
1655
1656         read_lock(&mrt_lock);
1657         cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);
1658
1659         if (cache == NULL) {
1660                 struct sk_buff *skb2;
1661                 struct iphdr *iph;
1662                 struct net_device *dev;
1663                 int vif;
1664
1665                 if (nowait) {
1666                         read_unlock(&mrt_lock);
1667                         return -EAGAIN;
1668                 }
1669
1670                 dev = skb->dev;
1671                 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1672                         read_unlock(&mrt_lock);
1673                         return -ENODEV;
1674                 }
1675                 skb2 = skb_clone(skb, GFP_ATOMIC);
1676                 if (!skb2) {
1677                         read_unlock(&mrt_lock);
1678                         return -ENOMEM;
1679                 }
1680
1681                 skb_push(skb2, sizeof(struct iphdr));
1682                 skb_reset_network_header(skb2);
1683                 iph = ip_hdr(skb2);
1684                 iph->ihl = sizeof(struct iphdr) >> 2;
1685                 iph->saddr = rt->rt_src;
1686                 iph->daddr = rt->rt_dst;
1687                 iph->version = 0;
1688                 err = ipmr_cache_unresolved(net, vif, skb2);
1689                 read_unlock(&mrt_lock);
1690                 return err;
1691         }
1692
1693         if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1694                 cache->mfc_flags |= MFC_NOTIFY;
1695         err = ipmr_fill_mroute(skb, cache, rtm);
1696         read_unlock(&mrt_lock);
1697         return err;
1698 }
1699
1700 #ifdef CONFIG_PROC_FS
1701 /*
1702  *      The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
1703  */
1704 struct ipmr_vif_iter {
1705         struct seq_net_private p;
1706         int ct;
1707 };
1708
1709 static struct vif_device *ipmr_vif_seq_idx(struct net *net,
1710                                            struct ipmr_vif_iter *iter,
1711                                            loff_t pos)
1712 {
1713         for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
1714                 if (!VIF_EXISTS(net, iter->ct))
1715                         continue;
1716                 if (pos-- == 0)
1717                         return &net->ipv4.vif_table[iter->ct];
1718         }
1719         return NULL;
1720 }
1721
1722 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1723         __acquires(mrt_lock)
1724 {
1725         struct net *net = seq_file_net(seq);
1726
1727         read_lock(&mrt_lock);
1728         return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
1729                 : SEQ_START_TOKEN;
1730 }
1731
1732 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1733 {
1734         struct ipmr_vif_iter *iter = seq->private;
1735         struct net *net = seq_file_net(seq);
1736
1737         ++*pos;
1738         if (v == SEQ_START_TOKEN)
1739                 return ipmr_vif_seq_idx(net, iter, 0);
1740
1741         while (++iter->ct < net->ipv4.maxvif) {
1742                 if (!VIF_EXISTS(net, iter->ct))
1743                         continue;
1744                 return &net->ipv4.vif_table[iter->ct];
1745         }
1746         return NULL;
1747 }
1748
1749 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
1750         __releases(mrt_lock)
1751 {
1752         read_unlock(&mrt_lock);
1753 }
1754
1755 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1756 {
1757         struct net *net = seq_file_net(seq);
1758
1759         if (v == SEQ_START_TOKEN) {
1760                 seq_puts(seq,
1761                          "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
1762         } else {
1763                 const struct vif_device *vif = v;
1764                 const char *name =  vif->dev ? vif->dev->name : "none";
1765
1766                 seq_printf(seq,
1767                            "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
1768                            vif - net->ipv4.vif_table,
1769                            name, vif->bytes_in, vif->pkt_in,
1770                            vif->bytes_out, vif->pkt_out,
1771                            vif->flags, vif->local, vif->remote);
1772         }
1773         return 0;
1774 }
1775
1776 static const struct seq_operations ipmr_vif_seq_ops = {
1777         .start = ipmr_vif_seq_start,
1778         .next  = ipmr_vif_seq_next,
1779         .stop  = ipmr_vif_seq_stop,
1780         .show  = ipmr_vif_seq_show,
1781 };
1782
1783 static int ipmr_vif_open(struct inode *inode, struct file *file)
1784 {
1785         return seq_open_net(inode, file, &ipmr_vif_seq_ops,
1786                             sizeof(struct ipmr_vif_iter));
1787 }
1788
1789 static const struct file_operations ipmr_vif_fops = {
1790         .owner   = THIS_MODULE,
1791         .open    = ipmr_vif_open,
1792         .read    = seq_read,
1793         .llseek  = seq_lseek,
1794         .release = seq_release_net,
1795 };
1796
1797 struct ipmr_mfc_iter {
1798         struct seq_net_private p;
1799         struct mfc_cache **cache;
1800         int ct;
1801 };
1802
1803
1804 static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
1805                                           struct ipmr_mfc_iter *it, loff_t pos)
1806 {
1807         struct mfc_cache *mfc;
1808
1809         it->cache = net->ipv4.mfc_cache_array;
1810         read_lock(&mrt_lock);
1811         for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
1812                 for (mfc = net->ipv4.mfc_cache_array[it->ct];
1813                      mfc; mfc = mfc->next)
1814                         if (pos-- == 0)
1815                                 return mfc;
1816         read_unlock(&mrt_lock);
1817
1818         it->cache = &mfc_unres_queue;
1819         spin_lock_bh(&mfc_unres_lock);
1820         for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
1821                 if (net_eq(mfc_net(mfc), net) &&
1822                     pos-- == 0)
1823                         return mfc;
1824         spin_unlock_bh(&mfc_unres_lock);
1825
1826         it->cache = NULL;
1827         return NULL;
1828 }
1829
1830
1831 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1832 {
1833         struct ipmr_mfc_iter *it = seq->private;
1834         struct net *net = seq_file_net(seq);
1835
1836         it->cache = NULL;
1837         it->ct = 0;
1838         return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
1839                 : SEQ_START_TOKEN;
1840 }
1841
1842 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1843 {
1844         struct mfc_cache *mfc = v;
1845         struct ipmr_mfc_iter *it = seq->private;
1846         struct net *net = seq_file_net(seq);
1847
1848         ++*pos;
1849
1850         if (v == SEQ_START_TOKEN)
1851                 return ipmr_mfc_seq_idx(net, seq->private, 0);
1852
1853         if (mfc->next)
1854                 return mfc->next;
1855
1856         if (it->cache == &mfc_unres_queue)
1857                 goto end_of_list;
1858
1859         BUG_ON(it->cache != net->ipv4.mfc_cache_array);
1860
1861         while (++it->ct < MFC_LINES) {
1862                 mfc = net->ipv4.mfc_cache_array[it->ct];
1863                 if (mfc)
1864                         return mfc;
1865         }
1866
1867         /* exhausted cache_array, show unresolved */
1868         read_unlock(&mrt_lock);
1869         it->cache = &mfc_unres_queue;
1870         it->ct = 0;
1871
1872         spin_lock_bh(&mfc_unres_lock);
1873         mfc = mfc_unres_queue;
1874         while (mfc && !net_eq(mfc_net(mfc), net))
1875                 mfc = mfc->next;
1876         if (mfc)
1877                 return mfc;
1878
1879  end_of_list:
1880         spin_unlock_bh(&mfc_unres_lock);
1881         it->cache = NULL;
1882
1883         return NULL;
1884 }
1885
1886 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1887 {
1888         struct ipmr_mfc_iter *it = seq->private;
1889         struct net *net = seq_file_net(seq);
1890
1891         if (it->cache == &mfc_unres_queue)
1892                 spin_unlock_bh(&mfc_unres_lock);
1893         else if (it->cache == net->ipv4.mfc_cache_array)
1894                 read_unlock(&mrt_lock);
1895 }
1896
1897 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1898 {
1899         int n;
1900         struct net *net = seq_file_net(seq);
1901
1902         if (v == SEQ_START_TOKEN) {
1903                 seq_puts(seq,
1904                  "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
1905         } else {
1906                 const struct mfc_cache *mfc = v;
1907                 const struct ipmr_mfc_iter *it = seq->private;
1908
1909                 seq_printf(seq, "%08lX %08lX %-3hd",
1910                            (unsigned long) mfc->mfc_mcastgrp,
1911                            (unsigned long) mfc->mfc_origin,
1912                            mfc->mfc_parent);
1913
1914                 if (it->cache != &mfc_unres_queue) {
1915                         seq_printf(seq, " %8lu %8lu %8lu",
1916                                    mfc->mfc_un.res.pkt,
1917                                    mfc->mfc_un.res.bytes,
1918                                    mfc->mfc_un.res.wrong_if);
1919                         for (n = mfc->mfc_un.res.minvif;
1920                              n < mfc->mfc_un.res.maxvif; n++ ) {
1921                                 if (VIF_EXISTS(net, n) &&
1922                                     mfc->mfc_un.res.ttls[n] < 255)
1923                                         seq_printf(seq,
1924                                            " %2d:%-3d",
1925                                            n, mfc->mfc_un.res.ttls[n]);
1926                         }
1927                 } else {
1928                         /* unresolved mfc_caches don't contain
1929                          * pkt, bytes and wrong_if values
1930                          */
1931                         seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
1932                 }
1933                 seq_putc(seq, '\n');
1934         }
1935         return 0;
1936 }
1937
1938 static const struct seq_operations ipmr_mfc_seq_ops = {
1939         .start = ipmr_mfc_seq_start,
1940         .next  = ipmr_mfc_seq_next,
1941         .stop  = ipmr_mfc_seq_stop,
1942         .show  = ipmr_mfc_seq_show,
1943 };
1944
1945 static int ipmr_mfc_open(struct inode *inode, struct file *file)
1946 {
1947         return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
1948                             sizeof(struct ipmr_mfc_iter));
1949 }
1950
1951 static const struct file_operations ipmr_mfc_fops = {
1952         .owner   = THIS_MODULE,
1953         .open    = ipmr_mfc_open,
1954         .read    = seq_read,
1955         .llseek  = seq_lseek,
1956         .release = seq_release_net,
1957 };
1958 #endif
1959
1960 #ifdef CONFIG_IP_PIMSM_V2
1961 static const struct net_protocol pim_protocol = {
1962         .handler        =       pim_rcv,
1963         .netns_ok       =       1,
1964 };
1965 #endif
1966
1967
1968 /*
1969  *      Setup for IP multicast routing
1970  */
1971 static int __net_init ipmr_net_init(struct net *net)
1972 {
1973         int err = 0;
1974
1975         net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
1976                                       GFP_KERNEL);
1977         if (!net->ipv4.vif_table) {
1978                 err = -ENOMEM;
1979                 goto fail;
1980         }
1981
1982         /* Forwarding cache */
1983         net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
1984                                             sizeof(struct mfc_cache *),
1985                                             GFP_KERNEL);
1986         if (!net->ipv4.mfc_cache_array) {
1987                 err = -ENOMEM;
1988                 goto fail_mfc_cache;
1989         }
1990
1991 #ifdef CONFIG_IP_PIMSM
1992         net->ipv4.mroute_reg_vif_num = -1;
1993 #endif
1994
1995 #ifdef CONFIG_PROC_FS
1996         err = -ENOMEM;
1997         if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
1998                 goto proc_vif_fail;
1999         if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
2000                 goto proc_cache_fail;
2001 #endif
2002         return 0;
2003
2004 #ifdef CONFIG_PROC_FS
2005 proc_cache_fail:
2006         proc_net_remove(net, "ip_mr_vif");
2007 proc_vif_fail:
2008         kfree(net->ipv4.mfc_cache_array);
2009 #endif
2010 fail_mfc_cache:
2011         kfree(net->ipv4.vif_table);
2012 fail:
2013         return err;
2014 }
2015
2016 static void __net_exit ipmr_net_exit(struct net *net)
2017 {
2018 #ifdef CONFIG_PROC_FS
2019         proc_net_remove(net, "ip_mr_cache");
2020         proc_net_remove(net, "ip_mr_vif");
2021 #endif
2022         kfree(net->ipv4.mfc_cache_array);
2023         kfree(net->ipv4.vif_table);
2024 }
2025
2026 static struct pernet_operations ipmr_net_ops = {
2027         .init = ipmr_net_init,
2028         .exit = ipmr_net_exit,
2029 };
2030
2031 int __init ip_mr_init(void)
2032 {
2033         int err;
2034
2035         mrt_cachep = kmem_cache_create("ip_mrt_cache",
2036                                        sizeof(struct mfc_cache),
2037                                        0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2038                                        NULL);
2039         if (!mrt_cachep)
2040                 return -ENOMEM;
2041
2042         err = register_pernet_subsys(&ipmr_net_ops);
2043         if (err)
2044                 goto reg_pernet_fail;
2045
2046         setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
2047         err = register_netdevice_notifier(&ip_mr_notifier);
2048         if (err)
2049                 goto reg_notif_fail;
2050 #ifdef CONFIG_IP_PIMSM_V2
2051         if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
2052                 printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
2053                 err = -EAGAIN;
2054                 goto add_proto_fail;
2055         }
2056 #endif
2057         return 0;
2058
2059 #ifdef CONFIG_IP_PIMSM_V2
2060 add_proto_fail:
2061         unregister_netdevice_notifier(&ip_mr_notifier);
2062 #endif
2063 reg_notif_fail:
2064         del_timer(&ipmr_expire_timer);
2065         unregister_pernet_subsys(&ipmr_net_ops);
2066 reg_pernet_fail:
2067         kmem_cache_destroy(mrt_cachep);
2068         return err;
2069 }