net: skb->dst accessors
[safe/jmp/linux-2.6] / net / ipv4 / ipmr.c
1 /*
2  *      IP multicast routing support for mrouted 3.6/3.8
3  *
4  *              (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
5  *        Linux Consultancy and Custom Driver Development
6  *
7  *      This program is free software; you can redistribute it and/or
8  *      modify it under the terms of the GNU General Public License
9  *      as published by the Free Software Foundation; either version
10  *      2 of the License, or (at your option) any later version.
11  *
12  *      Fixes:
13  *      Michael Chastain        :       Incorrect size of copying.
14  *      Alan Cox                :       Added the cache manager code
15  *      Alan Cox                :       Fixed the clone/copy bug and device race.
16  *      Mike McLagan            :       Routing by source
17  *      Malcolm Beattie         :       Buffer handling fixes.
18  *      Alexey Kuznetsov        :       Double buffer free and other fixes.
19  *      SVR Anand               :       Fixed several multicast bugs and problems.
20  *      Alexey Kuznetsov        :       Status, optimisations and more.
21  *      Brad Parker             :       Better behaviour on mrouted upcall
22  *                                      overflow.
23  *      Carlos Picoto           :       PIMv1 Support
24  *      Pavlin Ivanov Radoslavov:       PIMv2 Registers must checksum only PIM header
25  *                                      Relax this requirement to work with older peers.
26  *
27  */
28
29 #include <asm/system.h>
30 #include <asm/uaccess.h>
31 #include <linux/types.h>
32 #include <linux/capability.h>
33 #include <linux/errno.h>
34 #include <linux/timer.h>
35 #include <linux/mm.h>
36 #include <linux/kernel.h>
37 #include <linux/fcntl.h>
38 #include <linux/stat.h>
39 #include <linux/socket.h>
40 #include <linux/in.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/inetdevice.h>
44 #include <linux/igmp.h>
45 #include <linux/proc_fs.h>
46 #include <linux/seq_file.h>
47 #include <linux/mroute.h>
48 #include <linux/init.h>
49 #include <linux/if_ether.h>
50 #include <net/net_namespace.h>
51 #include <net/ip.h>
52 #include <net/protocol.h>
53 #include <linux/skbuff.h>
54 #include <net/route.h>
55 #include <net/sock.h>
56 #include <net/icmp.h>
57 #include <net/udp.h>
58 #include <net/raw.h>
59 #include <linux/notifier.h>
60 #include <linux/if_arp.h>
61 #include <linux/netfilter_ipv4.h>
62 #include <net/ipip.h>
63 #include <net/checksum.h>
64 #include <net/netlink.h>
65
66 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
67 #define CONFIG_IP_PIMSM 1
68 #endif
69
70 /* Big lock, protecting vif table, mrt cache and mroute socket state.
71    Note that the changes are semaphored via rtnl_lock.
72  */
73
74 static DEFINE_RWLOCK(mrt_lock);
75
76 /*
77  *      Multicast router control variables
78  */
79
80 #define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)
81
82 static struct mfc_cache *mfc_unres_queue;               /* Queue of unresolved entries */
83
84 /* Special spinlock for queue of unresolved entries */
85 static DEFINE_SPINLOCK(mfc_unres_lock);
86
87 /* We return to original Alan's scheme. Hash table of resolved
88    entries is changed only in process context and protected
89    with weak lock mrt_lock. Queue of unresolved entries is protected
90    with strong spinlock mfc_unres_lock.
91
92    In this case data path is free of exclusive locks at all.
93  */
94
95 static struct kmem_cache *mrt_cachep __read_mostly;
96
97 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
98 static int ipmr_cache_report(struct net *net,
99                              struct sk_buff *pkt, vifi_t vifi, int assert);
100 static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
101
102 #ifdef CONFIG_IP_PIMSM_V2
103 static struct net_protocol pim_protocol;
104 #endif
105
106 static struct timer_list ipmr_expire_timer;
107
108 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
109
110 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
111 {
112         struct net *net = dev_net(dev);
113
114         dev_close(dev);
115
116         dev = __dev_get_by_name(net, "tunl0");
117         if (dev) {
118                 const struct net_device_ops *ops = dev->netdev_ops;
119                 struct ifreq ifr;
120                 struct ip_tunnel_parm p;
121
122                 memset(&p, 0, sizeof(p));
123                 p.iph.daddr = v->vifc_rmt_addr.s_addr;
124                 p.iph.saddr = v->vifc_lcl_addr.s_addr;
125                 p.iph.version = 4;
126                 p.iph.ihl = 5;
127                 p.iph.protocol = IPPROTO_IPIP;
128                 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
129                 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
130
131                 if (ops->ndo_do_ioctl) {
132                         mm_segment_t oldfs = get_fs();
133
134                         set_fs(KERNEL_DS);
135                         ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
136                         set_fs(oldfs);
137                 }
138         }
139 }
140
141 static
142 struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
143 {
144         struct net_device  *dev;
145
146         dev = __dev_get_by_name(net, "tunl0");
147
148         if (dev) {
149                 const struct net_device_ops *ops = dev->netdev_ops;
150                 int err;
151                 struct ifreq ifr;
152                 struct ip_tunnel_parm p;
153                 struct in_device  *in_dev;
154
155                 memset(&p, 0, sizeof(p));
156                 p.iph.daddr = v->vifc_rmt_addr.s_addr;
157                 p.iph.saddr = v->vifc_lcl_addr.s_addr;
158                 p.iph.version = 4;
159                 p.iph.ihl = 5;
160                 p.iph.protocol = IPPROTO_IPIP;
161                 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
162                 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
163
164                 if (ops->ndo_do_ioctl) {
165                         mm_segment_t oldfs = get_fs();
166
167                         set_fs(KERNEL_DS);
168                         err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
169                         set_fs(oldfs);
170                 } else
171                         err = -EOPNOTSUPP;
172
173                 dev = NULL;
174
175                 if (err == 0 &&
176                     (dev = __dev_get_by_name(net, p.name)) != NULL) {
177                         dev->flags |= IFF_MULTICAST;
178
179                         in_dev = __in_dev_get_rtnl(dev);
180                         if (in_dev == NULL)
181                                 goto failure;
182
183                         ipv4_devconf_setall(in_dev);
184                         IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
185
186                         if (dev_open(dev))
187                                 goto failure;
188                         dev_hold(dev);
189                 }
190         }
191         return dev;
192
193 failure:
194         /* allow the register to be completed before unregistering. */
195         rtnl_unlock();
196         rtnl_lock();
197
198         unregister_netdevice(dev);
199         return NULL;
200 }
201
202 #ifdef CONFIG_IP_PIMSM
203
204 static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
205 {
206         struct net *net = dev_net(dev);
207
208         read_lock(&mrt_lock);
209         dev->stats.tx_bytes += skb->len;
210         dev->stats.tx_packets++;
211         ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num,
212                           IGMPMSG_WHOLEPKT);
213         read_unlock(&mrt_lock);
214         kfree_skb(skb);
215         return 0;
216 }
217
218 static const struct net_device_ops reg_vif_netdev_ops = {
219         .ndo_start_xmit = reg_vif_xmit,
220 };
221
222 static void reg_vif_setup(struct net_device *dev)
223 {
224         dev->type               = ARPHRD_PIMREG;
225         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
226         dev->flags              = IFF_NOARP;
227         dev->netdev_ops         = &reg_vif_netdev_ops,
228         dev->destructor         = free_netdev;
229 }
230
231 static struct net_device *ipmr_reg_vif(void)
232 {
233         struct net_device *dev;
234         struct in_device *in_dev;
235
236         dev = alloc_netdev(0, "pimreg", reg_vif_setup);
237
238         if (dev == NULL)
239                 return NULL;
240
241         if (register_netdevice(dev)) {
242                 free_netdev(dev);
243                 return NULL;
244         }
245         dev->iflink = 0;
246
247         rcu_read_lock();
248         if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
249                 rcu_read_unlock();
250                 goto failure;
251         }
252
253         ipv4_devconf_setall(in_dev);
254         IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
255         rcu_read_unlock();
256
257         if (dev_open(dev))
258                 goto failure;
259
260         dev_hold(dev);
261
262         return dev;
263
264 failure:
265         /* allow the register to be completed before unregistering. */
266         rtnl_unlock();
267         rtnl_lock();
268
269         unregister_netdevice(dev);
270         return NULL;
271 }
272 #endif
273
274 /*
275  *      Delete a VIF entry
276  *      @notify: Set to 1, if the caller is a notifier_call
277  */
278
279 static int vif_delete(struct net *net, int vifi, int notify)
280 {
281         struct vif_device *v;
282         struct net_device *dev;
283         struct in_device *in_dev;
284
285         if (vifi < 0 || vifi >= net->ipv4.maxvif)
286                 return -EADDRNOTAVAIL;
287
288         v = &net->ipv4.vif_table[vifi];
289
290         write_lock_bh(&mrt_lock);
291         dev = v->dev;
292         v->dev = NULL;
293
294         if (!dev) {
295                 write_unlock_bh(&mrt_lock);
296                 return -EADDRNOTAVAIL;
297         }
298
299 #ifdef CONFIG_IP_PIMSM
300         if (vifi == net->ipv4.mroute_reg_vif_num)
301                 net->ipv4.mroute_reg_vif_num = -1;
302 #endif
303
304         if (vifi+1 == net->ipv4.maxvif) {
305                 int tmp;
306                 for (tmp=vifi-1; tmp>=0; tmp--) {
307                         if (VIF_EXISTS(net, tmp))
308                                 break;
309                 }
310                 net->ipv4.maxvif = tmp+1;
311         }
312
313         write_unlock_bh(&mrt_lock);
314
315         dev_set_allmulti(dev, -1);
316
317         if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
318                 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
319                 ip_rt_multicast_event(in_dev);
320         }
321
322         if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
323                 unregister_netdevice(dev);
324
325         dev_put(dev);
326         return 0;
327 }
328
329 static inline void ipmr_cache_free(struct mfc_cache *c)
330 {
331         release_net(mfc_net(c));
332         kmem_cache_free(mrt_cachep, c);
333 }
334
335 /* Destroy an unresolved cache entry, killing queued skbs
336    and reporting error to netlink readers.
337  */
338
339 static void ipmr_destroy_unres(struct mfc_cache *c)
340 {
341         struct sk_buff *skb;
342         struct nlmsgerr *e;
343         struct net *net = mfc_net(c);
344
345         atomic_dec(&net->ipv4.cache_resolve_queue_len);
346
347         while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
348                 if (ip_hdr(skb)->version == 0) {
349                         struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
350                         nlh->nlmsg_type = NLMSG_ERROR;
351                         nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
352                         skb_trim(skb, nlh->nlmsg_len);
353                         e = NLMSG_DATA(nlh);
354                         e->error = -ETIMEDOUT;
355                         memset(&e->msg, 0, sizeof(e->msg));
356
357                         rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
358                 } else
359                         kfree_skb(skb);
360         }
361
362         ipmr_cache_free(c);
363 }
364
365
366 /* Single timer process for all the unresolved queue. */
367
368 static void ipmr_expire_process(unsigned long dummy)
369 {
370         unsigned long now;
371         unsigned long expires;
372         struct mfc_cache *c, **cp;
373
374         if (!spin_trylock(&mfc_unres_lock)) {
375                 mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
376                 return;
377         }
378
379         if (mfc_unres_queue == NULL)
380                 goto out;
381
382         now = jiffies;
383         expires = 10*HZ;
384         cp = &mfc_unres_queue;
385
386         while ((c=*cp) != NULL) {
387                 if (time_after(c->mfc_un.unres.expires, now)) {
388                         unsigned long interval = c->mfc_un.unres.expires - now;
389                         if (interval < expires)
390                                 expires = interval;
391                         cp = &c->next;
392                         continue;
393                 }
394
395                 *cp = c->next;
396
397                 ipmr_destroy_unres(c);
398         }
399
400         if (mfc_unres_queue != NULL)
401                 mod_timer(&ipmr_expire_timer, jiffies + expires);
402
403 out:
404         spin_unlock(&mfc_unres_lock);
405 }
406
407 /* Fill oifs list. It is called under write locked mrt_lock. */
408
409 static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
410 {
411         int vifi;
412         struct net *net = mfc_net(cache);
413
414         cache->mfc_un.res.minvif = MAXVIFS;
415         cache->mfc_un.res.maxvif = 0;
416         memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
417
418         for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) {
419                 if (VIF_EXISTS(net, vifi) &&
420                     ttls[vifi] && ttls[vifi] < 255) {
421                         cache->mfc_un.res.ttls[vifi] = ttls[vifi];
422                         if (cache->mfc_un.res.minvif > vifi)
423                                 cache->mfc_un.res.minvif = vifi;
424                         if (cache->mfc_un.res.maxvif <= vifi)
425                                 cache->mfc_un.res.maxvif = vifi + 1;
426                 }
427         }
428 }
429
430 static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
431 {
432         int vifi = vifc->vifc_vifi;
433         struct vif_device *v = &net->ipv4.vif_table[vifi];
434         struct net_device *dev;
435         struct in_device *in_dev;
436         int err;
437
438         /* Is vif busy ? */
439         if (VIF_EXISTS(net, vifi))
440                 return -EADDRINUSE;
441
442         switch (vifc->vifc_flags) {
443 #ifdef CONFIG_IP_PIMSM
444         case VIFF_REGISTER:
445                 /*
446                  * Special Purpose VIF in PIM
447                  * All the packets will be sent to the daemon
448                  */
449                 if (net->ipv4.mroute_reg_vif_num >= 0)
450                         return -EADDRINUSE;
451                 dev = ipmr_reg_vif();
452                 if (!dev)
453                         return -ENOBUFS;
454                 err = dev_set_allmulti(dev, 1);
455                 if (err) {
456                         unregister_netdevice(dev);
457                         dev_put(dev);
458                         return err;
459                 }
460                 break;
461 #endif
462         case VIFF_TUNNEL:
463                 dev = ipmr_new_tunnel(net, vifc);
464                 if (!dev)
465                         return -ENOBUFS;
466                 err = dev_set_allmulti(dev, 1);
467                 if (err) {
468                         ipmr_del_tunnel(dev, vifc);
469                         dev_put(dev);
470                         return err;
471                 }
472                 break;
473         case 0:
474                 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
475                 if (!dev)
476                         return -EADDRNOTAVAIL;
477                 err = dev_set_allmulti(dev, 1);
478                 if (err) {
479                         dev_put(dev);
480                         return err;
481                 }
482                 break;
483         default:
484                 return -EINVAL;
485         }
486
487         if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
488                 return -EADDRNOTAVAIL;
489         IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
490         ip_rt_multicast_event(in_dev);
491
492         /*
493          *      Fill in the VIF structures
494          */
495         v->rate_limit = vifc->vifc_rate_limit;
496         v->local = vifc->vifc_lcl_addr.s_addr;
497         v->remote = vifc->vifc_rmt_addr.s_addr;
498         v->flags = vifc->vifc_flags;
499         if (!mrtsock)
500                 v->flags |= VIFF_STATIC;
501         v->threshold = vifc->vifc_threshold;
502         v->bytes_in = 0;
503         v->bytes_out = 0;
504         v->pkt_in = 0;
505         v->pkt_out = 0;
506         v->link = dev->ifindex;
507         if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
508                 v->link = dev->iflink;
509
510         /* And finish update writing critical data */
511         write_lock_bh(&mrt_lock);
512         v->dev = dev;
513 #ifdef CONFIG_IP_PIMSM
514         if (v->flags&VIFF_REGISTER)
515                 net->ipv4.mroute_reg_vif_num = vifi;
516 #endif
517         if (vifi+1 > net->ipv4.maxvif)
518                 net->ipv4.maxvif = vifi+1;
519         write_unlock_bh(&mrt_lock);
520         return 0;
521 }
522
523 static struct mfc_cache *ipmr_cache_find(struct net *net,
524                                          __be32 origin,
525                                          __be32 mcastgrp)
526 {
527         int line = MFC_HASH(mcastgrp, origin);
528         struct mfc_cache *c;
529
530         for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) {
531                 if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
532                         break;
533         }
534         return c;
535 }
536
537 /*
538  *      Allocate a multicast cache entry
539  */
540 static struct mfc_cache *ipmr_cache_alloc(struct net *net)
541 {
542         struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
543         if (c == NULL)
544                 return NULL;
545         c->mfc_un.res.minvif = MAXVIFS;
546         mfc_net_set(c, net);
547         return c;
548 }
549
550 static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
551 {
552         struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
553         if (c == NULL)
554                 return NULL;
555         skb_queue_head_init(&c->mfc_un.unres.unresolved);
556         c->mfc_un.unres.expires = jiffies + 10*HZ;
557         mfc_net_set(c, net);
558         return c;
559 }
560
561 /*
562  *      A cache entry has gone into a resolved state from queued
563  */
564
565 static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
566 {
567         struct sk_buff *skb;
568         struct nlmsgerr *e;
569
570         /*
571          *      Play the pending entries through our router
572          */
573
574         while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
575                 if (ip_hdr(skb)->version == 0) {
576                         struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
577
578                         if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
579                                 nlh->nlmsg_len = (skb_tail_pointer(skb) -
580                                                   (u8 *)nlh);
581                         } else {
582                                 nlh->nlmsg_type = NLMSG_ERROR;
583                                 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
584                                 skb_trim(skb, nlh->nlmsg_len);
585                                 e = NLMSG_DATA(nlh);
586                                 e->error = -EMSGSIZE;
587                                 memset(&e->msg, 0, sizeof(e->msg));
588                         }
589
590                         rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid);
591                 } else
592                         ip_mr_forward(skb, c, 0);
593         }
594 }
595
596 /*
597  *      Bounce a cache query up to mrouted. We could use netlink for this but mrouted
598  *      expects the following bizarre scheme.
599  *
600  *      Called under mrt_lock.
601  */
602
603 static int ipmr_cache_report(struct net *net,
604                              struct sk_buff *pkt, vifi_t vifi, int assert)
605 {
606         struct sk_buff *skb;
607         const int ihl = ip_hdrlen(pkt);
608         struct igmphdr *igmp;
609         struct igmpmsg *msg;
610         int ret;
611
612 #ifdef CONFIG_IP_PIMSM
613         if (assert == IGMPMSG_WHOLEPKT)
614                 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
615         else
616 #endif
617                 skb = alloc_skb(128, GFP_ATOMIC);
618
619         if (!skb)
620                 return -ENOBUFS;
621
622 #ifdef CONFIG_IP_PIMSM
623         if (assert == IGMPMSG_WHOLEPKT) {
624                 /* Ugly, but we have no choice with this interface.
625                    Duplicate old header, fix ihl, length etc.
626                    And all this only to mangle msg->im_msgtype and
627                    to set msg->im_mbz to "mbz" :-)
628                  */
629                 skb_push(skb, sizeof(struct iphdr));
630                 skb_reset_network_header(skb);
631                 skb_reset_transport_header(skb);
632                 msg = (struct igmpmsg *)skb_network_header(skb);
633                 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
634                 msg->im_msgtype = IGMPMSG_WHOLEPKT;
635                 msg->im_mbz = 0;
636                 msg->im_vif = net->ipv4.mroute_reg_vif_num;
637                 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
638                 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
639                                              sizeof(struct iphdr));
640         } else
641 #endif
642         {
643
644         /*
645          *      Copy the IP header
646          */
647
648         skb->network_header = skb->tail;
649         skb_put(skb, ihl);
650         skb_copy_to_linear_data(skb, pkt->data, ihl);
651         ip_hdr(skb)->protocol = 0;                      /* Flag to the kernel this is a route add */
652         msg = (struct igmpmsg *)skb_network_header(skb);
653         msg->im_vif = vifi;
654         skb_dst_set(skb, dst_clone(skb_dst(pkt)));
655
656         /*
657          *      Add our header
658          */
659
660         igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
661         igmp->type      =
662         msg->im_msgtype = assert;
663         igmp->code      =       0;
664         ip_hdr(skb)->tot_len = htons(skb->len);                 /* Fix the length */
665         skb->transport_header = skb->network_header;
666         }
667
668         if (net->ipv4.mroute_sk == NULL) {
669                 kfree_skb(skb);
670                 return -EINVAL;
671         }
672
673         /*
674          *      Deliver to mrouted
675          */
676         ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb);
677         if (ret < 0) {
678                 if (net_ratelimit())
679                         printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
680                 kfree_skb(skb);
681         }
682
683         return ret;
684 }
685
686 /*
687  *      Queue a packet for resolution. It gets locked cache entry!
688  */
689
690 static int
691 ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
692 {
693         int err;
694         struct mfc_cache *c;
695         const struct iphdr *iph = ip_hdr(skb);
696
697         spin_lock_bh(&mfc_unres_lock);
698         for (c=mfc_unres_queue; c; c=c->next) {
699                 if (net_eq(mfc_net(c), net) &&
700                     c->mfc_mcastgrp == iph->daddr &&
701                     c->mfc_origin == iph->saddr)
702                         break;
703         }
704
705         if (c == NULL) {
706                 /*
707                  *      Create a new entry if allowable
708                  */
709
710                 if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
711                     (c = ipmr_cache_alloc_unres(net)) == NULL) {
712                         spin_unlock_bh(&mfc_unres_lock);
713
714                         kfree_skb(skb);
715                         return -ENOBUFS;
716                 }
717
718                 /*
719                  *      Fill in the new cache entry
720                  */
721                 c->mfc_parent   = -1;
722                 c->mfc_origin   = iph->saddr;
723                 c->mfc_mcastgrp = iph->daddr;
724
725                 /*
726                  *      Reflect first query at mrouted.
727                  */
728                 err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE);
729                 if (err < 0) {
730                         /* If the report failed throw the cache entry
731                            out - Brad Parker
732                          */
733                         spin_unlock_bh(&mfc_unres_lock);
734
735                         ipmr_cache_free(c);
736                         kfree_skb(skb);
737                         return err;
738                 }
739
740                 atomic_inc(&net->ipv4.cache_resolve_queue_len);
741                 c->next = mfc_unres_queue;
742                 mfc_unres_queue = c;
743
744                 mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
745         }
746
747         /*
748          *      See if we can append the packet
749          */
750         if (c->mfc_un.unres.unresolved.qlen>3) {
751                 kfree_skb(skb);
752                 err = -ENOBUFS;
753         } else {
754                 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
755                 err = 0;
756         }
757
758         spin_unlock_bh(&mfc_unres_lock);
759         return err;
760 }
761
762 /*
763  *      MFC cache manipulation by user space mroute daemon
764  */
765
766 static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
767 {
768         int line;
769         struct mfc_cache *c, **cp;
770
771         line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
772
773         for (cp = &net->ipv4.mfc_cache_array[line];
774              (c = *cp) != NULL; cp = &c->next) {
775                 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
776                     c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
777                         write_lock_bh(&mrt_lock);
778                         *cp = c->next;
779                         write_unlock_bh(&mrt_lock);
780
781                         ipmr_cache_free(c);
782                         return 0;
783                 }
784         }
785         return -ENOENT;
786 }
787
788 static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
789 {
790         int line;
791         struct mfc_cache *uc, *c, **cp;
792
793         line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
794
795         for (cp = &net->ipv4.mfc_cache_array[line];
796              (c = *cp) != NULL; cp = &c->next) {
797                 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
798                     c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
799                         break;
800         }
801
802         if (c != NULL) {
803                 write_lock_bh(&mrt_lock);
804                 c->mfc_parent = mfc->mfcc_parent;
805                 ipmr_update_thresholds(c, mfc->mfcc_ttls);
806                 if (!mrtsock)
807                         c->mfc_flags |= MFC_STATIC;
808                 write_unlock_bh(&mrt_lock);
809                 return 0;
810         }
811
812         if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
813                 return -EINVAL;
814
815         c = ipmr_cache_alloc(net);
816         if (c == NULL)
817                 return -ENOMEM;
818
819         c->mfc_origin = mfc->mfcc_origin.s_addr;
820         c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
821         c->mfc_parent = mfc->mfcc_parent;
822         ipmr_update_thresholds(c, mfc->mfcc_ttls);
823         if (!mrtsock)
824                 c->mfc_flags |= MFC_STATIC;
825
826         write_lock_bh(&mrt_lock);
827         c->next = net->ipv4.mfc_cache_array[line];
828         net->ipv4.mfc_cache_array[line] = c;
829         write_unlock_bh(&mrt_lock);
830
831         /*
832          *      Check to see if we resolved a queued list. If so we
833          *      need to send on the frames and tidy up.
834          */
835         spin_lock_bh(&mfc_unres_lock);
836         for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
837              cp = &uc->next) {
838                 if (net_eq(mfc_net(uc), net) &&
839                     uc->mfc_origin == c->mfc_origin &&
840                     uc->mfc_mcastgrp == c->mfc_mcastgrp) {
841                         *cp = uc->next;
842                         atomic_dec(&net->ipv4.cache_resolve_queue_len);
843                         break;
844                 }
845         }
846         if (mfc_unres_queue == NULL)
847                 del_timer(&ipmr_expire_timer);
848         spin_unlock_bh(&mfc_unres_lock);
849
850         if (uc) {
851                 ipmr_cache_resolve(uc, c);
852                 ipmr_cache_free(uc);
853         }
854         return 0;
855 }
856
857 /*
858  *      Close the multicast socket, and clear the vif tables etc
859  */
860
861 static void mroute_clean_tables(struct net *net)
862 {
863         int i;
864
865         /*
866          *      Shut down all active vif entries
867          */
868         for (i = 0; i < net->ipv4.maxvif; i++) {
869                 if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC))
870                         vif_delete(net, i, 0);
871         }
872
873         /*
874          *      Wipe the cache
875          */
876         for (i=0; i<MFC_LINES; i++) {
877                 struct mfc_cache *c, **cp;
878
879                 cp = &net->ipv4.mfc_cache_array[i];
880                 while ((c = *cp) != NULL) {
881                         if (c->mfc_flags&MFC_STATIC) {
882                                 cp = &c->next;
883                                 continue;
884                         }
885                         write_lock_bh(&mrt_lock);
886                         *cp = c->next;
887                         write_unlock_bh(&mrt_lock);
888
889                         ipmr_cache_free(c);
890                 }
891         }
892
893         if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
894                 struct mfc_cache *c, **cp;
895
896                 spin_lock_bh(&mfc_unres_lock);
897                 cp = &mfc_unres_queue;
898                 while ((c = *cp) != NULL) {
899                         if (!net_eq(mfc_net(c), net)) {
900                                 cp = &c->next;
901                                 continue;
902                         }
903                         *cp = c->next;
904
905                         ipmr_destroy_unres(c);
906                 }
907                 spin_unlock_bh(&mfc_unres_lock);
908         }
909 }
910
911 static void mrtsock_destruct(struct sock *sk)
912 {
913         struct net *net = sock_net(sk);
914
915         rtnl_lock();
916         if (sk == net->ipv4.mroute_sk) {
917                 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
918
919                 write_lock_bh(&mrt_lock);
920                 net->ipv4.mroute_sk = NULL;
921                 write_unlock_bh(&mrt_lock);
922
923                 mroute_clean_tables(net);
924         }
925         rtnl_unlock();
926 }
927
928 /*
929  *      Socket options and virtual interface manipulation. The whole
930  *      virtual interface system is a complete heap, but unfortunately
931  *      that's how BSD mrouted happens to think. Maybe one day with a proper
932  *      MOSPF/PIM router set up we can clean this up.
933  */
934
935 int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
936 {
937         int ret;
938         struct vifctl vif;
939         struct mfcctl mfc;
940         struct net *net = sock_net(sk);
941
942         if (optname != MRT_INIT) {
943                 if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
944                         return -EACCES;
945         }
946
947         switch (optname) {
948         case MRT_INIT:
949                 if (sk->sk_type != SOCK_RAW ||
950                     inet_sk(sk)->num != IPPROTO_IGMP)
951                         return -EOPNOTSUPP;
952                 if (optlen != sizeof(int))
953                         return -ENOPROTOOPT;
954
955                 rtnl_lock();
956                 if (net->ipv4.mroute_sk) {
957                         rtnl_unlock();
958                         return -EADDRINUSE;
959                 }
960
961                 ret = ip_ra_control(sk, 1, mrtsock_destruct);
962                 if (ret == 0) {
963                         write_lock_bh(&mrt_lock);
964                         net->ipv4.mroute_sk = sk;
965                         write_unlock_bh(&mrt_lock);
966
967                         IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
968                 }
969                 rtnl_unlock();
970                 return ret;
971         case MRT_DONE:
972                 if (sk != net->ipv4.mroute_sk)
973                         return -EACCES;
974                 return ip_ra_control(sk, 0, NULL);
975         case MRT_ADD_VIF:
976         case MRT_DEL_VIF:
977                 if (optlen != sizeof(vif))
978                         return -EINVAL;
979                 if (copy_from_user(&vif, optval, sizeof(vif)))
980                         return -EFAULT;
981                 if (vif.vifc_vifi >= MAXVIFS)
982                         return -ENFILE;
983                 rtnl_lock();
984                 if (optname == MRT_ADD_VIF) {
985                         ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
986                 } else {
987                         ret = vif_delete(net, vif.vifc_vifi, 0);
988                 }
989                 rtnl_unlock();
990                 return ret;
991
992                 /*
993                  *      Manipulate the forwarding caches. These live
994                  *      in a sort of kernel/user symbiosis.
995                  */
996         case MRT_ADD_MFC:
997         case MRT_DEL_MFC:
998                 if (optlen != sizeof(mfc))
999                         return -EINVAL;
1000                 if (copy_from_user(&mfc, optval, sizeof(mfc)))
1001                         return -EFAULT;
1002                 rtnl_lock();
1003                 if (optname == MRT_DEL_MFC)
1004                         ret = ipmr_mfc_delete(net, &mfc);
1005                 else
1006                         ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk);
1007                 rtnl_unlock();
1008                 return ret;
1009                 /*
1010                  *      Control PIM assert.
1011                  */
1012         case MRT_ASSERT:
1013         {
1014                 int v;
1015                 if (get_user(v,(int __user *)optval))
1016                         return -EFAULT;
1017                 net->ipv4.mroute_do_assert = (v) ? 1 : 0;
1018                 return 0;
1019         }
1020 #ifdef CONFIG_IP_PIMSM
1021         case MRT_PIM:
1022         {
1023                 int v;
1024
1025                 if (get_user(v,(int __user *)optval))
1026                         return -EFAULT;
1027                 v = (v) ? 1 : 0;
1028
1029                 rtnl_lock();
1030                 ret = 0;
1031                 if (v != net->ipv4.mroute_do_pim) {
1032                         net->ipv4.mroute_do_pim = v;
1033                         net->ipv4.mroute_do_assert = v;
1034 #ifdef CONFIG_IP_PIMSM_V2
1035                         if (net->ipv4.mroute_do_pim)
1036                                 ret = inet_add_protocol(&pim_protocol,
1037                                                         IPPROTO_PIM);
1038                         else
1039                                 ret = inet_del_protocol(&pim_protocol,
1040                                                         IPPROTO_PIM);
1041                         if (ret < 0)
1042                                 ret = -EAGAIN;
1043 #endif
1044                 }
1045                 rtnl_unlock();
1046                 return ret;
1047         }
1048 #endif
1049         /*
1050          *      Spurious command, or MRT_VERSION which you cannot
1051          *      set.
1052          */
1053         default:
1054                 return -ENOPROTOOPT;
1055         }
1056 }
1057
1058 /*
1059  *      Getsock opt support for the multicast routing system.
1060  */
1061
1062 int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
1063 {
1064         int olr;
1065         int val;
1066         struct net *net = sock_net(sk);
1067
1068         if (optname != MRT_VERSION &&
1069 #ifdef CONFIG_IP_PIMSM
1070            optname!=MRT_PIM &&
1071 #endif
1072            optname!=MRT_ASSERT)
1073                 return -ENOPROTOOPT;
1074
1075         if (get_user(olr, optlen))
1076                 return -EFAULT;
1077
1078         olr = min_t(unsigned int, olr, sizeof(int));
1079         if (olr < 0)
1080                 return -EINVAL;
1081
1082         if (put_user(olr, optlen))
1083                 return -EFAULT;
1084         if (optname == MRT_VERSION)
1085                 val = 0x0305;
1086 #ifdef CONFIG_IP_PIMSM
1087         else if (optname == MRT_PIM)
1088                 val = net->ipv4.mroute_do_pim;
1089 #endif
1090         else
1091                 val = net->ipv4.mroute_do_assert;
1092         if (copy_to_user(optval, &val, olr))
1093                 return -EFAULT;
1094         return 0;
1095 }
1096
1097 /*
1098  *      The IP multicast ioctl support routines.
1099  */
1100
1101 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1102 {
1103         struct sioc_sg_req sr;
1104         struct sioc_vif_req vr;
1105         struct vif_device *vif;
1106         struct mfc_cache *c;
1107         struct net *net = sock_net(sk);
1108
1109         switch (cmd) {
1110         case SIOCGETVIFCNT:
1111                 if (copy_from_user(&vr, arg, sizeof(vr)))
1112                         return -EFAULT;
1113                 if (vr.vifi >= net->ipv4.maxvif)
1114                         return -EINVAL;
1115                 read_lock(&mrt_lock);
1116                 vif = &net->ipv4.vif_table[vr.vifi];
1117                 if (VIF_EXISTS(net, vr.vifi)) {
1118                         vr.icount = vif->pkt_in;
1119                         vr.ocount = vif->pkt_out;
1120                         vr.ibytes = vif->bytes_in;
1121                         vr.obytes = vif->bytes_out;
1122                         read_unlock(&mrt_lock);
1123
1124                         if (copy_to_user(arg, &vr, sizeof(vr)))
1125                                 return -EFAULT;
1126                         return 0;
1127                 }
1128                 read_unlock(&mrt_lock);
1129                 return -EADDRNOTAVAIL;
1130         case SIOCGETSGCNT:
1131                 if (copy_from_user(&sr, arg, sizeof(sr)))
1132                         return -EFAULT;
1133
1134                 read_lock(&mrt_lock);
1135                 c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr);
1136                 if (c) {
1137                         sr.pktcnt = c->mfc_un.res.pkt;
1138                         sr.bytecnt = c->mfc_un.res.bytes;
1139                         sr.wrong_if = c->mfc_un.res.wrong_if;
1140                         read_unlock(&mrt_lock);
1141
1142                         if (copy_to_user(arg, &sr, sizeof(sr)))
1143                                 return -EFAULT;
1144                         return 0;
1145                 }
1146                 read_unlock(&mrt_lock);
1147                 return -EADDRNOTAVAIL;
1148         default:
1149                 return -ENOIOCTLCMD;
1150         }
1151 }
1152
1153
1154 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1155 {
1156         struct net_device *dev = ptr;
1157         struct net *net = dev_net(dev);
1158         struct vif_device *v;
1159         int ct;
1160
1161         if (!net_eq(dev_net(dev), net))
1162                 return NOTIFY_DONE;
1163
1164         if (event != NETDEV_UNREGISTER)
1165                 return NOTIFY_DONE;
1166         v = &net->ipv4.vif_table[0];
1167         for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
1168                 if (v->dev == dev)
1169                         vif_delete(net, ct, 1);
1170         }
1171         return NOTIFY_DONE;
1172 }
1173
1174
1175 static struct notifier_block ip_mr_notifier = {
1176         .notifier_call = ipmr_device_event,
1177 };
1178
1179 /*
1180  *      Encapsulate a packet by attaching a valid IPIP header to it.
1181  *      This avoids tunnel drivers and other mess and gives us the speed so
1182  *      important for multicast video.
1183  */
1184
1185 static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1186 {
1187         struct iphdr *iph;
1188         struct iphdr *old_iph = ip_hdr(skb);
1189
1190         skb_push(skb, sizeof(struct iphdr));
1191         skb->transport_header = skb->network_header;
1192         skb_reset_network_header(skb);
1193         iph = ip_hdr(skb);
1194
1195         iph->version    =       4;
1196         iph->tos        =       old_iph->tos;
1197         iph->ttl        =       old_iph->ttl;
1198         iph->frag_off   =       0;
1199         iph->daddr      =       daddr;
1200         iph->saddr      =       saddr;
1201         iph->protocol   =       IPPROTO_IPIP;
1202         iph->ihl        =       5;
1203         iph->tot_len    =       htons(skb->len);
1204         ip_select_ident(iph, skb_dst(skb), NULL);
1205         ip_send_check(iph);
1206
1207         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1208         nf_reset(skb);
1209 }
1210
1211 static inline int ipmr_forward_finish(struct sk_buff *skb)
1212 {
1213         struct ip_options * opt = &(IPCB(skb)->opt);
1214
1215         IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1216
1217         if (unlikely(opt->optlen))
1218                 ip_forward_options(skb);
1219
1220         return dst_output(skb);
1221 }
1222
1223 /*
1224  *      Processing handlers for ipmr_forward
1225  */
1226
1227 static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1228 {
1229         struct net *net = mfc_net(c);
1230         const struct iphdr *iph = ip_hdr(skb);
1231         struct vif_device *vif = &net->ipv4.vif_table[vifi];
1232         struct net_device *dev;
1233         struct rtable *rt;
1234         int    encap = 0;
1235
1236         if (vif->dev == NULL)
1237                 goto out_free;
1238
1239 #ifdef CONFIG_IP_PIMSM
1240         if (vif->flags & VIFF_REGISTER) {
1241                 vif->pkt_out++;
1242                 vif->bytes_out += skb->len;
1243                 vif->dev->stats.tx_bytes += skb->len;
1244                 vif->dev->stats.tx_packets++;
1245                 ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT);
1246                 goto out_free;
1247         }
1248 #endif
1249
1250         if (vif->flags&VIFF_TUNNEL) {
1251                 struct flowi fl = { .oif = vif->link,
1252                                     .nl_u = { .ip4_u =
1253                                               { .daddr = vif->remote,
1254                                                 .saddr = vif->local,
1255                                                 .tos = RT_TOS(iph->tos) } },
1256                                     .proto = IPPROTO_IPIP };
1257                 if (ip_route_output_key(net, &rt, &fl))
1258                         goto out_free;
1259                 encap = sizeof(struct iphdr);
1260         } else {
1261                 struct flowi fl = { .oif = vif->link,
1262                                     .nl_u = { .ip4_u =
1263                                               { .daddr = iph->daddr,
1264                                                 .tos = RT_TOS(iph->tos) } },
1265                                     .proto = IPPROTO_IPIP };
1266                 if (ip_route_output_key(net, &rt, &fl))
1267                         goto out_free;
1268         }
1269
1270         dev = rt->u.dst.dev;
1271
1272         if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1273                 /* Do not fragment multicasts. Alas, IPv4 does not
1274                    allow to send ICMP, so that packets will disappear
1275                    to blackhole.
1276                  */
1277
1278                 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
1279                 ip_rt_put(rt);
1280                 goto out_free;
1281         }
1282
1283         encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1284
1285         if (skb_cow(skb, encap)) {
1286                 ip_rt_put(rt);
1287                 goto out_free;
1288         }
1289
1290         vif->pkt_out++;
1291         vif->bytes_out += skb->len;
1292
1293         skb_dst_drop(skb);
1294         skb_dst_set(skb, &rt->u.dst);
1295         ip_decrease_ttl(ip_hdr(skb));
1296
1297         /* FIXME: forward and output firewalls used to be called here.
1298          * What do we do with netfilter? -- RR */
1299         if (vif->flags & VIFF_TUNNEL) {
1300                 ip_encap(skb, vif->local, vif->remote);
1301                 /* FIXME: extra output firewall step used to be here. --RR */
1302                 vif->dev->stats.tx_packets++;
1303                 vif->dev->stats.tx_bytes += skb->len;
1304         }
1305
1306         IPCB(skb)->flags |= IPSKB_FORWARDED;
1307
1308         /*
1309          * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1310          * not only before forwarding, but after forwarding on all output
1311          * interfaces. It is clear, if mrouter runs a multicasting
1312          * program, it should receive packets not depending to what interface
1313          * program is joined.
1314          * If we will not make it, the program will have to join on all
1315          * interfaces. On the other hand, multihoming host (or router, but
1316          * not mrouter) cannot join to more than one interface - it will
1317          * result in receiving multiple packets.
1318          */
1319         NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
1320                 ipmr_forward_finish);
1321         return;
1322
1323 out_free:
1324         kfree_skb(skb);
1325         return;
1326 }
1327
1328 static int ipmr_find_vif(struct net_device *dev)
1329 {
1330         struct net *net = dev_net(dev);
1331         int ct;
1332         for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) {
1333                 if (net->ipv4.vif_table[ct].dev == dev)
1334                         break;
1335         }
1336         return ct;
1337 }
1338
1339 /* "local" means that we should preserve one skb (for local delivery) */
1340
1341 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
1342 {
1343         int psend = -1;
1344         int vif, ct;
1345         struct net *net = mfc_net(cache);
1346
1347         vif = cache->mfc_parent;
1348         cache->mfc_un.res.pkt++;
1349         cache->mfc_un.res.bytes += skb->len;
1350
1351         /*
1352          * Wrong interface: drop packet and (maybe) send PIM assert.
1353          */
1354         if (net->ipv4.vif_table[vif].dev != skb->dev) {
1355                 int true_vifi;
1356
1357                 if (skb_rtable(skb)->fl.iif == 0) {
1358                         /* It is our own packet, looped back.
1359                            Very complicated situation...
1360
1361                            The best workaround until routing daemons will be
1362                            fixed is not to redistribute packet, if it was
1363                            send through wrong interface. It means, that
1364                            multicast applications WILL NOT work for
1365                            (S,G), which have default multicast route pointing
1366                            to wrong oif. In any case, it is not a good
1367                            idea to use multicasting applications on router.
1368                          */
1369                         goto dont_forward;
1370                 }
1371
1372                 cache->mfc_un.res.wrong_if++;
1373                 true_vifi = ipmr_find_vif(skb->dev);
1374
1375                 if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
1376                     /* pimsm uses asserts, when switching from RPT to SPT,
1377                        so that we cannot check that packet arrived on an oif.
1378                        It is bad, but otherwise we would need to move pretty
1379                        large chunk of pimd to kernel. Ough... --ANK
1380                      */
1381                     (net->ipv4.mroute_do_pim ||
1382                      cache->mfc_un.res.ttls[true_vifi] < 255) &&
1383                     time_after(jiffies,
1384                                cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1385                         cache->mfc_un.res.last_assert = jiffies;
1386                         ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF);
1387                 }
1388                 goto dont_forward;
1389         }
1390
1391         net->ipv4.vif_table[vif].pkt_in++;
1392         net->ipv4.vif_table[vif].bytes_in += skb->len;
1393
1394         /*
1395          *      Forward the frame
1396          */
1397         for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1398                 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1399                         if (psend != -1) {
1400                                 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1401                                 if (skb2)
1402                                         ipmr_queue_xmit(skb2, cache, psend);
1403                         }
1404                         psend = ct;
1405                 }
1406         }
1407         if (psend != -1) {
1408                 if (local) {
1409                         struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1410                         if (skb2)
1411                                 ipmr_queue_xmit(skb2, cache, psend);
1412                 } else {
1413                         ipmr_queue_xmit(skb, cache, psend);
1414                         return 0;
1415                 }
1416         }
1417
1418 dont_forward:
1419         if (!local)
1420                 kfree_skb(skb);
1421         return 0;
1422 }
1423
1424
1425 /*
1426  *      Multicast packets for forwarding arrive here
1427  */
1428
1429 int ip_mr_input(struct sk_buff *skb)
1430 {
1431         struct mfc_cache *cache;
1432         struct net *net = dev_net(skb->dev);
1433         int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
1434
1435         /* Packet is looped back after forward, it should not be
1436            forwarded second time, but still can be delivered locally.
1437          */
1438         if (IPCB(skb)->flags&IPSKB_FORWARDED)
1439                 goto dont_forward;
1440
1441         if (!local) {
1442                     if (IPCB(skb)->opt.router_alert) {
1443                             if (ip_call_ra_chain(skb))
1444                                     return 0;
1445                     } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
1446                             /* IGMPv1 (and broken IGMPv2 implementations sort of
1447                                Cisco IOS <= 11.2(8)) do not put router alert
1448                                option to IGMP packets destined to routable
1449                                groups. It is very bad, because it means
1450                                that we can forward NO IGMP messages.
1451                              */
1452                             read_lock(&mrt_lock);
1453                             if (net->ipv4.mroute_sk) {
1454                                     nf_reset(skb);
1455                                     raw_rcv(net->ipv4.mroute_sk, skb);
1456                                     read_unlock(&mrt_lock);
1457                                     return 0;
1458                             }
1459                             read_unlock(&mrt_lock);
1460                     }
1461         }
1462
1463         read_lock(&mrt_lock);
1464         cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1465
1466         /*
1467          *      No usable cache entry
1468          */
1469         if (cache == NULL) {
1470                 int vif;
1471
1472                 if (local) {
1473                         struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1474                         ip_local_deliver(skb);
1475                         if (skb2 == NULL) {
1476                                 read_unlock(&mrt_lock);
1477                                 return -ENOBUFS;
1478                         }
1479                         skb = skb2;
1480                 }
1481
1482                 vif = ipmr_find_vif(skb->dev);
1483                 if (vif >= 0) {
1484                         int err = ipmr_cache_unresolved(net, vif, skb);
1485                         read_unlock(&mrt_lock);
1486
1487                         return err;
1488                 }
1489                 read_unlock(&mrt_lock);
1490                 kfree_skb(skb);
1491                 return -ENODEV;
1492         }
1493
1494         ip_mr_forward(skb, cache, local);
1495
1496         read_unlock(&mrt_lock);
1497
1498         if (local)
1499                 return ip_local_deliver(skb);
1500
1501         return 0;
1502
1503 dont_forward:
1504         if (local)
1505                 return ip_local_deliver(skb);
1506         kfree_skb(skb);
1507         return 0;
1508 }
1509
#ifdef CONFIG_IP_PIMSM
/*
 *	Common part of PIM register reception.  @pimlen is the size of the
 *	PIM header that precedes the encapsulated IP packet.
 *
 *	Returns 0 when the inner packet was re-injected through the register
 *	vif device (the skb is consumed), or 1 when the caller still owns
 *	the skb and has to dispose of it.
 */
static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
{
	struct net *net = dev_net(skb->dev);
	struct net_device *reg_dev = NULL;
	struct iphdr *inner;

	inner = (struct iphdr *)(skb_transport_header(skb) + pimlen);

	/* a. the packet must really be destined to a multicast group */
	if (!ipv4_is_multicast(inner->daddr))
		return 1;
	/* b. the packet must not be a NULL-REGISTER */
	if (inner->tot_len == 0)
		return 1;
	/* c. the packet must not be truncated */
	if (ntohs(inner->tot_len) + pimlen > skb->len)
		return 1;

	/* Pin the register device while mrt_lock keeps the table stable. */
	read_lock(&mrt_lock);
	if (net->ipv4.mroute_reg_vif_num >= 0) {
		reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev;
		if (reg_dev)
			dev_hold(reg_dev);
	}
	read_unlock(&mrt_lock);

	if (!reg_dev)
		return 1;

	/* Strip the outer headers and present the inner packet as input. */
	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)inner - skb->data);
	skb_reset_network_header(skb);
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	skb_dst_drop(skb);
	reg_dev->stats.rx_bytes += skb->len;
	reg_dev->stats.rx_packets++;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);

	return 0;
}
#endif
1556
#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 *
 * Only PIMv1 REGISTER messages are processed, and only while PIM mode is
 * enabled for the namespace; everything else is dropped.  Always
 * returns 0.
 */

int pim_rcv_v1(struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct igmphdr *pim;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = igmp_hdr(skb);

	if (!net->ipv4.mroute_do_pim)
		goto drop;
	if (pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
		goto drop;

	/* __pim_rcv() consumed the skb when it reports 0. */
	if (!__pim_rcv(skb, sizeof(*pim)))
		return 0;
drop:
	kfree_skb(skb);
	return 0;
}
#endif
1583
#ifdef CONFIG_IP_PIMSM_V2
/*
 * Receive handler for PIMv2 packets.  Accepts only non-NULL REGISTER
 * messages whose checksum verifies either over the PIM header alone or
 * over the whole packet; everything else is dropped.  Always returns 0.
 */
static int pim_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);

	if (pim->type != ((PIM_VERSION << 4) | (PIM_REGISTER)))
		goto drop;
	if (pim->flags & PIM_NULL_REGISTER)
		goto drop;
	/* Header-only checksum first, full-packet checksum as fallback. */
	if (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	    csum_fold(skb_checksum(skb, 0, skb->len, 0)))
		goto drop;

	/* __pim_rcv() consumed the skb when it reports 0. */
	if (!__pim_rcv(skb, sizeof(*pim)))
		return 0;
drop:
	kfree_skb(skb);
	return 0;
}
#endif
1606
1607 static int
1608 ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1609 {
1610         int ct;
1611         struct rtnexthop *nhp;
1612         struct net *net = mfc_net(c);
1613         struct net_device *dev = net->ipv4.vif_table[c->mfc_parent].dev;
1614         u8 *b = skb_tail_pointer(skb);
1615         struct rtattr *mp_head;
1616
1617         if (dev)
1618                 RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1619
1620         mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1621
1622         for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1623                 if (c->mfc_un.res.ttls[ct] < 255) {
1624                         if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1625                                 goto rtattr_failure;
1626                         nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1627                         nhp->rtnh_flags = 0;
1628                         nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1629                         nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex;
1630                         nhp->rtnh_len = sizeof(*nhp);
1631                 }
1632         }
1633         mp_head->rta_type = RTA_MULTIPATH;
1634         mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1635         rtm->rtm_type = RTN_MULTICAST;
1636         return 1;
1637
1638 rtattr_failure:
1639         nlmsg_trim(skb, b);
1640         return -EMSGSIZE;
1641 }
1642
1643 int ipmr_get_route(struct net *net,
1644                    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1645 {
1646         int err;
1647         struct mfc_cache *cache;
1648         struct rtable *rt = skb_rtable(skb);
1649
1650         read_lock(&mrt_lock);
1651         cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);
1652
1653         if (cache == NULL) {
1654                 struct sk_buff *skb2;
1655                 struct iphdr *iph;
1656                 struct net_device *dev;
1657                 int vif;
1658
1659                 if (nowait) {
1660                         read_unlock(&mrt_lock);
1661                         return -EAGAIN;
1662                 }
1663
1664                 dev = skb->dev;
1665                 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1666                         read_unlock(&mrt_lock);
1667                         return -ENODEV;
1668                 }
1669                 skb2 = skb_clone(skb, GFP_ATOMIC);
1670                 if (!skb2) {
1671                         read_unlock(&mrt_lock);
1672                         return -ENOMEM;
1673                 }
1674
1675                 skb_push(skb2, sizeof(struct iphdr));
1676                 skb_reset_network_header(skb2);
1677                 iph = ip_hdr(skb2);
1678                 iph->ihl = sizeof(struct iphdr) >> 2;
1679                 iph->saddr = rt->rt_src;
1680                 iph->daddr = rt->rt_dst;
1681                 iph->version = 0;
1682                 err = ipmr_cache_unresolved(net, vif, skb2);
1683                 read_unlock(&mrt_lock);
1684                 return err;
1685         }
1686
1687         if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1688                 cache->mfc_flags |= MFC_NOTIFY;
1689         err = ipmr_fill_mroute(skb, cache, rtm);
1690         read_unlock(&mrt_lock);
1691         return err;
1692 }
1693
1694 #ifdef CONFIG_PROC_FS
1695 /*
1696  *      The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
1697  */
1698 struct ipmr_vif_iter {
1699         struct seq_net_private p;
1700         int ct;
1701 };
1702
1703 static struct vif_device *ipmr_vif_seq_idx(struct net *net,
1704                                            struct ipmr_vif_iter *iter,
1705                                            loff_t pos)
1706 {
1707         for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
1708                 if (!VIF_EXISTS(net, iter->ct))
1709                         continue;
1710                 if (pos-- == 0)
1711                         return &net->ipv4.vif_table[iter->ct];
1712         }
1713         return NULL;
1714 }
1715
1716 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1717         __acquires(mrt_lock)
1718 {
1719         struct net *net = seq_file_net(seq);
1720
1721         read_lock(&mrt_lock);
1722         return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
1723                 : SEQ_START_TOKEN;
1724 }
1725
1726 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1727 {
1728         struct ipmr_vif_iter *iter = seq->private;
1729         struct net *net = seq_file_net(seq);
1730
1731         ++*pos;
1732         if (v == SEQ_START_TOKEN)
1733                 return ipmr_vif_seq_idx(net, iter, 0);
1734
1735         while (++iter->ct < net->ipv4.maxvif) {
1736                 if (!VIF_EXISTS(net, iter->ct))
1737                         continue;
1738                 return &net->ipv4.vif_table[iter->ct];
1739         }
1740         return NULL;
1741 }
1742
1743 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
1744         __releases(mrt_lock)
1745 {
1746         read_unlock(&mrt_lock);
1747 }
1748
1749 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1750 {
1751         struct net *net = seq_file_net(seq);
1752
1753         if (v == SEQ_START_TOKEN) {
1754                 seq_puts(seq,
1755                          "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
1756         } else {
1757                 const struct vif_device *vif = v;
1758                 const char *name =  vif->dev ? vif->dev->name : "none";
1759
1760                 seq_printf(seq,
1761                            "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
1762                            vif - net->ipv4.vif_table,
1763                            name, vif->bytes_in, vif->pkt_in,
1764                            vif->bytes_out, vif->pkt_out,
1765                            vif->flags, vif->local, vif->remote);
1766         }
1767         return 0;
1768 }
1769
1770 static const struct seq_operations ipmr_vif_seq_ops = {
1771         .start = ipmr_vif_seq_start,
1772         .next  = ipmr_vif_seq_next,
1773         .stop  = ipmr_vif_seq_stop,
1774         .show  = ipmr_vif_seq_show,
1775 };
1776
1777 static int ipmr_vif_open(struct inode *inode, struct file *file)
1778 {
1779         return seq_open_net(inode, file, &ipmr_vif_seq_ops,
1780                             sizeof(struct ipmr_vif_iter));
1781 }
1782
1783 static const struct file_operations ipmr_vif_fops = {
1784         .owner   = THIS_MODULE,
1785         .open    = ipmr_vif_open,
1786         .read    = seq_read,
1787         .llseek  = seq_lseek,
1788         .release = seq_release_net,
1789 };
1790
1791 struct ipmr_mfc_iter {
1792         struct seq_net_private p;
1793         struct mfc_cache **cache;
1794         int ct;
1795 };
1796
1797
1798 static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
1799                                           struct ipmr_mfc_iter *it, loff_t pos)
1800 {
1801         struct mfc_cache *mfc;
1802
1803         it->cache = net->ipv4.mfc_cache_array;
1804         read_lock(&mrt_lock);
1805         for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
1806                 for (mfc = net->ipv4.mfc_cache_array[it->ct];
1807                      mfc; mfc = mfc->next)
1808                         if (pos-- == 0)
1809                                 return mfc;
1810         read_unlock(&mrt_lock);
1811
1812         it->cache = &mfc_unres_queue;
1813         spin_lock_bh(&mfc_unres_lock);
1814         for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
1815                 if (net_eq(mfc_net(mfc), net) &&
1816                     pos-- == 0)
1817                         return mfc;
1818         spin_unlock_bh(&mfc_unres_lock);
1819
1820         it->cache = NULL;
1821         return NULL;
1822 }
1823
1824
1825 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1826 {
1827         struct ipmr_mfc_iter *it = seq->private;
1828         struct net *net = seq_file_net(seq);
1829
1830         it->cache = NULL;
1831         it->ct = 0;
1832         return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
1833                 : SEQ_START_TOKEN;
1834 }
1835
1836 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1837 {
1838         struct mfc_cache *mfc = v;
1839         struct ipmr_mfc_iter *it = seq->private;
1840         struct net *net = seq_file_net(seq);
1841
1842         ++*pos;
1843
1844         if (v == SEQ_START_TOKEN)
1845                 return ipmr_mfc_seq_idx(net, seq->private, 0);
1846
1847         if (mfc->next)
1848                 return mfc->next;
1849
1850         if (it->cache == &mfc_unres_queue)
1851                 goto end_of_list;
1852
1853         BUG_ON(it->cache != net->ipv4.mfc_cache_array);
1854
1855         while (++it->ct < MFC_LINES) {
1856                 mfc = net->ipv4.mfc_cache_array[it->ct];
1857                 if (mfc)
1858                         return mfc;
1859         }
1860
1861         /* exhausted cache_array, show unresolved */
1862         read_unlock(&mrt_lock);
1863         it->cache = &mfc_unres_queue;
1864         it->ct = 0;
1865
1866         spin_lock_bh(&mfc_unres_lock);
1867         mfc = mfc_unres_queue;
1868         while (mfc && !net_eq(mfc_net(mfc), net))
1869                 mfc = mfc->next;
1870         if (mfc)
1871                 return mfc;
1872
1873  end_of_list:
1874         spin_unlock_bh(&mfc_unres_lock);
1875         it->cache = NULL;
1876
1877         return NULL;
1878 }
1879
1880 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1881 {
1882         struct ipmr_mfc_iter *it = seq->private;
1883         struct net *net = seq_file_net(seq);
1884
1885         if (it->cache == &mfc_unres_queue)
1886                 spin_unlock_bh(&mfc_unres_lock);
1887         else if (it->cache == net->ipv4.mfc_cache_array)
1888                 read_unlock(&mrt_lock);
1889 }
1890
1891 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1892 {
1893         int n;
1894         struct net *net = seq_file_net(seq);
1895
1896         if (v == SEQ_START_TOKEN) {
1897                 seq_puts(seq,
1898                  "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
1899         } else {
1900                 const struct mfc_cache *mfc = v;
1901                 const struct ipmr_mfc_iter *it = seq->private;
1902
1903                 seq_printf(seq, "%08lX %08lX %-3hd",
1904                            (unsigned long) mfc->mfc_mcastgrp,
1905                            (unsigned long) mfc->mfc_origin,
1906                            mfc->mfc_parent);
1907
1908                 if (it->cache != &mfc_unres_queue) {
1909                         seq_printf(seq, " %8lu %8lu %8lu",
1910                                    mfc->mfc_un.res.pkt,
1911                                    mfc->mfc_un.res.bytes,
1912                                    mfc->mfc_un.res.wrong_if);
1913                         for (n = mfc->mfc_un.res.minvif;
1914                              n < mfc->mfc_un.res.maxvif; n++ ) {
1915                                 if (VIF_EXISTS(net, n) &&
1916                                     mfc->mfc_un.res.ttls[n] < 255)
1917                                         seq_printf(seq,
1918                                            " %2d:%-3d",
1919                                            n, mfc->mfc_un.res.ttls[n]);
1920                         }
1921                 } else {
1922                         /* unresolved mfc_caches don't contain
1923                          * pkt, bytes and wrong_if values
1924                          */
1925                         seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
1926                 }
1927                 seq_putc(seq, '\n');
1928         }
1929         return 0;
1930 }
1931
1932 static const struct seq_operations ipmr_mfc_seq_ops = {
1933         .start = ipmr_mfc_seq_start,
1934         .next  = ipmr_mfc_seq_next,
1935         .stop  = ipmr_mfc_seq_stop,
1936         .show  = ipmr_mfc_seq_show,
1937 };
1938
1939 static int ipmr_mfc_open(struct inode *inode, struct file *file)
1940 {
1941         return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
1942                             sizeof(struct ipmr_mfc_iter));
1943 }
1944
1945 static const struct file_operations ipmr_mfc_fops = {
1946         .owner   = THIS_MODULE,
1947         .open    = ipmr_mfc_open,
1948         .read    = seq_read,
1949         .llseek  = seq_lseek,
1950         .release = seq_release_net,
1951 };
1952 #endif
1953
#ifdef CONFIG_IP_PIMSM_V2
/* Protocol descriptor whose receive handler is pim_rcv(). */
static struct net_protocol pim_protocol = {
        .handler = pim_rcv,
};
#endif
1959
1960
1961 /*
1962  *      Setup for IP multicast routing
1963  */
1964 static int __net_init ipmr_net_init(struct net *net)
1965 {
1966         int err = 0;
1967
1968         net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
1969                                       GFP_KERNEL);
1970         if (!net->ipv4.vif_table) {
1971                 err = -ENOMEM;
1972                 goto fail;
1973         }
1974
1975         /* Forwarding cache */
1976         net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
1977                                             sizeof(struct mfc_cache *),
1978                                             GFP_KERNEL);
1979         if (!net->ipv4.mfc_cache_array) {
1980                 err = -ENOMEM;
1981                 goto fail_mfc_cache;
1982         }
1983
1984 #ifdef CONFIG_IP_PIMSM
1985         net->ipv4.mroute_reg_vif_num = -1;
1986 #endif
1987
1988 #ifdef CONFIG_PROC_FS
1989         err = -ENOMEM;
1990         if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
1991                 goto proc_vif_fail;
1992         if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
1993                 goto proc_cache_fail;
1994 #endif
1995         return 0;
1996
1997 #ifdef CONFIG_PROC_FS
1998 proc_cache_fail:
1999         proc_net_remove(net, "ip_mr_vif");
2000 proc_vif_fail:
2001         kfree(net->ipv4.mfc_cache_array);
2002 #endif
2003 fail_mfc_cache:
2004         kfree(net->ipv4.vif_table);
2005 fail:
2006         return err;
2007 }
2008
2009 static void __net_exit ipmr_net_exit(struct net *net)
2010 {
2011 #ifdef CONFIG_PROC_FS
2012         proc_net_remove(net, "ip_mr_cache");
2013         proc_net_remove(net, "ip_mr_vif");
2014 #endif
2015         kfree(net->ipv4.mfc_cache_array);
2016         kfree(net->ipv4.vif_table);
2017 }
2018
2019 static struct pernet_operations ipmr_net_ops = {
2020         .init = ipmr_net_init,
2021         .exit = ipmr_net_exit,
2022 };
2023
2024 int __init ip_mr_init(void)
2025 {
2026         int err;
2027
2028         mrt_cachep = kmem_cache_create("ip_mrt_cache",
2029                                        sizeof(struct mfc_cache),
2030                                        0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2031                                        NULL);
2032         if (!mrt_cachep)
2033                 return -ENOMEM;
2034
2035         err = register_pernet_subsys(&ipmr_net_ops);
2036         if (err)
2037                 goto reg_pernet_fail;
2038
2039         setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
2040         err = register_netdevice_notifier(&ip_mr_notifier);
2041         if (err)
2042                 goto reg_notif_fail;
2043         return 0;
2044
2045 reg_notif_fail:
2046         del_timer(&ipmr_expire_timer);
2047         unregister_pernet_subsys(&ipmr_net_ops);
2048 reg_pernet_fail:
2049         kmem_cache_destroy(mrt_cachep);
2050         return err;
2051 }