2 * Linux IPv6 multicast routing support for BSD pim6sd
3 * Based on net/ipv4/ipmr.c.
5 * (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6 * LSIIT Laboratory, Strasbourg, France
7 * (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
9 * Copyright (C)2007,2008 USAGI/WIDE Project
10 * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
19 #include <asm/system.h>
20 #include <asm/uaccess.h>
21 #include <linux/types.h>
22 #include <linux/sched.h>
23 #include <linux/errno.h>
24 #include <linux/timer.h>
26 #include <linux/kernel.h>
27 #include <linux/fcntl.h>
28 #include <linux/stat.h>
29 #include <linux/socket.h>
30 #include <linux/inet.h>
31 #include <linux/netdevice.h>
32 #include <linux/inetdevice.h>
33 #include <linux/proc_fs.h>
34 #include <linux/seq_file.h>
35 #include <linux/init.h>
36 #include <net/protocol.h>
37 #include <linux/skbuff.h>
40 #include <linux/notifier.h>
41 #include <linux/if_arp.h>
42 #include <net/checksum.h>
43 #include <net/netlink.h>
46 #include <net/ip6_route.h>
47 #include <linux/mroute6.h>
48 #include <linux/pim.h>
49 #include <net/addrconf.h>
50 #include <linux/netfilter_ipv6.h>
52 /* Big lock, protecting vif table, mrt cache and mroute socket state.
53    Note that the changes are semaphored via rtnl_lock.
/* mrt_lock: read-locked on the fast path, write-locked for table updates. */
56 static DEFINE_RWLOCK(mrt_lock);
59 * Multicast router control variables
/* True when VIF slot _idx of the per-net table has a device bound. */
62 #define MIF_EXISTS(_net, _idx) ((_net)->ipv6.vif6_table[_idx].dev != NULL)
64 static int mroute_do_assert; /* Set in PIM assert */
65 #ifdef CONFIG_IPV6_PIMSM_V2
66 static int mroute_do_pim;
/* Without PIM-SM v2 support the flag is compiled away to constant 0. */
68 #define mroute_do_pim 0
/* Singly linked list of cache entries still waiting for userspace resolution. */
71 static struct mfc6_cache *mfc_unres_queue; /* Queue of unresolved entries */
73 /* Special spinlock for queue of unresolved entries */
74 static DEFINE_SPINLOCK(mfc_unres_lock);
76 /* We return to original Alan's scheme. Hash table of resolved
77 entries is changed only in process context and protected
78 with weak lock mrt_lock. Queue of unresolved entries is protected
79 with strong spinlock mfc_unres_lock.
81 In this case data path is free of exclusive locks at all.
/* Slab cache from which struct mfc6_cache entries are allocated. */
84 static struct kmem_cache *mrt_cachep __read_mostly;
/* Forward declarations for routines defined later in this file. */
86 static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache);
87 static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert);
88 static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm);
90 #ifdef CONFIG_IPV6_PIMSM_V2
91 static struct inet6_protocol pim6_protocol;
/* Single timer driving expiry of the unresolved-entry queue. */
94 static struct timer_list ipmr_expire_timer;
/* Iterator state for the /proc MFC walk: 'cache' points either at the
 * resolved hash array or at &mfc_unres_queue (the 'ct' index member is
 * not visible in this excerpt). */
99 struct ipmr_mfc_iter {
100 struct mfc6_cache **cache;
/* Position the iterator at entry 'pos': scan the resolved buckets under
 * mrt_lock first, then the unresolved queue under mfc_unres_lock.
 * NOTE(review): the lines that return the matching entry are missing
 * from this excerpt. */
105 static struct mfc6_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
107 struct mfc6_cache *mfc;
109 it->cache = init_net.ipv6.mfc6_cache_array;
110 read_lock(&mrt_lock);
111 for (it->ct = 0; it->ct < MFC6_LINES; it->ct++)
112 for (mfc = init_net.ipv6.mfc6_cache_array[it->ct];
113 mfc; mfc = mfc->next)
116 read_unlock(&mrt_lock);
118 it->cache = &mfc_unres_queue;
119 spin_lock_bh(&mfc_unres_lock);
120 for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
123 spin_unlock_bh(&mfc_unres_lock);
133 * The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
/* Iterator for the VIF table walk; holds the current slot index. */
136 struct ipmr_vif_iter {
/* Return the pos'th live VIF, skipping empty table slots.
 * Caller holds mrt_lock (taken in ip6mr_vif_seq_start). */
140 static struct mif_device *ip6mr_vif_seq_idx(struct ipmr_vif_iter *iter,
143 for (iter->ct = 0; iter->ct < init_net.ipv6.maxvif; ++iter->ct) {
144 if (!MIF_EXISTS(&init_net, iter->ct))
147 return &init_net.ipv6.vif6_table[iter->ct];
/* seq_file start: take mrt_lock for the duration of the walk;
 * released in ip6mr_vif_seq_stop(). */
152 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
155 read_lock(&mrt_lock);
156 return (*pos ? ip6mr_vif_seq_idx(seq->private, *pos - 1)
/* seq_file next: advance past empty VIF slots to the next live one. */
160 static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
162 struct ipmr_vif_iter *iter = seq->private;
165 if (v == SEQ_START_TOKEN)
166 return ip6mr_vif_seq_idx(iter, 0);
168 while (++iter->ct < init_net.ipv6.maxvif) {
169 if (!MIF_EXISTS(&init_net, iter->ct))
171 return &init_net.ipv6.vif6_table[iter->ct];
/* seq_file stop: drop the lock taken in ip6mr_vif_seq_start(). */
176 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
179 read_unlock(&mrt_lock);
/* Emit one /proc line per VIF: a header row for SEQ_START_TOKEN,
 * otherwise slot index, device name and byte/packet counters. */
182 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
184 if (v == SEQ_START_TOKEN) {
186 "Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
188 const struct mif_device *vif = v;
189 const char *name = vif->dev ? vif->dev->name : "none";
/* %2td: index computed by pointer subtraction from the table base. */
192 "%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
193 vif - init_net.ipv6.vif6_table,
194 name, vif->bytes_in, vif->pkt_in,
195 vif->bytes_out, vif->pkt_out,
/* seq_operations glue for /proc/ip6_mr_vif. */
201 static struct seq_operations ip6mr_vif_seq_ops = {
202 .start = ip6mr_vif_seq_start,
203 .next = ip6mr_vif_seq_next,
204 .stop = ip6mr_vif_seq_stop,
205 .show = ip6mr_vif_seq_show,
/* open(): allocate per-reader iterator state alongside the seq_file. */
208 static int ip6mr_vif_open(struct inode *inode, struct file *file)
210 return seq_open_private(file, &ip6mr_vif_seq_ops,
211 sizeof(struct ipmr_vif_iter));
214 static struct file_operations ip6mr_vif_fops = {
215 .owner = THIS_MODULE,
216 .open = ip6mr_vif_open,
219 .release = seq_release_private,
/* seq_file start for /proc/ip6_mr_cache; locking is done inside
 * ipmr_mfc_seq_idx (mrt_lock or mfc_unres_lock depending on phase). */
222 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
224 return (*pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
/* seq_file next: walk the resolved hash buckets first, then switch to
 * the unresolved queue, swapping mrt_lock for mfc_unres_lock. */
228 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
230 struct mfc6_cache *mfc = v;
231 struct ipmr_mfc_iter *it = seq->private;
235 if (v == SEQ_START_TOKEN)
236 return ipmr_mfc_seq_idx(seq->private, 0);
241 if (it->cache == &mfc_unres_queue)
/* Must still be in the resolved-array phase at this point. */
244 BUG_ON(it->cache != init_net.ipv6.mfc6_cache_array);
246 while (++it->ct < MFC6_LINES) {
247 mfc = init_net.ipv6.mfc6_cache_array[it->ct];
252 /* exhausted cache_array, show unresolved */
253 read_unlock(&mrt_lock);
254 it->cache = &mfc_unres_queue;
257 spin_lock_bh(&mfc_unres_lock);
258 mfc = mfc_unres_queue;
263 spin_unlock_bh(&mfc_unres_lock);
/* seq_file stop: release whichever lock the walk currently holds,
 * inferred from it->cache. */
269 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
271 struct ipmr_mfc_iter *it = seq->private;
273 if (it->cache == &mfc_unres_queue)
274 spin_unlock_bh(&mfc_unres_lock);
275 else if (it->cache == init_net.ipv6.mfc6_cache_array)
276 read_unlock(&mrt_lock);
/* Emit one /proc line per MFC entry: group/origin/iif, plus packet,
 * byte and wrong-interface counters for resolved entries. */
279 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
283 if (v == SEQ_START_TOKEN) {
287 "Iif Pkts Bytes Wrong Oifs\n");
289 const struct mfc6_cache *mfc = v;
290 const struct ipmr_mfc_iter *it = seq->private;
292 seq_printf(seq, "%pI6 %pI6 %-3hd",
293 &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
/* Resolved entries carry real counters and an oif:ttl list. */
296 if (it->cache != &mfc_unres_queue) {
297 seq_printf(seq, " %8lu %8lu %8lu",
299 mfc->mfc_un.res.bytes,
300 mfc->mfc_un.res.wrong_if);
301 for (n = mfc->mfc_un.res.minvif;
302 n < mfc->mfc_un.res.maxvif; n++) {
/* ttl 255 marks a VIF excluded from forwarding; skip it. */
303 if (MIF_EXISTS(&init_net, n) &&
304 mfc->mfc_un.res.ttls[n] < 255)
307 n, mfc->mfc_un.res.ttls[n]);
310 /* unresolved mfc_caches don't contain
311 * pkt, bytes and wrong_if values
313 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
/* seq_operations glue for /proc/ip6_mr_cache. */
320 static struct seq_operations ipmr_mfc_seq_ops = {
321 .start = ipmr_mfc_seq_start,
322 .next = ipmr_mfc_seq_next,
323 .stop = ipmr_mfc_seq_stop,
324 .show = ipmr_mfc_seq_show,
327 static int ipmr_mfc_open(struct inode *inode, struct file *file)
329 return seq_open_private(file, &ipmr_mfc_seq_ops,
330 sizeof(struct ipmr_mfc_iter));
333 static struct file_operations ip6mr_mfc_fops = {
334 .owner = THIS_MODULE,
335 .open = ipmr_mfc_open,
338 .release = seq_release_private,
342 #ifdef CONFIG_IPV6_PIMSM_V2
/* Index of the PIM register pseudo-VIF, or -1 when none is configured. */
343 static int reg_vif_num = -1;
/* Receive a PIM Register packet: validate header and checksum, check
 * the encapsulated packet is a non-empty multicast IPv6 datagram, then
 * decapsulate and hand it to the register device. */
345 static int pim6_rcv(struct sk_buff *skb)
347 struct pimreghdr *pim;
348 struct ipv6hdr *encap;
349 struct net_device *reg_dev = NULL;
351 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
354 pim = (struct pimreghdr *)skb_transport_header(skb);
/* Reject anything that is not a plain (non-null) Register with a
 * valid header or full-packet checksum. */
355 if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
356 (pim->flags & PIM_NULL_REGISTER) ||
357 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
358 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
361 /* check if the inner packet is destined to mcast group */
362 encap = (struct ipv6hdr *)(skb_transport_header(skb) +
365 if (!ipv6_addr_is_multicast(&encap->daddr) ||
366 encap->payload_len == 0 ||
367 ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
370 read_lock(&mrt_lock);
371 if (reg_vif_num >= 0)
372 reg_dev = init_net.ipv6.vif6_table[reg_vif_num].dev;
375 read_unlock(&mrt_lock);
/* Strip the outer headers so the inner IPv6 packet starts the skb. */
380 skb->mac_header = skb->network_header;
381 skb_pull(skb, (u8 *)encap - skb->data);
382 skb_reset_network_header(skb);
/* NOTE(review): the inner packet is IPv6 (struct ipv6hdr) but the
 * protocol is set to ETH_P_IP — confirm against ETH_P_IPV6. */
384 skb->protocol = htons(ETH_P_IP);
386 skb->pkt_type = PACKET_HOST;
387 dst_release(skb->dst);
388 reg_dev->stats.rx_bytes += skb->len;
389 reg_dev->stats.rx_packets++;
/* Protocol handler registered for IPPROTO_PIM (initializer not visible
 * in this excerpt). */
400 static struct inet6_protocol pim6_protocol = {
404 /* Service routines creating virtual interfaces: PIMREG */
/* Transmit on the register pseudo-device: account the packet and bounce
 * it whole to pim6sd as an MRT6MSG_WHOLEPKT report. */
406 static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
408 read_lock(&mrt_lock);
409 dev->stats.tx_bytes += skb->len;
410 dev->stats.tx_packets++;
411 ip6mr_cache_report(skb, reg_vif_num, MRT6MSG_WHOLEPKT);
412 read_unlock(&mrt_lock);
/* Only ndo_start_xmit is implemented; the device never really sends. */
417 static const struct net_device_ops reg_vif_netdev_ops = {
418 .ndo_start_xmit = reg_vif_xmit,
/* Initialize the pim6reg pseudo-device: PIMREG ARP type, MTU reduced by
 * the IPv6 header plus register overhead, no ARP, freed on unregister. */
421 static void reg_vif_setup(struct net_device *dev)
423 dev->type = ARPHRD_PIMREG;
424 dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8;
425 dev->flags = IFF_NOARP;
/* Fixed mojibake: "&reg" had been decoded as the HTML entity '®'. */
426 dev->netdev_ops = &reg_vif_netdev_ops;
427 dev->destructor = free_netdev;
/* Allocate and register the "pim6reg" pseudo-device; returns NULL-ish
 * error paths are partly outside this excerpt. */
430 static struct net_device *ip6mr_reg_vif(void)
432 struct net_device *dev;
/* 0 bytes of private data; reg_vif_setup fills in the fields. */
434 dev = alloc_netdev(0, "pim6reg", reg_vif_setup)
/* Delete VIF number vifi: detach its device under the write lock,
 * shrink maxvif if the top slot was freed, drop allmulti and, for a
 * register VIF, unregister the pseudo-device. */
464 static int mif6_delete(int vifi)
466 struct mif_device *v;
467 struct net_device *dev;
468 if (vifi < 0 || vifi >= init_net.ipv6.maxvif)
469 return -EADDRNOTAVAIL;
471 v = &init_net.ipv6.vif6_table[vifi];
473 write_lock_bh(&mrt_lock);
/* Slot was already empty: nothing bound to this index. */
478 write_unlock_bh(&mrt_lock);
479 return -EADDRNOTAVAIL;
482 #ifdef CONFIG_IPV6_PIMSM_V2
483 if (vifi == reg_vif_num)
/* If the highest slot was removed, scan down for the new maxvif. */
487 if (vifi + 1 == init_net.ipv6.maxvif) {
489 for (tmp = vifi - 1; tmp >= 0; tmp--) {
490 if (MIF_EXISTS(&init_net, tmp))
493 init_net.ipv6.maxvif = tmp + 1;
496 write_unlock_bh(&mrt_lock);
498 dev_set_allmulti(dev, -1);
500 if (v->flags & MIFF_REGISTER)
501 unregister_netdevice(dev);
/* Release a cache entry: drop its netns reference, return it to the
 * slab cache. */
507 static inline void ip6mr_cache_free(struct mfc6_cache *c)
509 release_net(mfc6_net(c));
510 kmem_cache_free(mrt_cachep, c);
513 /* Destroy an unresolved cache entry, killing queued skbs
514 and reporting error to netlink readers.
517 static void ip6mr_destroy_unres(struct mfc6_cache *c)
521 atomic_dec(&init_net.ipv6.cache_resolve_queue_len);
523 while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
/* version == 0 marks a queued netlink request, not a real IPv6
 * packet: rewrite it into an ETIMEDOUT error and unicast it back. */
524 if (ipv6_hdr(skb)->version == 0) {
525 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
526 nlh->nlmsg_type = NLMSG_ERROR;
527 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
528 skb_trim(skb, nlh->nlmsg_len);
529 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
530 rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
539 /* Single timer process for all the unresolved queue. */
/* Walk mfc_unres_queue, destroying entries whose timeout has elapsed
 * and rearming the timer for the soonest remaining expiry.
 * Caller holds mfc_unres_lock. */
541 static void ipmr_do_expire_process(unsigned long dummy)
543 unsigned long now = jiffies;
544 unsigned long expires = 10 * HZ;
545 struct mfc6_cache *c, **cp;
547 cp = &mfc_unres_queue;
549 while ((c = *cp) != NULL) {
550 if (time_after(c->mfc_un.unres.expires, now)) {
/* Not yet due: remember the nearest future expiry. */
552 unsigned long interval = c->mfc_un.unres.expires - now;
553 if (interval < expires)
560 ip6mr_destroy_unres(c);
563 if (mfc_unres_queue != NULL)
564 mod_timer(&ipmr_expire_timer, jiffies + expires);
/* Timer handler: retry in one jiffy if the lock is contended rather
 * than spin in softirq context. */
567 static void ipmr_expire_process(unsigned long dummy)
569 if (!spin_trylock(&mfc_unres_lock)) {
570 mod_timer(&ipmr_expire_timer, jiffies + 1);
574 if (mfc_unres_queue != NULL)
575 ipmr_do_expire_process(dummy);
577 spin_unlock(&mfc_unres_lock);
580 /* Fill oifs list. It is called under write locked mrt_lock. */
/* Rebuild the per-entry ttl vector and [minvif, maxvif) span from the
 * caller-supplied ttls; 255 means "do not forward on this VIF". */
582 static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls)
586 cache->mfc_un.res.minvif = MAXMIFS;
587 cache->mfc_un.res.maxvif = 0;
588 memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
590 for (vifi = 0; vifi < init_net.ipv6.maxvif; vifi++) {
591 if (MIF_EXISTS(&init_net, vifi) &&
592 ttls[vifi] && ttls[vifi] < 255) {
593 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
594 if (cache->mfc_un.res.minvif > vifi)
595 cache->mfc_un.res.minvif = vifi;
596 if (cache->mfc_un.res.maxvif <= vifi)
597 cache->mfc_un.res.maxvif = vifi + 1;
/* Add a VIF described by vifc: resolve or create the backing device,
 * enable allmulti, fill the mif_device and publish it under the write
 * lock.  mrtsock flags a daemon-initiated (non-static) add. */
602 static int mif6_add(struct mif6ctl *vifc, int mrtsock)
604 int vifi = vifc->mif6c_mifi;
605 struct mif_device *v = &init_net.ipv6.vif6_table[vifi];
606 struct net_device *dev;
/* Slot already in use. */
610 if (MIF_EXISTS(&init_net, vifi))
613 switch (vifc->mif6c_flags) {
614 #ifdef CONFIG_IPV6_PIMSM_V2
617 * Special Purpose VIF in PIM
618 * All the packets will be sent to the daemon
/* Only one register VIF may exist. */
620 if (reg_vif_num >= 0)
622 dev = ip6mr_reg_vif();
625 err = dev_set_allmulti(dev, 1);
627 unregister_netdevice(dev);
/* Ordinary VIF: look up the physical interface by index. */
634 dev = dev_get_by_index(&init_net, vifc->mif6c_pifi);
636 return -EADDRNOTAVAIL;
637 err = dev_set_allmulti(dev, 1);
648 * Fill in the VIF structures
650 v->rate_limit = vifc->vifc_rate_limit;
651 v->flags = vifc->mif6c_flags;
/* Entries added outside the daemon socket are marked static. */
653 v->flags |= VIFF_STATIC;
654 v->threshold = vifc->vifc_threshold;
659 v->link = dev->ifindex;
660 if (v->flags & MIFF_REGISTER)
661 v->link = dev->iflink;
663 /* And finish update writing critical data */
664 write_lock_bh(&mrt_lock);
666 #ifdef CONFIG_IPV6_PIMSM_V2
667 if (v->flags & MIFF_REGISTER)
670 if (vifi + 1 > init_net.ipv6.maxvif)
671 init_net.ipv6.maxvif = vifi + 1;
672 write_unlock_bh(&mrt_lock);
/* Look up a resolved (origin, group) entry in the MFC hash.
 * Caller holds mrt_lock. */
676 static struct mfc6_cache *ip6mr_cache_find(struct in6_addr *origin, struct in6_addr *mcastgrp)
678 int line = MFC6_HASH(mcastgrp, origin);
679 struct mfc6_cache *c;
681 for (c = init_net.ipv6.mfc6_cache_array[line]; c; c = c->next) {
682 if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
683 ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
690 * Allocate a multicast cache entry
/* Resolved-entry allocation (process context, may sleep). */
692 static struct mfc6_cache *ip6mr_cache_alloc(struct net *net)
694 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
697 c->mfc_un.res.minvif = MAXMIFS;
698 mfc6_net_set(c, net);
/* Unresolved-entry allocation (packet path, atomic): entry expires
 * 10 seconds from now unless userspace resolves it first. */
702 static struct mfc6_cache *ip6mr_cache_alloc_unres(struct net *net)
704 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
707 skb_queue_head_init(&c->mfc_un.unres.unresolved);
708 c->mfc_un.unres.expires = jiffies + 10 * HZ;
709 mfc6_net_set(c, net);
714 * A cache entry has gone into a resolved state from queued
/* Drain the skbs queued on the unresolved entry uc: replay netlink
 * requests (version == 0) through ip6mr_fill_mroute and forward real
 * packets via the now-resolved entry c. */
717 static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
722 * Play the pending entries through our router
725 while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
726 if (ipv6_hdr(skb)->version == 0) {
728 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
730 if (ip6mr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
731 nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
/* Route did not fit: convert the reply into an EMSGSIZE error. */
733 nlh->nlmsg_type = NLMSG_ERROR;
734 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
735 skb_trim(skb, nlh->nlmsg_len);
736 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
738 err = rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
740 ip6_mr_forward(skb, c);
745 * Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
746 * expects the following bizarre scheme.
748 * Called under mrt_lock.
/* Build an mrt6msg (NOCACHE/WRONGMIF/WHOLEPKT) from pkt and queue it on
 * the mroute6 control socket for the userspace daemon. */
751 static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert)
757 #ifdef CONFIG_IPV6_PIMSM_V2
/* WHOLEPKT reports carry the entire original packet. */
758 if (assert == MRT6MSG_WHOLEPKT)
759 skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
763 skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
768 /* I suppose that internal messages
769 * do not require checksums */
771 skb->ip_summed = CHECKSUM_UNNECESSARY;
773 #ifdef CONFIG_IPV6_PIMSM_V2
774 if (assert == MRT6MSG_WHOLEPKT) {
775 /* Ugly, but we have no choice with this interface.
776 Duplicate old header, fix length etc.
777 And all this only to mangle msg->im6_msgtype and
778 to set msg->im6_mbz to "mbz" :-)
780 skb_push(skb, -skb_network_offset(pkt));
/* Prepend the mrt6msg header in front of the copied packet. */
782 skb_push(skb, sizeof(*msg));
783 skb_reset_transport_header(skb);
784 msg = (struct mrt6msg *)skb_transport_header(skb);
786 msg->im6_msgtype = MRT6MSG_WHOLEPKT;
787 msg->im6_mif = reg_vif_num;
789 ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
790 ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
792 skb->ip_summed = CHECKSUM_UNNECESSARY;
/* Non-WHOLEPKT path: copy only the IPv6 header, then the mrt6msg. */
800 skb_put(skb, sizeof(struct ipv6hdr));
801 skb_reset_network_header(skb);
802 skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
807 skb_put(skb, sizeof(*msg));
808 skb_reset_transport_header(skb);
809 msg = (struct mrt6msg *)skb_transport_header(skb);
812 msg->im6_msgtype = assert;
815 ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
816 ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
818 skb->dst = dst_clone(pkt->dst);
819 skb->ip_summed = CHECKSUM_UNNECESSARY;
821 skb_pull(skb, sizeof(struct ipv6hdr));
/* No daemon listening: the report cannot be delivered. */
824 if (init_net.ipv6.mroute6_sk == NULL) {
830 * Deliver to user space multicast routing algorithms
832 ret = sock_queue_rcv_skb(init_net.ipv6.mroute6_sk, skb);
835 printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
843 * Queue a packet for resolution. It gets locked cache entry!
/* Find or create an unresolved entry for skb's (src, dst) pair, report
 * MRT6MSG_NOCACHE to pim6sd on first sight, and queue the packet
 * (bounded queue) until userspace installs a route. */
847 ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb)
850 struct mfc6_cache *c;
852 spin_lock_bh(&mfc_unres_lock);
853 for (c = mfc_unres_queue; c; c = c->next) {
854 if (net_eq(mfc6_net(c), &init_net) &&
855 ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
856 ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr))
862 * Create a new entry if allowable
/* Hard cap of 10 pending unresolved entries. */
865 if (atomic_read(&init_net.ipv6.cache_resolve_queue_len) >= 10 ||
866 (c = ip6mr_cache_alloc_unres(&init_net)) == NULL) {
867 spin_unlock_bh(&mfc_unres_lock);
874 * Fill in the new cache entry
877 c->mf6c_origin = ipv6_hdr(skb)->saddr;
878 c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
881 * Reflect first query at pim6sd
883 if ((err = ip6mr_cache_report(skb, mifi, MRT6MSG_NOCACHE)) < 0) {
884 /* If the report failed throw the cache entry
887 spin_unlock_bh(&mfc_unres_lock);
/* Link the new entry in and (re)start the expiry machinery. */
894 atomic_inc(&init_net.ipv6.cache_resolve_queue_len);
895 c->next = mfc_unres_queue;
898 ipmr_do_expire_process(1);
902 * See if we can append the packet
/* At most 3 packets are held per unresolved entry. */
904 if (c->mfc_un.unres.unresolved.qlen > 3) {
908 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
912 spin_unlock_bh(&mfc_unres_lock);
917 * MFC6 cache manipulation by user space
/* Remove the (origin, group) entry named by mfc from its hash chain
 * under the write lock. */
920 static int ip6mr_mfc_delete(struct mf6cctl *mfc)
923 struct mfc6_cache *c, **cp;
925 line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
927 for (cp = &init_net.ipv6.mfc6_cache_array[line];
928 (c = *cp) != NULL; cp = &c->next) {
929 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
930 ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
931 write_lock_bh(&mrt_lock);
933 write_unlock_bh(&mrt_lock);
/* Netdevice notifier: on NETDEV_UNREGISTER in init_net, scan the VIF
 * table for slots bound to the departing device. */
942 static int ip6mr_device_event(struct notifier_block *this,
943 unsigned long event, void *ptr)
945 struct net_device *dev = ptr;
946 struct mif_device *v;
/* Only the initial namespace is handled by this (pre-netns) code. */
949 if (!net_eq(dev_net(dev), &init_net))
952 if (event != NETDEV_UNREGISTER)
955 v = &init_net.ipv6.vif6_table[0];
956 for (ct = 0; ct < init_net.ipv6.maxvif; ct++, v++) {
963 static struct notifier_block ip6_mr_notifier = {
964 .notifier_call = ip6mr_device_event
968 * Setup for IP multicast routing
/* Per-net init: allocate the VIF table (MAXMIFS slots) and the MFC
 * hash array (MFC6_LINES buckets); unwind on failure. */
971 static int __net_init ip6mr_net_init(struct net *net)
975 net->ipv6.vif6_table = kcalloc(MAXMIFS, sizeof(struct mif_device),
977 if (!net->ipv6.vif6_table) {
982 /* Forwarding cache */
983 net->ipv6.mfc6_cache_array = kcalloc(MFC6_LINES,
984 sizeof(struct mfc6_cache *),
986 if (!net->ipv6.mfc6_cache_array) {
988 goto fail_mfc6_cache;
/* Error unwind: free the VIF table allocated above. */
993 kfree(net->ipv6.vif6_table);
/* Per-net teardown: release both tables. */
998 static void __net_exit ip6mr_net_exit(struct net *net)
1000 kfree(net->ipv6.mfc6_cache_array);
1001 kfree(net->ipv6.vif6_table);
1004 static struct pernet_operations ip6mr_net_ops = {
1005 .init = ip6mr_net_init,
1006 .exit = ip6mr_net_exit,
/* Module init: create the slab cache, register pernet ops, set up the
 * expiry timer, the netdevice notifier and the two /proc files, with
 * goto-based unwinding on each failure. */
1009 int __init ip6_mr_init(void)
1013 mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1014 sizeof(struct mfc6_cache),
1015 0, SLAB_HWCACHE_ALIGN,
1020 err = register_pernet_subsys(&ip6mr_net_ops);
1022 goto reg_pernet_fail;
1024 setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
1025 err = register_netdevice_notifier(&ip6_mr_notifier);
1027 goto reg_notif_fail;
1028 #ifdef CONFIG_PROC_FS
1030 if (!proc_net_fops_create(&init_net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
1032 if (!proc_net_fops_create(&init_net, "ip6_mr_cache",
1033 0, &ip6mr_mfc_fops))
1034 goto proc_cache_fail;
/* Error unwind labels (label names themselves not visible here). */
1037 #ifdef CONFIG_PROC_FS
1039 proc_net_remove(&init_net, "ip6_mr_vif");
1041 unregister_netdevice_notifier(&ip6_mr_notifier);
1044 del_timer(&ipmr_expire_timer);
1045 unregister_pernet_subsys(&ip6mr_net_ops);
1047 kmem_cache_destroy(mrt_cachep);
/* Module teardown: reverse of ip6_mr_init in strict order. */
1051 void ip6_mr_cleanup(void)
1053 #ifdef CONFIG_PROC_FS
1054 proc_net_remove(&init_net, "ip6_mr_cache");
1055 proc_net_remove(&init_net, "ip6_mr_vif");
1057 unregister_netdevice_notifier(&ip6_mr_notifier);
1058 del_timer(&ipmr_expire_timer);
1059 unregister_pernet_subsys(&ip6mr_net_ops);
1060 kmem_cache_destroy(mrt_cachep);
/* Install or update an MFC entry from userspace: update in place if
 * the (origin, group) pair already exists, otherwise allocate, link
 * into the hash, then resolve any matching queued entry. */
1063 static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock)
1066 struct mfc6_cache *uc, *c, **cp;
1067 unsigned char ttls[MAXMIFS];
/* Convert the interface bitmap into a ttl vector (255 = excluded). */
1070 memset(ttls, 255, MAXMIFS);
1071 for (i = 0; i < MAXMIFS; i++) {
1072 if (IF_ISSET(i, &mfc->mf6cc_ifset))
1077 line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1079 for (cp = &init_net.ipv6.mfc6_cache_array[line];
1080 (c = *cp) != NULL; cp = &c->next) {
1081 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1082 ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr))
/* Existing entry: update parent and thresholds under write lock. */
1087 write_lock_bh(&mrt_lock);
1088 c->mf6c_parent = mfc->mf6cc_parent;
1089 ip6mr_update_thresholds(c, ttls);
1091 c->mfc_flags |= MFC_STATIC;
1092 write_unlock_bh(&mrt_lock);
1096 if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1099 c = ip6mr_cache_alloc(&init_net);
1103 c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1104 c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1105 c->mf6c_parent = mfc->mf6cc_parent;
1106 ip6mr_update_thresholds(c, ttls);
1108 c->mfc_flags |= MFC_STATIC;
/* Publish the new entry at the head of its hash chain. */
1110 write_lock_bh(&mrt_lock);
1111 c->next = init_net.ipv6.mfc6_cache_array[line];
1112 init_net.ipv6.mfc6_cache_array[line] = c;
1113 write_unlock_bh(&mrt_lock);
1116 * Check to see if we resolved a queued list. If so we
1117 * need to send on the frames and tidy up.
1119 spin_lock_bh(&mfc_unres_lock);
1120 for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
1122 if (net_eq(mfc6_net(uc), &init_net) &&
1123 ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1124 ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1126 atomic_dec(&init_net.ipv6.cache_resolve_queue_len);
/* Queue drained: stop the expiry timer. */
1130 if (mfc_unres_queue == NULL)
1131 del_timer(&ipmr_expire_timer);
1132 spin_unlock_bh(&mfc_unres_lock);
/* Replay the packets that were waiting on this route. */
1135 ip6mr_cache_resolve(uc, c);
1136 ip6mr_cache_free(uc);
1142 * Close the multicast socket, and clear the vif tables etc
/* Tear down all non-static VIFs and MFC entries, then flush this net's
 * unresolved queue; called when the daemon socket goes away. */
1145 static void mroute_clean_tables(struct sock *sk)
1150 * Shut down all active vif entries
/* Static VIFs (added outside the daemon) survive the cleanup. */
1152 for (i = 0; i < init_net.ipv6.maxvif; i++) {
1153 if (!(init_net.ipv6.vif6_table[i].flags & VIFF_STATIC))
1160 for (i = 0; i < MFC6_LINES; i++) {
1161 struct mfc6_cache *c, **cp;
1163 cp = &init_net.ipv6.mfc6_cache_array[i];
1164 while ((c = *cp) != NULL) {
1165 if (c->mfc_flags & MFC_STATIC) {
/* Unlink under write lock, free outside it. */
1169 write_lock_bh(&mrt_lock);
1171 write_unlock_bh(&mrt_lock);
1173 ip6mr_cache_free(c);
1177 if (atomic_read(&init_net.ipv6.cache_resolve_queue_len) != 0) {
1178 struct mfc6_cache *c, **cp;
1180 spin_lock_bh(&mfc_unres_lock);
1181 cp = &mfc_unres_queue;
1182 while ((c = *cp) != NULL) {
/* Entries belonging to other namespaces are left alone. */
1183 if (!net_eq(mfc6_net(c), &init_net)) {
1188 ip6mr_destroy_unres(c);
1190 spin_unlock_bh(&mfc_unres_lock);
/* Bind sk as the single mroute6 control socket (fails if one is
 * already registered). */
1194 static int ip6mr_sk_init(struct sock *sk)
1199 write_lock_bh(&mrt_lock);
1200 if (likely(init_net.ipv6.mroute6_sk == NULL))
1201 init_net.ipv6.mroute6_sk = sk;
1204 write_unlock_bh(&mrt_lock);
/* Unbind the control socket and clean the routing tables it owned. */
1211 int ip6mr_sk_done(struct sock *sk)
1216 if (sk == init_net.ipv6.mroute6_sk) {
1217 write_lock_bh(&mrt_lock);
1218 init_net.ipv6.mroute6_sk = NULL;
1219 write_unlock_bh(&mrt_lock);
1221 mroute_clean_tables(sk);
1230 * Socket options and virtual interface manipulation. The whole
1231 * virtual interface system is a complete heap, but unfortunately
1232 * that's how BSD mrouted happens to think. Maybe one day with a proper
1233 * MOSPF/PIM router set up we can clean this up.
/* Dispatch the MRT6_* socket options: INIT/DONE, VIF add/delete, MFC
 * add/delete, ASSERT and (optionally) PIM mode. */
1236 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
/* Everything except MRT6_INIT requires the bound daemon socket or
 * CAP_NET_ADMIN. */
1243 if (optname != MRT6_INIT) {
1244 if (sk != init_net.ipv6.mroute6_sk && !capable(CAP_NET_ADMIN))
/* MRT6_INIT: only a raw ICMPv6 socket may become the daemon socket. */
1250 if (sk->sk_type != SOCK_RAW ||
1251 inet_sk(sk)->num != IPPROTO_ICMPV6)
1253 if (optlen < sizeof(int))
1256 return ip6mr_sk_init(sk);
1259 return ip6mr_sk_done(sk);
/* MRT6_ADD_MIF: copy in a struct mif6ctl and add the VIF. */
1262 if (optlen < sizeof(vif))
1264 if (copy_from_user(&vif, optval, sizeof(vif)))
1266 if (vif.mif6c_mifi >= MAXMIFS)
1269 ret = mif6_add(&vif, sk == init_net.ipv6.mroute6_sk);
/* MRT6_DEL_MIF: copy in a mifi_t index and delete the VIF. */
1274 if (optlen < sizeof(mifi_t))
1276 if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1279 ret = mif6_delete(mifi);
1284 * Manipulate the forwarding caches. These live
1285 * in a sort of kernel/user symbiosis.
1289 if (optlen < sizeof(mfc))
1291 if (copy_from_user(&mfc, optval, sizeof(mfc)))
1294 if (optname == MRT6_DEL_MFC)
1295 ret = ip6mr_mfc_delete(&mfc);
1297 ret = ip6mr_mfc_add(&mfc, sk == init_net.ipv6.mroute6_sk);
1302 * Control PIM assert (to activate pim will activate assert)
1307 if (get_user(v, (int __user *)optval))
1309 mroute_do_assert = !!v;
1313 #ifdef CONFIG_IPV6_PIMSM_V2
/* MRT6_PIM: toggling PIM mode (un)registers the IPPROTO_PIM handler. */
1317 if (get_user(v, (int __user *)optval))
1322 if (v != mroute_do_pim) {
1324 mroute_do_assert = v;
1326 ret = inet6_add_protocol(&pim6_protocol,
1329 ret = inet6_del_protocol(&pim6_protocol,
1340 * Spurious command, or MRT6_VERSION which you cannot
1344 return -ENOPROTOOPT;
1349 * Getsock opt support for the multicast routing system.
/* Return the current PIM/assert flags (and, per the comment above,
 * MRT6_VERSION) clamped to the caller-supplied option length. */
1352 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1362 #ifdef CONFIG_IPV6_PIMSM_V2
1364 val = mroute_do_pim;
1368 val = mroute_do_assert;
1371 return -ENOPROTOOPT;
1374 if (get_user(olr, optlen))
/* Never copy out more than sizeof(int). */
1377 olr = min_t(int, olr, sizeof(int));
1381 if (put_user(olr, optlen))
1383 if (copy_to_user(optval, &val, olr))
1389 * The IP multicast ioctl support routines.
/* SIOCGETMIFCNT_IN6 / SIOCGETSGCNT_IN6: copy per-VIF or per-(S,G)
 * counters out to userspace under the read lock. */
1392 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1394 struct sioc_sg_req6 sr;
1395 struct sioc_mif_req6 vr;
1396 struct mif_device *vif;
1397 struct mfc6_cache *c;
1400 case SIOCGETMIFCNT_IN6:
1401 if (copy_from_user(&vr, arg, sizeof(vr)))
1403 if (vr.mifi >= init_net.ipv6.maxvif)
1405 read_lock(&mrt_lock);
1406 vif = &init_net.ipv6.vif6_table[vr.mifi];
1407 if (MIF_EXISTS(&init_net, vr.mifi)) {
1408 vr.icount = vif->pkt_in;
1409 vr.ocount = vif->pkt_out;
1410 vr.ibytes = vif->bytes_in;
1411 vr.obytes = vif->bytes_out;
/* Drop the lock before touching user memory. */
1412 read_unlock(&mrt_lock);
1414 if (copy_to_user(arg, &vr, sizeof(vr)))
/* VIF slot empty. */
1418 read_unlock(&mrt_lock);
1419 return -EADDRNOTAVAIL;
1420 case SIOCGETSGCNT_IN6:
1421 if (copy_from_user(&sr, arg, sizeof(sr)))
1424 read_lock(&mrt_lock);
1425 c = ip6mr_cache_find(&sr.src.sin6_addr, &sr.grp.sin6_addr);
1427 sr.pktcnt = c->mfc_un.res.pkt;
1428 sr.bytecnt = c->mfc_un.res.bytes;
1429 sr.wrong_if = c->mfc_un.res.wrong_if;
1430 read_unlock(&mrt_lock);
1432 if (copy_to_user(arg, &sr, sizeof(sr)))
/* No such (S,G) entry. */
1436 read_unlock(&mrt_lock);
1437 return -EADDRNOTAVAIL;
1439 return -ENOIOCTLCMD;
/* Netfilter okfn: bump forwarding stats and hand the skb to the
 * routing output path. */
1444 static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1446 IP6_INC_STATS_BH(dev_net(skb->dst->dev), ip6_dst_idev(skb->dst),
1447 IPSTATS_MIB_OUTFORWDATAGRAMS);
1448 return dst_output(skb);
1452 * Processing handlers for ip6mr_forward
/* Transmit one copy of skb on VIF vifi: register VIFs get a WHOLEPKT
 * report instead of a real transmit; normal VIFs are routed and sent
 * through NF_INET_FORWARD. */
1455 static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1457 struct ipv6hdr *ipv6h;
1458 struct mif_device *vif = &init_net.ipv6.vif6_table[vifi];
1459 struct net_device *dev;
1460 struct dst_entry *dst;
1463 if (vif->dev == NULL)
1466 #ifdef CONFIG_IPV6_PIMSM_V2
/* Register VIF: account and bounce the whole packet to pim6sd. */
1467 if (vif->flags & MIFF_REGISTER) {
1469 vif->bytes_out += skb->len;
1470 vif->dev->stats.tx_bytes += skb->len;
1471 vif->dev->stats.tx_packets++;
1472 ip6mr_cache_report(skb, vifi, MRT6MSG_WHOLEPKT);
1478 ipv6h = ipv6_hdr(skb);
/* Route the copy by destination address only. */
1480 fl = (struct flowi) {
1483 { .daddr = ipv6h->daddr, }
1487 dst = ip6_route_output(&init_net, NULL, &fl);
1491 dst_release(skb->dst);
1495 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1496 * not only before forwarding, but after forwarding on all output
1497 * interfaces. It is clear, if mrouter runs a multicasting
1498 * program, it should receive packets not depending to what interface
1499 * program is joined.
1500 * If we will not make it, the program will have to join on all
1501 * interfaces. On the other hand, multihoming host (or router, but
1502 * not mrouter) cannot join to more than one interface - it will
1503 * result in receiving multiple packets.
1508 vif->bytes_out += skb->len;
1510 /* We are about to write */
1511 /* XXX: extension headers? */
1512 if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
1515 ipv6h = ipv6_hdr(skb);
1518 IP6CB(skb)->flags |= IP6SKB_FORWARDED;
1520 return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dev,
1521 ip6mr_forward2_finish);
/* Reverse lookup: return the VIF index bound to dev, scanning from
 * the top of the table downward. */
1528 static int ip6mr_find_vif(struct net_device *dev)
1531 for (ct = init_net.ipv6.maxvif - 1; ct >= 0; ct--) {
1532 if (init_net.ipv6.vif6_table[ct].dev == dev)
/* Forward skb along the resolved entry: verify it arrived on the
 * expected parent VIF (RPF check, possibly raising a PIM assert),
 * then clone one copy per eligible outgoing VIF. */
1538 static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
1543 vif = cache->mf6c_parent;
1544 cache->mfc_un.res.pkt++;
1545 cache->mfc_un.res.bytes += skb->len;
1548 * Wrong interface: drop packet and (maybe) send PIM assert.
1550 if (init_net.ipv6.vif6_table[vif].dev != skb->dev) {
1553 cache->mfc_un.res.wrong_if++;
1554 true_vifi = ip6mr_find_vif(skb->dev);
1556 if (true_vifi >= 0 && mroute_do_assert &&
1557 /* pimsm uses asserts, when switching from RPT to SPT,
1558 so that we cannot check that packet arrived on an oif.
1559 It is bad, but otherwise we would need to move pretty
1560 large chunk of pimd to kernel. Ough... --ANK
/* Rate-limit asserts to one per MFC_ASSERT_THRESH interval. */
1562 (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
1564 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1565 cache->mfc_un.res.last_assert = jiffies;
1566 ip6mr_cache_report(skb, true_vifi, MRT6MSG_WRONGMIF);
1571 init_net.ipv6.vif6_table[vif].pkt_in++;
1572 init_net.ipv6.vif6_table[vif].bytes_in += skb->len;
/* Clone for every VIF whose ttl threshold the packet passes; the
 * original skb is consumed by the final ip6mr_forward2 call. */
1577 for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
1578 if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
1580 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1582 ip6mr_forward2(skb2, cache, psend);
1588 ip6mr_forward2(skb, cache, psend);
1599 * Multicast packets for forwarding arrive here
/* Entry point from the IPv6 receive path: look up the (src, dst) MFC
 * entry under the read lock; queue the packet as unresolved when no
 * entry exists, otherwise forward it. */
1602 int ip6_mr_input(struct sk_buff *skb)
1604 struct mfc6_cache *cache;
1606 read_lock(&mrt_lock);
1607 cache = ip6mr_cache_find(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
1610 * No usable cache entry
1612 if (cache == NULL) {
1615 vif = ip6mr_find_vif(skb->dev);
/* Arrived on a known VIF: park the packet awaiting resolution. */
1617 int err = ip6mr_cache_unresolved(vif, skb);
1618 read_unlock(&mrt_lock);
1622 read_unlock(&mrt_lock);
1627 ip6_mr_forward(skb, cache);
1629 read_unlock(&mrt_lock);
/* Append RTA_IIF and an RTA_MULTIPATH nexthop list (one rtnexthop per
 * forwarding VIF, hops = ttl threshold) describing entry c to an
 * rtnetlink reply skb; marks the route RTN_MULTICAST. */
1636 ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm)
1639 struct rtnexthop *nhp;
1640 struct net_device *dev = init_net.ipv6.vif6_table[c->mf6c_parent].dev;
1641 u8 *b = skb_tail_pointer(skb);
1642 struct rtattr *mp_head;
1645 RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
/* Reserve the multipath attribute header; length fixed up below. */
1647 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1649 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1650 if (c->mfc_un.res.ttls[ct] < 255) {
1651 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1652 goto rtattr_failure;
1653 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1654 nhp->rtnh_flags = 0;
1655 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1656 nhp->rtnh_ifindex = init_net.ipv6.vif6_table[ct].dev->ifindex;
1657 nhp->rtnh_len = sizeof(*nhp);
1660 mp_head->rta_type = RTA_MULTIPATH;
1661 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1662 rtm->rtm_type = RTN_MULTICAST;
1670 int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1673 struct mfc6_cache *cache;
1674 struct rt6_info *rt = (struct rt6_info *)skb->dst;
1676 read_lock(&mrt_lock);
1677 cache = ip6mr_cache_find(&rt->rt6i_src.addr, &rt->rt6i_dst.addr);
1680 struct sk_buff *skb2;
1681 struct ipv6hdr *iph;
1682 struct net_device *dev;
1686 read_unlock(&mrt_lock);
1691 if (dev == NULL || (vif = ip6mr_find_vif(dev)) < 0) {
1692 read_unlock(&mrt_lock);
1696 /* really correct? */
1697 skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
1699 read_unlock(&mrt_lock);
1703 skb_reset_transport_header(skb2);
1705 skb_put(skb2, sizeof(struct ipv6hdr));
1706 skb_reset_network_header(skb2);
1708 iph = ipv6_hdr(skb2);
1711 iph->flow_lbl[0] = 0;
1712 iph->flow_lbl[1] = 0;
1713 iph->flow_lbl[2] = 0;
1714 iph->payload_len = 0;
1715 iph->nexthdr = IPPROTO_NONE;
1717 ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
1718 ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);
1720 err = ip6mr_cache_unresolved(vif, skb2);
1721 read_unlock(&mrt_lock);
1726 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1727 cache->mfc_flags |= MFC_NOTIFY;
1729 err = ip6mr_fill_mroute(skb, cache, rtm);
1730 read_unlock(&mrt_lock);