2 * Linux IPv6 multicast routing support for BSD pim6sd
3 * Based on net/ipv4/ipmr.c.
5 * (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6 * LSIIT Laboratory, Strasbourg, France
7 * (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
9 * Copyright (C)2007,2008 USAGI/WIDE Project
10 * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
19 #include <asm/system.h>
20 #include <asm/uaccess.h>
21 #include <linux/types.h>
22 #include <linux/sched.h>
23 #include <linux/errno.h>
24 #include <linux/timer.h>
26 #include <linux/kernel.h>
27 #include <linux/fcntl.h>
28 #include <linux/stat.h>
29 #include <linux/socket.h>
30 #include <linux/inet.h>
31 #include <linux/netdevice.h>
32 #include <linux/inetdevice.h>
33 #include <linux/proc_fs.h>
34 #include <linux/seq_file.h>
35 #include <linux/init.h>
36 #include <net/protocol.h>
37 #include <linux/skbuff.h>
40 #include <linux/notifier.h>
41 #include <linux/if_arp.h>
42 #include <net/checksum.h>
43 #include <net/netlink.h>
46 #include <net/ip6_route.h>
47 #include <linux/mroute6.h>
48 #include <linux/pim.h>
49 #include <net/addrconf.h>
50 #include <linux/netfilter_ipv6.h>
/*
 * NOTE(review): this extract appears to be a line-sampled dump — each line
 * carries its original file line number fused at the front, and interior
 * lines are elided.  Code below is left byte-identical; only comments added.
 */
52 /* Big lock, protecting vif table, mrt cache and mroute socket state.
53 Note that the changes are semaphored via rtnl_lock.
56 static DEFINE_RWLOCK(mrt_lock);
59 * Multicast router control variables
/* True when slot _idx of the per-net vif table is occupied by a device. */
62 #define MIF_EXISTS(_net, _idx) ((_net)->ipv6.vif6_table[_idx].dev != NULL)
64 static struct mfc6_cache *mfc_unres_queue; /* Queue of unresolved entries */
66 /* Special spinlock for queue of unresolved entries */
67 static DEFINE_SPINLOCK(mfc_unres_lock);
69 /* We return to original Alan's scheme. Hash table of resolved
70 entries is changed only in process context and protected
71 with weak lock mrt_lock. Queue of unresolved entries is protected
72 with strong spinlock mfc_unres_lock.
74 In this case data path is free of exclusive locks at all.
/* Slab cache for struct mfc6_cache allocations. */
77 static struct kmem_cache *mrt_cachep __read_mostly;
/* Forward declarations for routines defined later in this file. */
79 static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache);
80 static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert);
81 static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm);
83 #ifdef CONFIG_IPV6_PIMSM_V2
84 static struct inet6_protocol pim6_protocol;
/* Single timer driving expiry of the unresolved-entry queue. */
87 static struct timer_list ipmr_expire_timer;
/* Iterator state for the /proc mfc cache seq_file walk. */
92 struct ipmr_mfc_iter {
93 	struct mfc6_cache **cache;
/*
 * Position the mfc iterator at entry 'pos'.  Walks the resolved hash
 * array under mrt_lock first, then the unresolved queue under
 * mfc_unres_lock.  NOTE(review): loop bodies that decrement/return on
 * 'pos' appear elided in this extract — confirm against full source.
 */
98 static struct mfc6_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
100 	struct mfc6_cache *mfc;
102 	it->cache = init_net.ipv6.mfc6_cache_array;
103 	read_lock(&mrt_lock);
104 	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++)
105 		for (mfc = init_net.ipv6.mfc6_cache_array[it->ct];
106 		     mfc; mfc = mfc->next)
109 	read_unlock(&mrt_lock);
111 	it->cache = &mfc_unres_queue;
112 	spin_lock_bh(&mfc_unres_lock);
113 	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
116 	spin_unlock_bh(&mfc_unres_lock);
126 * The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
/* Iterator state for the /proc vif table walk. */
129 struct ipmr_vif_iter {
/*
 * Return the vif at iterator position 'pos', skipping empty slots.
 * NOTE(review): the pos-decrement line is elided in this extract.
 */
133 static struct mif_device *ip6mr_vif_seq_idx(struct ipmr_vif_iter *iter,
136 	for (iter->ct = 0; iter->ct < init_net.ipv6.maxvif; ++iter->ct) {
137 		if (!MIF_EXISTS(&init_net, iter->ct))
140 		return &init_net.ipv6.vif6_table[iter->ct];
/* seq_file .start: take mrt_lock for the whole traversal. */
145 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
148 	read_lock(&mrt_lock);
149 	return (*pos ? ip6mr_vif_seq_idx(seq->private, *pos - 1)
/* seq_file .next: advance to the next occupied vif slot. */
153 static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
155 	struct ipmr_vif_iter *iter = seq->private;
158 	if (v == SEQ_START_TOKEN)
159 		return ip6mr_vif_seq_idx(iter, 0);
161 	while (++iter->ct < init_net.ipv6.maxvif) {
162 		if (!MIF_EXISTS(&init_net, iter->ct))
164 		return &init_net.ipv6.vif6_table[iter->ct];
/* seq_file .stop: drop the lock taken in .start. */
169 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
172 	read_unlock(&mrt_lock);
/* seq_file .show: header line, then one stats row per vif. */
175 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
177 	if (v == SEQ_START_TOKEN) {
179 			 "Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
181 		const struct mif_device *vif = v;
182 		const char *name = vif->dev ? vif->dev->name : "none";
185 			   "%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
/* Pointer subtraction yields the vif index within the table. */
186 			   vif - init_net.ipv6.vif6_table,
187 			   name, vif->bytes_in, vif->pkt_in,
188 			   vif->bytes_out, vif->pkt_out,
/* seq_file operations for /proc/net/ip6_mr_vif. */
194 static struct seq_operations ip6mr_vif_seq_ops = {
195 	.start = ip6mr_vif_seq_start,
196 	.next  = ip6mr_vif_seq_next,
197 	.stop  = ip6mr_vif_seq_stop,
198 	.show  = ip6mr_vif_seq_show,
/* open(): allocate per-reader iterator state alongside the seq_file. */
201 static int ip6mr_vif_open(struct inode *inode, struct file *file)
203 	return seq_open_private(file, &ip6mr_vif_seq_ops,
204 				sizeof(struct ipmr_vif_iter));
207 static struct file_operations ip6mr_vif_fops = {
208 	.owner	 = THIS_MODULE,
209 	.open    = ip6mr_vif_open,
212 	.release = seq_release_private,
/* seq_file .start for the mfc cache listing. */
215 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
217 	return (*pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
/*
 * seq_file .next: follow the hash chain, then later buckets, then fall
 * through to the unresolved queue (switching from mrt_lock to
 * mfc_unres_lock as it crosses over).
 */
221 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
223 	struct mfc6_cache *mfc = v;
224 	struct ipmr_mfc_iter *it = seq->private;
228 	if (v == SEQ_START_TOKEN)
229 		return ipmr_mfc_seq_idx(seq->private, 0);
234 	if (it->cache == &mfc_unres_queue)
237 	BUG_ON(it->cache != init_net.ipv6.mfc6_cache_array);
239 	while (++it->ct < MFC6_LINES) {
240 		mfc = init_net.ipv6.mfc6_cache_array[it->ct];
245 	/* exhausted cache_array, show unresolved */
246 	read_unlock(&mrt_lock);
247 	it->cache = &mfc_unres_queue;
250 	spin_lock_bh(&mfc_unres_lock);
251 	mfc = mfc_unres_queue;
256 	spin_unlock_bh(&mfc_unres_lock);
/*
 * seq_file .stop: release whichever lock the traversal currently holds —
 * mfc_unres_lock if we ended in the unresolved queue, mrt_lock if still
 * in the resolved hash array.
 */
262 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
264 	struct ipmr_mfc_iter *it = seq->private;
266 	if (it->cache == &mfc_unres_queue)
267 		spin_unlock_bh(&mfc_unres_lock);
268 	else if (it->cache == init_net.ipv6.mfc6_cache_array)
269 		read_unlock(&mrt_lock);
/* seq_file .show: print one mfc entry (group, origin, iif, counters, oifs). */
272 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
276 	if (v == SEQ_START_TOKEN) {
280 			 "Iif Pkts Bytes Wrong Oifs\n");
282 		const struct mfc6_cache *mfc = v;
283 		const struct ipmr_mfc_iter *it = seq->private;
285 		seq_printf(seq, "%pI6 %pI6 %-3hd",
286 			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
/* Resolved entries carry real counters and an oif list... */
289 		if (it->cache != &mfc_unres_queue) {
290 			seq_printf(seq, " %8lu %8lu %8lu",
292 				   mfc->mfc_un.res.bytes,
293 				   mfc->mfc_un.res.wrong_if);
294 			for (n = mfc->mfc_un.res.minvif;
295 			     n < mfc->mfc_un.res.maxvif; n++) {
296 				if (MIF_EXISTS(&init_net, n) &&
297 				    mfc->mfc_un.res.ttls[n] < 255)
300 						   n, mfc->mfc_un.res.ttls[n]);
303 			/* unresolved mfc_caches don't contain
304 			 * pkt, bytes and wrong_if values
306 			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
/* seq_file operations for /proc/net/ip6_mr_cache. */
313 static struct seq_operations ipmr_mfc_seq_ops = {
314 	.start = ipmr_mfc_seq_start,
315 	.next  = ipmr_mfc_seq_next,
316 	.stop  = ipmr_mfc_seq_stop,
317 	.show  = ipmr_mfc_seq_show,
/* open(): allocate per-reader iterator state alongside the seq_file. */
320 static int ipmr_mfc_open(struct inode *inode, struct file *file)
322 	return seq_open_private(file, &ipmr_mfc_seq_ops,
323 				sizeof(struct ipmr_mfc_iter));
326 static struct file_operations ip6mr_mfc_fops = {
327 	.owner	 = THIS_MODULE,
328 	.open    = ipmr_mfc_open,
331 	.release = seq_release_private,
335 #ifdef CONFIG_IPV6_PIMSM_V2
/* Index of the PIM register vif, or -1 when none is configured. */
336 static int reg_vif_num = -1;
/*
 * Receive a PIM REGISTER packet: validate the PIM header and checksum,
 * check the encapsulated packet is multicast, then strip the outer
 * headers and re-inject the inner packet on the register device.
 * NOTE(review): several statements (drops, netif_rx, returns) are
 * elided in this extract.
 */
338 static int pim6_rcv(struct sk_buff *skb)
340 	struct pimreghdr *pim;
341 	struct ipv6hdr   *encap;
342 	struct net_device  *reg_dev = NULL;
344 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
347 	pim = (struct pimreghdr *)skb_transport_header(skb);
348 	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
349 	    (pim->flags & PIM_NULL_REGISTER) ||
350 	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
351 	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
354 	/* check if the inner packet is destined to mcast group */
355 	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
358 	if (!ipv6_addr_is_multicast(&encap->daddr) ||
359 	    encap->payload_len == 0 ||
360 	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
363 	read_lock(&mrt_lock);
364 	if (reg_vif_num >= 0)
365 		reg_dev = init_net.ipv6.vif6_table[reg_vif_num].dev;
368 	read_unlock(&mrt_lock);
373 	skb->mac_header = skb->network_header;
374 	skb_pull(skb, (u8 *)encap - skb->data);
375 	skb_reset_network_header(skb);
/* NOTE(review): ETH_P_IP here (not ETH_P_IPV6) — looks suspicious; confirm
 * against the full upstream source before changing. */
377 	skb->protocol = htons(ETH_P_IP);
379 	skb->pkt_type = PACKET_HOST;
380 	dst_release(skb->dst);
381 	reg_dev->stats.rx_bytes += skb->len;
382 	reg_dev->stats.rx_packets++;
393 static struct inet6_protocol pim6_protocol = {
397 /* Service routines creating virtual interfaces: PIMREG */
/*
 * Transmit path of the pim6reg device: account the packet and bounce it
 * up to the daemon as a MRT6MSG_WHOLEPKT report instead of sending it.
 */
399 static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
401 	read_lock(&mrt_lock);
402 	dev->stats.tx_bytes += skb->len;
403 	dev->stats.tx_packets++;
404 	ip6mr_cache_report(skb, reg_vif_num, MRT6MSG_WHOLEPKT);
405 	read_unlock(&mrt_lock);
410 static const struct net_device_ops reg_vif_netdev_ops = {
411 	.ndo_start_xmit	= reg_vif_xmit,
/*
 * Initialise the pim6reg pseudo-device: PIM register ARP type, an MTU
 * that leaves room for the outer IPv6 header plus the 8-byte PIM
 * register header, no ARP, and automatic freeing on unregister.
 *
 * Fix: "®_vif_netdev_ops" was a mojibake of "&reg_vif_netdev_ops"
 * (the "&reg" prefix was swallowed as the HTML entity &reg;).
 */
414 static void reg_vif_setup(struct net_device *dev)
416 	dev->type		= ARPHRD_PIMREG;
417 	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
418 	dev->flags		= IFF_NOARP;
419 	dev->netdev_ops		= &reg_vif_netdev_ops;
420 	dev->destructor		= free_netdev;
/*
 * Create and register the "pim6reg" device; on registration failure the
 * device is unregistered again.  NOTE(review): NULL checks, the
 * dev_open path and the return statements are elided in this extract.
 */
423 static struct net_device *ip6mr_reg_vif(void)
425 	struct net_device *dev;
427 	dev = alloc_netdev(0, "pim6reg", reg_vif_setup);
431 	if (register_netdevice(dev)) {
444 	/* allow the register to be completed before unregistering. */
448 	unregister_netdevice(dev);
/*
 * Delete vif 'vifi': detach its device under mrt_lock, shrink maxvif if
 * the highest slot was freed, then drop allmulti and unregister
 * register-type devices outside the lock.
 */
457 static int mif6_delete(int vifi)
459 	struct mif_device *v;
460 	struct net_device *dev;
461 	if (vifi < 0 || vifi >= init_net.ipv6.maxvif)
462 		return -EADDRNOTAVAIL;
464 	v = &init_net.ipv6.vif6_table[vifi];
466 	write_lock_bh(&mrt_lock);
/* Slot was already empty. */
471 		write_unlock_bh(&mrt_lock);
472 		return -EADDRNOTAVAIL;
475 #ifdef CONFIG_IPV6_PIMSM_V2
476 	if (vifi == reg_vif_num)
/* If we removed the last vif, scan down for the new maxvif. */
480 	if (vifi + 1 == init_net.ipv6.maxvif) {
482 		for (tmp = vifi - 1; tmp >= 0; tmp--) {
483 			if (MIF_EXISTS(&init_net, tmp))
486 		init_net.ipv6.maxvif = tmp + 1;
489 	write_unlock_bh(&mrt_lock);
491 	dev_set_allmulti(dev, -1);
493 	if (v->flags & MIFF_REGISTER)
494 		unregister_netdevice(dev);
/* Release an mfc entry: drop its net reference and return it to the slab. */
500 static inline void ip6mr_cache_free(struct mfc6_cache *c)
502 	release_net(mfc6_net(c));
503 	kmem_cache_free(mrt_cachep, c);
506 /* Destroy an unresolved cache entry, killing queued skbs
507    and reporting error to netlink readers.
510 static void ip6mr_destroy_unres(struct mfc6_cache *c)
514 	atomic_dec(&init_net.ipv6.cache_resolve_queue_len);
516 	while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
/* version==0 marks a queued netlink request skb rather than a data packet;
 * rewrite it into an NLMSG_ERROR(-ETIMEDOUT) reply for the requester. */
517 		if (ipv6_hdr(skb)->version == 0) {
518 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
519 			nlh->nlmsg_type = NLMSG_ERROR;
520 			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
521 			skb_trim(skb, nlh->nlmsg_len);
522 			((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
523 			rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
532 /* Single timer process for all the unresolved queue. */
/*
 * Walk the unresolved queue under mfc_unres_lock (caller holds it):
 * destroy expired entries, remember the nearest future expiry, and
 * re-arm the timer if entries remain.
 */
534 static void ipmr_do_expire_process(unsigned long dummy)
536 	unsigned long now = jiffies;
537 	unsigned long expires = 10 * HZ;
538 	struct mfc6_cache *c, **cp;
540 	cp = &mfc_unres_queue;
542 	while ((c = *cp) != NULL) {
543 		if (time_after(c->mfc_un.unres.expires, now)) {
/* Not yet expired — track the soonest remaining deadline. */
545 			unsigned long interval = c->mfc_un.unres.expires - now;
546 			if (interval < expires)
553 		ip6mr_destroy_unres(c);
556 	if (mfc_unres_queue != NULL)
557 		mod_timer(&ipmr_expire_timer, jiffies + expires);
/*
 * Timer callback: retry in one jiffy if the queue lock is contended
 * (we cannot sleep here), otherwise run the expiry pass.
 */
560 static void ipmr_expire_process(unsigned long dummy)
562 	if (!spin_trylock(&mfc_unres_lock)) {
563 		mod_timer(&ipmr_expire_timer, jiffies + 1);
567 	if (mfc_unres_queue != NULL)
568 		ipmr_do_expire_process(dummy);
570 	spin_unlock(&mfc_unres_lock);
573 /* Fill oifs list. It is called under write locked mrt_lock. */
/*
 * Rebuild the entry's per-vif TTL thresholds from 'ttls' and recompute
 * the [minvif, maxvif) range of vifs that actually forward.  A ttl of
 * 255 (or 0) means "do not forward on this vif".
 */
575 static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls)
579 	cache->mfc_un.res.minvif = MAXMIFS;
580 	cache->mfc_un.res.maxvif = 0;
581 	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
583 	for (vifi = 0; vifi < init_net.ipv6.maxvif; vifi++) {
584 		if (MIF_EXISTS(&init_net, vifi) &&
585 		    ttls[vifi] && ttls[vifi] < 255) {
586 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
587 			if (cache->mfc_un.res.minvif > vifi)
588 				cache->mfc_un.res.minvif = vifi;
589 			if (cache->mfc_un.res.maxvif <= vifi)
590 				cache->mfc_un.res.maxvif = vifi + 1;
/*
 * Add a new vif from userspace (MRT6_ADD_MIF).  Creates the PIM
 * register device for MIFF_REGISTER vifs, otherwise looks up the
 * physical device, enables allmulti, fills the mif_device and publishes
 * it under mrt_lock.  NOTE(review): error paths and the final device
 * assignment are partially elided in this extract.
 */
595 static int mif6_add(struct mif6ctl *vifc, int mrtsock)
597 	int vifi = vifc->mif6c_mifi;
598 	struct mif_device *v = &init_net.ipv6.vif6_table[vifi];
599 	struct net_device *dev;
/* Refuse to overwrite an occupied slot. */
603 	if (MIF_EXISTS(&init_net, vifi))
606 	switch (vifc->mif6c_flags) {
607 #ifdef CONFIG_IPV6_PIMSM_V2
610 		 * Special Purpose VIF in PIM
611 		 * All the packets will be sent to the daemon
/* Only one register vif is allowed. */
613 		if (reg_vif_num >= 0)
615 		dev = ip6mr_reg_vif();
618 		err = dev_set_allmulti(dev, 1);
620 			unregister_netdevice(dev);
627 		dev = dev_get_by_index(&init_net, vifc->mif6c_pifi);
629 			return -EADDRNOTAVAIL;
630 		err = dev_set_allmulti(dev, 1);
641 	 *	Fill in the VIF structures
643 	v->rate_limit = vifc->vifc_rate_limit;
644 	v->flags = vifc->mif6c_flags;
646 		v->flags |= VIFF_STATIC;
647 	v->threshold = vifc->vifc_threshold;
652 	v->link = dev->ifindex;
653 	if (v->flags & MIFF_REGISTER)
654 		v->link = dev->iflink;
656 	/* And finish update writing critical data */
657 	write_lock_bh(&mrt_lock);
659 #ifdef CONFIG_IPV6_PIMSM_V2
660 	if (v->flags & MIFF_REGISTER)
663 	if (vifi + 1 > init_net.ipv6.maxvif)
664 		init_net.ipv6.maxvif = vifi + 1;
665 	write_unlock_bh(&mrt_lock);
/* Look up a resolved (origin, group) entry in its hash chain; caller
 * must hold mrt_lock. */
669 static struct mfc6_cache *ip6mr_cache_find(struct in6_addr *origin, struct in6_addr *mcastgrp)
671 	int line = MFC6_HASH(mcastgrp, origin);
672 	struct mfc6_cache *c;
674 	for (c = init_net.ipv6.mfc6_cache_array[line]; c; c = c->next) {
675 		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
676 		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
683  *	Allocate a multicast cache entry
/* GFP_KERNEL allocation for entries added from process context. */
685 static struct mfc6_cache *ip6mr_cache_alloc(struct net *net)
687 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
690 	c->mfc_un.res.minvif = MAXMIFS;
691 	mfc6_net_set(c, net);
/* GFP_ATOMIC variant used on the packet path for unresolved entries;
 * initialises the pending-skb queue and a 10 s expiry. */
695 static struct mfc6_cache *ip6mr_cache_alloc_unres(struct net *net)
697 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
700 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
701 	c->mfc_un.unres.expires = jiffies + 10 * HZ;
702 	mfc6_net_set(c, net);
707  *	A cache entry has gone into a resolved state from queued
/*
 * Flush packets that were queued on the unresolved entry 'uc' through
 * the now-resolved entry 'c'.  Queued netlink requests (marked by
 * version==0) are answered in place; real packets are forwarded.
 */
710 static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
715 	 *	Play the pending entries through our router
718 	while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
719 		if (ipv6_hdr(skb)->version == 0) {
721 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
723 			if (ip6mr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
724 				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
/* Route didn't fit — turn the reply into NLMSG_ERROR(-EMSGSIZE). */
726 				nlh->nlmsg_type = NLMSG_ERROR;
727 				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
728 				skb_trim(skb, nlh->nlmsg_len);
729 				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
731 			err = rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
733 			ip6_mr_forward(skb, c);
738  *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
739  *	expects the following bizarre scheme.
741  *	Called under mrt_lock.
/*
 * Build an mrt6msg for the daemon socket.  WHOLEPKT reports wrap the
 * whole original packet; NOCACHE/WRONGMIF reports carry only the inner
 * IPv6 header.  Delivered via sock_queue_rcv_skb to mroute6_sk.
 */
744 static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert)
750 #ifdef CONFIG_IPV6_PIMSM_V2
751 	if (assert == MRT6MSG_WHOLEPKT)
752 		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
756 		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
761 	/* I suppose that internal messages
762 	 * do not require checksums */
764 	skb->ip_summed = CHECKSUM_UNNECESSARY;
766 #ifdef CONFIG_IPV6_PIMSM_V2
767 	if (assert == MRT6MSG_WHOLEPKT) {
768 		/* Ugly, but we have no choice with this interface.
769 		   Duplicate old header, fix length etc.
770 		   And all this only to mangle msg->im6_msgtype and
771 		   to set msg->im6_mbz to "mbz" :-)
773 		skb_push(skb, -skb_network_offset(pkt));
775 		skb_push(skb, sizeof(*msg));
776 		skb_reset_transport_header(skb);
777 		msg = (struct mrt6msg *)skb_transport_header(skb);
779 		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
780 		msg->im6_mif = reg_vif_num;
782 		ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
783 		ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
785 		skb->ip_summed = CHECKSUM_UNNECESSARY;
/* Non-WHOLEPKT path: copy just the IPv6 header, then the mrt6msg. */
793 	skb_put(skb, sizeof(struct ipv6hdr));
794 	skb_reset_network_header(skb);
795 	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
800 	skb_put(skb, sizeof(*msg));
801 	skb_reset_transport_header(skb);
802 	msg = (struct mrt6msg *)skb_transport_header(skb);
805 	msg->im6_msgtype = assert;
808 	ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
809 	ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
811 	skb->dst = dst_clone(pkt->dst);
812 	skb->ip_summed = CHECKSUM_UNNECESSARY;
814 	skb_pull(skb, sizeof(struct ipv6hdr));
/* No daemon listening — report cannot be delivered. */
817 	if (init_net.ipv6.mroute6_sk == NULL) {
823 	 *	Deliver to user space multicast routing algorithms
825 	ret = sock_queue_rcv_skb(init_net.ipv6.mroute6_sk, skb);
828 		printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
836  *	Queue a packet for resolution. It gets locked cache entry!
/*
 * No route for this (S,G): find or create an unresolved entry under
 * mfc_unres_lock, report MRT6MSG_NOCACHE to the daemon for a new entry,
 * and queue the skb (bounded to a few packets per entry).
 */
840 ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb)
843 	struct mfc6_cache *c;
845 	spin_lock_bh(&mfc_unres_lock);
846 	for (c = mfc_unres_queue; c; c = c->next) {
847 		if (net_eq(mfc6_net(c), &init_net) &&
848 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
849 		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr))
855 		 *	Create a new entry if allowable
/* Cap the unresolved queue at 10 entries. */
858 		if (atomic_read(&init_net.ipv6.cache_resolve_queue_len) >= 10 ||
859 		    (c = ip6mr_cache_alloc_unres(&init_net)) == NULL) {
860 			spin_unlock_bh(&mfc_unres_lock);
867 		 *	Fill in the new cache entry
870 		c->mf6c_origin = ipv6_hdr(skb)->saddr;
871 		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
874 		 *	Reflect first query at pim6sd
876 		if ((err = ip6mr_cache_report(skb, mifi, MRT6MSG_NOCACHE)) < 0) {
877 			/* If the report failed throw the cache entry
880 			spin_unlock_bh(&mfc_unres_lock);
887 		atomic_inc(&init_net.ipv6.cache_resolve_queue_len);
888 		c->next = mfc_unres_queue;
/* Kick the expiry machinery for the newly queued entry. */
891 		ipmr_do_expire_process(1);
895 	 *	See if we can append the packet
897 	if (c->mfc_un.unres.unresolved.qlen > 3) {
901 		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
905 	spin_unlock_bh(&mfc_unres_lock);
910  *	MFC6 cache manipulation by user space
/*
 * MRT6_DEL_MFC: unlink the matching (origin, group) entry from its hash
 * chain under mrt_lock.  NOTE(review): the unlink/free statements are
 * elided in this extract.
 */
913 static int ip6mr_mfc_delete(struct mf6cctl *mfc)
916 	struct mfc6_cache *c, **cp;
918 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
920 	for (cp = &init_net.ipv6.mfc6_cache_array[line];
921 	     (c = *cp) != NULL; cp = &c->next) {
922 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
923 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
924 			write_lock_bh(&mrt_lock);
926 			write_unlock_bh(&mrt_lock);
/*
 * Netdevice notifier: on NETDEV_UNREGISTER, scan the vif table and drop
 * any vif bound to the departing device (init_net only).
 */
935 static int ip6mr_device_event(struct notifier_block *this,
936 			      unsigned long event, void *ptr)
938 	struct net_device *dev = ptr;
939 	struct mif_device *v;
942 	if (!net_eq(dev_net(dev), &init_net))
945 	if (event != NETDEV_UNREGISTER)
948 	v = &init_net.ipv6.vif6_table[0];
949 	for (ct = 0; ct < init_net.ipv6.maxvif; ct++, v++) {
956 static struct notifier_block ip6_mr_notifier = {
957 	.notifier_call = ip6mr_device_event
961  *	Setup for IP multicast routing
/*
 * Per-netns init: allocate the vif table and the resolved-cache hash
 * array; unwinds the vif table on cache allocation failure.
 */
964 static int __net_init ip6mr_net_init(struct net *net)
968 	net->ipv6.vif6_table = kcalloc(MAXMIFS, sizeof(struct mif_device),
970 	if (!net->ipv6.vif6_table) {
975 	/* Forwarding cache */
976 	net->ipv6.mfc6_cache_array = kcalloc(MFC6_LINES,
977 					     sizeof(struct mfc6_cache *),
979 	if (!net->ipv6.mfc6_cache_array) {
981 		goto fail_mfc6_cache;
986 	kfree(net->ipv6.vif6_table);
/* Per-netns teardown: free both tables allocated in init. */
991 static void __net_exit ip6mr_net_exit(struct net *net)
993 	kfree(net->ipv6.mfc6_cache_array);
994 	kfree(net->ipv6.vif6_table);
997 static struct pernet_operations ip6mr_net_ops = {
998 	.init = ip6mr_net_init,
999 	.exit = ip6mr_net_exit,
/*
 * Module init: slab cache, pernet subsystem, expiry timer, netdevice
 * notifier and the two /proc entries — unwound in reverse order via the
 * goto-error ladder (labels partially elided in this extract).
 */
1002 int __init ip6_mr_init(void)
1006 	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1007 				       sizeof(struct mfc6_cache),
1008 				       0, SLAB_HWCACHE_ALIGN,
1013 	err = register_pernet_subsys(&ip6mr_net_ops);
1015 		goto reg_pernet_fail;
1017 	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
1018 	err = register_netdevice_notifier(&ip6_mr_notifier);
1020 		goto reg_notif_fail;
1021 #ifdef CONFIG_PROC_FS
1023 	if (!proc_net_fops_create(&init_net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
1025 	if (!proc_net_fops_create(&init_net, "ip6_mr_cache",
1026 				  0, &ip6mr_mfc_fops))
1027 		goto proc_cache_fail;
1030 #ifdef CONFIG_PROC_FS
1032 	proc_net_remove(&init_net, "ip6_mr_vif");
1034 	unregister_netdevice_notifier(&ip6_mr_notifier);
1037 	del_timer(&ipmr_expire_timer);
1038 	unregister_pernet_subsys(&ip6mr_net_ops);
1040 	kmem_cache_destroy(mrt_cachep);
/* Module teardown: mirror of ip6_mr_init in reverse order. */
1044 void ip6_mr_cleanup(void)
1046 #ifdef CONFIG_PROC_FS
1047 	proc_net_remove(&init_net, "ip6_mr_cache");
1048 	proc_net_remove(&init_net, "ip6_mr_vif");
1050 	unregister_netdevice_notifier(&ip6_mr_notifier);
1051 	del_timer(&ipmr_expire_timer);
1052 	unregister_pernet_subsys(&ip6mr_net_ops);
1053 	kmem_cache_destroy(mrt_cachep);
/*
 * MRT6_ADD_MFC: update an existing (origin, group) entry in place, or
 * allocate and insert a new one; then resolve any matching entry that
 * was waiting on the unresolved queue, replaying its pending packets.
 */
1056 static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock)
1059 	struct mfc6_cache *uc, *c, **cp;
1060 	unsigned char ttls[MAXMIFS];
/* Translate the userspace if-set into a per-vif TTL array (255 = off). */
1063 	memset(ttls, 255, MAXMIFS);
1064 	for (i = 0; i < MAXMIFS; i++) {
1065 		if (IF_ISSET(i, &mfc->mf6cc_ifset))
1070 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1072 	for (cp = &init_net.ipv6.mfc6_cache_array[line];
1073 	     (c = *cp) != NULL; cp = &c->next) {
1074 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1075 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr))
/* Entry exists: update parent/thresholds under the write lock. */
1080 		write_lock_bh(&mrt_lock);
1081 		c->mf6c_parent = mfc->mf6cc_parent;
1082 		ip6mr_update_thresholds(c, ttls);
1084 			c->mfc_flags |= MFC_STATIC;
1085 		write_unlock_bh(&mrt_lock);
1089 	if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1092 	c = ip6mr_cache_alloc(&init_net);
1096 	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1097 	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1098 	c->mf6c_parent = mfc->mf6cc_parent;
1099 	ip6mr_update_thresholds(c, ttls);
1101 		c->mfc_flags |= MFC_STATIC;
1103 	write_lock_bh(&mrt_lock);
1104 	c->next = init_net.ipv6.mfc6_cache_array[line];
1105 	init_net.ipv6.mfc6_cache_array[line] = c;
1106 	write_unlock_bh(&mrt_lock);
1109 	 *	Check to see if we resolved a queued list. If so we
1110 	 *	need to send on the frames and tidy up.
1112 	spin_lock_bh(&mfc_unres_lock);
1113 	for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
1115 		if (net_eq(mfc6_net(uc), &init_net) &&
1116 		    ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1117 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1119 			atomic_dec(&init_net.ipv6.cache_resolve_queue_len);
/* Queue drained — no need to keep the expiry timer armed. */
1123 	if (mfc_unres_queue == NULL)
1124 		del_timer(&ipmr_expire_timer);
1125 	spin_unlock_bh(&mfc_unres_lock);
1128 		ip6mr_cache_resolve(uc, c);
1129 		ip6mr_cache_free(uc);
1135  *	Close the multicast socket, and clear the vif tables etc
/*
 * Tear down routing state when the daemon socket closes: delete all
 * non-static vifs, free all non-static resolved entries, and destroy
 * this net's unresolved entries.
 */
1138 static void mroute_clean_tables(struct sock *sk)
1143 	 *	Shut down all active vif entries
1145 	for (i = 0; i < init_net.ipv6.maxvif; i++) {
1146 		if (!(init_net.ipv6.vif6_table[i].flags & VIFF_STATIC))
1153 	for (i = 0; i < MFC6_LINES; i++) {
1154 		struct mfc6_cache *c, **cp;
1156 		cp = &init_net.ipv6.mfc6_cache_array[i];
1157 		while ((c = *cp) != NULL) {
/* Static entries survive daemon shutdown. */
1158 			if (c->mfc_flags & MFC_STATIC) {
1162 			write_lock_bh(&mrt_lock);
1164 			write_unlock_bh(&mrt_lock);
1166 			ip6mr_cache_free(c);
1170 	if (atomic_read(&init_net.ipv6.cache_resolve_queue_len) != 0) {
1171 		struct mfc6_cache *c, **cp;
1173 		spin_lock_bh(&mfc_unres_lock);
1174 		cp = &mfc_unres_queue;
1175 		while ((c = *cp) != NULL) {
1176 			if (!net_eq(mfc6_net(c), &init_net)) {
1181 			ip6mr_destroy_unres(c);
1183 		spin_unlock_bh(&mfc_unres_lock);
/*
 * MRT6_INIT: claim the single daemon-socket slot; only one mroute6
 * socket may exist at a time.
 */
1187 static int ip6mr_sk_init(struct sock *sk)
1192 	write_lock_bh(&mrt_lock);
1193 	if (likely(init_net.ipv6.mroute6_sk == NULL))
1194 		init_net.ipv6.mroute6_sk = sk;
1197 	write_unlock_bh(&mrt_lock);
/* MRT6_DONE / socket close: release the slot and clean routing state. */
1204 int ip6mr_sk_done(struct sock *sk)
1209 	if (sk == init_net.ipv6.mroute6_sk) {
1210 		write_lock_bh(&mrt_lock);
1211 		init_net.ipv6.mroute6_sk = NULL;
1212 		write_unlock_bh(&mrt_lock);
1214 		mroute_clean_tables(sk);
1223  *	Socket options and virtual interface manipulation. The whole
1224  *	virtual interface system is a complete heap, but unfortunately
1225  *	that's how BSD mrouted happens to think. Maybe one day with a proper
1226  *	MOSPF/PIM router set up we can clean this up.
/*
 * setsockopt() entry for the MRT6_* options.  Only the daemon socket
 * (or CAP_NET_ADMIN) may issue anything other than MRT6_INIT.
 * NOTE(review): case labels are elided in this extract; option bodies
 * appear in the usual order (INIT, DONE, ADD/DEL_MIF, ADD/DEL_MFC,
 * ASSERT, PIM).
 */
1229 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
1236 	if (optname != MRT6_INIT) {
1237 		if (sk != init_net.ipv6.mroute6_sk && !capable(CAP_NET_ADMIN))
1243 		if (sk->sk_type != SOCK_RAW ||
1244 		    inet_sk(sk)->num != IPPROTO_ICMPV6)
1246 		if (optlen < sizeof(int))
1249 		return ip6mr_sk_init(sk);
1252 		return ip6mr_sk_done(sk);
1255 		if (optlen < sizeof(vif))
1257 		if (copy_from_user(&vif, optval, sizeof(vif)))
1259 		if (vif.mif6c_mifi >= MAXMIFS)
1262 		ret = mif6_add(&vif, sk == init_net.ipv6.mroute6_sk);
1267 		if (optlen < sizeof(mifi_t))
1269 		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1272 		ret = mif6_delete(mifi);
1277 	 *	Manipulate the forwarding caches. These live
1278 	 *	in a sort of kernel/user symbiosis.
1282 		if (optlen < sizeof(mfc))
1284 		if (copy_from_user(&mfc, optval, sizeof(mfc)))
1287 		if (optname == MRT6_DEL_MFC)
1288 			ret = ip6mr_mfc_delete(&mfc);
1290 			ret = ip6mr_mfc_add(&mfc, sk == init_net.ipv6.mroute6_sk);
1295 	 *	Control PIM assert (to activate pim will activate assert)
1300 		if (get_user(v, (int __user *)optval))
1302 		init_net.ipv6.mroute_do_assert = !!v;
1306 #ifdef CONFIG_IPV6_PIMSM_V2
1310 		if (get_user(v, (int __user *)optval))
/* Toggling PIM registers/unregisters the PIM protocol handler. */
1315 		if (v != init_net.ipv6.mroute_do_pim) {
1316 			init_net.ipv6.mroute_do_pim = v;
1317 			init_net.ipv6.mroute_do_assert = v;
1318 			if (init_net.ipv6.mroute_do_pim)
1319 				ret = inet6_add_protocol(&pim6_protocol,
1322 				ret = inet6_del_protocol(&pim6_protocol,
1333 	 *	Spurious command, or MRT6_VERSION which you cannot
1337 		return -ENOPROTOOPT;
1342  *	Getsock opt support for the multicast routing system.
/*
 * getsockopt() entry: returns the PIM and assert flags as ints, copying
 * at most the caller-specified length back.
 */
1345 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1355 #ifdef CONFIG_IPV6_PIMSM_V2
1357 		val = init_net.ipv6.mroute_do_pim;
1361 		val = init_net.ipv6.mroute_do_assert;
1364 		return -ENOPROTOOPT;
1367 	if (get_user(olr, optlen))
1370 	olr = min_t(int, olr, sizeof(int));
1374 	if (put_user(olr, optlen))
1376 	if (copy_to_user(optval, &val, olr))
1382  *	The IP multicast ioctl support routines.
/*
 * ioctl handler: SIOCGETMIFCNT_IN6 copies per-vif counters out,
 * SIOCGETSGCNT_IN6 copies per-(S,G) counters out; both under mrt_lock.
 */
1385 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1387 	struct sioc_sg_req6 sr;
1388 	struct sioc_mif_req6 vr;
1389 	struct mif_device *vif;
1390 	struct mfc6_cache *c;
1393 	case SIOCGETMIFCNT_IN6:
1394 		if (copy_from_user(&vr, arg, sizeof(vr)))
1396 		if (vr.mifi >= init_net.ipv6.maxvif)
1398 		read_lock(&mrt_lock);
1399 		vif = &init_net.ipv6.vif6_table[vr.mifi];
1400 		if (MIF_EXISTS(&init_net, vr.mifi)) {
1401 			vr.icount = vif->pkt_in;
1402 			vr.ocount = vif->pkt_out;
1403 			vr.ibytes = vif->bytes_in;
1404 			vr.obytes = vif->bytes_out;
1405 			read_unlock(&mrt_lock);
1407 			if (copy_to_user(arg, &vr, sizeof(vr)))
1411 		read_unlock(&mrt_lock);
1412 		return -EADDRNOTAVAIL;
1413 	case SIOCGETSGCNT_IN6:
1414 		if (copy_from_user(&sr, arg, sizeof(sr)))
1417 		read_lock(&mrt_lock);
1418 		c = ip6mr_cache_find(&sr.src.sin6_addr, &sr.grp.sin6_addr);
1420 			sr.pktcnt = c->mfc_un.res.pkt;
1421 			sr.bytecnt = c->mfc_un.res.bytes;
1422 			sr.wrong_if = c->mfc_un.res.wrong_if;
1423 			read_unlock(&mrt_lock);
1425 			if (copy_to_user(arg, &sr, sizeof(sr)))
1429 		read_unlock(&mrt_lock);
1430 		return -EADDRNOTAVAIL;
1432 		return -ENOIOCTLCMD;
/* netfilter okfn: bump forwarding stats and hand off to dst_output(). */
1437 static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1439 	IP6_INC_STATS_BH(dev_net(skb->dst->dev), ip6_dst_idev(skb->dst),
1440 			 IPSTATS_MIB_OUTFORWDATAGRAMS);
1441 	return dst_output(skb);
1445  *	Processing handlers for ip6mr_forward
/*
 * Transmit one copy of 'skb' on vif 'vifi'.  Register vifs bounce the
 * packet to the daemon as WHOLEPKT; real vifs get a fresh route, a
 * decremented hop limit, and go out through NF_INET_FORWARD.
 * NOTE(review): hop-limit decrement / drop labels are elided in this
 * extract.
 */
1448 static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1450 	struct ipv6hdr *ipv6h;
1451 	struct mif_device *vif = &init_net.ipv6.vif6_table[vifi];
1452 	struct net_device *dev;
1453 	struct dst_entry *dst;
1456 	if (vif->dev == NULL)
1459 #ifdef CONFIG_IPV6_PIMSM_V2
1460 	if (vif->flags & MIFF_REGISTER) {
1462 		vif->bytes_out += skb->len;
1463 		vif->dev->stats.tx_bytes += skb->len;
1464 		vif->dev->stats.tx_packets++;
1465 		ip6mr_cache_report(skb, vifi, MRT6MSG_WHOLEPKT);
1471 	ipv6h = ipv6_hdr(skb);
1473 	fl = (struct flowi) {
1476 			{ .daddr = ipv6h->daddr, }
1480 	dst = ip6_route_output(&init_net, NULL, &fl);
1484 	dst_release(skb->dst);
1488 	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1489 	 * not only before forwarding, but after forwarding on all output
1490 	 * interfaces. It is clear, if mrouter runs a multicasting
1491 	 * program, it should receive packets not depending to what interface
1492 	 * program is joined.
1493 	 * If we will not make it, the program will have to join on all
1494 	 * interfaces. On the other hand, multihoming host (or router, but
1495 	 * not mrouter) cannot join to more than one interface - it will
1496 	 * result in receiving multiple packets.
1501 	vif->bytes_out += skb->len;
1503 	/* We are about to write */
1504 	/* XXX: extension headers? */
1505 	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
1508 	ipv6h = ipv6_hdr(skb);
1511 	IP6CB(skb)->flags |= IP6SKB_FORWARDED;
1513 	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dev,
1514 		       ip6mr_forward2_finish);
/* Map a net_device back to its vif index, scanning highest-first;
 * NOTE(review): the final return (likely ct / -1) is elided here. */
1521 static int ip6mr_find_vif(struct net_device *dev)
1524 	for (ct = init_net.ipv6.maxvif - 1; ct >= 0; ct--) {
1525 		if (init_net.ipv6.vif6_table[ct].dev == dev)
/*
 * Forward a packet along a resolved cache entry: RPF-check the arrival
 * interface (raising a WRONGMIF assert if configured), then clone one
 * copy per eligible oif whose TTL threshold the packet's hop limit
 * exceeds, sending the original on the last one.
 */
1531 static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
1536 	vif = cache->mf6c_parent;
1537 	cache->mfc_un.res.pkt++;
1538 	cache->mfc_un.res.bytes += skb->len;
1541 	 * Wrong interface: drop packet and (maybe) send PIM assert.
1543 	if (init_net.ipv6.vif6_table[vif].dev != skb->dev) {
1546 		cache->mfc_un.res.wrong_if++;
1547 		true_vifi = ip6mr_find_vif(skb->dev);
1549 		if (true_vifi >= 0 && init_net.ipv6.mroute_do_assert &&
1550 		    /* pimsm uses asserts, when switching from RPT to SPT,
1551 		       so that we cannot check that packet arrived on an oif.
1552 		       It is bad, but otherwise we would need to move pretty
1553 		       large chunk of pimd to kernel. Ough... --ANK
1555 		    (init_net.ipv6.mroute_do_pim ||
1556 		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
1558 				 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1559 			cache->mfc_un.res.last_assert = jiffies;
1560 			ip6mr_cache_report(skb, true_vifi, MRT6MSG_WRONGMIF);
1565 	init_net.ipv6.vif6_table[vif].pkt_in++;
1566 	init_net.ipv6.vif6_table[vif].bytes_in += skb->len;
1571 	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
1572 		if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
1574 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1576 					ip6mr_forward2(skb2, cache, psend);
1582 		ip6mr_forward2(skb, cache, psend);
1593  *	Multicast packets for forwarding arrive here
/*
 * Input hook for multicast datagrams: look up the (S,G) cache entry
 * under mrt_lock and forward, or queue the packet as unresolved when no
 * entry exists yet.
 */
1596 int ip6_mr_input(struct sk_buff *skb)
1598 	struct mfc6_cache *cache;
1600 	read_lock(&mrt_lock);
1601 	cache = ip6mr_cache_find(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
1604 	 *	No usable cache entry
1606 	if (cache == NULL) {
1609 		vif = ip6mr_find_vif(skb->dev);
1611 			int err = ip6mr_cache_unresolved(vif, skb);
1612 			read_unlock(&mrt_lock);
1616 		read_unlock(&mrt_lock);
1621 	ip6_mr_forward(skb, cache);
1623 	read_unlock(&mrt_lock);
/*
 * Serialise a cache entry into an rtnetlink reply: RTA_IIF for the
 * parent vif plus an RTA_MULTIPATH nexthop per forwarding oif.
 */
1630 ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm)
1633 	struct rtnexthop *nhp;
1634 	struct net_device *dev = init_net.ipv6.vif6_table[c->mf6c_parent].dev;
1635 	u8 *b = skb_tail_pointer(skb);
1636 	struct rtattr *mp_head;
1639 		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1641 	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1643 	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1644 		if (c->mfc_un.res.ttls[ct] < 255) {
1645 			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1646 				goto rtattr_failure;
1647 			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1648 			nhp->rtnh_flags = 0;
1649 			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1650 			nhp->rtnh_ifindex = init_net.ipv6.vif6_table[ct].dev->ifindex;
1651 			nhp->rtnh_len = sizeof(*nhp);
1654 	mp_head->rta_type = RTA_MULTIPATH;
1655 	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1656 	rtm->rtm_type = RTN_MULTICAST;
1664 int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1667 struct mfc6_cache *cache;
1668 struct rt6_info *rt = (struct rt6_info *)skb->dst;
1670 read_lock(&mrt_lock);
1671 cache = ip6mr_cache_find(&rt->rt6i_src.addr, &rt->rt6i_dst.addr);
1674 struct sk_buff *skb2;
1675 struct ipv6hdr *iph;
1676 struct net_device *dev;
1680 read_unlock(&mrt_lock);
1685 if (dev == NULL || (vif = ip6mr_find_vif(dev)) < 0) {
1686 read_unlock(&mrt_lock);
1690 /* really correct? */
1691 skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
1693 read_unlock(&mrt_lock);
1697 skb_reset_transport_header(skb2);
1699 skb_put(skb2, sizeof(struct ipv6hdr));
1700 skb_reset_network_header(skb2);
1702 iph = ipv6_hdr(skb2);
1705 iph->flow_lbl[0] = 0;
1706 iph->flow_lbl[1] = 0;
1707 iph->flow_lbl[2] = 0;
1708 iph->payload_len = 0;
1709 iph->nexthdr = IPPROTO_NONE;
1711 ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
1712 ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);
1714 err = ip6mr_cache_unresolved(vif, skb2);
1715 read_unlock(&mrt_lock);
1720 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1721 cache->mfc_flags |= MFC_NOTIFY;
1723 err = ip6mr_fill_mroute(skb, cache, rtm);
1724 read_unlock(&mrt_lock);