/*
 *	Linux IPv6 multicast routing support for BSD pim6sd
 *	Based on net/ipv4/ipmr.c.
 *
 *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
 *		LSIIT Laboratory, Strasbourg, France
 *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
 *		6WIND, Paris, France
 *	Copyright (C)2007,2008 USAGI/WIDE Project
 *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */
#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <linux/mroute6.h>
#include <linux/pim.h>
#include <net/addrconf.h>
#include <linux/netfilter_ipv6.h>
/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

#define MIF_EXISTS(_net, _idx) ((_net)->ipv6.vif6_table[_idx].dev != NULL)

static int mroute_do_assert;				/* Set in PIM assert	*/
#ifdef CONFIG_IPV6_PIMSM_V2
static int mroute_do_pim;
#else
#define mroute_do_pim 0
#endif

static struct mfc6_cache *mfc6_cache_array[MFC6_LINES];	/* Forwarding cache	*/

static struct mfc6_cache *mfc_unres_queue;		/* Queue of unresolved entries */
static atomic_t cache_resolve_queue_len;		/* Size of unresolved	*/

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */

static struct kmem_cache *mrt_cachep __read_mostly;
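/*
 * Illustrative sketch (not from the original source) of the locking
 * discipline that the scheme above implies:
 *
 *	read_lock(&mrt_lock);		forwarding path, vif/MFC lookups
 *	...
 *	read_unlock(&mrt_lock);
 *
 *	write_lock_bh(&mrt_lock);	process context, table updates
 *	...
 *	write_unlock_bh(&mrt_lock);
 *
 *	spin_lock_bh(&mfc_unres_lock);	any access to mfc_unres_queue
 *	...
 *	spin_unlock_bh(&mfc_unres_lock);
 */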
static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache);
static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert);
static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm);

#ifdef CONFIG_IPV6_PIMSM_V2
static struct inet6_protocol pim6_protocol;
#endif

static struct timer_list ipmr_expire_timer;
#ifdef CONFIG_PROC_FS

struct ipmr_mfc_iter {
	struct mfc6_cache **cache;
	int ct;
};


static struct mfc6_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mfc6_cache *mfc;

	it->cache = mfc6_cache_array;
	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < ARRAY_SIZE(mfc6_cache_array); it->ct++)
		for (mfc = mfc6_cache_array[it->ct]; mfc; mfc = mfc->next)
			if (pos-- == 0)
				return mfc;
	read_unlock(&mrt_lock);

	it->cache = &mfc_unres_queue;
	spin_lock_bh(&mfc_unres_lock);
	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}
/*
 *	The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
 */

struct ipmr_vif_iter {
	int ct;
};

static struct mif_device *ip6mr_vif_seq_idx(struct ipmr_vif_iter *iter,
					    loff_t pos)
{
	for (iter->ct = 0; iter->ct < init_net.ipv6.maxvif; ++iter->ct) {
		if (!MIF_EXISTS(&init_net, iter->ct))
			continue;
		if (pos-- == 0)
			return &init_net.ipv6.vif6_table[iter->ct];
	}
	return NULL;
}

static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	read_lock(&mrt_lock);
	return (*pos ? ip6mr_vif_seq_idx(seq->private, *pos - 1)
		: SEQ_START_TOKEN);
}

static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip6mr_vif_seq_idx(iter, 0);

	while (++iter->ct < init_net.ipv6.maxvif) {
		if (!MIF_EXISTS(&init_net, iter->ct))
			continue;
		return &init_net.ipv6.vif6_table[iter->ct];
	}
	return NULL;
}

static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}
static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
	} else {
		const struct mif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
			   vif - init_net.ipv6.vif6_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags);
	}
	return 0;
}
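/*
 * Hypothetical /proc/net/ip6_mr_vif output produced by the format above
 * (interface names and counter values are made up for illustration;
 * the MIFF_REGISTER flag shows up as 00001 on the pim6reg mif):
 *
 *	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags
 *	 0 eth0           12000     100    24000     200 00000
 *	 1 pim6reg            0       0     1280      10 00001
 */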
static struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = ip6mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};

static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &ip6mr_vif_seq_ops,
				sizeof(struct ipmr_vif_iter));
}

static struct file_operations ip6mr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open	 = ip6mr_vif_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release_private,
};
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	return (*pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
		: SEQ_START_TOKEN);
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc6_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(seq->private, 0);

	if (mfc->next)
		return mfc->next;

	if (it->cache == &mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != mfc6_cache_array);

	while (++it->ct < ARRAY_SIZE(mfc6_cache_array)) {
		mfc = mfc6_cache_array[it->ct];
		if (mfc)
			return mfc;
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	mfc = mfc_unres_queue;
	if (mfc)
		return mfc;

 end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}
static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;

	if (it->cache == &mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == mfc6_cache_array)
		read_unlock(&mrt_lock);
}
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group                            "
			 "Origin                           "
			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
	} else {
		const struct mfc6_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;

		seq_printf(seq, "%pI6 %pI6 %-3hd",
			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
			   mfc->mf6c_parent);

		if (it->cache != &mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (MIF_EXISTS(&init_net, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}
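/*
 * Hypothetical /proc/net/ip6_mr_cache line for one resolved (S,G) entry,
 * forwarded to mifs 1 and 2 with threshold 1 (addresses and counters are
 * made up; on this kernel %pI6 prints the full, uncompressed address):
 *
 *	Group                            Origin                           Iif      Pkts  Bytes     Wrong  Oifs
 *	ff0e:0000:...:0001               2001:0db8:...:0001               0         100    10240        0  1:1   2:1
 */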
static struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &ipmr_mfc_seq_ops,
				sizeof(struct ipmr_mfc_iter));
}

static struct file_operations ip6mr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open	 = ipmr_mfc_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release_private,
};
#endif
#ifdef CONFIG_IPV6_PIMSM_V2
static int reg_vif_num = -1;
static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr *encap;
	struct net_device *reg_dev = NULL;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to mcast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = init_net.ipv6.vif6_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		goto drop;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IPV6);	/* the decapsulated packet is IPv6 */
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	dst_release(skb->dst);
	reg_dev->stats.rx_bytes += skb->len;
	reg_dev->stats.rx_packets++;
	skb->dst = NULL;
	netif_rx(skb);
	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}
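/*
 * Sketch of the PIM register packet that pim6_rcv() unwraps (a summary
 * of the register format, cf. RFC 4601, not copied from this file):
 *
 *	+-----------------------------+
 *	| outer IPv6 header           |  proto = IPPROTO_PIM, sent to the RP
 *	+-----------------------------+
 *	| struct pimreghdr (8 bytes)  |  type PIM_REGISTER, flags, checksum
 *	+-----------------------------+
 *	| inner IPv6 header + payload |  the original multicast packet,
 *	+-----------------------------+  re-injected via netif_rx() above
 */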
static struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};
/* Service routines creating virtual interfaces: PIMREG */

static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ip6mr_cache_report(skb, reg_vif_num, MRT6MSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return 0;
}
static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8; /* outer IPv6 + 8-byte PIM register header */
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
}
static struct net_device *ip6mr_reg_vif(void)
{
	struct net_device *dev;

	dev = alloc_netdev(0, "pim6reg", reg_vif_setup);
	if (dev == NULL)
		return NULL;

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 */
static int mif6_delete(int vifi)
{
	struct mif_device *v;
	struct net_device *dev;

	if (vifi < 0 || vifi >= init_net.ipv6.maxvif)
		return -EADDRNOTAVAIL;

	v = &init_net.ipv6.vif6_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == reg_vif_num)
		reg_vif_num = -1;
#endif

	if (vifi + 1 == init_net.ipv6.maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (MIF_EXISTS(&init_net, tmp))
				break;
		}
		init_net.ipv6.maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if (v->flags & MIFF_REGISTER)
		unregister_netdevice(dev);

	dev_put(dev);
	return 0;
}
static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
	release_net(mfc6_net(c));
	kmem_cache_free(mrt_cachep, c);
}
/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ip6mr_destroy_unres(struct mfc6_cache *c)
{
	struct sk_buff *skb;

	atomic_dec(&cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	ip6mr_cache_free(c);
}
/* Single timer process for all the unresolved queue. */

static void ipmr_do_expire_process(unsigned long dummy)
{
	unsigned long now = jiffies;
	unsigned long expires = 10 * HZ;
	struct mfc6_cache *c, **cp;

	cp = &mfc_unres_queue;

	while ((c = *cp) != NULL) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* not yet... */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			cp = &c->next;
			continue;
		}

		*cp = c->next;
		ip6mr_destroy_unres(c);
	}

	if (atomic_read(&cache_resolve_queue_len))
		mod_timer(&ipmr_expire_timer, jiffies + expires);
}
static void ipmr_expire_process(unsigned long dummy)
{
	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&ipmr_expire_timer, jiffies + 1);
		return;
	}

	if (atomic_read(&cache_resolve_queue_len))
		ipmr_do_expire_process(dummy);

	spin_unlock(&mfc_unres_lock);
}
/* Fill oifs list. It is called under write locked mrt_lock. */

static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXMIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);

	for (vifi = 0; vifi < init_net.ipv6.maxvif; vifi++) {
		if (MIF_EXISTS(&init_net, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}
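/*
 * Worked example (illustrative): with maxvif = 4 and
 * ttls = { 0, 2, 0, 4 }, the loop above leaves
 * res.ttls = { 255, 2, 255, 4 }, res.minvif = 1 and res.maxvif = 4,
 * so the forwarding loop in ip6_mr_forward() scans mifs 3..1 and can
 * only select mifs 1 and 3.
 */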
static int mif6_add(struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct mif_device *v = &init_net.ipv6.vif6_table[vifi];
	struct net_device *dev;
	int err;

	/* Is vif busy ? */
	if (MIF_EXISTS(&init_net, vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif();
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		dev = dev_get_by_index(&init_net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->flags = vifc->mif6c_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags & MIFF_REGISTER)
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		reg_vif_num = vifi;
#endif
	if (vifi + 1 > init_net.ipv6.maxvif)
		init_net.ipv6.maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	return 0;
}
static struct mfc6_cache *ip6mr_cache_find(struct in6_addr *origin, struct in6_addr *mcastgrp)
{
	int line = MFC6_HASH(mcastgrp, origin);
	struct mfc6_cache *c;

	for (c = mfc6_cache_array[line]; c; c = c->next) {
		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
			break;
	}
	return c;
}
/*
 *	Allocate a multicast cache entry
 */
static struct mfc6_cache *ip6mr_cache_alloc(struct net *net)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (c == NULL)
		return NULL;
	c->mfc_un.res.minvif = MAXMIFS;
	mfc6_net_set(c, net);
	return c;
}

static struct mfc6_cache *ip6mr_cache_alloc_unres(struct net *net)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (c == NULL)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10 * HZ;
	mfc6_net_set(c, net);
	return c;
}
/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ipv6_hdr(skb)->version == 0) {
			int err;
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));

			if (ip6mr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
			}
			err = rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
		} else
			ip6_mr_forward(skb, c);
	}
}
/*
 *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert)
{
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		msg->im6_mif = reg_vif_num;
		msg->im6_pad = 0;
		ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
		ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
	/*
	 *	Copy the IP header
	 */

	skb_put(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

	/*
	 *	Add our header
	 */
	skb_put(skb, sizeof(*msg));
	skb_reset_transport_header(skb);
	msg = (struct mrt6msg *)skb_transport_header(skb);

	msg->im6_mbz = 0;
	msg->im6_msgtype = assert;
	msg->im6_mif = mifi;
	msg->im6_pad = 0;
	ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
	ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);

	skb->dst = dst_clone(pkt->dst);
	skb->ip_summed = CHECKSUM_UNNECESSARY;

	skb_pull(skb, sizeof(struct ipv6hdr));
	}

	if (init_net.ipv6.mroute6_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to user space multicast routing algorithms
	 */
	ret = sock_queue_rcv_skb(init_net.ipv6.mroute6_sk, skb);
	if (ret < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}
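/*
 * What pim6sd reads off the mroute6 socket is a struct mrt6msg
 * (declared in <linux/mroute6.h>) built above: im6_msgtype is
 * MRT6MSG_NOCACHE, MRT6MSG_WRONGMIF or MRT6MSG_WHOLEPKT, im6_mif is
 * the mif the packet arrived on, and im6_src/im6_dst are copied from
 * the triggering packet's IPv6 header.
 */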
/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */

static int
ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb)
{
	int err;
	struct mfc6_cache *c;

	spin_lock_bh(&mfc_unres_lock);
	for (c = mfc_unres_queue; c; c = c->next) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr))
			break;
	}

	if (c == NULL) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&cache_resolve_queue_len) >= 10 ||
		    (c = ip6mr_cache_alloc_unres(&init_net)) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mf6c_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		if ((err = ip6mr_cache_report(skb, mifi, MRT6MSG_NOCACHE)) < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&cache_resolve_queue_len);
		c->next = mfc_unres_queue;
		mfc_unres_queue = c;

		ipmr_do_expire_process(1);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}
/*
 *	MFC6 cache manipulation by user space
 */

static int ip6mr_mfc_delete(struct mf6cctl *mfc)
{
	int line;
	struct mfc6_cache *c, **cp;

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			ip6mr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}
static int ip6mr_device_event(struct notifier_block *this,
			      unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct mif_device *v;
	int ct;

	if (!net_eq(dev_net(dev), &init_net))
		return NOTIFY_DONE;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	v = &init_net.ipv6.vif6_table[0];
	for (ct = 0; ct < init_net.ipv6.maxvif; ct++, v++) {
		if (v->dev == dev)
			mif6_delete(ct);
	}
	return NOTIFY_DONE;
}

static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};
/*
 *	Setup for IP multicast routing
 */

static int __net_init ip6mr_net_init(struct net *net)
{
	int err = 0;

	net->ipv6.vif6_table = kcalloc(MAXMIFS, sizeof(struct mif_device),
				       GFP_KERNEL);
	if (!net->ipv6.vif6_table) {
		err = -ENOMEM;
		goto fail;
	}
fail:
	return err;
}

static void __net_exit ip6mr_net_exit(struct net *net)
{
	kfree(net->ipv6.vif6_table);
}

static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
};
int __init ip6_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
				       sizeof(struct mfc6_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ip6mr_net_ops);
	if (err)
		goto reg_pernet_fail;

	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
	err = register_netdevice_notifier(&ip6_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(&init_net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(&init_net, "ip6_mr_cache",
				  0, &ip6mr_mfc_fops))
		goto proc_cache_fail;
#endif
	return 0;
#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(&init_net, "ip6_mr_vif");
proc_vif_fail:
	unregister_netdevice_notifier(&ip6_mr_notifier);
#endif
reg_notif_fail:
	del_timer(&ipmr_expire_timer);
	unregister_pernet_subsys(&ip6mr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}
void ip6_mr_cleanup(void)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(&init_net, "ip6_mr_cache");
	proc_net_remove(&init_net, "ip6_mr_vif");
#endif
	unregister_netdevice_notifier(&ip6_mr_notifier);
	del_timer(&ipmr_expire_timer);
	unregister_pernet_subsys(&ip6mr_net_ops);
	kmem_cache_destroy(mrt_cachep);
}
static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock)
{
	int line;
	struct mfc6_cache *uc, *c, **cp;
	unsigned char ttls[MAXMIFS];
	int i;

	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;
	}

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr))
			break;
	}

	if (c != NULL) {
		write_lock_bh(&mrt_lock);
		c->mf6c_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(c, ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc(&init_net);
	if (c == NULL)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->mf6c_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(c, ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	c->next = mfc6_cache_array[line];
	mfc6_cache_array[line] = c;
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	spin_lock_bh(&mfc_unres_lock);
	for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
	     cp = &uc->next) {
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			*cp = uc->next;
			if (atomic_dec_and_test(&cache_resolve_queue_len))
				del_timer(&ipmr_expire_timer);
			break;
		}
	}
	spin_unlock_bh(&mfc_unres_lock);

	if (uc) {
		ip6mr_cache_resolve(uc, c);
		ip6mr_cache_free(uc);
	}
	return 0;
}
/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct sock *sk)
{
	int i;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < init_net.ipv6.maxvif; i++) {
		if (!(init_net.ipv6.vif6_table[i].flags & VIFF_STATIC))
			mif6_delete(i);
	}

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < ARRAY_SIZE(mfc6_cache_array); i++) {
		struct mfc6_cache *c, **cp;

		cp = &mfc6_cache_array[i];
		while ((c = *cp) != NULL) {
			if (c->mfc_flags & MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			ip6mr_cache_free(c);
		}
	}

	if (atomic_read(&cache_resolve_queue_len) != 0) {
		struct mfc6_cache *c;

		spin_lock_bh(&mfc_unres_lock);
		while (mfc_unres_queue != NULL) {
			c = mfc_unres_queue;
			mfc_unres_queue = c->next;
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_destroy_unres(c);

			spin_lock_bh(&mfc_unres_lock);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}
static int ip6mr_sk_init(struct sock *sk)
{
	int err = 0;

	rtnl_lock();
	write_lock_bh(&mrt_lock);
	if (likely(init_net.ipv6.mroute6_sk == NULL))
		init_net.ipv6.mroute6_sk = sk;
	else
		err = -EADDRINUSE;
	write_unlock_bh(&mrt_lock);

	rtnl_unlock();

	return err;
}

int ip6mr_sk_done(struct sock *sk)
{
	int err = 0;

	rtnl_lock();
	if (sk == init_net.ipv6.mroute6_sk) {
		write_lock_bh(&mrt_lock);
		init_net.ipv6.mroute6_sk = NULL;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(sk);
	} else
		err = -EACCES;
	rtnl_unlock();

	return err;
}
/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
{
	int ret;
	struct mif6ctl vif;
	struct mf6cctl mfc;
	mifi_t mifi;

	if (optname != MRT6_INIT) {
		if (sk != init_net.ipv6.mroute6_sk && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->num != IPPROTO_ICMPV6)
			return -EOPNOTSUPP;
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		ret = mif6_add(&vif, sk == init_net.ipv6.mroute6_sk);
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(mifi);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT6_DEL_MFC)
			ret = ip6mr_mfc_delete(&mfc);
		else
			ret = ip6mr_mfc_add(&mfc, sk == init_net.ipv6.mroute6_sk);
		rtnl_unlock();
		return ret;

	/*
	 *	Control PIM assert (to activate pim will activate assert)
	 */
	case MRT6_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mroute_do_assert = !!v;
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = !!v;
		rtnl_lock();
		ret = 0;
		if (v != mroute_do_pim) {
			mroute_do_pim = v;
			mroute_do_assert = v;
			if (mroute_do_pim)
				ret = inet6_add_protocol(&pim6_protocol,
							 IPPROTO_PIM);
			else
				ret = inet6_del_protocol(&pim6_protocol,
							 IPPROTO_PIM);
			if (ret < 0)
				ret = -EAGAIN;
		}
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT6_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}
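/*
 * Hedged userspace sketch (not part of this file) of how a daemon such
 * as pim6sd is expected to drive these options, given the checks above;
 * "ifindex" is a placeholder for a real interface index:
 *
 *	int s = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *	int one = 1;
 *	setsockopt(s, IPPROTO_IPV6, MRT6_INIT, &one, sizeof(one));
 *	struct mif6ctl mc = { .mif6c_mifi = 0, .mif6c_pifi = ifindex };
 *	setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MIF, &mc, sizeof(mc));
 *	...
 *	setsockopt(s, IPPROTO_IPV6, MRT6_DONE, NULL, 0);
 */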
/*
 *	Getsock opt support for the multicast routing system.
 */

int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
			  int __user *optlen)
{
	int olr;
	int val;

	switch (optname) {
	case MRT6_VERSION:
		val = 0x0305;
		break;
#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
		val = mroute_do_pim;
		break;
#endif
	case MRT6_ASSERT:
		val = mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}
/*
 *	The IP multicast ioctl support routines.
 */

int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req6 sr;
	struct sioc_mif_req6 vr;
	struct mif_device *vif;
	struct mfc6_cache *c;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= init_net.ipv6.maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &init_net.ipv6.vif6_table[vr.mifi];
		if (MIF_EXISTS(&init_net, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(&sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
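/*
 * Illustrative use of the ioctls above from user space (a sketch with
 * made-up addresses; the sr field names come from <linux/mroute6.h>):
 *
 *	struct sioc_sg_req6 sr = {};
 *	inet_pton(AF_INET6, "2001:db8::1", &sr.src.sin6_addr);
 *	inet_pton(AF_INET6, "ff0e::1", &sr.grp.sin6_addr);
 *	if (ioctl(mroute_sock, SIOCGETSGCNT_IN6, &sr) == 0)
 *		printf("%lu pkts, %lu bytes\n", sr.pktcnt, sr.bytecnt);
 */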
static inline int ip6mr_forward2_finish(struct sk_buff *skb)
{
	IP6_INC_STATS_BH(dev_net(skb->dst->dev), ip6_dst_idev(skb->dst),
			 IPSTATS_MIB_OUTFORWDATAGRAMS);
	return dst_output(skb);
}
/*
 *	Processing handlers for ip6mr_forward
 */

static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct mif_device *vif = &init_net.ipv6.vif6_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi fl;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vif->flags & MIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ip6mr_cache_report(skb, vifi, MRT6MSG_WHOLEPKT);
		kfree_skb(skb);
		return 0;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	fl = (struct flowi) {
		.oif = vif->link,
		.nl_u = { .ip6_u =
				{ .daddr = ipv6h->daddr, }
		}
	};

	dst = ip6_route_output(&init_net, NULL, &fl);
	if (!dst)
		goto out_free;

	dst_release(skb->dst);
	skb->dst = dst;

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}
static int ip6mr_find_vif(struct net_device *dev)
{
	int ct;
	for (ct = init_net.ipv6.maxvif - 1; ct >= 0; ct--) {
		if (init_net.ipv6.vif6_table[ct].dev == dev)
			break;
	}
	return ct;
}
static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mf6c_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (init_net.ipv6.vif6_table[vif].dev != skb->dev) {
		int true_vifi;

		cache->mfc_un.res.wrong_if++;
		true_vifi = ip6mr_find_vif(skb->dev);

		if (true_vifi >= 0 && mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(skb, true_vifi, MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

	init_net.ipv6.vif6_table[vif].pkt_in++;
	init_net.ipv6.vif6_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(skb2, cache, psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		ip6mr_forward2(skb, cache, psend);
		return 0;
	}

dont_forward:
	kfree_skb(skb);
	return 0;
}
/*
 *	Multicast packets for forwarding arrive here
 */

int ip6_mr_input(struct sk_buff *skb)
{
	struct mfc6_cache *cache;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		vif = ip6mr_find_vif(skb->dev);
		if (vif >= 0) {
			int err = ip6mr_cache_unresolved(vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip6_mr_forward(skb, cache);

	read_unlock(&mrt_lock);

	return 0;
}
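/*
 * Note (a summary of the call path, not from this file): ip6_mr_input()
 * is reached from the IPv6 multicast input path (ip6_mc_input() in
 * net/ipv6/ip6_input.c) when multicast forwarding is enabled on the
 * inbound interface; it takes mrt_lock itself, so callers hold no locks.
 */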
static int
ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct net_device *dev = init_net.ipv6.vif6_table[c->mf6c_parent].dev;
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	if (dev)
		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = init_net.ipv6.vif6_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}
int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb->dst;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(&rt->rt6i_src.addr, &rt->rt6i_dst.addr);

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ip6mr_find_vif(dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* really correct? */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
		ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);

		err = ip6mr_cache_unresolved(vif, skb2);
		read_unlock(&mrt_lock);

		return err;
	}

	if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;

	err = ip6mr_fill_mroute(skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}