/*
 *	Linux IPv6 multicast routing support for BSD pim6sd
 *	Based on net/ipv4/ipmr.c.
 *
 *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
 *		LSIIT Laboratory, Strasbourg, France
 *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
 *	Copyright (C)2007,2008 USAGI/WIDE Project
 *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/ip6_route.h>
#include <linux/mroute6.h>
#include <linux/pim.h>
#include <net/addrconf.h>
#include <linux/netfilter_ipv6.h>
/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that changes are serialized via rtnl_lock. */

static DEFINE_RWLOCK(mrt_lock);
/*
 *	Multicast router control variables
 */

#define MIF_EXISTS(_net, _idx) ((_net)->ipv6.vif6_table[_idx].dev != NULL)
static int mroute_do_assert;				/* Set in PIM assert	*/
#ifdef CONFIG_IPV6_PIMSM_V2
static int mroute_do_pim;
#else
#define mroute_do_pim 0
#endif
static struct mfc6_cache *mfc6_cache_array[MFC6_LINES];	/* Forwarding cache	*/

static struct mfc6_cache *mfc_unres_queue;		/* Queue of unresolved entries */
static atomic_t cache_resolve_queue_len;		/* Size of unresolved queue */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);
/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */
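/*
 * Editor's sketch of the resulting discipline (illustrative, not part of
 * the original file): the forwarding path takes mrt_lock only for reading,
 * while configuration paths take it for writing in process context:
 *
 *	read_lock(&mrt_lock);			packet path (shared)
 *	c = ip6mr_cache_find(saddr, daddr);
 *	...
 *	read_unlock(&mrt_lock);
 *
 *	write_lock_bh(&mrt_lock);		setsockopt path (exclusive)
 *	mfc6_cache_array[line] = c;
 *	write_unlock_bh(&mrt_lock);
 */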
static struct kmem_cache *mrt_cachep __read_mostly;
static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache);
static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert);
static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm);
#ifdef CONFIG_IPV6_PIMSM_V2
static struct inet6_protocol pim6_protocol;
#endif

static struct timer_list ipmr_expire_timer;
#ifdef CONFIG_PROC_FS

struct ipmr_mfc_iter {
	struct mfc6_cache **cache;
	int ct;
};
static struct mfc6_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mfc6_cache *mfc;

	it->cache = mfc6_cache_array;
	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < ARRAY_SIZE(mfc6_cache_array); it->ct++)
		for (mfc = mfc6_cache_array[it->ct]; mfc; mfc = mfc->next)
			if (pos-- == 0)
				return mfc;
	read_unlock(&mrt_lock);

	it->cache = &mfc_unres_queue;
	spin_lock_bh(&mfc_unres_lock);
	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}
/*
 *	The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
 */
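/*
 * Editor's usage note (not in the original): both files are registered
 * under /proc/net/ in ip6_mr_init(), so the mif table can be inspected
 * with e.g.
 *
 *	cat /proc/net/ip6_mr_vif
 *
 * which prints the header emitted by ip6mr_vif_seq_show() below, followed
 * by one line per configured mif.
 */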
struct ipmr_vif_iter {
	int ct;
};

static struct mif_device *ip6mr_vif_seq_idx(struct ipmr_vif_iter *iter,
					    loff_t pos)
{
	for (iter->ct = 0; iter->ct < init_net.ipv6.maxvif; ++iter->ct) {
		if (!MIF_EXISTS(&init_net, iter->ct))
			continue;
		if (pos-- == 0)
			return &init_net.ipv6.vif6_table[iter->ct];
	}
	return NULL;
}
static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
{
	read_lock(&mrt_lock);
	return (*pos ? ip6mr_vif_seq_idx(seq->private, *pos - 1)
		     : SEQ_START_TOKEN);
}
static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip6mr_vif_seq_idx(iter, 0);

	while (++iter->ct < init_net.ipv6.maxvif) {
		if (!MIF_EXISTS(&init_net, iter->ct))
			continue;
		return &init_net.ipv6.vif6_table[iter->ct];
	}
	return NULL;
}
static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
{
	read_unlock(&mrt_lock);
}
static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
	} else {
		const struct mif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
			   vif - init_net.ipv6.vif6_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags);
	}
	return 0;
}
static struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = ip6mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};
static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &ip6mr_vif_seq_ops,
				sizeof(struct ipmr_vif_iter));
}
static struct file_operations ip6mr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open	 = ip6mr_vif_open,
	.release = seq_release_private,
};
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	return (*pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
		     : SEQ_START_TOKEN);
}
static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc6_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(seq->private, 0);

	if (mfc->next)
		return mfc->next;

	if (it->cache == &mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != mfc6_cache_array);

	while (++it->ct < ARRAY_SIZE(mfc6_cache_array)) {
		mfc = mfc6_cache_array[it->ct];
		if (mfc)
			return mfc;
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	mfc = mfc_unres_queue;
	if (mfc)
		return mfc;

end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;
	return NULL;
}
static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;

	if (it->cache == &mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == mfc6_cache_array)
		read_unlock(&mrt_lock);
}
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group                            "
			 "Origin                           "
			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
	} else {
		const struct mfc6_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;

		seq_printf(seq, "%pI6 %pI6 %-3hd",
			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
			   mfc->mf6c_parent);

		if (it->cache != &mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (MIF_EXISTS(&init_net, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq, " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}
static struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};
static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &ipmr_mfc_seq_ops,
				sizeof(struct ipmr_mfc_iter));
}
static struct file_operations ip6mr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open	 = ipmr_mfc_open,
	.release = seq_release_private,
};
#endif
#ifdef CONFIG_IPV6_PIMSM_V2

static int reg_vif_num = -1;
static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr *encap;
	struct net_device *reg_dev = NULL;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to mcast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = init_net.ipv6.vif6_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		goto drop;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IPV6);	/* decapsulated packet is IPv6, not IPv4 */
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	dst_release(skb->dst);
	reg_dev->stats.rx_bytes += skb->len;
	reg_dev->stats.rx_packets++;
	skb->dst = NULL;
	netif_rx(skb);
	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}
static struct inet6_protocol pim6_protocol = {
	.handler = pim6_rcv,
};
/* Service routines creating virtual interfaces: PIMREG */
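/*
 * Editor's note (derived from the code below, not an original comment):
 * the "pim6reg" device is created implicitly when a daemon adds a mif
 * with MIFF_REGISTER set (see mif6_add()); packets transmitted through
 * it are bounced back to the daemon via
 * ip6mr_cache_report(..., MRT6MSG_WHOLEPKT).
 */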
static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ip6mr_cache_report(skb, reg_vif_num, MRT6MSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return 0;
}
static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};
static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
}
static struct net_device *ip6mr_reg_vif(void)
{
	struct net_device *dev;

	dev = alloc_netdev(0, "pim6reg", reg_vif_setup);
	if (dev == NULL)
		return NULL;
	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	if (dev_open(dev))
		goto failure;
	dev_hold(dev);
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();
	unregister_netdevice(dev);
	return NULL;
}
static int mif6_delete(int vifi)
{
	struct mif_device *v;
	struct net_device *dev;

	if (vifi < 0 || vifi >= init_net.ipv6.maxvif)
		return -EADDRNOTAVAIL;

	v = &init_net.ipv6.vif6_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == reg_vif_num)
		reg_vif_num = -1;
#endif

	if (vifi + 1 == init_net.ipv6.maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (MIF_EXISTS(&init_net, tmp))
				break;
		}
		init_net.ipv6.maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if (v->flags & MIFF_REGISTER)
		unregister_netdevice(dev);
	else
		dev_put(dev);
	return 0;
}
/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers. */
static void ip6mr_destroy_unres(struct mfc6_cache *c)
{
	struct sk_buff *skb;

	atomic_dec(&cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	kmem_cache_free(mrt_cachep, c);
}
/* Single timer process for all the unresolved queue. */
static void ipmr_do_expire_process(unsigned long dummy)
{
	unsigned long now = jiffies;
	unsigned long expires = 10 * HZ;
	struct mfc6_cache *c, **cp;

	cp = &mfc_unres_queue;

	while ((c = *cp) != NULL) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* not yet... */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			cp = &c->next;
			continue;
		}

		*cp = c->next;
		ip6mr_destroy_unres(c);
	}

	if (atomic_read(&cache_resolve_queue_len))
		mod_timer(&ipmr_expire_timer, jiffies + expires);
}
static void ipmr_expire_process(unsigned long dummy)
{
	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&ipmr_expire_timer, jiffies + 1);
		return;
	}

	if (atomic_read(&cache_resolve_queue_len))
		ipmr_do_expire_process(dummy);

	spin_unlock(&mfc_unres_lock);
}
/* Fill oifs list. It is called under write locked mrt_lock. */
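/*
 * Worked example (editor's illustration; assume for the sake of the
 * numbers that MAXMIFS == 8 and all mifs exist): with
 * ttls == { 0, 2, 0, 4, 0, 0, 0, 0 }, the loop below leaves
 * res.ttls == { 255, 2, 255, 4, 255, ... }, res.minvif == 1 and
 * res.maxvif == 4, so the forwarding loop scans only mifs 1..3.
 */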
static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXMIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);

	for (vifi = 0; vifi < init_net.ipv6.maxvif; vifi++) {
		if (MIF_EXISTS(&init_net, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}
static int mif6_add(struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct mif_device *v = &init_net.ipv6.vif6_table[vifi];
	struct net_device *dev;
	int err;

	/* Is vif busy ? */
	if (MIF_EXISTS(&init_net, vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif();
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		dev = dev_get_by_index(&init_net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->flags = vifc->mif6c_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->link = dev->ifindex;
	if (v->flags & MIFF_REGISTER)
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		reg_vif_num = vifi;
#endif
	if (vifi + 1 > init_net.ipv6.maxvif)
		init_net.ipv6.maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	return 0;
}
static struct mfc6_cache *ip6mr_cache_find(struct in6_addr *origin, struct in6_addr *mcastgrp)
{
	int line = MFC6_HASH(mcastgrp, origin);
	struct mfc6_cache *c;

	for (c = mfc6_cache_array[line]; c; c = c->next) {
		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
			break;
	}
	return c;
}
/*
 *	Allocate a multicast cache entry
 */
static struct mfc6_cache *ip6mr_cache_alloc(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (c == NULL)
		return NULL;
	c->mfc_un.res.minvif = MAXMIFS;
	return c;
}
static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (c == NULL)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10 * HZ;
	return c;
}
/*
 *	A cache entry has gone into a resolved state from queued
 */
static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ipv6_hdr(skb)->version == 0) {
			int err;
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));

			if (ip6mr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
			}
			err = rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
		} else
			ip6_mr_forward(skb, c);
	}
}
/*
 *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */
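/*
 * Userspace sketch (editor's example; the struct and message types come
 * from <linux/mroute6.h>, the daemon logic is hypothetical): pim6sd reads
 * these upcalls from its MRT6_INIT'ed ICMPv6 raw socket and dispatches on
 * im6_msgtype:
 *
 *	char buf[8192];
 *	struct mrt6msg *m;
 *	ssize_t n = recv(mrt_fd, buf, sizeof(buf), 0);
 *	m = (struct mrt6msg *)buf;
 *	if (n >= (ssize_t)sizeof(*m) && m->im6_mbz == 0)
 *		switch (m->im6_msgtype) {
 *		case MRT6MSG_NOCACHE:  ... resolve (im6_src, im6_dst) ...
 *		case MRT6MSG_WRONGMIF: ... PIM assert processing ...
 *		case MRT6MSG_WHOLEPKT: ... PIM register encapsulation ...
 *		}
 */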
static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert)
{
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+ sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		msg->im6_mif = reg_vif_num;
		msg->im6_pad = 0;
		ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
		ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
	/*
	 *	Copy the IP header
	 */

	skb_put(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

	/*
	 *	Add our header
	 */
	skb_put(skb, sizeof(*msg));
	skb_reset_transport_header(skb);
	msg = (struct mrt6msg *)skb_transport_header(skb);

	msg->im6_mbz = 0;
	msg->im6_msgtype = assert;
	msg->im6_mif = mifi;
	msg->im6_pad = 0;
	ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
	ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);

	skb->dst = dst_clone(pkt->dst);
	skb->ip_summed = CHECKSUM_UNNECESSARY;

	skb_pull(skb, sizeof(struct ipv6hdr));
	}

	if (init_net.ipv6.mroute6_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to user space multicast routing algorithms
	 */
	ret = sock_queue_rcv_skb(init_net.ipv6.mroute6_sk, skb);
	if (ret < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}
/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */
static int
ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb)
{
	int err;
	struct mfc6_cache *c;

	spin_lock_bh(&mfc_unres_lock);
	for (c = mfc_unres_queue; c; c = c->next) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr))
			break;
	}

	if (c == NULL) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&cache_resolve_queue_len) >= 10 ||
		    (c = ip6mr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		if ((err = ip6mr_cache_report(skb, mifi, MRT6MSG_NOCACHE)) < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			kmem_cache_free(mrt_cachep, c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&cache_resolve_queue_len);
		c->next = mfc_unres_queue;
		mfc_unres_queue = c;

		ipmr_do_expire_process(1);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}
/*
 *	MFC6 cache manipulation by user space
 */
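/*
 * Userspace sketch (editor's example; field names and IF_SET() come from
 * <linux/mroute6.h>, the surrounding daemon logic is hypothetical): an
 * (S,G) entry is installed or removed with MRT6_ADD_MFC/MRT6_DEL_MFC and
 * a struct mf6cctl:
 *
 *	struct mf6cctl mc;
 *	memset(&mc, 0, sizeof(mc));
 *	mc.mf6cc_origin = src;			struct sockaddr_in6
 *	mc.mf6cc_mcastgrp = grp;		struct sockaddr_in6
 *	mc.mf6cc_parent = iif_mif;		incoming mif index
 *	IF_SET(oif_mif, &mc.mf6cc_ifset);	forward out this mif
 *	setsockopt(mrt_fd, IPPROTO_IPV6, MRT6_ADD_MFC, &mc, sizeof(mc));
 */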
static int ip6mr_mfc_delete(struct mf6cctl *mfc)
{
	int line;
	struct mfc6_cache *c, **cp;

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			kmem_cache_free(mrt_cachep, c);
			return 0;
		}
	}
	return -ENOENT;
}
static int ip6mr_device_event(struct notifier_block *this,
			      unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct mif_device *v;
	int ct;

	if (!net_eq(dev_net(dev), &init_net))
		return NOTIFY_DONE;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	v = &init_net.ipv6.vif6_table[0];
	for (ct = 0; ct < init_net.ipv6.maxvif; ct++, v++) {
		if (v->dev == dev)
			mif6_delete(ct);
	}
	return NOTIFY_DONE;
}

static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};
/*
 *	Setup for IP multicast routing
 */
static int __net_init ip6mr_net_init(struct net *net)
{
	net->ipv6.vif6_table = kcalloc(MAXMIFS, sizeof(struct mif_device),
				       GFP_KERNEL);
	if (!net->ipv6.vif6_table)
		return -ENOMEM;
	return 0;
}

static void __net_exit ip6mr_net_exit(struct net *net)
{
	kfree(net->ipv6.vif6_table);
}

static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
};
int __init ip6_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
				       sizeof(struct mfc6_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ip6mr_net_ops);
	if (err)
		goto reg_pernet_fail;

	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
	err = register_netdevice_notifier(&ip6_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(&init_net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(&init_net, "ip6_mr_cache",
				  0, &ip6mr_mfc_fops))
		goto proc_cache_fail;
#endif
	return 0;
#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(&init_net, "ip6_mr_vif");
proc_vif_fail:
	unregister_netdevice_notifier(&ip6_mr_notifier);
#endif
reg_notif_fail:
	del_timer(&ipmr_expire_timer);
	unregister_pernet_subsys(&ip6mr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}
void ip6_mr_cleanup(void)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(&init_net, "ip6_mr_cache");
	proc_net_remove(&init_net, "ip6_mr_vif");
#endif
	unregister_netdevice_notifier(&ip6_mr_notifier);
	del_timer(&ipmr_expire_timer);
	unregister_pernet_subsys(&ip6mr_net_ops);
	kmem_cache_destroy(mrt_cachep);
}
static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock)
{
	int line;
	struct mfc6_cache *uc, *c, **cp;
	unsigned char ttls[MAXMIFS];
	int i;

	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;
	}

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr))
			break;
	}

	if (c != NULL) {
		write_lock_bh(&mrt_lock);
		c->mf6c_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(c, ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc();
	if (c == NULL)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->mf6c_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(c, ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	c->next = mfc6_cache_array[line];
	mfc6_cache_array[line] = c;
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	spin_lock_bh(&mfc_unres_lock);
	for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
	     cp = &uc->next) {
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			*cp = uc->next;
			if (atomic_dec_and_test(&cache_resolve_queue_len))
				del_timer(&ipmr_expire_timer);
			break;
		}
	}
	spin_unlock_bh(&mfc_unres_lock);

	if (uc) {
		ip6mr_cache_resolve(uc, c);
		kmem_cache_free(mrt_cachep, uc);
	}
	return 0;
}
/*
 *	Close the multicast socket, and clear the vif tables etc
 */
static void mroute_clean_tables(struct sock *sk)
{
	int i;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < init_net.ipv6.maxvif; i++) {
		if (!(init_net.ipv6.vif6_table[i].flags & VIFF_STATIC))
			mif6_delete(i);
	}

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < ARRAY_SIZE(mfc6_cache_array); i++) {
		struct mfc6_cache *c, **cp;

		cp = &mfc6_cache_array[i];
		while ((c = *cp) != NULL) {
			if (c->mfc_flags & MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			kmem_cache_free(mrt_cachep, c);
		}
	}

	if (atomic_read(&cache_resolve_queue_len) != 0) {
		struct mfc6_cache *c;

		spin_lock_bh(&mfc_unres_lock);
		while (mfc_unres_queue != NULL) {
			c = mfc_unres_queue;
			mfc_unres_queue = c->next;
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_destroy_unres(c);

			spin_lock_bh(&mfc_unres_lock);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}
static int ip6mr_sk_init(struct sock *sk)
{
	int err = 0;

	rtnl_lock();
	write_lock_bh(&mrt_lock);
	if (likely(init_net.ipv6.mroute6_sk == NULL))
		init_net.ipv6.mroute6_sk = sk;
	else
		err = -EADDRINUSE;
	write_unlock_bh(&mrt_lock);

	rtnl_unlock();
	return err;
}
int ip6mr_sk_done(struct sock *sk)
{
	int err = 0;

	rtnl_lock();
	if (sk == init_net.ipv6.mroute6_sk) {
		write_lock_bh(&mrt_lock);
		init_net.ipv6.mroute6_sk = NULL;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(sk);
	} else
		err = -EACCES;
	rtnl_unlock();

	return err;
}
/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */
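/*
 * Userspace sketch (editor's example, not from this file): the control
 * socket must be an ICMPv6 raw socket, MRT6_INIT must be set first, and
 * all MRT6_* options live at level IPPROTO_IPV6:
 *
 *	int mrt_fd = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *	int one = 1;
 *	if (setsockopt(mrt_fd, IPPROTO_IPV6, MRT6_INIT, &one, sizeof(one)) < 0)
 *		... only one daemon may own the mroute socket at a time ...
 */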
int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
{
	int ret;
	struct mif6ctl vif;
	struct mf6cctl mfc;
	mifi_t mifi;

	if (optname != MRT6_INIT) {
		if (sk != init_net.ipv6.mroute6_sk && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->num != IPPROTO_ICMPV6)
			return -EOPNOTSUPP;
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		ret = mif6_add(&vif, sk == init_net.ipv6.mroute6_sk);
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(mifi);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT6_DEL_MFC)
			ret = ip6mr_mfc_delete(&mfc);
		else
			ret = ip6mr_mfc_add(&mfc, sk == init_net.ipv6.mroute6_sk);
		rtnl_unlock();
		return ret;

	/*
	 *	Control PIM assert (to activate pim will activate assert)
	 */
	case MRT6_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mroute_do_assert = !!v;
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = !!v;
		rtnl_lock();
		ret = 0;
		if (v != mroute_do_pim) {
			mroute_do_pim = v;
			mroute_do_assert = v;
			if (mroute_do_pim)
				ret = inet6_add_protocol(&pim6_protocol,
							 IPPROTO_PIM);
			else
				ret = inet6_del_protocol(&pim6_protocol,
							 IPPROTO_PIM);
			if (ret < 0)
				ret = -EAGAIN;
		}
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT6_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}
/*
 *	Getsockopt support for the multicast routing system.
 */
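/*
 * Userspace sketch (editor's example): the assert and PIM modes set above
 * can be read back on the same socket, e.g.
 *
 *	int val;
 *	socklen_t len = sizeof(val);
 *	getsockopt(mrt_fd, IPPROTO_IPV6, MRT6_ASSERT, &val, &len);
 */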
int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
			  int __user *optlen)
{
	int olr;
	int val;

	switch (optname) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
		val = mroute_do_pim;
		break;
#endif
	case MRT6_ASSERT:
		val = mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}
/*
 *	The IP multicast ioctl support routines.
 */
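/*
 * Userspace sketch (editor's example): per-mif and per-(S,G) counters are
 * read with ioctls on the same socket, e.g.
 *
 *	struct sioc_mif_req6 mr = { .mifi = 0 };
 *	ioctl(mrt_fd, SIOCGETMIFCNT_IN6, &mr);
 *	... mr.icount / mr.ocount / mr.ibytes / mr.obytes now hold stats ...
 */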
int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req6 sr;
	struct sioc_mif_req6 vr;
	struct mif_device *vif;
	struct mfc6_cache *c;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= init_net.ipv6.maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &init_net.ipv6.vif6_table[vr.mifi];
		if (MIF_EXISTS(&init_net, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(&sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
static inline int ip6mr_forward2_finish(struct sk_buff *skb)
{
	IP6_INC_STATS_BH(dev_net(skb->dst->dev), ip6_dst_idev(skb->dst),
			 IPSTATS_MIB_OUTFORWDATAGRAMS);
	return dst_output(skb);
}
/*
 *	Processing handlers for ip6mr_forward
 */
static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct mif_device *vif = &init_net.ipv6.vif6_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi fl;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vif->flags & MIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ip6mr_cache_report(skb, vifi, MRT6MSG_WHOLEPKT);
		kfree_skb(skb);
		return 0;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	fl = (struct flowi) {
		.oif = vif->link,
		.nl_u = { .ip6_u =
			  { .daddr = ipv6h->daddr, }
		}
	};

	dst = ip6_route_output(&init_net, NULL, &fl);
	if (!dst)
		goto out_free;

	dst_release(skb->dst);
	skb->dst = dst;

	/*
	 * RFC1584 teaches that a DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. Clearly, if an mrouter runs a multicasting program, the
	 * program should receive packets regardless of which interface it
	 * joined on.
	 * If we do not do this, the program will have to join on all
	 * interfaces. On the other hand, a multihoming host (or router, but
	 * not mrouter) cannot join on more than one interface - that would
	 * result in receiving multiple packets.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}
static int ip6mr_find_vif(struct net_device *dev)
{
	int ct;

	for (ct = init_net.ipv6.maxvif - 1; ct >= 0; ct--) {
		if (init_net.ipv6.vif6_table[ct].dev == dev)
			break;
	}
	return ct;
}
static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mf6c_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (init_net.ipv6.vif6_table[vif].dev != skb->dev) {
		int true_vifi;

		cache->mfc_un.res.wrong_if++;
		true_vifi = ip6mr_find_vif(skb->dev);

		if (true_vifi >= 0 && mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(skb, true_vifi, MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

	init_net.ipv6.vif6_table[vif].pkt_in++;
	init_net.ipv6.vif6_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(skb2, cache, psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		ip6mr_forward2(skb, cache, psend);
		return 0;
	}

dont_forward:
	kfree_skb(skb);
	return 0;
}
/*
 *	Multicast packets for forwarding arrive here
 */
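/*
 * Editor's note (assumption based on the surrounding tree, not stated in
 * this file): ip6_mr_input() is reached from the IPv6 multicast input
 * path once a routing daemon has opened the mroute6 socket and multicast
 * forwarding is enabled on the receiving device.
 */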
int ip6_mr_input(struct sk_buff *skb)
{
	struct mfc6_cache *cache;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		vif = ip6mr_find_vif(skb->dev);
		if (vif >= 0) {
			int err = ip6mr_cache_unresolved(vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip6_mr_forward(skb, cache);

	read_unlock(&mrt_lock);

	return 0;
}
static int
ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct net_device *dev = init_net.ipv6.vif6_table[c->mf6c_parent].dev;
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	if (dev)
		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = init_net.ipv6.vif6_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}
int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb->dst;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(&rt->rt6i_src.addr, &rt->rt6i_dst.addr);

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ip6mr_find_vif(dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* really correct? */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
		ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);

		err = ip6mr_cache_unresolved(vif, skb2);
		read_unlock(&mrt_lock);

		return err;
	}

	if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;

	err = ip6mr_fill_mroute(skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}