2 * NET3 IP device support routines.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Derived from the IP parts of dev.c 1.0.19
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
15 * Alan Cox, <gw4pts@gw4pts.ampr.org>
16 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
19 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
21 * Cyrus Durgin: updated for kmod
22 * Matthias Andree: in devinet_ioctl, compare label and
23 * address (4.4BSD alias style support),
24 * fall back to comparing just the label
29 #include <asm/uaccess.h>
30 #include <asm/system.h>
31 #include <linux/bitops.h>
32 #include <linux/capability.h>
33 #include <linux/module.h>
34 #include <linux/types.h>
35 #include <linux/kernel.h>
36 #include <linux/string.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
54 #include <linux/sysctl.h>
56 #include <linux/kmod.h>
60 #include <net/route.h>
61 #include <net/ip_fib.h>
62 #include <net/rtnetlink.h>
63 #include <net/net_namespace.h>
65 static struct ipv4_devconf ipv4_devconf = {
67 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
68 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
69 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
70 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
74 static struct ipv4_devconf ipv4_devconf_dflt = {
76 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
77 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
78 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
79 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
80 [NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
/*
 * Accessor for one attribute of a namespace's default device configuration:
 * expands to IPV4_DEVCONF applied to the structure that
 * net->ipv4.devconf_dflt points at. The `net` argument is expanded without
 * surrounding parentheses, so pass a plain identifier.
 */
84 #define IPV4_DEVCONF_DFLT(net, attr) \
85 IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
/*
 * Netlink attribute policy handed to nlmsg_parse() by the RTM_NEWADDR and
 * RTM_DELADDR handlers in this file. IFA_LOCAL, IFA_ADDRESS and
 * IFA_BROADCAST are declared as NLA_U32; IFA_LABEL is an NLA_STRING with
 * .len set to IFNAMSIZ - 1.
 */
87 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
88 [IFA_LOCAL] = { .type = NLA_U32 },
89 [IFA_ADDRESS] = { .type = NLA_U32 },
90 [IFA_BROADCAST] = { .type = NLA_U32 },
91 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
94 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
96 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
97 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
100 static void devinet_sysctl_register(struct in_device *idev);
101 static void devinet_sysctl_unregister(struct in_device *idev);
103 static inline void devinet_sysctl_register(struct in_device *idev)
106 static inline void devinet_sysctl_unregister(struct in_device *idev)
111 /* Locks all the inet devices. */
/*
 * Allocate one zero-filled struct in_ifaddr with kzalloc(GFP_KERNEL).
 * The kzalloc() result is returned unchanged, so callers must check it
 * for NULL.
 */
113 static struct in_ifaddr *inet_alloc_ifa(void)
115 return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
118 static void inet_rcu_free_ifa(struct rcu_head *head)
120 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
122 in_dev_put(ifa->ifa_dev);
/*
 * Release an in_ifaddr by queueing inet_rcu_free_ifa() on the address's
 * embedded rcu_head via call_rcu(); the actual teardown happens in that
 * callback rather than here.
 */
126 static inline void inet_free_ifa(struct in_ifaddr *ifa)
128 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
131 void in_dev_finish_destroy(struct in_device *idev)
133 struct net_device *dev = idev->dev;
135 WARN_ON(idev->ifa_list);
136 WARN_ON(idev->mc_list);
137 #ifdef NET_REFCNT_DEBUG
138 printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
139 idev, dev ? dev->name : "NIL");
143 printk("Freeing alive in_device %p\n", idev);
149 static struct in_device *inetdev_init(struct net_device *dev)
151 struct in_device *in_dev;
155 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
158 memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
159 sizeof(in_dev->cnf));
160 in_dev->cnf.sysctl = NULL;
162 if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
164 if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
165 dev_disable_lro(dev);
166 /* Reference in_dev->dev */
168 /* Account for reference dev->ip_ptr (below) */
171 devinet_sysctl_register(in_dev);
172 ip_mc_init_dev(in_dev);
173 if (dev->flags & IFF_UP)
176 /* we can receive as soon as ip_ptr is set -- do this last */
177 rcu_assign_pointer(dev->ip_ptr, in_dev);
186 static void in_dev_rcu_put(struct rcu_head *head)
188 struct in_device *idev = container_of(head, struct in_device, rcu_head);
192 static void inetdev_destroy(struct in_device *in_dev)
194 struct in_ifaddr *ifa;
195 struct net_device *dev;
203 ip_mc_destroy_dev(in_dev);
205 while ((ifa = in_dev->ifa_list) != NULL) {
206 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
212 devinet_sysctl_unregister(in_dev);
213 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
216 call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
219 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
222 for_primary_ifa(in_dev) {
223 if (inet_ifa_match(a, ifa)) {
224 if (!b || inet_ifa_match(b, ifa)) {
229 } endfor_ifa(in_dev);
234 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
235 int destroy, struct nlmsghdr *nlh, u32 pid)
237 struct in_ifaddr *promote = NULL;
238 struct in_ifaddr *ifa, *ifa1 = *ifap;
239 struct in_ifaddr *last_prim = in_dev->ifa_list;
240 struct in_ifaddr *prev_prom = NULL;
241 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
245 /* 1. Deleting primary ifaddr forces deletion of all secondaries
246 * unless alias promotion is set
249 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
250 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
252 while ((ifa = *ifap1) != NULL) {
253 if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
254 ifa1->ifa_scope <= ifa->ifa_scope)
257 if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
258 ifa1->ifa_mask != ifa->ifa_mask ||
259 !inet_ifa_match(ifa1->ifa_address, ifa)) {
260 ifap1 = &ifa->ifa_next;
266 *ifap1 = ifa->ifa_next;
268 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
269 blocking_notifier_call_chain(&inetaddr_chain,
281 *ifap = ifa1->ifa_next;
283 /* 3. Announce address deletion */
285 /* Send message first, then call notifier.
286 At first sight, FIB update triggered by notifier
287 will refer to already deleted ifaddr, that could confuse
288 netlink listeners. It is not true: look, gated sees
289 that route deleted and if it still thinks that ifaddr
290 is valid, it will try to restore deleted routes... Grr.
291 So that, this order is correct.
293 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
294 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
299 prev_prom->ifa_next = promote->ifa_next;
300 promote->ifa_next = last_prim->ifa_next;
301 last_prim->ifa_next = promote;
304 promote->ifa_flags &= ~IFA_F_SECONDARY;
305 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
306 blocking_notifier_call_chain(&inetaddr_chain,
308 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
309 if (ifa1->ifa_mask != ifa->ifa_mask ||
310 !inet_ifa_match(ifa1->ifa_address, ifa))
/*
 * Convenience wrapper around __inet_del_ifa() for callers that have no
 * originating netlink request: passes a NULL nlmsghdr and a pid of 0.
 */
320 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
323 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
326 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
329 struct in_device *in_dev = ifa->ifa_dev;
330 struct in_ifaddr *ifa1, **ifap, **last_primary;
334 if (!ifa->ifa_local) {
339 ifa->ifa_flags &= ~IFA_F_SECONDARY;
340 last_primary = &in_dev->ifa_list;
342 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
343 ifap = &ifa1->ifa_next) {
344 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
345 ifa->ifa_scope <= ifa1->ifa_scope)
346 last_primary = &ifa1->ifa_next;
347 if (ifa1->ifa_mask == ifa->ifa_mask &&
348 inet_ifa_match(ifa1->ifa_address, ifa)) {
349 if (ifa1->ifa_local == ifa->ifa_local) {
353 if (ifa1->ifa_scope != ifa->ifa_scope) {
357 ifa->ifa_flags |= IFA_F_SECONDARY;
361 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
362 net_srandom(ifa->ifa_local);
366 ifa->ifa_next = *ifap;
369 /* Send message first, then call notifier.
370 Notifier will trigger FIB update, so that
371 listeners of netlink will know about new ifaddr */
372 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
373 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
/*
 * Convenience wrapper around __inet_insert_ifa() for callers that have no
 * originating netlink request: passes a NULL nlmsghdr and a pid of 0 and
 * returns that function's result unchanged.
 */
378 static int inet_insert_ifa(struct in_ifaddr *ifa)
380 return __inet_insert_ifa(ifa, NULL, 0);
383 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
385 struct in_device *in_dev = __in_dev_get_rtnl(dev);
393 ipv4_devconf_setall(in_dev);
394 if (ifa->ifa_dev != in_dev) {
395 WARN_ON(ifa->ifa_dev);
397 ifa->ifa_dev = in_dev;
399 if (ipv4_is_loopback(ifa->ifa_local))
400 ifa->ifa_scope = RT_SCOPE_HOST;
401 return inet_insert_ifa(ifa);
404 struct in_device *inetdev_by_index(struct net *net, int ifindex)
406 struct net_device *dev;
407 struct in_device *in_dev = NULL;
410 dev = dev_get_by_index_rcu(net, ifindex);
412 in_dev = in_dev_get(dev);
417 /* Called only from RTNL semaphored context. No locks. */
419 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
424 for_primary_ifa(in_dev) {
425 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
427 } endfor_ifa(in_dev);
431 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
433 struct net *net = sock_net(skb->sk);
434 struct nlattr *tb[IFA_MAX+1];
435 struct in_device *in_dev;
436 struct ifaddrmsg *ifm;
437 struct in_ifaddr *ifa, **ifap;
442 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
446 ifm = nlmsg_data(nlh);
447 in_dev = inetdev_by_index(net, ifm->ifa_index);
448 if (in_dev == NULL) {
453 __in_dev_put(in_dev);
455 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
456 ifap = &ifa->ifa_next) {
458 ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
461 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
464 if (tb[IFA_ADDRESS] &&
465 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
466 !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
469 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
473 err = -EADDRNOTAVAIL;
478 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
480 struct nlattr *tb[IFA_MAX+1];
481 struct in_ifaddr *ifa;
482 struct ifaddrmsg *ifm;
483 struct net_device *dev;
484 struct in_device *in_dev;
487 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
491 ifm = nlmsg_data(nlh);
493 if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
496 dev = __dev_get_by_index(net, ifm->ifa_index);
501 in_dev = __in_dev_get_rtnl(dev);
506 ifa = inet_alloc_ifa();
509 * A potential indev allocation can be left alive, it stays
510 * assigned to its device and is destroyed with it.
514 ipv4_devconf_setall(in_dev);
517 if (tb[IFA_ADDRESS] == NULL)
518 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
520 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
521 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
522 ifa->ifa_flags = ifm->ifa_flags;
523 ifa->ifa_scope = ifm->ifa_scope;
524 ifa->ifa_dev = in_dev;
526 ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
527 ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
529 if (tb[IFA_BROADCAST])
530 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
533 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
535 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
543 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
545 struct net *net = sock_net(skb->sk);
546 struct in_ifaddr *ifa;
550 ifa = rtm_to_ifaddr(net, nlh);
554 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
558 * Determine a default network mask, based on the IP address.
561 static __inline__ int inet_abc_len(__be32 addr)
563 int rc = -1; /* Something else, probably a multicast. */
565 if (ipv4_is_zeronet(addr))
568 __u32 haddr = ntohl(addr);
570 if (IN_CLASSA(haddr))
572 else if (IN_CLASSB(haddr))
574 else if (IN_CLASSC(haddr))
582 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
585 struct sockaddr_in sin_orig;
586 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
587 struct in_device *in_dev;
588 struct in_ifaddr **ifap = NULL;
589 struct in_ifaddr *ifa = NULL;
590 struct net_device *dev;
593 int tryaddrmatch = 0;
596 * Fetch the caller's info block into kernel space
599 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
601 ifr.ifr_name[IFNAMSIZ - 1] = 0;
603 /* save original address for comparison */
604 memcpy(&sin_orig, sin, sizeof(*sin));
606 colon = strchr(ifr.ifr_name, ':');
610 dev_load(net, ifr.ifr_name);
613 case SIOCGIFADDR: /* Get interface address */
614 case SIOCGIFBRDADDR: /* Get the broadcast address */
615 case SIOCGIFDSTADDR: /* Get the destination address */
616 case SIOCGIFNETMASK: /* Get the netmask for the interface */
617 /* Note that these ioctls will not sleep,
618 so that we do not impose a lock.
619 One day we will be forced to put shlock here (I mean SMP)
621 tryaddrmatch = (sin_orig.sin_family == AF_INET);
622 memset(sin, 0, sizeof(*sin));
623 sin->sin_family = AF_INET;
628 if (!capable(CAP_NET_ADMIN))
631 case SIOCSIFADDR: /* Set interface address (and family) */
632 case SIOCSIFBRDADDR: /* Set the broadcast address */
633 case SIOCSIFDSTADDR: /* Set the destination address */
634 case SIOCSIFNETMASK: /* Set the netmask for the interface */
636 if (!capable(CAP_NET_ADMIN))
639 if (sin->sin_family != AF_INET)
650 if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL)
656 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
658 /* Matthias Andree */
659 /* compare label and address (4.4BSD style) */
660 /* note: we only do this for a limited set of ioctls
661 and only if the original address family was AF_INET.
662 This is checked above. */
663 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
664 ifap = &ifa->ifa_next) {
665 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
666 sin_orig.sin_addr.s_addr ==
672 /* we didn't get a match, maybe the application is
673 4.3BSD-style and passed in junk so we fall back to
674 comparing just the label */
676 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
677 ifap = &ifa->ifa_next)
678 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
683 ret = -EADDRNOTAVAIL;
684 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
688 case SIOCGIFADDR: /* Get interface address */
689 sin->sin_addr.s_addr = ifa->ifa_local;
692 case SIOCGIFBRDADDR: /* Get the broadcast address */
693 sin->sin_addr.s_addr = ifa->ifa_broadcast;
696 case SIOCGIFDSTADDR: /* Get the destination address */
697 sin->sin_addr.s_addr = ifa->ifa_address;
700 case SIOCGIFNETMASK: /* Get the netmask for the interface */
701 sin->sin_addr.s_addr = ifa->ifa_mask;
706 ret = -EADDRNOTAVAIL;
710 if (!(ifr.ifr_flags & IFF_UP))
711 inet_del_ifa(in_dev, ifap, 1);
714 ret = dev_change_flags(dev, ifr.ifr_flags);
717 case SIOCSIFADDR: /* Set interface address (and family) */
719 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
724 if ((ifa = inet_alloc_ifa()) == NULL)
727 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
729 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
732 if (ifa->ifa_local == sin->sin_addr.s_addr)
734 inet_del_ifa(in_dev, ifap, 0);
735 ifa->ifa_broadcast = 0;
739 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
741 if (!(dev->flags & IFF_POINTOPOINT)) {
742 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
743 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
744 if ((dev->flags & IFF_BROADCAST) &&
745 ifa->ifa_prefixlen < 31)
746 ifa->ifa_broadcast = ifa->ifa_address |
749 ifa->ifa_prefixlen = 32;
750 ifa->ifa_mask = inet_make_mask(32);
752 ret = inet_set_ifa(dev, ifa);
755 case SIOCSIFBRDADDR: /* Set the broadcast address */
757 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
758 inet_del_ifa(in_dev, ifap, 0);
759 ifa->ifa_broadcast = sin->sin_addr.s_addr;
760 inet_insert_ifa(ifa);
764 case SIOCSIFDSTADDR: /* Set the destination address */
766 if (ifa->ifa_address == sin->sin_addr.s_addr)
769 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
772 inet_del_ifa(in_dev, ifap, 0);
773 ifa->ifa_address = sin->sin_addr.s_addr;
774 inet_insert_ifa(ifa);
777 case SIOCSIFNETMASK: /* Set the netmask for the interface */
780 * The mask we set must be legal.
783 if (bad_mask(sin->sin_addr.s_addr, 0))
786 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
787 __be32 old_mask = ifa->ifa_mask;
788 inet_del_ifa(in_dev, ifap, 0);
789 ifa->ifa_mask = sin->sin_addr.s_addr;
790 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
792 /* See if current broadcast address matches
793 * with current netmask, then recalculate
794 * the broadcast address. Otherwise it's a
795 * funny address, so don't touch it since
796 * the user seems to know what (s)he's doing...
798 if ((dev->flags & IFF_BROADCAST) &&
799 (ifa->ifa_prefixlen < 31) &&
800 (ifa->ifa_broadcast ==
801 (ifa->ifa_local|~old_mask))) {
802 ifa->ifa_broadcast = (ifa->ifa_local |
803 ~sin->sin_addr.s_addr);
805 inet_insert_ifa(ifa);
815 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
819 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
821 struct in_device *in_dev = __in_dev_get_rtnl(dev);
822 struct in_ifaddr *ifa;
826 if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
829 for (; ifa; ifa = ifa->ifa_next) {
834 if (len < (int) sizeof(ifr))
836 memset(&ifr, 0, sizeof(struct ifreq));
838 strcpy(ifr.ifr_name, ifa->ifa_label);
840 strcpy(ifr.ifr_name, dev->name);
842 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
843 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
846 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
850 buf += sizeof(struct ifreq);
851 len -= sizeof(struct ifreq);
852 done += sizeof(struct ifreq);
858 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
861 struct in_device *in_dev;
862 struct net *net = dev_net(dev);
865 in_dev = __in_dev_get_rcu(dev);
869 for_primary_ifa(in_dev) {
870 if (ifa->ifa_scope > scope)
872 if (!dst || inet_ifa_match(dst, ifa)) {
873 addr = ifa->ifa_local;
877 addr = ifa->ifa_local;
878 } endfor_ifa(in_dev);
884 /* Not loopback addresses on loopback should be preferred
885 in this case. It is important that lo is the first interface
888 for_each_netdev_rcu(net, dev) {
889 if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
892 for_primary_ifa(in_dev) {
893 if (ifa->ifa_scope != RT_SCOPE_LINK &&
894 ifa->ifa_scope <= scope) {
895 addr = ifa->ifa_local;
898 } endfor_ifa(in_dev);
906 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
907 __be32 local, int scope)
914 (local == ifa->ifa_local || !local) &&
915 ifa->ifa_scope <= scope) {
916 addr = ifa->ifa_local;
921 same = (!local || inet_ifa_match(local, ifa)) &&
922 (!dst || inet_ifa_match(dst, ifa));
926 /* Is the selected addr in the dst subnet? */
927 if (inet_ifa_match(addr, ifa))
929 /* No, then can we use new local src? */
930 if (ifa->ifa_scope <= scope) {
931 addr = ifa->ifa_local;
934 /* search for large dst subnet for addr */
938 } endfor_ifa(in_dev);
940 return same? addr : 0;
944 * Confirm that local IP address exists using wildcards:
945 * - in_dev: only on this interface, 0=any interface
946 * - dst: only in the same subnet as dst, 0=any dst
947 * - local: address, 0=autoselect the local address
948 * - scope: maximum allowed scope value for the local address
950 __be32 inet_confirm_addr(struct in_device *in_dev,
951 __be32 dst, __be32 local, int scope)
954 struct net_device *dev;
957 if (scope != RT_SCOPE_LINK)
958 return confirm_addr_indev(in_dev, dst, local, scope);
960 net = dev_net(in_dev->dev);
962 for_each_netdev_rcu(net, dev) {
963 if ((in_dev = __in_dev_get_rcu(dev))) {
964 addr = confirm_addr_indev(in_dev, dst, local, scope);
/*
 * Add @nb to the file-local blocking notifier chain inetaddr_chain, which
 * this file invokes with NETDEV_UP / NETDEV_DOWN and the affected
 * in_ifaddr. Returns the result of blocking_notifier_chain_register().
 */
978 int register_inetaddr_notifier(struct notifier_block *nb)
980 return blocking_notifier_chain_register(&inetaddr_chain, nb);
/*
 * Remove @nb from the file-local blocking notifier chain inetaddr_chain.
 * Returns the result of blocking_notifier_chain_unregister().
 */
983 int unregister_inetaddr_notifier(struct notifier_block *nb)
985 return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
988 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
989 * alias numbering and to create unique labels if possible.
991 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
993 struct in_ifaddr *ifa;
996 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
997 char old[IFNAMSIZ], *dot;
999 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1000 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1003 dot = strchr(old, ':');
1005 sprintf(old, ":%d", named);
1008 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1009 strcat(ifa->ifa_label, dot);
1011 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1014 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1018 static inline bool inetdev_valid_mtu(unsigned mtu)
1023 /* Called only under RTNL semaphore */
1025 static int inetdev_event(struct notifier_block *this, unsigned long event,
1028 struct net_device *dev = ptr;
1029 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1034 if (event == NETDEV_REGISTER) {
1035 in_dev = inetdev_init(dev);
1037 return notifier_from_errno(-ENOMEM);
1038 if (dev->flags & IFF_LOOPBACK) {
1039 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1040 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1042 } else if (event == NETDEV_CHANGEMTU) {
1043 /* Re-enabling IP */
1044 if (inetdev_valid_mtu(dev->mtu))
1045 in_dev = inetdev_init(dev);
1051 case NETDEV_REGISTER:
1052 printk(KERN_DEBUG "inetdev_event: bug\n");
1056 if (!inetdev_valid_mtu(dev->mtu))
1058 if (dev->flags & IFF_LOOPBACK) {
1059 struct in_ifaddr *ifa;
1060 if ((ifa = inet_alloc_ifa()) != NULL) {
1062 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1063 ifa->ifa_prefixlen = 8;
1064 ifa->ifa_mask = inet_make_mask(8);
1065 in_dev_hold(in_dev);
1066 ifa->ifa_dev = in_dev;
1067 ifa->ifa_scope = RT_SCOPE_HOST;
1068 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1069 inet_insert_ifa(ifa);
1074 case NETDEV_CHANGEADDR:
1075 /* Send gratuitous ARP to notify of link change */
1076 if (IN_DEV_ARP_NOTIFY(in_dev)) {
1077 struct in_ifaddr *ifa = in_dev->ifa_list;
1080 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1081 ifa->ifa_address, dev,
1082 ifa->ifa_address, NULL,
1083 dev->dev_addr, NULL);
1089 case NETDEV_BONDING_OLDTYPE:
1090 ip_mc_unmap(in_dev);
1092 case NETDEV_BONDING_NEWTYPE:
1093 ip_mc_remap(in_dev);
1095 case NETDEV_CHANGEMTU:
1096 if (inetdev_valid_mtu(dev->mtu))
1098 /* disable IP when MTU is not enough */
1099 case NETDEV_UNREGISTER:
1100 inetdev_destroy(in_dev);
1102 case NETDEV_CHANGENAME:
1103 /* Do not notify about label change, this event is
1104 * not interesting to applications using netlink.
1106 inetdev_changename(dev, in_dev);
1108 devinet_sysctl_unregister(in_dev);
1109 devinet_sysctl_register(in_dev);
/*
 * Notifier block whose callback is inetdev_event(); devinet_init() passes
 * it to register_netdevice_notifier().
 */
1116 static struct notifier_block ip_netdev_notifier = {
1117 .notifier_call = inetdev_event,
/*
 * Buffer size used by rtmsg_ifa() when allocating an address notification
 * with nlmsg_new(): the aligned ifaddrmsg header plus room for three
 * 4-byte attributes and one IFNAMSIZ-byte label attribute.
 */
1120 static inline size_t inet_nlmsg_size(void)
1122 return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1123 + nla_total_size(4) /* IFA_ADDRESS */
1124 + nla_total_size(4) /* IFA_LOCAL */
1125 + nla_total_size(4) /* IFA_BROADCAST */
1126 + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1129 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1130 u32 pid, u32 seq, int event, unsigned int flags)
1132 struct ifaddrmsg *ifm;
1133 struct nlmsghdr *nlh;
1135 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1139 ifm = nlmsg_data(nlh);
1140 ifm->ifa_family = AF_INET;
1141 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1142 ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1143 ifm->ifa_scope = ifa->ifa_scope;
1144 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1146 if (ifa->ifa_address)
1147 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1150 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1152 if (ifa->ifa_broadcast)
1153 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1155 if (ifa->ifa_label[0])
1156 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1158 return nlmsg_end(skb, nlh);
1161 nlmsg_cancel(skb, nlh);
1165 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1167 struct net *net = sock_net(skb->sk);
1169 struct net_device *dev;
1170 struct in_device *in_dev;
1171 struct in_ifaddr *ifa;
1172 int s_ip_idx, s_idx = cb->args[0];
1174 s_ip_idx = ip_idx = cb->args[1];
1176 for_each_netdev(net, dev) {
1181 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
1184 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1185 ifa = ifa->ifa_next, ip_idx++) {
1186 if (ip_idx < s_ip_idx)
1188 if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1190 RTM_NEWADDR, NLM_F_MULTI) <= 0)
1199 cb->args[1] = ip_idx;
1204 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1207 struct sk_buff *skb;
1208 u32 seq = nlh ? nlh->nlmsg_seq : 0;
1212 net = dev_net(ifa->ifa_dev->dev);
1213 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1217 err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1219 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1220 WARN_ON(err == -EMSGSIZE);
1224 rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1228 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1231 #ifdef CONFIG_SYSCTL
1233 static void devinet_copy_dflt_conf(struct net *net, int i)
1235 struct net_device *dev;
1238 for_each_netdev_rcu(net, dev) {
1239 struct in_device *in_dev;
1241 in_dev = __in_dev_get_rcu(dev);
1242 if (in_dev && !test_bit(i, in_dev->cnf.state))
1243 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1248 /* called with RTNL locked */
1249 static void inet_forward_change(struct net *net)
1251 struct net_device *dev;
1252 int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1254 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1255 IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1257 for_each_netdev(net, dev) {
1258 struct in_device *in_dev;
1260 dev_disable_lro(dev);
1262 in_dev = __in_dev_get_rcu(dev);
1264 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1269 static int devinet_conf_proc(ctl_table *ctl, int write,
1270 void __user *buffer,
1271 size_t *lenp, loff_t *ppos)
1273 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1276 struct ipv4_devconf *cnf = ctl->extra1;
1277 struct net *net = ctl->extra2;
1278 int i = (int *)ctl->data - cnf->data;
1280 set_bit(i, cnf->state);
1282 if (cnf == net->ipv4.devconf_dflt)
1283 devinet_copy_dflt_conf(net, i);
1289 static int devinet_conf_sysctl(ctl_table *table,
1290 void __user *oldval, size_t __user *oldlenp,
1291 void __user *newval, size_t newlen)
1293 struct ipv4_devconf *cnf;
1295 int *valp = table->data;
1299 if (!newval || !newlen)
1302 if (newlen != sizeof(int))
1305 if (get_user(new, (int __user *)newval))
1311 if (oldval && oldlenp) {
1314 if (get_user(len, oldlenp))
1318 if (len > table->maxlen)
1319 len = table->maxlen;
1320 if (copy_to_user(oldval, valp, len))
1322 if (put_user(len, oldlenp))
1329 cnf = table->extra1;
1330 net = table->extra2;
1331 i = (int *)table->data - cnf->data;
1333 set_bit(i, cnf->state);
1335 if (cnf == net->ipv4.devconf_dflt)
1336 devinet_copy_dflt_conf(net, i);
1341 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1342 void __user *buffer,
1343 size_t *lenp, loff_t *ppos)
1345 int *valp = ctl->data;
1347 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1349 if (write && *valp != val) {
1350 struct net *net = ctl->extra2;
1352 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1353 if (!rtnl_trylock())
1354 return restart_syscall();
1355 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1356 inet_forward_change(net);
1358 struct ipv4_devconf *cnf = ctl->extra1;
1359 struct in_device *idev =
1360 container_of(cnf, struct in_device, cnf);
1361 dev_disable_lro(idev->dev);
1364 rt_cache_flush(net, 0);
/*
 * sysctl proc handler: runs proc_dointvec() on the entry and, for a write
 * after which *valp differs from val, calls rt_cache_flush(net, 0) for the
 * namespace stored in ctl->extra2.
 * NOTE(review): the declaration of `val` is not among the lines visible
 * here; presumably it holds the value read before proc_dointvec() ran --
 * confirm against the full source.
 */
1371 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1372 void __user *buffer,
1373 size_t *lenp, loff_t *ppos)
1375 int *valp = ctl->data;
1377 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1378 struct net *net = ctl->extra2;
1380 if (write && *valp != val)
1381 rt_cache_flush(net, 0);
1386 int ipv4_doint_and_flush_strategy(ctl_table *table,
1387 void __user *oldval, size_t __user *oldlenp,
1388 void __user *newval, size_t newlen)
1390 int ret = devinet_conf_sysctl(table, oldval, oldlenp, newval, newlen);
1391 struct net *net = table->extra2;
1394 rt_cache_flush(net, 0);
1400 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc, sysctl) \
1402 .ctl_name = NET_IPV4_CONF_ ## attr, \
1404 .data = ipv4_devconf.data + \
1405 NET_IPV4_CONF_ ## attr - 1, \
1406 .maxlen = sizeof(int), \
1408 .proc_handler = proc, \
1409 .strategy = sysctl, \
1410 .extra1 = &ipv4_devconf, \
/*
 * Table entry for an ordinary devconf attribute: DEVINET_SYSCTL_ENTRY with
 * 0644 as the mval argument and the generic devinet_conf_proc /
 * devinet_conf_sysctl handlers.
 */
1413 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
1414 DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc, \
1415 devinet_conf_sysctl)
/*
 * Same as DEVINET_SYSCTL_RW_ENTRY except that 0444 is passed as the mval
 * argument; the handlers are still devinet_conf_proc / devinet_conf_sysctl.
 */
1417 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
1418 DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc, \
1419 devinet_conf_sysctl)
/*
 * Table entry with caller-supplied handlers: DEVINET_SYSCTL_ENTRY with
 * 0644 as the mval argument and the given proc / sysctl functions.
 */
1421 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc, sysctl) \
1422 DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc, sysctl)
/*
 * Table entry for attributes handled by ipv4_doint_and_flush /
 * ipv4_doint_and_flush_strategy, built on DEVINET_SYSCTL_COMPLEX_ENTRY.
 */
1424 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
1425 DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush, \
1426 ipv4_doint_and_flush_strategy)
1428 static struct devinet_sysctl_table {
1429 struct ctl_table_header *sysctl_header;
1430 struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
1432 } devinet_sysctl = {
1434 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1435 devinet_sysctl_forward,
1436 devinet_conf_sysctl),
1437 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1439 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1440 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1441 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1442 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1443 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1444 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1445 "accept_source_route"),
1446 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1447 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1448 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1449 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1450 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1451 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1452 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1453 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1454 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1455 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1457 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1458 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1459 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1460 "force_igmp_version"),
1461 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1462 "promote_secondaries"),
1466 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1467 int ctl_name, struct ipv4_devconf *p)
1470 struct devinet_sysctl_table *t;
1472 #define DEVINET_CTL_PATH_DEV 3
1474 struct ctl_path devinet_ctl_path[] = {
1475 { .procname = "net", .ctl_name = CTL_NET, },
1476 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1477 { .procname = "conf", .ctl_name = NET_IPV4_CONF, },
1478 { /* to be set */ },
1482 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1486 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1487 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1488 t->devinet_vars[i].extra1 = p;
1489 t->devinet_vars[i].extra2 = net;
1493 * Make a copy of dev_name, because '.procname' is regarded as const
1494 * by sysctl and we wouldn't want anyone to change it under our feet
1495 * (see SIOCSIFNAME).
1497 t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1501 devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1502 devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name;
1504 t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1506 if (!t->sysctl_header)
1520 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1522 struct devinet_sysctl_table *t = cnf->sysctl;
1528 unregister_sysctl_table(t->sysctl_header);
/*
 * Register the sysctl entries belonging to one in_device: first the
 * neighbour (ARP) parameters via neigh_sysctl_register(), then the
 * device's devconf table via __devinet_sysctl_register(), keyed by the
 * device name with the ifindex as ctl_name. The int result of
 * __devinet_sysctl_register() is not checked here.
 */
1533 static void devinet_sysctl_register(struct in_device *idev)
1535 neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
1536 NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1537 __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1538 idev->dev->ifindex, &idev->cnf);
/*
 * Undo devinet_sysctl_register() in reverse order: drop the devconf table
 * for idev->cnf first, then the neighbour sysctls for idev->arp_parms.
 */
1541 static void devinet_sysctl_unregister(struct in_device *idev)
1543 __devinet_sysctl_unregister(&idev->cnf);
1544 neigh_sysctl_unregister(idev->arp_parms);
1547 static struct ctl_table ctl_forward_entry[] = {
1549 .ctl_name = NET_IPV4_FORWARD,
1550 .procname = "ip_forward",
1551 .data = &ipv4_devconf.data[
1552 NET_IPV4_CONF_FORWARDING - 1],
1553 .maxlen = sizeof(int),
1555 .proc_handler = devinet_sysctl_forward,
1556 .strategy = devinet_conf_sysctl,
1557 .extra1 = &ipv4_devconf,
1558 .extra2 = &init_net,
1563 static __net_initdata struct ctl_path net_ipv4_path[] = {
1564 { .procname = "net", .ctl_name = CTL_NET, },
1565 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1570 static __net_init int devinet_init_net(struct net *net)
1573 struct ipv4_devconf *all, *dflt;
1574 #ifdef CONFIG_SYSCTL
1575 struct ctl_table *tbl = ctl_forward_entry;
1576 struct ctl_table_header *forw_hdr;
1580 all = &ipv4_devconf;
1581 dflt = &ipv4_devconf_dflt;
1583 if (net != &init_net) {
1584 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1588 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1590 goto err_alloc_dflt;
1592 #ifdef CONFIG_SYSCTL
1593 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1597 tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
1598 tbl[0].extra1 = all;
1599 tbl[0].extra2 = net;
1603 #ifdef CONFIG_SYSCTL
1604 err = __devinet_sysctl_register(net, "all",
1605 NET_PROTO_CONF_ALL, all);
1609 err = __devinet_sysctl_register(net, "default",
1610 NET_PROTO_CONF_DEFAULT, dflt);
1615 forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1616 if (forw_hdr == NULL)
1618 net->ipv4.forw_hdr = forw_hdr;
1621 net->ipv4.devconf_all = all;
1622 net->ipv4.devconf_dflt = dflt;
1625 #ifdef CONFIG_SYSCTL
1627 __devinet_sysctl_unregister(dflt);
1629 __devinet_sysctl_unregister(all);
1631 if (tbl != ctl_forward_entry)
1635 if (dflt != &ipv4_devconf_dflt)
1638 if (all != &ipv4_devconf)
1644 static __net_exit void devinet_exit_net(struct net *net)
1646 #ifdef CONFIG_SYSCTL
1647 struct ctl_table *tbl;
1649 tbl = net->ipv4.forw_hdr->ctl_table_arg;
1650 unregister_net_sysctl_table(net->ipv4.forw_hdr);
1651 __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1652 __devinet_sysctl_unregister(net->ipv4.devconf_all);
1655 kfree(net->ipv4.devconf_dflt);
1656 kfree(net->ipv4.devconf_all);
/*
 * Per-network-namespace hooks: devinet_init_net() on namespace setup and
 * devinet_exit_net() on teardown. Registered from devinet_init() through
 * register_pernet_subsys().
 */
1659 static __net_initdata struct pernet_operations devinet_ops = {
1660 .init = devinet_init_net,
1661 .exit = devinet_exit_net,
/*
 * Boot-time setup for this file. In order it registers: the per-namespace
 * operations (devinet_ops), the PF_INET SIOCGIFCONF helper
 * (inet_gifconf), the netdevice notifier (ip_netdev_notifier), and the
 * rtnetlink handlers -- RTM_NEWADDR and RTM_DELADDR as doit handlers,
 * RTM_GETADDR as a dump handler. None of the return values are checked.
 */
1664 void __init devinet_init(void)
1666 register_pernet_subsys(&devinet_ops);
1668 register_gifconf(PF_INET, inet_gifconf);
1669 register_netdevice_notifier(&ip_netdev_notifier);
1671 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1672 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1673 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1676 EXPORT_SYMBOL(in_dev_finish_destroy);
1677 EXPORT_SYMBOL(inet_select_addr);
1678 EXPORT_SYMBOL(inetdev_by_index);
1679 EXPORT_SYMBOL(register_inetaddr_notifier);
1680 EXPORT_SYMBOL(unregister_inetaddr_notifier);