net: Introduce for_each_netdev_rcu() iterator
[safe/jmp/linux-2.6] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <asm/system.h>
31 #include <linux/bitops.h>
32 #include <linux/capability.h>
33 #include <linux/module.h>
34 #include <linux/types.h>
35 #include <linux/kernel.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #ifdef CONFIG_SYSCTL
54 #include <linux/sysctl.h>
55 #endif
56 #include <linux/kmod.h>
57
58 #include <net/arp.h>
59 #include <net/ip.h>
60 #include <net/route.h>
61 #include <net/ip_fib.h>
62 #include <net/rtnetlink.h>
63 #include <net/net_namespace.h>
64
65 static struct ipv4_devconf ipv4_devconf = {
66         .data = {
67                 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
68                 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
69                 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
70                 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
71         },
72 };
73
74 static struct ipv4_devconf ipv4_devconf_dflt = {
75         .data = {
76                 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
77                 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
78                 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
79                 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
80                 [NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
81         },
82 };
83
/*
 * Read attribute @attr from the default device configuration of network
 * namespace @net.  @net is parenthesised so that any pointer expression
 * (e.g. "p + 1" or "cond ? a : b") can be passed safely.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
86
87 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
88         [IFA_LOCAL]             = { .type = NLA_U32 },
89         [IFA_ADDRESS]           = { .type = NLA_U32 },
90         [IFA_BROADCAST]         = { .type = NLA_U32 },
91         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
92 };
93
94 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
95
96 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
97 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
98                          int destroy);
99 #ifdef CONFIG_SYSCTL
100 static void devinet_sysctl_register(struct in_device *idev);
101 static void devinet_sysctl_unregister(struct in_device *idev);
102 #else
103 static inline void devinet_sysctl_register(struct in_device *idev)
104 {
105 }
106 static inline void devinet_sysctl_unregister(struct in_device *idev)
107 {
108 }
109 #endif
110
111 /* Locks all the inet devices. */
112
113 static struct in_ifaddr *inet_alloc_ifa(void)
114 {
115         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
116 }
117
118 static void inet_rcu_free_ifa(struct rcu_head *head)
119 {
120         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
121         if (ifa->ifa_dev)
122                 in_dev_put(ifa->ifa_dev);
123         kfree(ifa);
124 }
125
126 static inline void inet_free_ifa(struct in_ifaddr *ifa)
127 {
128         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
129 }
130
131 void in_dev_finish_destroy(struct in_device *idev)
132 {
133         struct net_device *dev = idev->dev;
134
135         WARN_ON(idev->ifa_list);
136         WARN_ON(idev->mc_list);
137 #ifdef NET_REFCNT_DEBUG
138         printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
139                idev, dev ? dev->name : "NIL");
140 #endif
141         dev_put(dev);
142         if (!idev->dead)
143                 printk("Freeing alive in_device %p\n", idev);
144         else {
145                 kfree(idev);
146         }
147 }
148
149 static struct in_device *inetdev_init(struct net_device *dev)
150 {
151         struct in_device *in_dev;
152
153         ASSERT_RTNL();
154
155         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
156         if (!in_dev)
157                 goto out;
158         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
159                         sizeof(in_dev->cnf));
160         in_dev->cnf.sysctl = NULL;
161         in_dev->dev = dev;
162         if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
163                 goto out_kfree;
164         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
165                 dev_disable_lro(dev);
166         /* Reference in_dev->dev */
167         dev_hold(dev);
168         /* Account for reference dev->ip_ptr (below) */
169         in_dev_hold(in_dev);
170
171         devinet_sysctl_register(in_dev);
172         ip_mc_init_dev(in_dev);
173         if (dev->flags & IFF_UP)
174                 ip_mc_up(in_dev);
175
176         /* we can receive as soon as ip_ptr is set -- do this last */
177         rcu_assign_pointer(dev->ip_ptr, in_dev);
178 out:
179         return in_dev;
180 out_kfree:
181         kfree(in_dev);
182         in_dev = NULL;
183         goto out;
184 }
185
186 static void in_dev_rcu_put(struct rcu_head *head)
187 {
188         struct in_device *idev = container_of(head, struct in_device, rcu_head);
189         in_dev_put(idev);
190 }
191
192 static void inetdev_destroy(struct in_device *in_dev)
193 {
194         struct in_ifaddr *ifa;
195         struct net_device *dev;
196
197         ASSERT_RTNL();
198
199         dev = in_dev->dev;
200
201         in_dev->dead = 1;
202
203         ip_mc_destroy_dev(in_dev);
204
205         while ((ifa = in_dev->ifa_list) != NULL) {
206                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
207                 inet_free_ifa(ifa);
208         }
209
210         dev->ip_ptr = NULL;
211
212         devinet_sysctl_unregister(in_dev);
213         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
214         arp_ifdown(dev);
215
216         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
217 }
218
219 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
220 {
221         rcu_read_lock();
222         for_primary_ifa(in_dev) {
223                 if (inet_ifa_match(a, ifa)) {
224                         if (!b || inet_ifa_match(b, ifa)) {
225                                 rcu_read_unlock();
226                                 return 1;
227                         }
228                 }
229         } endfor_ifa(in_dev);
230         rcu_read_unlock();
231         return 0;
232 }
233
/*
 * Remove the address *ifap from in_dev's address list and announce it.
 * Must be called with RTNL held.
 *
 * ifap    - link (pointer to the list slot) that currently points at the
 *           address to remove
 * destroy - when non-zero the removed address is also released with
 *           inet_free_ifa(); when zero the caller keeps ownership
 * nlh/pid - passed through unchanged to rtmsg_ifa()
 *
 * If the removed address is a primary one, the secondaries that follow it
 * with the same mask and matching its address are either deleted as well
 * or, when IN_DEV_PROMOTE_SECONDARIES() is set, the first of them is
 * promoted to primary.
 */
234 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
235                          int destroy, struct nlmsghdr *nlh, u32 pid)
236 {
237         struct in_ifaddr *promote = NULL;
238         struct in_ifaddr *ifa, *ifa1 = *ifap;
239         struct in_ifaddr *last_prim = in_dev->ifa_list;
240         struct in_ifaddr *prev_prom = NULL;
241         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
242
243         ASSERT_RTNL();
244
245         /* 1. Deleting primary ifaddr forces deletion all secondaries
246          * unless alias promotion is set
247          **/
248
249         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
250                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
251
252                 while ((ifa = *ifap1) != NULL) {
                        /* Track the last primary after ifa1 whose scope
                         * value is not below ifa1's; a promoted address
                         * is re-linked behind it further down. */
253                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
254                             ifa1->ifa_scope <= ifa->ifa_scope)
255                                 last_prim = ifa;
256
                        /* Skip entries that are not secondaries of ifa1
                         * (primary, different mask, or different subnet). */
257                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
258                             ifa1->ifa_mask != ifa->ifa_mask ||
259                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
260                                 ifap1 = &ifa->ifa_next;
261                                 prev_prom = ifa;
262                                 continue;
263                         }
264
265                         if (!do_promote) {
                                /* No promotion: unlink, announce and free
                                 * this secondary right away. */
266                                 *ifap1 = ifa->ifa_next;
267
268                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
269                                 blocking_notifier_call_chain(&inetaddr_chain,
270                                                 NETDEV_DOWN, ifa);
271                                 inet_free_ifa(ifa);
272                         } else {
                                /* First matching secondary becomes the
                                 * promotion candidate; stop scanning. */
273                                 promote = ifa;
274                                 break;
275                         }
276                 }
277         }
278
279         /* 2. Unlink it */
280
281         *ifap = ifa1->ifa_next;
282
283         /* 3. Announce address deletion */
284
285         /* Send message first, then call notifier.
286            At first sight, FIB update triggered by notifier
287            will refer to already deleted ifaddr, that could confuse
288            netlink listeners. It is not true: look, gated sees
289            that route deleted and if it still thinks that ifaddr
290            is valid, it will try to restore deleted routes... Grr.
291            So that, this order is correct.
292          */
293         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
294         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
295
296         if (promote) {
297
                /* prev_prom is only set when at least one entry was
                 * skipped before the candidate; in that case move the
                 * candidate so that it follows last_prim. */
298                 if (prev_prom) {
299                         prev_prom->ifa_next = promote->ifa_next;
300                         promote->ifa_next = last_prim->ifa_next;
301                         last_prim->ifa_next = promote;
302                 }
303
304                 promote->ifa_flags &= ~IFA_F_SECONDARY;
305                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
306                 blocking_notifier_call_chain(&inetaddr_chain,
307                                 NETDEV_UP, promote);
                /* Call fib_add_ifaddr() for every later address sharing
                 * ifa1's mask and subnet. */
308                 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
309                         if (ifa1->ifa_mask != ifa->ifa_mask ||
310                             !inet_ifa_match(ifa1->ifa_address, ifa))
311                                         continue;
312                         fib_add_ifaddr(ifa);
313                 }
314
315         }
316         if (destroy)
317                 inet_free_ifa(ifa1);
318 }
319
320 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
321                          int destroy)
322 {
323         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
324 }
325
326 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
327                              u32 pid)
328 {
329         struct in_device *in_dev = ifa->ifa_dev;
330         struct in_ifaddr *ifa1, **ifap, **last_primary;
331
332         ASSERT_RTNL();
333
334         if (!ifa->ifa_local) {
335                 inet_free_ifa(ifa);
336                 return 0;
337         }
338
339         ifa->ifa_flags &= ~IFA_F_SECONDARY;
340         last_primary = &in_dev->ifa_list;
341
342         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
343              ifap = &ifa1->ifa_next) {
344                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
345                     ifa->ifa_scope <= ifa1->ifa_scope)
346                         last_primary = &ifa1->ifa_next;
347                 if (ifa1->ifa_mask == ifa->ifa_mask &&
348                     inet_ifa_match(ifa1->ifa_address, ifa)) {
349                         if (ifa1->ifa_local == ifa->ifa_local) {
350                                 inet_free_ifa(ifa);
351                                 return -EEXIST;
352                         }
353                         if (ifa1->ifa_scope != ifa->ifa_scope) {
354                                 inet_free_ifa(ifa);
355                                 return -EINVAL;
356                         }
357                         ifa->ifa_flags |= IFA_F_SECONDARY;
358                 }
359         }
360
361         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
362                 net_srandom(ifa->ifa_local);
363                 ifap = last_primary;
364         }
365
366         ifa->ifa_next = *ifap;
367         *ifap = ifa;
368
369         /* Send message first, then call notifier.
370            Notifier will trigger FIB update, so that
371            listeners of netlink will know about new ifaddr */
372         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
373         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
374
375         return 0;
376 }
377
378 static int inet_insert_ifa(struct in_ifaddr *ifa)
379 {
380         return __inet_insert_ifa(ifa, NULL, 0);
381 }
382
383 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
384 {
385         struct in_device *in_dev = __in_dev_get_rtnl(dev);
386
387         ASSERT_RTNL();
388
389         if (!in_dev) {
390                 inet_free_ifa(ifa);
391                 return -ENOBUFS;
392         }
393         ipv4_devconf_setall(in_dev);
394         if (ifa->ifa_dev != in_dev) {
395                 WARN_ON(ifa->ifa_dev);
396                 in_dev_hold(in_dev);
397                 ifa->ifa_dev = in_dev;
398         }
399         if (ipv4_is_loopback(ifa->ifa_local))
400                 ifa->ifa_scope = RT_SCOPE_HOST;
401         return inet_insert_ifa(ifa);
402 }
403
404 struct in_device *inetdev_by_index(struct net *net, int ifindex)
405 {
406         struct net_device *dev;
407         struct in_device *in_dev = NULL;
408
409         rcu_read_lock();
410         dev = dev_get_by_index_rcu(net, ifindex);
411         if (dev)
412                 in_dev = in_dev_get(dev);
413         rcu_read_unlock();
414         return in_dev;
415 }
416
417 /* Called only from RTNL semaphored context. No locks. */
418
419 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
420                                     __be32 mask)
421 {
422         ASSERT_RTNL();
423
424         for_primary_ifa(in_dev) {
425                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
426                         return ifa;
427         } endfor_ifa(in_dev);
428         return NULL;
429 }
430
431 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
432 {
433         struct net *net = sock_net(skb->sk);
434         struct nlattr *tb[IFA_MAX+1];
435         struct in_device *in_dev;
436         struct ifaddrmsg *ifm;
437         struct in_ifaddr *ifa, **ifap;
438         int err = -EINVAL;
439
440         ASSERT_RTNL();
441
442         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
443         if (err < 0)
444                 goto errout;
445
446         ifm = nlmsg_data(nlh);
447         in_dev = inetdev_by_index(net, ifm->ifa_index);
448         if (in_dev == NULL) {
449                 err = -ENODEV;
450                 goto errout;
451         }
452
453         __in_dev_put(in_dev);
454
455         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
456              ifap = &ifa->ifa_next) {
457                 if (tb[IFA_LOCAL] &&
458                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
459                         continue;
460
461                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
462                         continue;
463
464                 if (tb[IFA_ADDRESS] &&
465                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
466                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
467                         continue;
468
469                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
470                 return 0;
471         }
472
473         err = -EADDRNOTAVAIL;
474 errout:
475         return err;
476 }
477
478 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
479 {
480         struct nlattr *tb[IFA_MAX+1];
481         struct in_ifaddr *ifa;
482         struct ifaddrmsg *ifm;
483         struct net_device *dev;
484         struct in_device *in_dev;
485         int err;
486
487         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
488         if (err < 0)
489                 goto errout;
490
491         ifm = nlmsg_data(nlh);
492         err = -EINVAL;
493         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
494                 goto errout;
495
496         dev = __dev_get_by_index(net, ifm->ifa_index);
497         err = -ENODEV;
498         if (dev == NULL)
499                 goto errout;
500
501         in_dev = __in_dev_get_rtnl(dev);
502         err = -ENOBUFS;
503         if (in_dev == NULL)
504                 goto errout;
505
506         ifa = inet_alloc_ifa();
507         if (ifa == NULL)
508                 /*
509                  * A potential indev allocation can be left alive, it stays
510                  * assigned to its device and is destroy with it.
511                  */
512                 goto errout;
513
514         ipv4_devconf_setall(in_dev);
515         in_dev_hold(in_dev);
516
517         if (tb[IFA_ADDRESS] == NULL)
518                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
519
520         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
521         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
522         ifa->ifa_flags = ifm->ifa_flags;
523         ifa->ifa_scope = ifm->ifa_scope;
524         ifa->ifa_dev = in_dev;
525
526         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
527         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
528
529         if (tb[IFA_BROADCAST])
530                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
531
532         if (tb[IFA_LABEL])
533                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
534         else
535                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
536
537         return ifa;
538
539 errout:
540         return ERR_PTR(err);
541 }
542
543 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
544 {
545         struct net *net = sock_net(skb->sk);
546         struct in_ifaddr *ifa;
547
548         ASSERT_RTNL();
549
550         ifa = rtm_to_ifaddr(net, nlh);
551         if (IS_ERR(ifa))
552                 return PTR_ERR(ifa);
553
554         return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
555 }
556
557 /*
558  *      Determine a default network mask, based on the IP address.
559  */
560
561 static __inline__ int inet_abc_len(__be32 addr)
562 {
563         int rc = -1;    /* Something else, probably a multicast. */
564
565         if (ipv4_is_zeronet(addr))
566                 rc = 0;
567         else {
568                 __u32 haddr = ntohl(addr);
569
570                 if (IN_CLASSA(haddr))
571                         rc = 8;
572                 else if (IN_CLASSB(haddr))
573                         rc = 16;
574                 else if (IN_CLASSC(haddr))
575                         rc = 24;
576         }
577
578         return rc;
579 }
580
581
/*
 * Handle the IPv4 interface-address ioctls.
 *
 * net - network namespace used for module loading and device lookup
 * cmd - one of SIOCGIFADDR, SIOCGIFBRDADDR, SIOCGIFDSTADDR,
 *       SIOCGIFNETMASK, SIOCSIFFLAGS, SIOCSIFADDR, SIOCSIFBRDADDR,
 *       SIOCSIFDSTADDR, SIOCSIFNETMASK; anything else gives -EINVAL
 * arg - user pointer to a struct ifreq; read on entry and, for the
 *       SIOCGIF* commands, written back with the result
 *
 * The SIOCSIF* commands require CAP_NET_ADMIN (-EACCES otherwise) and,
 * except for SIOCSIFFLAGS, an AF_INET address in the request (-EINVAL).
 * Device and address manipulation is done under rtnl_lock().
 *
 * Returns 0 on success or a negative errno (-EFAULT, -EACCES, -EINVAL,
 * -ENODEV, -EADDRNOTAVAIL, -ENOBUFS, or whatever the called helper
 * returned).
 */
582 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
583 {
584         struct ifreq ifr;
585         struct sockaddr_in sin_orig;
586         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
587         struct in_device *in_dev;
588         struct in_ifaddr **ifap = NULL;
589         struct in_ifaddr *ifa = NULL;
590         struct net_device *dev;
591         char *colon;
592         int ret = -EFAULT;
593         int tryaddrmatch = 0;
594
595         /*
596          *      Fetch the caller's info block into kernel space
597          */
598
599         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
600                 goto out;
601         ifr.ifr_name[IFNAMSIZ - 1] = 0;
602
603         /* save original address for comparison */
604         memcpy(&sin_orig, sin, sizeof(*sin));
605
            /* Cut an alias name ("dev:label") at the colon so that
             * dev_load() and the device lookup below see the bare device
             * name; the colon is put back once the device is found. */
606         colon = strchr(ifr.ifr_name, ':');
607         if (colon)
608                 *colon = 0;
609
610         dev_load(net, ifr.ifr_name);
611
612         switch (cmd) {
613         case SIOCGIFADDR:       /* Get interface address */
614         case SIOCGIFBRDADDR:    /* Get the broadcast address */
615         case SIOCGIFDSTADDR:    /* Get the destination address */
616         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
617                 /* Note that these ioctls will not sleep,
618                    so that we do not impose a lock.
619                    One day we will be forced to put shlock here (I mean SMP)
620                  */
621                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
622                 memset(sin, 0, sizeof(*sin));
623                 sin->sin_family = AF_INET;
624                 break;
625
626         case SIOCSIFFLAGS:
627                 ret = -EACCES;
628                 if (!capable(CAP_NET_ADMIN))
629                         goto out;
630                 break;
631         case SIOCSIFADDR:       /* Set interface address (and family) */
632         case SIOCSIFBRDADDR:    /* Set the broadcast address */
633         case SIOCSIFDSTADDR:    /* Set the destination address */
634         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
635                 ret = -EACCES;
636                 if (!capable(CAP_NET_ADMIN))
637                         goto out;
638                 ret = -EINVAL;
639                 if (sin->sin_family != AF_INET)
640                         goto out;
641                 break;
642         default:
643                 ret = -EINVAL;
644                 goto out;
645         }
646
647         rtnl_lock();
648
649         ret = -ENODEV;
650         if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL)
651                 goto done;
652
653         if (colon)
654                 *colon = ':';
655
656         if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
657                 if (tryaddrmatch) {
658                         /* Matthias Andree */
659                         /* compare label and address (4.4BSD style) */
660                         /* note: we only do this for a limited set of ioctls
661                            and only if the original address family was AF_INET.
662                            This is checked above. */
663                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
664                              ifap = &ifa->ifa_next) {
665                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
666                                     sin_orig.sin_addr.s_addr ==
667                                                         ifa->ifa_address) {
668                                         break; /* found */
669                                 }
670                         }
671                 }
672                 /* we didn't get a match, maybe the application is
673                    4.3BSD-style and passed in junk so we fall back to
674                    comparing just the label */
675                 if (!ifa) {
676                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
677                              ifap = &ifa->ifa_next)
678                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
679                                         break;
680                 }
681         }
682
            /* Only SIOCSIFADDR and SIOCSIFFLAGS may proceed without an
             * existing address; every other command needs one. */
683         ret = -EADDRNOTAVAIL;
684         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
685                 goto done;
686
687         switch (cmd) {
688         case SIOCGIFADDR:       /* Get interface address */
689                 sin->sin_addr.s_addr = ifa->ifa_local;
690                 goto rarok;
691
692         case SIOCGIFBRDADDR:    /* Get the broadcast address */
693                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
694                 goto rarok;
695
696         case SIOCGIFDSTADDR:    /* Get the destination address */
697                 sin->sin_addr.s_addr = ifa->ifa_address;
698                 goto rarok;
699
700         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
701                 sin->sin_addr.s_addr = ifa->ifa_mask;
702                 goto rarok;
703
704         case SIOCSIFFLAGS:
                    /* On an alias name only "down" has an effect: it
                     * deletes that address.  On a plain device name the
                     * flags are handed to dev_change_flags(). */
705                 if (colon) {
706                         ret = -EADDRNOTAVAIL;
707                         if (!ifa)
708                                 break;
709                         ret = 0;
710                         if (!(ifr.ifr_flags & IFF_UP))
711                                 inet_del_ifa(in_dev, ifap, 1);
712                         break;
713                 }
714                 ret = dev_change_flags(dev, ifr.ifr_flags);
715                 break;
716
717         case SIOCSIFADDR:       /* Set interface address (and family) */
718                 ret = -EINVAL;
719                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
720                         break;
721
722                 if (!ifa) {
723                         ret = -ENOBUFS;
724                         if ((ifa = inet_alloc_ifa()) == NULL)
725                                 break;
726                         if (colon)
727                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
728                         else
729                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
730                 } else {
731                         ret = 0;
732                         if (ifa->ifa_local == sin->sin_addr.s_addr)
733                                 break;
                            /* Unlink without freeing (destroy == 0); the
                             * same ifa is re-inserted by inet_set_ifa(). */
734                         inet_del_ifa(in_dev, ifap, 0);
735                         ifa->ifa_broadcast = 0;
736                         ifa->ifa_scope = 0;
737                 }
738
739                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
740
                    /* Non point-to-point devices get the classful default
                     * mask from inet_abc_len(); point-to-point gets /32. */
741                 if (!(dev->flags & IFF_POINTOPOINT)) {
742                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
743                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
744                         if ((dev->flags & IFF_BROADCAST) &&
745                             ifa->ifa_prefixlen < 31)
746                                 ifa->ifa_broadcast = ifa->ifa_address |
747                                                      ~ifa->ifa_mask;
748                 } else {
749                         ifa->ifa_prefixlen = 32;
750                         ifa->ifa_mask = inet_make_mask(32);
751                 }
752                 ret = inet_set_ifa(dev, ifa);
753                 break;
754
755         case SIOCSIFBRDADDR:    /* Set the broadcast address */
756                 ret = 0;
757                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
758                         inet_del_ifa(in_dev, ifap, 0);
759                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
                            /* NOTE(review): the result of inet_insert_ifa()
                             * is not checked here; ret stays 0. */
760                         inet_insert_ifa(ifa);
761                 }
762                 break;
763
764         case SIOCSIFDSTADDR:    /* Set the destination address */
765                 ret = 0;
766                 if (ifa->ifa_address == sin->sin_addr.s_addr)
767                         break;
768                 ret = -EINVAL;
769                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
770                         break;
771                 ret = 0;
772                 inet_del_ifa(in_dev, ifap, 0);
773                 ifa->ifa_address = sin->sin_addr.s_addr;
774                 inet_insert_ifa(ifa);
775                 break;
776
777         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
778
779                 /*
780                  *      The mask we set must be legal.
781                  */
782                 ret = -EINVAL;
783                 if (bad_mask(sin->sin_addr.s_addr, 0))
784                         break;
785                 ret = 0;
786                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
787                         __be32 old_mask = ifa->ifa_mask;
788                         inet_del_ifa(in_dev, ifap, 0);
789                         ifa->ifa_mask = sin->sin_addr.s_addr;
790                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
791
792                         /* See if current broadcast address matches
793                          * with current netmask, then recalculate
794                          * the broadcast address. Otherwise it's a
795                          * funny address, so don't touch it since
796                          * the user seems to know what (s)he's doing...
797                          */
798                         if ((dev->flags & IFF_BROADCAST) &&
799                             (ifa->ifa_prefixlen < 31) &&
800                             (ifa->ifa_broadcast ==
801                              (ifa->ifa_local|~old_mask))) {
802                                 ifa->ifa_broadcast = (ifa->ifa_local |
803                                                       ~sin->sin_addr.s_addr);
804                         }
805                         inet_insert_ifa(ifa);
806                 }
807                 break;
808         }
    /* done: leave with RTNL held -> unlock; out: leave without RTNL. */
809 done:
810         rtnl_unlock();
811 out:
812         return ret;
    /* rarok: SIOCGIF* success path; RTNL is dropped before the filled-in
     * ifreq is copied back to user space. */
813 rarok:
814         rtnl_unlock();
815         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
816         goto out;
817 }
818
819 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
820 {
821         struct in_device *in_dev = __in_dev_get_rtnl(dev);
822         struct in_ifaddr *ifa;
823         struct ifreq ifr;
824         int done = 0;
825
826         if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
827                 goto out;
828
829         for (; ifa; ifa = ifa->ifa_next) {
830                 if (!buf) {
831                         done += sizeof(ifr);
832                         continue;
833                 }
834                 if (len < (int) sizeof(ifr))
835                         break;
836                 memset(&ifr, 0, sizeof(struct ifreq));
837                 if (ifa->ifa_label)
838                         strcpy(ifr.ifr_name, ifa->ifa_label);
839                 else
840                         strcpy(ifr.ifr_name, dev->name);
841
842                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
843                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
844                                                                 ifa->ifa_local;
845
846                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
847                         done = -EFAULT;
848                         break;
849                 }
850                 buf  += sizeof(struct ifreq);
851                 len  -= sizeof(struct ifreq);
852                 done += sizeof(struct ifreq);
853         }
854 out:
855         return done;
856 }
857
858 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
859 {
860         __be32 addr = 0;
861         struct in_device *in_dev;
862         struct net *net = dev_net(dev);
863
864         rcu_read_lock();
865         in_dev = __in_dev_get_rcu(dev);
866         if (!in_dev)
867                 goto no_in_dev;
868
869         for_primary_ifa(in_dev) {
870                 if (ifa->ifa_scope > scope)
871                         continue;
872                 if (!dst || inet_ifa_match(dst, ifa)) {
873                         addr = ifa->ifa_local;
874                         break;
875                 }
876                 if (!addr)
877                         addr = ifa->ifa_local;
878         } endfor_ifa(in_dev);
879
880 no_in_dev:
881         if (addr)
882                 goto out_unlock;
883
884         /* Not loopback addresses on loopback should be preferred
885            in this case. It is importnat that lo is the first interface
886            in dev_base list.
887          */
888         for_each_netdev_rcu(net, dev) {
889                 if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
890                         continue;
891
892                 for_primary_ifa(in_dev) {
893                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
894                             ifa->ifa_scope <= scope) {
895                                 addr = ifa->ifa_local;
896                                 goto out_unlock;
897                         }
898                 } endfor_ifa(in_dev);
899         }
900 out_unlock:
901         rcu_read_unlock();
902 out:
903         return addr;
904 }
905
/*
 * Search the addresses of one in_device for a local address satisfying the
 * (dst, local, scope) constraints; a zero dst or local acts as a wildcard.
 *
 * Two pieces of state are tracked while walking the list:
 *   addr - candidate address: the first ifa_local that equals @local (or any
 *          ifa_local when @local is 0) with ifa_scope <= @scope;
 *   same - set once an entry's subnet matches both @local and @dst
 *          (each test is skipped when the corresponding argument is 0).
 *
 * Returns addr when a subnet match was recorded, otherwise 0.
 */
906 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
907                               __be32 local, int scope)
908 {
909         int same = 0;
910         __be32 addr = 0;
911
912         for_ifa(in_dev) {
                /* Pick the first acceptable candidate; stop if a subnet
                 * match was already seen on an earlier entry.
                 */
913                 if (!addr &&
914                     (local == ifa->ifa_local || !local) &&
915                     ifa->ifa_scope <= scope) {
916                         addr = ifa->ifa_local;
917                         if (same)
918                                 break;
919                 }
920                 if (!same) {
921                         same = (!local || inet_ifa_match(local, ifa)) &&
922                                 (!dst || inet_ifa_match(dst, ifa));
923                         if (same && addr) {
                                /* An explicit local, or no dst constraint:
                                 * the candidate already chosen is final.
                                 */
924                                 if (local || !dst)
925                                         break;
926                                 /* Is the selected addr into dst subnet? */
927                                 if (inet_ifa_match(addr, ifa))
928                                         break;
929                                 /* No, then can we use new local src? */
930                                 if (ifa->ifa_scope <= scope) {
931                                         addr = ifa->ifa_local;
932                                         break;
933                                 }
934                                 /* search for large dst subnet for addr */
935                                 same = 0;
936                         }
937                 }
938         } endfor_ifa(in_dev);
939
940         return same? addr : 0;
941 }
942
943 /*
944  * Confirm that local IP address exists using wildcards:
945  * - in_dev: only on this interface, 0=any interface
946  * - dst: only in the same subnet as dst, 0=any dst
947  * - local: address, 0=autoselect the local address
948  * - scope: maximum allowed scope value for the local address
949  */
950 __be32 inet_confirm_addr(struct in_device *in_dev,
951                          __be32 dst, __be32 local, int scope)
952 {
953         __be32 addr = 0;
954         struct net_device *dev;
955         struct net *net;
956
957         if (scope != RT_SCOPE_LINK)
958                 return confirm_addr_indev(in_dev, dst, local, scope);
959
960         net = dev_net(in_dev->dev);
961         rcu_read_lock();
962         for_each_netdev_rcu(net, dev) {
963                 if ((in_dev = __in_dev_get_rcu(dev))) {
964                         addr = confirm_addr_indev(in_dev, dst, local, scope);
965                         if (addr)
966                                 break;
967                 }
968         }
969         rcu_read_unlock();
970
971         return addr;
972 }
973
974 /*
975  *      Device notifier
976  */
977
978 int register_inetaddr_notifier(struct notifier_block *nb)
979 {
980         return blocking_notifier_chain_register(&inetaddr_chain, nb);
981 }
982
983 int unregister_inetaddr_notifier(struct notifier_block *nb)
984 {
985         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
986 }
987
988 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
989  * alias numbering and to create unique labels if possible.
990 */
991 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
992 {
993         struct in_ifaddr *ifa;
994         int named = 0;
995
996         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
997                 char old[IFNAMSIZ], *dot;
998
999                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1000                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1001                 if (named++ == 0)
1002                         goto skip;
1003                 dot = strchr(old, ':');
1004                 if (dot == NULL) {
1005                         sprintf(old, ":%d", named);
1006                         dot = old;
1007                 }
1008                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1009                         strcat(ifa->ifa_label, dot);
1010                 } else {
1011                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1012                 }
1013 skip:
1014                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1015         }
1016 }
1017
/* Return true when @mtu is at least 68 bytes, the smallest MTU accepted for IPv4 here. */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
	const unsigned min_mtu = 68;

	if (mtu < min_mtu)
		return false;
	return true;
}
1022
1023 /* Called only under RTNL semaphore */
1024
1025 static int inetdev_event(struct notifier_block *this, unsigned long event,
1026                          void *ptr)
1027 {
1028         struct net_device *dev = ptr;
1029         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1030
1031         ASSERT_RTNL();
1032
1033         if (!in_dev) {
1034                 if (event == NETDEV_REGISTER) {
1035                         in_dev = inetdev_init(dev);
1036                         if (!in_dev)
1037                                 return notifier_from_errno(-ENOMEM);
1038                         if (dev->flags & IFF_LOOPBACK) {
1039                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1040                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1041                         }
1042                 } else if (event == NETDEV_CHANGEMTU) {
1043                         /* Re-enabling IP */
1044                         if (inetdev_valid_mtu(dev->mtu))
1045                                 in_dev = inetdev_init(dev);
1046                 }
1047                 goto out;
1048         }
1049
1050         switch (event) {
1051         case NETDEV_REGISTER:
1052                 printk(KERN_DEBUG "inetdev_event: bug\n");
1053                 dev->ip_ptr = NULL;
1054                 break;
1055         case NETDEV_UP:
1056                 if (!inetdev_valid_mtu(dev->mtu))
1057                         break;
1058                 if (dev->flags & IFF_LOOPBACK) {
1059                         struct in_ifaddr *ifa;
1060                         if ((ifa = inet_alloc_ifa()) != NULL) {
1061                                 ifa->ifa_local =
1062                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1063                                 ifa->ifa_prefixlen = 8;
1064                                 ifa->ifa_mask = inet_make_mask(8);
1065                                 in_dev_hold(in_dev);
1066                                 ifa->ifa_dev = in_dev;
1067                                 ifa->ifa_scope = RT_SCOPE_HOST;
1068                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1069                                 inet_insert_ifa(ifa);
1070                         }
1071                 }
1072                 ip_mc_up(in_dev);
1073                 /* fall through */
1074         case NETDEV_CHANGEADDR:
1075                 /* Send gratuitous ARP to notify of link change */
1076                 if (IN_DEV_ARP_NOTIFY(in_dev)) {
1077                         struct in_ifaddr *ifa = in_dev->ifa_list;
1078
1079                         if (ifa)
1080                                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1081                                          ifa->ifa_address, dev,
1082                                          ifa->ifa_address, NULL,
1083                                          dev->dev_addr, NULL);
1084                 }
1085                 break;
1086         case NETDEV_DOWN:
1087                 ip_mc_down(in_dev);
1088                 break;
1089         case NETDEV_BONDING_OLDTYPE:
1090                 ip_mc_unmap(in_dev);
1091                 break;
1092         case NETDEV_BONDING_NEWTYPE:
1093                 ip_mc_remap(in_dev);
1094                 break;
1095         case NETDEV_CHANGEMTU:
1096                 if (inetdev_valid_mtu(dev->mtu))
1097                         break;
1098                 /* disable IP when MTU is not enough */
1099         case NETDEV_UNREGISTER:
1100                 inetdev_destroy(in_dev);
1101                 break;
1102         case NETDEV_CHANGENAME:
1103                 /* Do not notify about label change, this event is
1104                  * not interesting to applications using netlink.
1105                  */
1106                 inetdev_changename(dev, in_dev);
1107
1108                 devinet_sysctl_unregister(in_dev);
1109                 devinet_sysctl_register(in_dev);
1110                 break;
1111         }
1112 out:
1113         return NOTIFY_DONE;
1114 }
1115
/* Notifier block that routes netdevice events to inetdev_event(). */
1116 static struct notifier_block ip_netdev_notifier = {
1117         .notifier_call = inetdev_event,
1118 };
1119
1120 static inline size_t inet_nlmsg_size(void)
1121 {
1122         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1123                + nla_total_size(4) /* IFA_ADDRESS */
1124                + nla_total_size(4) /* IFA_LOCAL */
1125                + nla_total_size(4) /* IFA_BROADCAST */
1126                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1127 }
1128
1129 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1130                             u32 pid, u32 seq, int event, unsigned int flags)
1131 {
1132         struct ifaddrmsg *ifm;
1133         struct nlmsghdr  *nlh;
1134
1135         nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1136         if (nlh == NULL)
1137                 return -EMSGSIZE;
1138
1139         ifm = nlmsg_data(nlh);
1140         ifm->ifa_family = AF_INET;
1141         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1142         ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1143         ifm->ifa_scope = ifa->ifa_scope;
1144         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1145
1146         if (ifa->ifa_address)
1147                 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1148
1149         if (ifa->ifa_local)
1150                 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1151
1152         if (ifa->ifa_broadcast)
1153                 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1154
1155         if (ifa->ifa_label[0])
1156                 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1157
1158         return nlmsg_end(skb, nlh);
1159
1160 nla_put_failure:
1161         nlmsg_cancel(skb, nlh);
1162         return -EMSGSIZE;
1163 }
1164
1165 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1166 {
1167         struct net *net = sock_net(skb->sk);
1168         int idx, ip_idx;
1169         struct net_device *dev;
1170         struct in_device *in_dev;
1171         struct in_ifaddr *ifa;
1172         int s_ip_idx, s_idx = cb->args[0];
1173
1174         s_ip_idx = ip_idx = cb->args[1];
1175         idx = 0;
1176         for_each_netdev(net, dev) {
1177                 if (idx < s_idx)
1178                         goto cont;
1179                 if (idx > s_idx)
1180                         s_ip_idx = 0;
1181                 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
1182                         goto cont;
1183
1184                 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1185                      ifa = ifa->ifa_next, ip_idx++) {
1186                         if (ip_idx < s_ip_idx)
1187                                 continue;
1188                         if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1189                                              cb->nlh->nlmsg_seq,
1190                                              RTM_NEWADDR, NLM_F_MULTI) <= 0)
1191                                 goto done;
1192                 }
1193 cont:
1194                 idx++;
1195         }
1196
1197 done:
1198         cb->args[0] = idx;
1199         cb->args[1] = ip_idx;
1200
1201         return skb->len;
1202 }
1203
1204 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1205                       u32 pid)
1206 {
1207         struct sk_buff *skb;
1208         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1209         int err = -ENOBUFS;
1210         struct net *net;
1211
1212         net = dev_net(ifa->ifa_dev->dev);
1213         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1214         if (skb == NULL)
1215                 goto errout;
1216
1217         err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1218         if (err < 0) {
1219                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1220                 WARN_ON(err == -EMSGSIZE);
1221                 kfree_skb(skb);
1222                 goto errout;
1223         }
1224         rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1225         return;
1226 errout:
1227         if (err < 0)
1228                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1229 }
1230
1231 #ifdef CONFIG_SYSCTL
1232
1233 static void devinet_copy_dflt_conf(struct net *net, int i)
1234 {
1235         struct net_device *dev;
1236
1237         rcu_read_lock();
1238         for_each_netdev_rcu(net, dev) {
1239                 struct in_device *in_dev;
1240
1241                 in_dev = __in_dev_get_rcu(dev);
1242                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1243                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1244         }
1245         rcu_read_unlock();
1246 }
1247
1248 /* called with RTNL locked */
1249 static void inet_forward_change(struct net *net)
1250 {
1251         struct net_device *dev;
1252         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1253
1254         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1255         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1256
1257         for_each_netdev(net, dev) {
1258                 struct in_device *in_dev;
1259                 if (on)
1260                         dev_disable_lro(dev);
1261                 rcu_read_lock();
1262                 in_dev = __in_dev_get_rcu(dev);
1263                 if (in_dev)
1264                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1265                 rcu_read_unlock();
1266         }
1267 }
1268
1269 static int devinet_conf_proc(ctl_table *ctl, int write,
1270                              void __user *buffer,
1271                              size_t *lenp, loff_t *ppos)
1272 {
1273         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1274
1275         if (write) {
1276                 struct ipv4_devconf *cnf = ctl->extra1;
1277                 struct net *net = ctl->extra2;
1278                 int i = (int *)ctl->data - cnf->data;
1279
1280                 set_bit(i, cnf->state);
1281
1282                 if (cnf == net->ipv4.devconf_dflt)
1283                         devinet_copy_dflt_conf(net, i);
1284         }
1285
1286         return ret;
1287 }
1288
1289 static int devinet_conf_sysctl(ctl_table *table,
1290                                void __user *oldval, size_t __user *oldlenp,
1291                                void __user *newval, size_t newlen)
1292 {
1293         struct ipv4_devconf *cnf;
1294         struct net *net;
1295         int *valp = table->data;
1296         int new;
1297         int i;
1298
1299         if (!newval || !newlen)
1300                 return 0;
1301
1302         if (newlen != sizeof(int))
1303                 return -EINVAL;
1304
1305         if (get_user(new, (int __user *)newval))
1306                 return -EFAULT;
1307
1308         if (new == *valp)
1309                 return 0;
1310
1311         if (oldval && oldlenp) {
1312                 size_t len;
1313
1314                 if (get_user(len, oldlenp))
1315                         return -EFAULT;
1316
1317                 if (len) {
1318                         if (len > table->maxlen)
1319                                 len = table->maxlen;
1320                         if (copy_to_user(oldval, valp, len))
1321                                 return -EFAULT;
1322                         if (put_user(len, oldlenp))
1323                                 return -EFAULT;
1324                 }
1325         }
1326
1327         *valp = new;
1328
1329         cnf = table->extra1;
1330         net = table->extra2;
1331         i = (int *)table->data - cnf->data;
1332
1333         set_bit(i, cnf->state);
1334
1335         if (cnf == net->ipv4.devconf_dflt)
1336                 devinet_copy_dflt_conf(net, i);
1337
1338         return 1;
1339 }
1340
1341 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1342                                   void __user *buffer,
1343                                   size_t *lenp, loff_t *ppos)
1344 {
1345         int *valp = ctl->data;
1346         int val = *valp;
1347         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1348
1349         if (write && *valp != val) {
1350                 struct net *net = ctl->extra2;
1351
1352                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1353                         if (!rtnl_trylock())
1354                                 return restart_syscall();
1355                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1356                                 inet_forward_change(net);
1357                         } else if (*valp) {
1358                                 struct ipv4_devconf *cnf = ctl->extra1;
1359                                 struct in_device *idev =
1360                                         container_of(cnf, struct in_device, cnf);
1361                                 dev_disable_lro(idev->dev);
1362                         }
1363                         rtnl_unlock();
1364                         rt_cache_flush(net, 0);
1365                 }
1366         }
1367
1368         return ret;
1369 }
1370
1371 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1372                          void __user *buffer,
1373                          size_t *lenp, loff_t *ppos)
1374 {
1375         int *valp = ctl->data;
1376         int val = *valp;
1377         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1378         struct net *net = ctl->extra2;
1379
1380         if (write && *valp != val)
1381                 rt_cache_flush(net, 0);
1382
1383         return ret;
1384 }
1385
1386 int ipv4_doint_and_flush_strategy(ctl_table *table,
1387                                   void __user *oldval, size_t __user *oldlenp,
1388                                   void __user *newval, size_t newlen)
1389 {
1390         int ret = devinet_conf_sysctl(table, oldval, oldlenp, newval, newlen);
1391         struct net *net = table->extra2;
1392
1393         if (ret == 1)
1394                 rt_cache_flush(net, 0);
1395
1396         return ret;
1397 }
1398
1399
/*
 * Build one ctl_table initializer for devconf attribute 'attr'.
 * .data points at slot (NET_IPV4_CONF_<attr> - 1) of the global
 * ipv4_devconf.data[] and .extra1 at ipv4_devconf itself; 'mval' is the file
 * mode, 'proc' the proc handler and 'sysctl' the strategy routine.
 */
1400 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc, sysctl) \
1401         { \
1402                 .ctl_name       = NET_IPV4_CONF_ ## attr, \
1403                 .procname       = name, \
1404                 .data           = ipv4_devconf.data + \
1405                                   NET_IPV4_CONF_ ## attr - 1, \
1406                 .maxlen         = sizeof(int), \
1407                 .mode           = mval, \
1408                 .proc_handler   = proc, \
1409                 .strategy       = sysctl, \
1410                 .extra1         = &ipv4_devconf, \
1411         }
1412
/* Mode 0644 entry using the generic devconf handlers. */
1413 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
1414         DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc, \
1415                              devinet_conf_sysctl)
1416
/* Mode 0444 entry using the generic devconf handlers. */
1417 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
1418         DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc, \
1419                              devinet_conf_sysctl)
1420
/* Mode 0644 entry with caller-supplied proc and strategy handlers. */
1421 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc, sysctl) \
1422         DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc, sysctl)
1423
/* Mode 0644 entry whose handlers flush the route cache on change. */
1424 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
1425         DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush, \
1426                                      ipv4_doint_and_flush_strategy)
1427
/*
 * Template for a devconf sysctl directory.  __devinet_sysctl_register()
 * duplicates this structure for each registration and retargets the
 * entries; the template itself refers to the global ipv4_devconf.
 *
 *   sysctl_header - handle filled in when a copy is registered
 *   devinet_vars  - one entry per tunable, built with the macros above
 *   dev_name      - private copy of the directory name owned by the table
 */
1428 static struct devinet_sysctl_table {
1429         struct ctl_table_header *sysctl_header;
1430         struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
1431         char *dev_name;
1432 } devinet_sysctl = {
1433         .devinet_vars = {
                /* forwarding has its own proc handler */
1434                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1435                                              devinet_sysctl_forward,
1436                                              devinet_conf_sysctl),
1437                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1438
1439                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1440                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1441                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1442                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1443                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1444                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1445                                         "accept_source_route"),
1446                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1447                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1448                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1449                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1450                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1451                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1452                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1453                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1454                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1455                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1456
                /* changing any of these flushes the route cache */
1457                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1458                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1459                 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1460                                               "force_igmp_version"),
1461                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1462                                               "promote_secondaries"),
1463         },
1464 };
1465
1466 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1467                 int ctl_name, struct ipv4_devconf *p)
1468 {
1469         int i;
1470         struct devinet_sysctl_table *t;
1471
1472 #define DEVINET_CTL_PATH_DEV    3
1473
1474         struct ctl_path devinet_ctl_path[] = {
1475                 { .procname = "net", .ctl_name = CTL_NET, },
1476                 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1477                 { .procname = "conf", .ctl_name = NET_IPV4_CONF, },
1478                 { /* to be set */ },
1479                 { },
1480         };
1481
1482         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1483         if (!t)
1484                 goto out;
1485
1486         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1487                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1488                 t->devinet_vars[i].extra1 = p;
1489                 t->devinet_vars[i].extra2 = net;
1490         }
1491
1492         /*
1493          * Make a copy of dev_name, because '.procname' is regarded as const
1494          * by sysctl and we wouldn't want anyone to change it under our feet
1495          * (see SIOCSIFNAME).
1496          */
1497         t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1498         if (!t->dev_name)
1499                 goto free;
1500
1501         devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1502         devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name;
1503
1504         t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1505                         t->devinet_vars);
1506         if (!t->sysctl_header)
1507                 goto free_procname;
1508
1509         p->sysctl = t;
1510         return 0;
1511
1512 free_procname:
1513         kfree(t->dev_name);
1514 free:
1515         kfree(t);
1516 out:
1517         return -ENOBUFS;
1518 }
1519
1520 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1521 {
1522         struct devinet_sysctl_table *t = cnf->sysctl;
1523
1524         if (t == NULL)
1525                 return;
1526
1527         cnf->sysctl = NULL;
1528         unregister_sysctl_table(t->sysctl_header);
1529         kfree(t->dev_name);
1530         kfree(t);
1531 }
1532
1533 static void devinet_sysctl_register(struct in_device *idev)
1534 {
1535         neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
1536                         NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1537         __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1538                         idev->dev->ifindex, &idev->cnf);
1539 }
1540
1541 static void devinet_sysctl_unregister(struct in_device *idev)
1542 {
1543         __devinet_sysctl_unregister(&idev->cnf);
1544         neigh_sysctl_unregister(idev->arp_parms);
1545 }
1546
/*
 * The "ip_forward" sysctl: same storage and handlers as the FORWARDING slot
 * of the global ipv4_devconf, bound to init_net.  devinet_init_net()
 * duplicates and retargets this table for other namespaces.
 */
1547 static struct ctl_table ctl_forward_entry[] = {
1548         {
1549                 .ctl_name       = NET_IPV4_FORWARD,
1550                 .procname       = "ip_forward",
1551                 .data           = &ipv4_devconf.data[
1552                                         NET_IPV4_CONF_FORWARDING - 1],
1553                 .maxlen         = sizeof(int),
1554                 .mode           = 0644,
1555                 .proc_handler   = devinet_sysctl_forward,
1556                 .strategy       = devinet_conf_sysctl,
1557                 .extra1         = &ipv4_devconf,
1558                 .extra2         = &init_net,
1559         },
1560         { },
1561 };
1562
/* sysctl path "net/ipv4", under which ctl_forward_entry is registered. */
1563 static __net_initdata struct ctl_path net_ipv4_path[] = {
1564         { .procname = "net", .ctl_name = CTL_NET, },
1565         { .procname = "ipv4", .ctl_name = NET_IPV4, },
1566         { },
1567 };
1568 #endif
1569
1570 static __net_init int devinet_init_net(struct net *net)
1571 {
1572         int err;
1573         struct ipv4_devconf *all, *dflt;
1574 #ifdef CONFIG_SYSCTL
1575         struct ctl_table *tbl = ctl_forward_entry;
1576         struct ctl_table_header *forw_hdr;
1577 #endif
1578
1579         err = -ENOMEM;
1580         all = &ipv4_devconf;
1581         dflt = &ipv4_devconf_dflt;
1582
1583         if (net != &init_net) {
1584                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1585                 if (all == NULL)
1586                         goto err_alloc_all;
1587
1588                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1589                 if (dflt == NULL)
1590                         goto err_alloc_dflt;
1591
1592 #ifdef CONFIG_SYSCTL
1593                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1594                 if (tbl == NULL)
1595                         goto err_alloc_ctl;
1596
1597                 tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
1598                 tbl[0].extra1 = all;
1599                 tbl[0].extra2 = net;
1600 #endif
1601         }
1602
1603 #ifdef CONFIG_SYSCTL
1604         err = __devinet_sysctl_register(net, "all",
1605                         NET_PROTO_CONF_ALL, all);
1606         if (err < 0)
1607                 goto err_reg_all;
1608
1609         err = __devinet_sysctl_register(net, "default",
1610                         NET_PROTO_CONF_DEFAULT, dflt);
1611         if (err < 0)
1612                 goto err_reg_dflt;
1613
1614         err = -ENOMEM;
1615         forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1616         if (forw_hdr == NULL)
1617                 goto err_reg_ctl;
1618         net->ipv4.forw_hdr = forw_hdr;
1619 #endif
1620
1621         net->ipv4.devconf_all = all;
1622         net->ipv4.devconf_dflt = dflt;
1623         return 0;
1624
1625 #ifdef CONFIG_SYSCTL
1626 err_reg_ctl:
1627         __devinet_sysctl_unregister(dflt);
1628 err_reg_dflt:
1629         __devinet_sysctl_unregister(all);
1630 err_reg_all:
1631         if (tbl != ctl_forward_entry)
1632                 kfree(tbl);
1633 err_alloc_ctl:
1634 #endif
1635         if (dflt != &ipv4_devconf_dflt)
1636                 kfree(dflt);
1637 err_alloc_dflt:
1638         if (all != &ipv4_devconf)
1639                 kfree(all);
1640 err_alloc_all:
1641         return err;
1642 }
1643
1644 static __net_exit void devinet_exit_net(struct net *net)
1645 {
1646 #ifdef CONFIG_SYSCTL
1647         struct ctl_table *tbl;
1648
1649         tbl = net->ipv4.forw_hdr->ctl_table_arg;
1650         unregister_net_sysctl_table(net->ipv4.forw_hdr);
1651         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1652         __devinet_sysctl_unregister(net->ipv4.devconf_all);
1653         kfree(tbl);
1654 #endif
1655         kfree(net->ipv4.devconf_dflt);
1656         kfree(net->ipv4.devconf_all);
1657 }
1658
/* Per-network-namespace init/exit hooks, registered by devinet_init(). */
1659 static __net_initdata struct pernet_operations devinet_ops = {
1660         .init = devinet_init_net,
1661         .exit = devinet_exit_net,
1662 };
1663
1664 void __init devinet_init(void)
1665 {
1666         register_pernet_subsys(&devinet_ops);
1667
1668         register_gifconf(PF_INET, inet_gifconf);
1669         register_netdevice_notifier(&ip_netdev_notifier);
1670
1671         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1672         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1673         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1674 }
1675
/* Symbols from this file exported for use by modules. */
1676 EXPORT_SYMBOL(in_dev_finish_destroy);
1677 EXPORT_SYMBOL(inet_select_addr);
1678 EXPORT_SYMBOL(inetdev_by_index);
1679 EXPORT_SYMBOL(register_inetaddr_notifier);
1680 EXPORT_SYMBOL(unregister_inetaddr_notifier);