gre: fix netns vs proto registration ordering
[safe/jmp/linux-2.6] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <asm/system.h>
31 #include <linux/bitops.h>
32 #include <linux/capability.h>
33 #include <linux/module.h>
34 #include <linux/types.h>
35 #include <linux/kernel.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #ifdef CONFIG_SYSCTL
54 #include <linux/sysctl.h>
55 #endif
56 #include <linux/kmod.h>
57
58 #include <net/arp.h>
59 #include <net/ip.h>
60 #include <net/route.h>
61 #include <net/ip_fib.h>
62 #include <net/rtnetlink.h>
63 #include <net/net_namespace.h>
64
/*
 * Statically initialised IPv4 per-device configuration tables.
 *
 * Entries are indexed by (NET_IPV4_CONF_xxx - 1).  Both tables switch on
 * redirect acceptance, redirect sending, secure redirects and shared
 * media; every entry not listed is zero-initialised.
 * NOTE(review): where these tables get attached to a network namespace is
 * not visible in this part of the file - confirm against the init code.
 */
65 static struct ipv4_devconf ipv4_devconf = {
66         .data = {
67                 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
68                 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
69                 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
70                 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
71         },
72 };
73
/*
 * Same settings as ipv4_devconf, plus source-route acceptance enabled.
 */
74 static struct ipv4_devconf ipv4_devconf_dflt = {
75         .data = {
76                 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
77                 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
78                 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
79                 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
80                 [NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
81         },
82 };
83
/*
 * Read attribute 'attr' from the default devconf block of namespace 'net'.
 * The 'net' argument is parenthesised so that any pointer-valued
 * expression (not just a plain identifier) binds correctly before '->'.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
        IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
86
/*
 * Netlink attribute policy handed to nlmsg_parse() by the address
 * add/delete handlers in this file: the three address attributes are
 * 32-bit values, the label is a string of at most IFNAMSIZ - 1 bytes.
 */
87 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
88         [IFA_LOCAL]             = { .type = NLA_U32 },
89         [IFA_ADDRESS]           = { .type = NLA_U32 },
90         [IFA_BROADCAST]         = { .type = NLA_U32 },
91         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
92 };
93
/* Defined later in the file; called here on every address add/delete. */
94 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
95
/* Notifier chain invoked with NETDEV_UP / NETDEV_DOWN and the affected ifaddr. */
96 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
97 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
98                          int destroy);
/*
 * Per-device sysctl hooks: real implementations when CONFIG_SYSCTL is set,
 * empty inline stubs otherwise so callers need no #ifdef.
 */
99 #ifdef CONFIG_SYSCTL
100 static void devinet_sysctl_register(struct in_device *idev);
101 static void devinet_sysctl_unregister(struct in_device *idev);
102 #else
103 static inline void devinet_sysctl_register(struct in_device *idev)
104 {
105 }
106 static inline void devinet_sysctl_unregister(struct in_device *idev)
107 {
108 }
109 #endif
110
111 /* Locks all the inet devices. */
112
113 static struct in_ifaddr *inet_alloc_ifa(void)
114 {
115         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
116 }
117
118 static void inet_rcu_free_ifa(struct rcu_head *head)
119 {
120         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
121         if (ifa->ifa_dev)
122                 in_dev_put(ifa->ifa_dev);
123         kfree(ifa);
124 }
125
126 static inline void inet_free_ifa(struct in_ifaddr *ifa)
127 {
128         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
129 }
130
131 void in_dev_finish_destroy(struct in_device *idev)
132 {
133         struct net_device *dev = idev->dev;
134
135         WARN_ON(idev->ifa_list);
136         WARN_ON(idev->mc_list);
137 #ifdef NET_REFCNT_DEBUG
138         printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
139                idev, dev ? dev->name : "NIL");
140 #endif
141         dev_put(dev);
142         if (!idev->dead)
143                 pr_err("Freeing alive in_device %p\n", idev);
144         else
145                 kfree(idev);
146 }
147 EXPORT_SYMBOL(in_dev_finish_destroy);
148
149 static struct in_device *inetdev_init(struct net_device *dev)
150 {
151         struct in_device *in_dev;
152
153         ASSERT_RTNL();
154
155         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
156         if (!in_dev)
157                 goto out;
158         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
159                         sizeof(in_dev->cnf));
160         in_dev->cnf.sysctl = NULL;
161         in_dev->dev = dev;
162         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
163         if (!in_dev->arp_parms)
164                 goto out_kfree;
165         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
166                 dev_disable_lro(dev);
167         /* Reference in_dev->dev */
168         dev_hold(dev);
169         /* Account for reference dev->ip_ptr (below) */
170         in_dev_hold(in_dev);
171
172         devinet_sysctl_register(in_dev);
173         ip_mc_init_dev(in_dev);
174         if (dev->flags & IFF_UP)
175                 ip_mc_up(in_dev);
176
177         /* we can receive as soon as ip_ptr is set -- do this last */
178         rcu_assign_pointer(dev->ip_ptr, in_dev);
179 out:
180         return in_dev;
181 out_kfree:
182         kfree(in_dev);
183         in_dev = NULL;
184         goto out;
185 }
186
187 static void in_dev_rcu_put(struct rcu_head *head)
188 {
189         struct in_device *idev = container_of(head, struct in_device, rcu_head);
190         in_dev_put(idev);
191 }
192
193 static void inetdev_destroy(struct in_device *in_dev)
194 {
195         struct in_ifaddr *ifa;
196         struct net_device *dev;
197
198         ASSERT_RTNL();
199
200         dev = in_dev->dev;
201
202         in_dev->dead = 1;
203
204         ip_mc_destroy_dev(in_dev);
205
206         while ((ifa = in_dev->ifa_list) != NULL) {
207                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
208                 inet_free_ifa(ifa);
209         }
210
211         dev->ip_ptr = NULL;
212
213         devinet_sysctl_unregister(in_dev);
214         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
215         arp_ifdown(dev);
216
217         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
218 }
219
220 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
221 {
222         rcu_read_lock();
223         for_primary_ifa(in_dev) {
224                 if (inet_ifa_match(a, ifa)) {
225                         if (!b || inet_ifa_match(b, ifa)) {
226                                 rcu_read_unlock();
227                                 return 1;
228                         }
229                 }
230         } endfor_ifa(in_dev);
231         rcu_read_unlock();
232         return 0;
233 }
234
/*
 * Remove the address held in *ifap from in_dev's list and announce it.
 *
 * in_dev:  device whose address list is edited
 * ifap:    the list link (head pointer or a predecessor's ifa_next) that
 *          currently points at the address to remove
 * destroy: non-zero to free the removed address via inet_free_ifa();
 *          zero leaves it to the caller
 * nlh/pid: passed unchanged to rtmsg_ifa() for the netlink notifications
 *
 * When the removed address is a primary, the secondaries that share its
 * mask and prefix are either deleted too, or - if secondary promotion is
 * enabled on the device - the first of them is promoted to primary.
 * Caller must hold RTNL.
 */
235 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
236                          int destroy, struct nlmsghdr *nlh, u32 pid)
237 {
238         struct in_ifaddr *promote = NULL;
239         struct in_ifaddr *ifa, *ifa1 = *ifap;
        /* last_prim: entry after which a promoted address is re-linked */
240         struct in_ifaddr *last_prim = in_dev->ifa_list;
        /* prev_prom: last entry skipped before the promotion candidate */
241         struct in_ifaddr *prev_prom = NULL;
242         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
243
244         ASSERT_RTNL();
245
246         /* 1. Deleting primary ifaddr forces deletion all secondaries
247          * unless alias promotion is set
248          **/
249
250         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
251                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
252
253                 while ((ifa = *ifap1) != NULL) {
254                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
255                             ifa1->ifa_scope <= ifa->ifa_scope)
256                                 last_prim = ifa;
257
                        /* Skip entries that are not secondaries of ifa1's subnet. */
258                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
259                             ifa1->ifa_mask != ifa->ifa_mask ||
260                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
261                                 ifap1 = &ifa->ifa_next;
262                                 prev_prom = ifa;
263                                 continue;
264                         }
265
266                         if (!do_promote) {
                                /* Unlink, announce and free this secondary. */
267                                 *ifap1 = ifa->ifa_next;
268
269                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
270                                 blocking_notifier_call_chain(&inetaddr_chain,
271                                                 NETDEV_DOWN, ifa);
272                                 inet_free_ifa(ifa);
273                         } else {
                                /* First matching secondary becomes the new primary. */
274                                 promote = ifa;
275                                 break;
276                         }
277                 }
278         }
279
280         /* 2. Unlink it */
281
282         *ifap = ifa1->ifa_next;
283
284         /* 3. Announce address deletion */
285
286         /* Send message first, then call notifier.
287            At first sight, FIB update triggered by notifier
288            will refer to already deleted ifaddr, that could confuse
289            netlink listeners. It is not true: look, gated sees
290            that route deleted and if it still thinks that ifaddr
291            is valid, it will try to restore deleted routes... Grr.
292            So that, this order is correct.
293          */
294         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
295         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
296
297         if (promote) {
298
                /* Move the promoted entry so it follows last_prim in the list. */
299                 if (prev_prom) {
300                         prev_prom->ifa_next = promote->ifa_next;
301                         promote->ifa_next = last_prim->ifa_next;
302                         last_prim->ifa_next = promote;
303                 }
304
305                 promote->ifa_flags &= ~IFA_F_SECONDARY;
306                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
307                 blocking_notifier_call_chain(&inetaddr_chain,
308                                 NETDEV_UP, promote);
                /* Re-add FIB entries for remaining addresses of the same subnet. */
309                 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
310                         if (ifa1->ifa_mask != ifa->ifa_mask ||
311                             !inet_ifa_match(ifa1->ifa_address, ifa))
312                                         continue;
313                         fib_add_ifaddr(ifa);
314                 }
315
316         }
317         if (destroy)
318                 inet_free_ifa(ifa1);
319 }
320
321 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
322                          int destroy)
323 {
324         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
325 }
326
327 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
328                              u32 pid)
329 {
330         struct in_device *in_dev = ifa->ifa_dev;
331         struct in_ifaddr *ifa1, **ifap, **last_primary;
332
333         ASSERT_RTNL();
334
335         if (!ifa->ifa_local) {
336                 inet_free_ifa(ifa);
337                 return 0;
338         }
339
340         ifa->ifa_flags &= ~IFA_F_SECONDARY;
341         last_primary = &in_dev->ifa_list;
342
343         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
344              ifap = &ifa1->ifa_next) {
345                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
346                     ifa->ifa_scope <= ifa1->ifa_scope)
347                         last_primary = &ifa1->ifa_next;
348                 if (ifa1->ifa_mask == ifa->ifa_mask &&
349                     inet_ifa_match(ifa1->ifa_address, ifa)) {
350                         if (ifa1->ifa_local == ifa->ifa_local) {
351                                 inet_free_ifa(ifa);
352                                 return -EEXIST;
353                         }
354                         if (ifa1->ifa_scope != ifa->ifa_scope) {
355                                 inet_free_ifa(ifa);
356                                 return -EINVAL;
357                         }
358                         ifa->ifa_flags |= IFA_F_SECONDARY;
359                 }
360         }
361
362         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
363                 net_srandom(ifa->ifa_local);
364                 ifap = last_primary;
365         }
366
367         ifa->ifa_next = *ifap;
368         *ifap = ifa;
369
370         /* Send message first, then call notifier.
371            Notifier will trigger FIB update, so that
372            listeners of netlink will know about new ifaddr */
373         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
374         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
375
376         return 0;
377 }
378
379 static int inet_insert_ifa(struct in_ifaddr *ifa)
380 {
381         return __inet_insert_ifa(ifa, NULL, 0);
382 }
383
384 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
385 {
386         struct in_device *in_dev = __in_dev_get_rtnl(dev);
387
388         ASSERT_RTNL();
389
390         if (!in_dev) {
391                 inet_free_ifa(ifa);
392                 return -ENOBUFS;
393         }
394         ipv4_devconf_setall(in_dev);
395         if (ifa->ifa_dev != in_dev) {
396                 WARN_ON(ifa->ifa_dev);
397                 in_dev_hold(in_dev);
398                 ifa->ifa_dev = in_dev;
399         }
400         if (ipv4_is_loopback(ifa->ifa_local))
401                 ifa->ifa_scope = RT_SCOPE_HOST;
402         return inet_insert_ifa(ifa);
403 }
404
405 struct in_device *inetdev_by_index(struct net *net, int ifindex)
406 {
407         struct net_device *dev;
408         struct in_device *in_dev = NULL;
409
410         rcu_read_lock();
411         dev = dev_get_by_index_rcu(net, ifindex);
412         if (dev)
413                 in_dev = in_dev_get(dev);
414         rcu_read_unlock();
415         return in_dev;
416 }
417 EXPORT_SYMBOL(inetdev_by_index);
418
419 /* Called only from RTNL semaphored context. No locks. */
420
421 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
422                                     __be32 mask)
423 {
424         ASSERT_RTNL();
425
426         for_primary_ifa(in_dev) {
427                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
428                         return ifa;
429         } endfor_ifa(in_dev);
430         return NULL;
431 }
432
433 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
434 {
435         struct net *net = sock_net(skb->sk);
436         struct nlattr *tb[IFA_MAX+1];
437         struct in_device *in_dev;
438         struct ifaddrmsg *ifm;
439         struct in_ifaddr *ifa, **ifap;
440         int err = -EINVAL;
441
442         ASSERT_RTNL();
443
444         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
445         if (err < 0)
446                 goto errout;
447
448         ifm = nlmsg_data(nlh);
449         in_dev = inetdev_by_index(net, ifm->ifa_index);
450         if (in_dev == NULL) {
451                 err = -ENODEV;
452                 goto errout;
453         }
454
455         __in_dev_put(in_dev);
456
457         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
458              ifap = &ifa->ifa_next) {
459                 if (tb[IFA_LOCAL] &&
460                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
461                         continue;
462
463                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
464                         continue;
465
466                 if (tb[IFA_ADDRESS] &&
467                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
468                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
469                         continue;
470
471                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
472                 return 0;
473         }
474
475         err = -EADDRNOTAVAIL;
476 errout:
477         return err;
478 }
479
480 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
481 {
482         struct nlattr *tb[IFA_MAX+1];
483         struct in_ifaddr *ifa;
484         struct ifaddrmsg *ifm;
485         struct net_device *dev;
486         struct in_device *in_dev;
487         int err;
488
489         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
490         if (err < 0)
491                 goto errout;
492
493         ifm = nlmsg_data(nlh);
494         err = -EINVAL;
495         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
496                 goto errout;
497
498         dev = __dev_get_by_index(net, ifm->ifa_index);
499         err = -ENODEV;
500         if (dev == NULL)
501                 goto errout;
502
503         in_dev = __in_dev_get_rtnl(dev);
504         err = -ENOBUFS;
505         if (in_dev == NULL)
506                 goto errout;
507
508         ifa = inet_alloc_ifa();
509         if (ifa == NULL)
510                 /*
511                  * A potential indev allocation can be left alive, it stays
512                  * assigned to its device and is destroy with it.
513                  */
514                 goto errout;
515
516         ipv4_devconf_setall(in_dev);
517         in_dev_hold(in_dev);
518
519         if (tb[IFA_ADDRESS] == NULL)
520                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
521
522         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
523         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
524         ifa->ifa_flags = ifm->ifa_flags;
525         ifa->ifa_scope = ifm->ifa_scope;
526         ifa->ifa_dev = in_dev;
527
528         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
529         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
530
531         if (tb[IFA_BROADCAST])
532                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
533
534         if (tb[IFA_LABEL])
535                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
536         else
537                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
538
539         return ifa;
540
541 errout:
542         return ERR_PTR(err);
543 }
544
545 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
546 {
547         struct net *net = sock_net(skb->sk);
548         struct in_ifaddr *ifa;
549
550         ASSERT_RTNL();
551
552         ifa = rtm_to_ifaddr(net, nlh);
553         if (IS_ERR(ifa))
554                 return PTR_ERR(ifa);
555
556         return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
557 }
558
559 /*
560  *      Determine a default network mask, based on the IP address.
561  */
562
563 static inline int inet_abc_len(__be32 addr)
564 {
565         int rc = -1;    /* Something else, probably a multicast. */
566
567         if (ipv4_is_zeronet(addr))
568                 rc = 0;
569         else {
570                 __u32 haddr = ntohl(addr);
571
572                 if (IN_CLASSA(haddr))
573                         rc = 8;
574                 else if (IN_CLASSB(haddr))
575                         rc = 16;
576                 else if (IN_CLASSC(haddr))
577                         rc = 24;
578         }
579
580         return rc;
581 }
582
583
/*
 * devinet_ioctl - IPv4 interface address ioctls
 *
 * net: namespace in which the interface name is resolved
 * cmd: one of SIOCGIF{ADDR,BRDADDR,DSTADDR,NETMASK},
 *      SIOCSIF{ADDR,BRDADDR,DSTADDR,NETMASK} or SIOCSIFFLAGS
 * arg: user pointer to a struct ifreq; read for every command and written
 *      back for the SIOCGIF* getters
 *
 * Returns 0 on success or a negative errno:
 *   -EFAULT        copying the ifreq from/to user space failed
 *   -EACCES        a setter was used without CAP_NET_ADMIN
 *   -EINVAL        unsupported cmd, non-AF_INET family on a setter,
 *                  or an unacceptable address/netmask value
 *   -ENODEV        no device with that name
 *   -EADDRNOTAVAIL no address entry matches the request
 *   -ENOBUFS       allocating a new address failed
 * Setters may also return the result of inet_set_ifa() or
 * dev_change_flags().
 *
 * The device lookup and all list changes run under rtnl_lock().
 */
584 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
585 {
586         struct ifreq ifr;
587         struct sockaddr_in sin_orig;
588         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
589         struct in_device *in_dev;
590         struct in_ifaddr **ifap = NULL;
591         struct in_ifaddr *ifa = NULL;
592         struct net_device *dev;
593         char *colon;
594         int ret = -EFAULT;
595         int tryaddrmatch = 0;
596
597         /*
598          *      Fetch the caller's info block into kernel space
599          */
600
601         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
602                 goto out;
603         ifr.ifr_name[IFNAMSIZ - 1] = 0;
604
605         /* save original address for comparison */
606         memcpy(&sin_orig, sin, sizeof(*sin));
607
        /*
         * Cut an alias suffix ("name:label") off so that module loading and
         * the device lookup see the base name; the ':' is put back after the
         * lookup so the full label can be compared against ifa_label.
         */
608         colon = strchr(ifr.ifr_name, ':');
609         if (colon)
610                 *colon = 0;
611
612         dev_load(net, ifr.ifr_name);
613
614         switch (cmd) {
615         case SIOCGIFADDR:       /* Get interface address */
616         case SIOCGIFBRDADDR:    /* Get the broadcast address */
617         case SIOCGIFDSTADDR:    /* Get the destination address */
618         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
619                 /* Note that these ioctls will not sleep,
620                    so that we do not impose a lock.
621                    One day we will be forced to put shlock here (I mean SMP)
622                  */
623                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
624                 memset(sin, 0, sizeof(*sin));
625                 sin->sin_family = AF_INET;
626                 break;
627
628         case SIOCSIFFLAGS:
629                 ret = -EACCES;
630                 if (!capable(CAP_NET_ADMIN))
631                         goto out;
632                 break;
633         case SIOCSIFADDR:       /* Set interface address (and family) */
634         case SIOCSIFBRDADDR:    /* Set the broadcast address */
635         case SIOCSIFDSTADDR:    /* Set the destination address */
636         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
637                 ret = -EACCES;
638                 if (!capable(CAP_NET_ADMIN))
639                         goto out;
640                 ret = -EINVAL;
641                 if (sin->sin_family != AF_INET)
642                         goto out;
643                 break;
644         default:
645                 ret = -EINVAL;
646                 goto out;
647         }
648
649         rtnl_lock();
650
651         ret = -ENODEV;
652         dev = __dev_get_by_name(net, ifr.ifr_name);
653         if (!dev)
654                 goto done;
655
656         if (colon)
657                 *colon = ':';
658
659         in_dev = __in_dev_get_rtnl(dev);
660         if (in_dev) {
661                 if (tryaddrmatch) {
662                         /* Matthias Andree */
663                         /* compare label and address (4.4BSD style) */
664                         /* note: we only do this for a limited set of ioctls
665                            and only if the original address family was AF_INET.
666                            This is checked above. */
667                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
668                              ifap = &ifa->ifa_next) {
669                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
670                                     sin_orig.sin_addr.s_addr ==
671                                                         ifa->ifa_address) {
672                                         break; /* found */
673                                 }
674                         }
675                 }
676                 /* we didn't get a match, maybe the application is
677                    4.3BSD-style and passed in junk so we fall back to
678                    comparing just the label */
679                 if (!ifa) {
680                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
681                              ifap = &ifa->ifa_next)
682                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
683                                         break;
684                 }
685         }
686
        /* Only SIOCSIFADDR and SIOCSIFFLAGS may go on without an existing entry. */
687         ret = -EADDRNOTAVAIL;
688         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
689                 goto done;
690
691         switch (cmd) {
692         case SIOCGIFADDR:       /* Get interface address */
693                 sin->sin_addr.s_addr = ifa->ifa_local;
694                 goto rarok;
695
696         case SIOCGIFBRDADDR:    /* Get the broadcast address */
697                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
698                 goto rarok;
699
700         case SIOCGIFDSTADDR:    /* Get the destination address */
701                 sin->sin_addr.s_addr = ifa->ifa_address;
702                 goto rarok;
703
704         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
705                 sin->sin_addr.s_addr = ifa->ifa_mask;
706                 goto rarok;
707
708         case SIOCSIFFLAGS:
                /*
                 * With an alias label, clearing IFF_UP deletes that alias
                 * address; the device flags themselves are left untouched.
                 */
709                 if (colon) {
710                         ret = -EADDRNOTAVAIL;
711                         if (!ifa)
712                                 break;
713                         ret = 0;
714                         if (!(ifr.ifr_flags & IFF_UP))
715                                 inet_del_ifa(in_dev, ifap, 1);
716                         break;
717                 }
718                 ret = dev_change_flags(dev, ifr.ifr_flags);
719                 break;
720
721         case SIOCSIFADDR:       /* Set interface address (and family) */
722                 ret = -EINVAL;
723                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
724                         break;
725
726                 if (!ifa) {
727                         ret = -ENOBUFS;
728                         ifa = inet_alloc_ifa();
729                         if (!ifa)
730                                 break;
731                         if (colon)
732                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
733                         else
734                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
735                 } else {
736                         ret = 0;
737                         if (ifa->ifa_local == sin->sin_addr.s_addr)
738                                 break;
                        /* Unlink without freeing: the entry is reused and re-inserted below. */
739                         inet_del_ifa(in_dev, ifap, 0);
740                         ifa->ifa_broadcast = 0;
741                         ifa->ifa_scope = 0;
742                 }
743
744                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
745
746                 if (!(dev->flags & IFF_POINTOPOINT)) {
747                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
748                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
749                         if ((dev->flags & IFF_BROADCAST) &&
750                             ifa->ifa_prefixlen < 31)
751                                 ifa->ifa_broadcast = ifa->ifa_address |
752                                                      ~ifa->ifa_mask;
753                 } else {
754                         ifa->ifa_prefixlen = 32;
755                         ifa->ifa_mask = inet_make_mask(32);
756                 }
757                 ret = inet_set_ifa(dev, ifa);
758                 break;
759
760         case SIOCSIFBRDADDR:    /* Set the broadcast address */
761                 ret = 0;
762                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
763                         inet_del_ifa(in_dev, ifap, 0);
764                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
765                         inet_insert_ifa(ifa);
766                 }
767                 break;
768
769         case SIOCSIFDSTADDR:    /* Set the destination address */
770                 ret = 0;
771                 if (ifa->ifa_address == sin->sin_addr.s_addr)
772                         break;
773                 ret = -EINVAL;
774                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
775                         break;
776                 ret = 0;
777                 inet_del_ifa(in_dev, ifap, 0);
778                 ifa->ifa_address = sin->sin_addr.s_addr;
779                 inet_insert_ifa(ifa);
780                 break;
781
782         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
783
784                 /*
785                  *      The mask we set must be legal.
786                  */
787                 ret = -EINVAL;
788                 if (bad_mask(sin->sin_addr.s_addr, 0))
789                         break;
790                 ret = 0;
791                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
792                         __be32 old_mask = ifa->ifa_mask;
793                         inet_del_ifa(in_dev, ifap, 0);
794                         ifa->ifa_mask = sin->sin_addr.s_addr;
795                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
796
797                         /* See if current broadcast address matches
798                          * with current netmask, then recalculate
799                          * the broadcast address. Otherwise it's a
800                          * funny address, so don't touch it since
801                          * the user seems to know what (s)he's doing...
802                          */
803                         if ((dev->flags & IFF_BROADCAST) &&
804                             (ifa->ifa_prefixlen < 31) &&
805                             (ifa->ifa_broadcast ==
806                              (ifa->ifa_local|~old_mask))) {
807                                 ifa->ifa_broadcast = (ifa->ifa_local |
808                                                       ~sin->sin_addr.s_addr);
809                         }
810                         inet_insert_ifa(ifa);
811                 }
812                 break;
813         }
814 done:
815         rtnl_unlock();
816 out:
817         return ret;
/* Getter exit: release RTNL first, then copy the filled-in ifreq to user space. */
818 rarok:
819         rtnl_unlock();
820         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
821         goto out;
822 }
823
824 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
825 {
826         struct in_device *in_dev = __in_dev_get_rtnl(dev);
827         struct in_ifaddr *ifa;
828         struct ifreq ifr;
829         int done = 0;
830
831         if (!in_dev)
832                 goto out;
833
834         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
835                 if (!buf) {
836                         done += sizeof(ifr);
837                         continue;
838                 }
839                 if (len < (int) sizeof(ifr))
840                         break;
841                 memset(&ifr, 0, sizeof(struct ifreq));
842                 if (ifa->ifa_label)
843                         strcpy(ifr.ifr_name, ifa->ifa_label);
844                 else
845                         strcpy(ifr.ifr_name, dev->name);
846
847                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
848                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
849                                                                 ifa->ifa_local;
850
851                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
852                         done = -EFAULT;
853                         break;
854                 }
855                 buf  += sizeof(struct ifreq);
856                 len  -= sizeof(struct ifreq);
857                 done += sizeof(struct ifreq);
858         }
859 out:
860         return done;
861 }
862
863 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
864 {
865         __be32 addr = 0;
866         struct in_device *in_dev;
867         struct net *net = dev_net(dev);
868
869         rcu_read_lock();
870         in_dev = __in_dev_get_rcu(dev);
871         if (!in_dev)
872                 goto no_in_dev;
873
874         for_primary_ifa(in_dev) {
875                 if (ifa->ifa_scope > scope)
876                         continue;
877                 if (!dst || inet_ifa_match(dst, ifa)) {
878                         addr = ifa->ifa_local;
879                         break;
880                 }
881                 if (!addr)
882                         addr = ifa->ifa_local;
883         } endfor_ifa(in_dev);
884
885         if (addr)
886                 goto out_unlock;
887 no_in_dev:
888
889         /* Not loopback addresses on loopback should be preferred
890            in this case. It is importnat that lo is the first interface
891            in dev_base list.
892          */
893         for_each_netdev_rcu(net, dev) {
894                 in_dev = __in_dev_get_rcu(dev);
895                 if (!in_dev)
896                         continue;
897
898                 for_primary_ifa(in_dev) {
899                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
900                             ifa->ifa_scope <= scope) {
901                                 addr = ifa->ifa_local;
902                                 goto out_unlock;
903                         }
904                 } endfor_ifa(in_dev);
905         }
906 out_unlock:
907         rcu_read_unlock();
908         return addr;
909 }
910 EXPORT_SYMBOL(inet_select_addr);
911
912 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
913                               __be32 local, int scope)
914 {
915         int same = 0;
916         __be32 addr = 0;
917
918         for_ifa(in_dev) {
919                 if (!addr &&
920                     (local == ifa->ifa_local || !local) &&
921                     ifa->ifa_scope <= scope) {
922                         addr = ifa->ifa_local;
923                         if (same)
924                                 break;
925                 }
926                 if (!same) {
927                         same = (!local || inet_ifa_match(local, ifa)) &&
928                                 (!dst || inet_ifa_match(dst, ifa));
929                         if (same && addr) {
930                                 if (local || !dst)
931                                         break;
932                                 /* Is the selected addr into dst subnet? */
933                                 if (inet_ifa_match(addr, ifa))
934                                         break;
935                                 /* No, then can we use new local src? */
936                                 if (ifa->ifa_scope <= scope) {
937                                         addr = ifa->ifa_local;
938                                         break;
939                                 }
940                                 /* search for large dst subnet for addr */
941                                 same = 0;
942                         }
943                 }
944         } endfor_ifa(in_dev);
945
946         return same ? addr : 0;
947 }
948
949 /*
950  * Confirm that local IP address exists using wildcards:
951  * - in_dev: only on this interface, 0=any interface
952  * - dst: only in the same subnet as dst, 0=any dst
953  * - local: address, 0=autoselect the local address
954  * - scope: maximum allowed scope value for the local address
955  */
956 __be32 inet_confirm_addr(struct in_device *in_dev,
957                          __be32 dst, __be32 local, int scope)
958 {
959         __be32 addr = 0;
960         struct net_device *dev;
961         struct net *net;
962
963         if (scope != RT_SCOPE_LINK)
964                 return confirm_addr_indev(in_dev, dst, local, scope);
965
966         net = dev_net(in_dev->dev);
967         rcu_read_lock();
968         for_each_netdev_rcu(net, dev) {
969                 in_dev = __in_dev_get_rcu(dev);
970                 if (in_dev) {
971                         addr = confirm_addr_indev(in_dev, dst, local, scope);
972                         if (addr)
973                                 break;
974                 }
975         }
976         rcu_read_unlock();
977
978         return addr;
979 }
980
981 /*
982  *      Device notifier
983  */
984
985 int register_inetaddr_notifier(struct notifier_block *nb)
986 {
987         return blocking_notifier_chain_register(&inetaddr_chain, nb);
988 }
989 EXPORT_SYMBOL(register_inetaddr_notifier);
990
991 int unregister_inetaddr_notifier(struct notifier_block *nb)
992 {
993         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
994 }
995 EXPORT_SYMBOL(unregister_inetaddr_notifier);
996
997 /* Rename ifa_labels for a device name change. Make some effort to preserve
998  * existing alias numbering and to create unique labels if possible.
999 */
1000 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1001 {
1002         struct in_ifaddr *ifa;
1003         int named = 0;
1004
1005         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1006                 char old[IFNAMSIZ], *dot;
1007
1008                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1009                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1010                 if (named++ == 0)
1011                         goto skip;
1012                 dot = strchr(old, ':');
1013                 if (dot == NULL) {
1014                         sprintf(old, ":%d", named);
1015                         dot = old;
1016                 }
1017                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1018                         strcat(ifa->ifa_label, dot);
1019                 else
1020                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1021 skip:
1022                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1023         }
1024 }
1025
/*
 * An MTU is acceptable for IPv4 only if it is at least 68 bytes
 * (presumably the RFC 791 minimum datagram size).
 */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
	return !(mtu < 68);
}
1030
1031 /* Called only under RTNL semaphore */
1032
1033 static int inetdev_event(struct notifier_block *this, unsigned long event,
1034                          void *ptr)
1035 {
1036         struct net_device *dev = ptr;
1037         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1038
1039         ASSERT_RTNL();
1040
1041         if (!in_dev) {
1042                 if (event == NETDEV_REGISTER) {
1043                         in_dev = inetdev_init(dev);
1044                         if (!in_dev)
1045                                 return notifier_from_errno(-ENOMEM);
1046                         if (dev->flags & IFF_LOOPBACK) {
1047                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1048                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1049                         }
1050                 } else if (event == NETDEV_CHANGEMTU) {
1051                         /* Re-enabling IP */
1052                         if (inetdev_valid_mtu(dev->mtu))
1053                                 in_dev = inetdev_init(dev);
1054                 }
1055                 goto out;
1056         }
1057
1058         switch (event) {
1059         case NETDEV_REGISTER:
1060                 printk(KERN_DEBUG "inetdev_event: bug\n");
1061                 dev->ip_ptr = NULL;
1062                 break;
1063         case NETDEV_UP:
1064                 if (!inetdev_valid_mtu(dev->mtu))
1065                         break;
1066                 if (dev->flags & IFF_LOOPBACK) {
1067                         struct in_ifaddr *ifa = inet_alloc_ifa();
1068
1069                         if (ifa) {
1070                                 ifa->ifa_local =
1071                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1072                                 ifa->ifa_prefixlen = 8;
1073                                 ifa->ifa_mask = inet_make_mask(8);
1074                                 in_dev_hold(in_dev);
1075                                 ifa->ifa_dev = in_dev;
1076                                 ifa->ifa_scope = RT_SCOPE_HOST;
1077                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1078                                 inet_insert_ifa(ifa);
1079                         }
1080                 }
1081                 ip_mc_up(in_dev);
1082                 /* fall through */
1083         case NETDEV_CHANGEADDR:
1084                 /* Send gratuitous ARP to notify of link change */
1085                 if (IN_DEV_ARP_NOTIFY(in_dev)) {
1086                         struct in_ifaddr *ifa = in_dev->ifa_list;
1087
1088                         if (ifa)
1089                                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1090                                          ifa->ifa_address, dev,
1091                                          ifa->ifa_address, NULL,
1092                                          dev->dev_addr, NULL);
1093                 }
1094                 break;
1095         case NETDEV_DOWN:
1096                 ip_mc_down(in_dev);
1097                 break;
1098         case NETDEV_BONDING_OLDTYPE:
1099                 ip_mc_unmap(in_dev);
1100                 break;
1101         case NETDEV_BONDING_NEWTYPE:
1102                 ip_mc_remap(in_dev);
1103                 break;
1104         case NETDEV_CHANGEMTU:
1105                 if (inetdev_valid_mtu(dev->mtu))
1106                         break;
1107                 /* disable IP when MTU is not enough */
1108         case NETDEV_UNREGISTER:
1109                 inetdev_destroy(in_dev);
1110                 break;
1111         case NETDEV_CHANGENAME:
1112                 /* Do not notify about label change, this event is
1113                  * not interesting to applications using netlink.
1114                  */
1115                 inetdev_changename(dev, in_dev);
1116
1117                 devinet_sysctl_unregister(in_dev);
1118                 devinet_sysctl_register(in_dev);
1119                 break;
1120         }
1121 out:
1122         return NOTIFY_DONE;
1123 }
1124
1125 static struct notifier_block ip_netdev_notifier = {
1126         .notifier_call = inetdev_event,
1127 };
1128
1129 static inline size_t inet_nlmsg_size(void)
1130 {
1131         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1132                + nla_total_size(4) /* IFA_ADDRESS */
1133                + nla_total_size(4) /* IFA_LOCAL */
1134                + nla_total_size(4) /* IFA_BROADCAST */
1135                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1136 }
1137
1138 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1139                             u32 pid, u32 seq, int event, unsigned int flags)
1140 {
1141         struct ifaddrmsg *ifm;
1142         struct nlmsghdr  *nlh;
1143
1144         nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1145         if (nlh == NULL)
1146                 return -EMSGSIZE;
1147
1148         ifm = nlmsg_data(nlh);
1149         ifm->ifa_family = AF_INET;
1150         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1151         ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1152         ifm->ifa_scope = ifa->ifa_scope;
1153         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1154
1155         if (ifa->ifa_address)
1156                 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1157
1158         if (ifa->ifa_local)
1159                 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1160
1161         if (ifa->ifa_broadcast)
1162                 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1163
1164         if (ifa->ifa_label[0])
1165                 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1166
1167         return nlmsg_end(skb, nlh);
1168
1169 nla_put_failure:
1170         nlmsg_cancel(skb, nlh);
1171         return -EMSGSIZE;
1172 }
1173
1174 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1175 {
1176         struct net *net = sock_net(skb->sk);
1177         int h, s_h;
1178         int idx, s_idx;
1179         int ip_idx, s_ip_idx;
1180         struct net_device *dev;
1181         struct in_device *in_dev;
1182         struct in_ifaddr *ifa;
1183         struct hlist_head *head;
1184         struct hlist_node *node;
1185
1186         s_h = cb->args[0];
1187         s_idx = idx = cb->args[1];
1188         s_ip_idx = ip_idx = cb->args[2];
1189
1190         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1191                 idx = 0;
1192                 head = &net->dev_index_head[h];
1193                 rcu_read_lock();
1194                 hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1195                         if (idx < s_idx)
1196                                 goto cont;
1197                         if (idx > s_idx)
1198                                 s_ip_idx = 0;
1199                         in_dev = __in_dev_get_rcu(dev);
1200                         if (!in_dev)
1201                                 goto cont;
1202
1203                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1204                              ifa = ifa->ifa_next, ip_idx++) {
1205                                 if (ip_idx < s_ip_idx)
1206                                         continue;
1207                                 if (inet_fill_ifaddr(skb, ifa,
1208                                              NETLINK_CB(cb->skb).pid,
1209                                              cb->nlh->nlmsg_seq,
1210                                              RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1211                                         rcu_read_unlock();
1212                                         goto done;
1213                                 }
1214                         }
1215 cont:
1216                         idx++;
1217                 }
1218                 rcu_read_unlock();
1219         }
1220
1221 done:
1222         cb->args[0] = h;
1223         cb->args[1] = idx;
1224         cb->args[2] = ip_idx;
1225
1226         return skb->len;
1227 }
1228
1229 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1230                       u32 pid)
1231 {
1232         struct sk_buff *skb;
1233         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1234         int err = -ENOBUFS;
1235         struct net *net;
1236
1237         net = dev_net(ifa->ifa_dev->dev);
1238         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1239         if (skb == NULL)
1240                 goto errout;
1241
1242         err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1243         if (err < 0) {
1244                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1245                 WARN_ON(err == -EMSGSIZE);
1246                 kfree_skb(skb);
1247                 goto errout;
1248         }
1249         rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1250         return;
1251 errout:
1252         if (err < 0)
1253                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1254 }
1255
1256 #ifdef CONFIG_SYSCTL
1257
1258 static void devinet_copy_dflt_conf(struct net *net, int i)
1259 {
1260         struct net_device *dev;
1261
1262         rcu_read_lock();
1263         for_each_netdev_rcu(net, dev) {
1264                 struct in_device *in_dev;
1265
1266                 in_dev = __in_dev_get_rcu(dev);
1267                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1268                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1269         }
1270         rcu_read_unlock();
1271 }
1272
1273 /* called with RTNL locked */
1274 static void inet_forward_change(struct net *net)
1275 {
1276         struct net_device *dev;
1277         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1278
1279         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1280         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1281
1282         for_each_netdev(net, dev) {
1283                 struct in_device *in_dev;
1284                 if (on)
1285                         dev_disable_lro(dev);
1286                 rcu_read_lock();
1287                 in_dev = __in_dev_get_rcu(dev);
1288                 if (in_dev)
1289                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1290                 rcu_read_unlock();
1291         }
1292 }
1293
1294 static int devinet_conf_proc(ctl_table *ctl, int write,
1295                              void __user *buffer,
1296                              size_t *lenp, loff_t *ppos)
1297 {
1298         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1299
1300         if (write) {
1301                 struct ipv4_devconf *cnf = ctl->extra1;
1302                 struct net *net = ctl->extra2;
1303                 int i = (int *)ctl->data - cnf->data;
1304
1305                 set_bit(i, cnf->state);
1306
1307                 if (cnf == net->ipv4.devconf_dflt)
1308                         devinet_copy_dflt_conf(net, i);
1309         }
1310
1311         return ret;
1312 }
1313
1314 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1315                                   void __user *buffer,
1316                                   size_t *lenp, loff_t *ppos)
1317 {
1318         int *valp = ctl->data;
1319         int val = *valp;
1320         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1321
1322         if (write && *valp != val) {
1323                 struct net *net = ctl->extra2;
1324
1325                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1326                         if (!rtnl_trylock())
1327                                 return restart_syscall();
1328                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1329                                 inet_forward_change(net);
1330                         } else if (*valp) {
1331                                 struct ipv4_devconf *cnf = ctl->extra1;
1332                                 struct in_device *idev =
1333                                         container_of(cnf, struct in_device, cnf);
1334                                 dev_disable_lro(idev->dev);
1335                         }
1336                         rtnl_unlock();
1337                         rt_cache_flush(net, 0);
1338                 }
1339         }
1340
1341         return ret;
1342 }
1343
1344 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1345                          void __user *buffer,
1346                          size_t *lenp, loff_t *ppos)
1347 {
1348         int *valp = ctl->data;
1349         int val = *valp;
1350         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1351         struct net *net = ctl->extra2;
1352
1353         if (write && *valp != val)
1354                 rt_cache_flush(net, 0);
1355
1356         return ret;
1357 }
1358
/*
 * Builders for the entries of the devinet sysctl template.
 *
 * DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) expands to one ctl_table
 * initializer: file @name with mode @mval, handled by @proc, whose data
 * pointer addresses slot NET_IPV4_CONF_<attr> - 1 of the global
 * ipv4_devconf. __devinet_sysctl_register() later rebases .data and
 * .extra1 onto the devconf actually being registered.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  NET_IPV4_CONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Read-write integer handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only (mode 0444) integer handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Read-write integer with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Read-write integer whose change flushes the route cache. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1381
1382 static struct devinet_sysctl_table {
1383         struct ctl_table_header *sysctl_header;
1384         struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
1385         char *dev_name;
1386 } devinet_sysctl = {
1387         .devinet_vars = {
1388                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1389                                              devinet_sysctl_forward),
1390                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1391
1392                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1393                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1394                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1395                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1396                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1397                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1398                                         "accept_source_route"),
1399                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1400                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1401                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1402                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1403                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1404                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1405                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1406                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1407                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1408                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1409                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1410                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1411                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1412
1413                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1414                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1415                 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1416                                               "force_igmp_version"),
1417                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1418                                               "promote_secondaries"),
1419         },
1420 };
1421
1422 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1423                                         struct ipv4_devconf *p)
1424 {
1425         int i;
1426         struct devinet_sysctl_table *t;
1427
1428 #define DEVINET_CTL_PATH_DEV    3
1429
1430         struct ctl_path devinet_ctl_path[] = {
1431                 { .procname = "net",  },
1432                 { .procname = "ipv4", },
1433                 { .procname = "conf", },
1434                 { /* to be set */ },
1435                 { },
1436         };
1437
1438         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1439         if (!t)
1440                 goto out;
1441
1442         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1443                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1444                 t->devinet_vars[i].extra1 = p;
1445                 t->devinet_vars[i].extra2 = net;
1446         }
1447
1448         /*
1449          * Make a copy of dev_name, because '.procname' is regarded as const
1450          * by sysctl and we wouldn't want anyone to change it under our feet
1451          * (see SIOCSIFNAME).
1452          */
1453         t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1454         if (!t->dev_name)
1455                 goto free;
1456
1457         devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1458
1459         t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1460                         t->devinet_vars);
1461         if (!t->sysctl_header)
1462                 goto free_procname;
1463
1464         p->sysctl = t;
1465         return 0;
1466
1467 free_procname:
1468         kfree(t->dev_name);
1469 free:
1470         kfree(t);
1471 out:
1472         return -ENOBUFS;
1473 }
1474
1475 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1476 {
1477         struct devinet_sysctl_table *t = cnf->sysctl;
1478
1479         if (t == NULL)
1480                 return;
1481
1482         cnf->sysctl = NULL;
1483         unregister_sysctl_table(t->sysctl_header);
1484         kfree(t->dev_name);
1485         kfree(t);
1486 }
1487
1488 static void devinet_sysctl_register(struct in_device *idev)
1489 {
1490         neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
1491                         NET_IPV4_NEIGH, "ipv4", NULL);
1492         __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1493                                         &idev->cnf);
1494 }
1495
1496 static void devinet_sysctl_unregister(struct in_device *idev)
1497 {
1498         __devinet_sysctl_unregister(&idev->cnf);
1499         neigh_sysctl_unregister(idev->arp_parms);
1500 }
1501
1502 static struct ctl_table ctl_forward_entry[] = {
1503         {
1504                 .procname       = "ip_forward",
1505                 .data           = &ipv4_devconf.data[
1506                                         NET_IPV4_CONF_FORWARDING - 1],
1507                 .maxlen         = sizeof(int),
1508                 .mode           = 0644,
1509                 .proc_handler   = devinet_sysctl_forward,
1510                 .extra1         = &ipv4_devconf,
1511                 .extra2         = &init_net,
1512         },
1513         { },
1514 };
1515
1516 static __net_initdata struct ctl_path net_ipv4_path[] = {
1517         { .procname = "net", },
1518         { .procname = "ipv4", },
1519         { },
1520 };
1521 #endif
1522
1523 static __net_init int devinet_init_net(struct net *net)
1524 {
1525         int err;
1526         struct ipv4_devconf *all, *dflt;
1527 #ifdef CONFIG_SYSCTL
1528         struct ctl_table *tbl = ctl_forward_entry;
1529         struct ctl_table_header *forw_hdr;
1530 #endif
1531
1532         err = -ENOMEM;
1533         all = &ipv4_devconf;
1534         dflt = &ipv4_devconf_dflt;
1535
1536         if (!net_eq(net, &init_net)) {
1537                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1538                 if (all == NULL)
1539                         goto err_alloc_all;
1540
1541                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1542                 if (dflt == NULL)
1543                         goto err_alloc_dflt;
1544
1545 #ifdef CONFIG_SYSCTL
1546                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1547                 if (tbl == NULL)
1548                         goto err_alloc_ctl;
1549
1550                 tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
1551                 tbl[0].extra1 = all;
1552                 tbl[0].extra2 = net;
1553 #endif
1554         }
1555
1556 #ifdef CONFIG_SYSCTL
1557         err = __devinet_sysctl_register(net, "all", all);
1558         if (err < 0)
1559                 goto err_reg_all;
1560
1561         err = __devinet_sysctl_register(net, "default", dflt);
1562         if (err < 0)
1563                 goto err_reg_dflt;
1564
1565         err = -ENOMEM;
1566         forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1567         if (forw_hdr == NULL)
1568                 goto err_reg_ctl;
1569         net->ipv4.forw_hdr = forw_hdr;
1570 #endif
1571
1572         net->ipv4.devconf_all = all;
1573         net->ipv4.devconf_dflt = dflt;
1574         return 0;
1575
1576 #ifdef CONFIG_SYSCTL
1577 err_reg_ctl:
1578         __devinet_sysctl_unregister(dflt);
1579 err_reg_dflt:
1580         __devinet_sysctl_unregister(all);
1581 err_reg_all:
1582         if (tbl != ctl_forward_entry)
1583                 kfree(tbl);
1584 err_alloc_ctl:
1585 #endif
1586         if (dflt != &ipv4_devconf_dflt)
1587                 kfree(dflt);
1588 err_alloc_dflt:
1589         if (all != &ipv4_devconf)
1590                 kfree(all);
1591 err_alloc_all:
1592         return err;
1593 }
1594
1595 static __net_exit void devinet_exit_net(struct net *net)
1596 {
1597 #ifdef CONFIG_SYSCTL
1598         struct ctl_table *tbl;
1599
1600         tbl = net->ipv4.forw_hdr->ctl_table_arg;
1601         unregister_net_sysctl_table(net->ipv4.forw_hdr);
1602         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1603         __devinet_sysctl_unregister(net->ipv4.devconf_all);
1604         kfree(tbl);
1605 #endif
1606         kfree(net->ipv4.devconf_dflt);
1607         kfree(net->ipv4.devconf_all);
1608 }
1609
1610 static __net_initdata struct pernet_operations devinet_ops = {
1611         .init = devinet_init_net,
1612         .exit = devinet_exit_net,
1613 };
1614
1615 void __init devinet_init(void)
1616 {
1617         register_pernet_subsys(&devinet_ops);
1618
1619         register_gifconf(PF_INET, inet_gifconf);
1620         register_netdevice_notifier(&ip_netdev_notifier);
1621
1622         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1623         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1624         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1625 }
1626