/*
* NET3 IP device support routines.
*
- * Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
- *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
#include <net/rtnetlink.h>
#include <net/net_namespace.h>
-struct ipv4_devconf ipv4_devconf = {
+static struct ipv4_devconf ipv4_devconf = {
.data = {
[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
[IFA_LOCAL] = { .type = NLA_U32 },
[IFA_ADDRESS] = { .type = NLA_U32 },
[IFA_BROADCAST] = { .type = NLA_U32 },
- [IFA_ANYCAST] = { .type = NLA_U32 },
[IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
};
/*
 * Allocate a struct in_ifaddr with kzalloc() using GFP_KERNEL.
 *
 * Returns the new object, or NULL if the allocation failed.
 *
 * Diff note: the removed ('-') body also ran INIT_RCU_HEAD() on the
 * rcu_head member when the allocation succeeded; the added ('+') body
 * returns the kzalloc() result directly.
 */
static struct in_ifaddr *inet_alloc_ifa(void)
{
- struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
-
- if (ifa) {
- INIT_RCU_HEAD(&ifa->rcu_head);
- }
-
- return ifa;
+ return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
}
static void inet_rcu_free_ifa(struct rcu_head *head)
{
struct net_device *dev = idev->dev;
- BUG_TRAP(!idev->ifa_list);
- BUG_TRAP(!idev->mc_list);
+ WARN_ON(idev->ifa_list);
+ WARN_ON(idev->mc_list);
#ifdef NET_REFCNT_DEBUG
printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
idev, dev ? dev->name : "NIL");
in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
if (!in_dev)
goto out;
- INIT_RCU_HEAD(&in_dev->rcu_head);
- memcpy(&in_dev->cnf, dev->nd_net->ipv4.devconf_dflt,
+ memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
sizeof(in_dev->cnf));
in_dev->cnf.sysctl = NULL;
in_dev->dev = dev;
if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
goto out_kfree;
+ if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
+ dev_disable_lro(dev);
/* Reference in_dev->dev */
dev_hold(dev);
/* Account for reference dev->ip_ptr (below) */
}
ipv4_devconf_setall(in_dev);
if (ifa->ifa_dev != in_dev) {
- BUG_TRAP(!ifa->ifa_dev);
+ WARN_ON(ifa->ifa_dev);
in_dev_hold(in_dev);
ifa->ifa_dev = in_dev;
}
- if (LOOPBACK(ifa->ifa_local))
+ if (ipv4_is_loopback(ifa->ifa_local))
ifa->ifa_scope = RT_SCOPE_HOST;
return inet_insert_ifa(ifa);
}
-struct in_device *inetdev_by_index(int ifindex)
+struct in_device *inetdev_by_index(struct net *net, int ifindex)
{
struct net_device *dev;
struct in_device *in_dev = NULL;
read_lock(&dev_base_lock);
- dev = __dev_get_by_index(&init_net, ifindex);
+ dev = __dev_get_by_index(net, ifindex);
if (dev)
in_dev = in_dev_get(dev);
read_unlock(&dev_base_lock);
static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
- struct net *net = skb->sk->sk_net;
+ struct net *net = sock_net(skb->sk);
struct nlattr *tb[IFA_MAX+1];
struct in_device *in_dev;
struct ifaddrmsg *ifm;
ASSERT_RTNL();
- if (net != &init_net)
- return -EINVAL;
-
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
if (err < 0)
goto errout;
ifm = nlmsg_data(nlh);
- in_dev = inetdev_by_index(ifm->ifa_index);
+ in_dev = inetdev_by_index(net, ifm->ifa_index);
if (in_dev == NULL) {
err = -ENODEV;
goto errout;
return err;
}
-static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
+static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
{
struct nlattr *tb[IFA_MAX+1];
struct in_ifaddr *ifa;
struct ifaddrmsg *ifm;
struct net_device *dev;
struct in_device *in_dev;
- int err = -EINVAL;
+ int err;
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
if (err < 0)
goto errout;
ifm = nlmsg_data(nlh);
- if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL) {
- err = -EINVAL;
+ err = -EINVAL;
+ if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
goto errout;
- }
- dev = __dev_get_by_index(&init_net, ifm->ifa_index);
- if (dev == NULL) {
- err = -ENODEV;
+ dev = __dev_get_by_index(net, ifm->ifa_index);
+ err = -ENODEV;
+ if (dev == NULL)
goto errout;
- }
in_dev = __in_dev_get_rtnl(dev);
- if (in_dev == NULL) {
- err = -ENOBUFS;
+ err = -ENOBUFS;
+ if (in_dev == NULL)
goto errout;
- }
ifa = inet_alloc_ifa();
- if (ifa == NULL) {
+ if (ifa == NULL)
/*
* A potential indev allocation can be left alive, it stays
* assigned to its device and is destroyed with it.
*/
- err = -ENOBUFS;
goto errout;
- }
ipv4_devconf_setall(in_dev);
in_dev_hold(in_dev);
if (tb[IFA_BROADCAST])
ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
- if (tb[IFA_ANYCAST])
- ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
-
if (tb[IFA_LABEL])
nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
else
static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
- struct net *net = skb->sk->sk_net;
+ struct net *net = sock_net(skb->sk);
struct in_ifaddr *ifa;
ASSERT_RTNL();
- if (net != &init_net)
- return -EINVAL;
-
- ifa = rtm_to_ifaddr(nlh);
+ ifa = rtm_to_ifaddr(net, nlh);
if (IS_ERR(ifa))
return PTR_ERR(ifa);
{
int rc = -1; /* Something else, probably a multicast. */
- if (ZERONET(addr))
+ if (ipv4_is_zeronet(addr))
rc = 0;
else {
__u32 haddr = ntohl(addr);
}
-int devinet_ioctl(unsigned int cmd, void __user *arg)
+int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
struct ifreq ifr;
struct sockaddr_in sin_orig;
if (colon)
*colon = 0;
-#ifdef CONFIG_KMOD
- dev_load(&init_net, ifr.ifr_name);
-#endif
+ dev_load(net, ifr.ifr_name);
switch (cmd) {
case SIOCGIFADDR: /* Get interface address */
rtnl_lock();
ret = -ENODEV;
- if ((dev = __dev_get_by_name(&init_net, ifr.ifr_name)) == NULL)
+ if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL)
goto done;
if (colon)
break;
inet_del_ifa(in_dev, ifap, 0);
ifa->ifa_broadcast = 0;
- ifa->ifa_anycast = 0;
+ ifa->ifa_scope = 0;
}
ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
{
__be32 addr = 0;
struct in_device *in_dev;
+ struct net *net = dev_net(dev);
rcu_read_lock();
in_dev = __in_dev_get_rcu(dev);
*/
read_lock(&dev_base_lock);
rcu_read_lock();
- for_each_netdev(&init_net, dev) {
+ for_each_netdev(net, dev) {
if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
continue;
/*
* Confirm that local IP address exists using wildcards:
- * - dev: only on this interface, 0=any interface
+ * - in_dev: only on this interface, 0=any interface
* - dst: only in the same subnet as dst, 0=any dst
* - local: address, 0=autoselect the local address
* - scope: maximum allowed scope value for the local address
*/
-__be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope)
+__be32 inet_confirm_addr(struct in_device *in_dev,
+ __be32 dst, __be32 local, int scope)
{
__be32 addr = 0;
- struct in_device *in_dev;
-
- if (dev) {
- rcu_read_lock();
- if ((in_dev = __in_dev_get_rcu(dev)))
- addr = confirm_addr_indev(in_dev, dst, local, scope);
- rcu_read_unlock();
+ struct net_device *dev;
+ struct net *net;
- return addr;
- }
+ if (scope != RT_SCOPE_LINK)
+ return confirm_addr_indev(in_dev, dst, local, scope);
+ net = dev_net(in_dev->dev);
read_lock(&dev_base_lock);
rcu_read_lock();
- for_each_netdev(&init_net, dev) {
+ for_each_netdev(net, dev) {
if ((in_dev = __in_dev_get_rcu(dev))) {
addr = confirm_addr_indev(in_dev, dst, local, scope);
if (addr)
memcpy(old, ifa->ifa_label, IFNAMSIZ);
memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
if (named++ == 0)
- continue;
+ goto skip;
dot = strchr(old, ':');
if (dot == NULL) {
sprintf(old, ":%d", named);
} else {
strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
}
+skip:
+ rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
}
}
/*
 * Report whether an MTU is large enough for IP to be used on a device.
 *
 * Returns true when mtu is at least 68, false otherwise.
 * NOTE(review): 68 is presumably the minimum IPv4 MTU from RFC 791 -
 * confirm before changing the constant.
 */
+static inline bool inetdev_valid_mtu(unsigned mtu)
+{
+ return mtu >= 68;
+}
+
/* Called only under RTNL semaphore */
static int inetdev_event(struct notifier_block *this, unsigned long event,
struct net_device *dev = ptr;
struct in_device *in_dev = __in_dev_get_rtnl(dev);
- if (dev->nd_net != &init_net)
- return NOTIFY_DONE;
-
ASSERT_RTNL();
if (!in_dev) {
IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
}
+ } else if (event == NETDEV_CHANGEMTU) {
+ /* Re-enabling IP */
+ if (inetdev_valid_mtu(dev->mtu))
+ in_dev = inetdev_init(dev);
}
goto out;
}
dev->ip_ptr = NULL;
break;
case NETDEV_UP:
- if (dev->mtu < 68)
+ if (!inetdev_valid_mtu(dev->mtu))
break;
if (dev->flags & IFF_LOOPBACK) {
struct in_ifaddr *ifa;
}
}
ip_mc_up(in_dev);
+ /* fall through */
+ case NETDEV_CHANGEADDR:
+ /* Send gratuitous ARP to notify of link change */
+ if (IN_DEV_ARP_NOTIFY(in_dev)) {
+ struct in_ifaddr *ifa = in_dev->ifa_list;
+
+ if (ifa)
+ arp_send(ARPOP_REQUEST, ETH_P_ARP,
+ ifa->ifa_address, dev,
+ ifa->ifa_address, NULL,
+ dev->dev_addr, NULL);
+ }
break;
case NETDEV_DOWN:
ip_mc_down(in_dev);
break;
+ case NETDEV_BONDING_OLDTYPE:
+ ip_mc_unmap(in_dev);
+ break;
+ case NETDEV_BONDING_NEWTYPE:
+ ip_mc_remap(in_dev);
+ break;
case NETDEV_CHANGEMTU:
- if (dev->mtu >= 68)
+ if (inetdev_valid_mtu(dev->mtu))
break;
- /* MTU falled under 68, disable IP */
+ /* disable IP when MTU is not enough */
case NETDEV_UNREGISTER:
inetdev_destroy(in_dev);
break;
}
/*
 * Notifier block whose callback is inetdev_event().
 * Diff note: the only change here is spacing around '='.
 */
static struct notifier_block ip_netdev_notifier = {
- .notifier_call =inetdev_event,
+ .notifier_call = inetdev_event,
};
static inline size_t inet_nlmsg_size(void)
+ nla_total_size(4) /* IFA_ADDRESS */
+ nla_total_size(4) /* IFA_LOCAL */
+ nla_total_size(4) /* IFA_BROADCAST */
- + nla_total_size(4) /* IFA_ANYCAST */
+ nla_total_size(IFNAMSIZ); /* IFA_LABEL */
}
if (ifa->ifa_broadcast)
NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
- if (ifa->ifa_anycast)
- NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);
-
if (ifa->ifa_label[0])
NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net *net = skb->sk->sk_net;
+ struct net *net = sock_net(skb->sk);
int idx, ip_idx;
struct net_device *dev;
struct in_device *in_dev;
struct in_ifaddr *ifa;
int s_ip_idx, s_idx = cb->args[0];
- if (net != &init_net)
- return 0;
-
s_ip_idx = ip_idx = cb->args[1];
idx = 0;
- for_each_netdev(&init_net, dev) {
+ for_each_netdev(net, dev) {
if (idx < s_idx)
goto cont;
if (idx > s_idx)
return skb->len;
}
-static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
+static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
u32 pid)
{
struct sk_buff *skb;
u32 seq = nlh ? nlh->nlmsg_seq : 0;
int err = -ENOBUFS;
+ struct net *net;
+ net = dev_net(ifa->ifa_dev->dev);
skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
if (skb == NULL)
goto errout;
kfree_skb(skb);
goto errout;
}
- err = rtnl_notify(skb, &init_net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
+ rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
+ return;
errout:
if (err < 0)
- rtnl_set_sk_err(&init_net, RTNLGRP_IPV4_IFADDR, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
}
#ifdef CONFIG_SYSCTL
read_lock(&dev_base_lock);
for_each_netdev(net, dev) {
struct in_device *in_dev;
+ if (on)
+ dev_disable_lro(dev);
rcu_read_lock();
in_dev = __in_dev_get_rcu(dev);
if (in_dev)
rcu_read_unlock();
}
read_unlock(&dev_base_lock);
-
- rt_cache_flush(0);
}
static int devinet_conf_proc(ctl_table *ctl, int write,
- struct file* filp, void __user *buffer,
+ void __user *buffer,
size_t *lenp, loff_t *ppos)
{
- int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+ int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
if (write) {
struct ipv4_devconf *cnf = ctl->extra1;
return ret;
}
-static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
+static int devinet_conf_sysctl(ctl_table *table,
void __user *oldval, size_t __user *oldlenp,
void __user *newval, size_t newlen)
{
}
/*
 * proc handler for a "forwarding" sysctl entry.
 *
 * Runs proc_dointvec() on the entry, then reacts only when this was a write
 * that actually changed the integer at ctl->data.  ctl->extra2 is used as
 * the struct net and ctl->extra1 as the struct ipv4_devconf of the entry.
 *
 * Added ('+') version: nothing more is done for the namespace's default
 * FORWARDING slot.  For any other slot, RTNL is taken with rtnl_trylock();
 * if it cannot be taken the function returns restart_syscall().  With RTNL
 * held, the "all" slot triggers inet_forward_change(net); any other slot
 * that is now non-zero has LRO disabled on the device of the in_device
 * that embeds the devconf.  RTNL is then released and the route cache is
 * flushed with rt_cache_flush(net, 0).
 *
 * Removed ('-') version: the "all" slot called inet_forward_change(net)
 * and any other non-default slot called rt_cache_flush(0), with no RTNL
 * locking in this function.
 *
 * Returns the proc_dointvec() result, or the restart_syscall() value when
 * RTNL was not available.
 */
static int devinet_sysctl_forward(ctl_table *ctl, int write,
- struct file* filp, void __user *buffer,
+ void __user *buffer,
size_t *lenp, loff_t *ppos)
{
int *valp = ctl->data;
/* Snapshot the value so a change made by proc_dointvec() can be detected. */
int val = *valp;
- int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+ int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
if (write && *valp != val) {
struct net *net = ctl->extra2;
- if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING))
- inet_forward_change(net);
- else if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING))
- rt_cache_flush(0);
+ if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
+ if (!rtnl_trylock())
+ return restart_syscall();
+ if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
+ inet_forward_change(net);
+ } else if (*valp) {
+ struct ipv4_devconf *cnf = ctl->extra1;
+ struct in_device *idev =
+ container_of(cnf, struct in_device, cnf);
+ dev_disable_lro(idev->dev);
+ }
+ rtnl_unlock();
+ rt_cache_flush(net, 0);
+ }
}
return ret;
}
/*
 * proc handler: run proc_dointvec() on the entry and flush the route cache
 * when a write changed the integer at ctl->data.
 *
 * Added ('+') version: ctl->extra2 is used as the struct net passed to
 * rt_cache_flush(net, 0).  The removed ('-') version called
 * rt_cache_flush(0) and passed a struct file pointer to proc_dointvec().
 *
 * Returns the proc_dointvec() result.
 */
int ipv4_doint_and_flush(ctl_table *ctl, int write,
- struct file* filp, void __user *buffer,
+ void __user *buffer,
size_t *lenp, loff_t *ppos)
{
int *valp = ctl->data;
/* Snapshot the value so a change made by proc_dointvec() can be detected. */
int val = *valp;
- int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+ int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
+ struct net *net = ctl->extra2;
if (write && *valp != val)
- rt_cache_flush(0);
+ rt_cache_flush(net, 0);
return ret;
}
/*
 * sysctl strategy routine: delegate to devinet_conf_sysctl() and flush the
 * route cache when that call returns 1.
 * NOTE(review): a return of 1 presumably means a new value was stored -
 * confirm against devinet_conf_sysctl().
 *
 * Added ('+') version: the name/nlen parameters are dropped and
 * table->extra2 is used as the struct net passed to rt_cache_flush(net, 0).
 *
 * Returns the devinet_conf_sysctl() result unchanged.
 */
-int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
+int ipv4_doint_and_flush_strategy(ctl_table *table,
void __user *oldval, size_t __user *oldlenp,
void __user *newval, size_t newlen)
{
- int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
- newval, newlen);
+ int ret = devinet_conf_sysctl(table, oldval, oldlenp, newval, newlen);
+ struct net *net = table->extra2;
if (ret == 1)
- rt_cache_flush(0);
+ rt_cache_flush(net, 0);
return ret;
}
DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
+ DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
{
neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
NET_IPV4_NEIGH, "ipv4", NULL, NULL);
- __devinet_sysctl_register(idev->dev->nd_net, idev->dev->name,
+ __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
idev->dev->ifindex, &idev->cnf);
}
__devinet_sysctl_unregister(&idev->cnf);
neigh_sysctl_unregister(idev->arp_parms);
}
-#endif
static struct ctl_table ctl_forward_entry[] = {
{
{ .procname = "ipv4", .ctl_name = NET_IPV4, },
{ },
};
+#endif
static __net_init int devinet_init_net(struct net *net)
{
int err;
- struct ctl_table *tbl;
struct ipv4_devconf *all, *dflt;
+#ifdef CONFIG_SYSCTL
+ struct ctl_table *tbl = ctl_forward_entry;
struct ctl_table_header *forw_hdr;
+#endif
err = -ENOMEM;
all = &ipv4_devconf;
dflt = &ipv4_devconf_dflt;
- tbl = ctl_forward_entry;
if (net != &init_net) {
all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
if (dflt == NULL)
goto err_alloc_dflt;
+#ifdef CONFIG_SYSCTL
tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
if (tbl == NULL)
goto err_alloc_ctl;
tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
tbl[0].extra1 = all;
tbl[0].extra2 = net;
+#endif
}
#ifdef CONFIG_SYSCTL
forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
if (forw_hdr == NULL)
goto err_reg_ctl;
+ net->ipv4.forw_hdr = forw_hdr;
#endif
- net->ipv4.forw_hdr = forw_hdr;
net->ipv4.devconf_all = all;
net->ipv4.devconf_dflt = dflt;
return 0;
err_reg_all:
if (tbl != ctl_forward_entry)
kfree(tbl);
-#endif
err_alloc_ctl:
+#endif
if (dflt != &ipv4_devconf_dflt)
kfree(dflt);
err_alloc_dflt:
/*
 * Per-namespace teardown for the IPv4 device configuration.
 *
 * With CONFIG_SYSCTL: reads the ctl_table pointer out of
 * net->ipv4.forw_hdr, unregisters that sysctl table and the sysctl entries
 * of the "default" and "all" devconf blocks, then frees the saved table.
 * In all configurations it finally frees net->ipv4.devconf_dflt and
 * net->ipv4.devconf_all.
 *
 * Diff note: the change moves the #ifdef/#endif so that the tbl
 * declaration, the forw_hdr dereference and kfree(tbl) are all inside the
 * CONFIG_SYSCTL section.
 */
static __net_exit void devinet_exit_net(struct net *net)
{
+#ifdef CONFIG_SYSCTL
struct ctl_table *tbl;
/* Fetch the table pointer before the header is unregistered below. */
tbl = net->ipv4.forw_hdr->ctl_table_arg;
-#ifdef CONFIG_SYSCTL
unregister_net_sysctl_table(net->ipv4.forw_hdr);
__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
__devinet_sysctl_unregister(net->ipv4.devconf_all);
-#endif
kfree(tbl);
+#endif
kfree(net->ipv4.devconf_dflt);
kfree(net->ipv4.devconf_all);
}