X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=net%2Fcore%2Fdev.c;h=f769098774b7e5e3b95f9aeb5b89d87412dda0e9;hb=05d17608a69b3ae653ea5c9857283bef3439c733;hp=ad8e320ceba78d75afef1d1e22a7b3cb7f8bb995;hpb=08e9897d512fe7a67e46209543b3815b57a36dc7;p=safe%2Fjmp%2Flinux-2.6 diff --git a/net/core/dev.c b/net/core/dev.c index ad8e320..f769098 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -80,6 +80,7 @@ #include #include #include +#include #include #include #include @@ -105,6 +106,7 @@ #include #include #include +#include #include #include #include @@ -893,7 +895,8 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf) free_page((unsigned long) inuse); } - snprintf(buf, IFNAMSIZ, name, i); + if (buf != name) + snprintf(buf, IFNAMSIZ, name, i); if (!__dev_get_by_name(net, buf)) return i; @@ -933,6 +936,21 @@ int dev_alloc_name(struct net_device *dev, const char *name) } EXPORT_SYMBOL(dev_alloc_name); +static int dev_get_valid_name(struct net *net, const char *name, char *buf, + bool fmt) +{ + if (!dev_valid_name(name)) + return -EINVAL; + + if (fmt && strchr(name, '%')) + return __dev_alloc_name(net, name, buf); + else if (__dev_get_by_name(net, name)) + return -EEXIST; + else if (buf != name) + strlcpy(buf, name, IFNAMSIZ); + + return 0; +} /** * dev_change_name - change name of a device @@ -956,28 +974,20 @@ int dev_change_name(struct net_device *dev, const char *newname) if (dev->flags & IFF_UP) return -EBUSY; - if (!dev_valid_name(newname)) - return -EINVAL; - if (strncmp(newname, dev->name, IFNAMSIZ) == 0) return 0; memcpy(oldname, dev->name, IFNAMSIZ); - if (strchr(newname, '%')) { - err = dev_alloc_name(dev, newname); - if (err < 0) - return err; - } else if (__dev_get_by_name(net, newname)) - return -EEXIST; - else - strlcpy(dev->name, newname, IFNAMSIZ); + err = dev_get_valid_name(net, newname, dev->name, 1); + if (err < 0) + return err; rollback: /* For now only devices in the initial network namespace * are in sysfs. */ - if (net == &init_net) { + if (net_eq(net, &init_net)) { ret = device_rename(&dev->dev, dev->name); if (ret) { memcpy(dev->name, oldname, IFNAMSIZ); @@ -999,14 +1009,15 @@ rollback: ret = notifier_to_errno(ret); if (ret) { - if (err) { - printk(KERN_ERR - "%s: name change rollback failed: %d.\n", - dev->name, ret); - } else { + /* err >= 0 after dev_alloc_name() or stores the first errno */ + if (err >= 0) { err = ret; memcpy(dev->name, oldname, IFNAMSIZ); goto rollback; + } else { + printk(KERN_ERR + "%s: name change rollback failed: %d.\n", + dev->name, ret); } } @@ -1103,19 +1114,7 @@ void dev_load(struct net *net, const char *name) } EXPORT_SYMBOL(dev_load); -/** - * dev_open - prepare an interface for use. - * @dev: device to open - * - * Takes a device from down to up state. The device's private open - * function is invoked and then the multicast lists are loaded. Finally - * the device is moved into the up state and a %NETDEV_UP message is - * sent to the netdev notifier chain. - * - * Calling this function on an active interface is a nop. On a failure - * a negative errno code is returned. - */ -int dev_open(struct net_device *dev) +static int __dev_open(struct net_device *dev) { const struct net_device_ops *ops = dev->netdev_ops; int ret; @@ -1123,13 +1122,6 @@ int dev_open(struct net_device *dev) ASSERT_RTNL(); /* - * Is it already up? - */ - - if (dev->flags & IFF_UP) - return 0; - - /* * Is it even present? */ if (!netif_device_present(dev)) @@ -1177,36 +1169,57 @@ int dev_open(struct net_device *dev) * Wakeup transmit queue engine */ dev_activate(dev); - - /* - * ... and announce new interface. - */ - call_netdevice_notifiers(NETDEV_UP, dev); } return ret; } -EXPORT_SYMBOL(dev_open); /** - * dev_close - shutdown an interface. - * @dev: device to shutdown + * dev_open - prepare an interface for use. + * @dev: device to open * - * This function moves an active device into down state. A - * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device - * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier - * chain. + * Takes a device from down to up state. The device's private open + * function is invoked and then the multicast lists are loaded. Finally + * the device is moved into the up state and a %NETDEV_UP message is + * sent to the netdev notifier chain. + * + * Calling this function on an active interface is a nop. On a failure + * a negative errno code is returned. */ -int dev_close(struct net_device *dev) +int dev_open(struct net_device *dev) +{ + int ret; + + /* + * Is it already up? + */ + if (dev->flags & IFF_UP) + return 0; + + /* + * Open device + */ + ret = __dev_open(dev); + if (ret < 0) + return ret; + + /* + * ... and announce new interface. + */ + rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); + call_netdevice_notifiers(NETDEV_UP, dev); + + return ret; +} +EXPORT_SYMBOL(dev_open); + +static int __dev_close(struct net_device *dev) { const struct net_device_ops *ops = dev->netdev_ops; - ASSERT_RTNL(); + ASSERT_RTNL(); might_sleep(); - if (!(dev->flags & IFF_UP)) - return 0; - /* * Tell people we are going down, so that they can * prepare to death, when device is still operating. @@ -1242,14 +1255,34 @@ int dev_close(struct net_device *dev) dev->flags &= ~IFF_UP; /* - * Tell people we are down + * Shutdown NET_DMA */ - call_netdevice_notifiers(NETDEV_DOWN, dev); + net_dmaengine_put(); + + return 0; +} + +/** + * dev_close - shutdown an interface. + * @dev: device to shutdown + * + * This function moves an active device into down state. A + * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device + * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier + * chain. + */ +int dev_close(struct net_device *dev) +{ + if (!(dev->flags & IFF_UP)) + return 0; + + __dev_close(dev); /* - * Shutdown NET_DMA + * Tell people we are down */ - net_dmaengine_put(); + rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); + call_netdevice_notifiers(NETDEV_DOWN, dev); return 0; } @@ -1343,6 +1376,7 @@ rollback: nb->notifier_call(nb, NETDEV_DOWN, dev); } nb->notifier_call(nb, NETDEV_UNREGISTER, dev); + nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev); } } @@ -1409,6 +1443,42 @@ static inline void net_timestamp(struct sk_buff *skb) skb->tstamp.tv64 = 0; } +/** + * dev_forward_skb - loopback an skb to another netif + * + * @dev: destination network device + * @skb: buffer to forward + * + * return values: + * NET_RX_SUCCESS (no congestion) + * NET_RX_DROP (packet was dropped) + * + * dev_forward_skb can be used for injecting an skb from the + * start_xmit function of one device into the receive queue + * of another device. + * + * The receiving device may be in another namespace, so + * we have to clear all information in the skb that could + * impact namespace isolation. + */ +int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) +{ + skb_orphan(skb); + + if (!(dev->flags & IFF_UP)) + return NET_RX_DROP; + + if (skb->len > (dev->mtu + dev->hard_header_len)) + return NET_RX_DROP; + + skb_set_dev(skb, dev); + skb->tstamp.tv64 = 0; + skb->pkt_type = PACKET_HOST; + skb->protocol = eth_type_trans(skb, dev); + return netif_rx(skb); +} +EXPORT_SYMBOL_GPL(dev_forward_skb); + /* * Support routine. Sends outgoing frames to any network * taps currently in use. @@ -1564,6 +1634,36 @@ static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb) return false; } +/** + * skb_dev_set -- assign a new device to a buffer + * @skb: buffer for the new device + * @dev: network device + * + * If an skb is owned by a device already, we have to reset + * all data private to the namespace a device belongs to + * before assigning it a new device. + */ +#ifdef CONFIG_NET_NS +void skb_set_dev(struct sk_buff *skb, struct net_device *dev) +{ + skb_dst_drop(skb); + if (skb->dev && !net_eq(dev_net(skb->dev), dev_net(dev))) { + secpath_reset(skb); + nf_reset(skb); + skb_init_secmark(skb); + skb->mark = 0; + skb->priority = 0; + skb->nf_trace = 0; + skb->ipvs_property = 0; +#ifdef CONFIG_NET_SCHED + skb->tc_index = 0; +#endif + } + skb->dev = dev; +} +EXPORT_SYMBOL(skb_set_dev); +#endif /* CONFIG_NET_NS */ + /* * Invalidate hardware checksum when packet is to be mangled, and * complete checksum manually on outgoing path. @@ -1757,7 +1857,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq) { const struct net_device_ops *ops = dev->netdev_ops; - int rc; + int rc = NETDEV_TX_OK; if (likely(!skb->next)) { if (!list_empty(&ptype_all)) @@ -1803,8 +1903,18 @@ gso: skb->next = nskb->next; nskb->next = NULL; + + /* + * If device doesnt need nskb->dst, release it right now while + * its hot in this cpu cache + */ + if (dev->priv_flags & IFF_XMIT_DST_RELEASE) + skb_dst_drop(nskb); + rc = ops->ndo_start_xmit(nskb, dev); if (unlikely(rc != NETDEV_TX_OK)) { + if (rc & ~NETDEV_TX_MASK) + goto out_kfree_gso_skb; nskb->next = skb->next; skb->next = nskb; return rc; @@ -1814,11 +1924,12 @@ gso: return NETDEV_TX_BUSY; } while (skb->next); - skb->destructor = DEV_GSO_CB(skb)->destructor; - +out_kfree_gso_skb: + if (likely(skb->next == NULL)) + skb->destructor = DEV_GSO_CB(skb)->destructor; out_kfree_skb: kfree_skb(skb); - return NETDEV_TX_OK; + return rc; } static u32 skb_tx_hashrnd; @@ -1845,6 +1956,20 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) } EXPORT_SYMBOL(skb_tx_hash); +static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) +{ + if (unlikely(queue_index >= dev->real_num_tx_queues)) { + if (net_ratelimit()) { + WARN(1, "%s selects TX queue %d, but " + "real number of TX queues is %d\n", + dev->name, queue_index, + dev->real_num_tx_queues); + } + return 0; + } + return queue_index; +} + static struct netdev_queue *dev_pick_tx(struct net_device *dev, struct sk_buff *skb) { @@ -1858,13 +1983,18 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev, if (ops->ndo_select_queue) { queue_index = ops->ndo_select_queue(dev, skb); + queue_index = dev_cap_txqueue(dev, queue_index); } else { queue_index = 0; if (dev->real_num_tx_queues > 1) queue_index = skb_tx_hash(dev, skb); - if (sk && sk->sk_dst_cache) - sk_tx_queue_set(sk, queue_index); + if (sk) { + struct dst_entry *dst = rcu_dereference_bh(sk->sk_dst_cache); + + if (dst && skb_dst(skb) == dst) + sk_tx_queue_set(sk, queue_index); + } } } @@ -1906,6 +2036,21 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, return rc; } +/* + * Returns true if either: + * 1. skb has frag_list and the device doesn't support FRAGLIST, or + * 2. skb is fragmented and the device does not support SG, or if + * at least one of fragments is in highmem and device does not + * support DMA from it. + */ +static inline int skb_needs_linearize(struct sk_buff *skb, + struct net_device *dev) +{ + return (skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) || + (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) || + illegal_highdma(dev, skb))); +} + /** * dev_queue_xmit - transmit a buffer * @skb: buffer to transmit @@ -1942,18 +2087,8 @@ int dev_queue_xmit(struct sk_buff *skb) if (netif_needs_gso(dev, skb)) goto gso; - if (skb_has_frags(skb) && - !(dev->features & NETIF_F_FRAGLIST) && - __skb_linearize(skb)) - goto out_kfree_skb; - - /* Fragmented skb is linearized if device does not support SG, - * or if at least one of fragments is in highmem and device - * does not support DMA from it. - */ - if (skb_shinfo(skb)->nr_frags && - (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) && - __skb_linearize(skb)) + /* Convert a paged skb to linear, if required */ + if (skb_needs_linearize(skb, dev) && __skb_linearize(skb)) goto out_kfree_skb; /* If packet is not checksummed and device does not support @@ -1973,7 +2108,7 @@ gso: rcu_read_lock_bh(); txq = dev_pick_tx(dev, skb); - q = rcu_dereference(txq->qdisc); + q = rcu_dereference_bh(txq->qdisc); #ifdef CONFIG_NET_CLS_ACT skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS); @@ -2003,8 +2138,8 @@ gso: HARD_TX_LOCK(dev, txq, cpu); if (!netif_tx_queue_stopped(txq)) { - rc = NET_XMIT_SUCCESS; - if (!dev_hard_start_xmit(skb, dev, txq)) { + rc = dev_hard_start_xmit(skb, dev, txq); + if (dev_xmit_complete(rc)) { HARD_TX_UNLOCK(dev, txq); goto out; } @@ -2259,7 +2394,7 @@ static int ing_filter(struct sk_buff *skb) if (MAX_RED_LOOP < ttl++) { printk(KERN_WARNING "Redir loop detected Dropping packet (%d->%d)\n", - skb->iif, dev->ifindex); + skb->skb_iif, dev->ifindex); return TC_ACT_SHOT; } @@ -2353,7 +2488,9 @@ int netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; struct net_device *orig_dev; + struct net_device *master; struct net_device *null_or_orig; + struct net_device *null_or_bond; int ret = NET_RX_DROP; __be16 type; @@ -2367,16 +2504,17 @@ int netif_receive_skb(struct sk_buff *skb) if (netpoll_receive_skb(skb)) return NET_RX_DROP; - if (!skb->iif) - skb->iif = skb->dev->ifindex; + if (!skb->skb_iif) + skb->skb_iif = skb->dev->ifindex; null_or_orig = NULL; orig_dev = skb->dev; - if (orig_dev->master) { - if (skb_bond_should_drop(skb)) + master = ACCESS_ONCE(orig_dev->master); + if (master) { + if (skb_bond_should_drop(skb, master)) null_or_orig = orig_dev; /* deliver only exact match */ else - skb->dev = orig_dev->master; + skb->dev = master; } __get_cpu_var(netdev_rx_stat).total++; @@ -2419,12 +2557,24 @@ ncls: if (!skb) goto out; + /* + * Make sure frames received on VLAN interfaces stacked on + * bonding interfaces still make their way to any base bonding + * device that may have registered for a specific ptype. The + * handler may have to adjust skb->dev and orig_dev. + */ + null_or_bond = NULL; + if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) && + (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) { + null_or_bond = vlan_dev_real_dev(skb->dev); + } + type = skb->protocol; list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { - if (ptype->type == type && - (ptype->dev == null_or_orig || ptype->dev == skb->dev || - ptype->dev == orig_dev)) { + if (ptype->type == type && (ptype->dev == null_or_orig || + ptype->dev == skb->dev || ptype->dev == orig_dev || + ptype->dev == null_or_bond)) { if (pt_prev) ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; @@ -2493,7 +2643,7 @@ out: return netif_receive_skb(skb); } -void napi_gro_flush(struct napi_struct *napi) +static void napi_gro_flush(struct napi_struct *napi) { struct sk_buff *skb, *next; @@ -2506,7 +2656,6 @@ void napi_gro_flush(struct napi_struct *napi) napi->gro_count = 0; napi->gro_list = NULL; } -EXPORT_SYMBOL(napi_gro_flush); enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { @@ -2609,9 +2758,10 @@ __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) return GRO_NORMAL; for (p = napi->gro_list; p; p = p->next) { - NAPI_GRO_CB(p)->same_flow = (p->dev == skb->dev) - && !compare_ether_header(skb_mac_header(p), - skb_gro_mac_header(skb)); + NAPI_GRO_CB(p)->same_flow = + (p->dev == skb->dev) && + !compare_ether_header(skb_mac_header(p), + skb_gro_mac_header(skb)); NAPI_GRO_CB(p)->flush = 0; } @@ -2692,7 +2842,7 @@ gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, switch (ret) { case GRO_NORMAL: case GRO_HELD: - skb->protocol = eth_type_trans(skb, napi->dev); + skb->protocol = eth_type_trans(skb, skb->dev); if (ret == GRO_HELD) skb_gro_pull(skb, -ETH_HLEN); @@ -2897,7 +3047,7 @@ static void net_rx_action(struct softirq_action *h) * entries to the tail of this list, and only ->poll() * calls can remove this head entry from the list. */ - n = list_entry(list->next, struct napi_struct, poll_list); + n = list_first_entry(list, struct napi_struct, poll_list); have = netpoll_poll_lock(n); @@ -3116,7 +3266,7 @@ static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) { const struct net_device_stats *stats = dev_get_stats(dev); - seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu " + seq_printf(seq, "%6s: %7lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu " "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", dev->name, stats->rx_bytes, stats->rx_packets, stats->rx_errors, @@ -3571,10 +3721,10 @@ void __dev_set_rx_mode(struct net_device *dev) /* Unicast addresses changes may only happen under the rtnl, * therefore calling __dev_set_promiscuity here is safe. */ - if (dev->uc.count > 0 && !dev->uc_promisc) { + if (!netdev_uc_empty(dev) && !dev->uc_promisc) { __dev_set_promiscuity(dev, 1); dev->uc_promisc = 1; - } else if (dev->uc.count == 0 && dev->uc_promisc) { + } else if (netdev_uc_empty(dev) && dev->uc_promisc) { __dev_set_promiscuity(dev, -1); dev->uc_promisc = 0; } @@ -4142,7 +4292,7 @@ static void dev_addr_discard(struct net_device *dev) netif_addr_lock_bh(dev); __dev_addr_discard(&dev->mc_list); - dev->mc_count = 0; + netdev_mc_count(dev) = 0; netif_addr_unlock_bh(dev); } @@ -4178,18 +4328,10 @@ unsigned dev_get_flags(const struct net_device *dev) } EXPORT_SYMBOL(dev_get_flags); -/** - * dev_change_flags - change device settings - * @dev: device - * @flags: device state flags - * - * Change settings on device based state flags. The flags are - * in the userspace exported format. - */ -int dev_change_flags(struct net_device *dev, unsigned flags) +int __dev_change_flags(struct net_device *dev, unsigned int flags) { - int ret, changes; int old_flags = dev->flags; + int ret; ASSERT_RTNL(); @@ -4220,17 +4362,12 @@ int dev_change_flags(struct net_device *dev, unsigned flags) ret = 0; if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */ - ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev); + ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev); if (!ret) dev_set_rx_mode(dev); } - if (dev->flags & IFF_UP && - ((old_flags ^ dev->flags) & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | - IFF_VOLATILE))) - call_netdevice_notifiers(NETDEV_CHANGE, dev); - if ((flags ^ dev->gflags) & IFF_PROMISC) { int inc = (flags & IFF_PROMISC) ? 1 : -1; @@ -4249,11 +4386,47 @@ int dev_change_flags(struct net_device *dev, unsigned flags) dev_set_allmulti(dev, inc); } - /* Exclude state transition flags, already notified */ - changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING); + return ret; +} + +void __dev_notify_flags(struct net_device *dev, unsigned int old_flags) +{ + unsigned int changes = dev->flags ^ old_flags; + + if (changes & IFF_UP) { + if (dev->flags & IFF_UP) + call_netdevice_notifiers(NETDEV_UP, dev); + else + call_netdevice_notifiers(NETDEV_DOWN, dev); + } + + if (dev->flags & IFF_UP && + (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) + call_netdevice_notifiers(NETDEV_CHANGE, dev); +} + +/** + * dev_change_flags - change device settings + * @dev: device + * @flags: device state flags + * + * Change settings on device based state flags. The flags are + * in the userspace exported format. + */ +int dev_change_flags(struct net_device *dev, unsigned flags) +{ + int ret, changes; + int old_flags = dev->flags; + + ret = __dev_change_flags(dev, flags); + if (ret < 0) + return ret; + + changes = old_flags ^ dev->flags; if (changes) rtmsg_ifinfo(RTM_NEWLINK, dev, changes); + __dev_notify_flags(dev, old_flags); return ret; } EXPORT_SYMBOL(dev_change_flags); @@ -4702,21 +4875,23 @@ static void net_set_todo(struct net_device *dev) static void rollback_registered_many(struct list_head *head) { - struct net_device *dev; + struct net_device *dev, *tmp; BUG_ON(dev_boot_phase); ASSERT_RTNL(); - list_for_each_entry(dev, head, unreg_list) { + list_for_each_entry_safe(dev, tmp, head, unreg_list) { /* Some devices call without registering - * for initialization unwind. + * for initialization unwind. Remove those + * devices and proceed with the remaining. */ if (dev->reg_state == NETREG_UNINITIALIZED) { pr_debug("unregister_netdevice: device %s/%p never " "was registered\n", dev->name, dev); WARN_ON(1); - return; + list_del(&dev->unreg_list); + continue; } BUG_ON(dev->reg_state != NETREG_REGISTERED); @@ -4742,6 +4917,10 @@ static void rollback_registered_many(struct list_head *head) */ call_netdevice_notifiers(NETDEV_UNREGISTER, dev); + if (!dev->rtnl_link_ops || + dev->rtnl_link_state == RTNL_LINK_INITIALIZED) + rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); + /* * Flush the unicast and multicast chains */ @@ -4758,6 +4937,10 @@ static void rollback_registered_many(struct list_head *head) netdev_unregister_kobject(dev); } + /* Process any work delayed until the end of the batch */ + dev = list_first_entry(head, struct net_device, unreg_list); + call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); + synchronize_net(); list_for_each_entry(dev, head, unreg_list) @@ -4828,6 +5011,33 @@ unsigned long netdev_fix_features(unsigned long features, const char *name) EXPORT_SYMBOL(netdev_fix_features); /** + * netif_stacked_transfer_operstate - transfer operstate + * @rootdev: the root or lower level device to transfer state from + * @dev: the device to transfer operstate to + * + * Transfer operational state from root to device. This is normally + * called when a stacking relationship exists between the root + * device and the device(a leaf device). + */ +void netif_stacked_transfer_operstate(const struct net_device *rootdev, + struct net_device *dev) +{ + if (rootdev->operstate == IF_OPER_DORMANT) + netif_dormant_on(dev); + else + netif_dormant_off(dev); + + if (netif_carrier_ok(rootdev)) { + if (!netif_carrier_ok(dev)) + netif_carrier_on(dev); + } else { + if (netif_carrier_ok(dev)) + netif_carrier_off(dev); + } +} +EXPORT_SYMBOL(netif_stacked_transfer_operstate); + +/** * register_netdevice - register a network device * @dev: device to register * @@ -4846,8 +5056,6 @@ EXPORT_SYMBOL(netdev_fix_features); int register_netdevice(struct net_device *dev) { - struct hlist_head *head; - struct hlist_node *p; int ret; struct net *net = dev_net(dev); @@ -4876,26 +5084,14 @@ int register_netdevice(struct net_device *dev) } } - if (!dev_valid_name(dev->name)) { - ret = -EINVAL; + ret = dev_get_valid_name(net, dev->name, dev->name, 0); + if (ret) goto err_uninit; - } dev->ifindex = dev_new_index(net); if (dev->iflink == -1) dev->iflink = dev->ifindex; - /* Check for existence of name */ - head = dev_name_hash(net, dev->name); - hlist_for_each(p, head) { - struct net_device *d - = hlist_entry(p, struct net_device, name_hlist); - if (!strncmp(d->name, dev->name, IFNAMSIZ)) { - ret = -EEXIST; - goto err_uninit; - } - } - /* Fix illegal checksum combinations */ if ((dev->features & NETIF_F_HW_CSUM) && (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { @@ -4947,6 +5143,13 @@ int register_netdevice(struct net_device *dev) rollback_registered(dev); dev->reg_state = NETREG_UNREGISTERED; } + /* + * Prevent userspace races by waiting until the network + * device is fully setup before sending notifications. + */ + if (!dev->rtnl_link_ops || + dev->rtnl_link_state == RTNL_LINK_INITIALIZED) + rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U); out: return ret; @@ -5048,6 +5251,8 @@ static void netdev_wait_allrefs(struct net_device *dev) { unsigned long rebroadcast_time, warning_time; + linkwatch_forget_dev(dev); + rebroadcast_time = warning_time = jiffies; while (atomic_read(&dev->refcnt) != 0) { if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { @@ -5055,6 +5260,8 @@ static void netdev_wait_allrefs(struct net_device *dev) /* Rebroadcast unregister notification */ call_netdevice_notifiers(NETDEV_UNREGISTER, dev); + /* don't resend NETDEV_UNREGISTER_BATCH, _BATCH users + * should have already handle it the first time */ if (test_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { @@ -5119,7 +5326,7 @@ void netdev_run_todo(void) while (!list_empty(&list)) { struct net_device *dev - = list_entry(list.next, struct net_device, todo_list); + = list_first_entry(&list, struct net_device, todo_list); list_del(&dev->todo_list); if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) { @@ -5150,6 +5357,32 @@ void netdev_run_todo(void) } /** + * dev_txq_stats_fold - fold tx_queues stats + * @dev: device to get statistics from + * @stats: struct net_device_stats to hold results + */ +void dev_txq_stats_fold(const struct net_device *dev, + struct net_device_stats *stats) +{ + unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0; + unsigned int i; + struct netdev_queue *txq; + + for (i = 0; i < dev->num_tx_queues; i++) { + txq = netdev_get_tx_queue(dev, i); + tx_bytes += txq->tx_bytes; + tx_packets += txq->tx_packets; + tx_dropped += txq->tx_dropped; + } + if (tx_bytes || tx_packets || tx_dropped) { + stats->tx_bytes = tx_bytes; + stats->tx_packets = tx_packets; + stats->tx_dropped = tx_dropped; + } +} +EXPORT_SYMBOL(dev_txq_stats_fold); + +/** * dev_get_stats - get network device statistics * @dev: device to get statistics from * @@ -5163,25 +5396,9 @@ const struct net_device_stats *dev_get_stats(struct net_device *dev) if (ops->ndo_get_stats) return ops->ndo_get_stats(dev); - else { - unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0; - struct net_device_stats *stats = &dev->stats; - unsigned int i; - struct netdev_queue *txq; - - for (i = 0; i < dev->num_tx_queues; i++) { - txq = netdev_get_tx_queue(dev, i); - tx_bytes += txq->tx_bytes; - tx_packets += txq->tx_packets; - tx_dropped += txq->tx_dropped; - } - if (tx_bytes || tx_packets || tx_dropped) { - stats->tx_bytes = tx_bytes; - stats->tx_packets = tx_packets; - stats->tx_dropped = tx_dropped; - } - return stats; - } + + dev_txq_stats_fold(dev, &dev->stats); + return &dev->stats; } EXPORT_SYMBOL(dev_get_stats); @@ -5260,8 +5477,11 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, netdev_init_queues(dev); + INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list); + dev->ethtool_ntuple_list.count = 0; INIT_LIST_HEAD(&dev->napi_list); INIT_LIST_HEAD(&dev->unreg_list); + INIT_LIST_HEAD(&dev->link_watch_list); dev->priv_flags = IFF_XMIT_DST_RELEASE; setup(dev); strcpy(dev->name, name); @@ -5295,6 +5515,9 @@ void free_netdev(struct net_device *dev) /* Flush device addresses */ dev_addr_flush(dev); + /* Clear ethtool n-tuple list */ + ethtool_ntuple_flush(dev); + list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) netif_napi_del(p); @@ -5329,7 +5552,7 @@ EXPORT_SYMBOL(synchronize_net); * unregister_netdevice_queue - remove device from the kernel * @dev: device * @head: list - + * * This function shuts down a device interface and removes it * from the kernel tables. * If head not NULL, device is queued to be unregistered later. @@ -5355,7 +5578,6 @@ EXPORT_SYMBOL(unregister_netdevice_queue); /** * unregister_netdevice_many - unregister many devices * @head: list of devices - * */ void unregister_netdevice_many(struct list_head *head) { @@ -5404,8 +5626,6 @@ EXPORT_SYMBOL(unregister_netdev); int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) { - char buf[IFNAMSIZ]; - const char *destname; int err; ASSERT_RTNL(); @@ -5438,20 +5658,11 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char * we can use it in the destination network namespace. */ err = -EEXIST; - destname = dev->name; - if (__dev_get_by_name(net, destname)) { + if (__dev_get_by_name(net, dev->name)) { /* We get here if we can't use the current device name */ if (!pat) goto out; - if (!dev_valid_name(pat)) - goto out; - if (strchr(pat, '%')) { - if (__dev_alloc_name(net, pat, buf) < 0) - goto out; - destname = buf; - } else - destname = pat; - if (__dev_get_by_name(net, destname)) + if (dev_get_valid_name(net, pat, dev->name, 1)) goto out; } @@ -5475,6 +5686,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char this device. They should clean all the things. */ call_netdevice_notifiers(NETDEV_UNREGISTER, dev); + call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); /* * Flush the unicast and multicast chains @@ -5487,10 +5699,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char /* Actually switch the network namespace */ dev_net_set(dev, net); - /* Assign the new device name */ - if (destname != dev->name) - strcpy(dev->name, destname); - /* If there is an ifindex conflict assign a new one */ if (__dev_get_by_index(net, dev->ifindex)) { int iflink = (dev->iflink == dev->ifindex); @@ -5509,6 +5717,12 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char /* Notify protocols, that a new device appeared. */ call_netdevice_notifiers(NETDEV_REGISTER, dev); + /* + * Prevent userspace races by waiting until the network + * device is fully setup before sending notifications. + */ + rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U); + synchronize_net(); err = 0; out: @@ -5677,14 +5891,13 @@ static struct pernet_operations __net_initdata netdev_net_ops = { static void __net_exit default_device_exit(struct net *net) { - struct net_device *dev; + struct net_device *dev, *aux; /* - * Push all migratable of the network devices back to the + * Push all migratable network devices back to the * initial network namespace */ rtnl_lock(); -restart: - for_each_netdev(net, dev) { + for_each_netdev_safe(net, dev, aux) { int err; char fb_name[IFNAMSIZ]; @@ -5692,11 +5905,9 @@ restart: if (dev->features & NETIF_F_NETNS_LOCAL) continue; - /* Delete virtual devices */ - if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) { - dev->rtnl_link_ops->dellink(dev, NULL); - goto restart; - } + /* Leave virtual devices for the generic cleanup */ + if (dev->rtnl_link_ops) + continue; /* Push remaing network devices to init_net */ snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); @@ -5706,13 +5917,37 @@ restart: __func__, dev->name, err); BUG(); } - goto restart; } rtnl_unlock(); } +static void __net_exit default_device_exit_batch(struct list_head *net_list) +{ + /* At exit all network devices most be removed from a network + * namespace. Do this in the reverse order of registeration. + * Do this across as many network namespaces as possible to + * improve batching efficiency. + */ + struct net_device *dev; + struct net *net; + LIST_HEAD(dev_kill_list); + + rtnl_lock(); + list_for_each_entry(net, net_list, exit_list) { + for_each_netdev_reverse(net, dev) { + if (dev->rtnl_link_ops) + dev->rtnl_link_ops->dellink(dev, &dev_kill_list); + else + unregister_netdevice_queue(dev, &dev_kill_list); + } + } + unregister_netdevice_many(&dev_kill_list); + rtnl_unlock(); +} + static struct pernet_operations __net_initdata default_device_ops = { .exit = default_device_exit, + .exit_batch = default_device_exit_batch, }; /*