#include <linux/cpu.h>
#include <linux/types.h>
#include <linux/kernel.h>
+#include <linux/hash.h>
+#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/string.h>
#include <net/dst.h>
#include <net/pkt_sched.h>
#include <net/checksum.h>
+#include <net/xfrm.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <trace/events/napi.h>
+#include <linux/pci.h>
#include "net-sysfs.h"
* The @dev_base_head list is protected by @dev_base_lock and the rtnl
* semaphore.
*
- * Pure readers hold dev_base_lock for reading.
+ * Pure readers hold dev_base_lock for reading, or rcu_read_lock()
*
* Writers must hold the rtnl semaphore while they loop through the
* dev_base_head list, and hold dev_base_lock for writing when they do the
static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
{
unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ)); /* hash at most IFNAMSIZ bytes of the name */
- return &net->dev_name_head[hash & (NETDEV_HASHENTRIES - 1)];
+ return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)]; /* bucket now picked through hash_32() instead of masking the low bits */
}
static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
}
+static inline void rps_lock(struct softnet_data *queue)
+{
+#ifdef CONFIG_RPS
+ spin_lock(&queue->input_pkt_queue.lock); /* plain spin_lock(): the IRQ state is left exactly as the caller set it */
+#endif /* CONFIG_RPS - without it this helper has an empty body */
+}
+
+static inline void rps_unlock(struct softnet_data *queue)
+{
+#ifdef CONFIG_RPS
+ spin_unlock(&queue->input_pkt_queue.lock); /* releases the same input_pkt_queue lock that rps_lock() takes */
+#endif /* CONFIG_RPS - without it this helper has an empty body */
+}
+
/* Device list insertion */
static int list_netdevice(struct net_device *dev)
{
ASSERT_RTNL();
write_lock_bh(&dev_base_lock);
- list_add_tail(&dev->dev_list, &net->dev_base_head);
- hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
- hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
+ list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
+ hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
+ hlist_add_head_rcu(&dev->index_hlist,
+ dev_index_hash(net, dev->ifindex));
write_unlock_bh(&dev_base_lock);
return 0;
}
-/* Device list removal */
+/* Device list removal
+ * caller must respect a RCU grace period before freeing/reusing dev
+ *
+ * Asserts that RTNL is held, then unlinks dev from the device list and
+ * from the name and index hash chains with the _rcu list primitives
+ * while holding dev_base_lock for writing.
+ */
static void unlist_netdevice(struct net_device *dev)
{
ASSERT_RTNL();
/* Unlink dev from the device chain */
write_lock_bh(&dev_base_lock);
- list_del(&dev->dev_list);
- hlist_del(&dev->name_hlist);
- hlist_del(&dev->index_hlist);
+ list_del_rcu(&dev->dev_list);
+ hlist_del_rcu(&dev->name_hlist);
+ hlist_del_rcu(&dev->index_hlist);
write_unlock_bh(&dev_base_lock);
}
struct net_device *__dev_get_by_name(struct net *net, const char *name)
{
struct hlist_node *p;
+ struct net_device *dev;
+ struct hlist_head *head = dev_name_hash(net, name); /* hash bucket this name falls into */
- hlist_for_each(p, dev_name_hash(net, name)) {
- struct net_device *dev
- = hlist_entry(p, struct net_device, name_hlist);
+ hlist_for_each_entry(dev, p, head, name_hlist) /* non-RCU iterator; dev_get_by_name_rcu() is the RCU flavour */
if (!strncmp(dev->name, name, IFNAMSIZ))
return dev;
- }
+
return NULL; /* no entry in the bucket carries that name */
}
EXPORT_SYMBOL(__dev_get_by_name);
/**
+ * dev_get_by_name_rcu - find a device by its name
+ * @net: the applicable net namespace
+ * @name: name to find
+ *
+ * Find an interface by name.
+ * If the name is found a pointer to the device is returned.
+ * If the name is not found then %NULL is returned.
+ * The reference counters are not incremented so the caller must be
+ * careful with locks. The caller must hold RCU lock.
+ *
+ * The lookup walks the bucket returned by dev_name_hash() with
+ * hlist_for_each_entry_rcu() and compares at most IFNAMSIZ bytes
+ * of the name.
+ */
+
+struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
+{
+ struct hlist_node *p;
+ struct net_device *dev;
+ struct hlist_head *head = dev_name_hash(net, name);
+
+ hlist_for_each_entry_rcu(dev, p, head, name_hlist)
+ if (!strncmp(dev->name, name, IFNAMSIZ))
+ return dev;
+
+ return NULL;
+}
+EXPORT_SYMBOL(dev_get_by_name_rcu);
+
+/**
* dev_get_by_name - find a device by its name
* @net: the applicable net namespace
* @name: name to find
{
struct net_device *dev;
- read_lock(&dev_base_lock);
- dev = __dev_get_by_name(net, name);
+ rcu_read_lock();
+ dev = dev_get_by_name_rcu(net, name);
if (dev)
dev_hold(dev);
- read_unlock(&dev_base_lock);
+ rcu_read_unlock();
return dev;
}
EXPORT_SYMBOL(dev_get_by_name);
struct net_device *__dev_get_by_index(struct net *net, int ifindex)
{
struct hlist_node *p;
+ struct net_device *dev;
+ struct hlist_head *head = dev_index_hash(net, ifindex); /* hash bucket this ifindex falls into */
- hlist_for_each(p, dev_index_hash(net, ifindex)) {
- struct net_device *dev
- = hlist_entry(p, struct net_device, index_hlist);
+ hlist_for_each_entry(dev, p, head, index_hlist) /* non-RCU iterator; dev_get_by_index_rcu() is the RCU flavour */
if (dev->ifindex == ifindex)
return dev;
- }
+
return NULL; /* no entry in the bucket carries that ifindex */
}
EXPORT_SYMBOL(__dev_get_by_index);
+/**
+ * dev_get_by_index_rcu - find a device by its ifindex
+ * @net: the applicable net namespace
+ * @ifindex: index of device
+ *
+ * Search for an interface by index. Returns %NULL if the device
+ * is not found or a pointer to the device. The device has not
+ * had its reference counter increased so the caller must be careful
+ * about locking. The caller must hold RCU lock.
+ *
+ * The lookup walks the bucket returned by dev_index_hash() with
+ * hlist_for_each_entry_rcu() and returns the first entry whose
+ * ifindex matches.
+ */
+
+struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
+{
+ struct hlist_node *p;
+ struct net_device *dev;
+ struct hlist_head *head = dev_index_hash(net, ifindex);
+
+ hlist_for_each_entry_rcu(dev, p, head, index_hlist)
+ if (dev->ifindex == ifindex)
+ return dev;
+
+ return NULL;
+}
+EXPORT_SYMBOL(dev_get_by_index_rcu);
+
/**
* dev_get_by_index - find a device by its ifindex
{
struct net_device *dev;
- read_lock(&dev_base_lock);
- dev = __dev_get_by_index(net, ifindex);
+ rcu_read_lock();
+ dev = dev_get_by_index_rcu(net, ifindex);
if (dev)
dev_hold(dev);
- read_unlock(&dev_base_lock);
+ rcu_read_unlock();
return dev;
}
EXPORT_SYMBOL(dev_get_by_index);
struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
- struct net_device *dev;
+ struct net_device *dev, *ret = NULL; /* ret stays NULL when no device has the requested type */
- rtnl_lock();
- dev = __dev_getfirstbyhwtype(net, type);
- if (dev)
- dev_hold(dev);
- rtnl_unlock();
- return dev;
+ rcu_read_lock();
+ for_each_netdev_rcu(net, dev)
+ if (dev->type == type) {
+ dev_hold(dev); /* reference is taken while still inside the RCU read-side section */
+ ret = dev;
+ break;
+ }
+ rcu_read_unlock();
+ return ret;
}
EXPORT_SYMBOL(dev_getfirstbyhwtype);
struct net_device *dev, *ret;
ret = NULL;
- read_lock(&dev_base_lock);
- for_each_netdev(net, dev) {
+ rcu_read_lock();
+ for_each_netdev_rcu(net, dev) {
if (((dev->flags ^ if_flags) & mask) == 0) {
dev_hold(dev);
ret = dev;
break;
}
}
- read_unlock(&dev_base_lock);
+ rcu_read_unlock();
return ret;
}
EXPORT_SYMBOL(dev_get_by_flags);
free_page((unsigned long) inuse);
}
- snprintf(buf, IFNAMSIZ, name, i);
+ if (buf != name)
+ snprintf(buf, IFNAMSIZ, name, i);
if (!__dev_get_by_name(net, buf))
return i;
}
EXPORT_SYMBOL(dev_alloc_name);
+static int dev_get_valid_name(struct net *net, const char *name, char *buf,
+ bool fmt)
+{ /* Validates @name and leaves the name to use in @buf: returns -EINVAL, -EEXIST, the result of __dev_alloc_name(), or 0 */
+ if (!dev_valid_name(name))
+ return -EINVAL;
+
+ if (fmt && strchr(name, '%')) /* a name containing '%' is only expanded when the caller passes fmt */
+ return __dev_alloc_name(net, name, buf);
+ else if (__dev_get_by_name(net, name))
+ return -EEXIST; /* literal name is already present in this namespace */
+ else if (buf != name) /* nothing to copy when the caller passed the same buffer for both */
+ strlcpy(buf, name, IFNAMSIZ);
+
+ return 0;
+}
/**
* dev_change_name - change name of a device
if (dev->flags & IFF_UP)
return -EBUSY;
- if (!dev_valid_name(newname))
- return -EINVAL;
-
if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
return 0;
memcpy(oldname, dev->name, IFNAMSIZ);
- if (strchr(newname, '%')) {
- err = dev_alloc_name(dev, newname);
- if (err < 0)
- return err;
- } else if (__dev_get_by_name(net, newname))
- return -EEXIST;
- else
- strlcpy(dev->name, newname, IFNAMSIZ);
+ err = dev_get_valid_name(net, newname, dev->name, 1);
+ if (err < 0)
+ return err;
rollback:
/* For now only devices in the initial network namespace
* are in sysfs.
*/
- if (net == &init_net) {
+ if (net_eq(net, &init_net)) {
ret = device_rename(&dev->dev, dev->name);
if (ret) {
memcpy(dev->name, oldname, IFNAMSIZ);
write_lock_bh(&dev_base_lock);
hlist_del(&dev->name_hlist);
- hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
+ write_unlock_bh(&dev_base_lock);
+
+ synchronize_rcu();
+
+ write_lock_bh(&dev_base_lock);
+ hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
write_unlock_bh(&dev_base_lock);
ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
ret = notifier_to_errno(ret);
if (ret) {
- if (err) {
- printk(KERN_ERR
- "%s: name change rollback failed: %d.\n",
- dev->name, ret);
- } else {
+ /* err >= 0 on the first pass (dev_get_valid_name() succeeded); on the rollback pass it stores the first errno */
+ if (err >= 0) {
err = ret;
memcpy(dev->name, oldname, IFNAMSIZ);
goto rollback;
+ } else {
+ printk(KERN_ERR
+ "%s: name change rollback failed: %d.\n",
+ dev->name, ret);
}
}
}
EXPORT_SYMBOL(netdev_state_change);
-void netdev_bonding_change(struct net_device *dev, unsigned long event)
+int netdev_bonding_change(struct net_device *dev, unsigned long event)
{
- call_netdevice_notifiers(event, dev);
+ return call_netdevice_notifiers(event, dev); /* the notifier chain result is now handed back to the caller */
}
EXPORT_SYMBOL(netdev_bonding_change);
{
struct net_device *dev;
- read_lock(&dev_base_lock);
- dev = __dev_get_by_name(net, name);
- read_unlock(&dev_base_lock);
+ rcu_read_lock();
+ dev = dev_get_by_name_rcu(net, name);
+ rcu_read_unlock();
if (!dev && capable(CAP_NET_ADMIN))
request_module("%s", name);
}
EXPORT_SYMBOL(dev_load);
-/**
- * dev_open - prepare an interface for use.
- * @dev: device to open
- *
- * Takes a device from down to up state. The device's private open
- * function is invoked and then the multicast lists are loaded. Finally
- * the device is moved into the up state and a %NETDEV_UP message is
- * sent to the netdev notifier chain.
- *
- * Calling this function on an active interface is a nop. On a failure
- * a negative errno code is returned.
- */
-int dev_open(struct net_device *dev)
+static int __dev_open(struct net_device *dev)
{
const struct net_device_ops *ops = dev->netdev_ops;
int ret;
ASSERT_RTNL();
/*
- * Is it already up?
- */
-
- if (dev->flags & IFF_UP)
- return 0;
-
- /*
* Is it even present?
*/
if (!netif_device_present(dev))
* Wakeup transmit queue engine
*/
dev_activate(dev);
-
- /*
- * ... and announce new interface.
- */
- call_netdevice_notifiers(NETDEV_UP, dev);
}
return ret;
}
-EXPORT_SYMBOL(dev_open);
/**
- * dev_close - shutdown an interface.
- * @dev: device to shutdown
+ * dev_open - prepare an interface for use.
+ * @dev: device to open
*
- * This function moves an active device into down state. A
- * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
- * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
- * chain.
+ * Takes a device from down to up state. The device's private open
+ * function is invoked and then the multicast lists are loaded. Finally
+ * the device is moved into the up state and a %NETDEV_UP message is
+ * sent to the netdev notifier chain.
+ *
+ * Calling this function on an active interface is a nop. On a failure
+ * a negative errno code is returned.
+ *
+ * The IFF_UP test is done here and the actual open is delegated to
+ * __dev_open(). On success an RTM_NEWLINK message is emitted through
+ * rtmsg_ifinfo() before the notifier chain is called.
*/
-int dev_close(struct net_device *dev)
+int dev_open(struct net_device *dev)
+{
+ int ret;
+
+ /*
+ * Is it already up?
+ */
+ if (dev->flags & IFF_UP)
+ return 0;
+
+ /*
+ * Open device
+ */
+ ret = __dev_open(dev);
+ if (ret < 0)
+ return ret;
+
+ /*
+ * ... and announce new interface.
+ */
+ rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
+ call_netdevice_notifiers(NETDEV_UP, dev);
+
+ return ret;
+}
+EXPORT_SYMBOL(dev_open);
+
+static int __dev_close(struct net_device *dev)
{
const struct net_device_ops *ops = dev->netdev_ops;
- ASSERT_RTNL();
+ ASSERT_RTNL();
might_sleep();
- if (!(dev->flags & IFF_UP))
- return 0;
-
/*
* Tell people we are going down, so that they can
* prepare for shutdown while the device is still operating.
dev->flags &= ~IFF_UP;
/*
- * Tell people we are down
+ * Shutdown NET_DMA
*/
- call_netdevice_notifiers(NETDEV_DOWN, dev);
+ net_dmaengine_put();
+
+ return 0;
+}
+
+/**
+ * dev_close - shutdown an interface.
+ * @dev: device to shutdown
+ *
+ * This function moves an active device into down state. A
+ * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
+ * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
+ * chain.
+ *
+ * A device that is not IFF_UP is left untouched. Otherwise __dev_close()
+ * does the work (its return value is ignored), then an RTM_NEWLINK
+ * message is sent via rtmsg_ifinfo() ahead of the %NETDEV_DOWN
+ * notification. Always returns 0.
+ */
+int dev_close(struct net_device *dev)
+{
+ if (!(dev->flags & IFF_UP))
+ return 0;
+
+ __dev_close(dev);
/*
- * Shutdown NET_DMA
+ * Tell people we are down
*/
- net_dmaengine_put();
+ rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
+ call_netdevice_notifiers(NETDEV_DOWN, dev);
return 0;
}
nb->notifier_call(nb, NETDEV_DOWN, dev);
}
nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
+ nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
}
}
skb->tstamp.tv64 = 0;
}
+/**
+ * dev_forward_skb - loopback an skb to another netif
+ *
+ * @dev: destination network device
+ * @skb: buffer to forward
+ *
+ * return values:
+ * NET_RX_SUCCESS (no congestion)
+ * NET_RX_DROP (packet was dropped)
+ *
+ * dev_forward_skb can be used for injecting an skb from the
+ * start_xmit function of one device into the receive queue
+ * of another device.
+ *
+ * The receiving device may be in another namespace, so
+ * we have to clear all information in the skb that could
+ * impact namespace isolation.
+ *
+ * The skb is orphaned first. It is then rejected with NET_RX_DROP
+ * when @dev is not IFF_UP or when skb->len exceeds
+ * dev->mtu + dev->hard_header_len; otherwise it is retargeted with
+ * skb_set_dev() and handed to netif_rx(), whose result is returned.
+ *
+ * NOTE(review): the two early NET_RX_DROP returns do not free the
+ * skb in this function - presumably the caller still owns it in
+ * that case; confirm against the callers.
+ */
+int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
+{
+ skb_orphan(skb);
+
+ if (!(dev->flags & IFF_UP))
+ return NET_RX_DROP;
+
+ if (skb->len > (dev->mtu + dev->hard_header_len))
+ return NET_RX_DROP;
+
+ skb_set_dev(skb, dev);
+ skb->tstamp.tv64 = 0;
+ skb->pkt_type = PACKET_HOST;
+ skb->protocol = eth_type_trans(skb, dev);
+ return netif_rx(skb);
+}
+EXPORT_SYMBOL_GPL(dev_forward_skb);
+
/*
* Support routine. Sends outgoing frames to any network
* taps currently in use.
return false;
}
+/**
+ * skb_set_dev - assign a new device to a buffer
+ * @skb: buffer for the new device
+ * @dev: network device
+ *
+ * If an skb is owned by a device already, we have to reset
+ * all data private to the namespace a device belongs to
+ * before assigning it a new device.
+ *
+ * The dst entry is always dropped. The remaining state (secpath,
+ * netfilter state, secmark, mark, priority, nf_trace, ipvs_property
+ * and, with CONFIG_NET_SCHED, tc_index) is only reset when the skb
+ * already has a device that lives in a different namespace than @dev.
+ * This definition is only compiled when CONFIG_NET_NS is set.
+ */
+#ifdef CONFIG_NET_NS
+void skb_set_dev(struct sk_buff *skb, struct net_device *dev)
+{
+ skb_dst_drop(skb);
+ if (skb->dev && !net_eq(dev_net(skb->dev), dev_net(dev))) {
+ secpath_reset(skb);
+ nf_reset(skb);
+ skb_init_secmark(skb);
+ skb->mark = 0;
+ skb->priority = 0;
+ skb->nf_trace = 0;
+ skb->ipvs_property = 0;
+#ifdef CONFIG_NET_SCHED
+ skb->tc_index = 0;
+#endif
+ }
+ skb->dev = dev;
+}
+EXPORT_SYMBOL(skb_set_dev);
+#endif /* CONFIG_NET_NS */
+
/*
* Invalidate hardware checksum when packet is to be mangled, and
* complete checksum manually on outgoing path.
* 2. No high memory really exists on this machine.
*/
-static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
+static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
int i;
+ if (!(dev->features & NETIF_F_HIGHDMA)) { /* without NETIF_F_HIGHDMA any frag page in highmem is reported as illegal */
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+ if (PageHighMem(skb_shinfo(skb)->frags[i].page))
+ return 1;
+ }
- if (dev->features & NETIF_F_HIGHDMA)
- return 0;
-
- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- if (PageHighMem(skb_shinfo(skb)->frags[i].page))
- return 1;
+ if (PCI_DMA_BUS_IS_PHYS) {
+ struct device *pdev = dev->dev.parent;
+ if (!pdev) /* no parent device, hence no DMA mask to check against */
+ return 0;
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ dma_addr_t addr = page_to_phys(skb_shinfo(skb)->frags[i].page);
+ if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask) /* missing mask, or the frag page ends beyond the parent's DMA mask */
+ return 1;
+ }
+ }
#endif
return 0; /* also the only statement left when CONFIG_HIGHMEM is not set */
}
struct netdev_queue *txq)
{
const struct net_device_ops *ops = dev->netdev_ops;
- int rc;
+ int rc = NETDEV_TX_OK;
if (likely(!skb->next)) {
if (!list_empty(&ptype_all))
skb->next = nskb->next;
nskb->next = NULL;
+
+ /*
+ * If device doesn't need nskb->dst, release it right now while
+ * it's hot in this cpu cache
+ */
+ if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
+ skb_dst_drop(nskb);
+
rc = ops->ndo_start_xmit(nskb, dev);
if (unlikely(rc != NETDEV_TX_OK)) {
+ if (rc & ~NETDEV_TX_MASK)
+ goto out_kfree_gso_skb;
nskb->next = skb->next;
skb->next = nskb;
return rc;
return NETDEV_TX_BUSY;
} while (skb->next);
- skb->destructor = DEV_GSO_CB(skb)->destructor;
-
+out_kfree_gso_skb:
+ if (likely(skb->next == NULL))
+ skb->destructor = DEV_GSO_CB(skb)->destructor;
out_kfree_skb:
kfree_skb(skb);
- return NETDEV_TX_OK;
+ return rc;
}
-static u32 skb_tx_hashrnd;
+static u32 hashrnd __read_mostly;
u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
{
else
hash = skb->protocol;
- hash = jhash_1word(hash, skb_tx_hashrnd);
+ hash = jhash_1word(hash, hashrnd);
return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
}
EXPORT_SYMBOL(skb_tx_hash);
+static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
+{
+ if (unlikely(queue_index >= dev->real_num_tx_queues)) { /* selection lies outside the device's real TX queue range */
+ if (net_ratelimit()) {
+ pr_warning("%s selects TX queue %d, but "
+ "real number of TX queues is %d\n",
+ dev->name, queue_index, dev->real_num_tx_queues);
+ }
+ return 0; /* fall back to queue 0 */
+ }
+ return queue_index; /* in range: passed through unchanged */
+}
+
static struct netdev_queue *dev_pick_tx(struct net_device *dev,
struct sk_buff *skb)
{
if (ops->ndo_select_queue) {
queue_index = ops->ndo_select_queue(dev, skb);
+ queue_index = dev_cap_txqueue(dev, queue_index);
} else {
queue_index = 0;
if (dev->real_num_tx_queues > 1)
queue_index = skb_tx_hash(dev, skb);
- if (sk && sk->sk_dst_cache)
+ if (sk && rcu_dereference_check(sk->sk_dst_cache, 1))
sk_tx_queue_set(sk, queue_index);
}
}
return rc;
}
+/*
+ * Returns true if either:
+ * 1. skb has frag_list and the device doesn't support FRAGLIST, or
+ * 2. skb is fragmented and the device does not support SG, or if
+ * at least one of fragments is in highmem and device does not
+ * support DMA from it.
+ *
+ * Because of short-circuit evaluation illegal_highdma() is only
+ * consulted when the skb has page frags and the device has NETIF_F_SG.
+ */
+static inline int skb_needs_linearize(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ return (skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) ||
+ (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) ||
+ illegal_highdma(dev, skb)));
+}
+
/**
* dev_queue_xmit - transmit a buffer
* @skb: buffer to transmit
if (netif_needs_gso(dev, skb))
goto gso;
- if (skb_has_frags(skb) &&
- !(dev->features & NETIF_F_FRAGLIST) &&
- __skb_linearize(skb))
- goto out_kfree_skb;
-
- /* Fragmented skb is linearized if device does not support SG,
- * or if at least one of fragments is in highmem and device
- * does not support DMA from it.
- */
- if (skb_shinfo(skb)->nr_frags &&
- (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
- __skb_linearize(skb))
+ /* Convert a paged skb to linear, if required */
+ if (skb_needs_linearize(skb, dev) && __skb_linearize(skb))
goto out_kfree_skb;
/* If packet is not checksummed and device does not support
rcu_read_lock_bh();
txq = dev_pick_tx(dev, skb);
- q = rcu_dereference(txq->qdisc);
+ q = rcu_dereference_bh(txq->qdisc);
#ifdef CONFIG_NET_CLS_ACT
skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
HARD_TX_LOCK(dev, txq, cpu);
if (!netif_tx_queue_stopped(txq)) {
- rc = NET_XMIT_SUCCESS;
- if (!dev_hard_start_xmit(skb, dev, txq)) {
+ rc = dev_hard_start_xmit(skb, dev, txq);
+ if (dev_xmit_complete(rc)) {
HARD_TX_UNLOCK(dev, txq);
goto out;
}
DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
+#ifdef CONFIG_RPS
-/**
- * netif_rx - post buffer to the network code
- * @skb: buffer to post
- *
- * This function receives a packet from a device driver and queues it for
- * the upper (protocol) levels to process. It always succeeds. The buffer
- * may be dropped during processing for congestion control or by the
- * protocol layers.
- *
- * return values:
- * NET_RX_SUCCESS (no congestion)
- * NET_RX_DROP (packet was dropped)
- *
+/* One global table that all flow-based protocols share. */
+struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
+EXPORT_SYMBOL(rps_sock_flow_table);
+
+/*
+ * get_rps_cpu is called from netif_receive_skb and returns the target
+ * CPU from the RPS map of the receiving queue for a given skb.
+ * rcu_read_lock must be held on entry.
+ *
+ * It is also called from netif_rx. Returns -1 when no CPU is selected:
+ * bad rx queue index, neither an RPS map nor a flow table on the queue,
+ * a protocol other than IPv4/IPv6, a header that cannot be pulled, or
+ * no online CPU found. skb->rxhash is computed here when it is still
+ * zero. *rflowp is only written when the CPU comes from the flow
+ * tables, not when it comes from the plain RPS map.
*/
+static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
+ struct rps_dev_flow **rflowp)
+{
+ struct ipv6hdr *ip6;
+ struct iphdr *ip;
+ struct netdev_rx_queue *rxqueue;
+ struct rps_map *map;
+ struct rps_dev_flow_table *flow_table;
+ struct rps_sock_flow_table *sock_flow_table;
+ int cpu = -1;
+ u8 ip_proto;
+ u16 tcpu;
+ u32 addr1, addr2, ports, ihl;
-int netif_rx(struct sk_buff *skb)
+ if (skb_rx_queue_recorded(skb)) {
+ u16 index = skb_get_rx_queue(skb);
+ if (unlikely(index >= dev->num_rx_queues)) {
+ if (net_ratelimit()) {
+ pr_warning("%s received packet on queue "
+ "%u, but number of RX queues is %u\n",
+ dev->name, index, dev->num_rx_queues);
+ }
+ goto done;
+ }
+ rxqueue = dev->_rx + index;
+ } else
+ rxqueue = dev->_rx; /* no queue recorded on the skb: use the first rx queue */
+
+ if (!rxqueue->rps_map && !rxqueue->rps_flow_table)
+ goto done;
+
+ if (skb->rxhash)
+ goto got_hash; /* Skip hash computation on packet header */
+
+ switch (skb->protocol) {
+ case __constant_htons(ETH_P_IP):
+ if (!pskb_may_pull(skb, sizeof(*ip)))
+ goto done;
+
+ ip = (struct iphdr *) skb->data;
+ ip_proto = ip->protocol;
+ addr1 = ip->saddr;
+ addr2 = ip->daddr;
+ ihl = ip->ihl;
+ break;
+ case __constant_htons(ETH_P_IPV6):
+ if (!pskb_may_pull(skb, sizeof(*ip6)))
+ goto done;
+
+ ip6 = (struct ipv6hdr *) skb->data;
+ ip_proto = ip6->nexthdr;
+ addr1 = ip6->saddr.s6_addr32[3];
+ addr2 = ip6->daddr.s6_addr32[3];
+ ihl = (40 >> 2); /* 40 bytes expressed in 32-bit words, so that ihl * 4 below is a byte offset */
+ break;
+ default:
+ goto done;
+ }
+ ports = 0;
+ switch (ip_proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ case IPPROTO_DCCP:
+ case IPPROTO_ESP:
+ case IPPROTO_AH:
+ case IPPROTO_SCTP:
+ case IPPROTO_UDPLITE:
+ if (pskb_may_pull(skb, (ihl * 4) + 4))
+ ports = *((u32 *) (skb->data + (ihl * 4))); /* first 4 bytes following the network header */
+ break;
+
+ default:
+ break;
+ }
+
+ skb->rxhash = jhash_3words(addr1, addr2, ports, hashrnd);
+ if (!skb->rxhash)
+ skb->rxhash = 1; /* zero is what the rxhash test above treats as "not computed yet" */
+
+got_hash:
+ flow_table = rcu_dereference(rxqueue->rps_flow_table);
+ sock_flow_table = rcu_dereference(rps_sock_flow_table);
+ if (flow_table && sock_flow_table) {
+ u16 next_cpu;
+ struct rps_dev_flow *rflow;
+
+ rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
+ tcpu = rflow->cpu;
+
+ next_cpu = sock_flow_table->ents[skb->rxhash &
+ sock_flow_table->mask];
+
+ /*
+ * If the desired CPU (where last recvmsg was done) is
+ * different from current CPU (one in the rx-queue flow
+ * table entry), switch if one of the following holds:
+ * - Current CPU is unset (equal to RPS_NO_CPU).
+ * - Current CPU is offline.
+ * - The current CPU's queue tail has advanced beyond the
+ * last packet that was enqueued using this table entry.
+ * This guarantees that all previous packets for the flow
+ * have been dequeued, thus preserving in order delivery.
+ */
+ if (unlikely(tcpu != next_cpu) &&
+ (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
+ ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
+ rflow->last_qtail)) >= 0)) {
+ tcpu = rflow->cpu = next_cpu;
+ if (tcpu != RPS_NO_CPU)
+ rflow->last_qtail = per_cpu(softnet_data,
+ tcpu).input_queue_head;
+ }
+ if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
+ *rflowp = rflow;
+ cpu = tcpu;
+ goto done;
+ }
+ }
+
+ map = rcu_dereference(rxqueue->rps_map); /* fallback when the flow tables did not yield an online CPU */
+ if (map) {
+ tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
+
+ if (cpu_online(tcpu)) {
+ cpu = tcpu;
+ goto done;
+ }
+ }
+
+done:
+ return cpu;
+}
+
+/*
+ * This structure holds the per-CPU mask of CPUs for which IPIs are scheduled
+ * to be sent to kick remote softirq processing. There are two masks since
+ * the sending of IPIs must be done with interrupts enabled. The select field
+ * indicates the current mask that enqueue_to_backlog uses to schedule IPIs.
+ * select is flipped before net_rps_action is called while still under lock,
+ * net_rps_action then uses the non-selected mask to send the IPIs and clears
+ * it without conflicting with enqueue_to_backlog operation.
+ */
+struct rps_remote_softirq_cpus {
+ cpumask_t mask[2];
+ int select; /* index into mask[] of the mask currently filled by enqueue_to_backlog */
+};
+static DEFINE_PER_CPU(struct rps_remote_softirq_cpus, rps_remote_softirq_cpus);
+
+/* Called from hardirq (IPI) context */
+static void trigger_softirq(void *data)
+{
+ struct softnet_data *queue = data; /* data is used as the softnet_data whose backlog NAPI gets scheduled */
+ __napi_schedule(&queue->backlog);
+ __get_cpu_var(netdev_rx_stat).received_rps++; /* counted in the netdev_rx_stat of the CPU running this handler */
+}
+#endif /* CONFIG_RPS */
+
+/*
+ * enqueue_to_backlog is called to queue an skb to a per CPU backlog
+ * queue (may be a remote CPU queue).
+ *
+ * Runs with local interrupts disabled (local_irq_save) and, under
+ * CONFIG_RPS, with the target queue locked through rps_lock(). Returns
+ * NET_RX_SUCCESS once the skb is queued; if the queue length already
+ * exceeds netdev_max_backlog the skb is freed and NET_RX_DROP is
+ * returned. @qtail is only written when CONFIG_RPS is set.
+ */
+static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
+ unsigned int *qtail)
{
struct softnet_data *queue;
unsigned long flags;
- /* if netpoll wants it, pretend we never saw it */
- if (netpoll_rx(skb))
- return NET_RX_DROP;
-
- if (!skb->tstamp.tv64)
- net_timestamp(skb);
+ queue = &per_cpu(softnet_data, cpu);
- /*
- * The code is rearranged so that the path is the most
- * short when CPU is congested, but is still operating.
- */
local_irq_save(flags);
- queue = &__get_cpu_var(softnet_data);
-
__get_cpu_var(netdev_rx_stat).total++;
+
+ rps_lock(queue);
if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
if (queue->input_pkt_queue.qlen) {
enqueue:
__skb_queue_tail(&queue->input_pkt_queue, skb);
+#ifdef CONFIG_RPS
+ *qtail = queue->input_queue_head +
+ queue->input_pkt_queue.qlen;
+#endif
+ rps_unlock(queue);
local_irq_restore(flags);
return NET_RX_SUCCESS;
}
- napi_schedule(&queue->backlog);
+ /* Schedule NAPI for backlog device */
+ if (napi_schedule_prep(&queue->backlog)) {
+#ifdef CONFIG_RPS
+ if (cpu != smp_processor_id()) {
+ struct rps_remote_softirq_cpus *rcpus =
+ &__get_cpu_var(rps_remote_softirq_cpus);
+
+ cpu_set(cpu, rcpus->mask[rcpus->select]); /* remote target: record it in the currently selected mask */
+ __raise_softirq_irqoff(NET_RX_SOFTIRQ);
+ goto enqueue;
+ }
+#endif
+ __napi_schedule(&queue->backlog);
+ }
goto enqueue;
}
+ rps_unlock(queue);
+
__get_cpu_var(netdev_rx_stat).dropped++;
local_irq_restore(flags);
kfree_skb(skb);
return NET_RX_DROP;
}
+
+/**
+ * netif_rx - post buffer to the network code
+ * @skb: buffer to post
+ *
+ * This function receives a packet from a device driver and queues it for
+ * the upper (protocol) levels to process. It always succeeds. The buffer
+ * may be dropped during processing for congestion control or by the
+ * protocol layers.
+ *
+ * return values:
+ * NET_RX_SUCCESS (no congestion)
+ * NET_RX_DROP (packet was dropped)
+ *
+ * NET_RX_DROP is also returned when netpoll_rx() claims the skb. The
+ * skb is timestamped here if it carries no timestamp yet. With
+ * CONFIG_RPS the target CPU is chosen by get_rps_cpu() under
+ * rcu_read_lock(), falling back to the current CPU; without it the
+ * CPU returned by get_cpu() is used. Queueing itself is done by
+ * enqueue_to_backlog().
+ */
+
+int netif_rx(struct sk_buff *skb)
+{
+ int ret;
+
+ /* if netpoll wants it, pretend we never saw it */
+ if (netpoll_rx(skb))
+ return NET_RX_DROP;
+
+ if (!skb->tstamp.tv64)
+ net_timestamp(skb);
+
+#ifdef CONFIG_RPS
+ {
+ struct rps_dev_flow voidflow, *rflow = &voidflow; /* voidflow absorbs the qtail write when get_rps_cpu() leaves rflow untouched */
+ int cpu;
+
+ rcu_read_lock();
+
+ cpu = get_rps_cpu(skb->dev, skb, &rflow);
+ if (cpu < 0)
+ cpu = smp_processor_id();
+
+ ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
+
+ rcu_read_unlock();
+ }
+#else
+ {
+ unsigned int qtail; /* placeholder argument; its value is not read here */
+ ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
+ put_cpu();
+ }
+#endif
+ return ret;
+}
EXPORT_SYMBOL(netif_rx);
int netif_rx_ni(struct sk_buff *skb)
if (MAX_RED_LOOP < ttl++) {
printk(KERN_WARNING
"Redir loop detected Dropping packet (%d->%d)\n",
- skb->iif, dev->ifindex);
+ skb->skb_iif, dev->ifindex);
return TC_ACT_SHOT;
}
rcu_read_unlock();
}
-/**
- * netif_receive_skb - process receive buffer from network
- * @skb: buffer to process
- *
- * netif_receive_skb() is the main receive data processing function.
- * It always succeeds. The buffer may be dropped during processing
- * for congestion control or by the protocol layers.
- *
- * This function may only be called from softirq context and interrupts
- * should be enabled.
- *
- * Return values (usually ignored):
- * NET_RX_SUCCESS: no congestion
- * NET_RX_DROP: packet was dropped
+static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
+ struct net_device *master)
+{
+ if (skb->pkt_type == PACKET_HOST) { /* only frames classified PACKET_HOST are rewritten */
+ u16 *dest = (u16 *) eth_hdr(skb)->h_dest;
+
+ memcpy(dest, master->dev_addr, ETH_ALEN); /* overwrite the destination MAC with the master's address */
+ }
+}
+
+/* On bonding slaves other than the currently active slave, suppress
+ * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and
+ * ARP on active-backup slaves with arp_validate enabled.
+ *
+ * Returns 1 only for an IFF_SLAVE_INACTIVE device when none of the
+ * exceptions above match, and 0 otherwise. Side effects: dev->last_rx
+ * is refreshed when the master has IFF_MASTER_ARPMON, and the
+ * destination MAC may be rewritten for an IFF_MASTER_ALB master that
+ * has br_port set.
*/
-int netif_receive_skb(struct sk_buff *skb)
+int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master)
+{
+ struct net_device *dev = skb->dev;
+
+ if (master->priv_flags & IFF_MASTER_ARPMON)
+ dev->last_rx = jiffies;
+
+ if ((master->priv_flags & IFF_MASTER_ALB) && master->br_port) {
+ /* Do address unmangle. The local destination address
+ * will be always the one master has. Provides the right
+ * functionality in a bridge.
+ */
+ skb_bond_set_mac_by_master(skb, master);
+ }
+
+ if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
+ if ((dev->priv_flags & IFF_SLAVE_NEEDARP) &&
+ skb->protocol == __cpu_to_be16(ETH_P_ARP))
+ return 0;
+
+ if (master->priv_flags & IFF_MASTER_ALB) {
+ if (skb->pkt_type != PACKET_BROADCAST &&
+ skb->pkt_type != PACKET_MULTICAST)
+ return 0;
+ }
+ if (master->priv_flags & IFF_MASTER_8023AD &&
+ skb->protocol == __cpu_to_be16(ETH_P_SLOW))
+ return 0;
+
+ return 1;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(__skb_bond_should_drop);
+
+static int __netif_receive_skb(struct sk_buff *skb)
{
struct packet_type *ptype, *pt_prev;
struct net_device *orig_dev;
+ struct net_device *master;
struct net_device *null_or_orig;
+ struct net_device *null_or_bond;
int ret = NET_RX_DROP;
__be16 type;
if (netpoll_receive_skb(skb))
return NET_RX_DROP;
- if (!skb->iif)
- skb->iif = skb->dev->ifindex;
+ if (!skb->skb_iif)
+ skb->skb_iif = skb->dev->ifindex;
null_or_orig = NULL;
orig_dev = skb->dev;
- if (orig_dev->master) {
- if (skb_bond_should_drop(skb))
+ master = ACCESS_ONCE(orig_dev->master);
+ if (master) {
+ if (skb_bond_should_drop(skb, master))
null_or_orig = orig_dev; /* deliver only exact match */
else
- skb->dev = orig_dev->master;
+ skb->dev = master;
}
__get_cpu_var(netdev_rx_stat).total++;
if (!skb)
goto out;
+ /*
+ * Make sure frames received on VLAN interfaces stacked on
+ * bonding interfaces still make their way to any base bonding
+ * device that may have registered for a specific ptype. The
+ * handler may have to adjust skb->dev and orig_dev.
+ */
+ null_or_bond = NULL;
+ if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) &&
+ (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) {
+ null_or_bond = vlan_dev_real_dev(skb->dev);
+ }
+
type = skb->protocol;
list_for_each_entry_rcu(ptype,
&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
- if (ptype->type == type &&
- (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
- ptype->dev == orig_dev)) {
+ if (ptype->type == type && (ptype->dev == null_or_orig ||
+ ptype->dev == skb->dev || ptype->dev == orig_dev ||
+ ptype->dev == null_or_bond)) {
if (pt_prev)
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = ptype;
rcu_read_unlock();
return ret;
}
+
+/**
+ * netif_receive_skb - process receive buffer from network
+ * @skb: buffer to process
+ *
+ * netif_receive_skb() is the main receive data processing function.
+ * It always succeeds. The buffer may be dropped during processing
+ * for congestion control or by the protocol layers.
+ *
+ * This function may only be called from softirq context and interrupts
+ * should be enabled.
+ *
+ * Return values (usually ignored):
+ * NET_RX_SUCCESS: no congestion
+ * NET_RX_DROP: packet was dropped
+ *
+ * With CONFIG_RPS, get_rps_cpu() is consulted under rcu_read_lock():
+ * a non-negative CPU means the skb is queued to that CPU's backlog
+ * through enqueue_to_backlog(); otherwise, and whenever CONFIG_RPS is
+ * not set, the skb is processed directly by __netif_receive_skb().
+ */
+int netif_receive_skb(struct sk_buff *skb)
+{
+#ifdef CONFIG_RPS
+ struct rps_dev_flow voidflow, *rflow = &voidflow; /* voidflow absorbs the qtail write when get_rps_cpu() leaves rflow untouched */
+ int cpu, ret;
+
+ rcu_read_lock();
+
+ cpu = get_rps_cpu(skb->dev, skb, &rflow);
+
+ if (cpu >= 0) {
+ ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
+ rcu_read_unlock();
+ } else {
+ rcu_read_unlock(); /* RCU section is left before the direct receive path runs */
+ ret = __netif_receive_skb(skb);
+ }
+
+ return ret;
+#else
+ return __netif_receive_skb(skb);
+#endif
+}
EXPORT_SYMBOL(netif_receive_skb);
/* Network device is going away, flush any packets still pending */
struct softnet_data *queue = &__get_cpu_var(softnet_data);
struct sk_buff *skb, *tmp;
+ rps_lock(queue);
skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
if (skb->dev == dev) {
__skb_unlink(skb, &queue->input_pkt_queue);
kfree_skb(skb);
+ incr_input_queue_head(queue);
}
+ rps_unlock(queue);
}
static int napi_gro_complete(struct sk_buff *skb)
return netif_receive_skb(skb);
}
-void napi_gro_flush(struct napi_struct *napi)
+static void napi_gro_flush(struct napi_struct *napi)
{
struct sk_buff *skb, *next;
napi->gro_count = 0;
napi->gro_list = NULL;
}
-EXPORT_SYMBOL(napi_gro_flush);
-int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
struct sk_buff **pp = NULL;
struct packet_type *ptype;
struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
int same_flow;
int mac_len;
- int ret;
+ enum gro_result ret;
if (!(skb->dev->features & NETIF_F_GRO))
goto normal;
}
EXPORT_SYMBOL(dev_gro_receive);
-static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+static gro_result_t
+__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
struct sk_buff *p;
return GRO_NORMAL;
for (p = napi->gro_list; p; p = p->next) {
- NAPI_GRO_CB(p)->same_flow = (p->dev == skb->dev)
- && !compare_ether_header(skb_mac_header(p),
- skb_gro_mac_header(skb));
+ NAPI_GRO_CB(p)->same_flow =
+ (p->dev == skb->dev) &&
+ !compare_ether_header(skb_mac_header(p),
+ skb_gro_mac_header(skb));
NAPI_GRO_CB(p)->flush = 0;
}
return dev_gro_receive(napi, skb);
}
-int napi_skb_finish(int ret, struct sk_buff *skb)
+gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
{
- int err = NET_RX_SUCCESS;
-
switch (ret) {
case GRO_NORMAL:
- return netif_receive_skb(skb);
+ if (netif_receive_skb(skb)) /* a non-zero receive result is reported as GRO_DROP */
+ ret = GRO_DROP;
+ break;
case GRO_DROP:
- err = NET_RX_DROP;
- /* fall through */
-
case GRO_MERGED_FREE:
kfree_skb(skb);
break;
+
+ case GRO_HELD:
+ case GRO_MERGED: /* GRO_HELD and GRO_MERGED: skb is neither delivered nor freed here */
+ break;
}
- return err;
+ return ret; /* the (possibly updated) GRO verdict rather than a NET_RX_* code */
}
EXPORT_SYMBOL(napi_skb_finish);
}
EXPORT_SYMBOL(skb_gro_reset_offset);
-int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
skb_gro_reset_offset(skb);
}
EXPORT_SYMBOL(napi_get_frags);
-int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
+gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
+ gro_result_t ret)
{
- int err = NET_RX_SUCCESS;
-
switch (ret) {
case GRO_NORMAL:
case GRO_HELD:
- skb->protocol = eth_type_trans(skb, napi->dev);
-
- if (ret == GRO_NORMAL)
- return netif_receive_skb(skb);
+ skb->protocol = eth_type_trans(skb, skb->dev); /* keyed on skb->dev now, not napi->dev */
- skb_gro_pull(skb, -ETH_HLEN);
+ if (ret == GRO_HELD)
+ skb_gro_pull(skb, -ETH_HLEN); /* negative length; presumably restores the ETH_HLEN offset - confirm */
+ else if (netif_receive_skb(skb)) /* GRO_NORMAL: deliver now, a non-zero result becomes GRO_DROP */
+ ret = GRO_DROP;
break;
case GRO_DROP:
- err = NET_RX_DROP;
- /* fall through */
-
case GRO_MERGED_FREE:
napi_reuse_skb(napi, skb);
break;
+
+ case GRO_MERGED: /* nothing further is done with skb in this function */
+ break;
}
- return err;
+ return ret; /* the (possibly updated) GRO verdict rather than a NET_RX_* code */
}
EXPORT_SYMBOL(napi_frags_finish);
}
EXPORT_SYMBOL(napi_frags_skb);
-int napi_gro_frags(struct napi_struct *napi)
+gro_result_t napi_gro_frags(struct napi_struct *napi) /* returns a gro_result_t verdict instead of a NET_RX_* code */
{
struct sk_buff *skb = napi_frags_skb(napi);
if (!skb)
- return NET_RX_DROP;
+ return GRO_DROP; /* napi_frags_skb() returned no skb */
return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
}
struct sk_buff *skb;
local_irq_disable();
+ rps_lock(queue);
skb = __skb_dequeue(&queue->input_pkt_queue);
if (!skb) {
__napi_complete(napi);
+ rps_unlock(queue);
local_irq_enable();
break;
}
+ incr_input_queue_head(queue);
+ rps_unlock(queue);
local_irq_enable();
- netif_receive_skb(skb);
+ __netif_receive_skb(skb);
} while (++work < quota && jiffies == start_time);
return work;
}
EXPORT_SYMBOL(netif_napi_del);
+#ifdef CONFIG_RPS
+/*
+ * net_rps_action sends any pending IPI's for rps. This is only called from
+ * softirq and interrupts must be enabled.
+ *
+ * @mask: CPUs to signal. Each CPU in the mask that is currently online
+ * gets __smp_call_function_single() with the csd embedded in that CPU's
+ * softnet_data; CPUs that are not online are skipped. The whole mask is
+ * cleared before returning, including bits for skipped CPUs.
+ */
+static void net_rps_action(cpumask_t *mask)
+{
+ int cpu;
+
+ /* Send pending IPI's to kick RPS processing on remote cpus. */
+ for_each_cpu_mask_nr(cpu, *mask) {
+ struct softnet_data *queue = &per_cpu(softnet_data, cpu);
+ if (cpu_online(cpu))
+ __smp_call_function_single(cpu, &queue->csd, 0);
+ }
+ cpus_clear(*mask);
+}
+#endif
static void net_rx_action(struct softirq_action *h)
{
unsigned long time_limit = jiffies + 2;
int budget = netdev_budget;
void *have;
+#ifdef CONFIG_RPS
+ int select;
+ struct rps_remote_softirq_cpus *rcpus;
+#endif
local_irq_disable();
* entries to the tail of this list, and only ->poll()
* calls can remove this head entry from the list.
*/
- n = list_entry(list->next, struct napi_struct, poll_list);
+ n = list_first_entry(list, struct napi_struct, poll_list);
have = netpoll_poll_lock(n);
netpoll_poll_unlock(have);
}
out:
+#ifdef CONFIG_RPS
+ rcpus = &__get_cpu_var(rps_remote_softirq_cpus);
+ select = rcpus->select;
+ rcpus->select ^= 1;
+
+ local_irq_enable();
+
+ net_rps_action(&rcpus->mask[select]);
+#else
local_irq_enable();
+#endif
#ifdef CONFIG_NET_DMA
/*
if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
return -EFAULT;
- read_lock(&dev_base_lock);
- dev = __dev_get_by_index(net, ifr.ifr_ifindex);
+ rcu_read_lock();
+ dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
if (!dev) {
- read_unlock(&dev_base_lock);
+ rcu_read_unlock();
return -ENODEV;
}
strcpy(ifr.ifr_name, dev->name);
- read_unlock(&dev_base_lock);
+ rcu_read_unlock();
if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
return -EFAULT;
* in detail.
*/
void *dev_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(dev_base_lock)
+ __acquires(RCU)
{
struct net *net = seq_file_net(seq);
loff_t off;
struct net_device *dev;
- read_lock(&dev_base_lock);
+ rcu_read_lock();
if (!*pos)
return SEQ_START_TOKEN;
off = 1;
- for_each_netdev(net, dev)
+ for_each_netdev_rcu(net, dev)
if (off++ == *pos)
return dev;
void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct net *net = seq_file_net(seq);
+ struct net_device *dev = (v == SEQ_START_TOKEN) ? /* the start token maps to the first device of the seq_file's net */
+ first_net_device(seq_file_net(seq)) :
+ next_net_device((struct net_device *)v); /* otherwise v is the previously returned device */
+
++*pos;
- return v == SEQ_START_TOKEN ?
- first_net_device(net) : next_net_device((struct net_device *)v);
+ return rcu_dereference(dev); /* dev_seq_start() takes rcu_read_lock(), dev_seq_stop() releases it */
}
void dev_seq_stop(struct seq_file *seq, void *v)
- __releases(dev_base_lock)
+ __releases(RCU)
{
- read_unlock(&dev_base_lock);
+ rcu_read_unlock(); /* pairs with the rcu_read_lock() taken in dev_seq_start() */
}
static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
{
const struct net_device_stats *stats = dev_get_stats(dev);
- seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
+ seq_printf(seq, "%6s: %7lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
"%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
dev->name, stats->rx_bytes, stats->rx_packets,
stats->rx_errors,
{
struct netif_rx_stats *s = v;
- seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
+ seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
s->total, s->dropped, s->time_squeeze, 0,
0, 0, 0, 0, /* was fastroute */
- s->cpu_collision);
+ s->cpu_collision, s->received_rps);
return 0;
}
slave->master = master;
- synchronize_net();
-
- if (old)
+ if (old) {
+ synchronize_net();
dev_put(old);
-
+ }
if (master)
slave->flags |= IFF_SLAVE;
else
* Return 0 if successful or a negative errno code on error.
*/
int dev_set_promiscuity(struct net_device *dev, int inc)
-{
- unsigned short old_flags = dev->flags;
- int err;
-
- err = __dev_set_promiscuity(dev, inc);
- if (err < 0)
- return err;
- if (dev->flags != old_flags)
- dev_set_rx_mode(dev);
- return err;
-}
-EXPORT_SYMBOL(dev_set_promiscuity);
-
-/**
- * dev_set_allmulti - update allmulti count on a device
- * @dev: device
- * @inc: modifier
- *
- * Add or remove reception of all multicast frames to a device. While the
- * count in the device remains above zero the interface remains listening
- * to all interfaces. Once it hits zero the device reverts back to normal
- * filtering operation. A negative @inc value is used to drop the counter
- * when releasing a resource needing all multicasts.
- * Return 0 if successful or a negative errno code on error.
- */
-
-int dev_set_allmulti(struct net_device *dev, int inc)
-{
- unsigned short old_flags = dev->flags;
-
- ASSERT_RTNL();
-
- dev->flags |= IFF_ALLMULTI;
- dev->allmulti += inc;
- if (dev->allmulti == 0) {
- /*
- * Avoid overflow.
- * If inc causes overflow, untouch allmulti and return error.
- */
- if (inc < 0)
- dev->flags &= ~IFF_ALLMULTI;
- else {
- dev->allmulti -= inc;
- printk(KERN_WARNING "%s: allmulti touches roof, "
- "set allmulti failed, allmulti feature of "
- "device might be broken.\n", dev->name);
- return -EOVERFLOW;
- }
- }
- if (dev->flags ^ old_flags) {
- dev_change_rx_flags(dev, IFF_ALLMULTI);
- dev_set_rx_mode(dev);
- }
- return 0;
-}
-EXPORT_SYMBOL(dev_set_allmulti);
-
-/*
- * Upload unicast and multicast address lists to device and
- * configure RX filtering. When the device doesn't support unicast
- * filtering it is put in promiscuous mode while unicast addresses
- * are present.
- */
-void __dev_set_rx_mode(struct net_device *dev)
-{
- const struct net_device_ops *ops = dev->netdev_ops;
-
- /* dev_open will call this function so the list will stay sane. */
- if (!(dev->flags&IFF_UP))
- return;
-
- if (!netif_device_present(dev))
- return;
-
- if (ops->ndo_set_rx_mode)
- ops->ndo_set_rx_mode(dev);
- else {
- /* Unicast addresses changes may only happen under the rtnl,
- * therefore calling __dev_set_promiscuity here is safe.
- */
- if (dev->uc.count > 0 && !dev->uc_promisc) {
- __dev_set_promiscuity(dev, 1);
- dev->uc_promisc = 1;
- } else if (dev->uc.count == 0 && dev->uc_promisc) {
- __dev_set_promiscuity(dev, -1);
- dev->uc_promisc = 0;
- }
-
- if (ops->ndo_set_multicast_list)
- ops->ndo_set_multicast_list(dev);
- }
-}
-
-void dev_set_rx_mode(struct net_device *dev)
-{
- netif_addr_lock_bh(dev);
- __dev_set_rx_mode(dev);
- netif_addr_unlock_bh(dev);
-}
-
-/* hw addresses list handling functions */
-
-static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
- int addr_len, unsigned char addr_type)
-{
- struct netdev_hw_addr *ha;
- int alloc_size;
-
- if (addr_len > MAX_ADDR_LEN)
- return -EINVAL;
-
- list_for_each_entry(ha, &list->list, list) {
- if (!memcmp(ha->addr, addr, addr_len) &&
- ha->type == addr_type) {
- ha->refcount++;
- return 0;
- }
- }
-
-
- alloc_size = sizeof(*ha);
- if (alloc_size < L1_CACHE_BYTES)
- alloc_size = L1_CACHE_BYTES;
- ha = kmalloc(alloc_size, GFP_ATOMIC);
- if (!ha)
- return -ENOMEM;
- memcpy(ha->addr, addr, addr_len);
- ha->type = addr_type;
- ha->refcount = 1;
- ha->synced = false;
- list_add_tail_rcu(&ha->list, &list->list);
- list->count++;
- return 0;
-}
-
-static void ha_rcu_free(struct rcu_head *head)
-{
- struct netdev_hw_addr *ha;
-
- ha = container_of(head, struct netdev_hw_addr, rcu_head);
- kfree(ha);
-}
-
-static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
- int addr_len, unsigned char addr_type)
-{
- struct netdev_hw_addr *ha;
-
- list_for_each_entry(ha, &list->list, list) {
- if (!memcmp(ha->addr, addr, addr_len) &&
- (ha->type == addr_type || !addr_type)) {
- if (--ha->refcount)
- return 0;
- list_del_rcu(&ha->list);
- call_rcu(&ha->rcu_head, ha_rcu_free);
- list->count--;
- return 0;
- }
- }
- return -ENOENT;
-}
-
-static int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
- struct netdev_hw_addr_list *from_list,
- int addr_len,
- unsigned char addr_type)
-{
- int err;
- struct netdev_hw_addr *ha, *ha2;
- unsigned char type;
-
- list_for_each_entry(ha, &from_list->list, list) {
- type = addr_type ? addr_type : ha->type;
- err = __hw_addr_add(to_list, ha->addr, addr_len, type);
- if (err)
- goto unroll;
- }
- return 0;
-
-unroll:
- list_for_each_entry(ha2, &from_list->list, list) {
- if (ha2 == ha)
- break;
- type = addr_type ? addr_type : ha2->type;
- __hw_addr_del(to_list, ha2->addr, addr_len, type);
- }
- return err;
-}
-
-static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
- struct netdev_hw_addr_list *from_list,
- int addr_len,
- unsigned char addr_type)
-{
- struct netdev_hw_addr *ha;
- unsigned char type;
-
- list_for_each_entry(ha, &from_list->list, list) {
- type = addr_type ? addr_type : ha->type;
- __hw_addr_del(to_list, ha->addr, addr_len, addr_type);
- }
-}
-
-static int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
- struct netdev_hw_addr_list *from_list,
- int addr_len)
-{
- int err = 0;
- struct netdev_hw_addr *ha, *tmp;
-
- list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
- if (!ha->synced) {
- err = __hw_addr_add(to_list, ha->addr,
- addr_len, ha->type);
- if (err)
- break;
- ha->synced = true;
- ha->refcount++;
- } else if (ha->refcount == 1) {
- __hw_addr_del(to_list, ha->addr, addr_len, ha->type);
- __hw_addr_del(from_list, ha->addr, addr_len, ha->type);
- }
- }
- return err;
-}
-
-static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
- struct netdev_hw_addr_list *from_list,
- int addr_len)
-{
- struct netdev_hw_addr *ha, *tmp;
-
- list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
- if (ha->synced) {
- __hw_addr_del(to_list, ha->addr,
- addr_len, ha->type);
- ha->synced = false;
- __hw_addr_del(from_list, ha->addr,
- addr_len, ha->type);
- }
- }
-}
-
-static void __hw_addr_flush(struct netdev_hw_addr_list *list)
-{
- struct netdev_hw_addr *ha, *tmp;
-
- list_for_each_entry_safe(ha, tmp, &list->list, list) {
- list_del_rcu(&ha->list);
- call_rcu(&ha->rcu_head, ha_rcu_free);
- }
- list->count = 0;
-}
-
-static void __hw_addr_init(struct netdev_hw_addr_list *list)
-{
- INIT_LIST_HEAD(&list->list);
- list->count = 0;
-}
-
-/* Device addresses handling functions */
-
-static void dev_addr_flush(struct net_device *dev)
-{
- /* rtnl_mutex must be held here */
-
- __hw_addr_flush(&dev->dev_addrs);
- dev->dev_addr = NULL;
-}
-
-static int dev_addr_init(struct net_device *dev)
-{
- unsigned char addr[MAX_ADDR_LEN];
- struct netdev_hw_addr *ha;
- int err;
-
- /* rtnl_mutex must be held here */
-
- __hw_addr_init(&dev->dev_addrs);
- memset(addr, 0, sizeof(addr));
- err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr),
- NETDEV_HW_ADDR_T_LAN);
- if (!err) {
- /*
- * Get the first (previously created) address from the list
- * and set dev_addr pointer to this location.
- */
- ha = list_first_entry(&dev->dev_addrs.list,
- struct netdev_hw_addr, list);
- dev->dev_addr = ha->addr;
- }
- return err;
-}
-
-/**
- * dev_addr_add - Add a device address
- * @dev: device
- * @addr: address to add
- * @addr_type: address type
- *
- * Add a device address to the device or increase the reference count if
- * it already exists.
- *
- * The caller must hold the rtnl_mutex.
- */
-int dev_addr_add(struct net_device *dev, unsigned char *addr,
- unsigned char addr_type)
-{
- int err;
-
- ASSERT_RTNL();
-
- err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type);
- if (!err)
- call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
- return err;
-}
-EXPORT_SYMBOL(dev_addr_add);
-
-/**
- * dev_addr_del - Release a device address.
- * @dev: device
- * @addr: address to delete
- * @addr_type: address type
- *
- * Release reference to a device address and remove it from the device
- * if the reference count drops to zero.
- *
- * The caller must hold the rtnl_mutex.
- */
-int dev_addr_del(struct net_device *dev, unsigned char *addr,
- unsigned char addr_type)
-{
- int err;
- struct netdev_hw_addr *ha;
-
- ASSERT_RTNL();
-
- /*
- * We can not remove the first address from the list because
- * dev->dev_addr points to that.
- */
- ha = list_first_entry(&dev->dev_addrs.list,
- struct netdev_hw_addr, list);
- if (ha->addr == dev->dev_addr && ha->refcount == 1)
- return -ENOENT;
-
- err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len,
- addr_type);
- if (!err)
- call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
- return err;
-}
-EXPORT_SYMBOL(dev_addr_del);
-
-/**
- * dev_addr_add_multiple - Add device addresses from another device
- * @to_dev: device to which addresses will be added
- * @from_dev: device from which addresses will be added
- * @addr_type: address type - 0 means type will be used from from_dev
- *
- * Add device addresses of the one device to another.
- **
- * The caller must hold the rtnl_mutex.
- */
-int dev_addr_add_multiple(struct net_device *to_dev,
- struct net_device *from_dev,
- unsigned char addr_type)
-{
- int err;
-
- ASSERT_RTNL();
-
- if (from_dev->addr_len != to_dev->addr_len)
- return -EINVAL;
- err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
- to_dev->addr_len, addr_type);
- if (!err)
- call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
- return err;
-}
-EXPORT_SYMBOL(dev_addr_add_multiple);
-
-/**
- * dev_addr_del_multiple - Delete device addresses by another device
- * @to_dev: device where the addresses will be deleted
- * @from_dev: device by which addresses the addresses will be deleted
- * @addr_type: address type - 0 means type will used from from_dev
- *
- * Deletes addresses in to device by the list of addresses in from device.
- *
- * The caller must hold the rtnl_mutex.
- */
-int dev_addr_del_multiple(struct net_device *to_dev,
- struct net_device *from_dev,
- unsigned char addr_type)
-{
- ASSERT_RTNL();
-
- if (from_dev->addr_len != to_dev->addr_len)
- return -EINVAL;
- __hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
- to_dev->addr_len, addr_type);
- call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
- return 0;
-}
-EXPORT_SYMBOL(dev_addr_del_multiple);
-
-/* multicast addresses handling functions */
-
-int __dev_addr_delete(struct dev_addr_list **list, int *count,
- void *addr, int alen, int glbl)
-{
- struct dev_addr_list *da;
-
- for (; (da = *list) != NULL; list = &da->next) {
- if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
- alen == da->da_addrlen) {
- if (glbl) {
- int old_glbl = da->da_gusers;
- da->da_gusers = 0;
- if (old_glbl == 0)
- break;
- }
- if (--da->da_users)
- return 0;
-
- *list = da->next;
- kfree(da);
- (*count)--;
- return 0;
- }
- }
- return -ENOENT;
-}
-
-int __dev_addr_add(struct dev_addr_list **list, int *count,
- void *addr, int alen, int glbl)
-{
- struct dev_addr_list *da;
-
- for (da = *list; da != NULL; da = da->next) {
- if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
- da->da_addrlen == alen) {
- if (glbl) {
- int old_glbl = da->da_gusers;
- da->da_gusers = 1;
- if (old_glbl)
- return 0;
- }
- da->da_users++;
- return 0;
- }
- }
-
- da = kzalloc(sizeof(*da), GFP_ATOMIC);
- if (da == NULL)
- return -ENOMEM;
- memcpy(da->da_addr, addr, alen);
- da->da_addrlen = alen;
- da->da_users = 1;
- da->da_gusers = glbl ? 1 : 0;
- da->next = *list;
- *list = da;
- (*count)++;
- return 0;
-}
-
-/**
- * dev_unicast_delete - Release secondary unicast address.
- * @dev: device
- * @addr: address to delete
- *
- * Release reference to a secondary unicast address and remove it
- * from the device if the reference count drops to zero.
- *
- * The caller must hold the rtnl_mutex.
- */
-int dev_unicast_delete(struct net_device *dev, void *addr)
-{
- int err;
-
- ASSERT_RTNL();
-
- netif_addr_lock_bh(dev);
- err = __hw_addr_del(&dev->uc, addr, dev->addr_len,
- NETDEV_HW_ADDR_T_UNICAST);
- if (!err)
- __dev_set_rx_mode(dev);
- netif_addr_unlock_bh(dev);
- return err;
-}
-EXPORT_SYMBOL(dev_unicast_delete);
-
-/**
- * dev_unicast_add - add a secondary unicast address
- * @dev: device
- * @addr: address to add
- *
- * Add a secondary unicast address to the device or increase
- * the reference count if it already exists.
- *
- * The caller must hold the rtnl_mutex.
- */
-int dev_unicast_add(struct net_device *dev, void *addr)
-{
- int err;
-
- ASSERT_RTNL();
-
- netif_addr_lock_bh(dev);
- err = __hw_addr_add(&dev->uc, addr, dev->addr_len,
- NETDEV_HW_ADDR_T_UNICAST);
- if (!err)
- __dev_set_rx_mode(dev);
- netif_addr_unlock_bh(dev);
- return err;
-}
-EXPORT_SYMBOL(dev_unicast_add);
-
-int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
- struct dev_addr_list **from, int *from_count)
-{
- struct dev_addr_list *da, *next;
- int err = 0;
-
- da = *from;
- while (da != NULL) {
- next = da->next;
- if (!da->da_synced) {
- err = __dev_addr_add(to, to_count,
- da->da_addr, da->da_addrlen, 0);
- if (err < 0)
- break;
- da->da_synced = 1;
- da->da_users++;
- } else if (da->da_users == 1) {
- __dev_addr_delete(to, to_count,
- da->da_addr, da->da_addrlen, 0);
- __dev_addr_delete(from, from_count,
- da->da_addr, da->da_addrlen, 0);
- }
- da = next;
- }
- return err;
-}
-EXPORT_SYMBOL_GPL(__dev_addr_sync);
-
-void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
- struct dev_addr_list **from, int *from_count)
-{
- struct dev_addr_list *da, *next;
-
- da = *from;
- while (da != NULL) {
- next = da->next;
- if (da->da_synced) {
- __dev_addr_delete(to, to_count,
- da->da_addr, da->da_addrlen, 0);
- da->da_synced = 0;
- __dev_addr_delete(from, from_count,
- da->da_addr, da->da_addrlen, 0);
- }
- da = next;
- }
-}
-EXPORT_SYMBOL_GPL(__dev_addr_unsync);
-
-/**
- * dev_unicast_sync - Synchronize device's unicast list to another device
- * @to: destination device
- * @from: source device
- *
- * Add newly added addresses to the destination device and release
- * addresses that have no users left. The source device must be
- * locked by netif_tx_lock_bh.
- *
- * This function is intended to be called from the dev->set_rx_mode
- * function of layered software devices.
- */
-int dev_unicast_sync(struct net_device *to, struct net_device *from)
-{
- int err = 0;
-
- if (to->addr_len != from->addr_len)
- return -EINVAL;
+{
+ unsigned short old_flags = dev->flags;
+ int err;
- netif_addr_lock_bh(to);
- err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
- if (!err)
- __dev_set_rx_mode(to);
- netif_addr_unlock_bh(to);
+ err = __dev_set_promiscuity(dev, inc);
+ if (err < 0)
+ return err;
+ if (dev->flags != old_flags)
+ dev_set_rx_mode(dev);
return err;
}
-EXPORT_SYMBOL(dev_unicast_sync);
+EXPORT_SYMBOL(dev_set_promiscuity);
/**
- * dev_unicast_unsync - Remove synchronized addresses from the destination device
- * @to: destination device
- * @from: source device
+ * dev_set_allmulti - update allmulti count on a device
+ * @dev: device
+ * @inc: modifier
*
- * Remove all addresses that were added to the destination device by
- * dev_unicast_sync(). This function is intended to be called from the
- * dev->stop function of layered software devices.
+ * Add or remove reception of all multicast frames to a device. While the
+ * count in the device remains above zero the interface remains listening
+ * to all interfaces. Once it hits zero the device reverts back to normal
+ * filtering operation. A negative @inc value is used to drop the counter
+ * when releasing a resource needing all multicasts.
+ * Return 0 if successful or a negative errno code on error.
+ * -EOVERFLOW is returned, with the counter restored, when a non-negative
+ * @inc leaves the count at zero. RTNL must be held (ASSERT_RTNL()).
+ * dev_change_rx_flags() and dev_set_rx_mode() are only invoked when the
+ * IFF_ALLMULTI bit in dev->flags actually changed.
*/
-void dev_unicast_unsync(struct net_device *to, struct net_device *from)
+
+int dev_set_allmulti(struct net_device *dev, int inc)
{
- if (to->addr_len != from->addr_len)
- return;
+ unsigned short old_flags = dev->flags; /* snapshot used below to detect a flag change */
- netif_addr_lock_bh(from);
- netif_addr_lock(to);
- __hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
- __dev_set_rx_mode(to);
- netif_addr_unlock(to);
- netif_addr_unlock_bh(from);
-}
-EXPORT_SYMBOL(dev_unicast_unsync);
+ ASSERT_RTNL();
-static void dev_unicast_flush(struct net_device *dev)
-{
- netif_addr_lock_bh(dev);
- __hw_addr_flush(&dev->uc);
- netif_addr_unlock_bh(dev);
+ dev->flags |= IFF_ALLMULTI; /* set optimistically; cleared again below if the count drops to zero */
+ dev->allmulti += inc;
+ if (dev->allmulti == 0) {
+ /*
+ * Avoid overflow.
+ * If inc causes overflow, untouch allmulti and return error.
+ */
+ if (inc < 0)
+ dev->flags &= ~IFF_ALLMULTI;
+ else {
+ dev->allmulti -= inc;
+ printk(KERN_WARNING "%s: allmulti touches roof, "
+ "set allmulti failed, allmulti feature of "
+ "device might be broken.\n", dev->name);
+ return -EOVERFLOW;
+ }
+ }
+ if (dev->flags ^ old_flags) {
+ dev_change_rx_flags(dev, IFF_ALLMULTI);
+ dev_set_rx_mode(dev);
+ }
+ return 0;
}
+EXPORT_SYMBOL(dev_set_allmulti);
-static void dev_unicast_init(struct net_device *dev)
+/*
+ * Upload unicast and multicast address lists to device and
+ * configure RX filtering. When the device doesn't support unicast
+ * filtering it is put in promiscuous mode while unicast addresses
+ * are present.
+ *
+ * Nothing is done unless the device has IFF_UP set and
+ * netif_device_present() holds. A driver-supplied ndo_set_rx_mode takes
+ * precedence; otherwise dev->uc_promisc records whether promiscuity was
+ * raised on behalf of the unicast list, and ndo_set_multicast_list is
+ * called when the driver provides it. No lock is taken here;
+ * dev_set_rx_mode() wraps this call in netif_addr_lock_bh().
+ */
+void __dev_set_rx_mode(struct net_device *dev)
{
- __hw_addr_init(&dev->uc);
-}
+ const struct net_device_ops *ops = dev->netdev_ops;
+ /* dev_open will call this function so the list will stay sane. */
+ if (!(dev->flags&IFF_UP))
+ return;
-static void __dev_addr_discard(struct dev_addr_list **list)
-{
- struct dev_addr_list *tmp;
+ if (!netif_device_present(dev))
+ return;
+
+ if (ops->ndo_set_rx_mode)
+ ops->ndo_set_rx_mode(dev);
+ else {
+ /* Unicast addresses changes may only happen under the rtnl,
+ * therefore calling __dev_set_promiscuity here is safe.
+ */
+ if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
+ __dev_set_promiscuity(dev, 1);
+ dev->uc_promisc = 1;
+ } else if (netdev_uc_empty(dev) && dev->uc_promisc) {
+ __dev_set_promiscuity(dev, -1);
+ dev->uc_promisc = 0;
+ }
- while (*list != NULL) {
- tmp = *list;
- *list = tmp->next;
- if (tmp->da_users > tmp->da_gusers)
- printk("__dev_addr_discard: address leakage! "
- "da_users=%d\n", tmp->da_users);
- kfree(tmp);
+ if (ops->ndo_set_multicast_list)
+ ops->ndo_set_multicast_list(dev);
}
}
-static void dev_addr_discard(struct net_device *dev)
+void dev_set_rx_mode(struct net_device *dev) /* locked wrapper around __dev_set_rx_mode() */
{
netif_addr_lock_bh(dev);
-
- __dev_addr_discard(&dev->mc_list);
- dev->mc_count = 0;
-
+ __dev_set_rx_mode(dev); /* runs between netif_addr_lock_bh() and netif_addr_unlock_bh() */
netif_addr_unlock_bh(dev);
}
}
EXPORT_SYMBOL(dev_get_flags);
-/**
- * dev_change_flags - change device settings
- * @dev: device
- * @flags: device state flags
- *
- * Change settings on device based state flags. The flags are
- * in the userspace exported format.
- */
-int dev_change_flags(struct net_device *dev, unsigned flags)
+int __dev_change_flags(struct net_device *dev, unsigned int flags)
{
- int ret, changes;
int old_flags = dev->flags;
+ int ret;
ASSERT_RTNL();
ret = 0;
if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */
- ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
+ ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
if (!ret)
dev_set_rx_mode(dev);
}
- if (dev->flags & IFF_UP &&
- ((old_flags ^ dev->flags) & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
- IFF_VOLATILE)))
- call_netdevice_notifiers(NETDEV_CHANGE, dev);
-
if ((flags ^ dev->gflags) & IFF_PROMISC) {
int inc = (flags & IFF_PROMISC) ? 1 : -1;
dev_set_allmulti(dev, inc);
}
- /* Exclude state transition flags, already notified */
- changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
+ return ret;
+}
+
+void __dev_notify_flags(struct net_device *dev, unsigned int old_flags) /* raise notifier events for the difference between old_flags and dev->flags */
+{
+ unsigned int changes = dev->flags ^ old_flags; /* bits that differ from the caller's snapshot */
+
+ if (changes & IFF_UP) { /* up/down transition: announce NETDEV_UP or NETDEV_DOWN */
+ if (dev->flags & IFF_UP)
+ call_netdevice_notifiers(NETDEV_UP, dev);
+ else
+ call_netdevice_notifiers(NETDEV_DOWN, dev);
+ }
+
+ if (dev->flags & IFF_UP && /* NETDEV_CHANGE only while up, and only for bits outside the excluded set */
+ (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE)))
+ call_netdevice_notifiers(NETDEV_CHANGE, dev);
+}
+
+/**
+ * dev_change_flags - change device settings
+ * @dev: device
+ * @flags: device state flags
+ *
+ * Change settings on device based state flags. The flags are
+ * in the userspace exported format.
+ *
+ * The change itself is applied by __dev_change_flags(). If that returns
+ * a negative value it is passed back immediately and nothing is
+ * announced. Otherwise an RTM_NEWLINK message is sent when any bit of
+ * dev->flags changed, __dev_notify_flags() is called with the flags
+ * sampled on entry, and the result of __dev_change_flags() is returned.
+ */
+int dev_change_flags(struct net_device *dev, unsigned flags)
+{
+ int ret, changes;
+ int old_flags = dev->flags; /* snapshot taken before any change is applied */
+
+ ret = __dev_change_flags(dev, flags);
+ if (ret < 0)
+ return ret;
+
+ changes = old_flags ^ dev->flags;
if (changes)
rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
+ __dev_notify_flags(dev, old_flags);
return ret;
}
EXPORT_SYMBOL(dev_change_flags);
EXPORT_SYMBOL(dev_set_mac_address);
/*
- * Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
+ * Perform the SIOCxIFxxx calls, inside rcu_read_lock()
*/
static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
{
int err;
- struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
+ struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name);
if (!dev)
return -ENODEV;
return -EINVAL;
if (!netif_device_present(dev))
return -ENODEV;
- return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
- dev->addr_len, 1);
+ return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data);
case SIOCDELMULTI:
if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
return -EINVAL;
if (!netif_device_present(dev))
return -ENODEV;
- return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
- dev->addr_len, 1);
+ return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data);
case SIOCSIFTXQLEN:
if (ifr->ifr_qlen < 0)
case SIOCGIFINDEX:
case SIOCGIFTXQLEN:
dev_load(net, ifr.ifr_name);
- read_lock(&dev_base_lock);
+ rcu_read_lock();
ret = dev_ifsioc_locked(net, &ifr, cmd);
- read_unlock(&dev_base_lock);
+ rcu_read_unlock();
if (!ret) {
if (colon)
*colon = ':';
list_add_tail(&dev->todo_list, &net_todo_list);
}
-static void rollback_registered(struct net_device *dev)
+static void rollback_registered_many(struct list_head *head) /* batch teardown of every device linked on head through unreg_list */
{
+ struct net_device *dev, *tmp;
+
BUG_ON(dev_boot_phase);
ASSERT_RTNL();
- /* Some devices call without registering for initialization unwind. */
- if (dev->reg_state == NETREG_UNINITIALIZED) {
- printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
- "was registered\n", dev->name, dev);
+ list_for_each_entry_safe(dev, tmp, head, unreg_list) {
+ /* Some devices call without registering
+ * for initialization unwind. Remove those
+ * devices and proceed with the remaining.
+ */
+ if (dev->reg_state == NETREG_UNINITIALIZED) {
+ pr_debug("unregister_netdevice: device %s/%p never "
+ "was registered\n", dev->name, dev);
- WARN_ON(1);
- return;
- }
+ WARN_ON(1);
+ list_del(&dev->unreg_list);
+ continue;
+ }
- BUG_ON(dev->reg_state != NETREG_REGISTERED);
+ BUG_ON(dev->reg_state != NETREG_REGISTERED);
- /* If device is running, close it first. */
- dev_close(dev);
+ /* If device is running, close it first. */
+ dev_close(dev);
- /* And unlink it from device chain. */
- unlist_netdevice(dev);
+ /* And unlink it from device chain. */
+ unlist_netdevice(dev);
- dev->reg_state = NETREG_UNREGISTERING;
+ dev->reg_state = NETREG_UNREGISTERING;
+ }
synchronize_net();
- /* Shutdown queueing discipline. */
- dev_shutdown(dev);
+ list_for_each_entry(dev, head, unreg_list) {
+ /* Shutdown queueing discipline. */
+ dev_shutdown(dev);
- /* Notify protocols, that we are about to destroy
- this device. They should clean all the things.
- */
- call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+ /* Notify protocols, that we are about to destroy
+ this device. They should clean all the things.
+ */
+ call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
- /*
- * Flush the unicast and multicast chains
- */
- dev_unicast_flush(dev);
- dev_addr_discard(dev);
+ if (!dev->rtnl_link_ops ||
+ dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
+ rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
- if (dev->netdev_ops->ndo_uninit)
- dev->netdev_ops->ndo_uninit(dev);
+ /*
+ * Flush the unicast and multicast chains
+ */
+ dev_uc_flush(dev);
+ dev_mc_flush(dev);
- /* Notifier chain MUST detach us from master device. */
- WARN_ON(dev->master);
+ if (dev->netdev_ops->ndo_uninit)
+ dev->netdev_ops->ndo_uninit(dev);
- /* Remove entries from kobject tree */
- netdev_unregister_kobject(dev);
+ /* Notifier chain MUST detach us from master device. */
+ WARN_ON(dev->master);
+
+ /* Remove entries from kobject tree */
+ netdev_unregister_kobject(dev);
+ }
+
+ /* Process any work delayed until the end of the batch.
+ * The batch notifier is raised once, using the first entry still
+ * on the list.
+ * NOTE(review): if every entry was dropped by the first loop as
+ * NETREG_UNINITIALIZED, the list is empty at this point and
+ * list_first_entry() does not yield a real device - confirm that
+ * callers never pass such a list.
+ */
+ dev = list_first_entry(head, struct net_device, unreg_list);
+ call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
synchronize_net();
- dev_put(dev);
+ list_for_each_entry(dev, head, unreg_list) /* one dev_put() per device left on the list */
+ dev_put(dev);
+}
+
+/*
+ * Tear down a single device by linking it onto a one-element list and
+ * handing that list to rollback_registered_many().
+ */
+static void rollback_registered(struct net_device *dev)
+{
+ LIST_HEAD(single);
+
+ list_add(&dev->unreg_list, &single);
+ rollback_registered_many(&single);
+ /* 'single' lives in this stack frame: unlink the head so that
+ * dev->unreg_list is not left pointing at it after we return.
+ */
+ list_del(&single);
}
static void __netdev_init_queue_locks_one(struct net_device *dev,
EXPORT_SYMBOL(netdev_fix_features);
/**
+ * netif_stacked_transfer_operstate - mirror a lower device's link state
+ * @rootdev: the root or lower level device whose state is copied
+ * @dev: the stacked (leaf) device that receives the state
+ *
+ * Marks @dev dormant when @rootdev's operstate is IF_OPER_DORMANT and
+ * clears dormancy otherwise, then brings @dev's carrier in line with
+ * @rootdev's. The carrier is only toggled when the two actually differ.
+ */
+void netif_stacked_transfer_operstate(const struct net_device *rootdev,
+					struct net_device *dev)
+{
+	int root_up, leaf_up;
+
+	if (rootdev->operstate != IF_OPER_DORMANT)
+		netif_dormant_off(dev);
+	else
+		netif_dormant_on(dev);
+
+	root_up = netif_carrier_ok(rootdev);
+	leaf_up = netif_carrier_ok(dev);
+
+	if (root_up && !leaf_up)
+		netif_carrier_on(dev);
+	else if (!root_up && leaf_up)
+		netif_carrier_off(dev);
+}
+EXPORT_SYMBOL(netif_stacked_transfer_operstate);
+
+/**
* register_netdevice - register a network device
* @dev: device to register
*
int register_netdevice(struct net_device *dev)
{
- struct hlist_head *head;
- struct hlist_node *p;
int ret;
struct net *net = dev_net(dev);
dev->iflink = -1;
+#ifdef CONFIG_RPS
+ if (!dev->num_rx_queues) {
+ /*
+ * Allocate a single RX queue if driver never called
+ * alloc_netdev_mq
+ */
+
+ dev->_rx = kzalloc(sizeof(struct netdev_rx_queue), GFP_KERNEL);
+ if (!dev->_rx) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ dev->_rx->first = dev->_rx;
+ atomic_set(&dev->_rx->count, 1);
+ dev->num_rx_queues = 1;
+ }
+#endif
/* Init, if this function is available */
if (dev->netdev_ops->ndo_init) {
ret = dev->netdev_ops->ndo_init(dev);
}
}
- if (!dev_valid_name(dev->name)) {
- ret = -EINVAL;
+ ret = dev_get_valid_name(net, dev->name, dev->name, 0);
+ if (ret)
goto err_uninit;
- }
dev->ifindex = dev_new_index(net);
if (dev->iflink == -1)
dev->iflink = dev->ifindex;
- /* Check for existence of name */
- head = dev_name_hash(net, dev->name);
- hlist_for_each(p, head) {
- struct net_device *d
- = hlist_entry(p, struct net_device, name_hlist);
- if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
- ret = -EEXIST;
- goto err_uninit;
- }
- }
-
/* Fix illegal checksum combinations */
if ((dev->features & NETIF_F_HW_CSUM) &&
(dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
rollback_registered(dev);
dev->reg_state = NETREG_UNREGISTERED;
}
+ /*
+ * Prevent userspace races by waiting until the network
+ * device is fully setup before sending notifications.
+ */
+ if (!dev->rtnl_link_ops ||
+ dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
+ rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
out:
return ret;
{
unsigned long rebroadcast_time, warning_time;
+ linkwatch_forget_dev(dev);
+
rebroadcast_time = warning_time = jiffies;
while (atomic_read(&dev->refcnt) != 0) {
if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
/* Rebroadcast unregister notification */
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+ /* don't resend NETDEV_UNREGISTER_BATCH, _BATCH users
+ * should have already handled it the first time */
if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
&dev->state)) {
while (!list_empty(&list)) {
struct net_device *dev
- = list_entry(list.next, struct net_device, todo_list);
+ = list_first_entry(&list, struct net_device, todo_list);
list_del(&dev->todo_list);
if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
}
/**
+ * dev_txq_stats_fold - fold tx_queues stats
+ * @dev: device whose per-queue TX counters are summed
+ * @stats: struct net_device_stats receiving the totals
+ *
+ * Adds up tx_bytes, tx_packets and tx_dropped over all
+ * @dev->num_tx_queues queues. @stats is written only when at least
+ * one of the three sums is non-zero; otherwise it is left untouched.
+ */
+void dev_txq_stats_fold(const struct net_device *dev,
+			struct net_device_stats *stats)
+{
+	unsigned long bytes = 0, packets = 0, dropped = 0;
+	unsigned int q;
+
+	for (q = 0; q < dev->num_tx_queues; q++) {
+		struct netdev_queue *queue = netdev_get_tx_queue(dev, q);
+
+		bytes += queue->tx_bytes;
+		packets += queue->tx_packets;
+		dropped += queue->tx_dropped;
+	}
+
+	/* Nothing counted per queue: keep whatever @stats already holds */
+	if (!(bytes || packets || dropped))
+		return;
+
+	stats->tx_bytes = bytes;
+	stats->tx_packets = packets;
+	stats->tx_dropped = dropped;
+}
+EXPORT_SYMBOL(dev_txq_stats_fold);
+
+/**
* dev_get_stats - get network device statistics
* @dev: device to get statistics from
*
if (ops->ndo_get_stats)
return ops->ndo_get_stats(dev);
- else {
- unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
- struct net_device_stats *stats = &dev->stats;
- unsigned int i;
- struct netdev_queue *txq;
-
- for (i = 0; i < dev->num_tx_queues; i++) {
- txq = netdev_get_tx_queue(dev, i);
- tx_bytes += txq->tx_bytes;
- tx_packets += txq->tx_packets;
- tx_dropped += txq->tx_dropped;
- }
- if (tx_bytes || tx_packets || tx_dropped) {
- stats->tx_bytes = tx_bytes;
- stats->tx_packets = tx_packets;
- stats->tx_dropped = tx_dropped;
- }
- return stats;
- }
+
+ dev_txq_stats_fold(dev, &dev->stats);
+ return &dev->stats;
}
EXPORT_SYMBOL(dev_get_stats);
struct net_device *dev;
size_t alloc_size;
struct net_device *p;
+#ifdef CONFIG_RPS
+ struct netdev_rx_queue *rx;
+ int i;
+#endif
BUG_ON(strlen(name) >= sizeof(dev->name));
goto free_p;
}
+#ifdef CONFIG_RPS
+ rx = kcalloc(queue_count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
+ if (!rx) {
+ printk(KERN_ERR "alloc_netdev: Unable to allocate "
+ "rx queues.\n");
+ goto free_tx;
+ }
+
+ atomic_set(&rx->count, queue_count);
+
+ /*
+ * Set a pointer to first element in the array which holds the
+ * reference count.
+ */
+ for (i = 0; i < queue_count; i++)
+ rx[i].first = rx;
+#endif
+
dev = PTR_ALIGN(p, NETDEV_ALIGN);
dev->padded = (char *)dev - (char *)p;
if (dev_addr_init(dev))
- goto free_tx;
+ goto free_rx;
- dev_unicast_init(dev);
+ dev_mc_init(dev);
+ dev_uc_init(dev);
dev_net_set(dev, &init_net);
dev->num_tx_queues = queue_count;
dev->real_num_tx_queues = queue_count;
+#ifdef CONFIG_RPS
+ dev->_rx = rx;
+ dev->num_rx_queues = queue_count;
+#endif
+
dev->gso_max_size = GSO_MAX_SIZE;
netdev_init_queues(dev);
+ INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list);
+ dev->ethtool_ntuple_list.count = 0;
INIT_LIST_HEAD(&dev->napi_list);
+ INIT_LIST_HEAD(&dev->unreg_list);
+ INIT_LIST_HEAD(&dev->link_watch_list);
dev->priv_flags = IFF_XMIT_DST_RELEASE;
setup(dev);
strcpy(dev->name, name);
return dev;
+free_rx:
+#ifdef CONFIG_RPS
+ kfree(rx);
free_tx:
+#endif
kfree(tx);
-
free_p:
kfree(p);
return NULL;
/* Flush device addresses */
dev_addr_flush(dev);
+ /* Clear ethtool n-tuple list */
+ ethtool_ntuple_flush(dev);
+
list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
netif_napi_del(p);
EXPORT_SYMBOL(synchronize_net);
/**
- * unregister_netdevice - remove device from the kernel
+ * unregister_netdevice_queue - remove device from the kernel
* @dev: device
+ * @head: list to queue @dev on for a later batched unregister, or NULL
*
* This function shuts down a device interface and removes it
* from the kernel tables.
+ * If @head is not NULL, @dev is only moved to the tail of @head (linked
+ * through its unreg_list member) and nothing else is done here; the
+ * caller is expected to unregister the queued devices later, e.g. with
+ * unregister_netdevice_many(). If @head is NULL, @dev is rolled back
+ * immediately and handed to net_set_todo().
*
* Callers must hold the rtnl semaphore. You may want
* unregister_netdev() instead of this.
*/
-void unregister_netdevice(struct net_device *dev)
+void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
{
ASSERT_RTNL();
- rollback_registered(dev);
- /* Finish processing unregister after unlock */
- net_set_todo(dev);
+ if (head) {
+ list_move_tail(&dev->unreg_list, head); /* deferred: only queue the device */
+ } else {
+ rollback_registered(dev);
+ /* Finish processing unregister after unlock */
+ net_set_todo(dev);
+ }
+}
+EXPORT_SYMBOL(unregister_netdevice_queue);
+
+/**
+ * unregister_netdevice_many - unregister many devices
+ * @head: list of devices, linked through their unreg_list member
+ *
+ * Rolls back the registration of every device on @head in one batch
+ * and then passes each of them to net_set_todo(). An empty list is a
+ * no-op.
+ */
+void unregister_netdevice_many(struct list_head *head)
+{
+	struct net_device *dev;
+
+	if (list_empty(head))
+		return;
+
+	rollback_registered_many(head);
+	list_for_each_entry(dev, head, unreg_list)
+		net_set_todo(dev);
}
-EXPORT_SYMBOL(unregister_netdevice);
+EXPORT_SYMBOL(unregister_netdevice_many);
/**
* unregister_netdev - remove device from the kernel
int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
{
- char buf[IFNAMSIZ];
- const char *destname;
int err;
ASSERT_RTNL();
* we can use it in the destination network namespace.
*/
err = -EEXIST;
- destname = dev->name;
- if (__dev_get_by_name(net, destname)) {
+ if (__dev_get_by_name(net, dev->name)) {
/* We get here if we can't use the current device name */
if (!pat)
goto out;
- if (!dev_valid_name(pat))
- goto out;
- if (strchr(pat, '%')) {
- if (__dev_alloc_name(net, pat, buf) < 0)
- goto out;
- destname = buf;
- } else
- destname = pat;
- if (__dev_get_by_name(net, destname))
+ if (dev_get_valid_name(net, pat, dev->name, 1))
goto out;
}
this device. They should clean all the things.
*/
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+ call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
/*
* Flush the unicast and multicast chains
*/
- dev_unicast_flush(dev);
- dev_addr_discard(dev);
+ dev_uc_flush(dev);
+ dev_mc_flush(dev);
netdev_unregister_kobject(dev);
/* Actually switch the network namespace */
dev_net_set(dev, net);
- /* Assign the new device name */
- if (destname != dev->name)
- strcpy(dev->name, destname);
-
/* If there is an ifindex conflict assign a new one */
if (__dev_get_by_index(net, dev->ifindex)) {
int iflink = (dev->iflink == dev->ifindex);
/* Notify protocols, that a new device appeared. */
call_netdevice_notifiers(NETDEV_REGISTER, dev);
+ /*
+ * Prevent userspace races by waiting until the network
+ * device is fully setup before sending notifications.
+ */
+ rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
+
synchronize_net();
err = 0;
out:
local_irq_enable();
/* Process offline CPU's input_pkt_queue */
- while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
+ while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
netif_rx(skb);
+ incr_input_queue_head(oldsd);
+ }
return NOTIFY_OK;
}
static void __net_exit default_device_exit(struct net *net)
{
- struct net_device *dev;
+ struct net_device *dev, *aux;
/*
- * Push all migratable of the network devices back to the
+ * Push all migratable network devices back to the
* initial network namespace
*/
rtnl_lock();
-restart:
- for_each_netdev(net, dev) {
+ for_each_netdev_safe(net, dev, aux) {
int err;
char fb_name[IFNAMSIZ];
if (dev->features & NETIF_F_NETNS_LOCAL)
continue;
- /* Delete virtual devices */
- if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
- dev->rtnl_link_ops->dellink(dev);
- goto restart;
- }
+ /* Leave virtual devices for the generic cleanup */
+ if (dev->rtnl_link_ops)
+ continue;
/* Push remaing network devices to init_net */
snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
__func__, dev->name, err);
BUG();
}
- goto restart;
}
rtnl_unlock();
}
+static void __net_exit default_device_exit_batch(struct list_head *net_list)
+{
+	/* At exit all network devices must be removed from a network
+	 * namespace.  Do this in the reverse order of registration.
+	 * Do this across as many network namespaces as possible to
+	 * improve batching efficiency.
+	 */
+	struct net_device *dev;
+	struct net *net;
+	LIST_HEAD(dev_kill_list);
+
+	rtnl_lock();
+	list_for_each_entry(net, net_list, exit_list) {
+		for_each_netdev_reverse(net, dev) {
+			/*
+			 * Use the link type's own delete handler when it
+			 * has one; otherwise queue the device through the
+			 * generic path instead of calling a NULL pointer.
+			 */
+			if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink)
+				dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
+			else
+				unregister_netdevice_queue(dev, &dev_kill_list);
+		}
+	}
+	unregister_netdevice_many(&dev_kill_list);
+	/*
+	 * dev_kill_list lives in this stack frame: unlink it so the queued
+	 * devices' unreg_list members do not point at dead stack memory.
+	 */
+	list_del(&dev_kill_list);
+	rtnl_unlock();
+}
+
static struct pernet_operations __net_initdata default_device_ops = {
.exit = default_device_exit,
+ .exit_batch = default_device_exit_batch, /* queues and unregisters, in one batch, every device still present in the exiting namespaces */
};
/*
queue->completion_queue = NULL;
INIT_LIST_HEAD(&queue->poll_list);
+#ifdef CONFIG_RPS
+ queue->csd.func = trigger_softirq;
+ queue->csd.info = queue;
+ queue->csd.flags = 0;
+#endif
+
queue->backlog.poll = process_backlog;
queue->backlog.weight = weight_p;
queue->backlog.gro_list = NULL;
static int __init initialize_hashrnd(void)
{
- get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd));
+ get_random_bytes(&hashrnd, sizeof(hashrnd)); /* fill hashrnd with random bytes; this function always returns 0 */
return 0;
}