Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6
[safe/jmp/linux-2.6] / net / core / dev.c
index 89912ae..4464240 100644 (file)
 #include <linux/init.h>
 #include <linux/kmod.h>
 #include <linux/module.h>
-#include <linux/kallsyms.h>
 #include <linux/netpoll.h>
 #include <linux/rcupdate.h>
 #include <linux/delay.h>
 
 #include "net-sysfs.h"
 
+/* Instead of increasing this, you should create a hash table. */
+#define MAX_GRO_SKBS 8
+
 /*
  *     The list of packet types we will receive (as opposed to discard)
  *     and the routines to invoke.
@@ -281,8 +283,8 @@ static const unsigned short netdev_lock_type[] =
         ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
         ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
         ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
-        ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID,
-        ARPHRD_NONE};
+        ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
+        ARPHRD_PHONET_PIPE, ARPHRD_VOID, ARPHRD_NONE};
 
 static const char *netdev_lock_name[] =
        {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
@@ -298,8 +300,8 @@ static const char *netdev_lock_name[] =
         "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
         "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
         "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
-        "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID",
-        "_xmit_NONE"};
+        "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
+        "_xmit_PHONET_PIPE", "_xmit_VOID", "_xmit_NONE"};
 
 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
 static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
@@ -924,10 +926,15 @@ int dev_change_name(struct net_device *dev, const char *newname)
                strlcpy(dev->name, newname, IFNAMSIZ);
 
 rollback:
-       ret = device_rename(&dev->dev, dev->name);
-       if (ret) {
-               memcpy(dev->name, oldname, IFNAMSIZ);
-               return ret;
+       /* For now only devices in the initial network namespace
+        * are in sysfs.
+        */
+       if (net == &init_net) {
+               ret = device_rename(&dev->dev, dev->name);
+               if (ret) {
+                       memcpy(dev->name, oldname, IFNAMSIZ);
+                       return ret;
+               }
        }
 
        write_lock_bh(&dev_base_lock);
@@ -1055,6 +1062,7 @@ void dev_load(struct net *net, const char *name)
  */
 int dev_open(struct net_device *dev)
 {
+       const struct net_device_ops *ops = dev->netdev_ops;
        int ret = 0;
 
        ASSERT_RTNL();
@@ -1077,11 +1085,11 @@ int dev_open(struct net_device *dev)
         */
        set_bit(__LINK_STATE_START, &dev->state);
 
-       if (dev->validate_addr)
-               ret = dev->validate_addr(dev);
+       if (ops->ndo_validate_addr)
+               ret = ops->ndo_validate_addr(dev);
 
-       if (!ret && dev->open)
-               ret = dev->open(dev);
+       if (!ret && ops->ndo_open)
+               ret = ops->ndo_open(dev);
 
        /*
         *      If it went open OK then:
@@ -1125,6 +1133,7 @@ int dev_open(struct net_device *dev)
  */
 int dev_close(struct net_device *dev)
 {
+       const struct net_device_ops *ops = dev->netdev_ops;
        ASSERT_RTNL();
 
        might_sleep();
@@ -1157,8 +1166,8 @@ int dev_close(struct net_device *dev)
         *      We allow it to be called even after a DETACH hot-plug
         *      event.
         */
-       if (dev->stop)
-               dev->stop(dev);
+       if (ops->ndo_stop)
+               ops->ndo_stop(dev);
 
        /*
         *      Device is now down.
@@ -1527,8 +1536,6 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
        __be16 type = skb->protocol;
        int err;
 
-       BUG_ON(skb_shinfo(skb)->frag_list);
-
        skb_reset_mac_header(skb);
        skb->mac_len = skb->network_header - skb->mac_header;
        __skb_pull(skb, skb->mac_len);
@@ -1654,6 +1661,9 @@ static int dev_gso_segment(struct sk_buff *skb)
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
                        struct netdev_queue *txq)
 {
+       const struct net_device_ops *ops = dev->netdev_ops;
+
+       prefetch(&dev->netdev_ops->ndo_start_xmit);
        if (likely(!skb->next)) {
                if (!list_empty(&ptype_all))
                        dev_queue_xmit_nit(skb, dev);
@@ -1665,7 +1675,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
                                goto gso;
                }
 
-               return dev->hard_start_xmit(skb, dev);
+               return ops->ndo_start_xmit(skb, dev);
        }
 
 gso:
@@ -1675,7 +1685,7 @@ gso:
 
                skb->next = nskb->next;
                nskb->next = NULL;
-               rc = dev->hard_start_xmit(nskb, dev);
+               rc = ops->ndo_start_xmit(nskb, dev);
                if (unlikely(rc)) {
                        nskb->next = skb->next;
                        skb->next = nskb;
@@ -1749,10 +1759,11 @@ static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
                                        struct sk_buff *skb)
 {
+       const struct net_device_ops *ops = dev->netdev_ops;
        u16 queue_index = 0;
 
-       if (dev->select_queue)
-               queue_index = dev->select_queue(dev, skb);
+       if (ops->ndo_select_queue)
+               queue_index = ops->ndo_select_queue(dev, skb);
        else if (dev->real_num_tx_queues > 1)
                queue_index = simple_tx_hash(dev, skb);
 
@@ -2251,8 +2262,10 @@ int netif_receive_skb(struct sk_buff *skb)
        rcu_read_lock();
 
        /* Don't receive packets in an exiting network namespace */
-       if (!net_alive(dev_net(skb->dev)))
+       if (!net_alive(dev_net(skb->dev))) {
+               kfree_skb(skb);
                goto out;
+       }
 
 #ifdef CONFIG_NET_CLS_ACT
        if (skb->tc_verd & TC_NCLS) {
@@ -2325,6 +2338,125 @@ static void flush_backlog(void *arg)
                }
 }
 
+static int napi_gro_complete(struct sk_buff *skb)
+{
+       struct packet_type *ptype;
+       __be16 type = skb->protocol;
+       struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+       int err = -ENOENT;
+
+       if (!skb_shinfo(skb)->frag_list)
+               goto out;
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(ptype, head, list) {
+               if (ptype->type != type || ptype->dev || !ptype->gro_complete)
+                       continue;
+
+               err = ptype->gro_complete(skb);
+               break;
+       }
+       rcu_read_unlock();
+
+       if (err) {
+               WARN_ON(&ptype->list == head);
+               kfree_skb(skb);
+               return NET_RX_SUCCESS;
+       }
+
+out:
+       __skb_push(skb, -skb_network_offset(skb));
+       return netif_receive_skb(skb);
+}
+
+void napi_gro_flush(struct napi_struct *napi)
+{
+       struct sk_buff *skb, *next;
+
+       for (skb = napi->gro_list; skb; skb = next) {
+               next = skb->next;
+               skb->next = NULL;
+               napi_gro_complete(skb);
+       }
+
+       napi->gro_list = NULL;
+}
+EXPORT_SYMBOL(napi_gro_flush);
+
+int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+{
+       struct sk_buff **pp = NULL;
+       struct packet_type *ptype;
+       __be16 type = skb->protocol;
+       struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+       int count = 0;
+       int same_flow;
+       int mac_len;
+
+       if (!(skb->dev->features & NETIF_F_GRO))
+               goto normal;
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(ptype, head, list) {
+               struct sk_buff *p;
+
+               if (ptype->type != type || ptype->dev || !ptype->gro_receive)
+                       continue;
+
+               skb_reset_network_header(skb);
+               mac_len = skb->network_header - skb->mac_header;
+               skb->mac_len = mac_len;
+               NAPI_GRO_CB(skb)->same_flow = 0;
+               NAPI_GRO_CB(skb)->flush = 0;
+
+               for (p = napi->gro_list; p; p = p->next) {
+                       count++;
+                       NAPI_GRO_CB(p)->same_flow =
+                               p->mac_len == mac_len &&
+                               !memcmp(skb_mac_header(p), skb_mac_header(skb),
+                                       mac_len);
+                       NAPI_GRO_CB(p)->flush = 0;
+               }
+
+               pp = ptype->gro_receive(&napi->gro_list, skb);
+               break;
+       }
+       rcu_read_unlock();
+
+       if (&ptype->list == head)
+               goto normal;
+
+       same_flow = NAPI_GRO_CB(skb)->same_flow;
+
+       if (pp) {
+               struct sk_buff *nskb = *pp;
+
+               *pp = nskb->next;
+               nskb->next = NULL;
+               napi_gro_complete(nskb);
+               count--;
+       }
+
+       if (same_flow)
+               goto ok;
+
+       if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) {
+               __skb_push(skb, -skb_network_offset(skb));
+               goto normal;
+       }
+
+       NAPI_GRO_CB(skb)->count = 1;
+       skb->next = napi->gro_list;
+       napi->gro_list = skb;
+
+ok:
+       return NET_RX_SUCCESS;
+
+normal:
+       return netif_receive_skb(skb);
+}
+EXPORT_SYMBOL(napi_gro_receive);
+
 static int process_backlog(struct napi_struct *napi, int quota)
 {
        int work = 0;
@@ -2344,9 +2476,11 @@ static int process_backlog(struct napi_struct *napi, int quota)
                }
                local_irq_enable();
 
-               netif_receive_skb(skb);
+               napi_gro_receive(napi, skb);
        } while (++work < quota && jiffies == start_time);
 
+       napi_gro_flush(napi);
+
        return work;
 }
 
@@ -2367,11 +2501,73 @@ void __napi_schedule(struct napi_struct *n)
 }
 EXPORT_SYMBOL(__napi_schedule);
 
+void __napi_complete(struct napi_struct *n)
+{
+       BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
+       BUG_ON(n->gro_list);
+
+       list_del(&n->poll_list);
+       smp_mb__before_clear_bit();
+       clear_bit(NAPI_STATE_SCHED, &n->state);
+}
+EXPORT_SYMBOL(__napi_complete);
+
+void napi_complete(struct napi_struct *n)
+{
+       unsigned long flags;
+
+       /*
+        * don't let napi dequeue from the cpu poll list
+        * just in case it's running on a different cpu
+        */
+       if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
+               return;
+
+       napi_gro_flush(n);
+       local_irq_save(flags);
+       __napi_complete(n);
+       local_irq_restore(flags);
+}
+EXPORT_SYMBOL(napi_complete);
+
+void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
+                   int (*poll)(struct napi_struct *, int), int weight)
+{
+       INIT_LIST_HEAD(&napi->poll_list);
+       napi->gro_list = NULL;
+       napi->poll = poll;
+       napi->weight = weight;
+       list_add(&napi->dev_list, &dev->napi_list);
+#ifdef CONFIG_NETPOLL
+       napi->dev = dev;
+       spin_lock_init(&napi->poll_lock);
+       napi->poll_owner = -1;
+#endif
+       set_bit(NAPI_STATE_SCHED, &napi->state);
+}
+EXPORT_SYMBOL(netif_napi_add);
+
+void netif_napi_del(struct napi_struct *napi)
+{
+       struct sk_buff *skb, *next;
+
+       list_del_init(&napi->dev_list);
+
+       for (skb = napi->gro_list; skb; skb = next) {
+               next = skb->next;
+               skb->next = NULL;
+               kfree_skb(skb);
+       }
+
+       napi->gro_list = NULL;
+}
+EXPORT_SYMBOL(netif_napi_del);
+
 
 static void net_rx_action(struct softirq_action *h)
 {
        struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
-       unsigned long start_time = jiffies;
+       unsigned long time_limit = jiffies + 2;
        int budget = netdev_budget;
        void *have;
 
@@ -2382,13 +2578,10 @@ static void net_rx_action(struct softirq_action *h)
                int work, weight;
 
                /* If softirq window is exhuasted then punt.
-                *
-                * Note that this is a slight policy change from the
-                * previous NAPI code, which would allow up to 2
-                * jiffies to pass before breaking out.  The test
-                * used to be "jiffies - start_time > 1".
+                * Allow this to run for 2 jiffies, since that will allow
+                * an average latency of 1.5/HZ.
                 */
-               if (unlikely(budget <= 0 || jiffies != start_time))
+               if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
                        goto softnet_break;
 
                local_irq_enable();
@@ -2615,7 +2808,7 @@ void dev_seq_stop(struct seq_file *seq, void *v)
 
 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
 {
-       struct net_device_stats *stats = dev->get_stats(dev);
+       const struct net_device_stats *stats = dev_get_stats(dev);
 
        seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
                   "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
@@ -2797,31 +2990,6 @@ static void ptype_seq_stop(struct seq_file *seq, void *v)
        rcu_read_unlock();
 }
 
-static void ptype_seq_decode(struct seq_file *seq, void *sym)
-{
-#ifdef CONFIG_KALLSYMS
-       unsigned long offset = 0, symsize;
-       const char *symname;
-       char *modname;
-       char namebuf[128];
-
-       symname = kallsyms_lookup((unsigned long)sym, &symsize, &offset,
-                                 &modname, namebuf);
-
-       if (symname) {
-               char *delim = ":";
-
-               if (!modname)
-                       modname = delim = "";
-               seq_printf(seq, "%s%s%s%s+0x%lx", delim, modname, delim,
-                          symname, offset);
-               return;
-       }
-#endif
-
-       seq_printf(seq, "[%p]", sym);
-}
-
 static int ptype_seq_show(struct seq_file *seq, void *v)
 {
        struct packet_type *pt = v;
@@ -2834,10 +3002,8 @@ static int ptype_seq_show(struct seq_file *seq, void *v)
                else
                        seq_printf(seq, "%04x", ntohs(pt->type));
 
-               seq_printf(seq, " %-8s ",
-                          pt->dev ? pt->dev->name : "");
-               ptype_seq_decode(seq,  pt->func);
-               seq_putc(seq, '\n');
+               seq_printf(seq, " %-8s %pF\n",
+                          pt->dev ? pt->dev->name : "", pt->func);
        }
 
        return 0;
@@ -2954,8 +3120,10 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
 
 static void dev_change_rx_flags(struct net_device *dev, int flags)
 {
-       if (dev->flags & IFF_UP && dev->change_rx_flags)
-               dev->change_rx_flags(dev, flags);
+       const struct net_device_ops *ops = dev->netdev_ops;
+
+       if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
+               ops->ndo_change_rx_flags(dev, flags);
 }
 
 static int __dev_set_promiscuity(struct net_device *dev, int inc)
@@ -3079,6 +3247,8 @@ int dev_set_allmulti(struct net_device *dev, int inc)
  */
 void __dev_set_rx_mode(struct net_device *dev)
 {
+       const struct net_device_ops *ops = dev->netdev_ops;
+
        /* dev_open will call this function so the list will stay sane. */
        if (!(dev->flags&IFF_UP))
                return;
@@ -3086,8 +3256,8 @@ void __dev_set_rx_mode(struct net_device *dev)
        if (!netif_device_present(dev))
                return;
 
-       if (dev->set_rx_mode)
-               dev->set_rx_mode(dev);
+       if (ops->ndo_set_rx_mode)
+               ops->ndo_set_rx_mode(dev);
        else {
                /* Unicast addresses changes may only happen under the rtnl,
                 * therefore calling __dev_set_promiscuity here is safe.
@@ -3100,8 +3270,8 @@ void __dev_set_rx_mode(struct net_device *dev)
                        dev->uc_promisc = 0;
                }
 
-               if (dev->set_multicast_list)
-                       dev->set_multicast_list(dev);
+               if (ops->ndo_set_multicast_list)
+                       ops->ndo_set_multicast_list(dev);
        }
 }
 
@@ -3460,6 +3630,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags)
  */
 int dev_set_mtu(struct net_device *dev, int new_mtu)
 {
+       const struct net_device_ops *ops = dev->netdev_ops;
        int err;
 
        if (new_mtu == dev->mtu)
@@ -3473,10 +3644,11 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
                return -ENODEV;
 
        err = 0;
-       if (dev->change_mtu)
-               err = dev->change_mtu(dev, new_mtu);
+       if (ops->ndo_change_mtu)
+               err = ops->ndo_change_mtu(dev, new_mtu);
        else
                dev->mtu = new_mtu;
+
        if (!err && dev->flags & IFF_UP)
                call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
        return err;
@@ -3491,15 +3663,16 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
  */
 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
 {
+       const struct net_device_ops *ops = dev->netdev_ops;
        int err;
 
-       if (!dev->set_mac_address)
+       if (!ops->ndo_set_mac_address)
                return -EOPNOTSUPP;
        if (sa->sa_family != dev->type)
                return -EINVAL;
        if (!netif_device_present(dev))
                return -ENODEV;
-       err = dev->set_mac_address(dev, sa);
+       err = ops->ndo_set_mac_address(dev, sa);
        if (!err)
                call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
        return err;
@@ -3579,10 +3752,13 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
 {
        int err;
        struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
+       const struct net_device_ops *ops;
 
        if (!dev)
                return -ENODEV;
 
+       ops = dev->netdev_ops;
+
        switch (cmd) {
                case SIOCSIFFLAGS:      /* Set interface flags */
                        return dev_change_flags(dev, ifr->ifr_flags);
@@ -3606,15 +3782,15 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
                        return 0;
 
                case SIOCSIFMAP:
-                       if (dev->set_config) {
+                       if (ops->ndo_set_config) {
                                if (!netif_device_present(dev))
                                        return -ENODEV;
-                               return dev->set_config(dev, &ifr->ifr_map);
+                               return ops->ndo_set_config(dev, &ifr->ifr_map);
                        }
                        return -EOPNOTSUPP;
 
                case SIOCADDMULTI:
-                       if ((!dev->set_multicast_list && !dev->set_rx_mode) ||
+                       if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
                            ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
                                return -EINVAL;
                        if (!netif_device_present(dev))
@@ -3623,7 +3799,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
                                          dev->addr_len, 1);
 
                case SIOCDELMULTI:
-                       if ((!dev->set_multicast_list && !dev->set_rx_mode) ||
+                       if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
                            ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
                                return -EINVAL;
                        if (!netif_device_present(dev))
@@ -3661,10 +3837,9 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
                            cmd == SIOCBRDELIF ||
                            cmd == SIOCWANDEV) {
                                err = -EOPNOTSUPP;
-                               if (dev->do_ioctl) {
+                               if (ops->ndo_do_ioctl) {
                                        if (netif_device_present(dev))
-                                               err = dev->do_ioctl(dev, ifr,
-                                                                   cmd);
+                                               err = ops->ndo_do_ioctl(dev, ifr, cmd);
                                        else
                                                err = -ENODEV;
                                }
@@ -3925,8 +4100,8 @@ static void rollback_registered(struct net_device *dev)
         */
        dev_addr_discard(dev);
 
-       if (dev->uninit)
-               dev->uninit(dev);
+       if (dev->netdev_ops->ndo_uninit)
+               dev->netdev_ops->ndo_uninit(dev);
 
        /* Notifier chain MUST detach us from master device. */
        WARN_ON(dev->master);
@@ -4016,7 +4191,7 @@ int register_netdevice(struct net_device *dev)
        struct hlist_head *head;
        struct hlist_node *p;
        int ret;
-       struct net *net;
+       struct net *net = dev_net(dev);
 
        BUG_ON(dev_boot_phase);
        ASSERT_RTNL();
@@ -4025,8 +4200,7 @@ int register_netdevice(struct net_device *dev)
 
        /* When net_device's are persistent, this will be fatal. */
        BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
-       BUG_ON(!dev_net(dev));
-       net = dev_net(dev);
+       BUG_ON(!net);
 
        spin_lock_init(&dev->addr_list_lock);
        netdev_set_addr_lockdep_class(dev);
@@ -4034,9 +4208,46 @@ int register_netdevice(struct net_device *dev)
 
        dev->iflink = -1;
 
+#ifdef CONFIG_COMPAT_NET_DEV_OPS
+       /* Netdevice_ops API compatibility support.
+        * This is temporary until all network devices are converted.
+        */
+       if (dev->netdev_ops) {
+               const struct net_device_ops *ops = dev->netdev_ops;
+
+               dev->init = ops->ndo_init;
+               dev->uninit = ops->ndo_uninit;
+               dev->open = ops->ndo_open;
+               dev->change_rx_flags = ops->ndo_change_rx_flags;
+               dev->set_rx_mode = ops->ndo_set_rx_mode;
+               dev->set_multicast_list = ops->ndo_set_multicast_list;
+               dev->set_mac_address = ops->ndo_set_mac_address;
+               dev->validate_addr = ops->ndo_validate_addr;
+               dev->do_ioctl = ops->ndo_do_ioctl;
+               dev->set_config = ops->ndo_set_config;
+               dev->change_mtu = ops->ndo_change_mtu;
+               dev->tx_timeout = ops->ndo_tx_timeout;
+               dev->get_stats = ops->ndo_get_stats;
+               dev->vlan_rx_register = ops->ndo_vlan_rx_register;
+               dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid;
+               dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid;
+#ifdef CONFIG_NET_POLL_CONTROLLER
+               dev->poll_controller = ops->ndo_poll_controller;
+#endif
+       } else {
+               char drivername[64];
+               pr_info("%s (%s): not using net_device_ops yet\n",
+                       dev->name, netdev_drivername(dev, drivername, 64));
+
+               /* This works only because net_device_ops and the
+                  compatibility structure are the same. */
+               dev->netdev_ops = (void *) &(dev->init);
+       }
+#endif
+
        /* Init, if this function is available */
-       if (dev->init) {
-               ret = dev->init(dev);
+       if (dev->netdev_ops->ndo_init) {
+               ret = dev->netdev_ops->ndo_init(dev);
                if (ret) {
                        if (ret > 0)
                                ret = -EIO;
@@ -4114,8 +4325,8 @@ out:
        return ret;
 
 err_uninit:
-       if (dev->uninit)
-               dev->uninit(dev);
+       if (dev->netdev_ops->ndo_uninit)
+               dev->netdev_ops->ndo_uninit(dev);
        goto out;
 }
 
@@ -4271,10 +4482,24 @@ void netdev_run_todo(void)
        }
 }
 
-static struct net_device_stats *internal_stats(struct net_device *dev)
-{
-       return &dev->stats;
+/**
+ *     dev_get_stats   - get network device statistics
+ *     @dev: device to get statistics from
+ *
+ *     Get network statistics from device. The device driver may provide
+ *     its own method by setting dev->netdev_ops->ndo_get_stats;
+ *     otherwise the internal statistics structure is used.
+ */
+const struct net_device_stats *dev_get_stats(struct net_device *dev)
+ {
+       const struct net_device_ops *ops = dev->netdev_ops;
+
+       if (ops->ndo_get_stats)
+               return ops->ndo_get_stats(dev);
+       else
+               return &dev->stats;
 }
+EXPORT_SYMBOL(dev_get_stats);
 
 static void netdev_init_one_queue(struct net_device *dev,
                                  struct netdev_queue *queue,
@@ -4343,18 +4568,11 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
        dev->num_tx_queues = queue_count;
        dev->real_num_tx_queues = queue_count;
 
-       if (sizeof_priv) {
-               dev->priv = ((char *)dev +
-                            ((sizeof(struct net_device) + NETDEV_ALIGN_CONST)
-                             & ~NETDEV_ALIGN_CONST));
-       }
-
        dev->gso_max_size = GSO_MAX_SIZE;
 
        netdev_init_queues(dev);
 
-       dev->get_stats = internal_stats;
-       netpoll_netdev_init(dev);
+       INIT_LIST_HEAD(&dev->napi_list);
        setup(dev);
        strcpy(dev->name, name);
        return dev;
@@ -4371,10 +4589,15 @@ EXPORT_SYMBOL(alloc_netdev_mq);
  */
 void free_netdev(struct net_device *dev)
 {
+       struct napi_struct *p, *n;
+
        release_net(dev_net(dev));
 
        kfree(dev->_tx);
 
+       list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
+               netif_napi_del(p);
+
        /*  Compatibility with error handling in drivers */
        if (dev->reg_state == NETREG_UNINITIALIZED) {
                kfree((char *)dev - dev->padded);
@@ -4467,6 +4690,15 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
        if (dev->features & NETIF_F_NETNS_LOCAL)
                goto out;
 
+#ifdef CONFIG_SYSFS
+       /* Don't allow real devices to be moved when sysfs
+        * is enabled.
+        */
+       err = -EINVAL;
+       if (dev->dev.parent)
+               goto out;
+#endif
+
        /* Ensure the device has been registrered */
        err = -EINVAL;
        if (dev->reg_state != NETREG_REGISTERED)
@@ -4524,6 +4756,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
         */
        dev_addr_discard(dev);
 
+       netdev_unregister_kobject(dev);
+
        /* Actually switch the network namespace */
        dev_net_set(dev, net);
 
@@ -4540,7 +4774,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
        }
 
        /* Fixup kobjects */
-       netdev_unregister_kobject(dev);
        err = netdev_register_kobject(dev);
        WARN_ON(err);
 
@@ -4847,6 +5080,12 @@ static void __net_exit default_device_exit(struct net *net)
                if (dev->features & NETIF_F_NETNS_LOCAL)
                        continue;
 
+               /* Delete virtual devices */
+               if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
+                       dev->rtnl_link_ops->dellink(dev);
+                       continue;
+               }
+
                /* Push remaing network devices to init_net */
                snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
                err = dev_change_net_namespace(dev, &init_net, fb_name);
@@ -4893,9 +5132,6 @@ static int __init net_dev_init(void)
        if (register_pernet_subsys(&netdev_net_ops))
                goto out;
 
-       if (register_pernet_device(&default_device_ops))
-               goto out;
-
        /*
         *      Initialise the packet receive queues.
         */
@@ -4910,12 +5146,28 @@ static int __init net_dev_init(void)
 
                queue->backlog.poll = process_backlog;
                queue->backlog.weight = weight_p;
+               queue->backlog.gro_list = NULL;
        }
 
-       netdev_dma_register();
-
        dev_boot_phase = 0;
 
+       /* The loopback device is special: if any other network device
+        * is present in a network namespace, the loopback device must
+        * be present too. Since we now dynamically allocate and free
+        * the loopback device, ensure this invariant is maintained by
+        * keeping the loopback device as the first device on the
+        * list of network devices.  This ensures the loopback device
+        * is the first device that appears and the last network device
+        * that disappears.
+        */
+       if (register_pernet_device(&loopback_net_ops))
+               goto out;
+
+       if (register_pernet_device(&default_device_ops))
+               goto out;
+
+       netdev_dma_register();
+
        open_softirq(NET_TX_SOFTIRQ, net_tx_action);
        open_softirq(NET_RX_SOFTIRQ, net_rx_action);