net: skb->dst accessors

[safe/jmp/linux-2.6] / net / core / dev.c
diff --git a/net/core/dev.c b/net/core/dev.c

index e0dc67a..34b49a6 100644 (file)
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -108,7 +108,6 @@
  #include <linux/init.h>
  #include <linux/kmod.h>
  #include <linux/module.h>
-#include <linux/kallsyms.h>
  #include <linux/netpoll.h>
  #include <linux/rcupdate.h>
  #include <linux/delay.h>
@@ -127,9 +126,16 @@
  #include <linux/in.h>
  #include <linux/jhash.h>
  #include <linux/random.h>
+#include <trace/napi.h>
  
  #include "net-sysfs.h"
  
+/* Instead of increasing this, you should create a hash table. */
+#define MAX_GRO_SKBS 8
+
+/* This should be increased if a protocol with a bigger head is added. */
+#define GRO_MAX_HEAD (MAX_HEADER + 128)
+
  /*
   *     The list of packet types we will receive (as opposed to discard)
   *     and the routines to invoke.
@@ -165,25 +171,6 @@ static DEFINE_SPINLOCK(ptype_lock);
  static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
  static struct list_head ptype_all __read_mostly;       /* Taps */
  
-#ifdef CONFIG_NET_DMA
-struct net_dma {
-       struct dma_client client;
-       spinlock_t lock;
-       cpumask_t channel_mask;
-       struct dma_chan **channels;
-};
-
-static enum dma_state_client
-netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
-       enum dma_state state);
-
-static struct net_dma net_dma = {
-       .client = {
-               .event_callback = netdev_dma_event,
-       },
-};
-#endif
-
  /*
   * The @dev_base_head list is protected by @dev_base_lock and the rtnl
   * semaphore.
@@ -281,8 +268,8 @@ static const unsigned short netdev_lock_type[] =
          ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
          ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
          ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
-        ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID,
-        ARPHRD_NONE};
+        ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
+        ARPHRD_PHONET_PIPE, ARPHRD_VOID, ARPHRD_NONE};
  
  static const char *netdev_lock_name[] =
         {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
@@ -298,8 +285,8 @@ static const char *netdev_lock_name[] =
          "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
          "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
          "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
-        "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID",
-        "_xmit_NONE"};
+        "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
+        "_xmit_PHONET_PIPE", "_xmit_VOID", "_xmit_NONE"};
  
  static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
  static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
@@ -1060,6 +1047,7 @@ void dev_load(struct net *net, const char *name)
   */
  int dev_open(struct net_device *dev)
  {
+       const struct net_device_ops *ops = dev->netdev_ops;
         int ret = 0;
  
         ASSERT_RTNL();
@@ -1082,11 +1070,11 @@ int dev_open(struct net_device *dev)
          */
         set_bit(__LINK_STATE_START, &dev->state);
  
-       if (dev->validate_addr)
-               ret = dev->validate_addr(dev);
+       if (ops->ndo_validate_addr)
+               ret = ops->ndo_validate_addr(dev);
  
-       if (!ret && dev->open)
-               ret = dev->open(dev);
+       if (!ret && ops->ndo_open)
+               ret = ops->ndo_open(dev);
  
         /*
          *      If it went open OK then:
@@ -1101,6 +1089,11 @@ int dev_open(struct net_device *dev)
                 dev->flags |= IFF_UP;
  
                 /*
+                *      Enable NET_DMA
+                */
+               net_dmaengine_get();
+
+               /*
                  *      Initialize multicasting status
                  */
                 dev_set_rx_mode(dev);
@@ -1130,6 +1123,7 @@ int dev_open(struct net_device *dev)
   */
  int dev_close(struct net_device *dev)
  {
+       const struct net_device_ops *ops = dev->netdev_ops;
         ASSERT_RTNL();
  
         might_sleep();
@@ -1162,8 +1156,8 @@ int dev_close(struct net_device *dev)
          *      We allow it to be called even after a DETACH hot-plug
          *      event.
          */
-       if (dev->stop)
-               dev->stop(dev);
+       if (ops->ndo_stop)
+               ops->ndo_stop(dev);
  
         /*
          *      Device is now down.
@@ -1176,6 +1170,11 @@ int dev_close(struct net_device *dev)
          */
         call_netdevice_notifiers(NETDEV_DOWN, dev);
  
+       /*
+        *      Shutdown NET_DMA
+        */
+       net_dmaengine_put();
+
         return 0;
  }
  
@@ -1338,7 +1337,12 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
  {
         struct packet_type *ptype;
  
+#ifdef CONFIG_NET_CLS_ACT
+       if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
+               net_timestamp(skb);
+#else
         net_timestamp(skb);
+#endif
  
         rcu_read_lock();
         list_for_each_entry_rcu(ptype, &ptype_all, list) {
@@ -1432,7 +1436,7 @@ void netif_device_detach(struct net_device *dev)
  {
         if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
             netif_running(dev)) {
-               netif_stop_queue(dev);
+               netif_tx_stop_all_queues(dev);
         }
  }
  EXPORT_SYMBOL(netif_device_detach);
@@ -1447,7 +1451,7 @@ void netif_device_attach(struct net_device *dev)
  {
         if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
             netif_running(dev)) {
-               netif_wake_queue(dev);
+               netif_tx_wake_all_queues(dev);
                 __netdev_watchdog_up(dev);
         }
  }
@@ -1459,7 +1463,9 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol)
                 ((features & NETIF_F_IP_CSUM) &&
                  protocol == htons(ETH_P_IP)) ||
                 ((features & NETIF_F_IPV6_CSUM) &&
-                protocol == htons(ETH_P_IPV6)));
+                protocol == htons(ETH_P_IPV6)) ||
+               ((features & NETIF_F_FCOE_CRC) &&
+                protocol == htons(ETH_P_FCOE)));
  }
  
  static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
@@ -1532,13 +1538,23 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
         __be16 type = skb->protocol;
         int err;
  
-       BUG_ON(skb_shinfo(skb)->frag_list);
-
         skb_reset_mac_header(skb);
         skb->mac_len = skb->network_header - skb->mac_header;
         __skb_pull(skb, skb->mac_len);
  
-       if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
+       if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
+               struct net_device *dev = skb->dev;
+               struct ethtool_drvinfo info = {};
+
+               if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
+                       dev->ethtool_ops->get_drvinfo(dev, &info);
+
+               WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d "
+                       "ip_summed=%d",
+                    info.driver, dev ? dev->features : 0L,
+                    skb->sk ? skb->sk->sk_route_caps : 0L,
+                    skb->len, skb->data_len, skb->ip_summed);
+
                 if (skb_header_cloned(skb) &&
                     (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
                         return ERR_PTR(err);
@@ -1659,6 +1675,9 @@ static int dev_gso_segment(struct sk_buff *skb)
  int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
                         struct netdev_queue *txq)
  {
+       const struct net_device_ops *ops = dev->netdev_ops;
+       int rc;
+
         if (likely(!skb->next)) {
                 if (!list_empty(&ptype_all))
                         dev_queue_xmit_nit(skb, dev);
@@ -1670,22 +1689,46 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
                                 goto gso;
                 }
  
-               return dev->hard_start_xmit(skb, dev);
+               /*
+                * If the device doesn't need skb->dst, release it right now
+                * while it's hot in this cpu cache
+                */
+               if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
+                       skb_dst_drop(skb);
+
+               rc = ops->ndo_start_xmit(skb, dev);
+               if (rc == 0)
+                       txq_trans_update(txq);
+               /*
+                * TODO: if skb_orphan() was called by
+                * dev->hard_start_xmit() (for example, the unmodified
+                * igb driver does that; bnx2 doesn't), then
+                * skb_tx_software_timestamp() will be unable to send
+                * back the time stamp.
+                *
+                * How can this be prevented? Always create another
+                * reference to the socket before calling
+                * dev->hard_start_xmit()? Prevent that skb_orphan()
+                * does anything in dev->hard_start_xmit() by clearing
+                * the skb destructor before the call and restoring it
+                * afterwards, then doing the skb_orphan() ourselves?
+                */
+               return rc;
         }
  
  gso:
         do {
                 struct sk_buff *nskb = skb->next;
-               int rc;
  
                 skb->next = nskb->next;
                 nskb->next = NULL;
-               rc = dev->hard_start_xmit(nskb, dev);
+               rc = ops->ndo_start_xmit(nskb, dev);
                 if (unlikely(rc)) {
                         nskb->next = skb->next;
                         skb->next = nskb;
                         return rc;
                 }
+               txq_trans_update(txq);
                 if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
                         return NETDEV_TX_BUSY;
         } while (skb->next);
@@ -1697,69 +1740,40 @@ out_kfree_skb:
         return 0;
  }
  
-static u32 simple_tx_hashrnd;
-static int simple_tx_hashrnd_initialized = 0;
+static u32 skb_tx_hashrnd;
  
-static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
+u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
  {
-       u32 addr1, addr2, ports;
-       u32 hash, ihl;
-       u8 ip_proto = 0;
-
-       if (unlikely(!simple_tx_hashrnd_initialized)) {
-               get_random_bytes(&simple_tx_hashrnd, 4);
-               simple_tx_hashrnd_initialized = 1;
-       }
+       u32 hash;
  
-       switch (skb->protocol) {
-       case htons(ETH_P_IP):
-               if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)))
-                       ip_proto = ip_hdr(skb)->protocol;
-               addr1 = ip_hdr(skb)->saddr;
-               addr2 = ip_hdr(skb)->daddr;
-               ihl = ip_hdr(skb)->ihl;
-               break;
-       case htons(ETH_P_IPV6):
-               ip_proto = ipv6_hdr(skb)->nexthdr;
-               addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3];
-               addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3];
-               ihl = (40 >> 2);
-               break;
-       default:
-               return 0;
+       if (skb_rx_queue_recorded(skb)) {
+               hash = skb_get_rx_queue(skb);
+               while (unlikely (hash >= dev->real_num_tx_queues))
+                       hash -= dev->real_num_tx_queues;
+               return hash;
         }
  
+       if (skb->sk && skb->sk->sk_hash)
+               hash = skb->sk->sk_hash;
+       else
+               hash = skb->protocol;
  
-       switch (ip_proto) {
-       case IPPROTO_TCP:
-       case IPPROTO_UDP:
-       case IPPROTO_DCCP:
-       case IPPROTO_ESP:
-       case IPPROTO_AH:
-       case IPPROTO_SCTP:
-       case IPPROTO_UDPLITE:
-               ports = *((u32 *) (skb_network_header(skb) + (ihl * 4)));
-               break;
-
-       default:
-               ports = 0;
-               break;
-       }
-
-       hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd);
+       hash = jhash_1word(hash, skb_tx_hashrnd);
  
         return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
  }
+EXPORT_SYMBOL(skb_tx_hash);
  
  static struct netdev_queue *dev_pick_tx(struct net_device *dev,
                                         struct sk_buff *skb)
  {
+       const struct net_device_ops *ops = dev->netdev_ops;
         u16 queue_index = 0;
  
-       if (dev->select_queue)
-               queue_index = dev->select_queue(dev, skb);
+       if (ops->ndo_select_queue)
+               queue_index = ops->ndo_select_queue(dev, skb);
         else if (dev->real_num_tx_queues > 1)
-               queue_index = simple_tx_hash(dev, skb);
+               queue_index = skb_tx_hash(dev, skb);
  
         skb_set_queue_mapping(skb, queue_index);
         return netdev_get_tx_queue(dev, queue_index);
@@ -2255,12 +2269,6 @@ int netif_receive_skb(struct sk_buff *skb)
  
         rcu_read_lock();
  
-       /* Don't receive packets in an exiting network namespace */
-       if (!net_alive(dev_net(skb->dev))) {
-               kfree_skb(skb);
-               goto out;
-       }
-
  #ifdef CONFIG_NET_CLS_ACT
         if (skb->tc_verd & TC_NCLS) {
                 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
@@ -2291,6 +2299,8 @@ ncls:
         if (!skb)
                 goto out;
  
+       skb_orphan(skb);
+
         type = skb->protocol;
         list_for_each_entry_rcu(ptype,
                         &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
@@ -2332,6 +2342,312 @@ static void flush_backlog(void *arg)
                 }
  }
  
+/*
+ * Finish GRO handling of a single skb taken off a GRO list and hand it to
+ * netif_receive_skb().
+ *
+ * An skb whose GRO count is still 1 only has its gso_size cleared before
+ * delivery.  Otherwise the first device-independent handler registered for
+ * skb->protocol that provides a gro_complete callback is invoked.  When no
+ * such handler exists, or the callback reports an error, the skb is freed
+ * and NET_RX_SUCCESS is returned.
+ */
+static int napi_gro_complete(struct sk_buff *skb)
+{
+       __be16 proto = skb->protocol;
+       struct list_head *bucket = &ptype_base[ntohs(proto) & PTYPE_HASH_MASK];
+       struct packet_type *pt;
+       int rc = -ENOENT;
+
+       if (NAPI_GRO_CB(skb)->count == 1) {
+               skb_shinfo(skb)->gso_size = 0;
+               return netif_receive_skb(skb);
+       }
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(pt, bucket, list) {
+               if (pt->type == proto && !pt->dev && pt->gro_complete) {
+                       rc = pt->gro_complete(skb);
+                       break;
+               }
+       }
+       rcu_read_unlock();
+
+       if (!rc)
+               return netif_receive_skb(skb);
+
+       /* Warn when the walk ran off the end, i.e. no handler matched. */
+       WARN_ON(&pt->list == bucket);
+       kfree_skb(skb);
+       return NET_RX_SUCCESS;
+}
+
+/*
+ * Complete every skb currently held on @napi's GRO list, then leave the
+ * list empty with a zero count.
+ */
+void napi_gro_flush(struct napi_struct *napi)
+{
+       struct sk_buff *held = napi->gro_list;
+
+       while (held) {
+               struct sk_buff *rest = held->next;
+
+               /* Unlink before completing the skb. */
+               held->next = NULL;
+               napi_gro_complete(held);
+               held = rest;
+       }
+
+       napi->gro_count = 0;
+       napi->gro_list = NULL;
+}
+EXPORT_SYMBOL(napi_gro_flush);
+
+/*
+ * dev_gro_receive - offer a received packet to the GRO engine
+ * @napi: NAPI context whose gro_list holds the packets being aggregated
+ * @skb: the packet just received
+ *
+ * Walks the ptype_base bucket for skb->protocol and hands @skb to the first
+ * device-independent handler that provides a gro_receive callback.
+ *
+ * Returns:
+ *   GRO_MERGED or GRO_MERGED_FREE when the handler flagged @skb as same_flow
+ *     (the latter when it also set the "free" flag in the GRO control block);
+ *   GRO_HELD when @skb was put at the head of napi->gro_list;
+ *   GRO_NORMAL when GRO does not apply (device feature off, already GSO,
+ *     has a frag_list, no handler, flush requested, or the list is full).
+ *
+ * For GRO_HELD and GRO_NORMAL, any bytes up to the GRO offset that are not
+ * yet in the linear area are copied there from frag0 before returning.
+ */
+int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+{
+       struct sk_buff **pp = NULL;
+       struct packet_type *ptype;
+       __be16 type = skb->protocol;
+       struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+       int same_flow;
+       int mac_len;
+       int ret;
+
+       if (!(skb->dev->features & NETIF_F_GRO))
+               goto normal;
+
+       if (skb_is_gso(skb) || skb_shinfo(skb)->frag_list)
+               goto normal;
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(ptype, head, list) {
+               if (ptype->type != type || ptype->dev || !ptype->gro_receive)
+                       continue;
+
+               skb_set_network_header(skb, skb_gro_offset(skb));
+               mac_len = skb->network_header - skb->mac_header;
+               skb->mac_len = mac_len;
+               NAPI_GRO_CB(skb)->same_flow = 0;
+               NAPI_GRO_CB(skb)->flush = 0;
+               NAPI_GRO_CB(skb)->free = 0;
+
+               pp = ptype->gro_receive(&napi->gro_list, skb);
+               break;
+       }
+       rcu_read_unlock();
+
+       /* The walk ran off the end of the bucket: no handler for this type. */
+       if (&ptype->list == head)
+               goto normal;
+
+       same_flow = NAPI_GRO_CB(skb)->same_flow;
+       ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
+
+       /* The handler asked for one held skb to be completed now. */
+       if (pp) {
+               struct sk_buff *nskb = *pp;
+
+               *pp = nskb->next;
+               nskb->next = NULL;
+               napi_gro_complete(nskb);
+               napi->gro_count--;
+       }
+
+       if (same_flow)
+               goto ok;
+
+       if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
+               goto normal;
+
+       /* Start holding @skb as a new entry at the head of the list. */
+       napi->gro_count++;
+       NAPI_GRO_CB(skb)->count = 1;
+       skb_shinfo(skb)->gso_size = skb_gro_len(skb);
+       skb->next = napi->gro_list;
+       napi->gro_list = skb;
+       ret = GRO_HELD;
+
+pull:
+       if (skb_headlen(skb) < skb_gro_offset(skb)) {
+               struct skb_shared_info *shinfo = skb_shinfo(skb);
+               int grow = skb_gro_offset(skb) - skb_headlen(skb);
+
+               BUG_ON(skb->end - skb->tail < grow);
+
+               memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
+
+               skb->tail += grow;
+               skb->data_len -= grow;
+
+               shinfo->frags[0].page_offset += grow;
+               shinfo->frags[0].size -= grow;
+
+               if (unlikely(!shinfo->frags[0].size)) {
+                       /*
+                        * frags[0] is now empty: drop its page and shift the
+                        * remaining descriptors down.  memmove() takes a
+                        * byte count, so scale by the descriptor size.
+                        */
+                       put_page(shinfo->frags[0].page);
+                       shinfo->nr_frags--;
+                       memmove(shinfo->frags, shinfo->frags + 1,
+                               shinfo->nr_frags * sizeof(shinfo->frags[0]));
+               }
+       }
+
+ok:
+       return ret;
+
+normal:
+       ret = GRO_NORMAL;
+       goto pull;
+}
+EXPORT_SYMBOL(dev_gro_receive);
+
+/*
+ * Prepare the held packets for a new arrival and run dev_gro_receive().
+ *
+ * Returns GRO_NORMAL straight away when netpoll_rx_on() is true for @skb.
+ * Otherwise every skb on napi->gro_list gets its same_flow flag set when it
+ * came from the same device and compare_ether_header() reports no
+ * difference, and its flush flag cleared.
+ */
+static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+{
+       struct sk_buff *held;
+
+       if (netpoll_rx_on(skb))
+               return GRO_NORMAL;
+
+       held = napi->gro_list;
+       while (held) {
+               if (held->dev != skb->dev)
+                       NAPI_GRO_CB(held)->same_flow = 0;
+               else
+                       NAPI_GRO_CB(held)->same_flow =
+                               !compare_ether_header(skb_mac_header(held),
+                                                     skb_gro_mac_header(skb));
+               NAPI_GRO_CB(held)->flush = 0;
+
+               held = held->next;
+       }
+
+       return dev_gro_receive(napi, skb);
+}
+
+/*
+ * Act on the GRO verdict @ret for @skb.
+ *
+ * GRO_NORMAL delivers the skb through netif_receive_skb() and returns its
+ * result.  GRO_DROP and GRO_MERGED_FREE free the skb.  Every verdict other
+ * than GRO_NORMAL returns NET_RX_SUCCESS, except GRO_DROP which returns
+ * NET_RX_DROP.
+ */
+int napi_skb_finish(int ret, struct sk_buff *skb)
+{
+       if (ret == GRO_NORMAL)
+               return netif_receive_skb(skb);
+
+       if (ret == GRO_DROP || ret == GRO_MERGED_FREE)
+               kfree_skb(skb);
+
+       return ret == GRO_DROP ? NET_RX_DROP : NET_RX_SUCCESS;
+}
+EXPORT_SYMBOL(napi_skb_finish);
+
+/*
+ * Reset the GRO parsing state kept in @skb's control block.
+ *
+ * data_offset, frag0 and frag0_len are cleared.  When the MAC header starts
+ * exactly at the tail of the linear area, the skb has at least one page
+ * fragment, and that fragment's page is not in high memory, frag0 and
+ * frag0_len are pointed at the first fragment.
+ */
+void skb_gro_reset_offset(struct sk_buff *skb)
+{
+       struct skb_shared_info *shinfo = skb_shinfo(skb);
+
+       NAPI_GRO_CB(skb)->data_offset = 0;
+       NAPI_GRO_CB(skb)->frag0 = NULL;
+       NAPI_GRO_CB(skb)->frag0_len = 0;
+
+       /*
+        * frags[0] is only meaningful when nr_frags is non-zero; check it
+        * before looking at the descriptor's page.
+        */
+       if (skb->mac_header == skb->tail &&
+           shinfo->nr_frags &&
+           !PageHighMem(shinfo->frags[0].page)) {
+               NAPI_GRO_CB(skb)->frag0 =
+                       page_address(shinfo->frags[0].page) +
+                       shinfo->frags[0].page_offset;
+               NAPI_GRO_CB(skb)->frag0_len = shinfo->frags[0].size;
+       }
+}
+EXPORT_SYMBOL(skb_gro_reset_offset);
+
+/*
+ * GRO entry point for an skb: reset its GRO offsets, compute the GRO
+ * verdict, and let napi_skb_finish() act on it.  Returns what
+ * napi_skb_finish() returns.
+ */
+int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+{
+       int verdict;
+
+       skb_gro_reset_offset(skb);
+       verdict = __napi_gro_receive(napi, skb);
+
+       return napi_skb_finish(verdict, skb);
+}
+EXPORT_SYMBOL(napi_gro_receive);
+
+/*
+ * Stash @skb in @napi->skb for reuse: pull its whole linear area, then
+ * adjust the reserved headroom by (NET_IP_ALIGN - current headroom).
+ */
+void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
+{
+       int shift;
+
+       __skb_pull(skb, skb_headlen(skb));
+
+       /* Computed after the pull, which changes the headroom. */
+       shift = NET_IP_ALIGN - skb_headroom(skb);
+       skb_reserve(skb, shift);
+
+       napi->skb = skb;
+}
+EXPORT_SYMBOL(napi_reuse_skb);
+
+/*
+ * Return the skb cached in @napi->skb, allocating one first if there is
+ * none.  A new skb is sized GRO_MAX_HEAD + NET_IP_ALIGN, has NET_IP_ALIGN
+ * bytes reserved, and is stored in @napi->skb.  Returns NULL when the
+ * allocation fails.
+ */
+struct sk_buff *napi_get_frags(struct napi_struct *napi)
+{
+       struct sk_buff *skb = napi->skb;
+
+       if (skb)
+               return skb;
+
+       skb = netdev_alloc_skb(napi->dev, GRO_MAX_HEAD + NET_IP_ALIGN);
+       if (!skb)
+               return NULL;
+
+       skb_reserve(skb, NET_IP_ALIGN);
+       napi->skb = skb;
+
+       return skb;
+}
+EXPORT_SYMBOL(napi_get_frags);
+
+/*
+ * Act on the GRO verdict @ret for an skb received through the frags
+ * interface.
+ *
+ * GRO_NORMAL and GRO_HELD set skb->protocol via eth_type_trans();
+ * GRO_NORMAL then delivers through netif_receive_skb() and returns its
+ * result, while GRO_HELD calls skb_gro_pull() with -ETH_HLEN.  GRO_DROP and
+ * GRO_MERGED_FREE give the skb back to @napi for reuse.  Returns
+ * NET_RX_DROP for GRO_DROP and NET_RX_SUCCESS for everything else that does
+ * not go through netif_receive_skb().
+ */
+int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
+{
+       if (ret == GRO_NORMAL || ret == GRO_HELD) {
+               skb->protocol = eth_type_trans(skb, napi->dev);
+
+               if (ret == GRO_NORMAL)
+                       return netif_receive_skb(skb);
+
+               /* NOTE(review): presumably rewinds the GRO offset - confirm */
+               skb_gro_pull(skb, -ETH_HLEN);
+               return NET_RX_SUCCESS;
+       }
+
+       if (ret == GRO_DROP || ret == GRO_MERGED_FREE)
+               napi_reuse_skb(napi, skb);
+
+       return ret == GRO_DROP ? NET_RX_DROP : NET_RX_SUCCESS;
+}
+EXPORT_SYMBOL(napi_frags_finish);
+
+/*
+ * Take the skb cached in @napi->skb (clearing the cache), locate its
+ * Ethernet header through the GRO header helpers, and advance the GRO
+ * offset past it.
+ *
+ * Returns the skb with skb->protocol set to the raw h_proto field, or NULL
+ * when the header could not be obtained; in that case the skb is handed
+ * back to @napi for reuse.
+ */
+struct sk_buff *napi_frags_skb(struct napi_struct *napi)
+{
+       struct sk_buff *skb = napi->skb;
+       struct ethhdr *hdr;
+       unsigned int start;
+       unsigned int need;
+
+       napi->skb = NULL;
+
+       skb_reset_mac_header(skb);
+       skb_gro_reset_offset(skb);
+
+       start = skb_gro_offset(skb);
+       need = start + sizeof(*hdr);
+
+       hdr = skb_gro_header_fast(skb, start);
+       if (skb_gro_header_hard(skb, need)) {
+               hdr = skb_gro_header_slow(skb, need, start);
+               if (unlikely(!hdr)) {
+                       napi_reuse_skb(napi, skb);
+                       return NULL;
+               }
+       }
+
+       skb_gro_pull(skb, sizeof(*hdr));
+
+       /*
+        * Using the raw field works because the only protocols we care
+        * about don't require special handling.  It is fixed up properly
+        * at the end.
+        */
+       skb->protocol = hdr->h_proto;
+
+       return skb;
+}
+EXPORT_SYMBOL(napi_frags_skb);
+
+/*
+ * GRO entry point for the frags interface: fetch the prepared skb from
+ * @napi, compute its GRO verdict and finish it.  Returns NET_RX_DROP when
+ * no skb could be prepared.
+ */
+int napi_gro_frags(struct napi_struct *napi)
+{
+       struct sk_buff *skb = napi_frags_skb(napi);
+       int verdict;
+
+       if (skb == NULL)
+               return NET_RX_DROP;
+
+       verdict = __napi_gro_receive(napi, skb);
+
+       return napi_frags_finish(napi, skb, verdict);
+}
+EXPORT_SYMBOL(napi_gro_frags);
+
  static int process_backlog(struct napi_struct *napi, int quota)
  {
         int work = 0;
@@ -2374,6 +2690,72 @@ void __napi_schedule(struct napi_struct *n)
  }
  EXPORT_SYMBOL(__napi_schedule);
  
+/*
+ * Take @n off its poll list and clear NAPI_STATE_SCHED.
+ *
+ * It is a BUG to call this while NAPI_STATE_SCHED is not set or while the
+ * GRO list still holds packets.
+ */
+void __napi_complete(struct napi_struct *n)
+{
+       BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
+       BUG_ON(n->gro_list != NULL);
+
+       list_del(&n->poll_list);
+
+       /* Barrier first, then the bit is cleared. */
+       smp_mb__before_clear_bit();
+       clear_bit(NAPI_STATE_SCHED, &n->state);
+}
+EXPORT_SYMBOL(__napi_complete);
+
+/*
+ * Flush @n's GRO list and complete the NAPI context with local interrupts
+ * disabled.  Does nothing at all while NAPI_STATE_NPSVC is set.
+ */
+void napi_complete(struct napi_struct *n)
+{
+       unsigned long irqflags;
+
+       /*
+        * Don't let napi dequeue from the cpu poll list, just in case it's
+        * running on a different cpu.
+        */
+       if (likely(!test_bit(NAPI_STATE_NPSVC, &n->state))) {
+               napi_gro_flush(n);
+
+               local_irq_save(irqflags);
+               __napi_complete(n);
+               local_irq_restore(irqflags);
+       }
+}
+EXPORT_SYMBOL(napi_complete);
+
+void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
+                   int (*poll)(struct napi_struct *, int), int weight)
+{
+       INIT_LIST_HEAD(&napi->poll_list);
+       napi->gro_count = 0;
+       napi->gro_list = NULL;
+       napi->skb = NULL;
+       napi->poll = poll;
+       napi->weight = weight;
+       list_add(&napi->dev_list, &dev->napi_list);
+       napi->dev = dev;
+#ifdef CONFIG_NETPOLL
+       spin_lock_init(&napi->poll_lock);
+       napi->poll_owner = -1;
+#endif
+       set_bit(NAPI_STATE_SCHED, &napi->state);
+}
+EXPORT_SYMBOL(netif_napi_add);
+
+/*
+ * Detach @napi from its device list, release its cached frags skb via
+ * napi_free_frags(), and free every skb still held on the GRO list
+ * without delivering it.
+ */
+void netif_napi_del(struct napi_struct *napi)
+{
+       struct sk_buff *held;
+
+       list_del_init(&napi->dev_list);
+       napi_free_frags(napi);
+
+       held = napi->gro_list;
+       while (held) {
+               struct sk_buff *rest = held->next;
+
+               held->next = NULL;
+               kfree_skb(held);
+               held = rest;
+       }
+
+       napi->gro_list = NULL;
+       napi->gro_count = 0;
+}
+EXPORT_SYMBOL(netif_napi_del);
+
  
  static void net_rx_action(struct softirq_action *h)
  {
@@ -2415,8 +2797,10 @@ static void net_rx_action(struct softirq_action *h)
                  * accidently calling ->poll() when NAPI is not scheduled.
                  */
                 work = 0;
-               if (test_bit(NAPI_STATE_SCHED, &n->state))
+               if (test_bit(NAPI_STATE_SCHED, &n->state)) {
                         work = n->poll(n, weight);
+                       trace_napi_poll(n);
+               }
  
                 WARN_ON_ONCE(work > weight);
  
@@ -2446,14 +2830,7 @@ out:
          * There may not be any more sk_buffs coming right now, so push
          * any pending DMA copies to hardware
          */
-       if (!cpus_empty(net_dma.channel_mask)) {
-               int chan_idx;
-               for_each_cpu_mask_nr(chan_idx, net_dma.channel_mask) {
-                       struct dma_chan *chan = net_dma.channels[chan_idx];
-                       if (chan)
-                               dma_async_memcpy_issue_pending(chan);
-               }
-       }
+       dma_issue_pending_all();
  #endif
  
         return;
@@ -2619,7 +2996,7 @@ void dev_seq_stop(struct seq_file *seq, void *v)
  
  static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
  {
-       struct net_device_stats *stats = dev->get_stats(dev);
+       const struct net_device_stats *stats = dev_get_stats(dev);
  
         seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
                    "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
@@ -2801,31 +3178,6 @@ static void ptype_seq_stop(struct seq_file *seq, void *v)
         rcu_read_unlock();
  }
  
-static void ptype_seq_decode(struct seq_file *seq, void *sym)
-{
-#ifdef CONFIG_KALLSYMS
-       unsigned long offset = 0, symsize;
-       const char *symname;
-       char *modname;
-       char namebuf[128];
-
-       symname = kallsyms_lookup((unsigned long)sym, &symsize, &offset,
-                                 &modname, namebuf);
-
-       if (symname) {
-               char *delim = ":";
-
-               if (!modname)
-                       modname = delim = "";
-               seq_printf(seq, "%s%s%s%s+0x%lx", delim, modname, delim,
-                          symname, offset);
-               return;
-       }
-#endif
-
-       seq_printf(seq, "[%p]", sym);
-}
-
  static int ptype_seq_show(struct seq_file *seq, void *v)
  {
         struct packet_type *pt = v;
@@ -2838,10 +3190,8 @@ static int ptype_seq_show(struct seq_file *seq, void *v)
                 else
                         seq_printf(seq, "%04x", ntohs(pt->type));
  
-               seq_printf(seq, " %-8s ",
-                          pt->dev ? pt->dev->name : "");
-               ptype_seq_decode(seq,  pt->func);
-               seq_putc(seq, '\n');
+               seq_printf(seq, " %-8s %pF\n",
+                          pt->dev ? pt->dev->name : "", pt->func);
         }
  
         return 0;
@@ -2958,13 +3308,17 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
  
  static void dev_change_rx_flags(struct net_device *dev, int flags)
  {
-       if (dev->flags & IFF_UP && dev->change_rx_flags)
-               dev->change_rx_flags(dev, flags);
+       const struct net_device_ops *ops = dev->netdev_ops;
+
+       if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
+               ops->ndo_change_rx_flags(dev, flags);
  }
  
  static int __dev_set_promiscuity(struct net_device *dev, int inc)
  {
         unsigned short old_flags = dev->flags;
+       uid_t uid;
+       gid_t gid;
  
         ASSERT_RTNL();
  
@@ -2989,15 +3343,17 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc)
                 printk(KERN_INFO "device %s %s promiscuous mode\n",
                        dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
                                                                "left");
-               if (audit_enabled)
+               if (audit_enabled) {
+                       current_uid_gid(&uid, &gid);
                         audit_log(current->audit_context, GFP_ATOMIC,
                                 AUDIT_ANOM_PROMISCUOUS,
                                 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
                                 dev->name, (dev->flags & IFF_PROMISC),
                                 (old_flags & IFF_PROMISC),
                                 audit_get_loginuid(current),
-                               current->uid, current->gid,
+                               uid, gid,
                                 audit_get_sessionid(current));
+               }
  
                 dev_change_rx_flags(dev, IFF_PROMISC);
         }
@@ -3079,6 +3435,8 @@ int dev_set_allmulti(struct net_device *dev, int inc)
   */
  void __dev_set_rx_mode(struct net_device *dev)
  {
+       const struct net_device_ops *ops = dev->netdev_ops;
+
         /* dev_open will call this function so the list will stay sane. */
         if (!(dev->flags&IFF_UP))
                 return;
@@ -3086,8 +3444,8 @@ void __dev_set_rx_mode(struct net_device *dev)
         if (!netif_device_present(dev))
                 return;
  
-       if (dev->set_rx_mode)
-               dev->set_rx_mode(dev);
+       if (ops->ndo_set_rx_mode)
+               ops->ndo_set_rx_mode(dev);
         else {
                 /* Unicast addresses changes may only happen under the rtnl,
                  * therefore calling __dev_set_promiscuity here is safe.
@@ -3100,8 +3458,8 @@ void __dev_set_rx_mode(struct net_device *dev)
                         dev->uc_promisc = 0;
                 }
  
-               if (dev->set_multicast_list)
-                       dev->set_multicast_list(dev);
+               if (ops->ndo_set_multicast_list)
+                       ops->ndo_set_multicast_list(dev);
         }
  }
  
@@ -3112,39 +3470,352 @@ void dev_set_rx_mode(struct net_device *dev)
         netif_addr_unlock_bh(dev);
  }
  
-int __dev_addr_delete(struct dev_addr_list **list, int *count,
-                     void *addr, int alen, int glbl)
+/* hw addresses list handling functions */
+
+static int __hw_addr_add(struct list_head *list, int *delta,
+                        unsigned char *addr, int addr_len,
+                        unsigned char addr_type)
  {
-       struct dev_addr_list *da;
+       struct netdev_hw_addr *ha;
+       int alloc_size;
  
-       for (; (da = *list) != NULL; list = &da->next) {
-               if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
-                   alen == da->da_addrlen) {
-                       if (glbl) {
-                               int old_glbl = da->da_gusers;
-                               da->da_gusers = 0;
-                               if (old_glbl == 0)
-                                       break;
-                       }
-                       if (--da->da_users)
-                               return 0;
+       if (addr_len > MAX_ADDR_LEN)
+               return -EINVAL;
  
-                       *list = da->next;
-                       kfree(da);
-                       (*count)--;
+       list_for_each_entry(ha, list, list) {
+               if (!memcmp(ha->addr, addr, addr_len) &&
+                   ha->type == addr_type) {
+                       ha->refcount++;
                         return 0;
                 }
         }
-       return -ENOENT;
+
+
+       alloc_size = sizeof(*ha);
+       if (alloc_size < L1_CACHE_BYTES)
+               alloc_size = L1_CACHE_BYTES;
+       ha = kmalloc(alloc_size, GFP_ATOMIC);
+       if (!ha)
+               return -ENOMEM;
+       memcpy(ha->addr, addr, addr_len);
+       ha->type = addr_type;
+       ha->refcount = 1;
+       ha->synced = false;
+       list_add_tail_rcu(&ha->list, list);
+       if (delta)
+               (*delta)++;
+       return 0;
  }
  
-int __dev_addr_add(struct dev_addr_list **list, int *count,
-                  void *addr, int alen, int glbl)
+static void ha_rcu_free(struct rcu_head *head)
  {
-       struct dev_addr_list *da;
+       struct netdev_hw_addr *ha;
  
-       for (da = *list; da != NULL; da = da->next) {
-               if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
+       ha = container_of(head, struct netdev_hw_addr, rcu_head);
+       kfree(ha);
+}
+
+/*
+ * Drop one reference on the first entry of @list whose address equals
+ * @addr over @addr_len bytes and whose type equals @addr_type; an
+ * @addr_type of 0 matches an entry of any type.  When the reference count
+ * reaches zero the entry is unlinked, queued for freeing through RCU, and
+ * *@delta (if @delta is non-NULL) is decremented.
+ *
+ * Returns 0 when a matching entry was found, -ENOENT otherwise.
+ */
+static int __hw_addr_del(struct list_head *list, int *delta,
+                        unsigned char *addr, int addr_len,
+                        unsigned char addr_type)
+{
+       struct netdev_hw_addr *entry;
+
+       list_for_each_entry(entry, list, list) {
+               if (memcmp(entry->addr, addr, addr_len) != 0)
+                       continue;
+               if (addr_type && entry->type != addr_type)
+                       continue;
+
+               entry->refcount--;
+               if (entry->refcount == 0) {
+                       list_del_rcu(&entry->list);
+                       call_rcu(&entry->rcu_head, ha_rcu_free);
+                       if (delta)
+                               (*delta)--;
+               }
+               return 0;
+       }
+       return -ENOENT;
+}
+
+/*
+ * Add every address found on @from_list to @to_list.  Each address is
+ * added with @addr_type, or with the source entry's own type when
+ * @addr_type is 0.  If one addition fails, the additions already made by
+ * this call are undone and the error is returned; otherwise returns 0.
+ */
+static int __hw_addr_add_multiple(struct list_head *to_list, int *to_delta,
+                                 struct list_head *from_list, int addr_len,
+                                 unsigned char addr_type)
+{
+       struct netdev_hw_addr *src, *undo;
+       int err;
+
+       list_for_each_entry(src, from_list, list) {
+               err = __hw_addr_add(to_list, to_delta, src->addr, addr_len,
+                                   addr_type ? addr_type : src->type);
+               if (err)
+                       goto unroll;
+       }
+       return 0;
+
+unroll:
+       /* Walk the source again, stopping at the entry that failed. */
+       list_for_each_entry(undo, from_list, list) {
+               if (undo == src)
+                       break;
+               __hw_addr_del(to_list, to_delta, undo->addr, addr_len,
+                             addr_type ? addr_type : undo->type);
+       }
+       return err;
+}
+
+/*
+ * Release from @to_list one reference for every address on @from_list.
+ * Each address is released with @addr_type, or with the source entry's
+ * own type when @addr_type is 0, mirroring __hw_addr_add_multiple().
+ * Addresses that are not present on @to_list are silently skipped.
+ */
+static void __hw_addr_del_multiple(struct list_head *to_list, int *to_delta,
+                                  struct list_head *from_list, int addr_len,
+                                  unsigned char addr_type)
+{
+       struct netdev_hw_addr *ha;
+       unsigned char type;
+
+       list_for_each_entry(ha, from_list, list) {
+               type = addr_type ? addr_type : ha->type;
+               /*
+                * Pass the resolved type, not the raw @addr_type: a raw 0
+                * would make __hw_addr_del() match an entry of any type.
+                */
+               __hw_addr_del(to_list, to_delta, ha->addr,
+                             addr_len, type);
+       }
+}
+
+/*
+ * Propagate the addresses on @from_list to @to_list.
+ *
+ * Entries not yet marked synced are added to @to_list, marked synced and
+ * given one extra reference.  Entries that are already synced and whose
+ * reference count has dropped back to 1 are removed from both lists.
+ *
+ * Returns 0, or the error from the first failing __hw_addr_add(), in
+ * which case the remaining entries are left untouched.
+ */
+static int __hw_addr_sync(struct list_head *to_list, int *to_delta,
+                         struct list_head *from_list, int *from_delta,
+                         int addr_len)
+{
+       int err = 0;
+       struct netdev_hw_addr *ha, *tmp;
+
+       /* _safe iteration: the current entry may be deleted below. */
+       list_for_each_entry_safe(ha, tmp, from_list, list) {
+               if (!ha->synced) {
+                       err = __hw_addr_add(to_list, to_delta, ha->addr,
+                                           addr_len, ha->type);
+                       if (err)
+                               break;
+                       ha->synced = true;
+                       /* reference held on behalf of the synced copy */
+                       ha->refcount++;
+               } else if (ha->refcount == 1) {
+                       /* only the reference taken when syncing is left */
+                       __hw_addr_del(to_list, to_delta, ha->addr,
+                                     addr_len, ha->type);
+                       __hw_addr_del(from_list, from_delta, ha->addr,
+                                     addr_len, ha->type);
+               }
+       }
+       return err;
+}
+
+/*
+ * Undo __hw_addr_sync(): for every entry on @from_list that is marked
+ * synced, release it from @to_list, clear the synced mark and drop one
+ * reference on the @from_list entry itself.
+ */
+static void __hw_addr_unsync(struct list_head *to_list, int *to_delta,
+                            struct list_head *from_list, int *from_delta,
+                            int addr_len)
+{
+       struct netdev_hw_addr *ha, *tmp;
+
+       /* _safe iteration: dropping the reference may delete the entry. */
+       list_for_each_entry_safe(ha, tmp, from_list, list) {
+               if (ha->synced) {
+                       __hw_addr_del(to_list, to_delta, ha->addr,
+                                     addr_len, ha->type);
+                       ha->synced = false;
+                       __hw_addr_del(from_list, from_delta, ha->addr,
+                                     addr_len, ha->type);
+               }
+       }
+}
+
+
+/* Unlink every entry of @list and queue each one for freeing through RCU. */
+static void __hw_addr_flush(struct list_head *list)
+{
+       struct netdev_hw_addr *entry, *next;
+
+       list_for_each_entry_safe(entry, next, list, list) {
+               list_del_rcu(&entry->list);
+               call_rcu(&entry->rcu_head, ha_rcu_free);
+       }
+}
+
+/* Device addresses handling functions */
+
+/*
+ * Release every entry on dev->dev_addr_list and reset dev->dev_addr, which
+ * dev_addr_init() pointed into the first entry of that list.
+ */
+static void dev_addr_flush(struct net_device *dev)
+{
+       /* rtnl_mutex must be held here */
+
+       __hw_addr_flush(&dev->dev_addr_list);
+       dev->dev_addr = NULL;
+}
+
+/*
+ * Initialise dev->dev_addr_list with a single all-zero address of type
+ * NETDEV_HW_ADDR_T_LAN and point dev->dev_addr at its storage.
+ *
+ * Returns 0 on success or the error from __hw_addr_add(); on error
+ * dev->dev_addr is left unchanged.
+ */
+static int dev_addr_init(struct net_device *dev)
+{
+       unsigned char addr[MAX_ADDR_LEN];
+       struct netdev_hw_addr *ha;
+       int err;
+
+       /* rtnl_mutex must be held here */
+
+       INIT_LIST_HEAD(&dev->dev_addr_list);
+       /*
+        * Use sizeof(addr), the whole array: sizeof(*addr) is a single
+        * byte and would leave the rest of the new entry's address
+        * uninitialised, since the entry is allocated with kmalloc().
+        */
+       memset(addr, 0, sizeof(addr));
+       err = __hw_addr_add(&dev->dev_addr_list, NULL, addr, sizeof(addr),
+                           NETDEV_HW_ADDR_T_LAN);
+       if (!err) {
+               /*
+                * Get the first (previously created) address from the list
+                * and set dev_addr pointer to this location.
+                */
+               ha = list_first_entry(&dev->dev_addr_list,
+                                     struct netdev_hw_addr, list);
+               dev->dev_addr = ha->addr;
+       }
+       return err;
+}
+
+/**
+ *     dev_addr_add    - Add a device address
+ *     @dev: device
+ *     @addr: address to add
+ *     @addr_type: address type
+ *
+ *     Take a reference on @addr in the device address list, creating the
+ *     entry if it is not there yet.  NETDEV_CHANGEADDR is sent on success.
+ *     Returns 0 or a negative error code.
+ *
+ *     The caller must hold the rtnl_mutex.
+ */
+int dev_addr_add(struct net_device *dev, unsigned char *addr,
+                unsigned char addr_type)
+{
+       int err;
+
+       ASSERT_RTNL();
+
+       err = __hw_addr_add(&dev->dev_addr_list, NULL, addr, dev->addr_len,
+                           addr_type);
+       if (err)
+               return err;
+
+       call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+       return 0;
+}
+EXPORT_SYMBOL(dev_addr_add);
+
+/**
+ *     dev_addr_del    - Release a device address.
+ *     @dev: device
+ *     @addr: address to delete
+ *     @addr_type: address type
+ *
+ *     Release reference to a device address and remove it from the device
+ *     if the reference count drops to zero.  NETDEV_CHANGEADDR is sent on
+ *     success.
+ *
+ *     Returns 0 on success, or -ENOENT if the address is not on the list
+ *     or if releasing it would free the entry dev->dev_addr points to.
+ *
+ *     The caller must hold the rtnl_mutex.
+ */
+int dev_addr_del(struct net_device *dev, unsigned char *addr,
+                unsigned char addr_type)
+{
+       int err;
+       struct netdev_hw_addr *ha;
+
+       ASSERT_RTNL();
+
+       /*
+        * We can not remove the first address from the list because
+        * dev->dev_addr points to that.  Refuse only when this request
+        * would actually hit that entry (same address, and a type that
+        * __hw_addr_del() would accept) and drop its last reference;
+        * requests for other addresses must still be served.
+        */
+       ha = list_first_entry(&dev->dev_addr_list, struct netdev_hw_addr, list);
+       if (ha->addr == dev->dev_addr && ha->refcount == 1 &&
+           !memcmp(ha->addr, addr, dev->addr_len) &&
+           (ha->type == addr_type || !addr_type))
+               return -ENOENT;
+
+       err = __hw_addr_del(&dev->dev_addr_list, NULL, addr, dev->addr_len,
+                           addr_type);
+       if (!err)
+               call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+       return err;
+}
+EXPORT_SYMBOL(dev_addr_del);
+
+/**
+ *     dev_addr_add_multiple   - Add device addresses from another device
+ *     @to_dev: device to which addresses will be added
+ *     @from_dev: device from which addresses will be added
+ *     @addr_type: address type - 0 means type will be used from from_dev
+ *
+ *     Add the device addresses of one device to another.  Returns -EINVAL
+ *     if the two devices have different address lengths, otherwise the
+ *     result of adding the addresses.  NETDEV_CHANGEADDR is sent for
+ *     @to_dev on success.
+ *
+ *     The caller must hold the rtnl_mutex.
+ */
+int dev_addr_add_multiple(struct net_device *to_dev,
+                         struct net_device *from_dev,
+                         unsigned char addr_type)
+{
+       int err;
+
+       ASSERT_RTNL();
+
+       if (from_dev->addr_len != to_dev->addr_len)
+               return -EINVAL;
+       err = __hw_addr_add_multiple(&to_dev->dev_addr_list, NULL,
+                                    &from_dev->dev_addr_list,
+                                    to_dev->addr_len, addr_type);
+       if (!err)
+               call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
+       return err;
+}
+EXPORT_SYMBOL(dev_addr_add_multiple);
+
+/**
+ *     dev_addr_del_multiple   - Delete device addresses by another device
+ *     @to_dev: device where the addresses will be deleted
+ *     @from_dev: device whose address list selects the addresses to delete
+ *     @addr_type: address type - 0 means type will be used from from_dev
+ *
+ *     Deletes from @to_dev the addresses listed on @from_dev.  Returns
+ *     -EINVAL if the two devices have different address lengths, otherwise
+ *     0; NETDEV_CHANGEADDR is then sent for @to_dev unconditionally.
+ *
+ *     The caller must hold the rtnl_mutex.
+ */
+int dev_addr_del_multiple(struct net_device *to_dev,
+                         struct net_device *from_dev,
+                         unsigned char addr_type)
+{
+       ASSERT_RTNL();
+
+       if (from_dev->addr_len != to_dev->addr_len)
+               return -EINVAL;
+       __hw_addr_del_multiple(&to_dev->dev_addr_list, NULL,
+                              &from_dev->dev_addr_list,
+                              to_dev->addr_len, addr_type);
+       call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
+       return 0;
+}
+EXPORT_SYMBOL(dev_addr_del_multiple);
+
+/* unicast and multicast addresses handling functions */
+
+/*
+ * Drop one user of the entry on the singly linked @list whose address
+ * equals @addr and whose length equals @alen.
+ *
+ * With @glbl set, the entry's da_gusers is cleared first; if it was
+ * already 0 the search stops and -ENOENT is returned.  When da_users
+ * reaches zero the entry is unlinked, freed and *@count is decremented.
+ *
+ * Returns 0 on success, -ENOENT otherwise.
+ */
+int __dev_addr_delete(struct dev_addr_list **list, int *count,
+                     void *addr, int alen, int glbl)
+{
+       struct dev_addr_list *da;
+
+       for (; (da = *list) != NULL; list = &da->next) {
+               /*
+                * Check the length before comparing so that memcmp() never
+                * reads more than @alen bytes from the caller's buffer.
+                */
+               if (alen == da->da_addrlen &&
+                   memcmp(da->da_addr, addr, da->da_addrlen) == 0) {
+                       if (glbl) {
+                               int old_glbl = da->da_gusers;
+                               da->da_gusers = 0;
+                               if (old_glbl == 0)
+                                       break;
+                       }
+                       if (--da->da_users)
+                               return 0;
+
+                       *list = da->next;
+                       kfree(da);
+                       (*count)--;
+                       return 0;
+               }
+       }
+       return -ENOENT;
+}
+
+int __dev_addr_add(struct dev_addr_list **list, int *count,
+                  void *addr, int alen, int glbl)
+{
+       struct dev_addr_list *da;
+
+       for (da = *list; da != NULL; da = da->next) {
+               if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
                     da->da_addrlen == alen) {
                         if (glbl) {
                                 int old_glbl = da->da_gusers;
@@ -3174,24 +3845,22 @@ int __dev_addr_add(struct dev_addr_list **list, int *count,
   *     dev_unicast_delete      - Release secondary unicast address.
   *     @dev: device
   *     @addr: address to delete
- *     @alen: length of @addr
   *
   *     Release reference to a secondary unicast address and remove it
   *     from the device if the reference count drops to zero.
   *
   *     The caller must hold the rtnl_mutex.
   */
-int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
+int dev_unicast_delete(struct net_device *dev, void *addr)
  {
         int err;
  
         ASSERT_RTNL();
  
-       netif_addr_lock_bh(dev);
-       err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
+       err = __hw_addr_del(&dev->uc_list, &dev->uc_count, addr,
+                           dev->addr_len, NETDEV_HW_ADDR_T_UNICAST);
         if (!err)
                 __dev_set_rx_mode(dev);
-       netif_addr_unlock_bh(dev);
         return err;
  }
  EXPORT_SYMBOL(dev_unicast_delete);
@@ -3200,24 +3869,22 @@ EXPORT_SYMBOL(dev_unicast_delete);
   *     dev_unicast_add         - add a secondary unicast address
   *     @dev: device
   *     @addr: address to add
- *     @alen: length of @addr
   *
   *     Add a secondary unicast address to the device or increase
   *     the reference count if it already exists.
   *
   *     The caller must hold the rtnl_mutex.
   */
-int dev_unicast_add(struct net_device *dev, void *addr, int alen)
+int dev_unicast_add(struct net_device *dev, void *addr)
  {
         int err;
  
         ASSERT_RTNL();
  
-       netif_addr_lock_bh(dev);
-       err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
+       err = __hw_addr_add(&dev->uc_list, &dev->uc_count, addr,
+                           dev->addr_len, NETDEV_HW_ADDR_T_UNICAST);
         if (!err)
                 __dev_set_rx_mode(dev);
-       netif_addr_unlock_bh(dev);
         return err;
  }
  EXPORT_SYMBOL(dev_unicast_add);
@@ -3274,8 +3941,7 @@ void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
   *     @from: source device
   *
   *     Add newly added addresses to the destination device and release
- *     addresses that have no users left. The source device must be
- *     locked by netif_tx_lock_bh.
+ *     addresses that have no users left.
   *
   *     This function is intended to be called from the dev->set_rx_mode
   *     function of layered software devices.
@@ -3284,12 +3950,15 @@ int dev_unicast_sync(struct net_device *to, struct net_device *from)
  {
         int err = 0;
  
-       netif_addr_lock_bh(to);
-       err = __dev_addr_sync(&to->uc_list, &to->uc_count,
-                             &from->uc_list, &from->uc_count);
+       ASSERT_RTNL();
+
+       if (to->addr_len != from->addr_len)
+               return -EINVAL;
+
+       err = __hw_addr_sync(&to->uc_list, &to->uc_count,
+                            &from->uc_list, &from->uc_count, to->addr_len);
         if (!err)
                 __dev_set_rx_mode(to);
-       netif_addr_unlock_bh(to);
         return err;
  }
  EXPORT_SYMBOL(dev_unicast_sync);
@@ -3305,18 +3974,33 @@ EXPORT_SYMBOL(dev_unicast_sync);
   */
  void dev_unicast_unsync(struct net_device *to, struct net_device *from)
  {
-       netif_addr_lock_bh(from);
-       netif_addr_lock(to);
+       ASSERT_RTNL();
  
-       __dev_addr_unsync(&to->uc_list, &to->uc_count,
-                         &from->uc_list, &from->uc_count);
-       __dev_set_rx_mode(to);
+       if (to->addr_len != from->addr_len)
+               return;
  
-       netif_addr_unlock(to);
-       netif_addr_unlock_bh(from);
+       __hw_addr_unsync(&to->uc_list, &to->uc_count,
+                        &from->uc_list, &from->uc_count, to->addr_len);
+       __dev_set_rx_mode(to);
  }
  EXPORT_SYMBOL(dev_unicast_unsync);
  
+/* Release every entry on dev->uc_list and reset dev->uc_count to zero. */
+static void dev_unicast_flush(struct net_device *dev)
+{
+       /* rtnl_mutex must be held here */
+
+       __hw_addr_flush(&dev->uc_list);
+       dev->uc_count = 0;
+}
+
+/* Initialise dev->uc_list as an empty list. */
+static void dev_unicast_init(struct net_device *dev)
+{
+       /* rtnl_mutex must be held here */
+
+       INIT_LIST_HEAD(&dev->uc_list);
+}
+
+
  static void __dev_addr_discard(struct dev_addr_list **list)
  {
         struct dev_addr_list *tmp;
@@ -3335,9 +4019,6 @@ static void dev_addr_discard(struct net_device *dev)
  {
         netif_addr_lock_bh(dev);
  
-       __dev_addr_discard(&dev->uc_list);
-       dev->uc_count = 0;
-
         __dev_addr_discard(&dev->mc_list);
         dev->mc_count = 0;
  
@@ -3460,6 +4141,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags)
   */
  int dev_set_mtu(struct net_device *dev, int new_mtu)
  {
+       const struct net_device_ops *ops = dev->netdev_ops;
         int err;
  
         if (new_mtu == dev->mtu)
@@ -3473,10 +4155,11 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
                 return -ENODEV;
  
         err = 0;
-       if (dev->change_mtu)
-               err = dev->change_mtu(dev, new_mtu);
+       if (ops->ndo_change_mtu)
+               err = ops->ndo_change_mtu(dev, new_mtu);
         else
                 dev->mtu = new_mtu;
+
         if (!err && dev->flags & IFF_UP)
                 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
         return err;
@@ -3491,15 +4174,16 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
   */
  int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
  {
+       const struct net_device_ops *ops = dev->netdev_ops;
         int err;
  
-       if (!dev->set_mac_address)
+       if (!ops->ndo_set_mac_address)
                 return -EOPNOTSUPP;
         if (sa->sa_family != dev->type)
                 return -EINVAL;
         if (!netif_device_present(dev))
                 return -ENODEV;
-       err = dev->set_mac_address(dev, sa);
+       err = ops->ndo_set_mac_address(dev, sa);
         if (!err)
                 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
         return err;
@@ -3579,10 +4263,13 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
  {
         int err;
         struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
+       const struct net_device_ops *ops;
  
         if (!dev)
                 return -ENODEV;
  
+       ops = dev->netdev_ops;
+
         switch (cmd) {
                 case SIOCSIFFLAGS:      /* Set interface flags */
                         return dev_change_flags(dev, ifr->ifr_flags);
@@ -3606,15 +4293,15 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
                         return 0;
  
                 case SIOCSIFMAP:
-                       if (dev->set_config) {
+                       if (ops->ndo_set_config) {
                                 if (!netif_device_present(dev))
                                         return -ENODEV;
-                               return dev->set_config(dev, &ifr->ifr_map);
+                               return ops->ndo_set_config(dev, &ifr->ifr_map);
                         }
                         return -EOPNOTSUPP;
  
                 case SIOCADDMULTI:
-                       if ((!dev->set_multicast_list && !dev->set_rx_mode) ||
+                       if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
                             ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
                                 return -EINVAL;
                         if (!netif_device_present(dev))
@@ -3623,7 +4310,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
                                           dev->addr_len, 1);
  
                 case SIOCDELMULTI:
-                       if ((!dev->set_multicast_list && !dev->set_rx_mode) ||
+                       if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
                             ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
                                 return -EINVAL;
                         if (!netif_device_present(dev))
@@ -3659,12 +4346,12 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
                             cmd == SIOCSMIIREG ||
                             cmd == SIOCBRADDIF ||
                             cmd == SIOCBRDELIF ||
+                           cmd == SIOCSHWTSTAMP ||
                             cmd == SIOCWANDEV) {
                                 err = -EOPNOTSUPP;
-                               if (dev->do_ioctl) {
+                               if (ops->ndo_do_ioctl) {
                                         if (netif_device_present(dev))
-                                               err = dev->do_ioctl(dev, ifr,
-                                                                   cmd);
+                                               err = ops->ndo_do_ioctl(dev, ifr, cmd);
                                         else
                                                 err = -ENODEV;
                                 }
@@ -3814,6 +4501,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
                 case SIOCBONDCHANGEACTIVE:
                 case SIOCBRADDIF:
                 case SIOCBRDELIF:
+               case SIOCSHWTSTAMP:
                         if (!capable(CAP_NET_ADMIN))
                                 return -EPERM;
                         /* fall through */
@@ -3923,10 +4611,11 @@ static void rollback_registered(struct net_device *dev)
         /*
          *      Flush the unicast and multicast chains
          */
+       dev_unicast_flush(dev);
         dev_addr_discard(dev);
  
-       if (dev->uninit)
-               dev->uninit(dev);
+       if (dev->netdev_ops->ndo_uninit)
+               dev->netdev_ops->ndo_uninit(dev);
  
         /* Notifier chain MUST detach us from master device. */
         WARN_ON(dev->master);
@@ -4016,7 +4705,7 @@ int register_netdevice(struct net_device *dev)
         struct hlist_head *head;
         struct hlist_node *p;
         int ret;
-       struct net *net;
+       struct net *net = dev_net(dev);
  
         BUG_ON(dev_boot_phase);
         ASSERT_RTNL();
@@ -4025,8 +4714,7 @@ int register_netdevice(struct net_device *dev)
  
         /* When net_device's are persistent, this will be fatal. */
         BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
-       BUG_ON(!dev_net(dev));
-       net = dev_net(dev);
+       BUG_ON(!net);
  
         spin_lock_init(&dev->addr_list_lock);
         netdev_set_addr_lockdep_class(dev);
@@ -4035,8 +4723,8 @@ int register_netdevice(struct net_device *dev)
         dev->iflink = -1;
  
         /* Init, if this function is available */
-       if (dev->init) {
-               ret = dev->init(dev);
+       if (dev->netdev_ops->ndo_init) {
+               ret = dev->netdev_ops->ndo_init(dev);
                 if (ret) {
                         if (ret > 0)
                                 ret = -EIO;
@@ -4114,12 +4802,51 @@ out:
         return ret;
  
  err_uninit:
-       if (dev->uninit)
-               dev->uninit(dev);
+       if (dev->netdev_ops->ndo_uninit)
+               dev->netdev_ops->ndo_uninit(dev);
         goto out;
  }
  
  /**
+ *     init_dummy_netdev       - init a dummy network device for NAPI
+ *     @dev: device to init
+ *
+ *     This takes a network device structure and initialize the minimum
+ *     amount of fields so it can be used to schedule NAPI polls without
+ *     registering a full blown interface. This is to be used by drivers
+ *     that need to tie several hardware interfaces to a single NAPI
+ *     poll scheduler due to HW limitations.
+ */
+int init_dummy_netdev(struct net_device *dev)
+{
+       /* Clear everything. Note we don't initialize spinlocks
+        * as they aren't supposed to be taken by any of the
+        * NAPI code and this dummy netdev is supposed to be
+        * only ever used for NAPI polls
+        */
+       memset(dev, 0, sizeof(struct net_device));
+
+       /* make sure we BUG if trying to hit standard
+        * register/unregister code path
+        */
+       dev->reg_state = NETREG_DUMMY;
+
+       /* initialize the ref count */
+       atomic_set(&dev->refcnt, 1);
+
+       /* NAPI wants this */
+       INIT_LIST_HEAD(&dev->napi_list);
+
+       /* a dummy interface is started by default */
+       set_bit(__LINK_STATE_PRESENT, &dev->state);
+       set_bit(__LINK_STATE_START, &dev->state);
+
+       /* always succeeds */
+       return 0;
+}
+EXPORT_SYMBOL_GPL(init_dummy_netdev);
+
+
+/**
   *     register_netdev - register a network device
   *     @dev: device to register
   *
@@ -4271,10 +4998,41 @@ void netdev_run_todo(void)
         }
  }
  
-static struct net_device_stats *internal_stats(struct net_device *dev)
+/**
+ *     dev_get_stats   - get network device statistics
+ *     @dev: device to get statistics from
+ *
+ *     Get network statistics from device. The device driver may provide
+ *     its own method by setting dev->netdev_ops->ndo_get_stats; otherwise
+ *     the internal statistics structure is used.
+ */
+const struct net_device_stats *dev_get_stats(struct net_device *dev)
  {
-       return &dev->stats;
+       const struct net_device_ops *ops = dev->netdev_ops;
+
+       if (ops->ndo_get_stats)
+               return ops->ndo_get_stats(dev);
+       else {
+               unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
+               struct net_device_stats *stats = &dev->stats;
+               unsigned int i;
+               struct netdev_queue *txq;
+
+               for (i = 0; i < dev->num_tx_queues; i++) {
+                       txq = netdev_get_tx_queue(dev, i);
+                       tx_bytes   += txq->tx_bytes;
+                       tx_packets += txq->tx_packets;
+                       tx_dropped += txq->tx_dropped;
+               }
+               if (tx_bytes || tx_packets || tx_dropped) {
+                       stats->tx_bytes   = tx_bytes;
+                       stats->tx_packets = tx_packets;
+                       stats->tx_dropped = tx_dropped;
+               }
+               return stats;
+       }
  }
+EXPORT_SYMBOL(dev_get_stats);
  
  static void netdev_init_one_queue(struct net_device *dev,
                                   struct netdev_queue *queue,
@@ -4307,18 +5065,18 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
         struct netdev_queue *tx;
         struct net_device *dev;
         size_t alloc_size;
-       void *p;
+       struct net_device *p;
  
         BUG_ON(strlen(name) >= sizeof(dev->name));
  
         alloc_size = sizeof(struct net_device);
         if (sizeof_priv) {
                 /* ensure 32-byte alignment of private area */
-               alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
+               alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
                 alloc_size += sizeof_priv;
         }
         /* ensure 32-byte alignment of whole construct */
-       alloc_size += NETDEV_ALIGN_CONST;
+       alloc_size += NETDEV_ALIGN - 1;
  
         p = kzalloc(alloc_size, GFP_KERNEL);
         if (!p) {
@@ -4330,34 +5088,39 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
         if (!tx) {
                 printk(KERN_ERR "alloc_netdev: Unable to allocate "
                        "tx qdiscs.\n");
-               kfree(p);
-               return NULL;
+               goto free_p;
         }
  
-       dev = (struct net_device *)
-               (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
+       dev = PTR_ALIGN(p, NETDEV_ALIGN);
         dev->padded = (char *)dev - (char *)p;
+
+       if (dev_addr_init(dev))
+               goto free_tx;
+
+       dev_unicast_init(dev);
+
         dev_net_set(dev, &init_net);
  
         dev->_tx = tx;
         dev->num_tx_queues = queue_count;
         dev->real_num_tx_queues = queue_count;
  
-       if (sizeof_priv) {
-               dev->priv = ((char *)dev +
-                            ((sizeof(struct net_device) + NETDEV_ALIGN_CONST)
-                             & ~NETDEV_ALIGN_CONST));
-       }
-
         dev->gso_max_size = GSO_MAX_SIZE;
  
         netdev_init_queues(dev);
  
-       dev->get_stats = internal_stats;
-       netpoll_netdev_init(dev);
+       INIT_LIST_HEAD(&dev->napi_list);
+       dev->priv_flags = IFF_XMIT_DST_RELEASE;
         setup(dev);
         strcpy(dev->name, name);
         return dev;
+
+free_tx:
+       kfree(tx);
+
+free_p:
+       kfree(p);
+       return NULL;
  }
  EXPORT_SYMBOL(alloc_netdev_mq);
  
@@ -4371,10 +5134,18 @@ EXPORT_SYMBOL(alloc_netdev_mq);
   */
  void free_netdev(struct net_device *dev)
  {
+       struct napi_struct *p, *n;
+
         release_net(dev_net(dev));
  
         kfree(dev->_tx);
  
+       /* Flush device addresses */
+       dev_addr_flush(dev);
+
+       list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
+               netif_napi_del(p);
+
         /*  Compatibility with error handling in drivers */
         if (dev->reg_state == NETREG_UNINITIALIZED) {
                 kfree((char *)dev - dev->padded);
@@ -4531,6 +5302,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
         /*
          *      Flush the unicast and multicast chains
          */
+       dev_unicast_flush(dev);
         dev_addr_discard(dev);
  
         netdev_unregister_kobject(dev);
@@ -4610,122 +5382,6 @@ static int dev_cpu_callback(struct notifier_block *nfb,
         return NOTIFY_OK;
  }
  
-#ifdef CONFIG_NET_DMA
-/**
- * net_dma_rebalance - try to maintain one DMA channel per CPU
- * @net_dma: DMA client and associated data (lock, channels, channel_mask)
- *
- * This is called when the number of channels allocated to the net_dma client
- * changes.  The net_dma client tries to have one DMA channel per CPU.
- */
-
-static void net_dma_rebalance(struct net_dma *net_dma)
-{
-       unsigned int cpu, i, n, chan_idx;
-       struct dma_chan *chan;
-
-       if (cpus_empty(net_dma->channel_mask)) {
-               for_each_online_cpu(cpu)
-                       rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
-               return;
-       }
-
-       i = 0;
-       cpu = first_cpu(cpu_online_map);
-
-       for_each_cpu_mask_nr(chan_idx, net_dma->channel_mask) {
-               chan = net_dma->channels[chan_idx];
-
-               n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
-                  + (i < (num_online_cpus() %
-                       cpus_weight(net_dma->channel_mask)) ? 1 : 0));
-
-               while(n) {
-                       per_cpu(softnet_data, cpu).net_dma = chan;
-                       cpu = next_cpu(cpu, cpu_online_map);
-                       n--;
-               }
-               i++;
-       }
-}
-
-/**
- * netdev_dma_event - event callback for the net_dma_client
- * @client: should always be net_dma_client
- * @chan: DMA channel for the event
- * @state: DMA state to be handled
- */
-static enum dma_state_client
-netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
-       enum dma_state state)
-{
-       int i, found = 0, pos = -1;
-       struct net_dma *net_dma =
-               container_of(client, struct net_dma, client);
-       enum dma_state_client ack = DMA_DUP; /* default: take no action */
-
-       spin_lock(&net_dma->lock);
-       switch (state) {
-       case DMA_RESOURCE_AVAILABLE:
-               for (i = 0; i < nr_cpu_ids; i++)
-                       if (net_dma->channels[i] == chan) {
-                               found = 1;
-                               break;
-                       } else if (net_dma->channels[i] == NULL && pos < 0)
-                               pos = i;
-
-               if (!found && pos >= 0) {
-                       ack = DMA_ACK;
-                       net_dma->channels[pos] = chan;
-                       cpu_set(pos, net_dma->channel_mask);
-                       net_dma_rebalance(net_dma);
-               }
-               break;
-       case DMA_RESOURCE_REMOVED:
-               for (i = 0; i < nr_cpu_ids; i++)
-                       if (net_dma->channels[i] == chan) {
-                               found = 1;
-                               pos = i;
-                               break;
-                       }
-
-               if (found) {
-                       ack = DMA_ACK;
-                       cpu_clear(pos, net_dma->channel_mask);
-                       net_dma->channels[i] = NULL;
-                       net_dma_rebalance(net_dma);
-               }
-               break;
-       default:
-               break;
-       }
-       spin_unlock(&net_dma->lock);
-
-       return ack;
-}
-
-/**
- * netdev_dma_register - register the networking subsystem as a DMA client
- */
-static int __init netdev_dma_register(void)
-{
-       net_dma.channels = kzalloc(nr_cpu_ids * sizeof(struct net_dma),
-                                                               GFP_KERNEL);
-       if (unlikely(!net_dma.channels)) {
-               printk(KERN_NOTICE
-                               "netdev_dma: no memory for net_dma.channels\n");
-               return -ENOMEM;
-       }
-       spin_lock_init(&net_dma.lock);
-       dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask);
-       dma_async_client_register(&net_dma.client);
-       dma_async_client_chan_request(&net_dma.client);
-       return 0;
-}
-
-#else
-static int __init netdev_dma_register(void) { return -ENODEV; }
-#endif /* CONFIG_NET_DMA */
  
  /**
   *     netdev_increment_features - increment feature set by one
@@ -4843,13 +5499,14 @@ static struct pernet_operations __net_initdata netdev_net_ops = {
  
  static void __net_exit default_device_exit(struct net *net)
  {
-       struct net_device *dev, *next;
+       struct net_device *dev;
         /*
          * Push all migratable of the network devices back to the
          * initial network namespace
          */
         rtnl_lock();
-       for_each_netdev_safe(net, dev, next) {
+restart:
+       for_each_netdev(net, dev) {
                 int err;
                 char fb_name[IFNAMSIZ];
  
@@ -4860,7 +5517,7 @@ static void __net_exit default_device_exit(struct net *net)
                 /* Delete virtual devices */
                 if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
                         dev->rtnl_link_ops->dellink(dev);
-                       continue;
+                       goto restart;
                 }
  
                 /* Push remaing network devices to init_net */
@@ -4871,6 +5528,7 @@ static void __net_exit default_device_exit(struct net *net)
                                 __func__, dev->name, err);
                         BUG();
                 }
+               goto restart;
         }
         rtnl_unlock();
  }
@@ -4909,21 +5567,6 @@ static int __init net_dev_init(void)
         if (register_pernet_subsys(&netdev_net_ops))
                 goto out;
  
-       /* The loopback device is special if any other network devices
-        * is present in a network namespace the loopback device must
-        * be present. Since we now dynamically allocate and free the
-        * loopback device ensure this invariant is maintained by
-        * keeping the loopback device as the first device on the
-        * list of network devices.  Ensuring the loopback devices
-        * is the first device that appears and the last network device
-        * that disappears.
-        */
-       if (register_pernet_device(&loopback_net_ops))
-               goto out;
-
-       if (register_pernet_device(&default_device_ops))
-               goto out;
-
         /*
          *      Initialise the packet receive queues.
          */
@@ -4938,12 +5581,27 @@ static int __init net_dev_init(void)
  
                 queue->backlog.poll = process_backlog;
                 queue->backlog.weight = weight_p;
+               queue->backlog.gro_list = NULL;
+               queue->backlog.gro_count = 0;
         }
  
-       netdev_dma_register();
-
         dev_boot_phase = 0;
  
+       /* The loopback device is special if any other network devices
+        * is present in a network namespace the loopback device must
+        * be present. Since we now dynamically allocate and free the
+        * loopback device ensure this invariant is maintained by
+        * keeping the loopback device as the first device on the
+        * list of network devices.  Ensuring the loopback devices
+        * is the first device that appears and the last network device
+        * that disappears.
+        */
+       if (register_pernet_device(&loopback_net_ops))
+               goto out;
+
+       if (register_pernet_device(&default_device_ops))
+               goto out;
+
         open_softirq(NET_TX_SOFTIRQ, net_tx_action);
         open_softirq(NET_RX_SOFTIRQ, net_rx_action);
  
@@ -4957,6 +5615,14 @@ out:
  
  subsys_initcall(net_dev_init);
  
+static int __init initialize_hashrnd(void) /* init-time hook: fills skb_tx_hashrnd with random bytes */
+{
+       get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd)); /* overwrites the entire variable */
+       return 0; /* always reports success */
+}
+
+late_initcall_sync(initialize_hashrnd); /* run at the late_initcall_sync level; NOTE(review): presumably deferred so more entropy is available - confirm */
+
  EXPORT_SYMBOL(__dev_get_by_index);
  EXPORT_SYMBOL(__dev_get_by_name);
  EXPORT_SYMBOL(__dev_remove_pack);