netlink: fix for too early rmmod
[safe/jmp/linux-2.6] / net / core / dev.c
index 9977288..be9924f 100644 (file)
 #include <net/dst.h>
 #include <net/pkt_sched.h>
 #include <net/checksum.h>
+#include <net/xfrm.h>
 #include <linux/highmem.h>
 #include <linux/init.h>
 #include <linux/kmod.h>
@@ -985,7 +986,7 @@ rollback:
        /* For now only devices in the initial network namespace
         * are in sysfs.
         */
-       if (net == &init_net) {
+       if (net_eq(net, &init_net)) {
                ret = device_rename(&dev->dev, dev->name);
                if (ret) {
                        memcpy(dev->name, oldname, IFNAMSIZ);
@@ -1352,7 +1353,7 @@ rollback:
                                nb->notifier_call(nb, NETDEV_DOWN, dev);
                        }
                        nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
-                       nb->notifier_call(nb, NETDEV_UNREGISTER_PERNET, dev);
+                       nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
                }
        }
 
@@ -1419,6 +1420,45 @@ static inline void net_timestamp(struct sk_buff *skb)
                skb->tstamp.tv64 = 0;
 }
 
+/**
+ * dev_forward_skb - loopback an skb to another netif
+ *
+ * @dev: destination network device
+ * @skb: buffer to forward
+ *
+ * return values:
+ *     NET_RX_SUCCESS  (no congestion)
+ *     NET_RX_DROP     (packet was dropped)
+ *
+ * dev_forward_skb can be used for injecting an skb from the
+ * start_xmit function of one device into the receive queue
+ * of another device.
+ *
+ * The receiving device may be in another namespace, so
+ * we have to clear all information in the skb that could
+ * impact namespace isolation.
+ */
+int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
+{
+       skb_orphan(skb);
+
+       if (!(dev->flags & IFF_UP))
+               return NET_RX_DROP;
+
+       if (skb->len > (dev->mtu + dev->hard_header_len))
+               return NET_RX_DROP;
+
+       skb_dst_drop(skb);
+       skb->tstamp.tv64 = 0;
+       skb->pkt_type = PACKET_HOST;
+       skb->protocol = eth_type_trans(skb, dev);
+       skb->mark = 0;
+       secpath_reset(skb);
+       nf_reset(skb);
+       return netif_rx(skb);
+}
+EXPORT_SYMBOL_GPL(dev_forward_skb);
+
 /*
  *     Support routine. Sends outgoing frames to any network
  *     taps currently in use.
@@ -2287,7 +2327,7 @@ static int ing_filter(struct sk_buff *skb)
        if (MAX_RED_LOOP < ttl++) {
                printk(KERN_WARNING
                       "Redir loop detected Dropping packet (%d->%d)\n",
-                      skb->iif, dev->ifindex);
+                      skb->skb_iif, dev->ifindex);
                return TC_ACT_SHOT;
        }
 
@@ -2395,8 +2435,8 @@ int netif_receive_skb(struct sk_buff *skb)
        if (netpoll_receive_skb(skb))
                return NET_RX_DROP;
 
-       if (!skb->iif)
-               skb->iif = skb->dev->ifindex;
+       if (!skb->skb_iif)
+               skb->skb_iif = skb->dev->ifindex;
 
        null_or_orig = NULL;
        orig_dev = skb->dev;
@@ -2637,9 +2677,10 @@ __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
                return GRO_NORMAL;
 
        for (p = napi->gro_list; p; p = p->next) {
-               NAPI_GRO_CB(p)->same_flow = (p->dev == skb->dev)
-                       && !compare_ether_header(skb_mac_header(p),
-                                                skb_gro_mac_header(skb));
+               NAPI_GRO_CB(p)->same_flow =
+                       (p->dev == skb->dev) &&
+                       !compare_ether_header(skb_mac_header(p),
+                                             skb_gro_mac_header(skb));
                NAPI_GRO_CB(p)->flush = 0;
        }
 
@@ -4730,22 +4771,23 @@ static void net_set_todo(struct net_device *dev)
 
 static void rollback_registered_many(struct list_head *head)
 {
-       struct net_device *dev, *aux, *fdev;
-       LIST_HEAD(pernet_list);
+       struct net_device *dev, *tmp;
 
        BUG_ON(dev_boot_phase);
        ASSERT_RTNL();
 
-       list_for_each_entry(dev, head, unreg_list) {
+       list_for_each_entry_safe(dev, tmp, head, unreg_list) {
                /* Some devices call without registering
-                * for initialization unwind.
+                * for initialization unwind. Remove those
+                * devices and proceed with the remaining.
                 */
                if (dev->reg_state == NETREG_UNINITIALIZED) {
                        pr_debug("unregister_netdevice: device %s/%p never "
                                 "was registered\n", dev->name, dev);
 
                        WARN_ON(1);
-                       return;
+                       list_del(&dev->unreg_list);
+                       continue;
                }
 
                BUG_ON(dev->reg_state != NETREG_REGISTERED);
@@ -4787,26 +4829,14 @@ static void rollback_registered_many(struct list_head *head)
                netdev_unregister_kobject(dev);
        }
 
-       synchronize_net();
+       /* Process any work delayed until the end of the batch */
+       dev = list_entry(head->next, struct net_device, unreg_list);
+       call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
 
-       list_for_each_entry_safe(dev, aux, head, unreg_list) {
-               int new_net = 1;
-               list_for_each_entry(fdev, &pernet_list, unreg_list) {
-                       if (dev_net(dev) == dev_net(fdev)) {
-                               new_net = 0;
-                               dev_put(dev);
-                               break;
-                       }
-               }
-               if (new_net)
-                       list_move(&dev->unreg_list, &pernet_list);
-       }
+       synchronize_net();
 
-       list_for_each_entry_safe(dev, aux, &pernet_list, unreg_list) {
-               call_netdevice_notifiers(NETDEV_UNREGISTER_PERNET, dev);
-               list_move(&dev->unreg_list, head);
+       list_for_each_entry(dev, head, unreg_list)
                dev_put(dev);
-       }
 }
 
 static void rollback_registered(struct net_device *dev)
@@ -4873,6 +4903,33 @@ unsigned long netdev_fix_features(unsigned long features, const char *name)
 EXPORT_SYMBOL(netdev_fix_features);
 
 /**
+ *     netif_stacked_transfer_operstate -      transfer operstate
+ *     @rootdev: the root or lower level device to transfer state from
+ *     @dev: the device to transfer operstate to
+ *
+ *     Transfer operational state from root to device. This is normally
+ *     called when a stacking relationship exists between the root
+ *     device and the device(a leaf device).
+ */
+void netif_stacked_transfer_operstate(const struct net_device *rootdev,
+                                       struct net_device *dev)
+{
+       if (rootdev->operstate == IF_OPER_DORMANT)
+               netif_dormant_on(dev);
+       else
+               netif_dormant_off(dev);
+
+       if (netif_carrier_ok(rootdev)) {
+               if (!netif_carrier_ok(dev))
+                       netif_carrier_on(dev);
+       } else {
+               if (netif_carrier_ok(dev))
+                       netif_carrier_off(dev);
+       }
+}
+EXPORT_SYMBOL(netif_stacked_transfer_operstate);
+
+/**
  *     register_netdevice      - register a network device
  *     @dev: device to register
  *
@@ -4978,6 +5035,11 @@ int register_netdevice(struct net_device *dev)
                rollback_registered(dev);
                dev->reg_state = NETREG_UNREGISTERED;
        }
+       /*
+        *      Prevent userspace races by waiting until the network
+        *      device is fully setup before sending notifications.
+        */
+       rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
 
 out:
        return ret;
@@ -5088,7 +5150,7 @@ static void netdev_wait_allrefs(struct net_device *dev)
 
                        /* Rebroadcast unregister notification */
                        call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
-                       /* don't resend NETDEV_UNREGISTER_PERNET, _PERNET users
+                       /* don't resend NETDEV_UNREGISTER_BATCH, _BATCH users
                         * should have already handle it the first time */
 
                        if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
@@ -5375,7 +5437,7 @@ EXPORT_SYMBOL(synchronize_net);
  *     unregister_netdevice_queue - remove device from the kernel
  *     @dev: device
  *     @head: list
-
+ *
  *     This function shuts down a device interface and removes it
  *     from the kernel tables.
  *     If head not NULL, device is queued to be unregistered later.
@@ -5401,11 +5463,6 @@ EXPORT_SYMBOL(unregister_netdevice_queue);
 /**
  *     unregister_netdevice_many - unregister many devices
  *     @head: list of devices
- *
- *     WARNING: Calling this modifies the given list
- *     (in rollback_registered_many). It may change the order of the elements
- *     in the list. However, you can assume it does not add or delete elements
- *     to/from the list.
  */
 void unregister_netdevice_many(struct list_head *head)
 {
@@ -5514,7 +5571,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
           this device. They should clean all the things.
        */
        call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
-       call_netdevice_notifiers(NETDEV_UNREGISTER_PERNET, dev);
+       call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
 
        /*
         *      Flush the unicast and multicast chains
@@ -5545,6 +5602,12 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
        /* Notify protocols, that a new device appeared. */
        call_netdevice_notifiers(NETDEV_REGISTER, dev);
 
+       /*
+        *      Prevent userspace races by waiting until the network
+        *      device is fully setup before sending notifications.
+        */
+       rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
+
        synchronize_net();
        err = 0;
 out:
@@ -5713,14 +5776,13 @@ static struct pernet_operations __net_initdata netdev_net_ops = {
 
 static void __net_exit default_device_exit(struct net *net)
 {
-       struct net_device *dev;
+       struct net_device *dev, *aux;
        /*
-        * Push all migratable of the network devices back to the
+        * Push all migratable network devices back to the
         * initial network namespace
         */
        rtnl_lock();
-restart:
-       for_each_netdev(net, dev) {
+       for_each_netdev_safe(net, dev, aux) {
                int err;
                char fb_name[IFNAMSIZ];
 
@@ -5728,11 +5790,9 @@ restart:
                if (dev->features & NETIF_F_NETNS_LOCAL)
                        continue;
 
-               /* Delete virtual devices */
-               if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
-                       dev->rtnl_link_ops->dellink(dev, NULL);
-                       goto restart;
-               }
+               /* Leave virtual devices for the generic cleanup */
+               if (dev->rtnl_link_ops)
+                       continue;
 
                /* Push remaing network devices to init_net */
                snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
@@ -5742,13 +5802,37 @@ restart:
                                __func__, dev->name, err);
                        BUG();
                }
-               goto restart;
        }
        rtnl_unlock();
 }
 
+static void __net_exit default_device_exit_batch(struct list_head *net_list)
+{
+       /* At exit all network devices most be removed from a network
+        * namespace.  Do this in the reverse order of registeration.
+        * Do this across as many network namespaces as possible to
+        * improve batching efficiency.
+        */
+       struct net_device *dev;
+       struct net *net;
+       LIST_HEAD(dev_kill_list);
+
+       rtnl_lock();
+       list_for_each_entry(net, net_list, exit_list) {
+               for_each_netdev_reverse(net, dev) {
+                       if (dev->rtnl_link_ops)
+                               dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
+                       else
+                               unregister_netdevice_queue(dev, &dev_kill_list);
+               }
+       }
+       unregister_netdevice_many(&dev_kill_list);
+       rtnl_unlock();
+}
+
 static struct pernet_operations __net_initdata default_device_ops = {
        .exit = default_device_exit,
+       .exit_batch = default_device_exit_batch,
 };
 
 /*