/* Source: SAFE public projects git trees - safe/jmp/linux-2.6 - blob net/core/dev.c */

   1 /*
   2  *      NET3    Protocol independent device support routines.
   3  *
   4  *              This program is free software; you can redistribute it and/or
   5  *              modify it under the terms of the GNU General Public License
   6  *              as published by the Free Software Foundation; either version
   7  *              2 of the License, or (at your option) any later version.
   8  *
   9  *      Derived from the non IP parts of dev.c 1.0.19
  10  *              Authors:        Ross Biro
  11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13  *
  14  *      Additional Authors:
  15  *              Florian la Roche <rzsfl@rz.uni-sb.de>
  16  *              Alan Cox <gw4pts@gw4pts.ampr.org>
  17  *              David Hinds <dahinds@users.sourceforge.net>
  18  *              Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
  19  *              Adam Sulmicki <adam@cfar.umd.edu>
  20  *              Pekka Riikonen <priikone@poesidon.pspt.fi>
  21  *
  22  *      Changes:
  23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
  24  *                                      to 2 if register_netdev gets called
  25  *                                      before net_dev_init & also removed a
  26  *                                      few lines of code in the process.
  27  *              Alan Cox        :       device private ioctl copies fields back.
  28  *              Alan Cox        :       Transmit queue code does relevant
  29  *                                      stunts to keep the queue safe.
  30  *              Alan Cox        :       Fixed double lock.
  31  *              Alan Cox        :       Fixed promisc NULL pointer trap
  32  *              ????????        :       Support the full private ioctl range
  33  *              Alan Cox        :       Moved ioctl permission check into
  34  *                                      drivers
  35  *              Tim Kordas      :       SIOCADDMULTI/SIOCDELMULTI
  36  *              Alan Cox        :       100 backlog just doesn't cut it when
  37  *                                      you start doing multicast video 8)
  38  *              Alan Cox        :       Rewrote net_bh and list manager.
  39  *              Alan Cox        :       Fix ETH_P_ALL echoback lengths.
  40  *              Alan Cox        :       Took out transmit every packet pass
  41  *                                      Saved a few bytes in the ioctl handler
  42  *              Alan Cox        :       Network driver sets packet type before
  43  *                                      calling netif_rx. Saves a function
  44  *                                      call a packet.
  45  *              Alan Cox        :       Hashed net_bh()
  46  *              Richard Kooijman:       Timestamp fixes.
  47  *              Alan Cox        :       Wrong field in SIOCGIFDSTADDR
  48  *              Alan Cox        :       Device lock protection.
  49  *              Alan Cox        :       Fixed nasty side effect of device close
  50  *                                      changes.
  51  *              Rudi Cilibrasi  :       Pass the right thing to
  52  *                                      set_mac_address()
  53  *              Dave Miller     :       32bit quantity for the device lock to
  54  *                                      make it work out on a Sparc.
  55  *              Bjorn Ekwall    :       Added KERNELD hack.
  56  *              Alan Cox        :       Cleaned up the backlog initialise.
  57  *              Craig Metz      :       SIOCGIFCONF fix if space for under
  58  *                                      1 device.
  59  *          Thomas Bogendoerfer :       Return ENODEV for dev_open, if there
  60  *                                      is no device open function.
  61  *              Andi Kleen      :       Fix error reporting for SIOCGIFCONF
  62  *          Michael Chastain    :       Fix signed/unsigned for SIOCGIFCONF
  63  *              Cyrus Durgin    :       Cleaned for KMOD
  64  *              Adam Sulmicki   :       Bug Fix : Network Device Unload
  65  *                                      A network device unload needs to purge
  66  *                                      the backlog queue.
  67  *      Paul Rusty Russell      :       SIOCSIFNAME
  68  *              Pekka Riikonen  :       Netdev boot-time settings code
  69  *              Andrew Morton   :       Make unregister_netdevice wait
  70  *                                      indefinitely on dev->refcnt
  71  *              J Hadi Salim    :       - Backlog queue sampling
  72  *                                      - netif_rx() feedback
  73  */
  74
  75 #include <asm/uaccess.h>
  76 #include <asm/system.h>
  77 #include <linux/bitops.h>
  78 #include <linux/capability.h>
  79 #include <linux/cpu.h>
  80 #include <linux/types.h>
  81 #include <linux/kernel.h>
  82 #include <linux/sched.h>
  83 #include <linux/mutex.h>
  84 #include <linux/string.h>
  85 #include <linux/mm.h>
  86 #include <linux/socket.h>
  87 #include <linux/sockios.h>
  88 #include <linux/errno.h>
  89 #include <linux/interrupt.h>
  90 #include <linux/if_ether.h>
  91 #include <linux/netdevice.h>
  92 #include <linux/etherdevice.h>
  93 #include <linux/ethtool.h>
  94 #include <linux/notifier.h>
  95 #include <linux/skbuff.h>
  96 #include <net/net_namespace.h>
  97 #include <net/sock.h>
  98 #include <linux/rtnetlink.h>
  99 #include <linux/proc_fs.h>
 100 #include <linux/seq_file.h>
 101 #include <linux/stat.h>
 102 #include <linux/if_bridge.h>
 103 #include <linux/if_macvlan.h>
 104 #include <net/dst.h>
 105 #include <net/pkt_sched.h>
 106 #include <net/checksum.h>
 107 #include <linux/highmem.h>
 108 #include <linux/init.h>
 109 #include <linux/kmod.h>
 110 #include <linux/module.h>
 111 #include <linux/netpoll.h>
 112 #include <linux/rcupdate.h>
 113 #include <linux/delay.h>
 114 #include <net/wext.h>
 115 #include <net/iw_handler.h>
 116 #include <asm/current.h>
 117 #include <linux/audit.h>
 118 #include <linux/dmaengine.h>
 119 #include <linux/err.h>
 120 #include <linux/ctype.h>
 121 #include <linux/if_arp.h>
 122 #include <linux/if_vlan.h>
 123 #include <linux/ip.h>
 124 #include <net/ip.h>
 125 #include <linux/ipv6.h>
 126 #include <linux/in.h>
 127 #include <linux/jhash.h>
 128 #include <linux/random.h>
 129 #include <trace/napi.h>
 130
 131 #include "net-sysfs.h"
 132
 133 /* Instead of increasing this, you should create a hash table. */
 134 #define MAX_GRO_SKBS 8
 135
 136 /* This should be increased if a protocol with a bigger head is added. */
 137 #define GRO_MAX_HEAD (MAX_HEADER + 128)
 138
 139 /*
 140  *      The list of packet types we will receive (as opposed to discard)
 141  *      and the routines to invoke.
 142  *
 143  *      Why 16. Because with 16 the only overlap we get on a hash of the
 144  *      low nibble of the protocol value is RARP/SNAP/X.25.
 145  *
 146  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
 147  *             sure which should go first, but I bet it won't make much
 148  *             difference if we are running VLANs.  The good news is that
 149  *             this protocol won't be in the list unless compiled in, so
 150  *             the average user (w/out VLANs) will not be adversely affected.
 151  *             --BLG
 152  *
 153  *              0800    IP
 154  *              8100    802.1Q VLAN
 155  *              0001    802.3
 156  *              0002    AX.25
 157  *              0004    802.2
 158  *              8035    RARP
 159  *              0005    SNAP
 160  *              0805    X.25
 161  *              0806    ARP
 162  *              8137    IPX
 163  *              0009    Localtalk
 164  *              86DD    IPv6
 165  */
 166
 167 #define PTYPE_HASH_SIZE (16)
 168 #define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)
 169
 170 static DEFINE_SPINLOCK(ptype_lock);
 171 static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
 172 static struct list_head ptype_all __read_mostly;        /* Taps */
 173
 174 /*
 175  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
 176  * semaphore.
 177  *
 178  * Pure readers hold dev_base_lock for reading.
 179  *
 180  * Writers must hold the rtnl semaphore while they loop through the
 181  * dev_base_head list, and hold dev_base_lock for writing when they do the
 182  * actual updates.  This allows pure readers to access the list even
 183  * while a writer is preparing to update it.
 184  *
 185  * To put it another way, dev_base_lock is held for writing only to
 186  * protect against pure readers; the rtnl semaphore provides the
 187  * protection against other writers.
 188  *
 189  * See, for example usages, register_netdevice() and
 190  * unregister_netdevice(), which must be called with the rtnl
 191  * semaphore held.
 192  */
 193 DEFINE_RWLOCK(dev_base_lock);
 194
 195 EXPORT_SYMBOL(dev_base_lock);
 196
 197 #define NETDEV_HASHBITS 8
 198 #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
 199
 200 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
 201 {
 202         unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
 203         return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
 204 }
 205
 206 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
 207 {
 208         return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
 209 }
 210
 211 /* Device list insertion */
 212 static int list_netdevice(struct net_device *dev)
 213 {
 214         struct net *net = dev_net(dev);
 215
 216         ASSERT_RTNL();
 217
 218         write_lock_bh(&dev_base_lock);
 219         list_add_tail(&dev->dev_list, &net->dev_base_head);
 220         hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
 221         hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
 222         write_unlock_bh(&dev_base_lock);
 223         return 0;
 224 }
 225
 226 /* Device list removal */
 227 static void unlist_netdevice(struct net_device *dev)
 228 {
 229         ASSERT_RTNL();
 230
 231         /* Unlink dev from the device chain */
 232         write_lock_bh(&dev_base_lock);
 233         list_del(&dev->dev_list);
 234         hlist_del(&dev->name_hlist);
 235         hlist_del(&dev->index_hlist);
 236         write_unlock_bh(&dev_base_lock);
 237 }
 238
 239 /*
 240  *      Our notifier list
 241  */
 242
 243 static RAW_NOTIFIER_HEAD(netdev_chain);
 244
 245 /*
 246  *      Device drivers call our routines to queue packets here. We empty the
 247  *      queue in the local softnet handler.
 248  */
 249
 250 DEFINE_PER_CPU(struct softnet_data, softnet_data);
 251
 252 #ifdef CONFIG_LOCKDEP
 253 /*
 254  * register_netdevice() inits txq->_xmit_lock and sets lockdep class
 255  * according to dev->type
 256  */
 257 static const unsigned short netdev_lock_type[] =
 258         {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
 259          ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
 260          ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
 261          ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
 262          ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
 263          ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
 264          ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
 265          ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
 266          ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
 267          ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
 268          ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
 269          ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
 270          ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
 271          ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
 272          ARPHRD_PHONET_PIPE, ARPHRD_VOID, ARPHRD_NONE};
 273
 274 static const char *netdev_lock_name[] =
 275         {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
 276          "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
 277          "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
 278          "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
 279          "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
 280          "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
 281          "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
 282          "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
 283          "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
 284          "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
 285          "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
 286          "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
 287          "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
 288          "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
 289          "_xmit_PHONET_PIPE", "_xmit_VOID", "_xmit_NONE"};
 290
 291 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
 292 static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
 293
 294 static inline unsigned short netdev_lock_pos(unsigned short dev_type)
 295 {
 296         int i;
 297
 298         for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
 299                 if (netdev_lock_type[i] == dev_type)
 300                         return i;
 301         /* the last key is used by default */
 302         return ARRAY_SIZE(netdev_lock_type) - 1;
 303 }
 304
 305 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
 306                                                  unsigned short dev_type)
 307 {
 308         int i;
 309
 310         i = netdev_lock_pos(dev_type);
 311         lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
 312                                    netdev_lock_name[i]);
 313 }
 314
 315 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
 316 {
 317         int i;
 318
 319         i = netdev_lock_pos(dev->type);
 320         lockdep_set_class_and_name(&dev->addr_list_lock,
 321                                    &netdev_addr_lock_key[i],
 322                                    netdev_lock_name[i]);
 323 }
 324 #else
 325 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
 326                                                  unsigned short dev_type)
 327 {
 328 }
 329 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
 330 {
 331 }
 332 #endif
 333
 334 /*******************************************************************************
 335
 336                 Protocol management and registration routines
 337
 338 *******************************************************************************/
 339
 340 /*
 341  *      Add a protocol ID to the list. Now that the input handler is
 342  *      smarter we can dispense with all the messy stuff that used to be
 343  *      here.
 344  *
 345  *      BEWARE!!! Protocol handlers, mangling input packets,
 346  *      MUST BE last in hash buckets and checking protocol handlers
 347  *      MUST start from promiscuous ptype_all chain in net_bh.
 348  *      It is true now, do not change it.
 349  *      Explanation follows: if protocol handler, mangling packet, will
 350  *      be the first on list, it is not able to sense, that packet
 351  *      is cloned and should be copied-on-write, so that it will
 352  *      change it and subsequent readers will get broken packet.
 353  *                                                      --ANK (980803)
 354  */
 355
 356 /**
 357  *      dev_add_pack - add packet handler
 358  *      @pt: packet type declaration
 359  *
 360  *      Add a protocol handler to the networking stack. The passed &packet_type
 361  *      is linked into kernel lists and may not be freed until it has been
 362  *      removed from the kernel lists.
 363  *
 364  *      This call does not sleep therefore it can not
 365  *      guarantee all CPU's that are in middle of receiving packets
 366  *      will see the new packet type (until the next received packet).
 367  */
 368
 369 void dev_add_pack(struct packet_type *pt)
 370 {
 371         int hash;
 372
 373         spin_lock_bh(&ptype_lock);
 374         if (pt->type == htons(ETH_P_ALL))
 375                 list_add_rcu(&pt->list, &ptype_all);
 376         else {
 377                 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
 378                 list_add_rcu(&pt->list, &ptype_base[hash]);
 379         }
 380         spin_unlock_bh(&ptype_lock);
 381 }
 382
 383 /**
 384  *      __dev_remove_pack        - remove packet handler
 385  *      @pt: packet type declaration
 386  *
 387  *      Remove a protocol handler that was previously added to the kernel
 388  *      protocol handlers by dev_add_pack(). The passed &packet_type is removed
 389  *      from the kernel lists and can be freed or reused once this function
 390  *      returns.
 391  *
 392  *      The packet type might still be in use by receivers
 393  *      and must not be freed until after all the CPU's have gone
 394  *      through a quiescent state.
 395  */
 396 void __dev_remove_pack(struct packet_type *pt)
 397 {
 398         struct list_head *head;
 399         struct packet_type *pt1;
 400
 401         spin_lock_bh(&ptype_lock);
 402
 403         if (pt->type == htons(ETH_P_ALL))
 404                 head = &ptype_all;
 405         else
 406                 head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
 407
 408         list_for_each_entry(pt1, head, list) {
 409                 if (pt == pt1) {
 410                         list_del_rcu(&pt->list);
 411                         goto out;
 412                 }
 413         }
 414
 415         printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
 416 out:
 417         spin_unlock_bh(&ptype_lock);
 418 }
 419 /**
 420  *      dev_remove_pack  - remove packet handler
 421  *      @pt: packet type declaration
 422  *
 423  *      Remove a protocol handler that was previously added to the kernel
 424  *      protocol handlers by dev_add_pack(). The passed &packet_type is removed
 425  *      from the kernel lists and can be freed or reused once this function
 426  *      returns.
 427  *
 428  *      This call sleeps to guarantee that no CPU is looking at the packet
 429  *      type after return.
 430  */
 431 void dev_remove_pack(struct packet_type *pt)
 432 {
 433         __dev_remove_pack(pt);
 434
 435         synchronize_net();
 436 }
 437
 438 /******************************************************************************
 439
 440                       Device Boot-time Settings Routines
 441
 442 *******************************************************************************/
 443
 444 /* Boot time configuration table */
 445 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
 446
 447 /**
 448  *      netdev_boot_setup_add   - add new setup entry
 449  *      @name: name of the device
 450  *      @map: configured settings for the device
 451  *
 452  *      Adds new setup entry to the dev_boot_setup list.  The function
 453  *      returns 0 on error and 1 on success.  This is a generic routine to
 454  *      all netdevices.
 455  */
 456 static int netdev_boot_setup_add(char *name, struct ifmap *map)
 457 {
 458         struct netdev_boot_setup *s;
 459         int i;
 460
 461         s = dev_boot_setup;
 462         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 463                 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
 464                         memset(s[i].name, 0, sizeof(s[i].name));
 465                         strlcpy(s[i].name, name, IFNAMSIZ);
 466                         memcpy(&s[i].map, map, sizeof(s[i].map));
 467                         break;
 468                 }
 469         }
 470
 471         return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
 472 }
 473
 474 /**
 475  *      netdev_boot_setup_check - check boot time settings
 476  *      @dev: the netdevice
 477  *
 478  *      Check boot time settings for the device.
 479  *      The found settings are set for the device to be used
 480  *      later in the device probing.
 481  *      Returns 0 if no settings found, 1 if they are.
 482  */
 483 int netdev_boot_setup_check(struct net_device *dev)
 484 {
 485         struct netdev_boot_setup *s = dev_boot_setup;
 486         int i;
 487
 488         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 489                 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
 490                     !strcmp(dev->name, s[i].name)) {
 491                         dev->irq        = s[i].map.irq;
 492                         dev->base_addr  = s[i].map.base_addr;
 493                         dev->mem_start  = s[i].map.mem_start;
 494                         dev->mem_end    = s[i].map.mem_end;
 495                         return 1;
 496                 }
 497         }
 498         return 0;
 499 }
 500
 501
 502 /**
 503  *      netdev_boot_base        - get address from boot time settings
 504  *      @prefix: prefix for network device
 505  *      @unit: id for network device
 506  *
 507  *      Check boot time settings for the base address of device.
 508  *      The found settings are set for the device to be used
 509  *      later in the device probing.
 510  *      Returns 0 if no settings found.
 511  */
 512 unsigned long netdev_boot_base(const char *prefix, int unit)
 513 {
 514         const struct netdev_boot_setup *s = dev_boot_setup;
 515         char name[IFNAMSIZ];
 516         int i;
 517
 518         sprintf(name, "%s%d", prefix, unit);
 519
 520         /*
 521          * If device already registered then return base of 1
 522          * to indicate not to probe for this interface
 523          */
 524         if (__dev_get_by_name(&init_net, name))
 525                 return 1;
 526
 527         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
 528                 if (!strcmp(name, s[i].name))
 529                         return s[i].map.base_addr;
 530         return 0;
 531 }
 532
 533 /*
 534  * Saves at boot time configured settings for any netdevice.
 535  */
 536 int __init netdev_boot_setup(char *str)
 537 {
 538         int ints[5];
 539         struct ifmap map;
 540
 541         str = get_options(str, ARRAY_SIZE(ints), ints);
 542         if (!str || !*str)
 543                 return 0;
 544
 545         /* Save settings */
 546         memset(&map, 0, sizeof(map));
 547         if (ints[0] > 0)
 548                 map.irq = ints[1];
 549         if (ints[0] > 1)
 550                 map.base_addr = ints[2];
 551         if (ints[0] > 2)
 552                 map.mem_start = ints[3];
 553         if (ints[0] > 3)
 554                 map.mem_end = ints[4];
 555
 556         /* Add new entry to the list */
 557         return netdev_boot_setup_add(str, &map);
 558 }
 559
 560 __setup("netdev=", netdev_boot_setup);
 561
 562 /*******************************************************************************
 563
 564                             Device Interface Subroutines
 565
 566 *******************************************************************************/
 567
 568 /**
 569  *      __dev_get_by_name       - find a device by its name
 570  *      @net: the applicable net namespace
 571  *      @name: name to find
 572  *
 573  *      Find an interface by name. Must be called under RTNL semaphore
 574  *      or @dev_base_lock. If the name is found a pointer to the device
 575  *      is returned. If the name is not found then %NULL is returned. The
 576  *      reference counters are not incremented so the caller must be
 577  *      careful with locks.
 578  */
 579
 580 struct net_device *__dev_get_by_name(struct net *net, const char *name)
 581 {
 582         struct hlist_node *p;
 583
 584         hlist_for_each(p, dev_name_hash(net, name)) {
 585                 struct net_device *dev
 586                         = hlist_entry(p, struct net_device, name_hlist);
 587                 if (!strncmp(dev->name, name, IFNAMSIZ))
 588                         return dev;
 589         }
 590         return NULL;
 591 }
 592
 593 /**
 594  *      dev_get_by_name         - find a device by its name
 595  *      @net: the applicable net namespace
 596  *      @name: name to find
 597  *
 598  *      Find an interface by name. This can be called from any
 599  *      context and does its own locking. The returned handle has
 600  *      the usage count incremented and the caller must use dev_put() to
 601  *      release it when it is no longer needed. %NULL is returned if no
 602  *      matching device is found.
 603  */
 604
 605 struct net_device *dev_get_by_name(struct net *net, const char *name)
 606 {
 607         struct net_device *dev;
 608
 609         read_lock(&dev_base_lock);
 610         dev = __dev_get_by_name(net, name);
 611         if (dev)
 612                 dev_hold(dev);
 613         read_unlock(&dev_base_lock);
 614         return dev;
 615 }
 616
 617 /**
 618  *      __dev_get_by_index - find a device by its ifindex
 619  *      @net: the applicable net namespace
 620  *      @ifindex: index of device
 621  *
 622  *      Search for an interface by index. Returns %NULL if the device
 623  *      is not found or a pointer to the device. The device has not
 624  *      had its reference counter increased so the caller must be careful
 625  *      about locking. The caller must hold either the RTNL semaphore
 626  *      or @dev_base_lock.
 627  */
 628
 629 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
 630 {
 631         struct hlist_node *p;
 632
 633         hlist_for_each(p, dev_index_hash(net, ifindex)) {
 634                 struct net_device *dev
 635                         = hlist_entry(p, struct net_device, index_hlist);
 636                 if (dev->ifindex == ifindex)
 637                         return dev;
 638         }
 639         return NULL;
 640 }
 641
 642
 643 /**
 644  *      dev_get_by_index - find a device by its ifindex
 645  *      @net: the applicable net namespace
 646  *      @ifindex: index of device
 647  *
 648  *      Search for an interface by index. Returns NULL if the device
 649  *      is not found or a pointer to the device. The device returned has
 650  *      had a reference added and the pointer is safe until the user calls
 651  *      dev_put to indicate they have finished with it.
 652  */
 653
 654 struct net_device *dev_get_by_index(struct net *net, int ifindex)
 655 {
 656         struct net_device *dev;
 657
 658         read_lock(&dev_base_lock);
 659         dev = __dev_get_by_index(net, ifindex);
 660         if (dev)
 661                 dev_hold(dev);
 662         read_unlock(&dev_base_lock);
 663         return dev;
 664 }
 665
 666 /**
 667  *      dev_getbyhwaddr - find a device by its hardware address
 668  *      @net: the applicable net namespace
 669  *      @type: media type of device
 670  *      @ha: hardware address
 671  *
 672  *      Search for an interface by MAC address. Returns NULL if the device
 673  *      is not found or a pointer to the device. The caller must hold the
 674  *      rtnl semaphore. The returned device has not had its ref count increased
 675  *      and the caller must therefore be careful about locking
 676  *
 677  *      BUGS:
 678  *      If the API was consistent this would be __dev_get_by_hwaddr
 679  */
 680
 681 struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
 682 {
 683         struct net_device *dev;
 684
 685         ASSERT_RTNL();
 686
 687         for_each_netdev(net, dev)
 688                 if (dev->type == type &&
 689                     !memcmp(dev->dev_addr, ha, dev->addr_len))
 690                         return dev;
 691
 692         return NULL;
 693 }
 694
 695 EXPORT_SYMBOL(dev_getbyhwaddr);
 696
 697 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
 698 {
 699         struct net_device *dev;
 700
 701         ASSERT_RTNL();
 702         for_each_netdev(net, dev)
 703                 if (dev->type == type)
 704                         return dev;
 705
 706         return NULL;
 707 }
 708
 709 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
 710
 711 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
 712 {
 713         struct net_device *dev;
 714
 715         rtnl_lock();
 716         dev = __dev_getfirstbyhwtype(net, type);
 717         if (dev)
 718                 dev_hold(dev);
 719         rtnl_unlock();
 720         return dev;
 721 }
 722
 723 EXPORT_SYMBOL(dev_getfirstbyhwtype);
 724
 725 /**
 726  *      dev_get_by_flags - find any device with given flags
 727  *      @net: the applicable net namespace
 728  *      @if_flags: IFF_* values
 729  *      @mask: bitmask of bits in if_flags to check
 730  *
 731  *      Search for any interface with the given flags. Returns NULL if a device
 732  *      is not found or a pointer to the device. The device returned has
 733  *      had a reference added and the pointer is safe until the user calls
 734  *      dev_put to indicate they have finished with it.
 735  */
 736
 737 struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
 738 {
 739         struct net_device *dev, *ret;
 740
 741         ret = NULL;
 742         read_lock(&dev_base_lock);
 743         for_each_netdev(net, dev) {
 744                 if (((dev->flags ^ if_flags) & mask) == 0) {
 745                         dev_hold(dev);
 746                         ret = dev;
 747                         break;
 748                 }
 749         }
 750         read_unlock(&dev_base_lock);
 751         return ret;
 752 }
 753
 754 /**
 755  *      dev_valid_name - check if name is okay for network device
 756  *      @name: name string
 757  *
 758  *      Network device names need to be valid file names to
 759  *      to allow sysfs to work.  We also disallow any kind of
 760  *      whitespace.
 761  */
 762 int dev_valid_name(const char *name)
 763 {
 764         if (*name == '\0')
 765                 return 0;
 766         if (strlen(name) >= IFNAMSIZ)
 767                 return 0;
 768         if (!strcmp(name, ".") || !strcmp(name, ".."))
 769                 return 0;
 770
 771         while (*name) {
 772                 if (*name == '/' || isspace(*name))
 773                         return 0;
 774                 name++;
 775         }
 776         return 1;
 777 }
 778
 779 /**
 780  *      __dev_alloc_name - allocate a name for a device
 781  *      @net: network namespace to allocate the device name in
 782  *      @name: name format string
 783  *      @buf:  scratch buffer and result name string
 784  *
 785  *      Passed a format string - eg "lt%d" it will try and find a suitable
 786  *      id. It scans list of devices to build up a free map, then chooses
 787  *      the first empty slot. The caller must hold the dev_base or rtnl lock
 788  *      while allocating the name and adding the device in order to avoid
 789  *      duplicates.
 790  *      Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 791  *      Returns the number of the unit assigned or a negative errno code.
 792  */
 793
 794 static int __dev_alloc_name(struct net *net, const char *name, char *buf)
 795 {
 796         int i = 0;
 797         const char *p;
 798         const int max_netdevices = 8*PAGE_SIZE;
 799         unsigned long *inuse;
 800         struct net_device *d;
 801
 802         p = strnchr(name, IFNAMSIZ-1, '%');
 803         if (p) {
 804                 /*
 805                  * Verify the string as this thing may have come from
 806                  * the user.  There must be either one "%d" and no other "%"
 807                  * characters.
 808                  */
 809                 if (p[1] != 'd' || strchr(p + 2, '%'))
 810                         return -EINVAL;
 811
 812                 /* Use one page as a bit array of possible slots */
 813                 inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
 814                 if (!inuse)
 815                         return -ENOMEM;
 816
 817                 for_each_netdev(net, d) {
 818                         if (!sscanf(d->name, name, &i))
 819                                 continue;
 820                         if (i < 0 || i >= max_netdevices)
 821                                 continue;
 822
 823                         /*  avoid cases where sscanf is not exact inverse of printf */
 824                         snprintf(buf, IFNAMSIZ, name, i);
 825                         if (!strncmp(buf, d->name, IFNAMSIZ))
 826                                 set_bit(i, inuse);
 827                 }
 828
 829                 i = find_first_zero_bit(inuse, max_netdevices);
 830                 free_page((unsigned long) inuse);
 831         }
 832
 833         snprintf(buf, IFNAMSIZ, name, i);
 834         if (!__dev_get_by_name(net, buf))
 835                 return i;
 836
 837         /* It is possible to run out of possible slots
 838          * when the name is long and there isn't enough space left
 839          * for the digits, or if all bits are used.
 840          */
 841         return -ENFILE;
 842 }
 843
 844 /**
 845  *      dev_alloc_name - allocate a name for a device
 846  *      @dev: device
 847  *      @name: name format string
 848  *
 849  *      Passed a format string - eg "lt%d" it will try and find a suitable
 850  *      id. It scans list of devices to build up a free map, then chooses
 851  *      the first empty slot. The caller must hold the dev_base or rtnl lock
 852  *      while allocating the name and adding the device in order to avoid
 853  *      duplicates.
 854  *      Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 855  *      Returns the number of the unit assigned or a negative errno code.
 856  */
 857
 858 int dev_alloc_name(struct net_device *dev, const char *name)
 859 {
 860         char buf[IFNAMSIZ];
 861         struct net *net;
 862         int ret;
 863
 864         BUG_ON(!dev_net(dev));
 865         net = dev_net(dev);
 866         ret = __dev_alloc_name(net, name, buf);
 867         if (ret >= 0)
 868                 strlcpy(dev->name, buf, IFNAMSIZ);
 869         return ret;
 870 }
 871
 872
 873 /**
 874  *      dev_change_name - change name of a device
 875  *      @dev: device
 876  *      @newname: name (or format string) must be at least IFNAMSIZ
 877  *
 878  *      Change name of a device, can pass format strings "eth%d".
 879  *      for wildcarding.
 880  */
 881 int dev_change_name(struct net_device *dev, const char *newname)
 882 {
 883         char oldname[IFNAMSIZ];
 884         int err = 0;
 885         int ret;
 886         struct net *net;
 887
 888         ASSERT_RTNL();
 889         BUG_ON(!dev_net(dev));
 890
 891         net = dev_net(dev);
 892         if (dev->flags & IFF_UP)
 893                 return -EBUSY;
 894
 895         if (!dev_valid_name(newname))
 896                 return -EINVAL;
 897
 898         if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
 899                 return 0;
 900
 901         memcpy(oldname, dev->name, IFNAMSIZ);
 902
 903         if (strchr(newname, '%')) {
 904                 err = dev_alloc_name(dev, newname);
 905                 if (err < 0)
 906                         return err;
 907         }
 908         else if (__dev_get_by_name(net, newname))
 909                 return -EEXIST;
 910         else
 911                 strlcpy(dev->name, newname, IFNAMSIZ);
 912
 913 rollback:
 914         /* For now only devices in the initial network namespace
 915          * are in sysfs.
 916          */
 917         if (net == &init_net) {
 918                 ret = device_rename(&dev->dev, dev->name);
 919                 if (ret) {
 920                         memcpy(dev->name, oldname, IFNAMSIZ);
 921                         return ret;
 922                 }
 923         }
 924
 925         write_lock_bh(&dev_base_lock);
 926         hlist_del(&dev->name_hlist);
 927         hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
 928         write_unlock_bh(&dev_base_lock);
 929
 930         ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
 931         ret = notifier_to_errno(ret);
 932
 933         if (ret) {
 934                 if (err) {
 935                         printk(KERN_ERR
 936                                "%s: name change rollback failed: %d.\n",
 937                                dev->name, ret);
 938                 } else {
 939                         err = ret;
 940                         memcpy(dev->name, oldname, IFNAMSIZ);
 941                         goto rollback;
 942                 }
 943         }
 944
 945         return err;
 946 }
 947
 948 /**
 949  *      dev_set_alias - change ifalias of a device
 950  *      @dev: device
 951  *      @alias: name up to IFALIASZ
 952  *      @len: limit of bytes to copy from info
 953  *
 954  *      Set ifalias for a device,
 955  */
 956 int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
 957 {
 958         ASSERT_RTNL();
 959
 960         if (len >= IFALIASZ)
 961                 return -EINVAL;
 962
 963         if (!len) {
 964                 if (dev->ifalias) {
 965                         kfree(dev->ifalias);
 966                         dev->ifalias = NULL;
 967                 }
 968                 return 0;
 969         }
 970
 971         dev->ifalias = krealloc(dev->ifalias, len+1, GFP_KERNEL);
 972         if (!dev->ifalias)
 973                 return -ENOMEM;
 974
 975         strlcpy(dev->ifalias, alias, len+1);
 976         return len;
 977 }
 978
 979
/**
 *      netdev_features_change - device changes features
 *      @dev: device to cause notification
 *
 *      Called to indicate a device has changed features.  Runs the
 *      netdevice notifier chain with %NETDEV_FEAT_CHANGE for @dev;
 *      the result of the chain is not returned to the caller.
 */
void netdev_features_change(struct net_device *dev)
{
        call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
}
EXPORT_SYMBOL(netdev_features_change);
 991
 992 /**
 993  *      netdev_state_change - device changes state
 994  *      @dev: device to cause notification
 995  *
 996  *      Called to indicate a device has changed state. This function calls
 997  *      the notifier chains for netdev_chain and sends a NEWLINK message
 998  *      to the routing socket.
 999  */
1000 void netdev_state_change(struct net_device *dev)
1001 {
1002         if (dev->flags & IFF_UP) {
1003                 call_netdevice_notifiers(NETDEV_CHANGE, dev);
1004                 rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
1005         }
1006 }
1007
/**
 *      netdev_bonding_change - announce a bonding failover
 *      @dev: device to cause notification
 *
 *      Runs the netdevice notifier chain with %NETDEV_BONDING_FAILOVER
 *      for @dev; the result of the chain is not returned.
 */
void netdev_bonding_change(struct net_device *dev)
{
        call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev);
}
EXPORT_SYMBOL(netdev_bonding_change);
1013
1014 /**
1015  *      dev_load        - load a network module
1016  *      @net: the applicable net namespace
1017  *      @name: name of interface
1018  *
1019  *      If a network interface is not present and the process has suitable
1020  *      privileges this function loads the module. If module loading is not
1021  *      available in this kernel then it becomes a nop.
1022  */
1023
1024 void dev_load(struct net *net, const char *name)
1025 {
1026         struct net_device *dev;
1027
1028         read_lock(&dev_base_lock);
1029         dev = __dev_get_by_name(net, name);
1030         read_unlock(&dev_base_lock);
1031
1032         if (!dev && capable(CAP_SYS_MODULE))
1033                 request_module("%s", name);
1034 }
1035
1036 /**
1037  *      dev_open        - prepare an interface for use.
1038  *      @dev:   device to open
1039  *
1040  *      Takes a device from down to up state. The device's private open
1041  *      function is invoked and then the multicast lists are loaded. Finally
1042  *      the device is moved into the up state and a %NETDEV_UP message is
1043  *      sent to the netdev notifier chain.
1044  *
1045  *      Calling this function on an active interface is a nop. On a failure
1046  *      a negative errno code is returned.
1047  */
1048 int dev_open(struct net_device *dev)
1049 {
1050         const struct net_device_ops *ops = dev->netdev_ops;
1051         int ret = 0;
1052
1053         ASSERT_RTNL();
1054
1055         /*
1056          *      Is it already up?
1057          */
1058
1059         if (dev->flags & IFF_UP)
1060                 return 0;
1061
1062         /*
1063          *      Is it even present?
1064          */
1065         if (!netif_device_present(dev))
1066                 return -ENODEV;
1067
1068         /*
1069          *      Call device private open method
1070          */
1071         set_bit(__LINK_STATE_START, &dev->state);
1072
1073         if (ops->ndo_validate_addr)
1074                 ret = ops->ndo_validate_addr(dev);
1075
1076         if (!ret && ops->ndo_open)
1077                 ret = ops->ndo_open(dev);
1078
1079         /*
1080          *      If it went open OK then:
1081          */
1082
1083         if (ret)
1084                 clear_bit(__LINK_STATE_START, &dev->state);
1085         else {
1086                 /*
1087                  *      Set the flags.
1088                  */
1089                 dev->flags |= IFF_UP;
1090
1091                 /*
1092                  *      Enable NET_DMA
1093                  */
1094                 net_dmaengine_get();
1095
1096                 /*
1097                  *      Initialize multicasting status
1098                  */
1099                 dev_set_rx_mode(dev);
1100
1101                 /*
1102                  *      Wakeup transmit queue engine
1103                  */
1104                 dev_activate(dev);
1105
1106                 /*
1107                  *      ... and announce new interface.
1108                  */
1109                 call_netdevice_notifiers(NETDEV_UP, dev);
1110         }
1111
1112         return ret;
1113 }
1114
/**
 *      dev_close - shutdown an interface.
 *      @dev: device to shutdown
 *
 *      This function moves an active device into down state. A
 *      %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
 *      is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
 *      chain.
 *
 *      Calling this function on an interface that is not up is a nop.
 *      Must be called with RTNL held; may sleep.  Always returns 0.
 */
int dev_close(struct net_device *dev)
{
        const struct net_device_ops *ops = dev->netdev_ops;
        ASSERT_RTNL();

        might_sleep();

        if (!(dev->flags & IFF_UP))
                return 0;

        /*
         *      Tell people we are going down, so that they can
         *      prepare for it while the device is still operating.
         */
        call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);

        clear_bit(__LINK_STATE_START, &dev->state);

        /* Synchronize to scheduled poll. We cannot touch poll list,
         * it can be even on different cpu. So just clear netif_running().
         *
         * dev->stop() will invoke napi_disable() on all of its
         * napi_struct instances on this device.
         */
        smp_mb__after_clear_bit(); /* Commit netif_running(). */

        dev_deactivate(dev);

        /*
         *      Call the device specific close. This cannot fail.
         *      Only if device is UP
         *
         *      We allow it to be called even after a DETACH hot-plug
         *      event.
         */
        if (ops->ndo_stop)
                ops->ndo_stop(dev);

        /*
         *      Device is now down.
         */

        dev->flags &= ~IFF_UP;

        /*
         * Tell people we are down
         */
        call_netdevice_notifiers(NETDEV_DOWN, dev);

        /*
         *      Shutdown NET_DMA
         */
        net_dmaengine_put();

        return 0;
}
1180
1181
1182 /**
1183  *      dev_disable_lro - disable Large Receive Offload on a device
1184  *      @dev: device
1185  *
1186  *      Disable Large Receive Offload (LRO) on a net device.  Must be
1187  *      called under RTNL.  This is needed if received packets may be
1188  *      forwarded to another interface.
1189  */
1190 void dev_disable_lro(struct net_device *dev)
1191 {
1192         if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
1193             dev->ethtool_ops->set_flags) {
1194                 u32 flags = dev->ethtool_ops->get_flags(dev);
1195                 if (flags & ETH_FLAG_LRO) {
1196                         flags &= ~ETH_FLAG_LRO;
1197                         dev->ethtool_ops->set_flags(dev, flags);
1198                 }
1199         }
1200         WARN_ON(dev->features & NETIF_F_LRO);
1201 }
1202 EXPORT_SYMBOL(dev_disable_lro);
1203
1204
/*
 *      Starts out as 1.  While it is non-zero,
 *      register_netdevice_notifier() does not replay REGISTER/UP
 *      events to a newly added notifier.
 *      NOTE(review): presumably cleared once network device
 *      initialisation has finished - confirm where it is reset.
 */
static int dev_boot_phase = 1;
1206
1207 /*
1208  *      Device change register/unregister. These are not inline or static
1209  *      as we export them to the world.
1210  */
1211
1212 /**
1213  *      register_netdevice_notifier - register a network notifier block
1214  *      @nb: notifier
1215  *
1216  *      Register a notifier to be called when network device events occur.
1217  *      The notifier passed is linked into the kernel structures and must
1218  *      not be reused until it has been unregistered. A negative errno code
1219  *      is returned on a failure.
1220  *
1221  *      When registered all registration and up events are replayed
1222  *      to the new notifier to allow device to have a race free
1223  *      view of the network device list.
1224  */
1225
1226 int register_netdevice_notifier(struct notifier_block *nb)
1227 {
1228         struct net_device *dev;
1229         struct net_device *last;
1230         struct net *net;
1231         int err;
1232
1233         rtnl_lock();
1234         err = raw_notifier_chain_register(&netdev_chain, nb);
1235         if (err)
1236                 goto unlock;
1237         if (dev_boot_phase)
1238                 goto unlock;
1239         for_each_net(net) {
1240                 for_each_netdev(net, dev) {
1241                         err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1242                         err = notifier_to_errno(err);
1243                         if (err)
1244                                 goto rollback;
1245
1246                         if (!(dev->flags & IFF_UP))
1247                                 continue;
1248
1249                         nb->notifier_call(nb, NETDEV_UP, dev);
1250                 }
1251         }
1252
1253 unlock:
1254         rtnl_unlock();
1255         return err;
1256
1257 rollback:
1258         last = dev;
1259         for_each_net(net) {
1260                 for_each_netdev(net, dev) {
1261                         if (dev == last)
1262                                 break;
1263
1264                         if (dev->flags & IFF_UP) {
1265                                 nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1266                                 nb->notifier_call(nb, NETDEV_DOWN, dev);
1267                         }
1268                         nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1269                 }
1270         }
1271
1272         raw_notifier_chain_unregister(&netdev_chain, nb);
1273         goto unlock;
1274 }
1275
/**
 *      unregister_netdevice_notifier - unregister a network notifier block
 *      @nb: notifier
 *
 *      Unregister a notifier previously registered by
 *      register_netdevice_notifier(). The notifier is unlinked from the
 *      kernel structures and may then be reused. A negative errno code
 *      is returned on a failure.
 *
 *      Takes and releases the RTNL lock around the removal.
 */

int unregister_netdevice_notifier(struct notifier_block *nb)
{
        int err;

        rtnl_lock();
        err = raw_notifier_chain_unregister(&netdev_chain, nb);
        rtnl_unlock();
        return err;
}
1295
/**
 *      call_netdevice_notifiers - call all network notifier blocks
 *      @val: value passed unmodified to notifier function
 *      @dev: net_device pointer passed unmodified to notifier function
 *
 *      Call all network notifier blocks on netdev_chain.  Parameters and
 *      return value are as for raw_notifier_call_chain().
 */

int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
        return raw_notifier_call_chain(&netdev_chain, val, dev);
}
1309
/*
 * Count of consumers of rx skb time stamps.  Raised by
 * net_enable_timestamp(), lowered by net_disable_timestamp(); while it
 * is non-zero net_timestamp() stamps packets.
 */
static atomic_t netstamp_needed = ATOMIC_INIT(0);
1312
/* Register one more consumer of skb time stamps. */
void net_enable_timestamp(void)
{
        atomic_inc(&netstamp_needed);
}
1317
/* Drop one consumer of skb time stamps. */
void net_disable_timestamp(void)
{
        atomic_dec(&netstamp_needed);
}
1322
1323 static inline void net_timestamp(struct sk_buff *skb)
1324 {
1325         if (atomic_read(&netstamp_needed))
1326                 __net_timestamp(skb);
1327         else
1328                 skb->tstamp.tv64 = 0;
1329 }
1330
1331 /*
1332  *      Support routine. Sends outgoing frames to any network
1333  *      taps currently in use.
1334  */
1335
1336 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1337 {
1338         struct packet_type *ptype;
1339
1340 #ifdef CONFIG_NET_CLS_ACT
1341         if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
1342                 net_timestamp(skb);
1343 #else
1344         net_timestamp(skb);
1345 #endif
1346
1347         rcu_read_lock();
1348         list_for_each_entry_rcu(ptype, &ptype_all, list) {
1349                 /* Never send packets back to the socket
1350                  * they originated from - MvS (miquels@drinkel.ow.org)
1351                  */
1352                 if ((ptype->dev == dev || !ptype->dev) &&
1353                     (ptype->af_packet_priv == NULL ||
1354                      (struct sock *)ptype->af_packet_priv != skb->sk)) {
1355                         struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1356                         if (!skb2)
1357                                 break;
1358
1359                         /* skb->nh should be correctly
1360                            set by sender, so that the second statement is
1361                            just protection against buggy protocols.
1362                          */
1363                         skb_reset_mac_header(skb2);
1364
1365                         if (skb_network_header(skb2) < skb2->data ||
1366                             skb2->network_header > skb2->tail) {
1367                                 if (net_ratelimit())
1368                                         printk(KERN_CRIT "protocol %04x is "
1369                                                "buggy, dev %s\n",
1370                                                skb2->protocol, dev->name);
1371                                 skb_reset_network_header(skb2);
1372                         }
1373
1374                         skb2->transport_header = skb2->network_header;
1375                         skb2->pkt_type = PACKET_OUTGOING;
1376                         ptype->func(skb2, skb->dev, ptype, skb->dev);
1377                 }
1378         }
1379         rcu_read_unlock();
1380 }
1381
1382
/*
 * Push @q onto the head of this CPU's softnet_data output_queue and
 * raise NET_TX_SOFTIRQ.  Local interrupts are disabled around the
 * list update.
 */
static inline void __netif_reschedule(struct Qdisc *q)
{
        struct softnet_data *sd;
        unsigned long flags;

        local_irq_save(flags);
        sd = &__get_cpu_var(softnet_data);
        q->next_sched = sd->output_queue;
        sd->output_queue = q;
        raise_softirq_irqoff(NET_TX_SOFTIRQ);
        local_irq_restore(flags);
}
1395
1396 void __netif_schedule(struct Qdisc *q)
1397 {
1398         if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
1399                 __netif_reschedule(q);
1400 }
1401 EXPORT_SYMBOL(__netif_schedule);
1402
1403 void dev_kfree_skb_irq(struct sk_buff *skb)
1404 {
1405         if (atomic_dec_and_test(&skb->users)) {
1406                 struct softnet_data *sd;
1407                 unsigned long flags;
1408
1409                 local_irq_save(flags);
1410                 sd = &__get_cpu_var(softnet_data);
1411                 skb->next = sd->completion_queue;
1412                 sd->completion_queue = skb;
1413                 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1414                 local_irq_restore(flags);
1415         }
1416 }
1417 EXPORT_SYMBOL(dev_kfree_skb_irq);
1418
/*
 * Release @skb from any context: goes through dev_kfree_skb_irq() when
 * running in hard interrupt context or with interrupts disabled, and
 * through dev_kfree_skb() otherwise.
 */
void dev_kfree_skb_any(struct sk_buff *skb)
{
        if (in_irq() || irqs_disabled())
                dev_kfree_skb_irq(skb);
        else
                dev_kfree_skb(skb);
}
EXPORT_SYMBOL(dev_kfree_skb_any);
1427
1428
1429 /**
1430  * netif_device_detach - mark device as removed
1431  * @dev: network device
1432  *
1433  * Mark device as removed from system and therefore no longer available.
1434  */
1435 void netif_device_detach(struct net_device *dev)
1436 {
1437         if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1438             netif_running(dev)) {
1439                 netif_tx_stop_all_queues(dev);
1440         }
1441 }
1442 EXPORT_SYMBOL(netif_device_detach);
1443
1444 /**
1445  * netif_device_attach - mark device as attached
1446  * @dev: network device
1447  *
1448  * Mark device as attached from system and restart if needed.
1449  */
1450 void netif_device_attach(struct net_device *dev)
1451 {
1452         if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1453             netif_running(dev)) {
1454                 netif_tx_wake_all_queues(dev);
1455                 __netdev_watchdog_up(dev);
1456         }
1457 }
1458 EXPORT_SYMBOL(netif_device_attach);
1459
1460 static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1461 {
1462         return ((features & NETIF_F_GEN_CSUM) ||
1463                 ((features & NETIF_F_IP_CSUM) &&
1464                  protocol == htons(ETH_P_IP)) ||
1465                 ((features & NETIF_F_IPV6_CSUM) &&
1466                  protocol == htons(ETH_P_IPV6)) ||
1467                 ((features & NETIF_F_FCOE_CRC) &&
1468                  protocol == htons(ETH_P_FCOE)));
1469 }
1470
1471 static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
1472 {
1473         if (can_checksum_protocol(dev->features, skb->protocol))
1474                 return true;
1475
1476         if (skb->protocol == htons(ETH_P_8021Q)) {
1477                 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1478                 if (can_checksum_protocol(dev->features & dev->vlan_features,
1479                                           veh->h_vlan_encapsulated_proto))
1480                         return true;
1481         }
1482
1483         return false;
1484 }
1485
/*
 * Invalidate hardware checksum when packet is to be mangled, and
 * complete checksum manually on outgoing path.
 *
 * Returns 0 on success, or the error from pskb_expand_head() when a
 * cloned buffer could not be made writable; in that case ip_summed is
 * left untouched.
 */
int skb_checksum_help(struct sk_buff *skb)
{
        __wsum csum;
        int ret = 0, offset;

        /* Nothing to compute, only drop the hardware checksum state */
        if (skb->ip_summed == CHECKSUM_COMPLETE)
                goto out_set_summed;

        if (unlikely(skb_shinfo(skb)->gso_size)) {
                /* Let GSO fix up the checksum. */
                goto out_set_summed;
        }

        /* Checksum everything from csum_start to the end of the packet */
        offset = skb->csum_start - skb_headroom(skb);
        BUG_ON(offset >= skb_headlen(skb));
        csum = skb_checksum(skb, offset, skb->len - offset, 0);

        /* Position of the checksum field; it must lie in the linear area */
        offset += skb->csum_offset;
        BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));

        /* A clone may share its header: get a private copy before writing */
        if (skb_cloned(skb) &&
            !skb_clone_writable(skb, offset + sizeof(__sum16))) {
                ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
                if (ret)
                        goto out;
        }

        *(__sum16 *)(skb->data + offset) = csum_fold(csum);
out_set_summed:
        skb->ip_summed = CHECKSUM_NONE;
out:
        return ret;
}
1523
/**
 *      skb_gso_segment - Perform segmentation on skb.
 *      @skb: buffer to segment
 *      @features: features for the output path (see dev->features)
 *
 *      This function segments the given skb and returns a list of segments.
 *
 *      It may return NULL if the skb requires no segmentation.  This is
 *      only possible when GSO is used for verifying header integrity.
 *
 *      An ERR_PTR() is returned on failure; it is -EPROTONOSUPPORT when
 *      no matching protocol handler with a gso_segment method is found.
 */
struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
{
        struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
        struct packet_type *ptype;
        __be16 type = skb->protocol;
        int err;

        /* Step over the link-layer header, remembering its length */
        skb_reset_mac_header(skb);
        skb->mac_len = skb->network_header - skb->mac_header;
        __skb_pull(skb, skb->mac_len);

        if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
                struct net_device *dev = skb->dev;
                struct ethtool_drvinfo info = {};

                /* Collect driver information for the warning below */
                if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
                        dev->ethtool_ops->get_drvinfo(dev, &info);

                WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d "
                        "ip_summed=%d",
                     info.driver, dev ? dev->features : 0L,
                     skb->sk ? skb->sk->sk_route_caps : 0L,
                     skb->len, skb->data_len, skb->ip_summed);

                /* Get a private header when the current one is shared */
                if (skb_header_cloned(skb) &&
                    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
                        return ERR_PTR(err);
        }

        rcu_read_lock();
        list_for_each_entry_rcu(ptype,
                        &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
                /* Only handlers not bound to a device that can segment */
                if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
                        if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
                                err = ptype->gso_send_check(skb);
                                segs = ERR_PTR(err);
                                /* With err == 0 this leaves segs NULL */
                                if (err || skb_gso_ok(skb, features))
                                        break;
                                /* Move data back to the network header */
                                __skb_push(skb, (skb->data -
                                                 skb_network_header(skb)));
                        }
                        segs = ptype->gso_segment(skb, features);
                        break;
                }
        }
        rcu_read_unlock();

        /* Put the link-layer header back in front of the data */
        __skb_push(skb, skb->data - skb_mac_header(skb));

        return segs;
}

EXPORT_SYMBOL(skb_gso_segment);
1587
/*
 * Take action when hardware reception checksum errors are detected:
 * log the device name (or "<unknown>" without a device) and dump the
 * stack, subject to net_ratelimit().
 */
#ifdef CONFIG_BUG
void netdev_rx_csum_fault(struct net_device *dev)
{
        if (!net_ratelimit())
                return;

        printk(KERN_ERR "%s: hw csum failure.\n",
               dev ? dev->name : "<unknown>");
        dump_stack();
}
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif
1600
/* Actually, we should eliminate this check as soon as we know, that:
 * 1. IOMMU is present and allows to map all the memory.
 * 2. No high memory really exists on this machine.
 *
 * Returns 1 when the kernel is built with CONFIG_HIGHMEM, @dev lacks
 * NETIF_F_HIGHDMA and at least one page fragment of @skb lives in
 * high memory; returns 0 otherwise.
 */

static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
        int i;

        /* The device can reach high memory itself */
        if (dev->features & NETIF_F_HIGHDMA)
                return 0;

        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
                if (PageHighMem(skb_shinfo(skb)->frags[i].page))
                        return 1;

#endif
        return 0;
}
1621
/*
 * Private data kept in skb->cb while an skb carries a list of GSO
 * segments: the destructor the skb had before dev_gso_segment()
 * installed dev_gso_skb_destructor().
 */
struct dev_gso_cb {
        void (*destructor)(struct sk_buff *skb);
};

/* View the control buffer of @skb as a struct dev_gso_cb */
#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1627
/*
 * Destructor installed by dev_gso_segment(): frees every segment still
 * chained on skb->next, then runs the destructor saved in the control
 * buffer, if any.  The loop body runs before its test, so skb->next
 * has to be non-NULL on entry.
 */
static void dev_gso_skb_destructor(struct sk_buff *skb)
{
        struct dev_gso_cb *cb;

        do {
                struct sk_buff *nskb = skb->next;

                /* Unlink the first segment and free it */
                skb->next = nskb->next;
                nskb->next = NULL;
                kfree_skb(nskb);
        } while (skb->next);

        cb = DEV_GSO_CB(skb);
        if (cb->destructor)
                cb->destructor(skb);
}
1644
1645 /**
1646  *      dev_gso_segment - Perform emulated hardware segmentation on skb.
1647  *      @skb: buffer to segment
1648  *
1649  *      This function segments the given skb and stores the list of segments
1650  *      in skb->next.
1651  */
1652 static int dev_gso_segment(struct sk_buff *skb)
1653 {
1654         struct net_device *dev = skb->dev;
1655         struct sk_buff *segs;
1656         int features = dev->features & ~(illegal_highdma(dev, skb) ?
1657                                          NETIF_F_SG : 0);
1658
1659         segs = skb_gso_segment(skb, features);
1660
1661         /* Verifying header integrity only. */
1662         if (!segs)
1663                 return 0;
1664
1665         if (IS_ERR(segs))
1666                 return PTR_ERR(segs);
1667
1668         skb->next = segs;
1669         DEV_GSO_CB(skb)->destructor = skb->destructor;
1670         skb->destructor = dev_gso_skb_destructor;
1671
1672         return 0;
1673 }
1674
/*
 * Hand @skb to the driver of @dev through ndo_start_xmit().
 *
 * An skb without a segment list is first shown to the taps on
 * ptype_all and, when netif_needs_gso() says so, segmented in software.
 * An skb with a segment list on skb->next has its segments sent one
 * by one.
 *
 * Returns the driver's return code for a plain skb.  For a segment
 * list it returns 0 once every segment was accepted (the parent skb is
 * freed), the driver's non-zero code for a refused segment (which is
 * put back at the head of the list), or NETDEV_TX_BUSY when the queue
 * stopped with segments left.  A failure of dev_gso_segment() frees
 * the skb and also returns 0.
 */
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
                        struct netdev_queue *txq)
{
        const struct net_device_ops *ops = dev->netdev_ops;
        int rc;

        if (likely(!skb->next)) {
                if (!list_empty(&ptype_all))
                        dev_queue_xmit_nit(skb, dev);

                if (netif_needs_gso(dev, skb)) {
                        if (unlikely(dev_gso_segment(skb)))
                                goto out_kfree_skb;
                        /* Segments were produced: send them instead */
                        if (skb->next)
                                goto gso;
                }

                /*
                 * If device doesnt need skb->dst, release it right now while
                 * its hot in this cpu cache
                 */
                if ((dev->priv_flags & IFF_XMIT_DST_RELEASE) && skb->dst) {
                        dst_release(skb->dst);
                        skb->dst = NULL;
                }
                rc = ops->ndo_start_xmit(skb, dev);
                if (rc == 0)
                        txq_trans_update(txq);
                /*
                 * TODO: if skb_orphan() was called by
                 * dev->hard_start_xmit() (for example, the unmodified
                 * igb driver does that; bnx2 doesn't), then
                 * skb_tx_software_timestamp() will be unable to send
                 * back the time stamp.
                 *
                 * How can this be prevented? Always create another
                 * reference to the socket before calling
                 * dev->hard_start_xmit()? Prevent that skb_orphan()
                 * does anything in dev->hard_start_xmit() by clearing
                 * the skb destructor before the call and restoring it
                 * afterwards, then doing the skb_orphan() ourselves?
                 */
                return rc;
        }

gso:
        do {
                struct sk_buff *nskb = skb->next;

                /* Detach the first segment from the list and send it */
                skb->next = nskb->next;
                nskb->next = NULL;
                rc = ops->ndo_start_xmit(nskb, dev);
                if (unlikely(rc)) {
                        /* Refused: put the segment back at the head */
                        nskb->next = skb->next;
                        skb->next = nskb;
                        return rc;
                }
                txq_trans_update(txq);
                if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
                        return NETDEV_TX_BUSY;
        } while (skb->next);

        /* All segments are gone: restore the original destructor */
        skb->destructor = DEV_GSO_CB(skb)->destructor;

out_kfree_skb:
        kfree_skb(skb);
        return 0;
}
1743
1744 static u32 skb_tx_hashrnd;
1745
1746 u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
1747 {
1748         u32 hash;
1749
1750         if (skb_rx_queue_recorded(skb)) {
1751                 hash = skb_get_rx_queue(skb);
1752                 while (unlikely (hash >= dev->real_num_tx_queues))
1753                         hash -= dev->real_num_tx_queues;
1754                 return hash;
1755         }
1756
1757         if (skb->sk && skb->sk->sk_hash)
1758                 hash = skb->sk->sk_hash;
1759         else
1760                 hash = skb->protocol;
1761
1762         hash = jhash_1word(hash, skb_tx_hashrnd);
1763
1764         return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
1765 }
1766 EXPORT_SYMBOL(skb_tx_hash);
1767
1768 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1769                                         struct sk_buff *skb)
1770 {
1771         const struct net_device_ops *ops = dev->netdev_ops;
1772         u16 queue_index = 0;
1773
1774         if (ops->ndo_select_queue)
1775                 queue_index = ops->ndo_select_queue(dev, skb);
1776         else if (dev->real_num_tx_queues > 1)
1777                 queue_index = skb_tx_hash(dev, skb);
1778
1779         skb_set_queue_mapping(skb, queue_index);
1780         return netdev_get_tx_queue(dev, queue_index);
1781 }
1782
1783 /**
1784  *      dev_queue_xmit - transmit a buffer
1785  *      @skb: buffer to transmit
1786  *
1787  *      Queue a buffer for transmission to a network device. The caller must
1788  *      have set the device and priority and built the buffer before calling
1789  *      this function. The function can be called from an interrupt.
1790  *
1791  *      A negative errno code is returned on a failure. A success does not
1792  *      guarantee the frame will be transmitted as it may be dropped due
1793  *      to congestion or traffic shaping.
1794  *
1795  * -----------------------------------------------------------------------------------
1796  *      I notice this method can also return errors from the queue disciplines,
1797  *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
1798  *      be positive.
1799  *
1800  *      Regardless of the return value, the skb is consumed, so it is currently
1801  *      difficult to retry a send to this method.  (You can bump the ref count
1802  *      before sending to hold a reference for retry if you are careful.)
1803  *
1804  *      When calling this method, interrupts MUST be enabled.  This is because
1805  *      the BH enable code must have IRQs enabled so that it will not deadlock.
1806  *          --BLG
1807  */
1808 int dev_queue_xmit(struct sk_buff *skb)
1809 {
1810         struct net_device *dev = skb->dev;
1811         struct netdev_queue *txq;
1812         struct Qdisc *q;
1813         int rc = -ENOMEM;
1814
1815         /* GSO will handle the following emulations directly. */
1816         if (netif_needs_gso(dev, skb))
1817                 goto gso;
1818
1819         if (skb_shinfo(skb)->frag_list &&
1820             !(dev->features & NETIF_F_FRAGLIST) &&
1821             __skb_linearize(skb))
1822                 goto out_kfree_skb;
1823
1824         /* Fragmented skb is linearized if device does not support SG,
1825          * or if at least one of fragments is in highmem and device
1826          * does not support DMA from it.
1827          */
1828         if (skb_shinfo(skb)->nr_frags &&
1829             (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1830             __skb_linearize(skb))
1831                 goto out_kfree_skb;
1832
1833         /* If packet is not checksummed and device does not support
1834          * checksumming for this protocol, complete checksumming here.
1835          */
1836         if (skb->ip_summed == CHECKSUM_PARTIAL) {
1837                 skb_set_transport_header(skb, skb->csum_start -
1838                                               skb_headroom(skb));
1839                 if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
1840                         goto out_kfree_skb;
1841         }
1842
1843 gso:
1844         /* Disable soft irqs for various locks below. Also
1845          * stops preemption for RCU.
1846          */
1847         rcu_read_lock_bh();
1848
1849         txq = dev_pick_tx(dev, skb);
1850         q = rcu_dereference(txq->qdisc);
1851
1852 #ifdef CONFIG_NET_CLS_ACT
1853         skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1854 #endif
1855         if (q->enqueue) {
1856                 spinlock_t *root_lock = qdisc_lock(q);
1857
1858                 spin_lock(root_lock);
1859
1860                 if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
1861                         kfree_skb(skb);
1862                         rc = NET_XMIT_DROP;
1863                 } else {
1864                         rc = qdisc_enqueue_root(skb, q);
1865                         qdisc_run(q);
1866                 }
1867                 spin_unlock(root_lock);
1868
1869                 goto out;
1870         }
1871
1872         /* The device has no queue. Common case for software devices:
1873            loopback, all the sorts of tunnels...
1874
1875            Really, it is unlikely that netif_tx_lock protection is necessary
1876            here.  (f.e. loopback and IP tunnels are clean ignoring statistics
1877            counters.)
1878            However, it is possible, that they rely on protection
1879            made by us here.
1880
1881            Check this and shot the lock. It is not prone from deadlocks.
1882            Either shot noqueue qdisc, it is even simpler 8)
1883          */
1884         if (dev->flags & IFF_UP) {
1885                 int cpu = smp_processor_id(); /* ok because BHs are off */
1886
1887                 if (txq->xmit_lock_owner != cpu) {
1888
1889                         HARD_TX_LOCK(dev, txq, cpu);
1890
1891                         if (!netif_tx_queue_stopped(txq)) {
1892                                 rc = 0;
1893                                 if (!dev_hard_start_xmit(skb, dev, txq)) {
1894                                         HARD_TX_UNLOCK(dev, txq);
1895                                         goto out;
1896                                 }
1897                         }
1898                         HARD_TX_UNLOCK(dev, txq);
1899                         if (net_ratelimit())
1900                                 printk(KERN_CRIT "Virtual device %s asks to "
1901                                        "queue packet!\n", dev->name);
1902                 } else {
1903                         /* Recursion is detected! It is possible,
1904                          * unfortunately */
1905                         if (net_ratelimit())
1906                                 printk(KERN_CRIT "Dead loop on virtual device "
1907                                        "%s, fix it urgently!\n", dev->name);
1908                 }
1909         }
1910
1911         rc = -ENETDOWN;
1912         rcu_read_unlock_bh();
1913
1914 out_kfree_skb:
1915         kfree_skb(skb);
1916         return rc;
1917 out:
1918         rcu_read_unlock_bh();
1919         return rc;
1920 }
1921
1922
1923 /*=======================================================================
1924                         Receiver routines
1925   =======================================================================*/
1926
/* Limit on the per-CPU input_pkt_queue length; netif_rx() drops above it */
int netdev_max_backlog __read_mostly = 1000;
/* NOTE(review): not used in this part of the file; presumably the packet
 * budget of one RX softirq run -- confirm against the consumer.
 */
int netdev_budget __read_mostly = 300;
/* Copied into napi->weight by process_backlog() on every poll */
int weight_p __read_mostly = 64;            /* old backlog weight */

/* Per-CPU receive counters; netif_rx() increments .total and .dropped */
DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1932
1933
1934 /**
1935  *      netif_rx        -       post buffer to the network code
1936  *      @skb: buffer to post
1937  *
1938  *      This function receives a packet from a device driver and queues it for
1939  *      the upper (protocol) levels to process.  It always succeeds. The buffer
1940  *      may be dropped during processing for congestion control or by the
1941  *      protocol layers.
1942  *
1943  *      return values:
1944  *      NET_RX_SUCCESS  (no congestion)
1945  *      NET_RX_DROP     (packet was dropped)
1946  *
1947  */
1948
1949 int netif_rx(struct sk_buff *skb)
1950 {
1951         struct softnet_data *queue;
1952         unsigned long flags;
1953
1954         /* if netpoll wants it, pretend we never saw it */
1955         if (netpoll_rx(skb))
1956                 return NET_RX_DROP;
1957
1958         if (!skb->tstamp.tv64)
1959                 net_timestamp(skb);
1960
1961         /*
1962          * The code is rearranged so that the path is the most
1963          * short when CPU is congested, but is still operating.
1964          */
1965         local_irq_save(flags);
1966         queue = &__get_cpu_var(softnet_data);
1967
1968         __get_cpu_var(netdev_rx_stat).total++;
1969         if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1970                 if (queue->input_pkt_queue.qlen) {
1971 enqueue:
1972                         __skb_queue_tail(&queue->input_pkt_queue, skb);
1973                         local_irq_restore(flags);
1974                         return NET_RX_SUCCESS;
1975                 }
1976
1977                 napi_schedule(&queue->backlog);
1978                 goto enqueue;
1979         }
1980
1981         __get_cpu_var(netdev_rx_stat).dropped++;
1982         local_irq_restore(flags);
1983
1984         kfree_skb(skb);
1985         return NET_RX_DROP;
1986 }
1987
/*
 * netif_rx_ni - netif_rx() followed by immediate softirq processing
 * @skb: buffer to post
 *
 * Runs netif_rx() with preemption disabled and then executes any softirq
 * that is pending on this CPU.  Returns whatever netif_rx() returned.
 */
int netif_rx_ni(struct sk_buff *skb)
{
	int ret;

	preempt_disable();
	ret = netif_rx(skb);
	if (local_softirq_pending())
		do_softirq();
	preempt_enable();

	return ret;
}

EXPORT_SYMBOL(netif_rx_ni);
2002
2003 static void net_tx_action(struct softirq_action *h)
2004 {
2005         struct softnet_data *sd = &__get_cpu_var(softnet_data);
2006
2007         if (sd->completion_queue) {
2008                 struct sk_buff *clist;
2009
2010                 local_irq_disable();
2011                 clist = sd->completion_queue;
2012                 sd->completion_queue = NULL;
2013                 local_irq_enable();
2014
2015                 while (clist) {
2016                         struct sk_buff *skb = clist;
2017                         clist = clist->next;
2018
2019                         WARN_ON(atomic_read(&skb->users));
2020                         __kfree_skb(skb);
2021                 }
2022         }
2023
2024         if (sd->output_queue) {
2025                 struct Qdisc *head;
2026
2027                 local_irq_disable();
2028                 head = sd->output_queue;
2029                 sd->output_queue = NULL;
2030                 local_irq_enable();
2031
2032                 while (head) {
2033                         struct Qdisc *q = head;
2034                         spinlock_t *root_lock;
2035
2036                         head = head->next_sched;
2037
2038                         root_lock = qdisc_lock(q);
2039                         if (spin_trylock(root_lock)) {
2040                                 smp_mb__before_clear_bit();
2041                                 clear_bit(__QDISC_STATE_SCHED,
2042                                           &q->state);
2043                                 qdisc_run(q);
2044                                 spin_unlock(root_lock);
2045                         } else {
2046                                 if (!test_bit(__QDISC_STATE_DEACTIVATED,
2047                                               &q->state)) {
2048                                         __netif_reschedule(q);
2049                                 } else {
2050                                         smp_mb__before_clear_bit();
2051                                         clear_bit(__QDISC_STATE_SCHED,
2052                                                   &q->state);
2053                                 }
2054                         }
2055                 }
2056         }
2057 }
2058
2059 static inline int deliver_skb(struct sk_buff *skb,
2060                               struct packet_type *pt_prev,
2061                               struct net_device *orig_dev)
2062 {
2063         atomic_inc(&skb->users);
2064         return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2065 }
2066
#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
/* These hooks are defined here for ATM */
struct net_bridge;
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
						unsigned char *addr);
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;

/*
 * Bridging hook, called when the bridge module is loaded.
 * Returns NULL if the packet was consumed.
 */
struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
					struct sk_buff *skb) __read_mostly;

/*
 * Offer @skb to the bridge if its device is a bridge port.  Loopback
 * packets and packets on non-bridged devices are returned untouched.
 * A handler still pending in *@pt_prev is served first and then cleared.
 */
static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
					    struct packet_type **pt_prev, int *ret,
					    struct net_device *orig_dev)
{
	struct net_bridge_port *port;

	if (skb->pkt_type == PACKET_LOOPBACK)
		return skb;

	port = rcu_dereference(skb->dev->br_port);
	if (port == NULL)
		return skb;

	if (*pt_prev) {
		*ret = deliver_skb(skb, *pt_prev, orig_dev);
		*pt_prev = NULL;
	}

	return br_handle_frame_hook(port, skb);
}
#else
#define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
#endif
2100
#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);

/*
 * Offer @skb to the macvlan hook if its device has a macvlan port;
 * otherwise return it untouched.  A handler still pending in *@pt_prev
 * is served first and then cleared.  Returns what the hook returns.
 */
static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
					     struct packet_type **pt_prev,
					     int *ret,
					     struct net_device *orig_dev)
{
	struct packet_type *pending;

	if (skb->dev->macvlan_port == NULL)
		return skb;

	pending = *pt_prev;
	if (pending) {
		*ret = deliver_skb(skb, pending, orig_dev);
		*pt_prev = NULL;
	}

	return macvlan_handle_frame_hook(skb);
}
#else
#define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
#endif
2122
2123 #ifdef CONFIG_NET_CLS_ACT
2124 /* TODO: Maybe we should just force sch_ingress to be compiled in
2125  * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
2126  * a compare and 2 stores extra right now if we dont have it on
2127  * but have CONFIG_NET_CLS_ACT
2128  * NOTE: This doesnt stop any functionality; if you dont have
2129  * the ingress scheduler, you just cant add policies on ingress.
2130  *
2131  */
2132 static int ing_filter(struct sk_buff *skb)
2133 {
2134         struct net_device *dev = skb->dev;
2135         u32 ttl = G_TC_RTTL(skb->tc_verd);
2136         struct netdev_queue *rxq;
2137         int result = TC_ACT_OK;
2138         struct Qdisc *q;
2139
2140         if (MAX_RED_LOOP < ttl++) {
2141                 printk(KERN_WARNING
2142                        "Redir loop detected Dropping packet (%d->%d)\n",
2143                        skb->iif, dev->ifindex);
2144                 return TC_ACT_SHOT;
2145         }
2146
2147         skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
2148         skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
2149
2150         rxq = &dev->rx_queue;
2151
2152         q = rxq->qdisc;
2153         if (q != &noop_qdisc) {
2154                 spin_lock(qdisc_lock(q));
2155                 if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
2156                         result = qdisc_enqueue_root(skb, q);
2157                 spin_unlock(qdisc_lock(q));
2158         }
2159
2160         return result;
2161 }
2162
2163 static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2164                                          struct packet_type **pt_prev,
2165                                          int *ret, struct net_device *orig_dev)
2166 {
2167         if (skb->dev->rx_queue.qdisc == &noop_qdisc)
2168                 goto out;
2169
2170         if (*pt_prev) {
2171                 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2172                 *pt_prev = NULL;
2173         } else {
2174                 /* Huh? Why does turning on AF_PACKET affect this? */
2175                 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
2176         }
2177
2178         switch (ing_filter(skb)) {
2179         case TC_ACT_SHOT:
2180         case TC_ACT_STOLEN:
2181                 kfree_skb(skb);
2182                 return NULL;
2183         }
2184
2185 out:
2186         skb->tc_verd = 0;
2187         return skb;
2188 }
2189 #endif
2190
2191 /*
2192  *      netif_nit_deliver - deliver received packets to network taps
2193  *      @skb: buffer
2194  *
2195  *      This function is used to deliver incoming packets to network
2196  *      taps. It should be used when the normal netif_receive_skb path
2197  *      is bypassed, for example because of VLAN acceleration.
2198  */
2199 void netif_nit_deliver(struct sk_buff *skb)
2200 {
2201         struct packet_type *ptype;
2202
2203         if (list_empty(&ptype_all))
2204                 return;
2205
2206         skb_reset_network_header(skb);
2207         skb_reset_transport_header(skb);
2208         skb->mac_len = skb->network_header - skb->mac_header;
2209
2210         rcu_read_lock();
2211         list_for_each_entry_rcu(ptype, &ptype_all, list) {
2212                 if (!ptype->dev || ptype->dev == skb->dev)
2213                         deliver_skb(skb, ptype, skb->dev);
2214         }
2215         rcu_read_unlock();
2216 }
2217
2218 /**
2219  *      netif_receive_skb - process receive buffer from network
2220  *      @skb: buffer to process
2221  *
2222  *      netif_receive_skb() is the main receive data processing function.
2223  *      It always succeeds. The buffer may be dropped during processing
2224  *      for congestion control or by the protocol layers.
2225  *
2226  *      This function may only be called from softirq context and interrupts
2227  *      should be enabled.
2228  *
2229  *      Return values (usually ignored):
2230  *      NET_RX_SUCCESS: no congestion
2231  *      NET_RX_DROP: packet was dropped
2232  */
2233 int netif_receive_skb(struct sk_buff *skb)
2234 {
2235         struct packet_type *ptype, *pt_prev;
2236         struct net_device *orig_dev;
2237         struct net_device *null_or_orig;
2238         int ret = NET_RX_DROP;
2239         __be16 type;
2240
2241         if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
2242                 return NET_RX_SUCCESS;
2243
2244         /* if we've gotten here through NAPI, check netpoll */
2245         if (netpoll_receive_skb(skb))
2246                 return NET_RX_DROP;
2247
2248         if (!skb->tstamp.tv64)
2249                 net_timestamp(skb);
2250
2251         if (!skb->iif)
2252                 skb->iif = skb->dev->ifindex;
2253
2254         null_or_orig = NULL;
2255         orig_dev = skb->dev;
2256         if (orig_dev->master) {
2257                 if (skb_bond_should_drop(skb))
2258                         null_or_orig = orig_dev; /* deliver only exact match */
2259                 else
2260                         skb->dev = orig_dev->master;
2261         }
2262
2263         __get_cpu_var(netdev_rx_stat).total++;
2264
2265         skb_reset_network_header(skb);
2266         skb_reset_transport_header(skb);
2267         skb->mac_len = skb->network_header - skb->mac_header;
2268
2269         pt_prev = NULL;
2270
2271         rcu_read_lock();
2272
2273 #ifdef CONFIG_NET_CLS_ACT
2274         if (skb->tc_verd & TC_NCLS) {
2275                 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
2276                 goto ncls;
2277         }
2278 #endif
2279
2280         list_for_each_entry_rcu(ptype, &ptype_all, list) {
2281                 if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2282                     ptype->dev == orig_dev) {
2283                         if (pt_prev)
2284                                 ret = deliver_skb(skb, pt_prev, orig_dev);
2285                         pt_prev = ptype;
2286                 }
2287         }
2288
2289 #ifdef CONFIG_NET_CLS_ACT
2290         skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
2291         if (!skb)
2292                 goto out;
2293 ncls:
2294 #endif
2295
2296         skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
2297         if (!skb)
2298                 goto out;
2299         skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
2300         if (!skb)
2301                 goto out;
2302
2303         skb_orphan(skb);
2304
2305         type = skb->protocol;
2306         list_for_each_entry_rcu(ptype,
2307                         &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
2308                 if (ptype->type == type &&
2309                     (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2310                      ptype->dev == orig_dev)) {
2311                         if (pt_prev)
2312                                 ret = deliver_skb(skb, pt_prev, orig_dev);
2313                         pt_prev = ptype;
2314                 }
2315         }
2316
2317         if (pt_prev) {
2318                 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2319         } else {
2320                 kfree_skb(skb);
2321                 /* Jamal, now you will not able to escape explaining
2322                  * me how you were going to use this. :-)
2323                  */
2324                 ret = NET_RX_DROP;
2325         }
2326
2327 out:
2328         rcu_read_unlock();
2329         return ret;
2330 }
2331
2332 /* Network device is going away, flush any packets still pending  */
2333 static void flush_backlog(void *arg)
2334 {
2335         struct net_device *dev = arg;
2336         struct softnet_data *queue = &__get_cpu_var(softnet_data);
2337         struct sk_buff *skb, *tmp;
2338
2339         skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
2340                 if (skb->dev == dev) {
2341                         __skb_unlink(skb, &queue->input_pkt_queue);
2342                         kfree_skb(skb);
2343                 }
2344 }
2345
2346 static int napi_gro_complete(struct sk_buff *skb)
2347 {
2348         struct packet_type *ptype;
2349         __be16 type = skb->protocol;
2350         struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
2351         int err = -ENOENT;
2352
2353         if (NAPI_GRO_CB(skb)->count == 1) {
2354                 skb_shinfo(skb)->gso_size = 0;
2355                 goto out;
2356         }
2357
2358         rcu_read_lock();
2359         list_for_each_entry_rcu(ptype, head, list) {
2360                 if (ptype->type != type || ptype->dev || !ptype->gro_complete)
2361                         continue;
2362
2363                 err = ptype->gro_complete(skb);
2364                 break;
2365         }
2366         rcu_read_unlock();
2367
2368         if (err) {
2369                 WARN_ON(&ptype->list == head);
2370                 kfree_skb(skb);
2371                 return NET_RX_SUCCESS;
2372         }
2373
2374 out:
2375         return netif_receive_skb(skb);
2376 }
2377
2378 void napi_gro_flush(struct napi_struct *napi)
2379 {
2380         struct sk_buff *skb, *next;
2381
2382         for (skb = napi->gro_list; skb; skb = next) {
2383                 next = skb->next;
2384                 skb->next = NULL;
2385                 napi_gro_complete(skb);
2386         }
2387
2388         napi->gro_count = 0;
2389         napi->gro_list = NULL;
2390 }
2391 EXPORT_SYMBOL(napi_gro_flush);
2392
2393 int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2394 {
2395         struct sk_buff **pp = NULL;
2396         struct packet_type *ptype;
2397         __be16 type = skb->protocol;
2398         struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
2399         int same_flow;
2400         int mac_len;
2401         int ret;
2402
2403         if (!(skb->dev->features & NETIF_F_GRO))
2404                 goto normal;
2405
2406         if (skb_is_gso(skb) || skb_shinfo(skb)->frag_list)
2407                 goto normal;
2408
2409         rcu_read_lock();
2410         list_for_each_entry_rcu(ptype, head, list) {
2411                 if (ptype->type != type || ptype->dev || !ptype->gro_receive)
2412                         continue;
2413
2414                 skb_set_network_header(skb, skb_gro_offset(skb));
2415                 mac_len = skb->network_header - skb->mac_header;
2416                 skb->mac_len = mac_len;
2417                 NAPI_GRO_CB(skb)->same_flow = 0;
2418                 NAPI_GRO_CB(skb)->flush = 0;
2419                 NAPI_GRO_CB(skb)->free = 0;
2420
2421                 pp = ptype->gro_receive(&napi->gro_list, skb);
2422                 break;
2423         }
2424         rcu_read_unlock();
2425
2426         if (&ptype->list == head)
2427                 goto normal;
2428
2429         same_flow = NAPI_GRO_CB(skb)->same_flow;
2430         ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
2431
2432         if (pp) {
2433                 struct sk_buff *nskb = *pp;
2434
2435                 *pp = nskb->next;
2436                 nskb->next = NULL;
2437                 napi_gro_complete(nskb);
2438                 napi->gro_count--;
2439         }
2440
2441         if (same_flow)
2442                 goto ok;
2443
2444         if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
2445                 goto normal;
2446
2447         napi->gro_count++;
2448         NAPI_GRO_CB(skb)->count = 1;
2449         skb_shinfo(skb)->gso_size = skb_gro_len(skb);
2450         skb->next = napi->gro_list;
2451         napi->gro_list = skb;
2452         ret = GRO_HELD;
2453
2454 pull:
2455         if (unlikely(!pskb_may_pull(skb, skb_gro_offset(skb)))) {
2456                 if (napi->gro_list == skb)
2457                         napi->gro_list = skb->next;
2458                 ret = GRO_DROP;
2459         }
2460
2461 ok:
2462         return ret;
2463
2464 normal:
2465         ret = GRO_NORMAL;
2466         goto pull;
2467 }
2468 EXPORT_SYMBOL(dev_gro_receive);
2469
2470 static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2471 {
2472         struct sk_buff *p;
2473
2474         if (netpoll_rx_on(skb))
2475                 return GRO_NORMAL;
2476
2477         for (p = napi->gro_list; p; p = p->next) {
2478                 NAPI_GRO_CB(p)->same_flow = (p->dev == skb->dev)
2479                         && !compare_ether_header(skb_mac_header(p),
2480                                                  skb_gro_mac_header(skb));
2481                 NAPI_GRO_CB(p)->flush = 0;
2482         }
2483
2484         return dev_gro_receive(napi, skb);
2485 }
2486
2487 int napi_skb_finish(int ret, struct sk_buff *skb)
2488 {
2489         int err = NET_RX_SUCCESS;
2490
2491         switch (ret) {
2492         case GRO_NORMAL:
2493                 return netif_receive_skb(skb);
2494
2495         case GRO_DROP:
2496                 err = NET_RX_DROP;
2497                 /* fall through */
2498
2499         case GRO_MERGED_FREE:
2500                 kfree_skb(skb);
2501                 break;
2502         }
2503
2504         return err;
2505 }
2506 EXPORT_SYMBOL(napi_skb_finish);
2507
2508 void skb_gro_reset_offset(struct sk_buff *skb)
2509 {
2510         NAPI_GRO_CB(skb)->data_offset = 0;
2511         NAPI_GRO_CB(skb)->frag0 = NULL;
2512         NAPI_GRO_CB(skb)->frag0_len = 0;
2513
2514         if (skb->mac_header == skb->tail &&
2515             !PageHighMem(skb_shinfo(skb)->frags[0].page)) {
2516                 NAPI_GRO_CB(skb)->frag0 =
2517                         page_address(skb_shinfo(skb)->frags[0].page) +
2518                         skb_shinfo(skb)->frags[0].page_offset;
2519                 NAPI_GRO_CB(skb)->frag0_len = skb_shinfo(skb)->frags[0].size;
2520         }
2521 }
2522 EXPORT_SYMBOL(skb_gro_reset_offset);
2523
/*
 * napi_gro_receive - run a received skb through GRO
 * @napi: NAPI context the packet arrived on
 * @skb: received packet
 *
 * Resets the skb's GRO offsets, obtains a verdict from
 * __napi_gro_receive() and lets napi_skb_finish() act on it.
 */
int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
	int verdict;

	skb_gro_reset_offset(skb);
	verdict = __napi_gro_receive(napi, skb);

	return napi_skb_finish(verdict, skb);
}
EXPORT_SYMBOL(napi_gro_receive);
2531
2532 void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
2533 {
2534         __skb_pull(skb, skb_headlen(skb));
2535         skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
2536
2537         napi->skb = skb;
2538 }
2539 EXPORT_SYMBOL(napi_reuse_skb);
2540
2541 struct sk_buff *napi_get_frags(struct napi_struct *napi)
2542 {
2543         struct net_device *dev = napi->dev;
2544         struct sk_buff *skb = napi->skb;
2545
2546         if (!skb) {
2547                 skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN);
2548                 if (!skb)
2549                         goto out;
2550
2551                 skb_reserve(skb, NET_IP_ALIGN);
2552
2553                 napi->skb = skb;
2554         }
2555
2556 out:
2557         return skb;
2558 }
2559 EXPORT_SYMBOL(napi_get_frags);
2560
2561 int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
2562 {
2563         int err = NET_RX_SUCCESS;
2564
2565         switch (ret) {
2566         case GRO_NORMAL:
2567         case GRO_HELD:
2568                 skb->protocol = eth_type_trans(skb, napi->dev);
2569
2570                 if (ret == GRO_NORMAL)
2571                         return netif_receive_skb(skb);
2572
2573                 skb_gro_pull(skb, -ETH_HLEN);
2574                 break;
2575
2576         case GRO_DROP:
2577                 err = NET_RX_DROP;
2578                 /* fall through */
2579
2580         case GRO_MERGED_FREE:
2581                 napi_reuse_skb(napi, skb);
2582                 break;
2583         }
2584
2585         return err;
2586 }
2587 EXPORT_SYMBOL(napi_frags_finish);
2588
2589 struct sk_buff *napi_frags_skb(struct napi_struct *napi)
2590 {
2591         struct sk_buff *skb = napi->skb;
2592         struct ethhdr *eth;
2593
2594         napi->skb = NULL;
2595
2596         skb_reset_mac_header(skb);
2597         skb_gro_reset_offset(skb);
2598
2599         eth = skb_gro_header(skb, sizeof(*eth));
2600         if (!eth) {
2601                 napi_reuse_skb(napi, skb);
2602                 skb = NULL;
2603                 goto out;
2604         }
2605
2606         skb_gro_pull(skb, sizeof(*eth));
2607
2608         /*
2609          * This works because the only protocols we care about don't require
2610          * special handling.  We'll fix it up properly at the end.
2611          */
2612         skb->protocol = eth->h_proto;
2613
2614 out:
2615         return skb;
2616 }
2617 EXPORT_SYMBOL(napi_frags_skb);
2618
2619 int napi_gro_frags(struct napi_struct *napi)
2620 {
2621         struct sk_buff *skb = napi_frags_skb(napi);
2622
2623         if (!skb)
2624                 return NET_RX_DROP;
2625
2626         return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
2627 }
2628 EXPORT_SYMBOL(napi_gro_frags);
2629
2630 static int process_backlog(struct napi_struct *napi, int quota)
2631 {
2632         int work = 0;
2633         struct softnet_data *queue = &__get_cpu_var(softnet_data);
2634         unsigned long start_time = jiffies;
2635
2636         napi->weight = weight_p;
2637         do {
2638                 struct sk_buff *skb;
2639
2640                 local_irq_disable();
2641                 skb = __skb_dequeue(&queue->input_pkt_queue);
2642                 if (!skb) {
2643                         __napi_complete(napi);
2644                         local_irq_enable();
2645                         break;
2646                 }
2647                 local_irq_enable();
2648
2649                 netif_receive_skb(skb);
2650         } while (++work < quota && jiffies == start_time);
2651
2652         return work;
2653 }
2654
2655 /**
2656  * __napi_schedule - schedule for receive
2657  * @n: entry to schedule
2658  *
2659  * The entry's receive function will be scheduled to run
2660  */
2661 void __napi_schedule(struct napi_struct *n)
2662 {
2663         unsigned long flags;
2664
2665         local_irq_save(flags);
2666         list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2667         __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2668         local_irq_restore(flags);
2669 }
2670 EXPORT_SYMBOL(__napi_schedule);
2671
2672 void __napi_complete(struct napi_struct *n)
2673 {
2674         BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
2675         BUG_ON(n->gro_list);
2676
2677         list_del(&n->poll_list);
2678         smp_mb__before_clear_bit();
2679         clear_bit(NAPI_STATE_SCHED, &n->state);
2680 }
2681 EXPORT_SYMBOL(__napi_complete);
2682
2683 void napi_complete(struct napi_struct *n)
2684 {
2685         unsigned long flags;
2686
2687         /*
2688          * don't let napi dequeue from the cpu poll list
2689          * just in case its running on a different cpu
2690          */
2691         if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
2692                 return;
2693
2694         napi_gro_flush(n);
2695         local_irq_save(flags);
2696         __napi_complete(n);
2697         local_irq_restore(flags);
2698 }
2699 EXPORT_SYMBOL(napi_complete);
2700
2701 void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
2702                     int (*poll)(struct napi_struct *, int), int weight)
2703 {
2704         INIT_LIST_HEAD(&napi->poll_list);
2705         napi->gro_count = 0;
2706         napi->gro_list = NULL;
2707         napi->skb = NULL;
2708         napi->poll = poll;
2709         napi->weight = weight;
2710         list_add(&napi->dev_list, &dev->napi_list);
2711         napi->dev = dev;
2712 #ifdef CONFIG_NETPOLL
2713         spin_lock_init(&napi->poll_lock);
2714         napi->poll_owner = -1;
2715 #endif
2716         set_bit(NAPI_STATE_SCHED, &napi->state);
2717 }
2718 EXPORT_SYMBOL(netif_napi_add);
2719
2720 void netif_napi_del(struct napi_struct *napi)
2721 {
2722         struct sk_buff *skb, *next;
2723
2724         list_del_init(&napi->dev_list);
2725         napi_free_frags(napi);
2726
2727         for (skb = napi->gro_list; skb; skb = next) {
2728                 next = skb->next;
2729                 skb->next = NULL;
2730                 kfree_skb(skb);
2731         }
2732
2733         napi->gro_list = NULL;
2734         napi->gro_count = 0;
2735 }
2736 EXPORT_SYMBOL(netif_napi_del);
2737
2738
2739 static void net_rx_action(struct softirq_action *h)
2740 {
2741         struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
2742         unsigned long time_limit = jiffies + 2;
2743         int budget = netdev_budget;
2744         void *have;
2745
2746         local_irq_disable();
2747
2748         while (!list_empty(list)) {
2749                 struct napi_struct *n;
2750                 int work, weight;
2751
2752                 /* If softirq window is exhuasted then punt.
2753                  * Allow this to run for 2 jiffies since which will allow
2754                  * an average latency of 1.5/HZ.
2755                  */
2756                 if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
2757                         goto softnet_break;
2758
2759                 local_irq_enable();
2760
2761                 /* Even though interrupts have been re-enabled, this
2762                  * access is safe because interrupts can only add new
2763                  * entries to the tail of this list, and only ->poll()
2764                  * calls can remove this head entry from the list.
2765                  */
2766                 n = list_entry(list->next, struct napi_struct, poll_list);
2767
2768                 have = netpoll_poll_lock(n);
2769
2770                 weight = n->weight;
2771
2772                 /* This NAPI_STATE_SCHED test is for avoiding a race
2773                  * with netpoll's poll_napi().  Only the entity which
2774                  * obtains the lock and sees NAPI_STATE_SCHED set will
2775                  * actually make the ->poll() call.  Therefore we avoid
2776                  * accidently calling ->poll() when NAPI is not scheduled.
2777                  */
2778                 work = 0;
2779                 if (test_bit(NAPI_STATE_SCHED, &n->state)) {
2780                         work = n->poll(n, weight);
2781                         trace_napi_poll(n);
2782                 }
2783
2784                 WARN_ON_ONCE(work > weight);
2785
2786                 budget -= work;
2787
2788                 local_irq_disable();
2789
2790                 /* Drivers must not modify the NAPI state if they
2791                  * consume the entire weight.  In such cases this code
2792                  * still "owns" the NAPI instance and therefore can
2793                  * move the instance around on the list at-will.
2794                  */
2795                 if (unlikely(work == weight)) {
2796                         if (unlikely(napi_disable_pending(n)))
2797                                 __napi_complete(n);
2798                         else
2799                                 list_move_tail(&n->poll_list, list);
2800                 }
2801
2802                 netpoll_poll_unlock(have);
2803         }
2804 out:
2805         local_irq_enable();
2806
2807 #ifdef CONFIG_NET_DMA
2808         /*
2809          * There may not be any more sk_buffs coming right now, so push
2810          * any pending DMA copies to hardware
2811          */
2812         dma_issue_pending_all();
2813 #endif
2814
2815         return;
2816
2817 softnet_break:
2818         __get_cpu_var(netdev_rx_stat).time_squeeze++;
2819         __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2820         goto out;
2821 }
2822
2823 static gifconf_func_t * gifconf_list [NPROTO];
2824
2825 /**
2826  *      register_gifconf        -       register a SIOCGIF handler
2827  *      @family: Address family
2828  *      @gifconf: Function handler
2829  *
2830  *      Register protocol dependent address dumping routines. The handler
2831  *      that is passed must not be freed or reused until it has been replaced
2832  *      by another handler.
2833  */
2834 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
2835 {
2836         if (family >= NPROTO)
2837                 return -EINVAL;
2838         gifconf_list[family] = gifconf;
2839         return 0;
2840 }
2841
2842
2843 /*
2844  *      Map an interface index to its name (SIOCGIFNAME)
2845  */
2846
2847 /*
2848  *      We need this ioctl for efficient implementation of the
2849  *      if_indextoname() function required by the IPv6 API.  Without
2850  *      it, we would have to search all the interfaces to find a
2851  *      match.  --pb
2852  */
2853
2854 static int dev_ifname(struct net *net, struct ifreq __user *arg)
2855 {
2856         struct net_device *dev;
2857         struct ifreq ifr;
2858
2859         /*
2860          *      Fetch the caller's info block.
2861          */
2862
2863         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2864                 return -EFAULT;
2865
2866         read_lock(&dev_base_lock);
2867         dev = __dev_get_by_index(net, ifr.ifr_ifindex);
2868         if (!dev) {
2869                 read_unlock(&dev_base_lock);
2870                 return -ENODEV;
2871         }
2872
2873         strcpy(ifr.ifr_name, dev->name);
2874         read_unlock(&dev_base_lock);
2875
2876         if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2877                 return -EFAULT;
2878         return 0;
2879 }
2880
2881 /*
2882  *      Perform a SIOCGIFCONF call. This structure will change
2883  *      size eventually, and there is nothing I can do about it.
2884  *      Thus we will need a 'compatibility mode'.
2885  */
2886
2887 static int dev_ifconf(struct net *net, char __user *arg)
2888 {
2889         struct ifconf ifc;
2890         struct net_device *dev;
2891         char __user *pos;
2892         int len;
2893         int total;
2894         int i;
2895
2896         /*
2897          *      Fetch the caller's info block.
2898          */
2899
2900         if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2901                 return -EFAULT;
2902
2903         pos = ifc.ifc_buf;
2904         len = ifc.ifc_len;
2905
2906         /*
2907          *      Loop over the interfaces, and write an info block for each.
2908          */
2909
2910         total = 0;
2911         for_each_netdev(net, dev) {
2912                 for (i = 0; i < NPROTO; i++) {
2913                         if (gifconf_list[i]) {
2914                                 int done;
2915                                 if (!pos)
2916                                         done = gifconf_list[i](dev, NULL, 0);
2917                                 else
2918                                         done = gifconf_list[i](dev, pos + total,
2919                                                                len - total);
2920                                 if (done < 0)
2921                                         return -EFAULT;
2922                                 total += done;
2923                         }
2924                 }
2925         }
2926
2927         /*
2928          *      All done.  Write the updated control block back to the caller.
2929          */
2930         ifc.ifc_len = total;
2931
2932         /*
2933          *      Both BSD and Solaris return 0 here, so we do too.
2934          */
2935         return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2936 }
2937
2938 #ifdef CONFIG_PROC_FS
2939 /*
2940  *      This is invoked by the /proc filesystem handler to display a device
2941  *      in detail.
2942  */
2943 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2944         __acquires(dev_base_lock)
2945 {
2946         struct net *net = seq_file_net(seq);
2947         loff_t off;
2948         struct net_device *dev;
2949
2950         read_lock(&dev_base_lock);
2951         if (!*pos)
2952                 return SEQ_START_TOKEN;
2953
2954         off = 1;
2955         for_each_netdev(net, dev)
2956                 if (off++ == *pos)
2957                         return dev;
2958
2959         return NULL;
2960 }
2961
2962 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2963 {
2964         struct net *net = seq_file_net(seq);
2965         ++*pos;
2966         return v == SEQ_START_TOKEN ?
2967                 first_net_device(net) : next_net_device((struct net_device *)v);
2968 }
2969
2970 void dev_seq_stop(struct seq_file *seq, void *v)
2971         __releases(dev_base_lock)
2972 {
2973         read_unlock(&dev_base_lock);
2974 }
2975
2976 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2977 {
2978         const struct net_device_stats *stats = dev_get_stats(dev);
2979
2980         seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2981                    "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2982                    dev->name, stats->rx_bytes, stats->rx_packets,
2983                    stats->rx_errors,
2984                    stats->rx_dropped + stats->rx_missed_errors,
2985                    stats->rx_fifo_errors,
2986                    stats->rx_length_errors + stats->rx_over_errors +
2987                     stats->rx_crc_errors + stats->rx_frame_errors,
2988                    stats->rx_compressed, stats->multicast,
2989                    stats->tx_bytes, stats->tx_packets,
2990                    stats->tx_errors, stats->tx_dropped,
2991                    stats->tx_fifo_errors, stats->collisions,
2992                    stats->tx_carrier_errors +
2993                     stats->tx_aborted_errors +
2994                     stats->tx_window_errors +
2995                     stats->tx_heartbeat_errors,
2996                    stats->tx_compressed);
2997 }
2998
2999 /*
3000  *      Called from the PROCfs module. This now uses the new arbitrary sized
3001  *      /proc/net interface to create /proc/net/dev
3002  */
3003 static int dev_seq_show(struct seq_file *seq, void *v)
3004 {
3005         if (v == SEQ_START_TOKEN)
3006                 seq_puts(seq, "Inter-|   Receive                            "
3007                               "                    |  Transmit\n"
3008                               " face |bytes    packets errs drop fifo frame "
3009                               "compressed multicast|bytes    packets errs "
3010                               "drop fifo colls carrier compressed\n");
3011         else
3012                 dev_seq_printf_stats(seq, v);
3013         return 0;
3014 }
3015
3016 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
3017 {
3018         struct netif_rx_stats *rc = NULL;
3019
3020         while (*pos < nr_cpu_ids)
3021                 if (cpu_online(*pos)) {
3022                         rc = &per_cpu(netdev_rx_stat, *pos);
3023                         break;
3024                 } else
3025                         ++*pos;
3026         return rc;
3027 }
3028
3029 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
3030 {
3031         return softnet_get_online(pos);
3032 }
3033
3034 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3035 {
3036         ++*pos;
3037         return softnet_get_online(pos);
3038 }
3039
/* seq_file stop callback: the softnet iterator holds nothing to release. */
static void softnet_seq_stop(struct seq_file *seq, void *v)
{
}
3043
3044 static int softnet_seq_show(struct seq_file *seq, void *v)
3045 {
3046         struct netif_rx_stats *s = v;
3047
3048         seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
3049                    s->total, s->dropped, s->time_squeeze, 0,
3050                    0, 0, 0, 0, /* was fastroute */
3051                    s->cpu_collision );
3052         return 0;
3053 }
3054
3055 static const struct seq_operations dev_seq_ops = {
3056         .start = dev_seq_start,
3057         .next  = dev_seq_next,
3058         .stop  = dev_seq_stop,
3059         .show  = dev_seq_show,
3060 };
3061
3062 static int dev_seq_open(struct inode *inode, struct file *file)
3063 {
3064         return seq_open_net(inode, file, &dev_seq_ops,
3065                             sizeof(struct seq_net_private));
3066 }
3067
3068 static const struct file_operations dev_seq_fops = {
3069         .owner   = THIS_MODULE,
3070         .open    = dev_seq_open,
3071         .read    = seq_read,
3072         .llseek  = seq_lseek,
3073         .release = seq_release_net,
3074 };
3075
3076 static const struct seq_operations softnet_seq_ops = {
3077         .start = softnet_seq_start,
3078         .next  = softnet_seq_next,
3079         .stop  = softnet_seq_stop,
3080         .show  = softnet_seq_show,
3081 };
3082
3083 static int softnet_seq_open(struct inode *inode, struct file *file)
3084 {
3085         return seq_open(file, &softnet_seq_ops);
3086 }
3087
3088 static const struct file_operations softnet_seq_fops = {
3089         .owner   = THIS_MODULE,
3090         .open    = softnet_seq_open,
3091         .read    = seq_read,
3092         .llseek  = seq_lseek,
3093         .release = seq_release,
3094 };
3095
3096 static void *ptype_get_idx(loff_t pos)
3097 {
3098         struct packet_type *pt = NULL;
3099         loff_t i = 0;
3100         int t;
3101
3102         list_for_each_entry_rcu(pt, &ptype_all, list) {
3103                 if (i == pos)
3104                         return pt;
3105                 ++i;
3106         }
3107
3108         for (t = 0; t < PTYPE_HASH_SIZE; t++) {
3109                 list_for_each_entry_rcu(pt, &ptype_base[t], list) {
3110                         if (i == pos)
3111                                 return pt;
3112                         ++i;
3113                 }
3114         }
3115         return NULL;
3116 }
3117
3118 static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
3119         __acquires(RCU)
3120 {
3121         rcu_read_lock();
3122         return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
3123 }
3124
3125 static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3126 {
3127         struct packet_type *pt;
3128         struct list_head *nxt;
3129         int hash;
3130
3131         ++*pos;
3132         if (v == SEQ_START_TOKEN)
3133                 return ptype_get_idx(0);
3134
3135         pt = v;
3136         nxt = pt->list.next;
3137         if (pt->type == htons(ETH_P_ALL)) {
3138                 if (nxt != &ptype_all)
3139                         goto found;
3140                 hash = 0;
3141                 nxt = ptype_base[0].next;
3142         } else
3143                 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
3144
3145         while (nxt == &ptype_base[hash]) {
3146                 if (++hash >= PTYPE_HASH_SIZE)
3147                         return NULL;
3148                 nxt = ptype_base[hash].next;
3149         }
3150 found:
3151         return list_entry(nxt, struct packet_type, list);
3152 }
3153
3154 static void ptype_seq_stop(struct seq_file *seq, void *v)
3155         __releases(RCU)
3156 {
3157         rcu_read_unlock();
3158 }
3159
3160 static int ptype_seq_show(struct seq_file *seq, void *v)
3161 {
3162         struct packet_type *pt = v;
3163
3164         if (v == SEQ_START_TOKEN)
3165                 seq_puts(seq, "Type Device      Function\n");
3166         else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
3167                 if (pt->type == htons(ETH_P_ALL))
3168                         seq_puts(seq, "ALL ");
3169                 else
3170                         seq_printf(seq, "%04x", ntohs(pt->type));
3171
3172                 seq_printf(seq, " %-8s %pF\n",
3173                            pt->dev ? pt->dev->name : "", pt->func);
3174         }
3175
3176         return 0;
3177 }
3178
3179 static const struct seq_operations ptype_seq_ops = {
3180         .start = ptype_seq_start,
3181         .next  = ptype_seq_next,
3182         .stop  = ptype_seq_stop,
3183         .show  = ptype_seq_show,
3184 };
3185
3186 static int ptype_seq_open(struct inode *inode, struct file *file)
3187 {
3188         return seq_open_net(inode, file, &ptype_seq_ops,
3189                         sizeof(struct seq_net_private));
3190 }
3191
3192 static const struct file_operations ptype_seq_fops = {
3193         .owner   = THIS_MODULE,
3194         .open    = ptype_seq_open,
3195         .read    = seq_read,
3196         .llseek  = seq_lseek,
3197         .release = seq_release_net,
3198 };
3199
3200
3201 static int __net_init dev_proc_net_init(struct net *net)
3202 {
3203         int rc = -ENOMEM;
3204
3205         if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
3206                 goto out;
3207         if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
3208                 goto out_dev;
3209         if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
3210                 goto out_softnet;
3211
3212         if (wext_proc_init(net))
3213                 goto out_ptype;
3214         rc = 0;
3215 out:
3216         return rc;
3217 out_ptype:
3218         proc_net_remove(net, "ptype");
3219 out_softnet:
3220         proc_net_remove(net, "softnet_stat");
3221 out_dev:
3222         proc_net_remove(net, "dev");
3223         goto out;
3224 }
3225
3226 static void __net_exit dev_proc_net_exit(struct net *net)
3227 {
3228         wext_proc_exit(net);
3229
3230         proc_net_remove(net, "ptype");
3231         proc_net_remove(net, "softnet_stat");
3232         proc_net_remove(net, "dev");
3233 }
3234
3235 static struct pernet_operations __net_initdata dev_proc_ops = {
3236         .init = dev_proc_net_init,
3237         .exit = dev_proc_net_exit,
3238 };
3239
3240 static int __init dev_proc_init(void)
3241 {
3242         return register_pernet_subsys(&dev_proc_ops);
3243 }
3244 #else
3245 #define dev_proc_init() 0
3246 #endif  /* CONFIG_PROC_FS */
3247
3248
3249 /**
3250  *      netdev_set_master       -       set up master/slave pair
3251  *      @slave: slave device
3252  *      @master: new master device
3253  *
3254  *      Changes the master device of the slave. Pass %NULL to break the
3255  *      bonding. The caller must hold the RTNL semaphore. On a failure
3256  *      a negative errno code is returned. On success the reference counts
3257  *      are adjusted, %RTM_NEWLINK is sent to the routing socket and the
3258  *      function returns zero.
3259  */
3260 int netdev_set_master(struct net_device *slave, struct net_device *master)
3261 {
3262         struct net_device *old = slave->master;
3263
3264         ASSERT_RTNL();
3265
3266         if (master) {
3267                 if (old)
3268                         return -EBUSY;
3269                 dev_hold(master);
3270         }
3271
3272         slave->master = master;
3273
3274         synchronize_net();
3275
3276         if (old)
3277                 dev_put(old);
3278
3279         if (master)
3280                 slave->flags |= IFF_SLAVE;
3281         else
3282                 slave->flags &= ~IFF_SLAVE;
3283
3284         rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
3285         return 0;
3286 }
3287
3288 static void dev_change_rx_flags(struct net_device *dev, int flags)
3289 {
3290         const struct net_device_ops *ops = dev->netdev_ops;
3291
3292         if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
3293                 ops->ndo_change_rx_flags(dev, flags);
3294 }
3295
3296 static int __dev_set_promiscuity(struct net_device *dev, int inc)
3297 {
3298         unsigned short old_flags = dev->flags;
3299         uid_t uid;
3300         gid_t gid;
3301
3302         ASSERT_RTNL();
3303
3304         dev->flags |= IFF_PROMISC;
3305         dev->promiscuity += inc;
3306         if (dev->promiscuity == 0) {
3307                 /*
3308                  * Avoid overflow.
3309                  * If inc causes overflow, untouch promisc and return error.
3310                  */
3311                 if (inc < 0)
3312                         dev->flags &= ~IFF_PROMISC;
3313                 else {
3314                         dev->promiscuity -= inc;
3315                         printk(KERN_WARNING "%s: promiscuity touches roof, "
3316                                 "set promiscuity failed, promiscuity feature "
3317                                 "of device might be broken.\n", dev->name);
3318                         return -EOVERFLOW;
3319                 }
3320         }
3321         if (dev->flags != old_flags) {
3322                 printk(KERN_INFO "device %s %s promiscuous mode\n",
3323                        dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
3324                                                                "left");
3325                 if (audit_enabled) {
3326                         current_uid_gid(&uid, &gid);
3327                         audit_log(current->audit_context, GFP_ATOMIC,
3328                                 AUDIT_ANOM_PROMISCUOUS,
3329                                 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
3330                                 dev->name, (dev->flags & IFF_PROMISC),
3331                                 (old_flags & IFF_PROMISC),
3332                                 audit_get_loginuid(current),
3333                                 uid, gid,
3334                                 audit_get_sessionid(current));
3335                 }
3336
3337                 dev_change_rx_flags(dev, IFF_PROMISC);
3338         }
3339         return 0;
3340 }
3341
3342 /**
3343  *      dev_set_promiscuity     - update promiscuity count on a device
3344  *      @dev: device
3345  *      @inc: modifier
3346  *
3347  *      Add or remove promiscuity from a device. While the count in the device
3348  *      remains above zero the interface remains promiscuous. Once it hits zero
3349  *      the device reverts back to normal filtering operation. A negative inc
3350  *      value is used to drop promiscuity on the device.
3351  *      Return 0 if successful or a negative errno code on error.
3352  */
3353 int dev_set_promiscuity(struct net_device *dev, int inc)
3354 {
3355         unsigned short old_flags = dev->flags;
3356         int err;
3357
3358         err = __dev_set_promiscuity(dev, inc);
3359         if (err < 0)
3360                 return err;
3361         if (dev->flags != old_flags)
3362                 dev_set_rx_mode(dev);
3363         return err;
3364 }
3365
3366 /**
3367  *      dev_set_allmulti        - update allmulti count on a device
3368  *      @dev: device
3369  *      @inc: modifier
3370  *
3371  *      Add or remove reception of all multicast frames to a device. While the
3372  *      count in the device remains above zero the interface remains listening
3373  *      to all interfaces. Once it hits zero the device reverts back to normal
3374  *      filtering operation. A negative @inc value is used to drop the counter
3375  *      when releasing a resource needing all multicasts.
3376  *      Return 0 if successful or a negative errno code on error.
3377  */
3378
3379 int dev_set_allmulti(struct net_device *dev, int inc)
3380 {
3381         unsigned short old_flags = dev->flags;
3382
3383         ASSERT_RTNL();
3384
3385         dev->flags |= IFF_ALLMULTI;
3386         dev->allmulti += inc;
3387         if (dev->allmulti == 0) {
3388                 /*
3389                  * Avoid overflow.
3390                  * If inc causes overflow, untouch allmulti and return error.
3391                  */
3392                 if (inc < 0)
3393                         dev->flags &= ~IFF_ALLMULTI;
3394                 else {
3395                         dev->allmulti -= inc;
3396                         printk(KERN_WARNING "%s: allmulti touches roof, "
3397                                 "set allmulti failed, allmulti feature of "
3398                                 "device might be broken.\n", dev->name);
3399                         return -EOVERFLOW;
3400                 }
3401         }
3402         if (dev->flags ^ old_flags) {
3403                 dev_change_rx_flags(dev, IFF_ALLMULTI);
3404                 dev_set_rx_mode(dev);
3405         }
3406         return 0;
3407 }
3408
3409 /*
3410  *      Upload unicast and multicast address lists to device and
3411  *      configure RX filtering. When the device doesn't support unicast
3412  *      filtering it is put in promiscuous mode while unicast addresses
3413  *      are present.
3414  */
3415 void __dev_set_rx_mode(struct net_device *dev)
3416 {
3417         const struct net_device_ops *ops = dev->netdev_ops;
3418
3419         /* dev_open will call this function so the list will stay sane. */
3420         if (!(dev->flags&IFF_UP))
3421                 return;
3422
3423         if (!netif_device_present(dev))
3424                 return;
3425
3426         if (ops->ndo_set_rx_mode)
3427                 ops->ndo_set_rx_mode(dev);
3428         else {
3429                 /* Unicast addresses changes may only happen under the rtnl,
3430                  * therefore calling __dev_set_promiscuity here is safe.
3431                  */
3432                 if (dev->uc_count > 0 && !dev->uc_promisc) {
3433                         __dev_set_promiscuity(dev, 1);
3434                         dev->uc_promisc = 1;
3435                 } else if (dev->uc_count == 0 && dev->uc_promisc) {
3436                         __dev_set_promiscuity(dev, -1);
3437                         dev->uc_promisc = 0;
3438                 }
3439
3440                 if (ops->ndo_set_multicast_list)
3441                         ops->ndo_set_multicast_list(dev);
3442         }
3443 }
3444
/* Locked wrapper: run __dev_set_rx_mode() under the device address lock. */
void dev_set_rx_mode(struct net_device *dev)
{
	netif_addr_lock_bh(dev);

	__dev_set_rx_mode(dev);

	netif_addr_unlock_bh(dev);
}
3451
3452 /* hw addresses list handling functions */
3453
3454 static int __hw_addr_add(struct list_head *list, unsigned char *addr,
3455                          int addr_len, unsigned char addr_type)
3456 {
3457         struct netdev_hw_addr *ha;
3458         int alloc_size;
3459
3460         if (addr_len > MAX_ADDR_LEN)
3461                 return -EINVAL;
3462
3463         alloc_size = sizeof(*ha);
3464         if (alloc_size < L1_CACHE_BYTES)
3465                 alloc_size = L1_CACHE_BYTES;
3466         ha = kmalloc(alloc_size, GFP_ATOMIC);
3467         if (!ha)
3468                 return -ENOMEM;
3469         memcpy(ha->addr, addr, addr_len);
3470         ha->type = addr_type;
3471         list_add_tail_rcu(&ha->list, list);
3472         return 0;
3473 }
3474
3475 static void ha_rcu_free(struct rcu_head *head)
3476 {
3477         struct netdev_hw_addr *ha;
3478
3479         ha = container_of(head, struct netdev_hw_addr, rcu_head);
3480         kfree(ha);
3481 }
3482
3483 static int __hw_addr_del_ii(struct list_head *list, unsigned char *addr,
3484                             int addr_len, unsigned char addr_type,
3485                             int ignore_index)
3486 {
3487         struct netdev_hw_addr *ha;
3488         int i = 0;
3489
3490         list_for_each_entry(ha, list, list) {
3491                 if (i++ != ignore_index &&
3492                     !memcmp(ha->addr, addr, addr_len) &&
3493                     (ha->type == addr_type || !addr_type)) {
3494                         list_del_rcu(&ha->list);
3495                         call_rcu(&ha->rcu_head, ha_rcu_free);
3496                         return 0;
3497                 }
3498         }
3499         return -ENOENT;
3500 }
3501
3502 static int __hw_addr_add_multiple_ii(struct list_head *to_list,
3503                                      struct list_head *from_list,
3504                                      int addr_len, unsigned char addr_type,
3505                                      int ignore_index)
3506 {
3507         int err;
3508         struct netdev_hw_addr *ha, *ha2;
3509         unsigned char type;
3510
3511         list_for_each_entry(ha, from_list, list) {
3512                 type = addr_type ? addr_type : ha->type;
3513                 err = __hw_addr_add(to_list, ha->addr, addr_len, type);
3514                 if (err)
3515                         goto unroll;
3516         }
3517         return 0;
3518
3519 unroll:
3520         list_for_each_entry(ha2, from_list, list) {
3521                 if (ha2 == ha)
3522                         break;
3523                 type = addr_type ? addr_type : ha2->type;
3524                 __hw_addr_del_ii(to_list, ha2->addr, addr_len, type,
3525                                  ignore_index);
3526         }
3527         return err;
3528 }
3529
3530 static void __hw_addr_del_multiple_ii(struct list_head *to_list,
3531                                       struct list_head *from_list,
3532                                       int addr_len, unsigned char addr_type,
3533                                       int ignore_index)
3534 {
3535         struct netdev_hw_addr *ha;
3536         unsigned char type;
3537
3538         list_for_each_entry(ha, from_list, list) {
3539                 type = addr_type ? addr_type : ha->type;
3540                 __hw_addr_del_ii(to_list, ha->addr, addr_len, addr_type,
3541                                  ignore_index);
3542         }
3543 }
3544
3545 static void __hw_addr_flush(struct list_head *list)
3546 {
3547         struct netdev_hw_addr *ha, *tmp;
3548
3549         list_for_each_entry_safe(ha, tmp, list, list) {
3550                 list_del_rcu(&ha->list);
3551                 call_rcu(&ha->rcu_head, ha_rcu_free);
3552         }
3553 }
3554
3555 /* Device addresses handling functions */
3556
3557 static void dev_addr_flush(struct net_device *dev)
3558 {
3559         /* rtnl_mutex must be held here */
3560
3561         __hw_addr_flush(&dev->dev_addr_list);
3562         dev->dev_addr = NULL;
3563 }
3564
3565 static int dev_addr_init(struct net_device *dev)
3566 {
3567         unsigned char addr[MAX_ADDR_LEN];
3568         struct netdev_hw_addr *ha;
3569         int err;
3570
3571         /* rtnl_mutex must be held here */
3572
3573         INIT_LIST_HEAD(&dev->dev_addr_list);
3574         memset(addr, 0, sizeof(*addr));
3575         err = __hw_addr_add(&dev->dev_addr_list, addr, sizeof(*addr),
3576                             NETDEV_HW_ADDR_T_LAN);
3577         if (!err) {
3578                 /*
3579                  * Get the first (previously created) address from the list
3580                  * and set dev_addr pointer to this location.
3581                  */
3582                 ha = list_first_entry(&dev->dev_addr_list,
3583                                       struct netdev_hw_addr, list);
3584                 dev->dev_addr = ha->addr;
3585         }
3586         return err;
3587 }
3588
3589 /**
3590  *      dev_addr_add    - Add a device address
3591  *      @dev: device
3592  *      @addr: address to add
3593  *      @addr_type: address type
3594  *
3595  *      Add a device address to the device or increase the reference count if
3596  *      it already exists.
3597  *
3598  *      The caller must hold the rtnl_mutex.
3599  */
3600 int dev_addr_add(struct net_device *dev, unsigned char *addr,
3601                  unsigned char addr_type)
3602 {
3603         int err;
3604
3605         ASSERT_RTNL();
3606
3607         err = __hw_addr_add(&dev->dev_addr_list, addr, dev->addr_len,
3608                             addr_type);
3609         if (!err)
3610                 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3611         return err;
3612 }
3613 EXPORT_SYMBOL(dev_addr_add);
3614
3615 /**
3616  *      dev_addr_del    - Release a device address.
3617  *      @dev: device
3618  *      @addr: address to delete
3619  *      @addr_type: address type
3620  *
3621  *      Release reference to a device address and remove it from the device
3622  *      if the reference count drops to zero.
3623  *
3624  *      The caller must hold the rtnl_mutex.
3625  */
3626 int dev_addr_del(struct net_device *dev, unsigned char *addr,
3627                  unsigned char addr_type)
3628 {
3629         int err;
3630
3631         ASSERT_RTNL();
3632
3633         err = __hw_addr_del_ii(&dev->dev_addr_list, addr, dev->addr_len,
3634                                addr_type, 0);
3635         if (!err)
3636                 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3637         return err;
3638 }
3639 EXPORT_SYMBOL(dev_addr_del);
3640
3641 /**
3642  *      dev_addr_add_multiple   - Add device addresses from another device
3643  *      @to_dev: device to which addresses will be added
3644  *      @from_dev: device from which addresses will be added
3645  *      @addr_type: address type - 0 means type will be used from from_dev
3646  *
3647  *      Add device addresses of the one device to another.
3648  **
3649  *      The caller must hold the rtnl_mutex.
3650  */
3651 int dev_addr_add_multiple(struct net_device *to_dev,
3652                           struct net_device *from_dev,
3653                           unsigned char addr_type)
3654 {
3655         int err;
3656
3657         ASSERT_RTNL();
3658
3659         if (from_dev->addr_len != to_dev->addr_len)
3660                 return -EINVAL;
3661         err = __hw_addr_add_multiple_ii(&to_dev->dev_addr_list,
3662                                         &from_dev->dev_addr_list,
3663                                         to_dev->addr_len, addr_type, 0);
3664         if (!err)
3665                 call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
3666         return err;
3667 }
3668 EXPORT_SYMBOL(dev_addr_add_multiple);
3669
3670 /**
3671  *      dev_addr_del_multiple   - Delete device addresses by another device
3672  *      @to_dev: device where the addresses will be deleted
3673  *      @from_dev: device by which addresses the addresses will be deleted
3674  *      @addr_type: address type - 0 means type will used from from_dev
3675  *
3676  *      Deletes addresses in to device by the list of addresses in from device.
3677  *
3678  *      The caller must hold the rtnl_mutex.
3679  */
3680 int dev_addr_del_multiple(struct net_device *to_dev,
3681                           struct net_device *from_dev,
3682                           unsigned char addr_type)
3683 {
3684         ASSERT_RTNL();
3685
3686         if (from_dev->addr_len != to_dev->addr_len)
3687                 return -EINVAL;
3688         __hw_addr_del_multiple_ii(&to_dev->dev_addr_list,
3689                                   &from_dev->dev_addr_list,
3690                                   to_dev->addr_len, addr_type, 0);
3691         call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
3692         return 0;
3693 }
3694 EXPORT_SYMBOL(dev_addr_del_multiple);
3695
3696 /* unicast and multicast addresses handling functions */
3697
3698 int __dev_addr_delete(struct dev_addr_list **list, int *count,
3699                       void *addr, int alen, int glbl)
3700 {
3701         struct dev_addr_list *da;
3702
3703         for (; (da = *list) != NULL; list = &da->next) {
3704                 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3705                     alen == da->da_addrlen) {
3706                         if (glbl) {
3707                                 int old_glbl = da->da_gusers;
3708                                 da->da_gusers = 0;
3709                                 if (old_glbl == 0)
3710                                         break;
3711                         }
3712                         if (--da->da_users)
3713                                 return 0;
3714
3715                         *list = da->next;
3716                         kfree(da);
3717                         (*count)--;
3718                         return 0;
3719                 }
3720         }
3721         return -ENOENT;
3722 }
3723
3724 int __dev_addr_add(struct dev_addr_list **list, int *count,
3725                    void *addr, int alen, int glbl)
3726 {
3727         struct dev_addr_list *da;
3728
3729         for (da = *list; da != NULL; da = da->next) {
3730                 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3731                     da->da_addrlen == alen) {
3732                         if (glbl) {
3733                                 int old_glbl = da->da_gusers;
3734                                 da->da_gusers = 1;
3735                                 if (old_glbl)
3736                                         return 0;
3737                         }
3738                         da->da_users++;
3739                         return 0;
3740                 }
3741         }
3742
3743         da = kzalloc(sizeof(*da), GFP_ATOMIC);
3744         if (da == NULL)
3745                 return -ENOMEM;
3746         memcpy(da->da_addr, addr, alen);
3747         da->da_addrlen = alen;
3748         da->da_users = 1;
3749         da->da_gusers = glbl ? 1 : 0;
3750         da->next = *list;
3751         *list = da;
3752         (*count)++;
3753         return 0;
3754 }
3755
3756 /**
3757  *      dev_unicast_delete      - Release secondary unicast address.
3758  *      @dev: device
3759  *      @addr: address to delete
3760  *      @alen: length of @addr
3761  *
3762  *      Release reference to a secondary unicast address and remove it
3763  *      from the device if the reference count drops to zero.
3764  *
3765  *      The caller must hold the rtnl_mutex.
3766  */
3767 int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
3768 {
3769         int err;
3770
3771         ASSERT_RTNL();
3772
3773         netif_addr_lock_bh(dev);
3774         err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
3775         if (!err)
3776                 __dev_set_rx_mode(dev);
3777         netif_addr_unlock_bh(dev);
3778         return err;
3779 }
3780 EXPORT_SYMBOL(dev_unicast_delete);
3781
3782 /**
3783  *      dev_unicast_add         - add a secondary unicast address
3784  *      @dev: device
3785  *      @addr: address to add
3786  *      @alen: length of @addr
3787  *
3788  *      Add a secondary unicast address to the device or increase
3789  *      the reference count if it already exists.
3790  *
3791  *      The caller must hold the rtnl_mutex.
3792  */
3793 int dev_unicast_add(struct net_device *dev, void *addr, int alen)
3794 {
3795         int err;
3796
3797         ASSERT_RTNL();
3798
3799         netif_addr_lock_bh(dev);
3800         err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
3801         if (!err)
3802                 __dev_set_rx_mode(dev);
3803         netif_addr_unlock_bh(dev);
3804         return err;
3805 }
3806 EXPORT_SYMBOL(dev_unicast_add);
3807
3808 int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
3809                     struct dev_addr_list **from, int *from_count)
3810 {
3811         struct dev_addr_list *da, *next;
3812         int err = 0;
3813
3814         da = *from;
3815         while (da != NULL) {
3816                 next = da->next;
3817                 if (!da->da_synced) {
3818                         err = __dev_addr_add(to, to_count,
3819                                              da->da_addr, da->da_addrlen, 0);
3820                         if (err < 0)
3821                                 break;
3822                         da->da_synced = 1;
3823                         da->da_users++;
3824                 } else if (da->da_users == 1) {
3825                         __dev_addr_delete(to, to_count,
3826                                           da->da_addr, da->da_addrlen, 0);
3827                         __dev_addr_delete(from, from_count,
3828                                           da->da_addr, da->da_addrlen, 0);
3829                 }
3830                 da = next;
3831         }
3832         return err;
3833 }
3834
3835 void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
3836                        struct dev_addr_list **from, int *from_count)
3837 {
3838         struct dev_addr_list *da, *next;
3839
3840         da = *from;
3841         while (da != NULL) {
3842                 next = da->next;
3843                 if (da->da_synced) {
3844                         __dev_addr_delete(to, to_count,
3845                                           da->da_addr, da->da_addrlen, 0);
3846                         da->da_synced = 0;
3847                         __dev_addr_delete(from, from_count,
3848                                           da->da_addr, da->da_addrlen, 0);
3849                 }
3850                 da = next;
3851         }
3852 }
3853
3854 /**
3855  *      dev_unicast_sync - Synchronize device's unicast list to another device
3856  *      @to: destination device
3857  *      @from: source device
3858  *
3859  *      Add newly added addresses to the destination device and release
3860  *      addresses that have no users left. The source device must be
3861  *      locked by netif_tx_lock_bh.
3862  *
3863  *      This function is intended to be called from the dev->set_rx_mode
3864  *      function of layered software devices.
3865  */
3866 int dev_unicast_sync(struct net_device *to, struct net_device *from)
3867 {
3868         int err = 0;
3869
3870         netif_addr_lock_bh(to);
3871         err = __dev_addr_sync(&to->uc_list, &to->uc_count,
3872                               &from->uc_list, &from->uc_count);
3873         if (!err)
3874                 __dev_set_rx_mode(to);
3875         netif_addr_unlock_bh(to);
3876         return err;
3877 }
3878 EXPORT_SYMBOL(dev_unicast_sync);
3879
3880 /**
3881  *      dev_unicast_unsync - Remove synchronized addresses from the destination device
3882  *      @to: destination device
3883  *      @from: source device
3884  *
3885  *      Remove all addresses that were added to the destination device by
3886  *      dev_unicast_sync(). This function is intended to be called from the
3887  *      dev->stop function of layered software devices.
3888  */
3889 void dev_unicast_unsync(struct net_device *to, struct net_device *from)
3890 {
3891         netif_addr_lock_bh(from);
3892         netif_addr_lock(to);
3893
3894         __dev_addr_unsync(&to->uc_list, &to->uc_count,
3895                           &from->uc_list, &from->uc_count);
3896         __dev_set_rx_mode(to);
3897
3898         netif_addr_unlock(to);
3899         netif_addr_unlock_bh(from);
3900 }
3901 EXPORT_SYMBOL(dev_unicast_unsync);
3902
3903 static void __dev_addr_discard(struct dev_addr_list **list)
3904 {
3905         struct dev_addr_list *tmp;
3906
3907         while (*list != NULL) {
3908                 tmp = *list;
3909                 *list = tmp->next;
3910                 if (tmp->da_users > tmp->da_gusers)
3911                         printk("__dev_addr_discard: address leakage! "
3912                                "da_users=%d\n", tmp->da_users);
3913                 kfree(tmp);
3914         }
3915 }
3916
3917 static void dev_addr_discard(struct net_device *dev)
3918 {
3919         netif_addr_lock_bh(dev);
3920
3921         __dev_addr_discard(&dev->uc_list);
3922         dev->uc_count = 0;
3923
3924         __dev_addr_discard(&dev->mc_list);
3925         dev->mc_count = 0;
3926
3927         netif_addr_unlock_bh(dev);
3928 }
3929
3930 /**
3931  *      dev_get_flags - get flags reported to userspace
3932  *      @dev: device
3933  *
3934  *      Get the combination of flag bits exported through APIs to userspace.
3935  */
3936 unsigned dev_get_flags(const struct net_device *dev)
3937 {
3938         unsigned flags;
3939
3940         flags = (dev->flags & ~(IFF_PROMISC |
3941                                 IFF_ALLMULTI |
3942                                 IFF_RUNNING |
3943                                 IFF_LOWER_UP |
3944                                 IFF_DORMANT)) |
3945                 (dev->gflags & (IFF_PROMISC |
3946                                 IFF_ALLMULTI));
3947
3948         if (netif_running(dev)) {
3949                 if (netif_oper_up(dev))
3950                         flags |= IFF_RUNNING;
3951                 if (netif_carrier_ok(dev))
3952                         flags |= IFF_LOWER_UP;
3953                 if (netif_dormant(dev))
3954                         flags |= IFF_DORMANT;
3955         }
3956
3957         return flags;
3958 }
3959
3960 /**
3961  *      dev_change_flags - change device settings
3962  *      @dev: device
3963  *      @flags: device state flags
3964  *
3965  *      Change settings on device based state flags. The flags are
3966  *      in the userspace exported format.
3967  */
3968 int dev_change_flags(struct net_device *dev, unsigned flags)
3969 {
3970         int ret, changes;
3971         int old_flags = dev->flags;
3972
3973         ASSERT_RTNL();
3974
3975         /*
3976          *      Set the flags on our device.
3977          */
3978
3979         dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
3980                                IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
3981                                IFF_AUTOMEDIA)) |
3982                      (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
3983                                     IFF_ALLMULTI));
3984
3985         /*
3986          *      Load in the correct multicast list now the flags have changed.
3987          */
3988
3989         if ((old_flags ^ flags) & IFF_MULTICAST)
3990                 dev_change_rx_flags(dev, IFF_MULTICAST);
3991
3992         dev_set_rx_mode(dev);
3993
3994         /*
3995          *      Have we downed the interface. We handle IFF_UP ourselves
3996          *      according to user attempts to set it, rather than blindly
3997          *      setting it.
3998          */
3999
4000         ret = 0;
4001         if ((old_flags ^ flags) & IFF_UP) {     /* Bit is different  ? */
4002                 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
4003
4004                 if (!ret)
4005                         dev_set_rx_mode(dev);
4006         }
4007
4008         if (dev->flags & IFF_UP &&
4009             ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
4010                                           IFF_VOLATILE)))
4011                 call_netdevice_notifiers(NETDEV_CHANGE, dev);
4012
4013         if ((flags ^ dev->gflags) & IFF_PROMISC) {
4014                 int inc = (flags & IFF_PROMISC) ? +1 : -1;
4015                 dev->gflags ^= IFF_PROMISC;
4016                 dev_set_promiscuity(dev, inc);
4017         }
4018
4019         /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
4020            is important. Some (broken) drivers set IFF_PROMISC, when
4021            IFF_ALLMULTI is requested not asking us and not reporting.
4022          */
4023         if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
4024                 int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
4025                 dev->gflags ^= IFF_ALLMULTI;
4026                 dev_set_allmulti(dev, inc);
4027         }
4028
4029         /* Exclude state transition flags, already notified */
4030         changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
4031         if (changes)
4032                 rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
4033
4034         return ret;
4035 }
4036
4037 /**
4038  *      dev_set_mtu - Change maximum transfer unit
4039  *      @dev: device
4040  *      @new_mtu: new transfer unit
4041  *
4042  *      Change the maximum transfer size of the network device.
4043  */
4044 int dev_set_mtu(struct net_device *dev, int new_mtu)
4045 {
4046         const struct net_device_ops *ops = dev->netdev_ops;
4047         int err;
4048
4049         if (new_mtu == dev->mtu)
4050                 return 0;
4051
4052         /*      MTU must be positive.    */
4053         if (new_mtu < 0)
4054                 return -EINVAL;
4055
4056         if (!netif_device_present(dev))
4057                 return -ENODEV;
4058
4059         err = 0;
4060         if (ops->ndo_change_mtu)
4061                 err = ops->ndo_change_mtu(dev, new_mtu);
4062         else
4063                 dev->mtu = new_mtu;
4064
4065         if (!err && dev->flags & IFF_UP)
4066                 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
4067         return err;
4068 }
4069
4070 /**
4071  *      dev_set_mac_address - Change Media Access Control Address
4072  *      @dev: device
4073  *      @sa: new address
4074  *
4075  *      Change the hardware (MAC) address of the device
4076  */
4077 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
4078 {
4079         const struct net_device_ops *ops = dev->netdev_ops;
4080         int err;
4081
4082         if (!ops->ndo_set_mac_address)
4083                 return -EOPNOTSUPP;
4084         if (sa->sa_family != dev->type)
4085                 return -EINVAL;
4086         if (!netif_device_present(dev))
4087                 return -ENODEV;
4088         err = ops->ndo_set_mac_address(dev, sa);
4089         if (!err)
4090                 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
4091         return err;
4092 }
4093
4094 /*
4095  *      Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
4096  */
4097 static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
4098 {
4099         int err;
4100         struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
4101
4102         if (!dev)
4103                 return -ENODEV;
4104
4105         switch (cmd) {
4106                 case SIOCGIFFLAGS:      /* Get interface flags */
4107                         ifr->ifr_flags = dev_get_flags(dev);
4108                         return 0;
4109
4110                 case SIOCGIFMETRIC:     /* Get the metric on the interface
4111                                            (currently unused) */
4112                         ifr->ifr_metric = 0;
4113                         return 0;
4114
4115                 case SIOCGIFMTU:        /* Get the MTU of a device */
4116                         ifr->ifr_mtu = dev->mtu;
4117                         return 0;
4118
4119                 case SIOCGIFHWADDR:
4120                         if (!dev->addr_len)
4121                                 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
4122                         else
4123                                 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
4124                                        min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
4125                         ifr->ifr_hwaddr.sa_family = dev->type;
4126                         return 0;
4127
4128                 case SIOCGIFSLAVE:
4129                         err = -EINVAL;
4130                         break;
4131
4132                 case SIOCGIFMAP:
4133                         ifr->ifr_map.mem_start = dev->mem_start;
4134                         ifr->ifr_map.mem_end   = dev->mem_end;
4135                         ifr->ifr_map.base_addr = dev->base_addr;
4136                         ifr->ifr_map.irq       = dev->irq;
4137                         ifr->ifr_map.dma       = dev->dma;
4138                         ifr->ifr_map.port      = dev->if_port;
4139                         return 0;
4140
4141                 case SIOCGIFINDEX:
4142                         ifr->ifr_ifindex = dev->ifindex;
4143                         return 0;
4144
4145                 case SIOCGIFTXQLEN:
4146                         ifr->ifr_qlen = dev->tx_queue_len;
4147                         return 0;
4148
4149                 default:
4150                         /* dev_ioctl() should ensure this case
4151                          * is never reached
4152                          */
4153                         WARN_ON(1);
4154                         err = -EINVAL;
4155                         break;
4156
4157         }
4158         return err;
4159 }
4160
4161 /*
4162  *      Perform the SIOCxIFxxx calls, inside rtnl_lock()
4163  */
4164 static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
4165 {
4166         int err;
4167         struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
4168         const struct net_device_ops *ops;
4169
4170         if (!dev)
4171                 return -ENODEV;
4172
4173         ops = dev->netdev_ops;
4174
4175         switch (cmd) {
4176                 case SIOCSIFFLAGS:      /* Set interface flags */
4177                         return dev_change_flags(dev, ifr->ifr_flags);
4178
4179                 case SIOCSIFMETRIC:     /* Set the metric on the interface
4180                                            (currently unused) */
4181                         return -EOPNOTSUPP;
4182
4183                 case SIOCSIFMTU:        /* Set the MTU of a device */
4184                         return dev_set_mtu(dev, ifr->ifr_mtu);
4185
4186                 case SIOCSIFHWADDR:
4187                         return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
4188
4189                 case SIOCSIFHWBROADCAST:
4190                         if (ifr->ifr_hwaddr.sa_family != dev->type)
4191                                 return -EINVAL;
4192                         memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
4193                                min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
4194                         call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
4195                         return 0;
4196
4197                 case SIOCSIFMAP:
4198                         if (ops->ndo_set_config) {
4199                                 if (!netif_device_present(dev))
4200                                         return -ENODEV;
4201                                 return ops->ndo_set_config(dev, &ifr->ifr_map);
4202                         }
4203                         return -EOPNOTSUPP;
4204
4205                 case SIOCADDMULTI:
4206                         if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
4207                             ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
4208                                 return -EINVAL;
4209                         if (!netif_device_present(dev))
4210                                 return -ENODEV;
4211                         return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
4212                                           dev->addr_len, 1);
4213
4214                 case SIOCDELMULTI:
4215                         if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
4216                             ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
4217                                 return -EINVAL;
4218                         if (!netif_device_present(dev))
4219                                 return -ENODEV;
4220                         return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
4221                                              dev->addr_len, 1);
4222
4223                 case SIOCSIFTXQLEN:
4224                         if (ifr->ifr_qlen < 0)
4225                                 return -EINVAL;
4226                         dev->tx_queue_len = ifr->ifr_qlen;
4227                         return 0;
4228
4229                 case SIOCSIFNAME:
4230                         ifr->ifr_newname[IFNAMSIZ-1] = '\0';
4231                         return dev_change_name(dev, ifr->ifr_newname);
4232
4233                 /*
4234                  *      Unknown or private ioctl
4235                  */
4236
4237                 default:
4238                         if ((cmd >= SIOCDEVPRIVATE &&
4239                             cmd <= SIOCDEVPRIVATE + 15) ||
4240                             cmd == SIOCBONDENSLAVE ||
4241                             cmd == SIOCBONDRELEASE ||
4242                             cmd == SIOCBONDSETHWADDR ||
4243                             cmd == SIOCBONDSLAVEINFOQUERY ||
4244                             cmd == SIOCBONDINFOQUERY ||
4245                             cmd == SIOCBONDCHANGEACTIVE ||
4246                             cmd == SIOCGMIIPHY ||
4247                             cmd == SIOCGMIIREG ||
4248                             cmd == SIOCSMIIREG ||
4249                             cmd == SIOCBRADDIF ||
4250                             cmd == SIOCBRDELIF ||
4251                             cmd == SIOCSHWTSTAMP ||
4252                             cmd == SIOCWANDEV) {
4253                                 err = -EOPNOTSUPP;
4254                                 if (ops->ndo_do_ioctl) {
4255                                         if (netif_device_present(dev))
4256                                                 err = ops->ndo_do_ioctl(dev, ifr, cmd);
4257                                         else
4258                                                 err = -ENODEV;
4259                                 }
4260                         } else
4261                                 err = -EINVAL;
4262
4263         }
4264         return err;
4265 }
4266
4267 /*
4268  *      This function handles all "interface"-type I/O control requests. The actual
4269  *      'doing' part of this is dev_ifsioc above.
4270  */
4271
4272 /**
4273  *      dev_ioctl       -       network device ioctl
4274  *      @net: the applicable net namespace
4275  *      @cmd: command to issue
4276  *      @arg: pointer to a struct ifreq in user space
4277  *
4278  *      Issue ioctl functions to devices. This is normally called by the
4279  *      user space syscall interfaces but can sometimes be useful for
4280  *      other purposes. The return value is the return from the syscall if
4281  *      positive or a negative errno code on error.
4282  */
4283
4284 int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
4285 {
4286         struct ifreq ifr;
4287         int ret;
4288         char *colon;
4289
4290         /* One special case: SIOCGIFCONF takes ifconf argument
4291            and requires shared lock, because it sleeps writing
4292            to user space.
4293          */
4294
4295         if (cmd == SIOCGIFCONF) {
4296                 rtnl_lock();
4297                 ret = dev_ifconf(net, (char __user *) arg);
4298                 rtnl_unlock();
4299                 return ret;
4300         }
4301         if (cmd == SIOCGIFNAME)
4302                 return dev_ifname(net, (struct ifreq __user *)arg);
4303
4304         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
4305                 return -EFAULT;
4306
4307         ifr.ifr_name[IFNAMSIZ-1] = 0;
4308
4309         colon = strchr(ifr.ifr_name, ':');
4310         if (colon)
4311                 *colon = 0;
4312
4313         /*
4314          *      See which interface the caller is talking about.
4315          */
4316
4317         switch (cmd) {
4318                 /*
4319                  *      These ioctl calls:
4320                  *      - can be done by all.
4321                  *      - atomic and do not require locking.
4322                  *      - return a value
4323                  */
4324                 case SIOCGIFFLAGS:
4325                 case SIOCGIFMETRIC:
4326                 case SIOCGIFMTU:
4327                 case SIOCGIFHWADDR:
4328                 case SIOCGIFSLAVE:
4329                 case SIOCGIFMAP:
4330                 case SIOCGIFINDEX:
4331                 case SIOCGIFTXQLEN:
4332                         dev_load(net, ifr.ifr_name);
4333                         read_lock(&dev_base_lock);
4334                         ret = dev_ifsioc_locked(net, &ifr, cmd);
4335                         read_unlock(&dev_base_lock);
4336                         if (!ret) {
4337                                 if (colon)
4338                                         *colon = ':';
4339                                 if (copy_to_user(arg, &ifr,
4340                                                  sizeof(struct ifreq)))
4341                                         ret = -EFAULT;
4342                         }
4343                         return ret;
4344
4345                 case SIOCETHTOOL:
4346                         dev_load(net, ifr.ifr_name);
4347                         rtnl_lock();
4348                         ret = dev_ethtool(net, &ifr);
4349                         rtnl_unlock();
4350                         if (!ret) {
4351                                 if (colon)
4352                                         *colon = ':';
4353                                 if (copy_to_user(arg, &ifr,
4354                                                  sizeof(struct ifreq)))
4355                                         ret = -EFAULT;
4356                         }
4357                         return ret;
4358
4359                 /*
4360                  *      These ioctl calls:
4361                  *      - require superuser power.
4362                  *      - require strict serialization.
4363                  *      - return a value
4364                  */
4365                 case SIOCGMIIPHY:
4366                 case SIOCGMIIREG:
4367                 case SIOCSIFNAME:
4368                         if (!capable(CAP_NET_ADMIN))
4369                                 return -EPERM;
4370                         dev_load(net, ifr.ifr_name);
4371                         rtnl_lock();
4372                         ret = dev_ifsioc(net, &ifr, cmd);
4373                         rtnl_unlock();
4374                         if (!ret) {
4375                                 if (colon)
4376                                         *colon = ':';
4377                                 if (copy_to_user(arg, &ifr,
4378                                                  sizeof(struct ifreq)))
4379                                         ret = -EFAULT;
4380                         }
4381                         return ret;
4382
4383                 /*
4384                  *      These ioctl calls:
4385                  *      - require superuser power.
4386                  *      - require strict serialization.
4387                  *      - do not return a value
4388                  */
4389                 case SIOCSIFFLAGS:
4390                 case SIOCSIFMETRIC:
4391                 case SIOCSIFMTU:
4392                 case SIOCSIFMAP:
4393                 case SIOCSIFHWADDR:
4394                 case SIOCSIFSLAVE:
4395                 case SIOCADDMULTI:
4396                 case SIOCDELMULTI:
4397                 case SIOCSIFHWBROADCAST:
4398                 case SIOCSIFTXQLEN:
4399                 case SIOCSMIIREG:
4400                 case SIOCBONDENSLAVE:
4401                 case SIOCBONDRELEASE:
4402                 case SIOCBONDSETHWADDR:
4403                 case SIOCBONDCHANGEACTIVE:
4404                 case SIOCBRADDIF:
4405                 case SIOCBRDELIF:
4406                 case SIOCSHWTSTAMP:
4407                         if (!capable(CAP_NET_ADMIN))
4408                                 return -EPERM;
4409                         /* fall through */
4410                 case SIOCBONDSLAVEINFOQUERY:
4411                 case SIOCBONDINFOQUERY:
4412                         dev_load(net, ifr.ifr_name);
4413                         rtnl_lock();
4414                         ret = dev_ifsioc(net, &ifr, cmd);
4415                         rtnl_unlock();
4416                         return ret;
4417
4418                 case SIOCGIFMEM:
4419                         /* Get the per device memory space. We can add this but
4420                          * currently do not support it */
4421                 case SIOCSIFMEM:
4422                         /* Set the per device memory buffer space.
4423                          * Not applicable in our case */
4424                 case SIOCSIFLINK:
4425                         return -EINVAL;
4426
4427                 /*
4428                  *      Unknown or private ioctl.
4429                  */
4430                 default:
4431                         if (cmd == SIOCWANDEV ||
4432                             (cmd >= SIOCDEVPRIVATE &&
4433                              cmd <= SIOCDEVPRIVATE + 15)) {
4434                                 dev_load(net, ifr.ifr_name);
4435                                 rtnl_lock();
4436                                 ret = dev_ifsioc(net, &ifr, cmd);
4437                                 rtnl_unlock();
4438                                 if (!ret && copy_to_user(arg, &ifr,
4439                                                          sizeof(struct ifreq)))
4440                                         ret = -EFAULT;
4441                                 return ret;
4442                         }
4443                         /* Take care of Wireless Extensions */
4444                         if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
4445                                 return wext_handle_ioctl(net, &ifr, cmd, arg);
4446                         return -EINVAL;
4447         }
4448 }
4449
4450
/**
 *      dev_new_index   -       pick an unused ifindex
 *      @net: the applicable net namespace
 *
 *      Advances a function-static counter, resetting it to 1 whenever the
 *      increment leaves it non-positive, until __dev_get_by_index() finds
 *      no device with that index in @net.  That value is returned.
 *
 *      The caller must hold the rtnl semaphore or the dev_base_lock to be
 *      sure the returned value remains unique.
 */
static int dev_new_index(struct net *net)
{
        static int ifindex;

        do {
                ++ifindex;
                if (ifindex <= 0)
                        ifindex = 1;
        } while (__dev_get_by_index(net, ifindex));

        return ifindex;
}
4469
4470 /* Delayed registration/unregisteration */
4471 static LIST_HEAD(net_todo_list);
4472
4473 static void net_set_todo(struct net_device *dev)
4474 {
4475         list_add_tail(&dev->todo_list, &net_todo_list);
4476 }
4477
4478 static void rollback_registered(struct net_device *dev)
4479 {
4480         BUG_ON(dev_boot_phase);
4481         ASSERT_RTNL();
4482
4483         /* Some devices call without registering for initialization unwind. */
4484         if (dev->reg_state == NETREG_UNINITIALIZED) {
4485                 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
4486                                   "was registered\n", dev->name, dev);
4487
4488                 WARN_ON(1);
4489                 return;
4490         }
4491
4492         BUG_ON(dev->reg_state != NETREG_REGISTERED);
4493
4494         /* If device is running, close it first. */
4495         dev_close(dev);
4496
4497         /* And unlink it from device chain. */
4498         unlist_netdevice(dev);
4499
4500         dev->reg_state = NETREG_UNREGISTERING;
4501
4502         synchronize_net();
4503
4504         /* Shutdown queueing discipline. */
4505         dev_shutdown(dev);
4506
4507
4508         /* Notify protocols, that we are about to destroy
4509            this device. They should clean all the things.
4510         */
4511         call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4512
4513         /*
4514          *      Flush the unicast and multicast chains
4515          */
4516         dev_addr_discard(dev);
4517
4518         if (dev->netdev_ops->ndo_uninit)
4519                 dev->netdev_ops->ndo_uninit(dev);
4520
4521         /* Notifier chain MUST detach us from master device. */
4522         WARN_ON(dev->master);
4523
4524         /* Remove entries from kobject tree */
4525         netdev_unregister_kobject(dev);
4526
4527         synchronize_net();
4528
4529         dev_put(dev);
4530 }
4531
4532 static void __netdev_init_queue_locks_one(struct net_device *dev,
4533                                           struct netdev_queue *dev_queue,
4534                                           void *_unused)
4535 {
4536         spin_lock_init(&dev_queue->_xmit_lock);
4537         netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
4538         dev_queue->xmit_lock_owner = -1;
4539 }
4540
4541 static void netdev_init_queue_locks(struct net_device *dev)
4542 {
4543         netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
4544         __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
4545 }
4546
4547 unsigned long netdev_fix_features(unsigned long features, const char *name)
4548 {
4549         /* Fix illegal SG+CSUM combinations. */
4550         if ((features & NETIF_F_SG) &&
4551             !(features & NETIF_F_ALL_CSUM)) {
4552                 if (name)
4553                         printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
4554                                "checksum feature.\n", name);
4555                 features &= ~NETIF_F_SG;
4556         }
4557
4558         /* TSO requires that SG is present as well. */
4559         if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
4560                 if (name)
4561                         printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
4562                                "SG feature.\n", name);
4563                 features &= ~NETIF_F_TSO;
4564         }
4565
4566         if (features & NETIF_F_UFO) {
4567                 if (!(features & NETIF_F_GEN_CSUM)) {
4568                         if (name)
4569                                 printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4570                                        "since no NETIF_F_HW_CSUM feature.\n",
4571                                        name);
4572                         features &= ~NETIF_F_UFO;
4573                 }
4574
4575                 if (!(features & NETIF_F_SG)) {
4576                         if (name)
4577                                 printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4578                                        "since no NETIF_F_SG feature.\n", name);
4579                         features &= ~NETIF_F_UFO;
4580                 }
4581         }
4582
4583         return features;
4584 }
4585 EXPORT_SYMBOL(netdev_fix_features);
4586
4587 /**
4588  *      register_netdevice      - register a network device
4589  *      @dev: device to register
4590  *
4591  *      Take a completed network device structure and add it to the kernel
4592  *      interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
4593  *      chain. 0 is returned on success. A negative errno code is returned
4594  *      on a failure to set up the device, or if the name is a duplicate.
4595  *
4596  *      Callers must hold the rtnl semaphore. You may want
4597  *      register_netdev() instead of this.
4598  *
4599  *      BUGS:
4600  *      The locking appears insufficient to guarantee two parallel registers
4601  *      will not get the same name.
4602  */
4603
4604 int register_netdevice(struct net_device *dev)
4605 {
4606         struct hlist_head *head;
4607         struct hlist_node *p;
4608         int ret;
4609         struct net *net = dev_net(dev);
4610
4611         BUG_ON(dev_boot_phase);
4612         ASSERT_RTNL();
4613
4614         might_sleep();
4615
4616         /* When net_device's are persistent, this will be fatal. */
4617         BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
4618         BUG_ON(!net);
4619
4620         spin_lock_init(&dev->addr_list_lock);
4621         netdev_set_addr_lockdep_class(dev);
4622         netdev_init_queue_locks(dev);
4623
4624         dev->iflink = -1;
4625
4626         /* Init, if this function is available */
4627         if (dev->netdev_ops->ndo_init) {
4628                 ret = dev->netdev_ops->ndo_init(dev);
4629                 if (ret) {
4630                         if (ret > 0)
4631                                 ret = -EIO;
4632                         goto out;
4633                 }
4634         }
4635
4636         if (!dev_valid_name(dev->name)) {
4637                 ret = -EINVAL;
4638                 goto err_uninit;
4639         }
4640
4641         dev->ifindex = dev_new_index(net);
4642         if (dev->iflink == -1)
4643                 dev->iflink = dev->ifindex;
4644
4645         /* Check for existence of name */
4646         head = dev_name_hash(net, dev->name);
4647         hlist_for_each(p, head) {
4648                 struct net_device *d
4649                         = hlist_entry(p, struct net_device, name_hlist);
4650                 if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
4651                         ret = -EEXIST;
4652                         goto err_uninit;
4653                 }
4654         }
4655
4656         /* Fix illegal checksum combinations */
4657         if ((dev->features & NETIF_F_HW_CSUM) &&
4658             (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4659                 printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
4660                        dev->name);
4661                 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
4662         }
4663
4664         if ((dev->features & NETIF_F_NO_CSUM) &&
4665             (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4666                 printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
4667                        dev->name);
4668                 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
4669         }
4670
4671         dev->features = netdev_fix_features(dev->features, dev->name);
4672
4673         /* Enable software GSO if SG is supported. */
4674         if (dev->features & NETIF_F_SG)
4675                 dev->features |= NETIF_F_GSO;
4676
4677         netdev_initialize_kobject(dev);
4678         ret = netdev_register_kobject(dev);
4679         if (ret)
4680                 goto err_uninit;
4681         dev->reg_state = NETREG_REGISTERED;
4682
4683         /*
4684          *      Default initial state at registry is that the
4685          *      device is present.
4686          */
4687
4688         set_bit(__LINK_STATE_PRESENT, &dev->state);
4689
4690         dev_init_scheduler(dev);
4691         dev_hold(dev);
4692         list_netdevice(dev);
4693
4694         /* Notify protocols, that a new device appeared. */
4695         ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
4696         ret = notifier_to_errno(ret);
4697         if (ret) {
4698                 rollback_registered(dev);
4699                 dev->reg_state = NETREG_UNREGISTERED;
4700         }
4701
4702 out:
4703         return ret;
4704
4705 err_uninit:
4706         if (dev->netdev_ops->ndo_uninit)
4707                 dev->netdev_ops->ndo_uninit(dev);
4708         goto out;
4709 }
4710
4711 /**
4712  *      init_dummy_netdev       - init a dummy network device for NAPI
4713  *      @dev: device to init
4714  *
4715  *      This takes a network device structure and initialize the minimum
4716  *      amount of fields so it can be used to schedule NAPI polls without
4717  *      registering a full blown interface. This is to be used by drivers
4718  *      that need to tie several hardware interfaces to a single NAPI
4719  *      poll scheduler due to HW limitations.
4720  */
4721 int init_dummy_netdev(struct net_device *dev)
4722 {
4723         /* Clear everything. Note we don't initialize spinlocks
4724          * are they aren't supposed to be taken by any of the
4725          * NAPI code and this dummy netdev is supposed to be
4726          * only ever used for NAPI polls
4727          */
4728         memset(dev, 0, sizeof(struct net_device));
4729
4730         /* make sure we BUG if trying to hit standard
4731          * register/unregister code path
4732          */
4733         dev->reg_state = NETREG_DUMMY;
4734
4735         /* initialize the ref count */
4736         atomic_set(&dev->refcnt, 1);
4737
4738         /* NAPI wants this */
4739         INIT_LIST_HEAD(&dev->napi_list);
4740
4741         /* a dummy interface is started by default */
4742         set_bit(__LINK_STATE_PRESENT, &dev->state);
4743         set_bit(__LINK_STATE_START, &dev->state);
4744
4745         return 0;
4746 }
4747 EXPORT_SYMBOL_GPL(init_dummy_netdev);
4748
4749
4750 /**
4751  *      register_netdev - register a network device
4752  *      @dev: device to register
4753  *
4754  *      Take a completed network device structure and add it to the kernel
4755  *      interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
4756  *      chain. 0 is returned on success. A negative errno code is returned
4757  *      on a failure to set up the device, or if the name is a duplicate.
4758  *
4759  *      This is a wrapper around register_netdevice that takes the rtnl semaphore
4760  *      and expands the device name if you passed a format string to
4761  *      alloc_netdev.
4762  */
4763 int register_netdev(struct net_device *dev)
4764 {
4765         int err;
4766
4767         rtnl_lock();
4768
4769         /*
4770          * If the name is a format string the caller wants us to do a
4771          * name allocation.
4772          */
4773         if (strchr(dev->name, '%')) {
4774                 err = dev_alloc_name(dev, dev->name);
4775                 if (err < 0)
4776                         goto out;
4777         }
4778
4779         err = register_netdevice(dev);
4780 out:
4781         rtnl_unlock();
4782         return err;
4783 }
4784 EXPORT_SYMBOL(register_netdev);
4785
4786 /*
4787  * netdev_wait_allrefs - wait until all references are gone.
4788  *
4789  * This is called when unregistering network devices.
4790  *
4791  * Any protocol or device that holds a reference should register
4792  * for netdevice notification, and cleanup and put back the
4793  * reference if they receive an UNREGISTER event.
4794  * We can get stuck here if buggy protocols don't correctly
4795  * call dev_put.
4796  */
4797 static void netdev_wait_allrefs(struct net_device *dev)
4798 {
4799         unsigned long rebroadcast_time, warning_time;
4800
4801         rebroadcast_time = warning_time = jiffies;
4802         while (atomic_read(&dev->refcnt) != 0) {
4803                 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
4804                         rtnl_lock();
4805
4806                         /* Rebroadcast unregister notification */
4807                         call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4808
4809                         if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
4810                                      &dev->state)) {
4811                                 /* We must not have linkwatch events
4812                                  * pending on unregister. If this
4813                                  * happens, we simply run the queue
4814                                  * unscheduled, resulting in a noop
4815                                  * for this device.
4816                                  */
4817                                 linkwatch_run_queue();
4818                         }
4819
4820                         __rtnl_unlock();
4821
4822                         rebroadcast_time = jiffies;
4823                 }
4824
4825                 msleep(250);
4826
4827                 if (time_after(jiffies, warning_time + 10 * HZ)) {
4828                         printk(KERN_EMERG "unregister_netdevice: "
4829                                "waiting for %s to become free. Usage "
4830                                "count = %d\n",
4831                                dev->name, atomic_read(&dev->refcnt));
4832                         warning_time = jiffies;
4833                 }
4834         }
4835 }
4836
4837 /* The sequence is:
4838  *
4839  *      rtnl_lock();
4840  *      ...
4841  *      register_netdevice(x1);
4842  *      register_netdevice(x2);
4843  *      ...
4844  *      unregister_netdevice(y1);
4845  *      unregister_netdevice(y2);
4846  *      ...
4847  *      rtnl_unlock();
4848  *      free_netdev(y1);
4849  *      free_netdev(y2);
4850  *
4851  * We are invoked by rtnl_unlock().
4852  * This allows us to deal with problems:
4853  * 1) We can delete sysfs objects which invoke hotplug
4854  *    without deadlocking with linkwatch via keventd.
4855  * 2) Since we run with the RTNL semaphore not held, we can sleep
4856  *    safely in order to wait for the netdev refcnt to drop to zero.
4857  *
4858  * We must not return until all unregister events added during
4859  * the interval the lock was held have been completed.
4860  */
4861 void netdev_run_todo(void)
4862 {
4863         struct list_head list;
4864
4865         /* Snapshot list, allow later requests */
4866         list_replace_init(&net_todo_list, &list);
4867
4868         __rtnl_unlock();
4869
4870         while (!list_empty(&list)) {
4871                 struct net_device *dev
4872                         = list_entry(list.next, struct net_device, todo_list);
4873                 list_del(&dev->todo_list);
4874
4875                 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
4876                         printk(KERN_ERR "network todo '%s' but state %d\n",
4877                                dev->name, dev->reg_state);
4878                         dump_stack();
4879                         continue;
4880                 }
4881
4882                 dev->reg_state = NETREG_UNREGISTERED;
4883
4884                 on_each_cpu(flush_backlog, dev, 1);
4885
4886                 netdev_wait_allrefs(dev);
4887
4888                 /* paranoia */
4889                 BUG_ON(atomic_read(&dev->refcnt));
4890                 WARN_ON(dev->ip_ptr);
4891                 WARN_ON(dev->ip6_ptr);
4892                 WARN_ON(dev->dn_ptr);
4893
4894                 if (dev->destructor)
4895                         dev->destructor(dev);
4896
4897                 /* Free network device */
4898                 kobject_put(&dev->dev.kobj);
4899         }
4900 }
4901
4902 /**
4903  *      dev_get_stats   - get network device statistics
4904  *      @dev: device to get statistics from
4905  *
4906  *      Get network statistics from device. The device driver may provide
4907  *      its own method by setting dev->netdev_ops->get_stats; otherwise
4908  *      the internal statistics structure is used.
4909  */
4910 const struct net_device_stats *dev_get_stats(struct net_device *dev)
4911 {
4912         const struct net_device_ops *ops = dev->netdev_ops;
4913
4914         if (ops->ndo_get_stats)
4915                 return ops->ndo_get_stats(dev);
4916         else {
4917                 unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
4918                 struct net_device_stats *stats = &dev->stats;
4919                 unsigned int i;
4920                 struct netdev_queue *txq;
4921
4922                 for (i = 0; i < dev->num_tx_queues; i++) {
4923                         txq = netdev_get_tx_queue(dev, i);
4924                         tx_bytes   += txq->tx_bytes;
4925                         tx_packets += txq->tx_packets;
4926                         tx_dropped += txq->tx_dropped;
4927                 }
4928                 if (tx_bytes || tx_packets || tx_dropped) {
4929                         stats->tx_bytes   = tx_bytes;
4930                         stats->tx_packets = tx_packets;
4931                         stats->tx_dropped = tx_dropped;
4932                 }
4933                 return stats;
4934         }
4935 }
4936 EXPORT_SYMBOL(dev_get_stats);
4937
4938 static void netdev_init_one_queue(struct net_device *dev,
4939                                   struct netdev_queue *queue,
4940                                   void *_unused)
4941 {
4942         queue->dev = dev;
4943 }
4944
4945 static void netdev_init_queues(struct net_device *dev)
4946 {
4947         netdev_init_one_queue(dev, &dev->rx_queue, NULL);
4948         netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
4949         spin_lock_init(&dev->tx_global_lock);
4950 }
4951
4952 /**
4953  *      alloc_netdev_mq - allocate network device
4954  *      @sizeof_priv:   size of private data to allocate space for
4955  *      @name:          device name format string
4956  *      @setup:         callback to initialize device
4957  *      @queue_count:   the number of subqueues to allocate
4958  *
4959  *      Allocates a struct net_device with private data area for driver use
4960  *      and performs basic initialization.  Also allocates subquue structs
4961  *      for each queue on the device at the end of the netdevice.
4962  */
4963 struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
4964                 void (*setup)(struct net_device *), unsigned int queue_count)
4965 {
4966         struct netdev_queue *tx;
4967         struct net_device *dev;
4968         size_t alloc_size;
4969         void *p;
4970
4971         BUG_ON(strlen(name) >= sizeof(dev->name));
4972
4973         alloc_size = sizeof(struct net_device);
4974         if (sizeof_priv) {
4975                 /* ensure 32-byte alignment of private area */
4976                 alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
4977                 alloc_size += sizeof_priv;
4978         }
4979         /* ensure 32-byte alignment of whole construct */
4980         alloc_size += NETDEV_ALIGN_CONST;
4981
4982         p = kzalloc(alloc_size, GFP_KERNEL);
4983         if (!p) {
4984                 printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
4985                 return NULL;
4986         }
4987
4988         tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
4989         if (!tx) {
4990                 printk(KERN_ERR "alloc_netdev: Unable to allocate "
4991                        "tx qdiscs.\n");
4992                 goto free_p;
4993         }
4994
4995         dev = (struct net_device *)
4996                 (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
4997         dev->padded = (char *)dev - (char *)p;
4998
4999         if (dev_addr_init(dev))
5000                 goto free_tx;
5001
5002         dev_net_set(dev, &init_net);
5003
5004         dev->_tx = tx;
5005         dev->num_tx_queues = queue_count;
5006         dev->real_num_tx_queues = queue_count;
5007
5008         dev->gso_max_size = GSO_MAX_SIZE;
5009
5010         netdev_init_queues(dev);
5011
5012         INIT_LIST_HEAD(&dev->napi_list);
5013         dev->priv_flags = IFF_XMIT_DST_RELEASE;
5014         setup(dev);
5015         strcpy(dev->name, name);
5016         return dev;
5017
5018 free_tx:
5019         kfree(tx);
5020
5021 free_p:
5022         kfree(p);
5023         return NULL;
5024 }
5025 EXPORT_SYMBOL(alloc_netdev_mq);
5026
5027 /**
5028  *      free_netdev - free network device
5029  *      @dev: device
5030  *
5031  *      This function does the last stage of destroying an allocated device
5032  *      interface. The reference to the device object is released.
5033  *      If this is the last reference then it will be freed.
5034  */
5035 void free_netdev(struct net_device *dev)
5036 {
5037         struct napi_struct *p, *n;
5038
5039         release_net(dev_net(dev));
5040
5041         kfree(dev->_tx);
5042
5043         /* Flush device addresses */
5044         dev_addr_flush(dev);
5045
5046         list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
5047                 netif_napi_del(p);
5048
5049         /*  Compatibility with error handling in drivers */
5050         if (dev->reg_state == NETREG_UNINITIALIZED) {
5051                 kfree((char *)dev - dev->padded);
5052                 return;
5053         }
5054
5055         BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
5056         dev->reg_state = NETREG_RELEASED;
5057
5058         /* will free via device release */
5059         put_device(&dev->dev);
5060 }
5061
/**
 *      synchronize_net -  Synchronize with packet receive processing
 *
 *      Waits, via synchronize_rcu(), until packets that are currently
 *      being received have been dealt with.  Packets that start being
 *      received later are not held up.  May sleep.
 */
void synchronize_net(void)
{
        might_sleep();

        synchronize_rcu();
}
5073
/**
 *      unregister_netdevice - remove device from the kernel
 *      @dev: device
 *
 *      Shuts down a device interface and removes it from the kernel
 *      tables.  The device is then put on the todo list, so the rest of
 *      the unregistration runs once the rtnl semaphore is released.
 *
 *      Callers must hold the rtnl semaphore.  You may want
 *      unregister_netdev() instead of this.
 */

void unregister_netdevice(struct net_device *dev)
{
        ASSERT_RTNL();

        rollback_registered(dev);

        /* The remaining work is done after the unlock. */
        net_set_todo(dev);
}
5093
/**
 *      unregister_netdev - remove device from the kernel
 *      @dev: device
 *
 *      Shuts down a device interface and removes it from the kernel
 *      tables.
 *
 *      This simply calls unregister_netdevice() with the rtnl semaphore
 *      taken around it.  In general you want to use this and not
 *      unregister_netdevice().
 */
void unregister_netdev(struct net_device *dev)
{
        rtnl_lock();

        unregister_netdevice(dev);

        rtnl_unlock();
}

EXPORT_SYMBOL(unregister_netdev);
5113
5114 /**
5115  *      dev_change_net_namespace - move device to different nethost namespace
5116  *      @dev: device
5117  *      @net: network namespace
5118  *      @pat: If not NULL name pattern to try if the current device name
5119  *            is already taken in the destination network namespace.
5120  *
5121  *      This function shuts down a device interface and moves it
5122  *      to a new network namespace. On success 0 is returned, on
5123  *      a failure a netagive errno code is returned.
5124  *
5125  *      Callers must hold the rtnl semaphore.
5126  */
5127
5128 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
5129 {
5130         char buf[IFNAMSIZ];
5131         const char *destname;
5132         int err;
5133
5134         ASSERT_RTNL();
5135
5136         /* Don't allow namespace local devices to be moved. */
5137         err = -EINVAL;
5138         if (dev->features & NETIF_F_NETNS_LOCAL)
5139                 goto out;
5140
5141 #ifdef CONFIG_SYSFS
5142         /* Don't allow real devices to be moved when sysfs
5143          * is enabled.
5144          */
5145         err = -EINVAL;
5146         if (dev->dev.parent)
5147                 goto out;
5148 #endif
5149
5150         /* Ensure the device has been registrered */
5151         err = -EINVAL;
5152         if (dev->reg_state != NETREG_REGISTERED)
5153                 goto out;
5154
5155         /* Get out if there is nothing todo */
5156         err = 0;
5157         if (net_eq(dev_net(dev), net))
5158                 goto out;
5159
5160         /* Pick the destination device name, and ensure
5161          * we can use it in the destination network namespace.
5162          */
5163         err = -EEXIST;
5164         destname = dev->name;
5165         if (__dev_get_by_name(net, destname)) {
5166                 /* We get here if we can't use the current device name */
5167                 if (!pat)
5168                         goto out;
5169                 if (!dev_valid_name(pat))
5170                         goto out;
5171                 if (strchr(pat, '%')) {
5172                         if (__dev_alloc_name(net, pat, buf) < 0)
5173                                 goto out;
5174                         destname = buf;
5175                 } else
5176                         destname = pat;
5177                 if (__dev_get_by_name(net, destname))
5178                         goto out;
5179         }
5180
5181         /*
5182          * And now a mini version of register_netdevice unregister_netdevice.
5183          */
5184
5185         /* If device is running close it first. */
5186         dev_close(dev);
5187
5188         /* And unlink it from device chain */
5189         err = -ENODEV;
5190         unlist_netdevice(dev);
5191
5192         synchronize_net();
5193
5194         /* Shutdown queueing discipline. */
5195         dev_shutdown(dev);
5196
5197         /* Notify protocols, that we are about to destroy
5198            this device. They should clean all the things.
5199         */
5200         call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5201
5202         /*
5203          *      Flush the unicast and multicast chains
5204          */
5205         dev_addr_discard(dev);
5206
5207         netdev_unregister_kobject(dev);
5208
5209         /* Actually switch the network namespace */
5210         dev_net_set(dev, net);
5211
5212         /* Assign the new device name */
5213         if (destname != dev->name)
5214                 strcpy(dev->name, destname);
5215
5216         /* If there is an ifindex conflict assign a new one */
5217         if (__dev_get_by_index(net, dev->ifindex)) {
5218                 int iflink = (dev->iflink == dev->ifindex);
5219                 dev->ifindex = dev_new_index(net);
5220                 if (iflink)
5221                         dev->iflink = dev->ifindex;
5222         }
5223
5224         /* Fixup kobjects */
5225         err = netdev_register_kobject(dev);
5226         WARN_ON(err);
5227
5228         /* Add the device back in the hashes */
5229         list_netdevice(dev);
5230
5231         /* Notify protocols, that a new device appeared. */
5232         call_netdevice_notifiers(NETDEV_REGISTER, dev);
5233
5234         synchronize_net();
5235         err = 0;
5236 out:
5237         return err;
5238 }
5239
5240 static int dev_cpu_callback(struct notifier_block *nfb,
5241                             unsigned long action,
5242                             void *ocpu)
5243 {
5244         struct sk_buff **list_skb;
5245         struct Qdisc **list_net;
5246         struct sk_buff *skb;
5247         unsigned int cpu, oldcpu = (unsigned long)ocpu;
5248         struct softnet_data *sd, *oldsd;
5249
5250         if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
5251                 return NOTIFY_OK;
5252
5253         local_irq_disable();
5254         cpu = smp_processor_id();
5255         sd = &per_cpu(softnet_data, cpu);
5256         oldsd = &per_cpu(softnet_data, oldcpu);
5257
5258         /* Find end of our completion_queue. */
5259         list_skb = &sd->completion_queue;
5260         while (*list_skb)
5261                 list_skb = &(*list_skb)->next;
5262         /* Append completion queue from offline CPU. */
5263         *list_skb = oldsd->completion_queue;
5264         oldsd->completion_queue = NULL;
5265
5266         /* Find end of our output_queue. */
5267         list_net = &sd->output_queue;
5268         while (*list_net)
5269                 list_net = &(*list_net)->next_sched;
5270         /* Append output queue from offline CPU. */
5271         *list_net = oldsd->output_queue;
5272         oldsd->output_queue = NULL;
5273
5274         raise_softirq_irqoff(NET_TX_SOFTIRQ);
5275         local_irq_enable();
5276
5277         /* Process offline CPU's input_pkt_queue */
5278         while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
5279                 netif_rx(skb);
5280
5281         return NOTIFY_OK;
5282 }
5283
5284
5285 /**
5286  *      netdev_increment_features - increment feature set by one
5287  *      @all: current feature set
5288  *      @one: new feature set
5289  *      @mask: mask feature set
5290  *
5291  *      Computes a new feature set after adding a device with feature set
5292  *      @one to the master device with current feature set @all.  Will not
5293  *      enable anything that is off in @mask. Returns the new feature set.
5294  */
5295 unsigned long netdev_increment_features(unsigned long all, unsigned long one,
5296                                         unsigned long mask)
5297 {
5298         /* If device needs checksumming, downgrade to it. */
5299         if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
5300                 all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM);
5301         else if (mask & NETIF_F_ALL_CSUM) {
5302                 /* If one device supports v4/v6 checksumming, set for all. */
5303                 if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) &&
5304                     !(all & NETIF_F_GEN_CSUM)) {
5305                         all &= ~NETIF_F_ALL_CSUM;
5306                         all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
5307                 }
5308
5309                 /* If one device supports hw checksumming, set for all. */
5310                 if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) {
5311                         all &= ~NETIF_F_ALL_CSUM;
5312                         all |= NETIF_F_HW_CSUM;
5313                 }
5314         }
5315
5316         one |= NETIF_F_ALL_CSUM;
5317
5318         one |= all & NETIF_F_ONE_FOR_ALL;
5319         all &= one | NETIF_F_LLTX | NETIF_F_GSO;
5320         all |= one & mask & NETIF_F_ONE_FOR_ALL;
5321
5322         return all;
5323 }
5324 EXPORT_SYMBOL(netdev_increment_features);
5325
5326 static struct hlist_head *netdev_create_hash(void)
5327 {
5328         int i;
5329         struct hlist_head *hash;
5330
5331         hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
5332         if (hash != NULL)
5333                 for (i = 0; i < NETDEV_HASHENTRIES; i++)
5334                         INIT_HLIST_HEAD(&hash[i]);
5335
5336         return hash;
5337 }
5338
5339 /* Initialize per network namespace state */
5340 static int __net_init netdev_init(struct net *net)
5341 {
5342         INIT_LIST_HEAD(&net->dev_base_head);
5343
5344         net->dev_name_head = netdev_create_hash();
5345         if (net->dev_name_head == NULL)
5346                 goto err_name;
5347
5348         net->dev_index_head = netdev_create_hash();
5349         if (net->dev_index_head == NULL)
5350                 goto err_idx;
5351
5352         return 0;
5353
5354 err_idx:
5355         kfree(net->dev_name_head);
5356 err_name:
5357         return -ENOMEM;
5358 }
5359
5360 /**
5361  *      netdev_drivername - network driver for the device
5362  *      @dev: network device
5363  *      @buffer: buffer for resulting name
5364  *      @len: size of buffer
5365  *
5366  *      Determine network driver for device.
5367  */
5368 char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
5369 {
5370         const struct device_driver *driver;
5371         const struct device *parent;
5372
5373         if (len <= 0 || !buffer)
5374                 return buffer;
5375         buffer[0] = 0;
5376
5377         parent = dev->dev.parent;
5378
5379         if (!parent)
5380                 return buffer;
5381
5382         driver = parent->driver;
5383         if (driver && driver->name)
5384                 strlcpy(buffer, driver->name, len);
5385         return buffer;
5386 }
5387
5388 static void __net_exit netdev_exit(struct net *net)
5389 {
5390         kfree(net->dev_name_head);
5391         kfree(net->dev_index_head);
5392 }
5393
5394 static struct pernet_operations __net_initdata netdev_net_ops = {
5395         .init = netdev_init,
5396         .exit = netdev_exit,
5397 };
5398
5399 static void __net_exit default_device_exit(struct net *net)
5400 {
5401         struct net_device *dev;
5402         /*
5403          * Push all migratable of the network devices back to the
5404          * initial network namespace
5405          */
5406         rtnl_lock();
5407 restart:
5408         for_each_netdev(net, dev) {
5409                 int err;
5410                 char fb_name[IFNAMSIZ];
5411
5412                 /* Ignore unmoveable devices (i.e. loopback) */
5413                 if (dev->features & NETIF_F_NETNS_LOCAL)
5414                         continue;
5415
5416                 /* Delete virtual devices */
5417                 if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
5418                         dev->rtnl_link_ops->dellink(dev);
5419                         goto restart;
5420                 }
5421
5422                 /* Push remaing network devices to init_net */
5423                 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
5424                 err = dev_change_net_namespace(dev, &init_net, fb_name);
5425                 if (err) {
5426                         printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
5427                                 __func__, dev->name, err);
5428                         BUG();
5429                 }
5430                 goto restart;
5431         }
5432         rtnl_unlock();
5433 }
5434
5435 static struct pernet_operations __net_initdata default_device_ops = {
5436         .exit = default_device_exit,
5437 };
5438
5439 /*
5440  *      Initialize the DEV module. At boot time this walks the device list and
5441  *      unhooks any devices that fail to initialise (normally hardware not
5442  *      present) and leaves us with a valid list of present and active devices.
5443  *
5444  */
5445
5446 /*
5447  *       This is called single threaded during boot, so no need
5448  *       to take the rtnl semaphore.
5449  */
5450 static int __init net_dev_init(void)
5451 {
5452         int i, rc = -ENOMEM;
5453
5454         BUG_ON(!dev_boot_phase);
5455
5456         if (dev_proc_init())
5457                 goto out;
5458
5459         if (netdev_kobject_init())
5460                 goto out;
5461
5462         INIT_LIST_HEAD(&ptype_all);
5463         for (i = 0; i < PTYPE_HASH_SIZE; i++)
5464                 INIT_LIST_HEAD(&ptype_base[i]);
5465
5466         if (register_pernet_subsys(&netdev_net_ops))
5467                 goto out;
5468
5469         /*
5470          *      Initialise the packet receive queues.
5471          */
5472
5473         for_each_possible_cpu(i) {
5474                 struct softnet_data *queue;
5475
5476                 queue = &per_cpu(softnet_data, i);
5477                 skb_queue_head_init(&queue->input_pkt_queue);
5478                 queue->completion_queue = NULL;
5479                 INIT_LIST_HEAD(&queue->poll_list);
5480
5481                 queue->backlog.poll = process_backlog;
5482                 queue->backlog.weight = weight_p;
5483                 queue->backlog.gro_list = NULL;
5484                 queue->backlog.gro_count = 0;
5485         }
5486
5487         dev_boot_phase = 0;
5488
5489         /* The loopback device is special if any other network devices
5490          * is present in a network namespace the loopback device must
5491          * be present. Since we now dynamically allocate and free the
5492          * loopback device ensure this invariant is maintained by
5493          * keeping the loopback device as the first device on the
5494          * list of network devices.  Ensuring the loopback devices
5495          * is the first device that appears and the last network device
5496          * that disappears.
5497          */
5498         if (register_pernet_device(&loopback_net_ops))
5499                 goto out;
5500
5501         if (register_pernet_device(&default_device_ops))
5502                 goto out;
5503
5504         open_softirq(NET_TX_SOFTIRQ, net_tx_action);
5505         open_softirq(NET_RX_SOFTIRQ, net_rx_action);
5506
5507         hotcpu_notifier(dev_cpu_callback, 0);
5508         dst_init();
5509         dev_mcast_init();
5510         rc = 0;
5511 out:
5512         return rc;
5513 }
5514
5515 subsys_initcall(net_dev_init);
5516
5517 static int __init initialize_hashrnd(void)
5518 {
5519         get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd));
5520         return 0;
5521 }
5522
5523 late_initcall_sync(initialize_hashrnd);
5524
/* Symbols exported from this file, kept in alphabetical order. */
EXPORT_SYMBOL(__dev_get_by_index);
EXPORT_SYMBOL(__dev_get_by_name);
EXPORT_SYMBOL(__dev_remove_pack);
EXPORT_SYMBOL(dev_add_pack);
EXPORT_SYMBOL(dev_alloc_name);
EXPORT_SYMBOL(dev_change_flags);
EXPORT_SYMBOL(dev_close);
EXPORT_SYMBOL(dev_get_by_flags);
EXPORT_SYMBOL(dev_get_by_index);
EXPORT_SYMBOL(dev_get_by_name);
EXPORT_SYMBOL(dev_get_flags);
EXPORT_SYMBOL(dev_load);
EXPORT_SYMBOL(dev_open);
EXPORT_SYMBOL(dev_queue_xmit);
EXPORT_SYMBOL(dev_remove_pack);
EXPORT_SYMBOL(dev_set_allmulti);
EXPORT_SYMBOL(dev_set_mac_address);
EXPORT_SYMBOL(dev_set_mtu);
EXPORT_SYMBOL(dev_set_promiscuity);
EXPORT_SYMBOL(dev_valid_name);
EXPORT_SYMBOL(free_netdev);
EXPORT_SYMBOL(net_disable_timestamp);
EXPORT_SYMBOL(net_enable_timestamp);
EXPORT_SYMBOL(netdev_boot_setup_check);
EXPORT_SYMBOL(netdev_set_master);
EXPORT_SYMBOL(netdev_state_change);
EXPORT_SYMBOL(netif_receive_skb);
EXPORT_SYMBOL(netif_rx);
EXPORT_SYMBOL(register_gifconf);
EXPORT_SYMBOL(register_netdevice);
EXPORT_SYMBOL(register_netdevice_notifier);
EXPORT_SYMBOL(skb_checksum_help);
EXPORT_SYMBOL(synchronize_net);
EXPORT_SYMBOL(unregister_netdevice);
EXPORT_SYMBOL(unregister_netdevice_notifier);

/* Bridge hooks are only exported when bridging is built in or as a module. */
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
EXPORT_SYMBOL(br_handle_frame_hook);
EXPORT_SYMBOL(br_fdb_get_hook);
EXPORT_SYMBOL(br_fdb_put_hook);
#endif

/* The per-CPU softnet_data needs the per-CPU flavour of the export macro. */
EXPORT_PER_CPU_SYMBOL(softnet_data);