bonding: refactor ARP active-backup monitor
[safe/jmp/linux-2.6] / drivers / net / bonding / bond_main.c
1 /*
2  * originally based on the dummy device.
3  *
4  * Copyright 1999, Thomas Davis, tadavis@lbl.gov.
5  * Licensed under the GPL. Based on dummy.c, and eql.c devices.
6  *
7  * bonding.c: an Ethernet Bonding driver
8  *
9  * This is useful for talking to Cisco EtherChannel compatible equipment:
10  *      Cisco 5500
11  *      Sun Trunking (Solaris)
12  *      Alteon AceDirector Trunks
13  *      Linux Bonding
14  *      and probably many L2 switches ...
15  *
16  * How it works:
17  *    ifconfig bond0 ipaddress netmask up
18  *      will setup a network device, with an ip address.  No mac address
19  *      will be assigned at this time.  The hw mac address will come from
20  *      the first slave bonded to the channel.  All slaves will then use
21  *      this hw mac address.
22  *
23  *    ifconfig bond0 down
24  *         will release all slaves, marking them as down.
25  *
26  *    ifenslave bond0 eth0
27  *      will attach eth0 to bond0 as a slave.  eth0 hw mac address will either
28  *      a: be used as initial mac address
29  *      b: if a hw mac address already is there, eth0's hw mac address
30  *         will then be set from bond0.
31  *
32  */
33
34 //#define BONDING_DEBUG 1
35
36 #include <linux/kernel.h>
37 #include <linux/module.h>
38 #include <linux/types.h>
39 #include <linux/fcntl.h>
40 #include <linux/interrupt.h>
41 #include <linux/ptrace.h>
42 #include <linux/ioport.h>
43 #include <linux/in.h>
44 #include <net/ip.h>
45 #include <linux/ip.h>
46 #include <linux/tcp.h>
47 #include <linux/udp.h>
48 #include <linux/slab.h>
49 #include <linux/string.h>
50 #include <linux/init.h>
51 #include <linux/timer.h>
52 #include <linux/socket.h>
53 #include <linux/ctype.h>
54 #include <linux/inet.h>
55 #include <linux/bitops.h>
56 #include <asm/system.h>
57 #include <asm/io.h>
58 #include <asm/dma.h>
59 #include <asm/uaccess.h>
60 #include <linux/errno.h>
61 #include <linux/netdevice.h>
62 #include <linux/inetdevice.h>
63 #include <linux/igmp.h>
64 #include <linux/etherdevice.h>
65 #include <linux/skbuff.h>
66 #include <net/sock.h>
67 #include <linux/rtnetlink.h>
68 #include <linux/proc_fs.h>
69 #include <linux/seq_file.h>
70 #include <linux/smp.h>
71 #include <linux/if_ether.h>
72 #include <net/arp.h>
73 #include <linux/mii.h>
74 #include <linux/ethtool.h>
75 #include <linux/if_vlan.h>
76 #include <linux/if_bonding.h>
77 #include <linux/jiffies.h>
78 #include <net/route.h>
79 #include <net/net_namespace.h>
80 #include "bonding.h"
81 #include "bond_3ad.h"
82 #include "bond_alb.h"
83
84 /*---------------------------- Module parameters ----------------------------*/
85
86 /* monitor all links that often (in milliseconds). <=0 disables monitoring */
87 #define BOND_LINK_MON_INTERV    0
88 #define BOND_LINK_ARP_INTERV    0
89
90 static int max_bonds    = BOND_DEFAULT_MAX_BONDS;
91 static int num_grat_arp = 1;
92 static int miimon       = BOND_LINK_MON_INTERV;
93 static int updelay      = 0;
94 static int downdelay    = 0;
95 static int use_carrier  = 1;
96 static char *mode       = NULL;
97 static char *primary    = NULL;
98 static char *lacp_rate  = NULL;
99 static char *xmit_hash_policy = NULL;
100 static int arp_interval = BOND_LINK_ARP_INTERV;
101 static char *arp_ip_target[BOND_MAX_ARP_TARGETS] = { NULL, };
102 static char *arp_validate = NULL;
103 static int fail_over_mac = 0;
104 struct bond_params bonding_defaults;
105
106 module_param(max_bonds, int, 0);
107 MODULE_PARM_DESC(max_bonds, "Max number of bonded devices");
108 module_param(num_grat_arp, int, 0644);
109 MODULE_PARM_DESC(num_grat_arp, "Number of gratuitous ARP packets to send on failover event");
110 module_param(miimon, int, 0);
111 MODULE_PARM_DESC(miimon, "Link check interval in milliseconds");
112 module_param(updelay, int, 0);
113 MODULE_PARM_DESC(updelay, "Delay before considering link up, in milliseconds");
114 module_param(downdelay, int, 0);
115 MODULE_PARM_DESC(downdelay, "Delay before considering link down, "
116                             "in milliseconds");
117 module_param(use_carrier, int, 0);
118 MODULE_PARM_DESC(use_carrier, "Use netif_carrier_ok (vs MII ioctls) in miimon; "
119                               "0 for off, 1 for on (default)");
120 module_param(mode, charp, 0);
121 MODULE_PARM_DESC(mode, "Mode of operation : 0 for balance-rr, "
122                        "1 for active-backup, 2 for balance-xor, "
123                        "3 for broadcast, 4 for 802.3ad, 5 for balance-tlb, "
124                        "6 for balance-alb");
125 module_param(primary, charp, 0);
126 MODULE_PARM_DESC(primary, "Primary network device to use");
127 module_param(lacp_rate, charp, 0);
128 MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner "
129                             "(slow/fast)");
130 module_param(xmit_hash_policy, charp, 0);
131 MODULE_PARM_DESC(xmit_hash_policy, "XOR hashing method: 0 for layer 2 (default)"
132                                    ", 1 for layer 3+4");
133 module_param(arp_interval, int, 0);
134 MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds");
135 module_param_array(arp_ip_target, charp, NULL, 0);
136 MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form");
137 module_param(arp_validate, charp, 0);
138 MODULE_PARM_DESC(arp_validate, "validate src/dst of ARP probes: none (default), active, backup or all");
139 module_param(fail_over_mac, int, 0);
140 MODULE_PARM_DESC(fail_over_mac, "For active-backup, do not set all slaves to the same MAC.  0 of off (default), 1 for on.");
141
142 /*----------------------------- Global variables ----------------------------*/
143
144 static const char * const version =
145         DRV_DESCRIPTION ": v" DRV_VERSION " (" DRV_RELDATE ")\n";
146
147 LIST_HEAD(bond_dev_list);
148
149 #ifdef CONFIG_PROC_FS
150 static struct proc_dir_entry *bond_proc_dir = NULL;
151 #endif
152
153 extern struct rw_semaphore bonding_rwsem;
154 static __be32 arp_target[BOND_MAX_ARP_TARGETS] = { 0, } ;
155 static int arp_ip_count = 0;
156 static int bond_mode    = BOND_MODE_ROUNDROBIN;
157 static int xmit_hashtype= BOND_XMIT_POLICY_LAYER2;
158 static int lacp_fast    = 0;
159
160
161 struct bond_parm_tbl bond_lacp_tbl[] = {
162 {       "slow",         AD_LACP_SLOW},
163 {       "fast",         AD_LACP_FAST},
164 {       NULL,           -1},
165 };
166
167 struct bond_parm_tbl bond_mode_tbl[] = {
168 {       "balance-rr",           BOND_MODE_ROUNDROBIN},
169 {       "active-backup",        BOND_MODE_ACTIVEBACKUP},
170 {       "balance-xor",          BOND_MODE_XOR},
171 {       "broadcast",            BOND_MODE_BROADCAST},
172 {       "802.3ad",              BOND_MODE_8023AD},
173 {       "balance-tlb",          BOND_MODE_TLB},
174 {       "balance-alb",          BOND_MODE_ALB},
175 {       NULL,                   -1},
176 };
177
178 struct bond_parm_tbl xmit_hashtype_tbl[] = {
179 {       "layer2",               BOND_XMIT_POLICY_LAYER2},
180 {       "layer3+4",             BOND_XMIT_POLICY_LAYER34},
181 {       "layer2+3",             BOND_XMIT_POLICY_LAYER23},
182 {       NULL,                   -1},
183 };
184
185 struct bond_parm_tbl arp_validate_tbl[] = {
186 {       "none",                 BOND_ARP_VALIDATE_NONE},
187 {       "active",               BOND_ARP_VALIDATE_ACTIVE},
188 {       "backup",               BOND_ARP_VALIDATE_BACKUP},
189 {       "all",                  BOND_ARP_VALIDATE_ALL},
190 {       NULL,                   -1},
191 };
192
193 /*-------------------------- Forward declarations ---------------------------*/
194
195 static void bond_send_gratuitous_arp(struct bonding *bond);
196 static void bond_deinit(struct net_device *bond_dev);
197
198 /*---------------------------- General routines -----------------------------*/
199
200 static const char *bond_mode_name(int mode)
201 {
202         switch (mode) {
203         case BOND_MODE_ROUNDROBIN :
204                 return "load balancing (round-robin)";
205         case BOND_MODE_ACTIVEBACKUP :
206                 return "fault-tolerance (active-backup)";
207         case BOND_MODE_XOR :
208                 return "load balancing (xor)";
209         case BOND_MODE_BROADCAST :
210                 return "fault-tolerance (broadcast)";
211         case BOND_MODE_8023AD:
212                 return "IEEE 802.3ad Dynamic link aggregation";
213         case BOND_MODE_TLB:
214                 return "transmit load balancing";
215         case BOND_MODE_ALB:
216                 return "adaptive load balancing";
217         default:
218                 return "unknown";
219         }
220 }
221
222 /*---------------------------------- VLAN -----------------------------------*/
223
224 /**
225  * bond_add_vlan - add a new vlan id on bond
226  * @bond: bond that got the notification
227  * @vlan_id: the vlan id to add
228  *
229  * Returns -ENOMEM if allocation failed.
230  */
231 static int bond_add_vlan(struct bonding *bond, unsigned short vlan_id)
232 {
233         struct vlan_entry *vlan;
234
235         dprintk("bond: %s, vlan id %d\n",
236                 (bond ? bond->dev->name: "None"), vlan_id);
237
238         vlan = kmalloc(sizeof(struct vlan_entry), GFP_KERNEL);
239         if (!vlan) {
240                 return -ENOMEM;
241         }
242
243         INIT_LIST_HEAD(&vlan->vlan_list);
244         vlan->vlan_id = vlan_id;
245         vlan->vlan_ip = 0;
246
247         write_lock_bh(&bond->lock);
248
249         list_add_tail(&vlan->vlan_list, &bond->vlan_list);
250
251         write_unlock_bh(&bond->lock);
252
253         dprintk("added VLAN ID %d on bond %s\n", vlan_id, bond->dev->name);
254
255         return 0;
256 }
257
258 /**
259  * bond_del_vlan - delete a vlan id from bond
260  * @bond: bond that got the notification
261  * @vlan_id: the vlan id to delete
262  *
263  * returns -ENODEV if @vlan_id was not found in @bond.
264  */
265 static int bond_del_vlan(struct bonding *bond, unsigned short vlan_id)
266 {
267         struct vlan_entry *vlan;
268         int res = -ENODEV;
269
270         dprintk("bond: %s, vlan id %d\n", bond->dev->name, vlan_id);
271
272         write_lock_bh(&bond->lock);
273
274         list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
275                 if (vlan->vlan_id == vlan_id) {
276                         list_del(&vlan->vlan_list);
277
278                         if ((bond->params.mode == BOND_MODE_TLB) ||
279                             (bond->params.mode == BOND_MODE_ALB)) {
280                                 bond_alb_clear_vlan(bond, vlan_id);
281                         }
282
283                         dprintk("removed VLAN ID %d from bond %s\n", vlan_id,
284                                 bond->dev->name);
285
286                         kfree(vlan);
287
288                         if (list_empty(&bond->vlan_list) &&
289                             (bond->slave_cnt == 0)) {
290                                 /* Last VLAN removed and no slaves, so
291                                  * restore block on adding VLANs. This will
292                                  * be removed once new slaves that are not
293                                  * VLAN challenged will be added.
294                                  */
295                                 bond->dev->features |= NETIF_F_VLAN_CHALLENGED;
296                         }
297
298                         res = 0;
299                         goto out;
300                 }
301         }
302
303         dprintk("couldn't find VLAN ID %d in bond %s\n", vlan_id,
304                 bond->dev->name);
305
306 out:
307         write_unlock_bh(&bond->lock);
308         return res;
309 }
310
311 /**
312  * bond_has_challenged_slaves
313  * @bond: the bond we're working on
314  *
315  * Searches the slave list. Returns 1 if a vlan challenged slave
316  * was found, 0 otherwise.
317  *
318  * Assumes bond->lock is held.
319  */
320 static int bond_has_challenged_slaves(struct bonding *bond)
321 {
322         struct slave *slave;
323         int i;
324
325         bond_for_each_slave(bond, slave, i) {
326                 if (slave->dev->features & NETIF_F_VLAN_CHALLENGED) {
327                         dprintk("found VLAN challenged slave - %s\n",
328                                 slave->dev->name);
329                         return 1;
330                 }
331         }
332
333         dprintk("no VLAN challenged slaves found\n");
334         return 0;
335 }
336
337 /**
338  * bond_next_vlan - safely skip to the next item in the vlans list.
339  * @bond: the bond we're working on
340  * @curr: item we're advancing from
341  *
342  * Returns %NULL if list is empty, bond->next_vlan if @curr is %NULL,
343  * or @curr->next otherwise (even if it is @curr itself again).
344  * 
345  * Caller must hold bond->lock
346  */
347 struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr)
348 {
349         struct vlan_entry *next, *last;
350
351         if (list_empty(&bond->vlan_list)) {
352                 return NULL;
353         }
354
355         if (!curr) {
356                 next = list_entry(bond->vlan_list.next,
357                                   struct vlan_entry, vlan_list);
358         } else {
359                 last = list_entry(bond->vlan_list.prev,
360                                   struct vlan_entry, vlan_list);
361                 if (last == curr) {
362                         next = list_entry(bond->vlan_list.next,
363                                           struct vlan_entry, vlan_list);
364                 } else {
365                         next = list_entry(curr->vlan_list.next,
366                                           struct vlan_entry, vlan_list);
367                 }
368         }
369
370         return next;
371 }
372
373 /**
374  * bond_dev_queue_xmit - Prepare skb for xmit.
375  * 
376  * @bond: bond device that got this skb for tx.
377  * @skb: hw accel VLAN tagged skb to transmit
378  * @slave_dev: slave that is supposed to xmit this skbuff
379  * 
380  * When the bond gets an skb to transmit that is
381  * already hardware accelerated VLAN tagged, and it
382  * needs to relay this skb to a slave that is not
383  * hw accel capable, the skb needs to be "unaccelerated",
384  * i.e. strip the hwaccel tag and re-insert it as part
385  * of the payload.
386  */
387 int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev)
388 {
389         unsigned short uninitialized_var(vlan_id);
390
391         if (!list_empty(&bond->vlan_list) &&
392             !(slave_dev->features & NETIF_F_HW_VLAN_TX) &&
393             vlan_get_tag(skb, &vlan_id) == 0) {
394                 skb->dev = slave_dev;
395                 skb = vlan_put_tag(skb, vlan_id);
396                 if (!skb) {
397                         /* vlan_put_tag() frees the skb in case of error,
398                          * so return success here so the calling functions
399                          * won't attempt to free is again.
400                          */
401                         return 0;
402                 }
403         } else {
404                 skb->dev = slave_dev;
405         }
406
407         skb->priority = 1;
408         dev_queue_xmit(skb);
409
410         return 0;
411 }
412
413 /*
414  * In the following 3 functions, bond_vlan_rx_register(), bond_vlan_rx_add_vid()
415  * and bond_vlan_rx_kill_vid(), we don't protect the slave list iteration with a
416  * lock because:
417  * a. This operation is performed in IOCTL context,
418  * b. The operation is protected by the RTNL semaphore in the 8021q code,
419  * c. Holding a lock with BH disabled while directly calling a base driver
420  *    entry point is generally a BAD idea.
421  * 
422  * The design of synchronization/protection for this operation in the 8021q
423  * module is good for one or more VLAN devices over a single physical device
424  * and cannot be extended for a teaming solution like bonding, so there is a
425  * potential race condition here where a net device from the vlan group might
426  * be referenced (either by a base driver or the 8021q code) while it is being
427  * removed from the system. However, it turns out we're not making matters
428  * worse, and if it works for regular VLAN usage it will work here too.
429 */
430
431 /**
432  * bond_vlan_rx_register - Propagates registration to slaves
433  * @bond_dev: bonding net device that got called
434  * @grp: vlan group being registered
435  */
436 static void bond_vlan_rx_register(struct net_device *bond_dev, struct vlan_group *grp)
437 {
438         struct bonding *bond = bond_dev->priv;
439         struct slave *slave;
440         int i;
441
442         bond->vlgrp = grp;
443
444         bond_for_each_slave(bond, slave, i) {
445                 struct net_device *slave_dev = slave->dev;
446
447                 if ((slave_dev->features & NETIF_F_HW_VLAN_RX) &&
448                     slave_dev->vlan_rx_register) {
449                         slave_dev->vlan_rx_register(slave_dev, grp);
450                 }
451         }
452 }
453
454 /**
455  * bond_vlan_rx_add_vid - Propagates adding an id to slaves
456  * @bond_dev: bonding net device that got called
457  * @vid: vlan id being added
458  */
459 static void bond_vlan_rx_add_vid(struct net_device *bond_dev, uint16_t vid)
460 {
461         struct bonding *bond = bond_dev->priv;
462         struct slave *slave;
463         int i, res;
464
465         bond_for_each_slave(bond, slave, i) {
466                 struct net_device *slave_dev = slave->dev;
467
468                 if ((slave_dev->features & NETIF_F_HW_VLAN_FILTER) &&
469                     slave_dev->vlan_rx_add_vid) {
470                         slave_dev->vlan_rx_add_vid(slave_dev, vid);
471                 }
472         }
473
474         res = bond_add_vlan(bond, vid);
475         if (res) {
476                 printk(KERN_ERR DRV_NAME
477                        ": %s: Error: Failed to add vlan id %d\n",
478                        bond_dev->name, vid);
479         }
480 }
481
482 /**
483  * bond_vlan_rx_kill_vid - Propagates deleting an id to slaves
484  * @bond_dev: bonding net device that got called
485  * @vid: vlan id being removed
486  */
487 static void bond_vlan_rx_kill_vid(struct net_device *bond_dev, uint16_t vid)
488 {
489         struct bonding *bond = bond_dev->priv;
490         struct slave *slave;
491         struct net_device *vlan_dev;
492         int i, res;
493
494         bond_for_each_slave(bond, slave, i) {
495                 struct net_device *slave_dev = slave->dev;
496
497                 if ((slave_dev->features & NETIF_F_HW_VLAN_FILTER) &&
498                     slave_dev->vlan_rx_kill_vid) {
499                         /* Save and then restore vlan_dev in the grp array,
500                          * since the slave's driver might clear it.
501                          */
502                         vlan_dev = vlan_group_get_device(bond->vlgrp, vid);
503                         slave_dev->vlan_rx_kill_vid(slave_dev, vid);
504                         vlan_group_set_device(bond->vlgrp, vid, vlan_dev);
505                 }
506         }
507
508         res = bond_del_vlan(bond, vid);
509         if (res) {
510                 printk(KERN_ERR DRV_NAME
511                        ": %s: Error: Failed to remove vlan id %d\n",
512                        bond_dev->name, vid);
513         }
514 }
515
516 static void bond_add_vlans_on_slave(struct bonding *bond, struct net_device *slave_dev)
517 {
518         struct vlan_entry *vlan;
519
520         write_lock_bh(&bond->lock);
521
522         if (list_empty(&bond->vlan_list)) {
523                 goto out;
524         }
525
526         if ((slave_dev->features & NETIF_F_HW_VLAN_RX) &&
527             slave_dev->vlan_rx_register) {
528                 slave_dev->vlan_rx_register(slave_dev, bond->vlgrp);
529         }
530
531         if (!(slave_dev->features & NETIF_F_HW_VLAN_FILTER) ||
532             !(slave_dev->vlan_rx_add_vid)) {
533                 goto out;
534         }
535
536         list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
537                 slave_dev->vlan_rx_add_vid(slave_dev, vlan->vlan_id);
538         }
539
540 out:
541         write_unlock_bh(&bond->lock);
542 }
543
544 static void bond_del_vlans_from_slave(struct bonding *bond, struct net_device *slave_dev)
545 {
546         struct vlan_entry *vlan;
547         struct net_device *vlan_dev;
548
549         write_lock_bh(&bond->lock);
550
551         if (list_empty(&bond->vlan_list)) {
552                 goto out;
553         }
554
555         if (!(slave_dev->features & NETIF_F_HW_VLAN_FILTER) ||
556             !(slave_dev->vlan_rx_kill_vid)) {
557                 goto unreg;
558         }
559
560         list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
561                 /* Save and then restore vlan_dev in the grp array,
562                  * since the slave's driver might clear it.
563                  */
564                 vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id);
565                 slave_dev->vlan_rx_kill_vid(slave_dev, vlan->vlan_id);
566                 vlan_group_set_device(bond->vlgrp, vlan->vlan_id, vlan_dev);
567         }
568
569 unreg:
570         if ((slave_dev->features & NETIF_F_HW_VLAN_RX) &&
571             slave_dev->vlan_rx_register) {
572                 slave_dev->vlan_rx_register(slave_dev, NULL);
573         }
574
575 out:
576         write_unlock_bh(&bond->lock);
577 }
578
579 /*------------------------------- Link status -------------------------------*/
580
581 /*
582  * Set the carrier state for the master according to the state of its
583  * slaves.  If any slaves are up, the master is up.  In 802.3ad mode,
584  * do special 802.3ad magic.
585  *
586  * Returns zero if carrier state does not change, nonzero if it does.
587  */
588 static int bond_set_carrier(struct bonding *bond)
589 {
590         struct slave *slave;
591         int i;
592
593         if (bond->slave_cnt == 0)
594                 goto down;
595
596         if (bond->params.mode == BOND_MODE_8023AD)
597                 return bond_3ad_set_carrier(bond);
598
599         bond_for_each_slave(bond, slave, i) {
600                 if (slave->link == BOND_LINK_UP) {
601                         if (!netif_carrier_ok(bond->dev)) {
602                                 netif_carrier_on(bond->dev);
603                                 return 1;
604                         }
605                         return 0;
606                 }
607         }
608
609 down:
610         if (netif_carrier_ok(bond->dev)) {
611                 netif_carrier_off(bond->dev);
612                 return 1;
613         }
614         return 0;
615 }
616
617 /*
618  * Get link speed and duplex from the slave's base driver
619  * using ethtool. If for some reason the call fails or the
620  * values are invalid, fake speed and duplex to 100/Full
621  * and return error.
622  */
623 static int bond_update_speed_duplex(struct slave *slave)
624 {
625         struct net_device *slave_dev = slave->dev;
626         struct ethtool_cmd etool;
627         int res;
628
629         /* Fake speed and duplex */
630         slave->speed = SPEED_100;
631         slave->duplex = DUPLEX_FULL;
632
633         if (!slave_dev->ethtool_ops || !slave_dev->ethtool_ops->get_settings)
634                 return -1;
635
636         res = slave_dev->ethtool_ops->get_settings(slave_dev, &etool);
637         if (res < 0)
638                 return -1;
639
640         switch (etool.speed) {
641         case SPEED_10:
642         case SPEED_100:
643         case SPEED_1000:
644         case SPEED_10000:
645                 break;
646         default:
647                 return -1;
648         }
649
650         switch (etool.duplex) {
651         case DUPLEX_FULL:
652         case DUPLEX_HALF:
653                 break;
654         default:
655                 return -1;
656         }
657
658         slave->speed = etool.speed;
659         slave->duplex = etool.duplex;
660
661         return 0;
662 }
663
664 /*
665  * if <dev> supports MII link status reporting, check its link status.
666  *
667  * We either do MII/ETHTOOL ioctls, or check netif_carrier_ok(),
668  * depening upon the setting of the use_carrier parameter.
669  *
670  * Return either BMSR_LSTATUS, meaning that the link is up (or we
671  * can't tell and just pretend it is), or 0, meaning that the link is
672  * down.
673  *
674  * If reporting is non-zero, instead of faking link up, return -1 if
675  * both ETHTOOL and MII ioctls fail (meaning the device does not
676  * support them).  If use_carrier is set, return whatever it says.
677  * It'd be nice if there was a good way to tell if a driver supports
678  * netif_carrier, but there really isn't.
679  */
680 static int bond_check_dev_link(struct bonding *bond, struct net_device *slave_dev, int reporting)
681 {
682         static int (* ioctl)(struct net_device *, struct ifreq *, int);
683         struct ifreq ifr;
684         struct mii_ioctl_data *mii;
685
686         if (bond->params.use_carrier) {
687                 return netif_carrier_ok(slave_dev) ? BMSR_LSTATUS : 0;
688         }
689
690         ioctl = slave_dev->do_ioctl;
691         if (ioctl) {
692                 /* TODO: set pointer to correct ioctl on a per team member */
693                 /*       bases to make this more efficient. that is, once  */
694                 /*       we determine the correct ioctl, we will always    */
695                 /*       call it and not the others for that team          */
696                 /*       member.                                           */
697
698                 /*
699                  * We cannot assume that SIOCGMIIPHY will also read a
700                  * register; not all network drivers (e.g., e100)
701                  * support that.
702                  */
703
704                 /* Yes, the mii is overlaid on the ifreq.ifr_ifru */
705                 strncpy(ifr.ifr_name, slave_dev->name, IFNAMSIZ);
706                 mii = if_mii(&ifr);
707                 if (IOCTL(slave_dev, &ifr, SIOCGMIIPHY) == 0) {
708                         mii->reg_num = MII_BMSR;
709                         if (IOCTL(slave_dev, &ifr, SIOCGMIIREG) == 0) {
710                                 return (mii->val_out & BMSR_LSTATUS);
711                         }
712                 }
713         }
714
715         /*
716          * Some drivers cache ETHTOOL_GLINK for a period of time so we only
717          * attempt to get link status from it if the above MII ioctls fail.
718          */
719         if (slave_dev->ethtool_ops) {
720                 if (slave_dev->ethtool_ops->get_link) {
721                         u32 link;
722
723                         link = slave_dev->ethtool_ops->get_link(slave_dev);
724
725                         return link ? BMSR_LSTATUS : 0;
726                 }
727         }
728
729         /*
730          * If reporting, report that either there's no dev->do_ioctl,
731          * or both SIOCGMIIREG and get_link failed (meaning that we
732          * cannot report link status).  If not reporting, pretend
733          * we're ok.
734          */
735         return (reporting ? -1 : BMSR_LSTATUS);
736 }
737
738 /*----------------------------- Multicast list ------------------------------*/
739
740 /*
741  * Returns 0 if dmi1 and dmi2 are the same, non-0 otherwise
742  */
743 static inline int bond_is_dmi_same(struct dev_mc_list *dmi1, struct dev_mc_list *dmi2)
744 {
745         return memcmp(dmi1->dmi_addr, dmi2->dmi_addr, dmi1->dmi_addrlen) == 0 &&
746                         dmi1->dmi_addrlen == dmi2->dmi_addrlen;
747 }
748
749 /*
750  * returns dmi entry if found, NULL otherwise
751  */
752 static struct dev_mc_list *bond_mc_list_find_dmi(struct dev_mc_list *dmi, struct dev_mc_list *mc_list)
753 {
754         struct dev_mc_list *idmi;
755
756         for (idmi = mc_list; idmi; idmi = idmi->next) {
757                 if (bond_is_dmi_same(dmi, idmi)) {
758                         return idmi;
759                 }
760         }
761
762         return NULL;
763 }
764
765 /*
766  * Push the promiscuity flag down to appropriate slaves
767  */
768 static void bond_set_promiscuity(struct bonding *bond, int inc)
769 {
770         if (USES_PRIMARY(bond->params.mode)) {
771                 /* write lock already acquired */
772                 if (bond->curr_active_slave) {
773                         dev_set_promiscuity(bond->curr_active_slave->dev, inc);
774                 }
775         } else {
776                 struct slave *slave;
777                 int i;
778                 bond_for_each_slave(bond, slave, i) {
779                         dev_set_promiscuity(slave->dev, inc);
780                 }
781         }
782 }
783
784 /*
785  * Push the allmulti flag down to all slaves
786  */
787 static void bond_set_allmulti(struct bonding *bond, int inc)
788 {
789         if (USES_PRIMARY(bond->params.mode)) {
790                 /* write lock already acquired */
791                 if (bond->curr_active_slave) {
792                         dev_set_allmulti(bond->curr_active_slave->dev, inc);
793                 }
794         } else {
795                 struct slave *slave;
796                 int i;
797                 bond_for_each_slave(bond, slave, i) {
798                         dev_set_allmulti(slave->dev, inc);
799                 }
800         }
801 }
802
803 /*
804  * Add a Multicast address to slaves
805  * according to mode
806  */
807 static void bond_mc_add(struct bonding *bond, void *addr, int alen)
808 {
809         if (USES_PRIMARY(bond->params.mode)) {
810                 /* write lock already acquired */
811                 if (bond->curr_active_slave) {
812                         dev_mc_add(bond->curr_active_slave->dev, addr, alen, 0);
813                 }
814         } else {
815                 struct slave *slave;
816                 int i;
817                 bond_for_each_slave(bond, slave, i) {
818                         dev_mc_add(slave->dev, addr, alen, 0);
819                 }
820         }
821 }
822
823 /*
824  * Remove a multicast address from slave
825  * according to mode
826  */
827 static void bond_mc_delete(struct bonding *bond, void *addr, int alen)
828 {
829         if (USES_PRIMARY(bond->params.mode)) {
830                 /* write lock already acquired */
831                 if (bond->curr_active_slave) {
832                         dev_mc_delete(bond->curr_active_slave->dev, addr, alen, 0);
833                 }
834         } else {
835                 struct slave *slave;
836                 int i;
837                 bond_for_each_slave(bond, slave, i) {
838                         dev_mc_delete(slave->dev, addr, alen, 0);
839                 }
840         }
841 }
842
843
844 /*
845  * Retrieve the list of registered multicast addresses for the bonding
846  * device and retransmit an IGMP JOIN request to the current active
847  * slave.
848  */
849 static void bond_resend_igmp_join_requests(struct bonding *bond)
850 {
851         struct in_device *in_dev;
852         struct ip_mc_list *im;
853
854         rcu_read_lock();
855         in_dev = __in_dev_get_rcu(bond->dev);
856         if (in_dev) {
857                 for (im = in_dev->mc_list; im; im = im->next) {
858                         ip_mc_rejoin_group(im);
859                 }
860         }
861
862         rcu_read_unlock();
863 }
864
865 /*
866  * Totally destroys the mc_list in bond
867  */
868 static void bond_mc_list_destroy(struct bonding *bond)
869 {
870         struct dev_mc_list *dmi;
871
872         dmi = bond->mc_list;
873         while (dmi) {
874                 bond->mc_list = dmi->next;
875                 kfree(dmi);
876                 dmi = bond->mc_list;
877         }
878         bond->mc_list = NULL;
879 }
880
881 /*
882  * Copy all the Multicast addresses from src to the bonding device dst
883  */
884 static int bond_mc_list_copy(struct dev_mc_list *mc_list, struct bonding *bond,
885                              gfp_t gfp_flag)
886 {
887         struct dev_mc_list *dmi, *new_dmi;
888
889         for (dmi = mc_list; dmi; dmi = dmi->next) {
890                 new_dmi = kmalloc(sizeof(struct dev_mc_list), gfp_flag);
891
892                 if (!new_dmi) {
893                         /* FIXME: Potential memory leak !!! */
894                         return -ENOMEM;
895                 }
896
897                 new_dmi->next = bond->mc_list;
898                 bond->mc_list = new_dmi;
899                 new_dmi->dmi_addrlen = dmi->dmi_addrlen;
900                 memcpy(new_dmi->dmi_addr, dmi->dmi_addr, dmi->dmi_addrlen);
901                 new_dmi->dmi_users = dmi->dmi_users;
902                 new_dmi->dmi_gusers = dmi->dmi_gusers;
903         }
904
905         return 0;
906 }
907
908 /*
909  * flush all members of flush->mc_list from device dev->mc_list
910  */
911 static void bond_mc_list_flush(struct net_device *bond_dev, struct net_device *slave_dev)
912 {
913         struct bonding *bond = bond_dev->priv;
914         struct dev_mc_list *dmi;
915
916         for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) {
917                 dev_mc_delete(slave_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0);
918         }
919
920         if (bond->params.mode == BOND_MODE_8023AD) {
921                 /* del lacpdu mc addr from mc list */
922                 u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
923
924                 dev_mc_delete(slave_dev, lacpdu_multicast, ETH_ALEN, 0);
925         }
926 }
927
928 /*--------------------------- Active slave change ---------------------------*/
929
930 /*
931  * Update the mc list and multicast-related flags for the new and
932  * old active slaves (if any) according to the multicast mode, and
933  * promiscuous flags unconditionally.
934  */
935 static void bond_mc_swap(struct bonding *bond, struct slave *new_active, struct slave *old_active)
936 {
937         struct dev_mc_list *dmi;
938
939         if (!USES_PRIMARY(bond->params.mode)) {
940                 /* nothing to do -  mc list is already up-to-date on
941                  * all slaves
942                  */
943                 return;
944         }
945
946         if (old_active) {
947                 if (bond->dev->flags & IFF_PROMISC) {
948                         dev_set_promiscuity(old_active->dev, -1);
949                 }
950
951                 if (bond->dev->flags & IFF_ALLMULTI) {
952                         dev_set_allmulti(old_active->dev, -1);
953                 }
954
955                 for (dmi = bond->dev->mc_list; dmi; dmi = dmi->next) {
956                         dev_mc_delete(old_active->dev, dmi->dmi_addr, dmi->dmi_addrlen, 0);
957                 }
958         }
959
960         if (new_active) {
961                 if (bond->dev->flags & IFF_PROMISC) {
962                         dev_set_promiscuity(new_active->dev, 1);
963                 }
964
965                 if (bond->dev->flags & IFF_ALLMULTI) {
966                         dev_set_allmulti(new_active->dev, 1);
967                 }
968
969                 for (dmi = bond->dev->mc_list; dmi; dmi = dmi->next) {
970                         dev_mc_add(new_active->dev, dmi->dmi_addr, dmi->dmi_addrlen, 0);
971                 }
972                 bond_resend_igmp_join_requests(bond);
973         }
974 }
975
976 /**
977  * find_best_interface - select the best available slave to be the active one
978  * @bond: our bonding struct
979  *
980  * Warning: Caller must hold curr_slave_lock for writing.
981  */
982 static struct slave *bond_find_best_slave(struct bonding *bond)
983 {
984         struct slave *new_active, *old_active;
985         struct slave *bestslave = NULL;
986         int mintime = bond->params.updelay;
987         int i;
988
989         new_active = old_active = bond->curr_active_slave;
990
991         if (!new_active) { /* there were no active slaves left */
992                 if (bond->slave_cnt > 0) {  /* found one slave */
993                         new_active = bond->first_slave;
994                 } else {
995                         return NULL; /* still no slave, return NULL */
996                 }
997         }
998
999         /* first try the primary link; if arping, a link must tx/rx traffic
1000          * before it can be considered the curr_active_slave - also, we would skip
1001          * slaves between the curr_active_slave and primary_slave that may be up
1002          * and able to arp
1003          */
1004         if ((bond->primary_slave) &&
1005             (!bond->params.arp_interval) &&
1006             (IS_UP(bond->primary_slave->dev))) {
1007                 new_active = bond->primary_slave;
1008         }
1009
1010         /* remember where to stop iterating over the slaves */
1011         old_active = new_active;
1012
1013         bond_for_each_slave_from(bond, new_active, i, old_active) {
1014                 if (IS_UP(new_active->dev)) {
1015                         if (new_active->link == BOND_LINK_UP) {
1016                                 return new_active;
1017                         } else if (new_active->link == BOND_LINK_BACK) {
1018                                 /* link up, but waiting for stabilization */
1019                                 if (new_active->delay < mintime) {
1020                                         mintime = new_active->delay;
1021                                         bestslave = new_active;
1022                                 }
1023                         }
1024                 }
1025         }
1026
1027         return bestslave;
1028 }
1029
1030 /**
1031  * change_active_interface - change the active slave into the specified one
1032  * @bond: our bonding struct
1033  * @new: the new slave to make the active one
1034  *
1035  * Set the new slave to the bond's settings and unset them on the old
1036  * curr_active_slave.
1037  * Setting include flags, mc-list, promiscuity, allmulti, etc.
1038  *
1039  * If @new's link state is %BOND_LINK_BACK we'll set it to %BOND_LINK_UP,
1040  * because it is apparently the best available slave we have, even though its
1041  * updelay hasn't timed out yet.
1042  *
1043  * Warning: Caller must hold curr_slave_lock for writing.
1044  */
1045 void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
1046 {
1047         struct slave *old_active = bond->curr_active_slave;
1048
1049         if (old_active == new_active) {
1050                 return;
1051         }
1052
1053         if (new_active) {
1054                 new_active->jiffies = jiffies;
1055
1056                 if (new_active->link == BOND_LINK_BACK) {
1057                         if (USES_PRIMARY(bond->params.mode)) {
1058                                 printk(KERN_INFO DRV_NAME
1059                                        ": %s: making interface %s the new "
1060                                        "active one %d ms earlier.\n",
1061                                        bond->dev->name, new_active->dev->name,
1062                                        (bond->params.updelay - new_active->delay) * bond->params.miimon);
1063                         }
1064
1065                         new_active->delay = 0;
1066                         new_active->link = BOND_LINK_UP;
1067
1068                         if (bond->params.mode == BOND_MODE_8023AD) {
1069                                 bond_3ad_handle_link_change(new_active, BOND_LINK_UP);
1070                         }
1071
1072                         if ((bond->params.mode == BOND_MODE_TLB) ||
1073                             (bond->params.mode == BOND_MODE_ALB)) {
1074                                 bond_alb_handle_link_change(bond, new_active, BOND_LINK_UP);
1075                         }
1076                 } else {
1077                         if (USES_PRIMARY(bond->params.mode)) {
1078                                 printk(KERN_INFO DRV_NAME
1079                                        ": %s: making interface %s the new "
1080                                        "active one.\n",
1081                                        bond->dev->name, new_active->dev->name);
1082                         }
1083                 }
1084         }
1085
1086         if (USES_PRIMARY(bond->params.mode)) {
1087                 bond_mc_swap(bond, new_active, old_active);
1088         }
1089
1090         if ((bond->params.mode == BOND_MODE_TLB) ||
1091             (bond->params.mode == BOND_MODE_ALB)) {
1092                 bond_alb_handle_active_change(bond, new_active);
1093                 if (old_active)
1094                         bond_set_slave_inactive_flags(old_active);
1095                 if (new_active)
1096                         bond_set_slave_active_flags(new_active);
1097         } else {
1098                 bond->curr_active_slave = new_active;
1099         }
1100
1101         if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) {
1102                 if (old_active) {
1103                         bond_set_slave_inactive_flags(old_active);
1104                 }
1105
1106                 if (new_active) {
1107                         bond_set_slave_active_flags(new_active);
1108                 }
1109
1110                 /* when bonding does not set the slave MAC address, the bond MAC
1111                  * address is the one of the active slave.
1112                  */
1113                 if (new_active && bond->params.fail_over_mac)
1114                         memcpy(bond->dev->dev_addr,  new_active->dev->dev_addr,
1115                                 new_active->dev->addr_len);
1116                 bond->send_grat_arp = bond->params.num_grat_arp;
1117                 if (bond->curr_active_slave &&
1118                         test_bit(__LINK_STATE_LINKWATCH_PENDING,
1119                                         &bond->curr_active_slave->dev->state)) {
1120                         dprintk("delaying gratuitous arp on %s\n",
1121                                 bond->curr_active_slave->dev->name);
1122                 } else {
1123                         if (bond->send_grat_arp > 0) {
1124                                 bond_send_gratuitous_arp(bond);
1125                                 bond->send_grat_arp--;
1126                         }
1127                 }
1128         }
1129 }
1130
1131 /**
1132  * bond_select_active_slave - select a new active slave, if needed
1133  * @bond: our bonding struct
1134  *
1135  * This functions shoud be called when one of the following occurs:
1136  * - The old curr_active_slave has been released or lost its link.
1137  * - The primary_slave has got its link back.
1138  * - A slave has got its link back and there's no old curr_active_slave.
1139  *
1140  * Warning: Caller must hold curr_slave_lock for writing.
1141  */
1142 void bond_select_active_slave(struct bonding *bond)
1143 {
1144         struct slave *best_slave;
1145         int rv;
1146
1147         best_slave = bond_find_best_slave(bond);
1148         if (best_slave != bond->curr_active_slave) {
1149                 bond_change_active_slave(bond, best_slave);
1150                 rv = bond_set_carrier(bond);
1151                 if (!rv)
1152                         return;
1153
1154                 if (netif_carrier_ok(bond->dev)) {
1155                         printk(KERN_INFO DRV_NAME
1156                                ": %s: first active interface up!\n",
1157                                bond->dev->name);
1158                 } else {
1159                         printk(KERN_INFO DRV_NAME ": %s: "
1160                                "now running without any active interface !\n",
1161                                bond->dev->name);
1162                 }
1163         }
1164 }
1165
1166 /*--------------------------- slave list handling ---------------------------*/
1167
1168 /*
1169  * This function attaches the slave to the end of list.
1170  *
1171  * bond->lock held for writing by caller.
1172  */
1173 static void bond_attach_slave(struct bonding *bond, struct slave *new_slave)
1174 {
1175         if (bond->first_slave == NULL) { /* attaching the first slave */
1176                 new_slave->next = new_slave;
1177                 new_slave->prev = new_slave;
1178                 bond->first_slave = new_slave;
1179         } else {
1180                 new_slave->next = bond->first_slave;
1181                 new_slave->prev = bond->first_slave->prev;
1182                 new_slave->next->prev = new_slave;
1183                 new_slave->prev->next = new_slave;
1184         }
1185
1186         bond->slave_cnt++;
1187 }
1188
1189 /*
1190  * This function detaches the slave from the list.
1191  * WARNING: no check is made to verify if the slave effectively
1192  * belongs to <bond>.
1193  * Nothing is freed on return, structures are just unchained.
1194  * If any slave pointer in bond was pointing to <slave>,
1195  * it should be changed by the calling function.
1196  *
1197  * bond->lock held for writing by caller.
1198  */
1199 static void bond_detach_slave(struct bonding *bond, struct slave *slave)
1200 {
1201         if (slave->next) {
1202                 slave->next->prev = slave->prev;
1203         }
1204
1205         if (slave->prev) {
1206                 slave->prev->next = slave->next;
1207         }
1208
1209         if (bond->first_slave == slave) { /* slave is the first slave */
1210                 if (bond->slave_cnt > 1) { /* there are more slave */
1211                         bond->first_slave = slave->next;
1212                 } else {
1213                         bond->first_slave = NULL; /* slave was the last one */
1214                 }
1215         }
1216
1217         slave->next = NULL;
1218         slave->prev = NULL;
1219         bond->slave_cnt--;
1220 }
1221
1222 /*---------------------------------- IOCTL ----------------------------------*/
1223
1224 static int bond_sethwaddr(struct net_device *bond_dev,
1225                           struct net_device *slave_dev)
1226 {
1227         dprintk("bond_dev=%p\n", bond_dev);
1228         dprintk("slave_dev=%p\n", slave_dev);
1229         dprintk("slave_dev->addr_len=%d\n", slave_dev->addr_len);
1230         memcpy(bond_dev->dev_addr, slave_dev->dev_addr, slave_dev->addr_len);
1231         return 0;
1232 }
1233
1234 #define BOND_VLAN_FEATURES \
1235         (NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX | \
1236          NETIF_F_HW_VLAN_FILTER)
1237
1238 /* 
1239  * Compute the common dev->feature set available to all slaves.  Some
1240  * feature bits are managed elsewhere, so preserve those feature bits
1241  * on the master device.
1242  */
1243 static int bond_compute_features(struct bonding *bond)
1244 {
1245         struct slave *slave;
1246         struct net_device *bond_dev = bond->dev;
1247         unsigned long features = bond_dev->features;
1248         unsigned short max_hard_header_len = max((u16)ETH_HLEN,
1249                                                 bond_dev->hard_header_len);
1250         int i;
1251
1252         features &= ~(NETIF_F_ALL_CSUM | BOND_VLAN_FEATURES);
1253         features |= NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
1254                     NETIF_F_GSO_MASK | NETIF_F_NO_CSUM;
1255
1256         bond_for_each_slave(bond, slave, i) {
1257                 features = netdev_compute_features(features,
1258                                                    slave->dev->features);
1259                 if (slave->dev->hard_header_len > max_hard_header_len)
1260                         max_hard_header_len = slave->dev->hard_header_len;
1261         }
1262
1263         features |= (bond_dev->features & BOND_VLAN_FEATURES);
1264         bond_dev->features = features;
1265         bond_dev->hard_header_len = max_hard_header_len;
1266
1267         return 0;
1268 }
1269
1270
1271 static void bond_setup_by_slave(struct net_device *bond_dev,
1272                                 struct net_device *slave_dev)
1273 {
1274         struct bonding *bond = bond_dev->priv;
1275
1276         bond_dev->neigh_setup           = slave_dev->neigh_setup;
1277         bond_dev->header_ops            = slave_dev->header_ops;
1278
1279         bond_dev->type              = slave_dev->type;
1280         bond_dev->hard_header_len   = slave_dev->hard_header_len;
1281         bond_dev->addr_len          = slave_dev->addr_len;
1282
1283         memcpy(bond_dev->broadcast, slave_dev->broadcast,
1284                 slave_dev->addr_len);
1285         bond->setup_by_slave = 1;
1286 }
1287
1288 /* enslave device <slave> to bond device <master> */
1289 int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
1290 {
1291         struct bonding *bond = bond_dev->priv;
1292         struct slave *new_slave = NULL;
1293         struct dev_mc_list *dmi;
1294         struct sockaddr addr;
1295         int link_reporting;
1296         int old_features = bond_dev->features;
1297         int res = 0;
1298
1299         if (!bond->params.use_carrier && slave_dev->ethtool_ops == NULL &&
1300                 slave_dev->do_ioctl == NULL) {
1301                 printk(KERN_WARNING DRV_NAME
1302                        ": %s: Warning: no link monitoring support for %s\n",
1303                        bond_dev->name, slave_dev->name);
1304         }
1305
1306         /* bond must be initialized by bond_open() before enslaving */
1307         if (!(bond_dev->flags & IFF_UP)) {
1308                 printk(KERN_WARNING DRV_NAME
1309                         " %s: master_dev is not up in bond_enslave\n",
1310                         bond_dev->name);
1311         }
1312
1313         /* already enslaved */
1314         if (slave_dev->flags & IFF_SLAVE) {
1315                 dprintk("Error, Device was already enslaved\n");
1316                 return -EBUSY;
1317         }
1318
1319         /* vlan challenged mutual exclusion */
1320         /* no need to lock since we're protected by rtnl_lock */
1321         if (slave_dev->features & NETIF_F_VLAN_CHALLENGED) {
1322                 dprintk("%s: NETIF_F_VLAN_CHALLENGED\n", slave_dev->name);
1323                 if (!list_empty(&bond->vlan_list)) {
1324                         printk(KERN_ERR DRV_NAME
1325                                ": %s: Error: cannot enslave VLAN "
1326                                "challenged slave %s on VLAN enabled "
1327                                "bond %s\n", bond_dev->name, slave_dev->name,
1328                                bond_dev->name);
1329                         return -EPERM;
1330                 } else {
1331                         printk(KERN_WARNING DRV_NAME
1332                                ": %s: Warning: enslaved VLAN challenged "
1333                                "slave %s. Adding VLANs will be blocked as "
1334                                "long as %s is part of bond %s\n",
1335                                bond_dev->name, slave_dev->name, slave_dev->name,
1336                                bond_dev->name);
1337                         bond_dev->features |= NETIF_F_VLAN_CHALLENGED;
1338                 }
1339         } else {
1340                 dprintk("%s: ! NETIF_F_VLAN_CHALLENGED\n", slave_dev->name);
1341                 if (bond->slave_cnt == 0) {
1342                         /* First slave, and it is not VLAN challenged,
1343                          * so remove the block of adding VLANs over the bond.
1344                          */
1345                         bond_dev->features &= ~NETIF_F_VLAN_CHALLENGED;
1346                 }
1347         }
1348
1349         /*
1350          * Old ifenslave binaries are no longer supported.  These can
1351          * be identified with moderate accurary by the state of the slave:
1352          * the current ifenslave will set the interface down prior to
1353          * enslaving it; the old ifenslave will not.
1354          */
1355         if ((slave_dev->flags & IFF_UP)) {
1356                 printk(KERN_ERR DRV_NAME ": %s is up. "
1357                        "This may be due to an out of date ifenslave.\n",
1358                        slave_dev->name);
1359                 res = -EPERM;
1360                 goto err_undo_flags;
1361         }
1362
1363         /* set bonding device ether type by slave - bonding netdevices are
1364          * created with ether_setup, so when the slave type is not ARPHRD_ETHER
1365          * there is a need to override some of the type dependent attribs/funcs.
1366          *
1367          * bond ether type mutual exclusion - don't allow slaves of dissimilar
1368          * ether type (eg ARPHRD_ETHER and ARPHRD_INFINIBAND) share the same bond
1369          */
1370         if (bond->slave_cnt == 0) {
1371                 if (slave_dev->type != ARPHRD_ETHER)
1372                         bond_setup_by_slave(bond_dev, slave_dev);
1373         } else if (bond_dev->type != slave_dev->type) {
1374                 printk(KERN_ERR DRV_NAME ": %s ether type (%d) is different "
1375                         "from other slaves (%d), can not enslave it.\n",
1376                         slave_dev->name,
1377                         slave_dev->type, bond_dev->type);
1378                         res = -EINVAL;
1379                         goto err_undo_flags;
1380         }
1381
1382         if (slave_dev->set_mac_address == NULL) {
1383                 if (bond->slave_cnt == 0) {
1384                         printk(KERN_WARNING DRV_NAME
1385                                ": %s: Warning: The first slave device "
1386                                "specified does not support setting the MAC "
1387                                "address. Enabling the fail_over_mac option.",
1388                                bond_dev->name);
1389                         bond->params.fail_over_mac = 1;
1390                 } else if (!bond->params.fail_over_mac) {
1391                         printk(KERN_ERR DRV_NAME
1392                                 ": %s: Error: The slave device specified "
1393                                 "does not support setting the MAC address, "
1394                                 "but fail_over_mac is not enabled.\n"
1395                                 , bond_dev->name);
1396                         res = -EOPNOTSUPP;
1397                         goto err_undo_flags;
1398                 }
1399         }
1400
1401         new_slave = kzalloc(sizeof(struct slave), GFP_KERNEL);
1402         if (!new_slave) {
1403                 res = -ENOMEM;
1404                 goto err_undo_flags;
1405         }
1406
1407         /* save slave's original flags before calling
1408          * netdev_set_master and dev_open
1409          */
1410         new_slave->original_flags = slave_dev->flags;
1411
1412         /*
1413          * Save slave's original ("permanent") mac address for modes
1414          * that need it, and for restoring it upon release, and then
1415          * set it to the master's address
1416          */
1417         memcpy(new_slave->perm_hwaddr, slave_dev->dev_addr, ETH_ALEN);
1418
1419         if (!bond->params.fail_over_mac) {
1420                 /*
1421                  * Set slave to master's mac address.  The application already
1422                  * set the master's mac address to that of the first slave
1423                  */
1424                 memcpy(addr.sa_data, bond_dev->dev_addr, bond_dev->addr_len);
1425                 addr.sa_family = slave_dev->type;
1426                 res = dev_set_mac_address(slave_dev, &addr);
1427                 if (res) {
1428                         dprintk("Error %d calling set_mac_address\n", res);
1429                         goto err_free;
1430                 }
1431         }
1432
1433         res = netdev_set_master(slave_dev, bond_dev);
1434         if (res) {
1435                 dprintk("Error %d calling netdev_set_master\n", res);
1436                 goto err_restore_mac;
1437         }
1438         /* open the slave since the application closed it */
1439         res = dev_open(slave_dev);
1440         if (res) {
1441                 dprintk("Openning slave %s failed\n", slave_dev->name);
1442                 goto err_unset_master;
1443         }
1444
1445         new_slave->dev = slave_dev;
1446         slave_dev->priv_flags |= IFF_BONDING;
1447
1448         if ((bond->params.mode == BOND_MODE_TLB) ||
1449             (bond->params.mode == BOND_MODE_ALB)) {
1450                 /* bond_alb_init_slave() must be called before all other stages since
1451                  * it might fail and we do not want to have to undo everything
1452                  */
1453                 res = bond_alb_init_slave(bond, new_slave);
1454                 if (res) {
1455                         goto err_close;
1456                 }
1457         }
1458
1459         /* If the mode USES_PRIMARY, then the new slave gets the
1460          * master's promisc (and mc) settings only if it becomes the
1461          * curr_active_slave, and that is taken care of later when calling
1462          * bond_change_active()
1463          */
1464         if (!USES_PRIMARY(bond->params.mode)) {
1465                 /* set promiscuity level to new slave */
1466                 if (bond_dev->flags & IFF_PROMISC) {
1467                         dev_set_promiscuity(slave_dev, 1);
1468                 }
1469
1470                 /* set allmulti level to new slave */
1471                 if (bond_dev->flags & IFF_ALLMULTI) {
1472                         dev_set_allmulti(slave_dev, 1);
1473                 }
1474
1475                 netif_tx_lock_bh(bond_dev);
1476                 /* upload master's mc_list to new slave */
1477                 for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) {
1478                         dev_mc_add (slave_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0);
1479                 }
1480                 netif_tx_unlock_bh(bond_dev);
1481         }
1482
1483         if (bond->params.mode == BOND_MODE_8023AD) {
1484                 /* add lacpdu mc addr to mc list */
1485                 u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
1486
1487                 dev_mc_add(slave_dev, lacpdu_multicast, ETH_ALEN, 0);
1488         }
1489
1490         bond_add_vlans_on_slave(bond, slave_dev);
1491
1492         write_lock_bh(&bond->lock);
1493
1494         bond_attach_slave(bond, new_slave);
1495
1496         new_slave->delay = 0;
1497         new_slave->link_failure_count = 0;
1498
1499         bond_compute_features(bond);
1500
1501         new_slave->last_arp_rx = jiffies;
1502
1503         if (bond->params.miimon && !bond->params.use_carrier) {
1504                 link_reporting = bond_check_dev_link(bond, slave_dev, 1);
1505
1506                 if ((link_reporting == -1) && !bond->params.arp_interval) {
1507                         /*
1508                          * miimon is set but a bonded network driver
1509                          * does not support ETHTOOL/MII and
1510                          * arp_interval is not set.  Note: if
1511                          * use_carrier is enabled, we will never go
1512                          * here (because netif_carrier is always
1513                          * supported); thus, we don't need to change
1514                          * the messages for netif_carrier.
1515                          */
1516                         printk(KERN_WARNING DRV_NAME
1517                                ": %s: Warning: MII and ETHTOOL support not "
1518                                "available for interface %s, and "
1519                                "arp_interval/arp_ip_target module parameters "
1520                                "not specified, thus bonding will not detect "
1521                                "link failures! see bonding.txt for details.\n",
1522                                bond_dev->name, slave_dev->name);
1523                 } else if (link_reporting == -1) {
1524                         /* unable get link status using mii/ethtool */
1525                         printk(KERN_WARNING DRV_NAME
1526                                ": %s: Warning: can't get link status from "
1527                                "interface %s; the network driver associated "
1528                                "with this interface does not support MII or "
1529                                "ETHTOOL link status reporting, thus miimon "
1530                                "has no effect on this interface.\n",
1531                                bond_dev->name, slave_dev->name);
1532                 }
1533         }
1534
1535         /* check for initial state */
1536         if (!bond->params.miimon ||
1537             (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS)) {
1538                 if (bond->params.updelay) {
1539                         dprintk("Initial state of slave_dev is "
1540                                 "BOND_LINK_BACK\n");
1541                         new_slave->link  = BOND_LINK_BACK;
1542                         new_slave->delay = bond->params.updelay;
1543                 } else {
1544                         dprintk("Initial state of slave_dev is "
1545                                 "BOND_LINK_UP\n");
1546                         new_slave->link  = BOND_LINK_UP;
1547                 }
1548                 new_slave->jiffies = jiffies;
1549         } else {
1550                 dprintk("Initial state of slave_dev is "
1551                         "BOND_LINK_DOWN\n");
1552                 new_slave->link  = BOND_LINK_DOWN;
1553         }
1554
1555         if (bond_update_speed_duplex(new_slave) &&
1556             (new_slave->link != BOND_LINK_DOWN)) {
1557                 printk(KERN_WARNING DRV_NAME
1558                        ": %s: Warning: failed to get speed and duplex from %s, "
1559                        "assumed to be 100Mb/sec and Full.\n",
1560                        bond_dev->name, new_slave->dev->name);
1561
1562                 if (bond->params.mode == BOND_MODE_8023AD) {
1563                         printk(KERN_WARNING DRV_NAME
1564                                ": %s: Warning: Operation of 802.3ad mode requires ETHTOOL "
1565                                "support in base driver for proper aggregator "
1566                                "selection.\n", bond_dev->name);
1567                 }
1568         }
1569
1570         if (USES_PRIMARY(bond->params.mode) && bond->params.primary[0]) {
1571                 /* if there is a primary slave, remember it */
1572                 if (strcmp(bond->params.primary, new_slave->dev->name) == 0) {
1573                         bond->primary_slave = new_slave;
1574                 }
1575         }
1576
1577         switch (bond->params.mode) {
1578         case BOND_MODE_ACTIVEBACKUP:
1579                 bond_set_slave_inactive_flags(new_slave);
1580                 bond_select_active_slave(bond);
1581                 break;
1582         case BOND_MODE_8023AD:
1583                 /* in 802.3ad mode, the internal mechanism
1584                  * will activate the slaves in the selected
1585                  * aggregator
1586                  */
1587                 bond_set_slave_inactive_flags(new_slave);
1588                 /* if this is the first slave */
1589                 if (bond->slave_cnt == 1) {
1590                         SLAVE_AD_INFO(new_slave).id = 1;
1591                         /* Initialize AD with the number of times that the AD timer is called in 1 second
1592                          * can be called only after the mac address of the bond is set
1593                          */
1594                         bond_3ad_initialize(bond, 1000/AD_TIMER_INTERVAL,
1595                                             bond->params.lacp_fast);
1596                 } else {
1597                         SLAVE_AD_INFO(new_slave).id =
1598                                 SLAVE_AD_INFO(new_slave->prev).id + 1;
1599                 }
1600
1601                 bond_3ad_bind_slave(new_slave);
1602                 break;
1603         case BOND_MODE_TLB:
1604         case BOND_MODE_ALB:
1605                 new_slave->state = BOND_STATE_ACTIVE;
1606                 bond_set_slave_inactive_flags(new_slave);
1607                 break;
1608         default:
1609                 dprintk("This slave is always active in trunk mode\n");
1610
1611                 /* always active in trunk mode */
1612                 new_slave->state = BOND_STATE_ACTIVE;
1613
1614                 /* In trunking mode there is little meaning to curr_active_slave
1615                  * anyway (it holds no special properties of the bond device),
1616                  * so we can change it without calling change_active_interface()
1617                  */
1618                 if (!bond->curr_active_slave) {
1619                         bond->curr_active_slave = new_slave;
1620                 }
1621                 break;
1622         } /* switch(bond_mode) */
1623
1624         bond_set_carrier(bond);
1625
1626         write_unlock_bh(&bond->lock);
1627
1628         res = bond_create_slave_symlinks(bond_dev, slave_dev);
1629         if (res)
1630                 goto err_close;
1631
1632         printk(KERN_INFO DRV_NAME
1633                ": %s: enslaving %s as a%s interface with a%s link.\n",
1634                bond_dev->name, slave_dev->name,
1635                new_slave->state == BOND_STATE_ACTIVE ? "n active" : " backup",
1636                new_slave->link != BOND_LINK_DOWN ? "n up" : " down");
1637
1638         /* enslave is successful */
1639         return 0;
1640
1641 /* Undo stages on error */
1642 err_close:
1643         dev_close(slave_dev);
1644
1645 err_unset_master:
1646         netdev_set_master(slave_dev, NULL);
1647
1648 err_restore_mac:
1649         if (!bond->params.fail_over_mac) {
1650                 memcpy(addr.sa_data, new_slave->perm_hwaddr, ETH_ALEN);
1651                 addr.sa_family = slave_dev->type;
1652                 dev_set_mac_address(slave_dev, &addr);
1653         }
1654
1655 err_free:
1656         kfree(new_slave);
1657
1658 err_undo_flags:
1659         bond_dev->features = old_features;
1660  
1661         return res;
1662 }
1663
1664 /*
1665  * Try to release the slave device <slave> from the bond device <master>
1666  * It is legal to access curr_active_slave without a lock because all the function
1667  * is write-locked.
1668  *
1669  * The rules for slave state should be:
1670  *   for Active/Backup:
1671  *     Active stays on all backups go down
1672  *   for Bonded connections:
1673  *     The first up interface should be left on and all others downed.
1674  */
1675 int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
1676 {
1677         struct bonding *bond = bond_dev->priv;
1678         struct slave *slave, *oldcurrent;
1679         struct sockaddr addr;
1680         int mac_addr_differ;
1681         DECLARE_MAC_BUF(mac);
1682
1683         /* slave is not a slave or master is not master of this slave */
1684         if (!(slave_dev->flags & IFF_SLAVE) ||
1685             (slave_dev->master != bond_dev)) {
1686                 printk(KERN_ERR DRV_NAME
1687                        ": %s: Error: cannot release %s.\n",
1688                        bond_dev->name, slave_dev->name);
1689                 return -EINVAL;
1690         }
1691
1692         write_lock_bh(&bond->lock);
1693
1694         slave = bond_get_slave_by_dev(bond, slave_dev);
1695         if (!slave) {
1696                 /* not a slave of this bond */
1697                 printk(KERN_INFO DRV_NAME
1698                        ": %s: %s not enslaved\n",
1699                        bond_dev->name, slave_dev->name);
1700                 write_unlock_bh(&bond->lock);
1701                 return -EINVAL;
1702         }
1703
1704         mac_addr_differ = memcmp(bond_dev->dev_addr,
1705                                  slave->perm_hwaddr,
1706                                  ETH_ALEN);
1707         if (!mac_addr_differ && (bond->slave_cnt > 1)) {
1708                 printk(KERN_WARNING DRV_NAME
1709                        ": %s: Warning: the permanent HWaddr of %s - "
1710                        "%s - is still in use by %s. "
1711                        "Set the HWaddr of %s to a different address "
1712                        "to avoid conflicts.\n",
1713                        bond_dev->name,
1714                        slave_dev->name,
1715                        print_mac(mac, slave->perm_hwaddr),
1716                        bond_dev->name,
1717                        slave_dev->name);
1718         }
1719
1720         /* Inform AD package of unbinding of slave. */
1721         if (bond->params.mode == BOND_MODE_8023AD) {
1722                 /* must be called before the slave is
1723                  * detached from the list
1724                  */
1725                 bond_3ad_unbind_slave(slave);
1726         }
1727
1728         printk(KERN_INFO DRV_NAME
1729                ": %s: releasing %s interface %s\n",
1730                bond_dev->name,
1731                (slave->state == BOND_STATE_ACTIVE)
1732                ? "active" : "backup",
1733                slave_dev->name);
1734
1735         oldcurrent = bond->curr_active_slave;
1736
1737         bond->current_arp_slave = NULL;
1738
1739         /* release the slave from its bond */
1740         bond_detach_slave(bond, slave);
1741
1742         bond_compute_features(bond);
1743
1744         if (bond->primary_slave == slave) {
1745                 bond->primary_slave = NULL;
1746         }
1747
1748         if (oldcurrent == slave) {
1749                 bond_change_active_slave(bond, NULL);
1750         }
1751
1752         if ((bond->params.mode == BOND_MODE_TLB) ||
1753             (bond->params.mode == BOND_MODE_ALB)) {
1754                 /* Must be called only after the slave has been
1755                  * detached from the list and the curr_active_slave
1756                  * has been cleared (if our_slave == old_current),
1757                  * but before a new active slave is selected.
1758                  */
1759                 write_unlock_bh(&bond->lock);
1760                 bond_alb_deinit_slave(bond, slave);
1761                 write_lock_bh(&bond->lock);
1762         }
1763
1764         if (oldcurrent == slave) {
1765                 /*
1766                  * Note that we hold RTNL over this sequence, so there
1767                  * is no concern that another slave add/remove event
1768                  * will interfere.
1769                  */
1770                 write_unlock_bh(&bond->lock);
1771                 read_lock(&bond->lock);
1772                 write_lock_bh(&bond->curr_slave_lock);
1773
1774                 bond_select_active_slave(bond);
1775
1776                 write_unlock_bh(&bond->curr_slave_lock);
1777                 read_unlock(&bond->lock);
1778                 write_lock_bh(&bond->lock);
1779         }
1780
1781         if (bond->slave_cnt == 0) {
1782                 bond_set_carrier(bond);
1783
1784                 /* if the last slave was removed, zero the mac address
1785                  * of the master so it will be set by the application
1786                  * to the mac address of the first slave
1787                  */
1788                 memset(bond_dev->dev_addr, 0, bond_dev->addr_len);
1789
1790                 if (list_empty(&bond->vlan_list)) {
1791                         bond_dev->features |= NETIF_F_VLAN_CHALLENGED;
1792                 } else {
1793                         printk(KERN_WARNING DRV_NAME
1794                                ": %s: Warning: clearing HW address of %s while it "
1795                                "still has VLANs.\n",
1796                                bond_dev->name, bond_dev->name);
1797                         printk(KERN_WARNING DRV_NAME
1798                                ": %s: When re-adding slaves, make sure the bond's "
1799                                "HW address matches its VLANs'.\n",
1800                                bond_dev->name);
1801                 }
1802         } else if ((bond_dev->features & NETIF_F_VLAN_CHALLENGED) &&
1803                    !bond_has_challenged_slaves(bond)) {
1804                 printk(KERN_INFO DRV_NAME
1805                        ": %s: last VLAN challenged slave %s "
1806                        "left bond %s. VLAN blocking is removed\n",
1807                        bond_dev->name, slave_dev->name, bond_dev->name);
1808                 bond_dev->features &= ~NETIF_F_VLAN_CHALLENGED;
1809         }
1810
1811         write_unlock_bh(&bond->lock);
1812
1813         /* must do this from outside any spinlocks */
1814         bond_destroy_slave_symlinks(bond_dev, slave_dev);
1815
1816         bond_del_vlans_from_slave(bond, slave_dev);
1817
1818         /* If the mode USES_PRIMARY, then we should only remove its
1819          * promisc and mc settings if it was the curr_active_slave, but that was
1820          * already taken care of above when we detached the slave
1821          */
1822         if (!USES_PRIMARY(bond->params.mode)) {
1823                 /* unset promiscuity level from slave */
1824                 if (bond_dev->flags & IFF_PROMISC) {
1825                         dev_set_promiscuity(slave_dev, -1);
1826                 }
1827
1828                 /* unset allmulti level from slave */
1829                 if (bond_dev->flags & IFF_ALLMULTI) {
1830                         dev_set_allmulti(slave_dev, -1);
1831                 }
1832
1833                 /* flush master's mc_list from slave */
1834                 netif_tx_lock_bh(bond_dev);
1835                 bond_mc_list_flush(bond_dev, slave_dev);
1836                 netif_tx_unlock_bh(bond_dev);
1837         }
1838
1839         netdev_set_master(slave_dev, NULL);
1840
1841         /* close slave before restoring its mac address */
1842         dev_close(slave_dev);
1843
1844         if (!bond->params.fail_over_mac) {
1845                 /* restore original ("permanent") mac address */
1846                 memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN);
1847                 addr.sa_family = slave_dev->type;
1848                 dev_set_mac_address(slave_dev, &addr);
1849         }
1850
1851         slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB |
1852                                    IFF_SLAVE_INACTIVE | IFF_BONDING |
1853                                    IFF_SLAVE_NEEDARP);
1854
1855         kfree(slave);
1856
1857         return 0;  /* deletion OK */
1858 }
1859
1860 /*
1861 * Destroy a bonding device.
1862 * Must be under rtnl_lock when this function is called.
1863 */
1864 void bond_destroy(struct bonding *bond)
1865 {
1866         bond_deinit(bond->dev);
1867         bond_destroy_sysfs_entry(bond);
1868         unregister_netdevice(bond->dev);
1869 }
1870
1871 /*
1872 * First release a slave and than destroy the bond if no more slaves iare left.
1873 * Must be under rtnl_lock when this function is called.
1874 */
1875 int  bond_release_and_destroy(struct net_device *bond_dev, struct net_device *slave_dev)
1876 {
1877         struct bonding *bond = bond_dev->priv;
1878         int ret;
1879
1880         ret = bond_release(bond_dev, slave_dev);
1881         if ((ret == 0) && (bond->slave_cnt == 0)) {
1882                 printk(KERN_INFO DRV_NAME ": %s: destroying bond %s.\n",
1883                        bond_dev->name, bond_dev->name);
1884                 bond_destroy(bond);
1885         }
1886         return ret;
1887 }
1888
1889 /*
1890  * This function releases all slaves.
1891  */
1892 static int bond_release_all(struct net_device *bond_dev)
1893 {
1894         struct bonding *bond = bond_dev->priv;
1895         struct slave *slave;
1896         struct net_device *slave_dev;
1897         struct sockaddr addr;
1898
1899         write_lock_bh(&bond->lock);
1900
1901         netif_carrier_off(bond_dev);
1902
1903         if (bond->slave_cnt == 0) {
1904                 goto out;
1905         }
1906
1907         bond->current_arp_slave = NULL;
1908         bond->primary_slave = NULL;
1909         bond_change_active_slave(bond, NULL);
1910
1911         while ((slave = bond->first_slave) != NULL) {
1912                 /* Inform AD package of unbinding of slave
1913                  * before slave is detached from the list.
1914                  */
1915                 if (bond->params.mode == BOND_MODE_8023AD) {
1916                         bond_3ad_unbind_slave(slave);
1917                 }
1918
1919                 slave_dev = slave->dev;
1920                 bond_detach_slave(bond, slave);
1921
1922                 /* now that the slave is detached, unlock and perform
1923                  * all the undo steps that should not be called from
1924                  * within a lock.
1925                  */
1926                 write_unlock_bh(&bond->lock);
1927
1928                 if ((bond->params.mode == BOND_MODE_TLB) ||
1929                     (bond->params.mode == BOND_MODE_ALB)) {
1930                         /* must be called only after the slave
1931                          * has been detached from the list
1932                          */
1933                         bond_alb_deinit_slave(bond, slave);
1934                 }
1935
1936                 bond_compute_features(bond);
1937
1938                 bond_destroy_slave_symlinks(bond_dev, slave_dev);
1939                 bond_del_vlans_from_slave(bond, slave_dev);
1940
1941                 /* If the mode USES_PRIMARY, then we should only remove its
1942                  * promisc and mc settings if it was the curr_active_slave, but that was
1943                  * already taken care of above when we detached the slave
1944                  */
1945                 if (!USES_PRIMARY(bond->params.mode)) {
1946                         /* unset promiscuity level from slave */
1947                         if (bond_dev->flags & IFF_PROMISC) {
1948                                 dev_set_promiscuity(slave_dev, -1);
1949                         }
1950
1951                         /* unset allmulti level from slave */
1952                         if (bond_dev->flags & IFF_ALLMULTI) {
1953                                 dev_set_allmulti(slave_dev, -1);
1954                         }
1955
1956                         /* flush master's mc_list from slave */
1957                         netif_tx_lock_bh(bond_dev);
1958                         bond_mc_list_flush(bond_dev, slave_dev);
1959                         netif_tx_unlock_bh(bond_dev);
1960                 }
1961
1962                 netdev_set_master(slave_dev, NULL);
1963
1964                 /* close slave before restoring its mac address */
1965                 dev_close(slave_dev);
1966
1967                 if (!bond->params.fail_over_mac) {
1968                         /* restore original ("permanent") mac address*/
1969                         memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN);
1970                         addr.sa_family = slave_dev->type;
1971                         dev_set_mac_address(slave_dev, &addr);
1972                 }
1973
1974                 slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB |
1975                                            IFF_SLAVE_INACTIVE);
1976
1977                 kfree(slave);
1978
1979                 /* re-acquire the lock before getting the next slave */
1980                 write_lock_bh(&bond->lock);
1981         }
1982
1983         /* zero the mac address of the master so it will be
1984          * set by the application to the mac address of the
1985          * first slave
1986          */
1987         memset(bond_dev->dev_addr, 0, bond_dev->addr_len);
1988
1989         if (list_empty(&bond->vlan_list)) {
1990                 bond_dev->features |= NETIF_F_VLAN_CHALLENGED;
1991         } else {
1992                 printk(KERN_WARNING DRV_NAME
1993                        ": %s: Warning: clearing HW address of %s while it "
1994                        "still has VLANs.\n",
1995                        bond_dev->name, bond_dev->name);
1996                 printk(KERN_WARNING DRV_NAME
1997                        ": %s: When re-adding slaves, make sure the bond's "
1998                        "HW address matches its VLANs'.\n",
1999                        bond_dev->name);
2000         }
2001
2002         printk(KERN_INFO DRV_NAME
2003                ": %s: released all slaves\n",
2004                bond_dev->name);
2005
2006 out:
2007         write_unlock_bh(&bond->lock);
2008
2009         return 0;
2010 }
2011
2012 /*
2013  * This function changes the active slave to slave <slave_dev>.
2014  * It returns -EINVAL in the following cases.
2015  *  - <slave_dev> is not found in the list.
2016  *  - There is not active slave now.
2017  *  - <slave_dev> is already active.
2018  *  - The link state of <slave_dev> is not BOND_LINK_UP.
2019  *  - <slave_dev> is not running.
2020  * In these cases, this fuction does nothing.
2021  * In the other cases, currnt_slave pointer is changed and 0 is returned.
2022  */
2023 static int bond_ioctl_change_active(struct net_device *bond_dev, struct net_device *slave_dev)
2024 {
2025         struct bonding *bond = bond_dev->priv;
2026         struct slave *old_active = NULL;
2027         struct slave *new_active = NULL;
2028         int res = 0;
2029
2030         if (!USES_PRIMARY(bond->params.mode)) {
2031                 return -EINVAL;
2032         }
2033
2034         /* Verify that master_dev is indeed the master of slave_dev */
2035         if (!(slave_dev->flags & IFF_SLAVE) ||
2036             (slave_dev->master != bond_dev)) {
2037                 return -EINVAL;
2038         }
2039
2040         read_lock(&bond->lock);
2041
2042         read_lock(&bond->curr_slave_lock);
2043         old_active = bond->curr_active_slave;
2044         read_unlock(&bond->curr_slave_lock);
2045
2046         new_active = bond_get_slave_by_dev(bond, slave_dev);
2047
2048         /*
2049          * Changing to the current active: do nothing; return success.
2050          */
2051         if (new_active && (new_active == old_active)) {
2052                 read_unlock(&bond->lock);
2053                 return 0;
2054         }
2055
2056         if ((new_active) &&
2057             (old_active) &&
2058             (new_active->link == BOND_LINK_UP) &&
2059             IS_UP(new_active->dev)) {
2060                 write_lock_bh(&bond->curr_slave_lock);
2061                 bond_change_active_slave(bond, new_active);
2062                 write_unlock_bh(&bond->curr_slave_lock);
2063         } else {
2064                 res = -EINVAL;
2065         }
2066
2067         read_unlock(&bond->lock);
2068
2069         return res;
2070 }
2071
2072 static int bond_info_query(struct net_device *bond_dev, struct ifbond *info)
2073 {
2074         struct bonding *bond = bond_dev->priv;
2075
2076         info->bond_mode = bond->params.mode;
2077         info->miimon = bond->params.miimon;
2078
2079         read_lock(&bond->lock);
2080         info->num_slaves = bond->slave_cnt;
2081         read_unlock(&bond->lock);
2082
2083         return 0;
2084 }
2085
2086 static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *info)
2087 {
2088         struct bonding *bond = bond_dev->priv;
2089         struct slave *slave;
2090         int i, found = 0;
2091
2092         if (info->slave_id < 0) {
2093                 return -ENODEV;
2094         }
2095
2096         read_lock(&bond->lock);
2097
2098         bond_for_each_slave(bond, slave, i) {
2099                 if (i == (int)info->slave_id) {
2100                         found = 1;
2101                         break;
2102                 }
2103         }
2104
2105         read_unlock(&bond->lock);
2106
2107         if (found) {
2108                 strcpy(info->slave_name, slave->dev->name);
2109                 info->link = slave->link;
2110                 info->state = slave->state;
2111                 info->link_failure_count = slave->link_failure_count;
2112         } else {
2113                 return -ENODEV;
2114         }
2115
2116         return 0;
2117 }
2118
2119 /*-------------------------------- Monitoring -------------------------------*/
2120
2121 /*
2122  * if !have_locks, return nonzero if a failover is necessary.  if
2123  * have_locks, do whatever failover activities are needed.
2124  *
2125  * This is to separate the inspection and failover steps for locking
2126  * purposes; failover requires rtnl, but acquiring it for every
2127  * inspection is undesirable, so a wrapper first does inspection, and
2128  * then acquires the necessary locks and calls again to perform
2129  * failover if needed.  Since all locks are dropped, a complete
2130  * restart is needed between calls.
2131  */
2132 static int __bond_mii_monitor(struct bonding *bond, int have_locks)
2133 {
2134         struct slave *slave, *oldcurrent;
2135         int do_failover = 0;
2136         int i;
2137
2138         if (bond->slave_cnt == 0)
2139                 goto out;
2140
2141         /* we will try to read the link status of each of our slaves, and
2142          * set their IFF_RUNNING flag appropriately. For each slave not
2143          * supporting MII status, we won't do anything so that a user-space
2144          * program could monitor the link itself if needed.
2145          */
2146
2147         if (bond->send_grat_arp) {
2148                 if (bond->curr_active_slave && test_bit(__LINK_STATE_LINKWATCH_PENDING,
2149                                 &bond->curr_active_slave->dev->state))
2150                         dprintk("Needs to send gratuitous arp but not yet\n");
2151                 else {
2152                         dprintk("sending delayed gratuitous arp on on %s\n",
2153                                 bond->curr_active_slave->dev->name);
2154                         bond_send_gratuitous_arp(bond);
2155                         bond->send_grat_arp--;
2156                 }
2157         }
2158         read_lock(&bond->curr_slave_lock);
2159         oldcurrent = bond->curr_active_slave;
2160         read_unlock(&bond->curr_slave_lock);
2161
2162         bond_for_each_slave(bond, slave, i) {
2163                 struct net_device *slave_dev = slave->dev;
2164                 int link_state;
2165                 u16 old_speed = slave->speed;
2166                 u8 old_duplex = slave->duplex;
2167
2168                 link_state = bond_check_dev_link(bond, slave_dev, 0);
2169
2170                 switch (slave->link) {
2171                 case BOND_LINK_UP:      /* the link was up */
2172                         if (link_state == BMSR_LSTATUS) {
2173                                 if (!oldcurrent) {
2174                                         if (!have_locks)
2175                                                 return 1;
2176                                         do_failover = 1;
2177                                 }
2178                                 break;
2179                         } else { /* link going down */
2180                                 slave->link  = BOND_LINK_FAIL;
2181                                 slave->delay = bond->params.downdelay;
2182
2183                                 if (slave->link_failure_count < UINT_MAX) {
2184                                         slave->link_failure_count++;
2185                                 }
2186
2187                                 if (bond->params.downdelay) {
2188                                         printk(KERN_INFO DRV_NAME
2189                                                ": %s: link status down for %s "
2190                                                "interface %s, disabling it in "
2191                                                "%d ms.\n",
2192                                                bond->dev->name,
2193                                                IS_UP(slave_dev)
2194                                                ? ((bond->params.mode == BOND_MODE_ACTIVEBACKUP)
2195                                                   ? ((slave == oldcurrent)
2196                                                      ? "active " : "backup ")
2197                                                   : "")
2198                                                : "idle ",
2199                                                slave_dev->name,
2200                                                bond->params.downdelay * bond->params.miimon);
2201                                 }
2202                         }
2203                         /* no break ! fall through the BOND_LINK_FAIL test to
2204                            ensure proper action to be taken
2205                         */
2206                 case BOND_LINK_FAIL:    /* the link has just gone down */
2207                         if (link_state != BMSR_LSTATUS) {
2208                                 /* link stays down */
2209                                 if (slave->delay <= 0) {
2210                                         if (!have_locks)
2211                                                 return 1;
2212
2213                                         /* link down for too long time */
2214                                         slave->link = BOND_LINK_DOWN;
2215
2216                                         /* in active/backup mode, we must
2217                                          * completely disable this interface
2218                                          */
2219                                         if ((bond->params.mode == BOND_MODE_ACTIVEBACKUP) ||
2220                                             (bond->params.mode == BOND_MODE_8023AD)) {
2221                                                 bond_set_slave_inactive_flags(slave);
2222                                         }
2223
2224                                         printk(KERN_INFO DRV_NAME
2225                                                ": %s: link status definitely "
2226                                                "down for interface %s, "
2227                                                "disabling it\n",
2228                                                bond->dev->name,
2229                                                slave_dev->name);
2230
2231                                         /* notify ad that the link status has changed */
2232                                         if (bond->params.mode == BOND_MODE_8023AD) {
2233                                                 bond_3ad_handle_link_change(slave, BOND_LINK_DOWN);
2234                                         }
2235
2236                                         if ((bond->params.mode == BOND_MODE_TLB) ||
2237                                             (bond->params.mode == BOND_MODE_ALB)) {
2238                                                 bond_alb_handle_link_change(bond, slave, BOND_LINK_DOWN);
2239                                         }
2240
2241                                         if (slave == oldcurrent) {
2242                                                 do_failover = 1;
2243                                         }
2244                                 } else {
2245                                         slave->delay--;
2246                                 }
2247                         } else {
2248                                 /* link up again */
2249                                 slave->link  = BOND_LINK_UP;
2250                                 slave->jiffies = jiffies;
2251                                 printk(KERN_INFO DRV_NAME
2252                                        ": %s: link status up again after %d "
2253                                        "ms for interface %s.\n",
2254                                        bond->dev->name,
2255                                        (bond->params.downdelay - slave->delay) * bond->params.miimon,
2256                                        slave_dev->name);
2257                         }
2258                         break;
2259                 case BOND_LINK_DOWN:    /* the link was down */
2260                         if (link_state != BMSR_LSTATUS) {
2261                                 /* the link stays down, nothing more to do */
2262                                 break;
2263                         } else {        /* link going up */
2264                                 slave->link  = BOND_LINK_BACK;
2265                                 slave->delay = bond->params.updelay;
2266
2267                                 if (bond->params.updelay) {
2268                                         /* if updelay == 0, no need to
2269                                            advertise about a 0 ms delay */
2270                                         printk(KERN_INFO DRV_NAME
2271                                                ": %s: link status up for "
2272                                                "interface %s, enabling it "
2273                                                "in %d ms.\n",
2274                                                bond->dev->name,
2275                                                slave_dev->name,
2276                                                bond->params.updelay * bond->params.miimon);
2277                                 }
2278                         }
2279                         /* no break ! fall through the BOND_LINK_BACK state in
2280                            case there's something to do.
2281                         */
2282                 case BOND_LINK_BACK:    /* the link has just come back */
2283                         if (link_state != BMSR_LSTATUS) {
2284                                 /* link down again */
2285                                 slave->link  = BOND_LINK_DOWN;
2286
2287                                 printk(KERN_INFO DRV_NAME
2288                                        ": %s: link status down again after %d "
2289                                        "ms for interface %s.\n",
2290                                        bond->dev->name,
2291                                        (bond->params.updelay - slave->delay) * bond->params.miimon,
2292                                        slave_dev->name);
2293                         } else {
2294                                 /* link stays up */
2295                                 if (slave->delay == 0) {
2296                                         if (!have_locks)
2297                                                 return 1;
2298
2299                                         /* now the link has been up for long time enough */
2300                                         slave->link = BOND_LINK_UP;
2301                                         slave->jiffies = jiffies;
2302
2303                                         if (bond->params.mode == BOND_MODE_8023AD) {
2304                                                 /* prevent it from being the active one */
2305                                                 slave->state = BOND_STATE_BACKUP;
2306                                         } else if (bond->params.mode != BOND_MODE_ACTIVEBACKUP) {
2307                                                 /* make it immediately active */
2308                                                 slave->state = BOND_STATE_ACTIVE;
2309                                         } else if (slave != bond->primary_slave) {
2310                                                 /* prevent it from being the active one */
2311                                                 slave->state = BOND_STATE_BACKUP;
2312                                         }
2313
2314                                         printk(KERN_INFO DRV_NAME
2315                                                ": %s: link status definitely "
2316                                                "up for interface %s.\n",
2317                                                bond->dev->name,
2318                                                slave_dev->name);
2319
2320                                         /* notify ad that the link status has changed */
2321                                         if (bond->params.mode == BOND_MODE_8023AD) {
2322                                                 bond_3ad_handle_link_change(slave, BOND_LINK_UP);
2323                                         }
2324
2325                                         if ((bond->params.mode == BOND_MODE_TLB) ||
2326                                             (bond->params.mode == BOND_MODE_ALB)) {
2327                                                 bond_alb_handle_link_change(bond, slave, BOND_LINK_UP);
2328                                         }
2329
2330                                         if ((!oldcurrent) ||
2331                                             (slave == bond->primary_slave)) {
2332                                                 do_failover = 1;
2333                                         }
2334                                 } else {
2335                                         slave->delay--;
2336                                 }
2337                         }
2338                         break;
2339                 default:
2340                         /* Should not happen */
2341                         printk(KERN_ERR DRV_NAME
2342                                ": %s: Error: %s Illegal value (link=%d)\n",
2343                                bond->dev->name,
2344                                slave->dev->name,
2345                                slave->link);
2346                         goto out;
2347                 } /* end of switch (slave->link) */
2348
2349                 bond_update_speed_duplex(slave);
2350
2351                 if (bond->params.mode == BOND_MODE_8023AD) {
2352                         if (old_speed != slave->speed) {
2353                                 bond_3ad_adapter_speed_changed(slave);
2354                         }
2355
2356                         if (old_duplex != slave->duplex) {
2357                                 bond_3ad_adapter_duplex_changed(slave);
2358                         }
2359                 }
2360
2361         } /* end of for */
2362
2363         if (do_failover) {
2364                 ASSERT_RTNL();
2365
2366                 write_lock_bh(&bond->curr_slave_lock);
2367
2368                 bond_select_active_slave(bond);
2369
2370                 write_unlock_bh(&bond->curr_slave_lock);
2371
2372         } else
2373                 bond_set_carrier(bond);
2374
2375 out:
2376         return 0;
2377 }
2378
2379 /*
2380  * bond_mii_monitor
2381  *
2382  * Really a wrapper that splits the mii monitor into two phases: an
2383  * inspection, then (if inspection indicates something needs to be
2384  * done) an acquisition of appropriate locks followed by another pass
2385  * to implement whatever link state changes are indicated.
2386  */
2387 void bond_mii_monitor(struct work_struct *work)
2388 {
2389         struct bonding *bond = container_of(work, struct bonding,
2390                                             mii_work.work);
2391         unsigned long delay;
2392
2393         read_lock(&bond->lock);
2394         if (bond->kill_timers) {
2395                 read_unlock(&bond->lock);
2396                 return;
2397         }
2398         if (__bond_mii_monitor(bond, 0)) {
2399                 read_unlock(&bond->lock);
2400                 rtnl_lock();
2401                 read_lock(&bond->lock);
2402                 __bond_mii_monitor(bond, 1);
2403                 read_unlock(&bond->lock);
2404                 rtnl_unlock();  /* might sleep, hold no other locks */
2405                 read_lock(&bond->lock);
2406         }
2407
2408         delay = msecs_to_jiffies(bond->params.miimon);
2409         read_unlock(&bond->lock);
2410         queue_delayed_work(bond->wq, &bond->mii_work, delay);
2411 }
2412
2413 static __be32 bond_glean_dev_ip(struct net_device *dev)
2414 {
2415         struct in_device *idev;
2416         struct in_ifaddr *ifa;
2417         __be32 addr = 0;
2418
2419         if (!dev)
2420                 return 0;
2421
2422         rcu_read_lock();
2423         idev = __in_dev_get_rcu(dev);
2424         if (!idev)
2425                 goto out;
2426
2427         ifa = idev->ifa_list;
2428         if (!ifa)
2429                 goto out;
2430
2431         addr = ifa->ifa_local;
2432 out:
2433         rcu_read_unlock();
2434         return addr;
2435 }
2436
2437 static int bond_has_this_ip(struct bonding *bond, __be32 ip)
2438 {
2439         struct vlan_entry *vlan;
2440
2441         if (ip == bond->master_ip)
2442                 return 1;
2443
2444         list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
2445                 if (ip == vlan->vlan_ip)
2446                         return 1;
2447         }
2448
2449         return 0;
2450 }
2451
2452 /*
2453  * We go to the (large) trouble of VLAN tagging ARP frames because
2454  * switches in VLAN mode (especially if ports are configured as
2455  * "native" to a VLAN) might not pass non-tagged frames.
2456  */
2457 static void bond_arp_send(struct net_device *slave_dev, int arp_op, __be32 dest_ip, __be32 src_ip, unsigned short vlan_id)
2458 {
2459         struct sk_buff *skb;
2460
2461         dprintk("arp %d on slave %s: dst %x src %x vid %d\n", arp_op,
2462                slave_dev->name, dest_ip, src_ip, vlan_id);
2463                
2464         skb = arp_create(arp_op, ETH_P_ARP, dest_ip, slave_dev, src_ip,
2465                          NULL, slave_dev->dev_addr, NULL);
2466
2467         if (!skb) {
2468                 printk(KERN_ERR DRV_NAME ": ARP packet allocation failed\n");
2469                 return;
2470         }
2471         if (vlan_id) {
2472                 skb = vlan_put_tag(skb, vlan_id);
2473                 if (!skb) {
2474                         printk(KERN_ERR DRV_NAME ": failed to insert VLAN tag\n");
2475                         return;
2476                 }
2477         }
2478         arp_xmit(skb);
2479 }
2480
2481
2482 static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
2483 {
2484         int i, vlan_id, rv;
2485         __be32 *targets = bond->params.arp_targets;
2486         struct vlan_entry *vlan;
2487         struct net_device *vlan_dev;
2488         struct flowi fl;
2489         struct rtable *rt;
2490
2491         for (i = 0; (i < BOND_MAX_ARP_TARGETS); i++) {
2492                 if (!targets[i])
2493                         continue;
2494                 dprintk("basa: target %x\n", targets[i]);
2495                 if (list_empty(&bond->vlan_list)) {
2496                         dprintk("basa: empty vlan: arp_send\n");
2497                         bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
2498                                       bond->master_ip, 0);
2499                         continue;
2500                 }
2501
2502                 /*
2503                  * If VLANs are configured, we do a route lookup to
2504                  * determine which VLAN interface would be used, so we
2505                  * can tag the ARP with the proper VLAN tag.
2506                  */
2507                 memset(&fl, 0, sizeof(fl));
2508                 fl.fl4_dst = targets[i];
2509                 fl.fl4_tos = RTO_ONLINK;
2510
2511                 rv = ip_route_output_key(&init_net, &rt, &fl);
2512                 if (rv) {
2513                         if (net_ratelimit()) {
2514                                 printk(KERN_WARNING DRV_NAME
2515                              ": %s: no route to arp_ip_target %u.%u.%u.%u\n",
2516                                        bond->dev->name, NIPQUAD(fl.fl4_dst));
2517                         }
2518                         continue;
2519                 }
2520
2521                 /*
2522                  * This target is not on a VLAN
2523                  */
2524                 if (rt->u.dst.dev == bond->dev) {
2525                         ip_rt_put(rt);
2526                         dprintk("basa: rtdev == bond->dev: arp_send\n");
2527                         bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
2528                                       bond->master_ip, 0);
2529                         continue;
2530                 }
2531
2532                 vlan_id = 0;
2533                 list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
2534                         vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id);
2535                         if (vlan_dev == rt->u.dst.dev) {
2536                                 vlan_id = vlan->vlan_id;
2537                                 dprintk("basa: vlan match on %s %d\n",
2538                                        vlan_dev->name, vlan_id);
2539                                 break;
2540                         }
2541                 }
2542
2543                 if (vlan_id) {
2544                         ip_rt_put(rt);
2545                         bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
2546                                       vlan->vlan_ip, vlan_id);
2547                         continue;
2548                 }
2549
2550                 if (net_ratelimit()) {
2551                         printk(KERN_WARNING DRV_NAME
2552                ": %s: no path to arp_ip_target %u.%u.%u.%u via rt.dev %s\n",
2553                                bond->dev->name, NIPQUAD(fl.fl4_dst),
2554                                rt->u.dst.dev ? rt->u.dst.dev->name : "NULL");
2555                 }
2556                 ip_rt_put(rt);
2557         }
2558 }
2559
2560 /*
2561  * Kick out a gratuitous ARP for an IP on the bonding master plus one
2562  * for each VLAN above us.
2563  */
2564 static void bond_send_gratuitous_arp(struct bonding *bond)
2565 {
2566         struct slave *slave = bond->curr_active_slave;
2567         struct vlan_entry *vlan;
2568         struct net_device *vlan_dev;
2569
2570         dprintk("bond_send_grat_arp: bond %s slave %s\n", bond->dev->name,
2571                                 slave ? slave->dev->name : "NULL");
2572         if (!slave)
2573                 return;
2574
2575         if (bond->master_ip) {
2576                 bond_arp_send(slave->dev, ARPOP_REPLY, bond->master_ip,
2577                                 bond->master_ip, 0);
2578         }
2579
2580         list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
2581                 vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id);
2582                 if (vlan->vlan_ip) {
2583                         bond_arp_send(slave->dev, ARPOP_REPLY, vlan->vlan_ip,
2584                                       vlan->vlan_ip, vlan->vlan_id);
2585                 }
2586         }
2587 }
2588
2589 static void bond_validate_arp(struct bonding *bond, struct slave *slave, __be32 sip, __be32 tip)
2590 {
2591         int i;
2592         __be32 *targets = bond->params.arp_targets;
2593
2594         targets = bond->params.arp_targets;
2595         for (i = 0; (i < BOND_MAX_ARP_TARGETS) && targets[i]; i++) {
2596                 dprintk("bva: sip %u.%u.%u.%u tip %u.%u.%u.%u t[%d] "
2597                         "%u.%u.%u.%u bhti(tip) %d\n",
2598                        NIPQUAD(sip), NIPQUAD(tip), i, NIPQUAD(targets[i]),
2599                        bond_has_this_ip(bond, tip));
2600                 if (sip == targets[i]) {
2601                         if (bond_has_this_ip(bond, tip))
2602                                 slave->last_arp_rx = jiffies;
2603                         return;
2604                 }
2605         }
2606 }
2607
2608 static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
2609 {
2610         struct arphdr *arp;
2611         struct slave *slave;
2612         struct bonding *bond;
2613         unsigned char *arp_ptr;
2614         __be32 sip, tip;
2615
2616         if (dev_net(dev) != &init_net)
2617                 goto out;
2618
2619         if (!(dev->priv_flags & IFF_BONDING) || !(dev->flags & IFF_MASTER))
2620                 goto out;
2621
2622         bond = dev->priv;
2623         read_lock(&bond->lock);
2624
2625         dprintk("bond_arp_rcv: bond %s skb->dev %s orig_dev %s\n",
2626                 bond->dev->name, skb->dev ? skb->dev->name : "NULL",
2627                 orig_dev ? orig_dev->name : "NULL");
2628
2629         slave = bond_get_slave_by_dev(bond, orig_dev);
2630         if (!slave || !slave_do_arp_validate(bond, slave))
2631                 goto out_unlock;
2632
2633         if (!pskb_may_pull(skb, arp_hdr_len(dev)))
2634                 goto out_unlock;
2635
2636         arp = arp_hdr(skb);
2637         if (arp->ar_hln != dev->addr_len ||
2638             skb->pkt_type == PACKET_OTHERHOST ||
2639             skb->pkt_type == PACKET_LOOPBACK ||
2640             arp->ar_hrd != htons(ARPHRD_ETHER) ||
2641             arp->ar_pro != htons(ETH_P_IP) ||
2642             arp->ar_pln != 4)
2643                 goto out_unlock;
2644
2645         arp_ptr = (unsigned char *)(arp + 1);
2646         arp_ptr += dev->addr_len;
2647         memcpy(&sip, arp_ptr, 4);
2648         arp_ptr += 4 + dev->addr_len;
2649         memcpy(&tip, arp_ptr, 4);
2650
2651         dprintk("bond_arp_rcv: %s %s/%d av %d sv %d sip %u.%u.%u.%u"
2652                 " tip %u.%u.%u.%u\n", bond->dev->name, slave->dev->name,
2653                 slave->state, bond->params.arp_validate,
2654                 slave_do_arp_validate(bond, slave), NIPQUAD(sip), NIPQUAD(tip));
2655
2656         /*
2657          * Backup slaves won't see the ARP reply, but do come through
2658          * here for each ARP probe (so we swap the sip/tip to validate
2659          * the probe).  In a "redundant switch, common router" type of
2660          * configuration, the ARP probe will (hopefully) travel from
2661          * the active, through one switch, the router, then the other
2662          * switch before reaching the backup.
2663          */
2664         if (slave->state == BOND_STATE_ACTIVE)
2665                 bond_validate_arp(bond, slave, sip, tip);
2666         else
2667                 bond_validate_arp(bond, slave, tip, sip);
2668
2669 out_unlock:
2670         read_unlock(&bond->lock);
2671 out:
2672         dev_kfree_skb(skb);
2673         return NET_RX_SUCCESS;
2674 }
2675
2676 /*
2677  * this function is called regularly to monitor each slave's link
2678  * ensuring that traffic is being sent and received when arp monitoring
2679  * is used in load-balancing mode. if the adapter has been dormant, then an
2680  * arp is transmitted to generate traffic. see activebackup_arp_monitor for
2681  * arp monitoring in active backup mode.
2682  */
2683 void bond_loadbalance_arp_mon(struct work_struct *work)
2684 {
2685         struct bonding *bond = container_of(work, struct bonding,
2686                                             arp_work.work);
2687         struct slave *slave, *oldcurrent;
2688         int do_failover = 0;
2689         int delta_in_ticks;
2690         int i;
2691
2692         read_lock(&bond->lock);
2693
2694         delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);
2695
2696         if (bond->kill_timers) {
2697                 goto out;
2698         }
2699
2700         if (bond->slave_cnt == 0) {
2701                 goto re_arm;
2702         }
2703
2704         read_lock(&bond->curr_slave_lock);
2705         oldcurrent = bond->curr_active_slave;
2706         read_unlock(&bond->curr_slave_lock);
2707
2708         /* see if any of the previous devices are up now (i.e. they have
2709          * xmt and rcv traffic). the curr_active_slave does not come into
2710          * the picture unless it is null. also, slave->jiffies is not needed
2711          * here because we send an arp on each slave and give a slave as
2712          * long as it needs to get the tx/rx within the delta.
2713          * TODO: what about up/down delay in arp mode? it wasn't here before
2714          *       so it can wait
2715          */
2716         bond_for_each_slave(bond, slave, i) {
2717                 if (slave->link != BOND_LINK_UP) {
2718                         if (time_before_eq(jiffies, slave->dev->trans_start + delta_in_ticks) &&
2719                             time_before_eq(jiffies, slave->dev->last_rx + delta_in_ticks)) {
2720
2721                                 slave->link  = BOND_LINK_UP;
2722                                 slave->state = BOND_STATE_ACTIVE;
2723
2724                                 /* primary_slave has no meaning in round-robin
2725                                  * mode. the window of a slave being up and
2726                                  * curr_active_slave being null after enslaving
2727                                  * is closed.
2728                                  */
2729                                 if (!oldcurrent) {
2730                                         printk(KERN_INFO DRV_NAME
2731                                                ": %s: link status definitely "
2732                                                "up for interface %s, ",
2733                                                bond->dev->name,
2734                                                slave->dev->name);
2735                                         do_failover = 1;
2736                                 } else {
2737                                         printk(KERN_INFO DRV_NAME
2738                                                ": %s: interface %s is now up\n",
2739                                                bond->dev->name,
2740                                                slave->dev->name);
2741                                 }
2742                         }
2743                 } else {
2744                         /* slave->link == BOND_LINK_UP */
2745
2746                         /* not all switches will respond to an arp request
2747                          * when the source ip is 0, so don't take the link down
2748                          * if we don't know our ip yet
2749                          */
2750                         if (time_after_eq(jiffies, slave->dev->trans_start + 2*delta_in_ticks) ||
2751                             (time_after_eq(jiffies, slave->dev->last_rx + 2*delta_in_ticks))) {
2752
2753                                 slave->link  = BOND_LINK_DOWN;
2754                                 slave->state = BOND_STATE_BACKUP;
2755
2756                                 if (slave->link_failure_count < UINT_MAX) {
2757                                         slave->link_failure_count++;
2758                                 }
2759
2760                                 printk(KERN_INFO DRV_NAME
2761                                        ": %s: interface %s is now down.\n",
2762                                        bond->dev->name,
2763                                        slave->dev->name);
2764
2765                                 if (slave == oldcurrent) {
2766                                         do_failover = 1;
2767                                 }
2768                         }
2769                 }
2770
2771                 /* note: if switch is in round-robin mode, all links
2772                  * must tx arp to ensure all links rx an arp - otherwise
2773                  * links may oscillate or not come up at all; if switch is
2774                  * in something like xor mode, there is nothing we can
2775                  * do - all replies will be rx'ed on same link causing slaves
2776                  * to be unstable during low/no traffic periods
2777                  */
2778                 if (IS_UP(slave->dev)) {
2779                         bond_arp_send_all(bond, slave);
2780                 }
2781         }
2782
2783         if (do_failover) {
2784                 write_lock_bh(&bond->curr_slave_lock);
2785
2786                 bond_select_active_slave(bond);
2787
2788                 write_unlock_bh(&bond->curr_slave_lock);
2789         }
2790
2791 re_arm:
2792         if (bond->params.arp_interval)
2793                 queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks);
2794 out:
2795         read_unlock(&bond->lock);
2796 }
2797
2798 /*
2799  * Called to inspect slaves for active-backup mode ARP monitor link state
2800  * changes.  Sets new_link in slaves to specify what action should take
2801  * place for the slave.  Returns 0 if no changes are found, >0 if changes
2802  * to link states must be committed.
2803  *
2804  * Called with bond->lock held for read.
2805  */
2806 static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
2807 {
2808         struct slave *slave;
2809         int i, commit = 0;
2810
2811         bond_for_each_slave(bond, slave, i) {
2812                 slave->new_link = BOND_LINK_NOCHANGE;
2813
2814                 if (slave->link != BOND_LINK_UP) {
2815                         if (time_before_eq(jiffies, slave_last_rx(bond, slave) +
2816                                            delta_in_ticks)) {
2817                                 slave->new_link = BOND_LINK_UP;
2818                                 commit++;
2819                         }
2820
2821                         continue;
2822                 }
2823
2824                 /*
2825                  * Give slaves 2*delta after being enslaved or made
2826                  * active.  This avoids bouncing, as the last receive
2827                  * times need a full ARP monitor cycle to be updated.
2828                  */
2829                 if (!time_after_eq(jiffies, slave->jiffies +
2830                                    2 * delta_in_ticks))
2831                         continue;
2832
2833                 /*
2834                  * Backup slave is down if:
2835                  * - No current_arp_slave AND
2836                  * - more than 3*delta since last receive AND
2837                  * - the bond has an IP address
2838                  *
2839                  * Note: a non-null current_arp_slave indicates
2840                  * the curr_active_slave went down and we are
2841                  * searching for a new one; under this condition
2842                  * we only take the curr_active_slave down - this
2843                  * gives each slave a chance to tx/rx traffic
2844                  * before being taken out
2845                  */
2846                 if (slave->state == BOND_STATE_BACKUP &&
2847                     !bond->current_arp_slave &&
2848                     time_after(jiffies, slave_last_rx(bond, slave) +
2849                                3 * delta_in_ticks)) {
2850                         slave->new_link = BOND_LINK_DOWN;
2851                         commit++;
2852                 }
2853
2854                 /*
2855                  * Active slave is down if:
2856                  * - more than 2*delta since transmitting OR
2857                  * - (more than 2*delta since receive AND
2858                  *    the bond has an IP address)
2859                  */
2860                 if ((slave->state == BOND_STATE_ACTIVE) &&
2861                     (time_after_eq(jiffies, slave->dev->trans_start +
2862                                     2 * delta_in_ticks) ||
2863                       (time_after_eq(jiffies, slave_last_rx(bond, slave)
2864                                      + 2 * delta_in_ticks)))) {
2865                         slave->new_link = BOND_LINK_DOWN;
2866                         commit++;
2867                 }
2868         }
2869
2870         read_lock(&bond->curr_slave_lock);
2871
2872         /*
2873          * Trigger a commit if the primary option setting has changed.
2874          */
2875         if (bond->primary_slave &&
2876             (bond->primary_slave != bond->curr_active_slave) &&
2877             (bond->primary_slave->link == BOND_LINK_UP))
2878                 commit++;
2879
2880         read_unlock(&bond->curr_slave_lock);
2881
2882         return commit;
2883 }
2884
2885 /*
2886  * Called to commit link state changes noted by inspection step of
2887  * active-backup mode ARP monitor.
2888  *
2889  * Called with RTNL and bond->lock for read.
2890  */
2891 static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks)
2892 {
2893         struct slave *slave;
2894         int i;
2895
2896         bond_for_each_slave(bond, slave, i) {
2897                 switch (slave->new_link) {
2898                 case BOND_LINK_NOCHANGE:
2899                         continue;
2900
2901                 case BOND_LINK_UP:
2902                         write_lock_bh(&bond->curr_slave_lock);
2903
2904                         if (!bond->curr_active_slave &&
2905                             time_before_eq(jiffies, slave->dev->trans_start +
2906                                            delta_in_ticks)) {
2907                                 slave->link = BOND_LINK_UP;
2908                                 bond_change_active_slave(bond, slave);
2909                                 bond->current_arp_slave = NULL;
2910
2911                                 printk(KERN_INFO DRV_NAME
2912                                        ": %s: %s is up and now the "
2913                                        "active interface\n",
2914                                        bond->dev->name, slave->dev->name);
2915
2916                         } else if (bond->curr_active_slave != slave) {
2917                                 /* this slave has just come up but we
2918                                  * already have a current slave; this can
2919                                  * also happen if bond_enslave adds a new
2920                                  * slave that is up while we are searching
2921                                  * for a new slave
2922                                  */
2923                                 slave->link = BOND_LINK_UP;
2924                                 bond_set_slave_inactive_flags(slave);
2925                                 bond->current_arp_slave = NULL;
2926
2927                                 printk(KERN_INFO DRV_NAME
2928                                        ": %s: backup interface %s is now up\n",
2929                                        bond->dev->name, slave->dev->name);
2930                         }
2931
2932                         write_unlock_bh(&bond->curr_slave_lock);
2933
2934                         break;
2935
2936                 case BOND_LINK_DOWN:
2937                         if (slave->link_failure_count < UINT_MAX)
2938                                 slave->link_failure_count++;
2939
2940                         slave->link = BOND_LINK_DOWN;
2941
2942                         if (slave == bond->curr_active_slave) {
2943                                 printk(KERN_INFO DRV_NAME
2944                                        ": %s: link status down for active "
2945                                        "interface %s, disabling it\n",
2946                                        bond->dev->name, slave->dev->name);
2947
2948                                 bond_set_slave_inactive_flags(slave);
2949
2950                                 write_lock_bh(&bond->curr_slave_lock);
2951
2952                                 bond_select_active_slave(bond);
2953                                 if (bond->curr_active_slave)
2954                                         bond->curr_active_slave->jiffies =
2955                                                 jiffies;
2956
2957                                 write_unlock_bh(&bond->curr_slave_lock);
2958
2959                                 bond->current_arp_slave = NULL;
2960
2961                         } else if (slave->state == BOND_STATE_BACKUP) {
2962                                 printk(KERN_INFO DRV_NAME
2963                                        ": %s: backup interface %s is now down\n",
2964                                        bond->dev->name, slave->dev->name);
2965
2966                                 bond_set_slave_inactive_flags(slave);
2967                         }
2968                         break;
2969
2970                 default:
2971                         printk(KERN_ERR DRV_NAME
2972                                ": %s: impossible: new_link %d on slave %s\n",
2973                                bond->dev->name, slave->new_link,
2974                                slave->dev->name);
2975                 }
2976         }
2977
2978         /*
2979          * No race with changes to primary via sysfs, as we hold rtnl.
2980          */
2981         if (bond->primary_slave &&
2982             (bond->primary_slave != bond->curr_active_slave) &&
2983             (bond->primary_slave->link == BOND_LINK_UP)) {
2984                 write_lock_bh(&bond->curr_slave_lock);
2985                 bond_change_active_slave(bond, bond->primary_slave);
2986                 write_unlock_bh(&bond->curr_slave_lock);
2987         }
2988
2989         bond_set_carrier(bond);
2990 }
2991
2992 /*
2993  * Send ARP probes for active-backup mode ARP monitor.
2994  *
2995  * Called with bond->lock held for read.
2996  */
2997 static void bond_ab_arp_probe(struct bonding *bond)
2998 {
2999         struct slave *slave;
3000         int i;
3001
3002         read_lock(&bond->curr_slave_lock);
3003
3004         if (bond->current_arp_slave && bond->curr_active_slave)
3005                 printk("PROBE: c_arp %s && cas %s BAD\n",
3006                        bond->current_arp_slave->dev->name,
3007                        bond->curr_active_slave->dev->name);
3008
3009         if (bond->curr_active_slave) {
3010                 bond_arp_send_all(bond, bond->curr_active_slave);
3011                 read_unlock(&bond->curr_slave_lock);
3012                 return;
3013         }
3014
3015         read_unlock(&bond->curr_slave_lock);
3016
3017         /* if we don't have a curr_active_slave, search for the next available
3018          * backup slave from the current_arp_slave and make it the candidate
3019          * for becoming the curr_active_slave
3020          */
3021
3022         if (!bond->current_arp_slave) {
3023                 bond->current_arp_slave = bond->first_slave;
3024                 if (!bond->current_arp_slave)
3025                         return;
3026         }
3027
3028         bond_set_slave_inactive_flags(bond->current_arp_slave);
3029
3030         /* search for next candidate */
3031         bond_for_each_slave_from(bond, slave, i, bond->current_arp_slave->next) {
3032                 if (IS_UP(slave->dev)) {
3033                         slave->link = BOND_LINK_BACK;
3034                         bond_set_slave_active_flags(slave);
3035                         bond_arp_send_all(bond, slave);
3036                         slave->jiffies = jiffies;
3037                         bond->current_arp_slave = slave;
3038                         break;
3039                 }
3040
3041                 /* if the link state is up at this point, we
3042                  * mark it down - this can happen if we have
3043                  * simultaneous link failures and
3044                  * reselect_active_interface doesn't make this
3045                  * one the current slave so it is still marked
3046                  * up when it is actually down
3047                  */
3048                 if (slave->link == BOND_LINK_UP) {
3049                         slave->link = BOND_LINK_DOWN;
3050                         if (slave->link_failure_count < UINT_MAX)
3051                                 slave->link_failure_count++;
3052
3053                         bond_set_slave_inactive_flags(slave);
3054
3055                         printk(KERN_INFO DRV_NAME
3056                                ": %s: backup interface %s is now down.\n",
3057                                bond->dev->name, slave->dev->name);
3058                 }
3059         }
3060 }
3061
3062 void bond_activebackup_arp_mon(struct work_struct *work)
3063 {
3064         struct bonding *bond = container_of(work, struct bonding,
3065                                             arp_work.work);
3066         int delta_in_ticks;
3067
3068         read_lock(&bond->lock);
3069
3070         if (bond->kill_timers)
3071                 goto out;
3072
3073         delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);
3074
3075         if (bond->slave_cnt == 0)
3076                 goto re_arm;
3077
3078         if (bond_ab_arp_inspect(bond, delta_in_ticks)) {
3079                 read_unlock(&bond->lock);
3080                 rtnl_lock();
3081                 read_lock(&bond->lock);
3082
3083                 bond_ab_arp_commit(bond, delta_in_ticks);
3084
3085                 read_unlock(&bond->lock);
3086                 rtnl_unlock();
3087                 read_lock(&bond->lock);
3088         }
3089
3090         bond_ab_arp_probe(bond);
3091
3092 re_arm:
3093         if (bond->params.arp_interval) {
3094                 queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks);
3095         }
3096 out:
3097         read_unlock(&bond->lock);
3098 }
3099
3100 /*------------------------------ proc/seq_file-------------------------------*/
3101
3102 #ifdef CONFIG_PROC_FS
3103
3104 static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos)
3105 {
3106         struct bonding *bond = seq->private;
3107         loff_t off = 0;
3108         struct slave *slave;
3109         int i;
3110
3111         /* make sure the bond won't be taken away */
3112         read_lock(&dev_base_lock);
3113         read_lock(&bond->lock);
3114
3115         if (*pos == 0) {
3116                 return SEQ_START_TOKEN;
3117         }
3118
3119         bond_for_each_slave(bond, slave, i) {
3120                 if (++off == *pos) {
3121                         return slave;
3122                 }
3123         }
3124
3125         return NULL;
3126 }
3127
3128 static void *bond_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3129 {
3130         struct bonding *bond = seq->private;
3131         struct slave *slave = v;
3132
3133         ++*pos;
3134         if (v == SEQ_START_TOKEN) {
3135                 return bond->first_slave;
3136         }
3137
3138         slave = slave->next;
3139
3140         return (slave == bond->first_slave) ? NULL : slave;
3141 }
3142
3143 static void bond_info_seq_stop(struct seq_file *seq, void *v)
3144 {
3145         struct bonding *bond = seq->private;
3146
3147         read_unlock(&bond->lock);
3148         read_unlock(&dev_base_lock);
3149 }
3150
3151 static void bond_info_show_master(struct seq_file *seq)
3152 {
3153         struct bonding *bond = seq->private;
3154         struct slave *curr;
3155         int i;
3156         u32 target;
3157
3158         read_lock(&bond->curr_slave_lock);
3159         curr = bond->curr_active_slave;
3160         read_unlock(&bond->curr_slave_lock);
3161
3162         seq_printf(seq, "Bonding Mode: %s",
3163                    bond_mode_name(bond->params.mode));
3164
3165         if (bond->params.mode == BOND_MODE_ACTIVEBACKUP &&
3166             bond->params.fail_over_mac)
3167                 seq_printf(seq, " (fail_over_mac)");
3168
3169         seq_printf(seq, "\n");
3170
3171         if (bond->params.mode == BOND_MODE_XOR ||
3172                 bond->params.mode == BOND_MODE_8023AD) {
3173                 seq_printf(seq, "Transmit Hash Policy: %s (%d)\n",
3174                         xmit_hashtype_tbl[bond->params.xmit_policy].modename,
3175                         bond->params.xmit_policy);
3176         }
3177
3178         if (USES_PRIMARY(bond->params.mode)) {
3179                 seq_printf(seq, "Primary Slave: %s\n",
3180                            (bond->primary_slave) ?
3181                            bond->primary_slave->dev->name : "None");
3182
3183                 seq_printf(seq, "Currently Active Slave: %s\n",
3184                            (curr) ? curr->dev->name : "None");
3185         }
3186
3187         seq_printf(seq, "MII Status: %s\n", netif_carrier_ok(bond->dev) ?
3188                    "up" : "down");
3189         seq_printf(seq, "MII Polling Interval (ms): %d\n", bond->params.miimon);
3190         seq_printf(seq, "Up Delay (ms): %d\n",
3191                    bond->params.updelay * bond->params.miimon);
3192         seq_printf(seq, "Down Delay (ms): %d\n",
3193                    bond->params.downdelay * bond->params.miimon);
3194
3195
3196         /* ARP information */
3197         if(bond->params.arp_interval > 0) {
3198                 int printed=0;
3199                 seq_printf(seq, "ARP Polling Interval (ms): %d\n",
3200                                 bond->params.arp_interval);
3201
3202                 seq_printf(seq, "ARP IP target/s (n.n.n.n form):");
3203
3204                 for(i = 0; (i < BOND_MAX_ARP_TARGETS) ;i++) {
3205                         if (!bond->params.arp_targets[i])
3206                                 continue;
3207                         if (printed)
3208                                 seq_printf(seq, ",");
3209                         target = ntohl(bond->params.arp_targets[i]);
3210                         seq_printf(seq, " %d.%d.%d.%d", HIPQUAD(target));
3211                         printed = 1;
3212                 }
3213                 seq_printf(seq, "\n");
3214         }
3215
3216         if (bond->params.mode == BOND_MODE_8023AD) {
3217                 struct ad_info ad_info;
3218                 DECLARE_MAC_BUF(mac);
3219
3220                 seq_puts(seq, "\n802.3ad info\n");
3221                 seq_printf(seq, "LACP rate: %s\n",
3222                            (bond->params.lacp_fast) ? "fast" : "slow");
3223
3224                 if (bond_3ad_get_active_agg_info(bond, &ad_info)) {
3225                         seq_printf(seq, "bond %s has no active aggregator\n",
3226                                    bond->dev->name);
3227                 } else {
3228                         seq_printf(seq, "Active Aggregator Info:\n");
3229
3230                         seq_printf(seq, "\tAggregator ID: %d\n",
3231                                    ad_info.aggregator_id);
3232                         seq_printf(seq, "\tNumber of ports: %d\n",
3233                                    ad_info.ports);
3234                         seq_printf(seq, "\tActor Key: %d\n",
3235                                    ad_info.actor_key);
3236                         seq_printf(seq, "\tPartner Key: %d\n",
3237                                    ad_info.partner_key);
3238                         seq_printf(seq, "\tPartner Mac Address: %s\n",
3239                                    print_mac(mac, ad_info.partner_system));
3240                 }
3241         }
3242 }
3243
3244 static void bond_info_show_slave(struct seq_file *seq, const struct slave *slave)
3245 {
3246         struct bonding *bond = seq->private;
3247         DECLARE_MAC_BUF(mac);
3248
3249         seq_printf(seq, "\nSlave Interface: %s\n", slave->dev->name);
3250         seq_printf(seq, "MII Status: %s\n",
3251                    (slave->link == BOND_LINK_UP) ?  "up" : "down");
3252         seq_printf(seq, "Link Failure Count: %u\n",
3253                    slave->link_failure_count);
3254
3255         seq_printf(seq,
3256                    "Permanent HW addr: %s\n",
3257                    print_mac(mac, slave->perm_hwaddr));
3258
3259         if (bond->params.mode == BOND_MODE_8023AD) {
3260                 const struct aggregator *agg
3261                         = SLAVE_AD_INFO(slave).port.aggregator;
3262
3263                 if (agg) {
3264                         seq_printf(seq, "Aggregator ID: %d\n",
3265                                    agg->aggregator_identifier);
3266                 } else {
3267                         seq_puts(seq, "Aggregator ID: N/A\n");
3268                 }
3269         }
3270 }
3271
3272 static int bond_info_seq_show(struct seq_file *seq, void *v)
3273 {
3274         if (v == SEQ_START_TOKEN) {
3275                 seq_printf(seq, "%s\n", version);
3276                 bond_info_show_master(seq);
3277         } else {
3278                 bond_info_show_slave(seq, v);
3279         }
3280
3281         return 0;
3282 }
3283
3284 static struct seq_operations bond_info_seq_ops = {
3285         .start = bond_info_seq_start,
3286         .next  = bond_info_seq_next,
3287         .stop  = bond_info_seq_stop,
3288         .show  = bond_info_seq_show,
3289 };
3290
3291 static int bond_info_open(struct inode *inode, struct file *file)
3292 {
3293         struct seq_file *seq;
3294         struct proc_dir_entry *proc;
3295         int res;
3296
3297         res = seq_open(file, &bond_info_seq_ops);
3298         if (!res) {
3299                 /* recover the pointer buried in proc_dir_entry data */
3300                 seq = file->private_data;
3301                 proc = PDE(inode);
3302                 seq->private = proc->data;
3303         }
3304
3305         return res;
3306 }
3307
3308 static const struct file_operations bond_info_fops = {
3309         .owner   = THIS_MODULE,
3310         .open    = bond_info_open,
3311         .read    = seq_read,
3312         .llseek  = seq_lseek,
3313         .release = seq_release,
3314 };
3315
3316 static int bond_create_proc_entry(struct bonding *bond)
3317 {
3318         struct net_device *bond_dev = bond->dev;
3319
3320         if (bond_proc_dir) {
3321                 bond->proc_entry = proc_create_data(bond_dev->name,
3322                                                     S_IRUGO, bond_proc_dir,
3323                                                     &bond_info_fops, bond);
3324                 if (bond->proc_entry == NULL) {
3325                         printk(KERN_WARNING DRV_NAME
3326                                ": Warning: Cannot create /proc/net/%s/%s\n",
3327                                DRV_NAME, bond_dev->name);
3328                 } else {
3329                         memcpy(bond->proc_file_name, bond_dev->name, IFNAMSIZ);
3330                 }
3331         }
3332
3333         return 0;
3334 }
3335
3336 static void bond_remove_proc_entry(struct bonding *bond)
3337 {
3338         if (bond_proc_dir && bond->proc_entry) {
3339                 remove_proc_entry(bond->proc_file_name, bond_proc_dir);
3340                 memset(bond->proc_file_name, 0, IFNAMSIZ);
3341                 bond->proc_entry = NULL;
3342         }
3343 }
3344
3345 /* Create the bonding directory under /proc/net, if doesn't exist yet.
3346  * Caller must hold rtnl_lock.
3347  */
3348 static void bond_create_proc_dir(void)
3349 {
3350         int len = strlen(DRV_NAME);
3351
3352         for (bond_proc_dir = init_net.proc_net->subdir; bond_proc_dir;
3353              bond_proc_dir = bond_proc_dir->next) {
3354                 if ((bond_proc_dir->namelen == len) &&
3355                     !memcmp(bond_proc_dir->name, DRV_NAME, len)) {
3356                         break;
3357                 }
3358         }
3359
3360         if (!bond_proc_dir) {
3361                 bond_proc_dir = proc_mkdir(DRV_NAME, init_net.proc_net);
3362                 if (bond_proc_dir) {
3363                         bond_proc_dir->owner = THIS_MODULE;
3364                 } else {
3365                         printk(KERN_WARNING DRV_NAME
3366                                 ": Warning: cannot create /proc/net/%s\n",
3367                                 DRV_NAME);
3368                 }
3369         }
3370 }
3371
3372 /* Destroy the bonding directory under /proc/net, if empty.
3373  * Caller must hold rtnl_lock.
3374  */
3375 static void bond_destroy_proc_dir(void)
3376 {
3377         struct proc_dir_entry *de;
3378
3379         if (!bond_proc_dir) {
3380                 return;
3381         }
3382
3383         /* verify that the /proc dir is empty */
3384         for (de = bond_proc_dir->subdir; de; de = de->next) {
3385                 /* ignore . and .. */
3386                 if (*(de->name) != '.') {
3387                         break;
3388                 }
3389         }
3390
3391         if (de) {
3392                 if (bond_proc_dir->owner == THIS_MODULE) {
3393                         bond_proc_dir->owner = NULL;
3394                 }
3395         } else {
3396                 remove_proc_entry(DRV_NAME, init_net.proc_net);
3397                 bond_proc_dir = NULL;
3398         }
3399 }
3400 #endif /* CONFIG_PROC_FS */
3401
3402 /*-------------------------- netdev event handling --------------------------*/
3403
3404 /*
3405  * Change device name
3406  */
3407 static int bond_event_changename(struct bonding *bond)
3408 {
3409 #ifdef CONFIG_PROC_FS
3410         bond_remove_proc_entry(bond);
3411         bond_create_proc_entry(bond);
3412 #endif
3413         down_write(&(bonding_rwsem));
3414         bond_destroy_sysfs_entry(bond);
3415         bond_create_sysfs_entry(bond);
3416         up_write(&(bonding_rwsem));
3417         return NOTIFY_DONE;
3418 }
3419
3420 static int bond_master_netdev_event(unsigned long event, struct net_device *bond_dev)
3421 {
3422         struct bonding *event_bond = bond_dev->priv;
3423
3424         switch (event) {
3425         case NETDEV_CHANGENAME:
3426                 return bond_event_changename(event_bond);
3427         case NETDEV_UNREGISTER:
3428                 bond_release_all(event_bond->dev);
3429                 break;
3430         default:
3431                 break;
3432         }
3433
3434         return NOTIFY_DONE;
3435 }
3436
3437 static int bond_slave_netdev_event(unsigned long event, struct net_device *slave_dev)
3438 {
3439         struct net_device *bond_dev = slave_dev->master;
3440         struct bonding *bond = bond_dev->priv;
3441
3442         switch (event) {
3443         case NETDEV_UNREGISTER:
3444                 if (bond_dev) {
3445                         if (bond->setup_by_slave)
3446                                 bond_release_and_destroy(bond_dev, slave_dev);
3447                         else
3448                                 bond_release(bond_dev, slave_dev);
3449                 }
3450                 break;
3451         case NETDEV_CHANGE:
3452                 /*
3453                  * TODO: is this what we get if somebody
3454                  * sets up a hierarchical bond, then rmmod's
3455                  * one of the slave bonding devices?
3456                  */
3457                 break;
3458         case NETDEV_DOWN:
3459                 /*
3460                  * ... Or is it this?
3461                  */
3462                 break;
3463         case NETDEV_CHANGEMTU:
3464                 /*
3465                  * TODO: Should slaves be allowed to
3466                  * independently alter their MTU?  For
3467                  * an active-backup bond, slaves need
3468                  * not be the same type of device, so
3469                  * MTUs may vary.  For other modes,
3470                  * slaves arguably should have the
3471                  * same MTUs. To do this, we'd need to
3472                  * take over the slave's change_mtu
3473                  * function for the duration of their
3474                  * servitude.
3475                  */
3476                 break;
3477         case NETDEV_CHANGENAME:
3478                 /*
3479                  * TODO: handle changing the primary's name
3480                  */
3481                 break;
3482         case NETDEV_FEAT_CHANGE:
3483                 bond_compute_features(bond);
3484                 break;
3485         default:
3486                 break;
3487         }
3488
3489         return NOTIFY_DONE;
3490 }
3491
3492 /*
3493  * bond_netdev_event: handle netdev notifier chain events.
3494  *
3495  * This function receives events for the netdev chain.  The caller (an
3496  * ioctl handler calling blocking_notifier_call_chain) holds the necessary
3497  * locks for us to safely manipulate the slave devices (RTNL lock,
3498  * dev_probe_lock).
3499  */
3500 static int bond_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
3501 {
3502         struct net_device *event_dev = (struct net_device *)ptr;
3503
3504         if (dev_net(event_dev) != &init_net)
3505                 return NOTIFY_DONE;
3506
3507         dprintk("event_dev: %s, event: %lx\n",
3508                 (event_dev ? event_dev->name : "None"),
3509                 event);
3510
3511         if (!(event_dev->priv_flags & IFF_BONDING))
3512                 return NOTIFY_DONE;
3513
3514         if (event_dev->flags & IFF_MASTER) {
3515                 dprintk("IFF_MASTER\n");
3516                 return bond_master_netdev_event(event, event_dev);
3517         }
3518
3519         if (event_dev->flags & IFF_SLAVE) {
3520                 dprintk("IFF_SLAVE\n");
3521                 return bond_slave_netdev_event(event, event_dev);
3522         }
3523
3524         return NOTIFY_DONE;
3525 }
3526
3527 /*
3528  * bond_inetaddr_event: handle inetaddr notifier chain events.
3529  *
3530  * We keep track of device IPs primarily to use as source addresses in
3531  * ARP monitor probes (rather than spewing out broadcasts all the time).
3532  *
3533  * We track one IP for the main device (if it has one), plus one per VLAN.
3534  */
3535 static int bond_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
3536 {
3537         struct in_ifaddr *ifa = ptr;
3538         struct net_device *vlan_dev, *event_dev = ifa->ifa_dev->dev;
3539         struct bonding *bond;
3540         struct vlan_entry *vlan;
3541
3542         if (dev_net(ifa->ifa_dev->dev) != &init_net)
3543                 return NOTIFY_DONE;
3544
3545         list_for_each_entry(bond, &bond_dev_list, bond_list) {
3546                 if (bond->dev == event_dev) {
3547                         switch (event) {
3548                         case NETDEV_UP:
3549                                 bond->master_ip = ifa->ifa_local;
3550                                 return NOTIFY_OK;
3551                         case NETDEV_DOWN:
3552                                 bond->master_ip = bond_glean_dev_ip(bond->dev);
3553                                 return NOTIFY_OK;
3554                         default:
3555                                 return NOTIFY_DONE;
3556                         }
3557                 }
3558
3559                 list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
3560                         vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id);
3561                         if (vlan_dev == event_dev) {
3562                                 switch (event) {
3563                                 case NETDEV_UP:
3564                                         vlan->vlan_ip = ifa->ifa_local;
3565                                         return NOTIFY_OK;
3566                                 case NETDEV_DOWN:
3567                                         vlan->vlan_ip =
3568                                                 bond_glean_dev_ip(vlan_dev);
3569                                         return NOTIFY_OK;
3570                                 default:
3571                                         return NOTIFY_DONE;
3572                                 }
3573                         }
3574                 }
3575         }
3576         return NOTIFY_DONE;
3577 }
3578
3579 static struct notifier_block bond_netdev_notifier = {
3580         .notifier_call = bond_netdev_event,
3581 };
3582
3583 static struct notifier_block bond_inetaddr_notifier = {
3584         .notifier_call = bond_inetaddr_event,
3585 };
3586
3587 /*-------------------------- Packet type handling ---------------------------*/
3588
3589 /* register to receive lacpdus on a bond */
3590 static void bond_register_lacpdu(struct bonding *bond)
3591 {
3592         struct packet_type *pk_type = &(BOND_AD_INFO(bond).ad_pkt_type);
3593
3594         /* initialize packet type */
3595         pk_type->type = PKT_TYPE_LACPDU;
3596         pk_type->dev = bond->dev;
3597         pk_type->func = bond_3ad_lacpdu_recv;
3598
3599         dev_add_pack(pk_type);
3600 }
3601
3602 /* unregister to receive lacpdus on a bond */
3603 static void bond_unregister_lacpdu(struct bonding *bond)
3604 {
3605         dev_remove_pack(&(BOND_AD_INFO(bond).ad_pkt_type));
3606 }
3607
3608 void bond_register_arp(struct bonding *bond)
3609 {
3610         struct packet_type *pt = &bond->arp_mon_pt;
3611
3612         if (pt->type)
3613                 return;
3614
3615         pt->type = htons(ETH_P_ARP);
3616         pt->dev = bond->dev;
3617         pt->func = bond_arp_rcv;
3618         dev_add_pack(pt);
3619 }
3620
3621 void bond_unregister_arp(struct bonding *bond)
3622 {
3623         struct packet_type *pt = &bond->arp_mon_pt;
3624
3625         dev_remove_pack(pt);
3626         pt->type = 0;
3627 }
3628
3629 /*---------------------------- Hashing Policies -----------------------------*/
3630
3631 /*
3632  * Hash for the output device based upon layer 2 and layer 3 data. If
3633  * the packet is not IP mimic bond_xmit_hash_policy_l2()
3634  */
3635 static int bond_xmit_hash_policy_l23(struct sk_buff *skb,
3636                                      struct net_device *bond_dev, int count)
3637 {
3638         struct ethhdr *data = (struct ethhdr *)skb->data;
3639         struct iphdr *iph = ip_hdr(skb);
3640
3641         if (skb->protocol == __constant_htons(ETH_P_IP)) {
3642                 return ((ntohl(iph->saddr ^ iph->daddr) & 0xffff) ^
3643                         (data->h_dest[5] ^ bond_dev->dev_addr[5])) % count;
3644         }
3645
3646         return (data->h_dest[5] ^ bond_dev->dev_addr[5]) % count;
3647 }
3648
3649 /*
3650  * Hash for the output device based upon layer 3 and layer 4 data. If
3651  * the packet is a frag or not TCP or UDP, just use layer 3 data.  If it is
3652  * altogether not IP, mimic bond_xmit_hash_policy_l2()
3653  */
3654 static int bond_xmit_hash_policy_l34(struct sk_buff *skb,
3655                                     struct net_device *bond_dev, int count)
3656 {
3657         struct ethhdr *data = (struct ethhdr *)skb->data;
3658         struct iphdr *iph = ip_hdr(skb);
3659         __be16 *layer4hdr = (__be16 *)((u32 *)iph + iph->ihl);
3660         int layer4_xor = 0;
3661
3662         if (skb->protocol == __constant_htons(ETH_P_IP)) {
3663                 if (!(iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) &&
3664                     (iph->protocol == IPPROTO_TCP ||
3665                      iph->protocol == IPPROTO_UDP)) {
3666                         layer4_xor = ntohs((*layer4hdr ^ *(layer4hdr + 1)));
3667                 }
3668                 return (layer4_xor ^
3669                         ((ntohl(iph->saddr ^ iph->daddr)) & 0xffff)) % count;
3670
3671         }
3672
3673         return (data->h_dest[5] ^ bond_dev->dev_addr[5]) % count;
3674 }
3675
3676 /*
3677  * Hash for the output device based upon layer 2 data
3678  */
3679 static int bond_xmit_hash_policy_l2(struct sk_buff *skb,
3680                                    struct net_device *bond_dev, int count)
3681 {
3682         struct ethhdr *data = (struct ethhdr *)skb->data;
3683
3684         return (data->h_dest[5] ^ bond_dev->dev_addr[5]) % count;
3685 }
3686
3687 /*-------------------------- Device entry points ----------------------------*/
3688
3689 static int bond_open(struct net_device *bond_dev)
3690 {
3691         struct bonding *bond = bond_dev->priv;
3692
3693         bond->kill_timers = 0;
3694
3695         if ((bond->params.mode == BOND_MODE_TLB) ||
3696             (bond->params.mode == BOND_MODE_ALB)) {
3697                 /* bond_alb_initialize must be called before the timer
3698                  * is started.
3699                  */
3700                 if (bond_alb_initialize(bond, (bond->params.mode == BOND_MODE_ALB))) {
3701                         /* something went wrong - fail the open operation */
3702                         return -1;
3703                 }
3704
3705                 INIT_DELAYED_WORK(&bond->alb_work, bond_alb_monitor);
3706                 queue_delayed_work(bond->wq, &bond->alb_work, 0);
3707         }
3708
3709         if (bond->params.miimon) {  /* link check interval, in milliseconds. */
3710                 INIT_DELAYED_WORK(&bond->mii_work, bond_mii_monitor);
3711                 queue_delayed_work(bond->wq, &bond->mii_work, 0);
3712         }
3713
3714         if (bond->params.arp_interval) {  /* arp interval, in milliseconds. */
3715                 if (bond->params.mode == BOND_MODE_ACTIVEBACKUP)
3716                         INIT_DELAYED_WORK(&bond->arp_work,
3717                                           bond_activebackup_arp_mon);
3718                 else
3719                         INIT_DELAYED_WORK(&bond->arp_work,
3720                                           bond_loadbalance_arp_mon);
3721
3722                 queue_delayed_work(bond->wq, &bond->arp_work, 0);
3723                 if (bond->params.arp_validate)
3724                         bond_register_arp(bond);
3725         }
3726
3727         if (bond->params.mode == BOND_MODE_8023AD) {
3728                 INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler);
3729                 queue_delayed_work(bond->wq, &bond->ad_work, 0);
3730                 /* register to receive LACPDUs */
3731                 bond_register_lacpdu(bond);
3732         }
3733
3734         return 0;
3735 }
3736
3737 static int bond_close(struct net_device *bond_dev)
3738 {
3739         struct bonding *bond = bond_dev->priv;
3740
3741         if (bond->params.mode == BOND_MODE_8023AD) {
3742                 /* Unregister the receive of LACPDUs */
3743                 bond_unregister_lacpdu(bond);
3744         }
3745
3746         if (bond->params.arp_validate)
3747                 bond_unregister_arp(bond);
3748
3749         write_lock_bh(&bond->lock);
3750
3751
3752         /* signal timers not to re-arm */
3753         bond->kill_timers = 1;
3754
3755         write_unlock_bh(&bond->lock);
3756
3757         if (bond->params.miimon) {  /* link check interval, in milliseconds. */
3758                 cancel_delayed_work(&bond->mii_work);
3759         }
3760
3761         if (bond->params.arp_interval) {  /* arp interval, in milliseconds. */
3762                 cancel_delayed_work(&bond->arp_work);
3763         }
3764
3765         switch (bond->params.mode) {
3766         case BOND_MODE_8023AD:
3767                 cancel_delayed_work(&bond->ad_work);
3768                 break;
3769         case BOND_MODE_TLB:
3770         case BOND_MODE_ALB:
3771                 cancel_delayed_work(&bond->alb_work);
3772                 break;
3773         default:
3774                 break;
3775         }
3776
3777
3778         if ((bond->params.mode == BOND_MODE_TLB) ||
3779             (bond->params.mode == BOND_MODE_ALB)) {
3780                 /* Must be called only after all
3781                  * slaves have been released
3782                  */
3783                 bond_alb_deinitialize(bond);
3784         }
3785
3786         return 0;
3787 }
3788
3789 static struct net_device_stats *bond_get_stats(struct net_device *bond_dev)
3790 {
3791         struct bonding *bond = bond_dev->priv;
3792         struct net_device_stats *stats = &(bond->stats), *sstats;
3793         struct net_device_stats local_stats;
3794         struct slave *slave;
3795         int i;
3796
3797         memset(&local_stats, 0, sizeof(struct net_device_stats));
3798
3799         read_lock_bh(&bond->lock);
3800
3801         bond_for_each_slave(bond, slave, i) {
3802                 sstats = slave->dev->get_stats(slave->dev);
3803                 local_stats.rx_packets += sstats->rx_packets;
3804                 local_stats.rx_bytes += sstats->rx_bytes;
3805                 local_stats.rx_errors += sstats->rx_errors;
3806                 local_stats.rx_dropped += sstats->rx_dropped;
3807
3808                 local_stats.tx_packets += sstats->tx_packets;
3809                 local_stats.tx_bytes += sstats->tx_bytes;
3810                 local_stats.tx_errors += sstats->tx_errors;
3811                 local_stats.tx_dropped += sstats->tx_dropped;
3812
3813                 local_stats.multicast += sstats->multicast;
3814                 local_stats.collisions += sstats->collisions;
3815
3816                 local_stats.rx_length_errors += sstats->rx_length_errors;
3817                 local_stats.rx_over_errors += sstats->rx_over_errors;
3818                 local_stats.rx_crc_errors += sstats->rx_crc_errors;
3819                 local_stats.rx_frame_errors += sstats->rx_frame_errors;
3820                 local_stats.rx_fifo_errors += sstats->rx_fifo_errors;
3821                 local_stats.rx_missed_errors += sstats->rx_missed_errors;
3822
3823                 local_stats.tx_aborted_errors += sstats->tx_aborted_errors;
3824                 local_stats.tx_carrier_errors += sstats->tx_carrier_errors;
3825                 local_stats.tx_fifo_errors += sstats->tx_fifo_errors;
3826                 local_stats.tx_heartbeat_errors += sstats->tx_heartbeat_errors;
3827                 local_stats.tx_window_errors += sstats->tx_window_errors;
3828         }
3829
3830         memcpy(stats, &local_stats, sizeof(struct net_device_stats));
3831
3832         read_unlock_bh(&bond->lock);
3833
3834         return stats;
3835 }
3836
3837 static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd)
3838 {
3839         struct net_device *slave_dev = NULL;
3840         struct ifbond k_binfo;
3841         struct ifbond __user *u_binfo = NULL;
3842         struct ifslave k_sinfo;
3843         struct ifslave __user *u_sinfo = NULL;
3844         struct mii_ioctl_data *mii = NULL;
3845         int res = 0;
3846
3847         dprintk("bond_ioctl: master=%s, cmd=%d\n",
3848                 bond_dev->name, cmd);
3849
3850         switch (cmd) {
3851         case SIOCGMIIPHY:
3852                 mii = if_mii(ifr);
3853                 if (!mii) {
3854                         return -EINVAL;
3855                 }
3856                 mii->phy_id = 0;
3857                 /* Fall Through */
3858         case SIOCGMIIREG:
3859                 /*
3860                  * We do this again just in case we were called by SIOCGMIIREG
3861                  * instead of SIOCGMIIPHY.
3862                  */
3863                 mii = if_mii(ifr);
3864                 if (!mii) {
3865                         return -EINVAL;
3866                 }
3867
3868                 if (mii->reg_num == 1) {
3869                         struct bonding *bond = bond_dev->priv;
3870                         mii->val_out = 0;
3871                         read_lock(&bond->lock);
3872                         read_lock(&bond->curr_slave_lock);
3873                         if (netif_carrier_ok(bond->dev)) {
3874                                 mii->val_out = BMSR_LSTATUS;
3875                         }
3876                         read_unlock(&bond->curr_slave_lock);
3877                         read_unlock(&bond->lock);
3878                 }
3879
3880                 return 0;
3881         case BOND_INFO_QUERY_OLD:
3882         case SIOCBONDINFOQUERY:
3883                 u_binfo = (struct ifbond __user *)ifr->ifr_data;
3884
3885                 if (copy_from_user(&k_binfo, u_binfo, sizeof(ifbond))) {
3886                         return -EFAULT;
3887                 }
3888
3889                 res = bond_info_query(bond_dev, &k_binfo);
3890                 if (res == 0) {
3891                         if (copy_to_user(u_binfo, &k_binfo, sizeof(ifbond))) {
3892                                 return -EFAULT;
3893                         }
3894                 }
3895
3896                 return res;
3897         case BOND_SLAVE_INFO_QUERY_OLD:
3898         case SIOCBONDSLAVEINFOQUERY:
3899                 u_sinfo = (struct ifslave __user *)ifr->ifr_data;
3900
3901                 if (copy_from_user(&k_sinfo, u_sinfo, sizeof(ifslave))) {
3902                         return -EFAULT;
3903                 }
3904
3905                 res = bond_slave_info_query(bond_dev, &k_sinfo);
3906                 if (res == 0) {
3907                         if (copy_to_user(u_sinfo, &k_sinfo, sizeof(ifslave))) {
3908                                 return -EFAULT;
3909                         }
3910                 }
3911
3912                 return res;
3913         default:
3914                 /* Go on */
3915                 break;
3916         }
3917
3918         if (!capable(CAP_NET_ADMIN)) {
3919                 return -EPERM;
3920         }
3921
3922         down_write(&(bonding_rwsem));
3923         slave_dev = dev_get_by_name(&init_net, ifr->ifr_slave);
3924
3925         dprintk("slave_dev=%p: \n", slave_dev);
3926
3927         if (!slave_dev) {
3928                 res = -ENODEV;
3929         } else {
3930                 dprintk("slave_dev->name=%s: \n", slave_dev->name);
3931                 switch (cmd) {
3932                 case BOND_ENSLAVE_OLD:
3933                 case SIOCBONDENSLAVE:
3934                         res = bond_enslave(bond_dev, slave_dev);
3935                         break;
3936                 case BOND_RELEASE_OLD:
3937                 case SIOCBONDRELEASE:
3938                         res = bond_release(bond_dev, slave_dev);
3939                         break;
3940                 case BOND_SETHWADDR_OLD:
3941                 case SIOCBONDSETHWADDR:
3942                         res = bond_sethwaddr(bond_dev, slave_dev);
3943                         break;
3944                 case BOND_CHANGE_ACTIVE_OLD:
3945                 case SIOCBONDCHANGEACTIVE:
3946                         res = bond_ioctl_change_active(bond_dev, slave_dev);
3947                         break;
3948                 default:
3949                         res = -EOPNOTSUPP;
3950                 }
3951
3952                 dev_put(slave_dev);
3953         }
3954
3955         up_write(&(bonding_rwsem));
3956         return res;
3957 }
3958
3959 static void bond_set_multicast_list(struct net_device *bond_dev)
3960 {
3961         struct bonding *bond = bond_dev->priv;
3962         struct dev_mc_list *dmi;
3963
3964         /*
3965          * Do promisc before checking multicast_mode
3966          */
3967         if ((bond_dev->flags & IFF_PROMISC) && !(bond->flags & IFF_PROMISC)) {
3968                 bond_set_promiscuity(bond, 1);
3969         }
3970
3971         if (!(bond_dev->flags & IFF_PROMISC) && (bond->flags & IFF_PROMISC)) {
3972                 bond_set_promiscuity(bond, -1);
3973         }
3974
3975         /* set allmulti flag to slaves */
3976         if ((bond_dev->flags & IFF_ALLMULTI) && !(bond->flags & IFF_ALLMULTI)) {
3977                 bond_set_allmulti(bond, 1);
3978         }
3979
3980         if (!(bond_dev->flags & IFF_ALLMULTI) && (bond->flags & IFF_ALLMULTI)) {
3981                 bond_set_allmulti(bond, -1);
3982         }
3983
3984         read_lock(&bond->lock);
3985
3986         bond->flags = bond_dev->flags;
3987
3988         /* looking for addresses to add to slaves' mc list */
3989         for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) {
3990                 if (!bond_mc_list_find_dmi(dmi, bond->mc_list)) {
3991                         bond_mc_add(bond, dmi->dmi_addr, dmi->dmi_addrlen);
3992                 }
3993         }
3994
3995         /* looking for addresses to delete from slaves' list */
3996         for (dmi = bond->mc_list; dmi; dmi = dmi->next) {
3997                 if (!bond_mc_list_find_dmi(dmi, bond_dev->mc_list)) {
3998                         bond_mc_delete(bond, dmi->dmi_addr, dmi->dmi_addrlen);
3999                 }
4000         }
4001
4002         /* save master's multicast list */
4003         bond_mc_list_destroy(bond);
4004         bond_mc_list_copy(bond_dev->mc_list, bond, GFP_ATOMIC);
4005
4006         read_unlock(&bond->lock);
4007 }
4008
4009 /*
4010  * Change the MTU of all of a master's slaves to match the master
4011  */
4012 static int bond_change_mtu(struct net_device *bond_dev, int new_mtu)
4013 {
4014         struct bonding *bond = bond_dev->priv;
4015         struct slave *slave, *stop_at;
4016         int res = 0;
4017         int i;
4018
4019         dprintk("bond=%p, name=%s, new_mtu=%d\n", bond,
4020                 (bond_dev ? bond_dev->name : "None"), new_mtu);
4021
4022         /* Can't hold bond->lock with bh disabled here since
4023          * some base drivers panic. On the other hand we can't
4024          * hold bond->lock without bh disabled because we'll
4025          * deadlock. The only solution is to rely on the fact
4026          * that we're under rtnl_lock here, and the slaves
4027          * list won't change. This doesn't solve the problem
4028          * of setting the slave's MTU while it is
4029          * transmitting, but the assumption is that the base
4030          * driver can handle that.
4031          *
4032          * TODO: figure out a way to safely iterate the slaves
4033          * list, but without holding a lock around the actual
4034          * call to the base driver.
4035          */
4036
4037         bond_for_each_slave(bond, slave, i) {
4038                 dprintk("s %p s->p %p c_m %p\n", slave,
4039                         slave->prev, slave->dev->change_mtu);
4040
4041                 res = dev_set_mtu(slave->dev, new_mtu);
4042
4043                 if (res) {
4044                         /* If we failed to set the slave's mtu to the new value
4045                          * we must abort the operation even in ACTIVE_BACKUP
4046                          * mode, because if we allow the backup slaves to have
4047                          * different mtu values than the active slave we'll
4048                          * need to change their mtu when doing a failover. That
4049                          * means changing their mtu from timer context, which
4050                          * is probably not a good idea.
4051                          */
4052                         dprintk("err %d %s\n", res, slave->dev->name);
4053                         goto unwind;
4054                 }
4055         }
4056
4057         bond_dev->mtu = new_mtu;
4058
4059         return 0;
4060
4061 unwind:
4062         /* unwind from head to the slave that failed */
4063         stop_at = slave;
4064         bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) {
4065                 int tmp_res;
4066
4067                 tmp_res = dev_set_mtu(slave->dev, bond_dev->mtu);
4068                 if (tmp_res) {
4069                         dprintk("unwind err %d dev %s\n", tmp_res,
4070                                 slave->dev->name);
4071                 }
4072         }
4073
4074         return res;
4075 }
4076
4077 /*
4078  * Change HW address
4079  *
4080  * Note that many devices must be down to change the HW address, and
4081  * downing the master releases all slaves.  We can make bonds full of
4082  * bonding devices to test this, however.
4083  */
4084 static int bond_set_mac_address(struct net_device *bond_dev, void *addr)
4085 {
4086         struct bonding *bond = bond_dev->priv;
4087         struct sockaddr *sa = addr, tmp_sa;
4088         struct slave *slave, *stop_at;
4089         int res = 0;
4090         int i;
4091
4092         dprintk("bond=%p, name=%s\n", bond, (bond_dev ? bond_dev->name : "None"));
4093
4094         /*
4095          * If fail_over_mac is enabled, do nothing and return success.
4096          * Returning an error causes ifenslave to fail.
4097          */
4098         if (bond->params.fail_over_mac)
4099                 return 0;
4100
4101         if (!is_valid_ether_addr(sa->sa_data)) {
4102                 return -EADDRNOTAVAIL;
4103         }
4104
4105         /* Can't hold bond->lock with bh disabled here since
4106          * some base drivers panic. On the other hand we can't
4107          * hold bond->lock without bh disabled because we'll
4108          * deadlock. The only solution is to rely on the fact
4109          * that we're under rtnl_lock here, and the slaves
4110          * list won't change. This doesn't solve the problem
4111          * of setting the slave's hw address while it is
4112          * transmitting, but the assumption is that the base
4113          * driver can handle that.
4114          *
4115          * TODO: figure out a way to safely iterate the slaves
4116          * list, but without holding a lock around the actual
4117          * call to the base driver.
4118          */
4119
4120         bond_for_each_slave(bond, slave, i) {
4121                 dprintk("slave %p %s\n", slave, slave->dev->name);
4122
4123                 if (slave->dev->set_mac_address == NULL) {
4124                         res = -EOPNOTSUPP;
4125                         dprintk("EOPNOTSUPP %s\n", slave->dev->name);
4126                         goto unwind;
4127                 }
4128
4129                 res = dev_set_mac_address(slave->dev, addr);
4130                 if (res) {
4131                         /* TODO: consider downing the slave
4132                          * and retry ?
4133                          * User should expect communications
4134                          * breakage anyway until ARP finish
4135                          * updating, so...
4136                          */
4137                         dprintk("err %d %s\n", res, slave->dev->name);
4138                         goto unwind;
4139                 }
4140         }
4141
4142         /* success */
4143         memcpy(bond_dev->dev_addr, sa->sa_data, bond_dev->addr_len);
4144         return 0;
4145
4146 unwind:
4147         memcpy(tmp_sa.sa_data, bond_dev->dev_addr, bond_dev->addr_len);
4148         tmp_sa.sa_family = bond_dev->type;
4149
4150         /* unwind from head to the slave that failed */
4151         stop_at = slave;
4152         bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) {
4153                 int tmp_res;
4154
4155                 tmp_res = dev_set_mac_address(slave->dev, &tmp_sa);
4156                 if (tmp_res) {
4157                         dprintk("unwind err %d dev %s\n", tmp_res,
4158                                 slave->dev->name);
4159                 }
4160         }
4161
4162         return res;
4163 }
4164
4165 static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev)
4166 {
4167         struct bonding *bond = bond_dev->priv;
4168         struct slave *slave, *start_at;
4169         int i, slave_no, res = 1;
4170
4171         read_lock(&bond->lock);
4172
4173         if (!BOND_IS_OK(bond)) {
4174                 goto out;
4175         }
4176
4177         /*
4178          * Concurrent TX may collide on rr_tx_counter; we accept that
4179          * as being rare enough not to justify using an atomic op here
4180          */
4181         slave_no = bond->rr_tx_counter++ % bond->slave_cnt;
4182
4183         bond_for_each_slave(bond, slave, i) {
4184                 slave_no--;
4185                 if (slave_no < 0) {
4186                         break;
4187                 }
4188         }
4189
4190         start_at = slave;
4191         bond_for_each_slave_from(bond, slave, i, start_at) {
4192                 if (IS_UP(slave->dev) &&
4193                     (slave->link == BOND_LINK_UP) &&
4194                     (slave->state == BOND_STATE_ACTIVE)) {
4195                         res = bond_dev_queue_xmit(bond, skb, slave->dev);
4196                         break;
4197                 }
4198         }
4199
4200 out:
4201         if (res) {
4202                 /* no suitable interface, frame not sent */
4203                 dev_kfree_skb(skb);
4204         }
4205         read_unlock(&bond->lock);
4206         return 0;
4207 }
4208
4209
4210 /*
4211  * in active-backup mode, we know that bond->curr_active_slave is always valid if
4212  * the bond has a usable interface.
4213  */
4214 static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_dev)
4215 {
4216         struct bonding *bond = bond_dev->priv;
4217         int res = 1;
4218
4219         read_lock(&bond->lock);
4220         read_lock(&bond->curr_slave_lock);
4221
4222         if (!BOND_IS_OK(bond)) {
4223                 goto out;
4224         }
4225
4226         if (!bond->curr_active_slave)
4227                 goto out;
4228
4229         res = bond_dev_queue_xmit(bond, skb, bond->curr_active_slave->dev);
4230
4231 out:
4232         if (res) {
4233                 /* no suitable interface, frame not sent */
4234                 dev_kfree_skb(skb);
4235         }
4236         read_unlock(&bond->curr_slave_lock);
4237         read_unlock(&bond->lock);
4238         return 0;
4239 }
4240
4241 /*
4242  * In bond_xmit_xor() , we determine the output device by using a pre-
4243  * determined xmit_hash_policy(), If the selected device is not enabled,
4244  * find the next active slave.
4245  */
4246 static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev)
4247 {
4248         struct bonding *bond = bond_dev->priv;
4249         struct slave *slave, *start_at;
4250         int slave_no;
4251         int i;
4252         int res = 1;
4253
4254         read_lock(&bond->lock);
4255
4256         if (!BOND_IS_OK(bond)) {
4257                 goto out;
4258         }
4259
4260         slave_no = bond->xmit_hash_policy(skb, bond_dev, bond->slave_cnt);
4261
4262         bond_for_each_slave(bond, slave, i) {
4263                 slave_no--;
4264                 if (slave_no < 0) {
4265                         break;
4266                 }
4267         }
4268
4269         start_at = slave;
4270
4271         bond_for_each_slave_from(bond, slave, i, start_at) {
4272                 if (IS_UP(slave->dev) &&
4273                     (slave->link == BOND_LINK_UP) &&
4274                     (slave->state == BOND_STATE_ACTIVE)) {
4275                         res = bond_dev_queue_xmit(bond, skb, slave->dev);
4276                         break;
4277                 }
4278         }
4279
4280 out:
4281         if (res) {
4282                 /* no suitable interface, frame not sent */
4283                 dev_kfree_skb(skb);
4284         }
4285         read_unlock(&bond->lock);
4286         return 0;
4287 }
4288
4289 /*
4290  * in broadcast mode, we send everything to all usable interfaces.
4291  */
4292 static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev)
4293 {
4294         struct bonding *bond = bond_dev->priv;
4295         struct slave *slave, *start_at;
4296         struct net_device *tx_dev = NULL;
4297         int i;
4298         int res = 1;
4299
4300         read_lock(&bond->lock);
4301
4302         if (!BOND_IS_OK(bond)) {
4303                 goto out;
4304         }
4305
4306         read_lock(&bond->curr_slave_lock);
4307         start_at = bond->curr_active_slave;
4308         read_unlock(&bond->curr_slave_lock);
4309
4310         if (!start_at) {
4311                 goto out;
4312         }
4313
4314         bond_for_each_slave_from(bond, slave, i, start_at) {
4315                 if (IS_UP(slave->dev) &&
4316                     (slave->link == BOND_LINK_UP) &&
4317                     (slave->state == BOND_STATE_ACTIVE)) {
4318                         if (tx_dev) {
4319                                 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
4320                                 if (!skb2) {
4321                                         printk(KERN_ERR DRV_NAME
4322                                                ": %s: Error: bond_xmit_broadcast(): "
4323                                                "skb_clone() failed\n",
4324                                                bond_dev->name);
4325                                         continue;
4326                                 }
4327
4328                                 res = bond_dev_queue_xmit(bond, skb2, tx_dev);
4329                                 if (res) {
4330                                         dev_kfree_skb(skb2);
4331                                         continue;
4332                                 }
4333                         }
4334                         tx_dev = slave->dev;
4335                 }
4336         }
4337
4338         if (tx_dev) {
4339                 res = bond_dev_queue_xmit(bond, skb, tx_dev);
4340         }
4341
4342 out:
4343         if (res) {
4344                 /* no suitable interface, frame not sent */
4345                 dev_kfree_skb(skb);
4346         }
4347         /* frame sent to all suitable interfaces */
4348         read_unlock(&bond->lock);
4349         return 0;
4350 }
4351
4352 /*------------------------- Device initialization ---------------------------*/
4353
4354 static void bond_set_xmit_hash_policy(struct bonding *bond)
4355 {
4356         switch (bond->params.xmit_policy) {
4357         case BOND_XMIT_POLICY_LAYER23:
4358                 bond->xmit_hash_policy = bond_xmit_hash_policy_l23;
4359                 break;
4360         case BOND_XMIT_POLICY_LAYER34:
4361                 bond->xmit_hash_policy = bond_xmit_hash_policy_l34;
4362                 break;
4363         case BOND_XMIT_POLICY_LAYER2:
4364         default:
4365                 bond->xmit_hash_policy = bond_xmit_hash_policy_l2;
4366                 break;
4367         }
4368 }
4369
4370 /*
4371  * set bond mode specific net device operations
4372  */
4373 void bond_set_mode_ops(struct bonding *bond, int mode)
4374 {
4375         struct net_device *bond_dev = bond->dev;
4376
4377         switch (mode) {
4378         case BOND_MODE_ROUNDROBIN:
4379                 bond_dev->hard_start_xmit = bond_xmit_roundrobin;
4380                 break;
4381         case BOND_MODE_ACTIVEBACKUP:
4382                 bond_dev->hard_start_xmit = bond_xmit_activebackup;
4383                 break;
4384         case BOND_MODE_XOR:
4385                 bond_dev->hard_start_xmit = bond_xmit_xor;
4386                 bond_set_xmit_hash_policy(bond);
4387                 break;
4388         case BOND_MODE_BROADCAST:
4389                 bond_dev->hard_start_xmit = bond_xmit_broadcast;
4390                 break;
4391         case BOND_MODE_8023AD:
4392                 bond_set_master_3ad_flags(bond);
4393                 bond_dev->hard_start_xmit = bond_3ad_xmit_xor;
4394                 bond_set_xmit_hash_policy(bond);
4395                 break;
4396         case BOND_MODE_ALB:
4397                 bond_set_master_alb_flags(bond);
4398                 /* FALLTHRU */
4399         case BOND_MODE_TLB:
4400                 bond_dev->hard_start_xmit = bond_alb_xmit;
4401                 bond_dev->set_mac_address = bond_alb_set_mac_address;
4402                 break;
4403         default:
4404                 /* Should never happen, mode already checked */
4405                 printk(KERN_ERR DRV_NAME
4406                        ": %s: Error: Unknown bonding mode %d\n",
4407                        bond_dev->name,
4408                        mode);
4409                 break;
4410         }
4411 }
4412
4413 static void bond_ethtool_get_drvinfo(struct net_device *bond_dev,
4414                                     struct ethtool_drvinfo *drvinfo)
4415 {
4416         strncpy(drvinfo->driver, DRV_NAME, 32);
4417         strncpy(drvinfo->version, DRV_VERSION, 32);
4418         snprintf(drvinfo->fw_version, 32, "%d", BOND_ABI_VERSION);
4419 }
4420
4421 static const struct ethtool_ops bond_ethtool_ops = {
4422         .get_drvinfo            = bond_ethtool_get_drvinfo,
4423 };
4424
4425 /*
4426  * Does not allocate but creates a /proc entry.
4427  * Allowed to fail.
4428  */
4429 static int bond_init(struct net_device *bond_dev, struct bond_params *params)
4430 {
4431         struct bonding *bond = bond_dev->priv;
4432
4433         dprintk("Begin bond_init for %s\n", bond_dev->name);
4434
4435         /* initialize rwlocks */
4436         rwlock_init(&bond->lock);
4437         rwlock_init(&bond->curr_slave_lock);
4438
4439         bond->params = *params; /* copy params struct */
4440
4441         bond->wq = create_singlethread_workqueue(bond_dev->name);
4442         if (!bond->wq)
4443                 return -ENOMEM;
4444
4445         /* Initialize pointers */
4446         bond->first_slave = NULL;
4447         bond->curr_active_slave = NULL;
4448         bond->current_arp_slave = NULL;
4449         bond->primary_slave = NULL;
4450         bond->dev = bond_dev;
4451         bond->send_grat_arp = 0;
4452         bond->setup_by_slave = 0;
4453         INIT_LIST_HEAD(&bond->vlan_list);
4454
4455         /* Initialize the device entry points */
4456         bond_dev->open = bond_open;
4457         bond_dev->stop = bond_close;
4458         bond_dev->get_stats = bond_get_stats;
4459         bond_dev->do_ioctl = bond_do_ioctl;
4460         bond_dev->ethtool_ops = &bond_ethtool_ops;
4461         bond_dev->set_multicast_list = bond_set_multicast_list;
4462         bond_dev->change_mtu = bond_change_mtu;
4463         bond_dev->set_mac_address = bond_set_mac_address;
4464         bond_dev->validate_addr = NULL;
4465
4466         bond_set_mode_ops(bond, bond->params.mode);
4467
4468         bond_dev->destructor = free_netdev;
4469
4470         /* Initialize the device options */
4471         bond_dev->tx_queue_len = 0;
4472         bond_dev->flags |= IFF_MASTER|IFF_MULTICAST;
4473         bond_dev->priv_flags |= IFF_BONDING;
4474
4475         /* At first, we block adding VLANs. That's the only way to
4476          * prevent problems that occur when adding VLANs over an
4477          * empty bond. The block will be removed once non-challenged
4478          * slaves are enslaved.
4479          */
4480         bond_dev->features |= NETIF_F_VLAN_CHALLENGED;
4481
4482         /* don't acquire bond device's netif_tx_lock when
4483          * transmitting */
4484         bond_dev->features |= NETIF_F_LLTX;
4485
4486         /* By default, we declare the bond to be fully
4487          * VLAN hardware accelerated capable. Special
4488          * care is taken in the various xmit functions
4489          * when there are slaves that are not hw accel
4490          * capable
4491          */
4492         bond_dev->vlan_rx_register = bond_vlan_rx_register;
4493         bond_dev->vlan_rx_add_vid  = bond_vlan_rx_add_vid;
4494         bond_dev->vlan_rx_kill_vid = bond_vlan_rx_kill_vid;
4495         bond_dev->features |= (NETIF_F_HW_VLAN_TX |
4496                                NETIF_F_HW_VLAN_RX |
4497                                NETIF_F_HW_VLAN_FILTER);
4498
4499 #ifdef CONFIG_PROC_FS
4500         bond_create_proc_entry(bond);
4501 #endif
4502         list_add_tail(&bond->bond_list, &bond_dev_list);
4503
4504         return 0;
4505 }
4506
4507 /* De-initialize device specific data.
4508  * Caller must hold rtnl_lock.
4509  */
4510 static void bond_deinit(struct net_device *bond_dev)
4511 {
4512         struct bonding *bond = bond_dev->priv;
4513
4514         list_del(&bond->bond_list);
4515
4516 #ifdef CONFIG_PROC_FS
4517         bond_remove_proc_entry(bond);
4518 #endif
4519 }
4520
4521 static void bond_work_cancel_all(struct bonding *bond)
4522 {
4523         write_lock_bh(&bond->lock);
4524         bond->kill_timers = 1;
4525         write_unlock_bh(&bond->lock);
4526
4527         if (bond->params.miimon && delayed_work_pending(&bond->mii_work))
4528                 cancel_delayed_work(&bond->mii_work);
4529
4530         if (bond->params.arp_interval && delayed_work_pending(&bond->arp_work))
4531                 cancel_delayed_work(&bond->arp_work);
4532
4533         if (bond->params.mode == BOND_MODE_ALB &&
4534             delayed_work_pending(&bond->alb_work))
4535                 cancel_delayed_work(&bond->alb_work);
4536
4537         if (bond->params.mode == BOND_MODE_8023AD &&
4538             delayed_work_pending(&bond->ad_work))
4539                 cancel_delayed_work(&bond->ad_work);
4540 }
4541
4542 /* Unregister and free all bond devices.
4543  * Caller must hold rtnl_lock.
4544  */
4545 static void bond_free_all(void)
4546 {
4547         struct bonding *bond, *nxt;
4548
4549         list_for_each_entry_safe(bond, nxt, &bond_dev_list, bond_list) {
4550                 struct net_device *bond_dev = bond->dev;
4551
4552                 bond_work_cancel_all(bond);
4553                 netif_tx_lock_bh(bond_dev);
4554                 bond_mc_list_destroy(bond);
4555                 netif_tx_unlock_bh(bond_dev);
4556                 /* Release the bonded slaves */
4557                 bond_release_all(bond_dev);
4558                 bond_destroy(bond);
4559         }
4560
4561 #ifdef CONFIG_PROC_FS
4562         bond_destroy_proc_dir();
4563 #endif
4564 }
4565
4566 /*------------------------- Module initialization ---------------------------*/
4567
4568 /*
4569  * Convert string input module parms.  Accept either the
4570  * number of the mode or its string name.  A bit complicated because
4571  * some mode names are substrings of other names, and calls from sysfs
4572  * may have whitespace in the name (trailing newlines, for example).
4573  */
4574 int bond_parse_parm(const char *buf, struct bond_parm_tbl *tbl)
4575 {
4576         int mode = -1, i, rv;
4577         char *p, modestr[BOND_MAX_MODENAME_LEN + 1] = { 0, };
4578
4579         for (p = (char *)buf; *p; p++)
4580                 if (!(isdigit(*p) || isspace(*p)))
4581                         break;
4582
4583         if (*p)
4584                 rv = sscanf(buf, "%20s", modestr);
4585         else
4586                 rv = sscanf(buf, "%d", &mode);
4587
4588         if (!rv)
4589                 return -1;
4590
4591         for (i = 0; tbl[i].modename; i++) {
4592                 if (mode == tbl[i].mode)
4593                         return tbl[i].mode;
4594                 if (strcmp(modestr, tbl[i].modename) == 0)
4595                         return tbl[i].mode;
4596         }
4597
4598         return -1;
4599 }
4600
4601 static int bond_check_params(struct bond_params *params)
4602 {
4603         int arp_validate_value;
4604
4605         /*
4606          * Convert string parameters.
4607          */
4608         if (mode) {
4609                 bond_mode = bond_parse_parm(mode, bond_mode_tbl);
4610                 if (bond_mode == -1) {
4611                         printk(KERN_ERR DRV_NAME
4612                                ": Error: Invalid bonding mode \"%s\"\n",
4613                                mode == NULL ? "NULL" : mode);
4614                         return -EINVAL;
4615                 }
4616         }
4617
4618         if (xmit_hash_policy) {
4619                 if ((bond_mode != BOND_MODE_XOR) &&
4620                     (bond_mode != BOND_MODE_8023AD)) {
4621                         printk(KERN_INFO DRV_NAME
4622                                ": xor_mode param is irrelevant in mode %s\n",
4623                                bond_mode_name(bond_mode));
4624                 } else {
4625                         xmit_hashtype = bond_parse_parm(xmit_hash_policy,
4626                                                         xmit_hashtype_tbl);
4627                         if (xmit_hashtype == -1) {
4628                                 printk(KERN_ERR DRV_NAME
4629                                 ": Error: Invalid xmit_hash_policy \"%s\"\n",
4630                                 xmit_hash_policy == NULL ? "NULL" :
4631                                        xmit_hash_policy);
4632                                 return -EINVAL;
4633                         }
4634                 }
4635         }
4636
4637         if (lacp_rate) {
4638                 if (bond_mode != BOND_MODE_8023AD) {
4639                         printk(KERN_INFO DRV_NAME
4640                                ": lacp_rate param is irrelevant in mode %s\n",
4641                                bond_mode_name(bond_mode));
4642                 } else {
4643                         lacp_fast = bond_parse_parm(lacp_rate, bond_lacp_tbl);
4644                         if (lacp_fast == -1) {
4645                                 printk(KERN_ERR DRV_NAME
4646                                        ": Error: Invalid lacp rate \"%s\"\n",
4647                                        lacp_rate == NULL ? "NULL" : lacp_rate);
4648                                 return -EINVAL;
4649                         }
4650                 }
4651         }
4652
4653         if (max_bonds < 1 || max_bonds > INT_MAX) {
4654                 printk(KERN_WARNING DRV_NAME
4655                        ": Warning: max_bonds (%d) not in range %d-%d, so it "
4656                        "was reset to BOND_DEFAULT_MAX_BONDS (%d)\n",
4657                        max_bonds, 1, INT_MAX, BOND_DEFAULT_MAX_BONDS);
4658                 max_bonds = BOND_DEFAULT_MAX_BONDS;
4659         }
4660
4661         if (miimon < 0) {
4662                 printk(KERN_WARNING DRV_NAME
4663                        ": Warning: miimon module parameter (%d), "
4664                        "not in range 0-%d, so it was reset to %d\n",
4665                        miimon, INT_MAX, BOND_LINK_MON_INTERV);
4666                 miimon = BOND_LINK_MON_INTERV;
4667         }
4668
4669         if (updelay < 0) {
4670                 printk(KERN_WARNING DRV_NAME
4671                        ": Warning: updelay module parameter (%d), "
4672                        "not in range 0-%d, so it was reset to 0\n",
4673                        updelay, INT_MAX);
4674                 updelay = 0;
4675         }
4676
4677         if (downdelay < 0) {
4678                 printk(KERN_WARNING DRV_NAME
4679                        ": Warning: downdelay module parameter (%d), "
4680                        "not in range 0-%d, so it was reset to 0\n",
4681                        downdelay, INT_MAX);
4682                 downdelay = 0;
4683         }
4684
4685         if ((use_carrier != 0) && (use_carrier != 1)) {
4686                 printk(KERN_WARNING DRV_NAME
4687                        ": Warning: use_carrier module parameter (%d), "
4688                        "not of valid value (0/1), so it was set to 1\n",
4689                        use_carrier);
4690                 use_carrier = 1;
4691         }
4692
4693         if (num_grat_arp < 0 || num_grat_arp > 255) {
4694                 printk(KERN_WARNING DRV_NAME
4695                        ": Warning: num_grat_arp (%d) not in range 0-255 so it "
4696                        "was reset to 1 \n", num_grat_arp);
4697                 num_grat_arp = 1;
4698         }
4699
4700         /* reset values for 802.3ad */
4701         if (bond_mode == BOND_MODE_8023AD) {
4702                 if (!miimon) {
4703                         printk(KERN_WARNING DRV_NAME
4704                                ": Warning: miimon must be specified, "
4705                                "otherwise bonding will not detect link "
4706                                "failure, speed and duplex which are "
4707                                "essential for 802.3ad operation\n");
4708                         printk(KERN_WARNING "Forcing miimon to 100msec\n");
4709                         miimon = 100;
4710                 }
4711         }
4712
4713         /* reset values for TLB/ALB */
4714         if ((bond_mode == BOND_MODE_TLB) ||
4715             (bond_mode == BOND_MODE_ALB)) {
4716                 if (!miimon) {
4717                         printk(KERN_WARNING DRV_NAME
4718                                ": Warning: miimon must be specified, "
4719                                "otherwise bonding will not detect link "
4720                                "failure and link speed which are essential "
4721                                "for TLB/ALB load balancing\n");
4722                         printk(KERN_WARNING "Forcing miimon to 100msec\n");
4723                         miimon = 100;
4724                 }
4725         }
4726
4727         if (bond_mode == BOND_MODE_ALB) {
4728                 printk(KERN_NOTICE DRV_NAME
4729                        ": In ALB mode you might experience client "
4730                        "disconnections upon reconnection of a link if the "
4731                        "bonding module updelay parameter (%d msec) is "
4732                        "incompatible with the forwarding delay time of the "
4733                        "switch\n",
4734                        updelay);
4735         }
4736
4737         if (!miimon) {
4738                 if (updelay || downdelay) {
4739                         /* just warn the user the up/down delay will have
4740                          * no effect since miimon is zero...
4741                          */
4742                         printk(KERN_WARNING DRV_NAME
4743                                ": Warning: miimon module parameter not set "
4744                                "and updelay (%d) or downdelay (%d) module "
4745                                "parameter is set; updelay and downdelay have "
4746                                "no effect unless miimon is set\n",
4747                                updelay, downdelay);
4748                 }
4749         } else {
4750                 /* don't allow arp monitoring */
4751                 if (arp_interval) {
4752                         printk(KERN_WARNING DRV_NAME
4753                                ": Warning: miimon (%d) and arp_interval (%d) "
4754                                "can't be used simultaneously, disabling ARP "
4755                                "monitoring\n",
4756                                miimon, arp_interval);
4757                         arp_interval = 0;
4758                 }
4759
4760                 if ((updelay % miimon) != 0) {
4761                         printk(KERN_WARNING DRV_NAME
4762                                ": Warning: updelay (%d) is not a multiple "
4763                                "of miimon (%d), updelay rounded to %d ms\n",
4764                                updelay, miimon, (updelay / miimon) * miimon);
4765                 }
4766
4767                 updelay /= miimon;
4768
4769                 if ((downdelay % miimon) != 0) {
4770                         printk(KERN_WARNING DRV_NAME
4771                                ": Warning: downdelay (%d) is not a multiple "
4772                                "of miimon (%d), downdelay rounded to %d ms\n",
4773                                downdelay, miimon,
4774                                (downdelay / miimon) * miimon);
4775                 }
4776
4777                 downdelay /= miimon;
4778         }
4779
4780         if (arp_interval < 0) {
4781                 printk(KERN_WARNING DRV_NAME
4782                        ": Warning: arp_interval module parameter (%d) "
4783                        ", not in range 0-%d, so it was reset to %d\n",
4784                        arp_interval, INT_MAX, BOND_LINK_ARP_INTERV);
4785                 arp_interval = BOND_LINK_ARP_INTERV;
4786         }
4787
4788         for (arp_ip_count = 0;
4789              (arp_ip_count < BOND_MAX_ARP_TARGETS) && arp_ip_target[arp_ip_count];
4790              arp_ip_count++) {
4791                 /* not complete check, but should be good enough to
4792                    catch mistakes */
4793                 if (!isdigit(arp_ip_target[arp_ip_count][0])) {
4794                         printk(KERN_WARNING DRV_NAME
4795                                ": Warning: bad arp_ip_target module parameter "
4796                                "(%s), ARP monitoring will not be performed\n",
4797                                arp_ip_target[arp_ip_count]);
4798                         arp_interval = 0;
4799                 } else {
4800                         __be32 ip = in_aton(arp_ip_target[arp_ip_count]);
4801                         arp_target[arp_ip_count] = ip;
4802                 }
4803         }
4804
4805         if (arp_interval && !arp_ip_count) {
4806                 /* don't allow arping if no arp_ip_target given... */
4807                 printk(KERN_WARNING DRV_NAME
4808                        ": Warning: arp_interval module parameter (%d) "
4809                        "specified without providing an arp_ip_target "
4810                        "parameter, arp_interval was reset to 0\n",
4811                        arp_interval);
4812                 arp_interval = 0;
4813         }
4814
4815         if (arp_validate) {
4816                 if (bond_mode != BOND_MODE_ACTIVEBACKUP) {
4817                         printk(KERN_ERR DRV_NAME
4818                ": arp_validate only supported in active-backup mode\n");
4819                         return -EINVAL;
4820                 }
4821                 if (!arp_interval) {
4822                         printk(KERN_ERR DRV_NAME
4823                                ": arp_validate requires arp_interval\n");
4824                         return -EINVAL;
4825                 }
4826
4827                 arp_validate_value = bond_parse_parm(arp_validate,
4828                                                      arp_validate_tbl);
4829                 if (arp_validate_value == -1) {
4830                         printk(KERN_ERR DRV_NAME
4831                                ": Error: invalid arp_validate \"%s\"\n",
4832                                arp_validate == NULL ? "NULL" : arp_validate);
4833                         return -EINVAL;
4834                 }
4835         } else
4836                 arp_validate_value = 0;
4837
4838         if (miimon) {
4839                 printk(KERN_INFO DRV_NAME
4840                        ": MII link monitoring set to %d ms\n",
4841                        miimon);
4842         } else if (arp_interval) {
4843                 int i;
4844
4845                 printk(KERN_INFO DRV_NAME
4846                        ": ARP monitoring set to %d ms, validate %s, with %d target(s):",
4847                        arp_interval,
4848                        arp_validate_tbl[arp_validate_value].modename,
4849                        arp_ip_count);
4850
4851                 for (i = 0; i < arp_ip_count; i++)
4852                         printk (" %s", arp_ip_target[i]);
4853
4854                 printk("\n");
4855
4856         } else {
4857                 /* miimon and arp_interval not set, we need one so things
4858                  * work as expected, see bonding.txt for details
4859                  */
4860                 printk(KERN_WARNING DRV_NAME
4861                        ": Warning: either miimon or arp_interval and "
4862                        "arp_ip_target module parameters must be specified, "
4863                        "otherwise bonding will not detect link failures! see "
4864                        "bonding.txt for details.\n");
4865         }
4866
4867         if (primary && !USES_PRIMARY(bond_mode)) {
4868                 /* currently, using a primary only makes sense
4869                  * in active backup, TLB or ALB modes
4870                  */
4871                 printk(KERN_WARNING DRV_NAME
4872                        ": Warning: %s primary device specified but has no "
4873                        "effect in %s mode\n",
4874                        primary, bond_mode_name(bond_mode));
4875                 primary = NULL;
4876         }
4877
4878         if (fail_over_mac && (bond_mode != BOND_MODE_ACTIVEBACKUP))
4879                 printk(KERN_WARNING DRV_NAME
4880                        ": Warning: fail_over_mac only affects "
4881                        "active-backup mode.\n");
4882
4883         /* fill params struct with the proper values */
4884         params->mode = bond_mode;
4885         params->xmit_policy = xmit_hashtype;
4886         params->miimon = miimon;
4887         params->num_grat_arp = num_grat_arp;
4888         params->arp_interval = arp_interval;
4889         params->arp_validate = arp_validate_value;
4890         params->updelay = updelay;
4891         params->downdelay = downdelay;
4892         params->use_carrier = use_carrier;
4893         params->lacp_fast = lacp_fast;
4894         params->primary[0] = 0;
4895         params->fail_over_mac = fail_over_mac;
4896
4897         if (primary) {
4898                 strncpy(params->primary, primary, IFNAMSIZ);
4899                 params->primary[IFNAMSIZ - 1] = 0;
4900         }
4901
4902         memcpy(params->arp_targets, arp_target, sizeof(arp_target));
4903
4904         return 0;
4905 }
4906
4907 static struct lock_class_key bonding_netdev_xmit_lock_key;
4908
4909 /* Create a new bond based on the specified name and bonding parameters.
4910  * If name is NULL, obtain a suitable "bond%d" name for us.
4911  * Caller must NOT hold rtnl_lock; we need to release it here before we
4912  * set up our sysfs entries.
4913  */
4914 int bond_create(char *name, struct bond_params *params)
4915 {
4916         struct net_device *bond_dev;
4917         struct bonding *bond;
4918         int res;
4919
4920         rtnl_lock();
4921         down_write(&bonding_rwsem);
4922
4923         /* Check to see if the bond already exists. */
4924         if (name) {
4925                 list_for_each_entry(bond, &bond_dev_list, bond_list)
4926                         if (strnicmp(bond->dev->name, name, IFNAMSIZ) == 0) {
4927                                 printk(KERN_ERR DRV_NAME
4928                                ": cannot add bond %s; it already exists\n",
4929                                        name);
4930                                 res = -EPERM;
4931                                 goto out_rtnl;
4932                         }
4933         }
4934
4935         bond_dev = alloc_netdev(sizeof(struct bonding), name ? name : "",
4936                                 ether_setup);
4937         if (!bond_dev) {
4938                 printk(KERN_ERR DRV_NAME
4939                        ": %s: eek! can't alloc netdev!\n",
4940                        name);
4941                 res = -ENOMEM;
4942                 goto out_rtnl;
4943         }
4944
4945         if (!name) {
4946                 res = dev_alloc_name(bond_dev, "bond%d");
4947                 if (res < 0)
4948                         goto out_netdev;
4949         }
4950
4951         /* bond_init() must be called after dev_alloc_name() (for the
4952          * /proc files), but before register_netdevice(), because we
4953          * need to set function pointers.
4954          */
4955
4956         res = bond_init(bond_dev, params);
4957         if (res < 0) {
4958                 goto out_netdev;
4959         }
4960
4961         res = register_netdevice(bond_dev);
4962         if (res < 0) {
4963                 goto out_bond;
4964         }
4965
4966         lockdep_set_class(&bond_dev->_xmit_lock, &bonding_netdev_xmit_lock_key);
4967
4968         netif_carrier_off(bond_dev);
4969
4970         up_write(&bonding_rwsem);
4971         rtnl_unlock(); /* allows sysfs registration of net device */
4972         res = bond_create_sysfs_entry(bond_dev->priv);
4973         if (res < 0) {
4974                 rtnl_lock();
4975                 down_write(&bonding_rwsem);
4976                 bond_deinit(bond_dev);
4977                 unregister_netdevice(bond_dev);
4978                 goto out_rtnl;
4979         }
4980
4981         return 0;
4982
4983 out_bond:
4984         bond_deinit(bond_dev);
4985 out_netdev:
4986         free_netdev(bond_dev);
4987 out_rtnl:
4988         up_write(&bonding_rwsem);
4989         rtnl_unlock();
4990         return res;
4991 }
4992
4993 static int __init bonding_init(void)
4994 {
4995         int i;
4996         int res;
4997         struct bonding *bond;
4998
4999         printk(KERN_INFO "%s", version);
5000
5001         res = bond_check_params(&bonding_defaults);
5002         if (res) {
5003                 goto out;
5004         }
5005
5006 #ifdef CONFIG_PROC_FS
5007         bond_create_proc_dir();
5008 #endif
5009
5010         init_rwsem(&bonding_rwsem);
5011
5012         for (i = 0; i < max_bonds; i++) {
5013                 res = bond_create(NULL, &bonding_defaults);
5014                 if (res)
5015                         goto err;
5016         }
5017
5018         res = bond_create_sysfs();
5019         if (res)
5020                 goto err;
5021
5022         register_netdevice_notifier(&bond_netdev_notifier);
5023         register_inetaddr_notifier(&bond_inetaddr_notifier);
5024
5025         goto out;
5026 err:
5027         list_for_each_entry(bond, &bond_dev_list, bond_list) {
5028                 bond_work_cancel_all(bond);
5029                 destroy_workqueue(bond->wq);
5030         }
5031
5032         bond_destroy_sysfs();
5033
5034         rtnl_lock();
5035         bond_free_all();
5036         rtnl_unlock();
5037 out:
5038         return res;
5039
5040 }
5041
5042 static void __exit bonding_exit(void)
5043 {
5044         unregister_netdevice_notifier(&bond_netdev_notifier);
5045         unregister_inetaddr_notifier(&bond_inetaddr_notifier);
5046
5047         bond_destroy_sysfs();
5048
5049         rtnl_lock();
5050         bond_free_all();
5051         rtnl_unlock();
5052 }
5053
5054 module_init(bonding_init);
5055 module_exit(bonding_exit);
5056 MODULE_LICENSE("GPL");
5057 MODULE_VERSION(DRV_VERSION);
5058 MODULE_DESCRIPTION(DRV_DESCRIPTION ", v" DRV_VERSION);
5059 MODULE_AUTHOR("Thomas Davis, tadavis@lbl.gov and many others");
5060 MODULE_SUPPORTED_DEVICE("most ethernet devices");
5061
5062 /*
5063  * Local variables:
5064  *  c-indent-level: 8
5065  *  c-basic-offset: 8
5066  *  tab-width: 8
5067  * End:
5068  */
5069