bonding: alternate agg selection policies for 802.3ad
[safe/jmp/linux-2.6] / drivers / net / bonding / bond_main.c
1 /*
2  * originally based on the dummy device.
3  *
4  * Copyright 1999, Thomas Davis, tadavis@lbl.gov.
5  * Licensed under the GPL. Based on dummy.c, and eql.c devices.
6  *
7  * bonding.c: an Ethernet Bonding driver
8  *
9  * This is useful to talk to a Cisco EtherChannel compatible equipment:
10  *      Cisco 5500
11  *      Sun Trunking (Solaris)
12  *      Alteon AceDirector Trunks
13  *      Linux Bonding
14  *      and probably many L2 switches ...
15  *
16  * How it works:
17  *    ifconfig bond0 ipaddress netmask up
18  *      will setup a network device, with an ip address.  No mac address
19  *      will be assigned at this time.  The hw mac address will come from
20  *      the first slave bonded to the channel.  All slaves will then use
21  *      this hw mac address.
22  *
23  *    ifconfig bond0 down
24  *         will release all slaves, marking them as down.
25  *
26  *    ifenslave bond0 eth0
27  *      will attach eth0 to bond0 as a slave.  eth0 hw mac address will either
28  *      a: be used as initial mac address
29  *      b: if a hw mac address already is there, eth0's hw mac address
30  *         will then be set from bond0.
31  *
32  */
33
34 //#define BONDING_DEBUG 1
35
36 #include <linux/kernel.h>
37 #include <linux/module.h>
38 #include <linux/types.h>
39 #include <linux/fcntl.h>
40 #include <linux/interrupt.h>
41 #include <linux/ptrace.h>
42 #include <linux/ioport.h>
43 #include <linux/in.h>
44 #include <net/ip.h>
45 #include <linux/ip.h>
46 #include <linux/tcp.h>
47 #include <linux/udp.h>
48 #include <linux/slab.h>
49 #include <linux/string.h>
50 #include <linux/init.h>
51 #include <linux/timer.h>
52 #include <linux/socket.h>
53 #include <linux/ctype.h>
54 #include <linux/inet.h>
55 #include <linux/bitops.h>
56 #include <asm/system.h>
57 #include <asm/io.h>
58 #include <asm/dma.h>
59 #include <asm/uaccess.h>
60 #include <linux/errno.h>
61 #include <linux/netdevice.h>
62 #include <linux/inetdevice.h>
63 #include <linux/igmp.h>
64 #include <linux/etherdevice.h>
65 #include <linux/skbuff.h>
66 #include <net/sock.h>
67 #include <linux/rtnetlink.h>
68 #include <linux/proc_fs.h>
69 #include <linux/seq_file.h>
70 #include <linux/smp.h>
71 #include <linux/if_ether.h>
72 #include <net/arp.h>
73 #include <linux/mii.h>
74 #include <linux/ethtool.h>
75 #include <linux/if_vlan.h>
76 #include <linux/if_bonding.h>
77 #include <linux/jiffies.h>
78 #include <net/route.h>
79 #include <net/net_namespace.h>
80 #include "bonding.h"
81 #include "bond_3ad.h"
82 #include "bond_alb.h"
83
84 /*---------------------------- Module parameters ----------------------------*/
85
86 /* monitor all links that often (in milliseconds). <=0 disables monitoring */
87 #define BOND_LINK_MON_INTERV    0
88 #define BOND_LINK_ARP_INTERV    0
89
90 static int max_bonds    = BOND_DEFAULT_MAX_BONDS;
91 static int num_grat_arp = 1;
92 static int num_unsol_na = 1;
93 static int miimon       = BOND_LINK_MON_INTERV;
94 static int updelay      = 0;
95 static int downdelay    = 0;
96 static int use_carrier  = 1;
97 static char *mode       = NULL;
98 static char *primary    = NULL;
99 static char *lacp_rate  = NULL;
100 static char *ad_select  = NULL;
101 static char *xmit_hash_policy = NULL;
102 static int arp_interval = BOND_LINK_ARP_INTERV;
103 static char *arp_ip_target[BOND_MAX_ARP_TARGETS] = { NULL, };
104 static char *arp_validate = NULL;
105 static char *fail_over_mac = NULL;
106 struct bond_params bonding_defaults;
107
108 module_param(max_bonds, int, 0);
109 MODULE_PARM_DESC(max_bonds, "Max number of bonded devices");
110 module_param(num_grat_arp, int, 0644);
111 MODULE_PARM_DESC(num_grat_arp, "Number of gratuitous ARP packets to send on failover event");
112 module_param(num_unsol_na, int, 0644);
113 MODULE_PARM_DESC(num_unsol_na, "Number of unsolicited IPv6 Neighbor Advertisements packets to send on failover event");
114 module_param(miimon, int, 0);
115 MODULE_PARM_DESC(miimon, "Link check interval in milliseconds");
116 module_param(updelay, int, 0);
117 MODULE_PARM_DESC(updelay, "Delay before considering link up, in milliseconds");
118 module_param(downdelay, int, 0);
119 MODULE_PARM_DESC(downdelay, "Delay before considering link down, "
120                             "in milliseconds");
121 module_param(use_carrier, int, 0);
122 MODULE_PARM_DESC(use_carrier, "Use netif_carrier_ok (vs MII ioctls) in miimon; "
123                               "0 for off, 1 for on (default)");
124 module_param(mode, charp, 0);
125 MODULE_PARM_DESC(mode, "Mode of operation : 0 for balance-rr, "
126                        "1 for active-backup, 2 for balance-xor, "
127                        "3 for broadcast, 4 for 802.3ad, 5 for balance-tlb, "
128                        "6 for balance-alb");
129 module_param(primary, charp, 0);
130 MODULE_PARM_DESC(primary, "Primary network device to use");
131 module_param(lacp_rate, charp, 0);
132 MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner "
133                             "(slow/fast)");
134 module_param(ad_select, charp, 0);
135 MODULE_PARM_DESC(ad_select, "803.ad aggregation selection logic: stable (0, default), bandwidth (1), count (2)");
136 module_param(xmit_hash_policy, charp, 0);
137 MODULE_PARM_DESC(xmit_hash_policy, "XOR hashing method: 0 for layer 2 (default)"
138                                    ", 1 for layer 3+4");
139 module_param(arp_interval, int, 0);
140 MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds");
141 module_param_array(arp_ip_target, charp, NULL, 0);
142 MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form");
143 module_param(arp_validate, charp, 0);
144 MODULE_PARM_DESC(arp_validate, "validate src/dst of ARP probes: none (default), active, backup or all");
145 module_param(fail_over_mac, charp, 0);
146 MODULE_PARM_DESC(fail_over_mac, "For active-backup, do not set all slaves to the same MAC.  none (default), active or follow");
147
148 /*----------------------------- Global variables ----------------------------*/
149
150 static const char * const version =
151         DRV_DESCRIPTION ": v" DRV_VERSION " (" DRV_RELDATE ")\n";
152
153 LIST_HEAD(bond_dev_list);
154
155 #ifdef CONFIG_PROC_FS
156 static struct proc_dir_entry *bond_proc_dir = NULL;
157 #endif
158
159 extern struct rw_semaphore bonding_rwsem;
160 static __be32 arp_target[BOND_MAX_ARP_TARGETS] = { 0, } ;
161 static int arp_ip_count = 0;
162 static int bond_mode    = BOND_MODE_ROUNDROBIN;
163 static int xmit_hashtype= BOND_XMIT_POLICY_LAYER2;
164 static int lacp_fast    = 0;
165
166
167 struct bond_parm_tbl bond_lacp_tbl[] = {
168 {       "slow",         AD_LACP_SLOW},
169 {       "fast",         AD_LACP_FAST},
170 {       NULL,           -1},
171 };
172
173 struct bond_parm_tbl bond_mode_tbl[] = {
174 {       "balance-rr",           BOND_MODE_ROUNDROBIN},
175 {       "active-backup",        BOND_MODE_ACTIVEBACKUP},
176 {       "balance-xor",          BOND_MODE_XOR},
177 {       "broadcast",            BOND_MODE_BROADCAST},
178 {       "802.3ad",              BOND_MODE_8023AD},
179 {       "balance-tlb",          BOND_MODE_TLB},
180 {       "balance-alb",          BOND_MODE_ALB},
181 {       NULL,                   -1},
182 };
183
184 struct bond_parm_tbl xmit_hashtype_tbl[] = {
185 {       "layer2",               BOND_XMIT_POLICY_LAYER2},
186 {       "layer3+4",             BOND_XMIT_POLICY_LAYER34},
187 {       "layer2+3",             BOND_XMIT_POLICY_LAYER23},
188 {       NULL,                   -1},
189 };
190
191 struct bond_parm_tbl arp_validate_tbl[] = {
192 {       "none",                 BOND_ARP_VALIDATE_NONE},
193 {       "active",               BOND_ARP_VALIDATE_ACTIVE},
194 {       "backup",               BOND_ARP_VALIDATE_BACKUP},
195 {       "all",                  BOND_ARP_VALIDATE_ALL},
196 {       NULL,                   -1},
197 };
198
199 struct bond_parm_tbl fail_over_mac_tbl[] = {
200 {       "none",                 BOND_FOM_NONE},
201 {       "active",               BOND_FOM_ACTIVE},
202 {       "follow",               BOND_FOM_FOLLOW},
203 {       NULL,                   -1},
204 };
205
206 struct bond_parm_tbl ad_select_tbl[] = {
207 {       "stable",       BOND_AD_STABLE},
208 {       "bandwidth",    BOND_AD_BANDWIDTH},
209 {       "count",        BOND_AD_COUNT},
210 {       NULL,           -1},
211 };
212
213 /*-------------------------- Forward declarations ---------------------------*/
214
215 static void bond_send_gratuitous_arp(struct bonding *bond);
216 static void bond_deinit(struct net_device *bond_dev);
217
218 /*---------------------------- General routines -----------------------------*/
219
220 static const char *bond_mode_name(int mode)
221 {
222         switch (mode) {
223         case BOND_MODE_ROUNDROBIN :
224                 return "load balancing (round-robin)";
225         case BOND_MODE_ACTIVEBACKUP :
226                 return "fault-tolerance (active-backup)";
227         case BOND_MODE_XOR :
228                 return "load balancing (xor)";
229         case BOND_MODE_BROADCAST :
230                 return "fault-tolerance (broadcast)";
231         case BOND_MODE_8023AD:
232                 return "IEEE 802.3ad Dynamic link aggregation";
233         case BOND_MODE_TLB:
234                 return "transmit load balancing";
235         case BOND_MODE_ALB:
236                 return "adaptive load balancing";
237         default:
238                 return "unknown";
239         }
240 }
241
242 /*---------------------------------- VLAN -----------------------------------*/
243
244 /**
245  * bond_add_vlan - add a new vlan id on bond
246  * @bond: bond that got the notification
247  * @vlan_id: the vlan id to add
248  *
249  * Returns -ENOMEM if allocation failed.
250  */
251 static int bond_add_vlan(struct bonding *bond, unsigned short vlan_id)
252 {
253         struct vlan_entry *vlan;
254
255         dprintk("bond: %s, vlan id %d\n",
256                 (bond ? bond->dev->name: "None"), vlan_id);
257
258         vlan = kzalloc(sizeof(struct vlan_entry), GFP_KERNEL);
259         if (!vlan) {
260                 return -ENOMEM;
261         }
262
263         INIT_LIST_HEAD(&vlan->vlan_list);
264         vlan->vlan_id = vlan_id;
265
266         write_lock_bh(&bond->lock);
267
268         list_add_tail(&vlan->vlan_list, &bond->vlan_list);
269
270         write_unlock_bh(&bond->lock);
271
272         dprintk("added VLAN ID %d on bond %s\n", vlan_id, bond->dev->name);
273
274         return 0;
275 }
276
277 /**
278  * bond_del_vlan - delete a vlan id from bond
279  * @bond: bond that got the notification
280  * @vlan_id: the vlan id to delete
281  *
282  * returns -ENODEV if @vlan_id was not found in @bond.
283  */
284 static int bond_del_vlan(struct bonding *bond, unsigned short vlan_id)
285 {
286         struct vlan_entry *vlan;
287         int res = -ENODEV;
288
289         dprintk("bond: %s, vlan id %d\n", bond->dev->name, vlan_id);
290
291         write_lock_bh(&bond->lock);
292
293         list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
294                 if (vlan->vlan_id == vlan_id) {
295                         list_del(&vlan->vlan_list);
296
297                         if ((bond->params.mode == BOND_MODE_TLB) ||
298                             (bond->params.mode == BOND_MODE_ALB)) {
299                                 bond_alb_clear_vlan(bond, vlan_id);
300                         }
301
302                         dprintk("removed VLAN ID %d from bond %s\n", vlan_id,
303                                 bond->dev->name);
304
305                         kfree(vlan);
306
307                         if (list_empty(&bond->vlan_list) &&
308                             (bond->slave_cnt == 0)) {
309                                 /* Last VLAN removed and no slaves, so
310                                  * restore block on adding VLANs. This will
311                                  * be removed once new slaves that are not
312                                  * VLAN challenged will be added.
313                                  */
314                                 bond->dev->features |= NETIF_F_VLAN_CHALLENGED;
315                         }
316
317                         res = 0;
318                         goto out;
319                 }
320         }
321
322         dprintk("couldn't find VLAN ID %d in bond %s\n", vlan_id,
323                 bond->dev->name);
324
325 out:
326         write_unlock_bh(&bond->lock);
327         return res;
328 }
329
330 /**
331  * bond_has_challenged_slaves
332  * @bond: the bond we're working on
333  *
334  * Searches the slave list. Returns 1 if a vlan challenged slave
335  * was found, 0 otherwise.
336  *
337  * Assumes bond->lock is held.
338  */
339 static int bond_has_challenged_slaves(struct bonding *bond)
340 {
341         struct slave *slave;
342         int i;
343
344         bond_for_each_slave(bond, slave, i) {
345                 if (slave->dev->features & NETIF_F_VLAN_CHALLENGED) {
346                         dprintk("found VLAN challenged slave - %s\n",
347                                 slave->dev->name);
348                         return 1;
349                 }
350         }
351
352         dprintk("no VLAN challenged slaves found\n");
353         return 0;
354 }
355
356 /**
357  * bond_next_vlan - safely skip to the next item in the vlans list.
358  * @bond: the bond we're working on
359  * @curr: item we're advancing from
360  *
361  * Returns %NULL if list is empty, bond->next_vlan if @curr is %NULL,
362  * or @curr->next otherwise (even if it is @curr itself again).
363  * 
364  * Caller must hold bond->lock
365  */
366 struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr)
367 {
368         struct vlan_entry *next, *last;
369
370         if (list_empty(&bond->vlan_list)) {
371                 return NULL;
372         }
373
374         if (!curr) {
375                 next = list_entry(bond->vlan_list.next,
376                                   struct vlan_entry, vlan_list);
377         } else {
378                 last = list_entry(bond->vlan_list.prev,
379                                   struct vlan_entry, vlan_list);
380                 if (last == curr) {
381                         next = list_entry(bond->vlan_list.next,
382                                           struct vlan_entry, vlan_list);
383                 } else {
384                         next = list_entry(curr->vlan_list.next,
385                                           struct vlan_entry, vlan_list);
386                 }
387         }
388
389         return next;
390 }
391
392 /**
393  * bond_dev_queue_xmit - Prepare skb for xmit.
394  * 
395  * @bond: bond device that got this skb for tx.
396  * @skb: hw accel VLAN tagged skb to transmit
397  * @slave_dev: slave that is supposed to xmit this skbuff
398  * 
399  * When the bond gets an skb to transmit that is
400  * already hardware accelerated VLAN tagged, and it
401  * needs to relay this skb to a slave that is not
402  * hw accel capable, the skb needs to be "unaccelerated",
403  * i.e. strip the hwaccel tag and re-insert it as part
404  * of the payload.
405  */
406 int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev)
407 {
408         unsigned short uninitialized_var(vlan_id);
409
410         if (!list_empty(&bond->vlan_list) &&
411             !(slave_dev->features & NETIF_F_HW_VLAN_TX) &&
412             vlan_get_tag(skb, &vlan_id) == 0) {
413                 skb->dev = slave_dev;
414                 skb = vlan_put_tag(skb, vlan_id);
415                 if (!skb) {
416                         /* vlan_put_tag() frees the skb in case of error,
417                          * so return success here so the calling functions
418                          * won't attempt to free is again.
419                          */
420                         return 0;
421                 }
422         } else {
423                 skb->dev = slave_dev;
424         }
425
426         skb->priority = 1;
427         dev_queue_xmit(skb);
428
429         return 0;
430 }
431
432 /*
433  * In the following 3 functions, bond_vlan_rx_register(), bond_vlan_rx_add_vid
434  * and bond_vlan_rx_kill_vid, We don't protect the slave list iteration with a
435  * lock because:
436  * a. This operation is performed in IOCTL context,
437  * b. The operation is protected by the RTNL semaphore in the 8021q code,
438  * c. Holding a lock with BH disabled while directly calling a base driver
439  *    entry point is generally a BAD idea.
440  * 
441  * The design of synchronization/protection for this operation in the 8021q
442  * module is good for one or more VLAN devices over a single physical device
443  * and cannot be extended for a teaming solution like bonding, so there is a
444  * potential race condition here where a net device from the vlan group might
445  * be referenced (either by a base driver or the 8021q code) while it is being
446  * removed from the system. However, it turns out we're not making matters
447  * worse, and if it works for regular VLAN usage it will work here too.
448 */
449
450 /**
451  * bond_vlan_rx_register - Propagates registration to slaves
452  * @bond_dev: bonding net device that got called
453  * @grp: vlan group being registered
454  */
455 static void bond_vlan_rx_register(struct net_device *bond_dev, struct vlan_group *grp)
456 {
457         struct bonding *bond = bond_dev->priv;
458         struct slave *slave;
459         int i;
460
461         bond->vlgrp = grp;
462
463         bond_for_each_slave(bond, slave, i) {
464                 struct net_device *slave_dev = slave->dev;
465
466                 if ((slave_dev->features & NETIF_F_HW_VLAN_RX) &&
467                     slave_dev->vlan_rx_register) {
468                         slave_dev->vlan_rx_register(slave_dev, grp);
469                 }
470         }
471 }
472
473 /**
474  * bond_vlan_rx_add_vid - Propagates adding an id to slaves
475  * @bond_dev: bonding net device that got called
476  * @vid: vlan id being added
477  */
478 static void bond_vlan_rx_add_vid(struct net_device *bond_dev, uint16_t vid)
479 {
480         struct bonding *bond = bond_dev->priv;
481         struct slave *slave;
482         int i, res;
483
484         bond_for_each_slave(bond, slave, i) {
485                 struct net_device *slave_dev = slave->dev;
486
487                 if ((slave_dev->features & NETIF_F_HW_VLAN_FILTER) &&
488                     slave_dev->vlan_rx_add_vid) {
489                         slave_dev->vlan_rx_add_vid(slave_dev, vid);
490                 }
491         }
492
493         res = bond_add_vlan(bond, vid);
494         if (res) {
495                 printk(KERN_ERR DRV_NAME
496                        ": %s: Error: Failed to add vlan id %d\n",
497                        bond_dev->name, vid);
498         }
499 }
500
501 /**
502  * bond_vlan_rx_kill_vid - Propagates deleting an id to slaves
503  * @bond_dev: bonding net device that got called
504  * @vid: vlan id being removed
505  */
506 static void bond_vlan_rx_kill_vid(struct net_device *bond_dev, uint16_t vid)
507 {
508         struct bonding *bond = bond_dev->priv;
509         struct slave *slave;
510         struct net_device *vlan_dev;
511         int i, res;
512
513         bond_for_each_slave(bond, slave, i) {
514                 struct net_device *slave_dev = slave->dev;
515
516                 if ((slave_dev->features & NETIF_F_HW_VLAN_FILTER) &&
517                     slave_dev->vlan_rx_kill_vid) {
518                         /* Save and then restore vlan_dev in the grp array,
519                          * since the slave's driver might clear it.
520                          */
521                         vlan_dev = vlan_group_get_device(bond->vlgrp, vid);
522                         slave_dev->vlan_rx_kill_vid(slave_dev, vid);
523                         vlan_group_set_device(bond->vlgrp, vid, vlan_dev);
524                 }
525         }
526
527         res = bond_del_vlan(bond, vid);
528         if (res) {
529                 printk(KERN_ERR DRV_NAME
530                        ": %s: Error: Failed to remove vlan id %d\n",
531                        bond_dev->name, vid);
532         }
533 }
534
535 static void bond_add_vlans_on_slave(struct bonding *bond, struct net_device *slave_dev)
536 {
537         struct vlan_entry *vlan;
538
539         write_lock_bh(&bond->lock);
540
541         if (list_empty(&bond->vlan_list)) {
542                 goto out;
543         }
544
545         if ((slave_dev->features & NETIF_F_HW_VLAN_RX) &&
546             slave_dev->vlan_rx_register) {
547                 slave_dev->vlan_rx_register(slave_dev, bond->vlgrp);
548         }
549
550         if (!(slave_dev->features & NETIF_F_HW_VLAN_FILTER) ||
551             !(slave_dev->vlan_rx_add_vid)) {
552                 goto out;
553         }
554
555         list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
556                 slave_dev->vlan_rx_add_vid(slave_dev, vlan->vlan_id);
557         }
558
559 out:
560         write_unlock_bh(&bond->lock);
561 }
562
563 static void bond_del_vlans_from_slave(struct bonding *bond, struct net_device *slave_dev)
564 {
565         struct vlan_entry *vlan;
566         struct net_device *vlan_dev;
567
568         write_lock_bh(&bond->lock);
569
570         if (list_empty(&bond->vlan_list)) {
571                 goto out;
572         }
573
574         if (!(slave_dev->features & NETIF_F_HW_VLAN_FILTER) ||
575             !(slave_dev->vlan_rx_kill_vid)) {
576                 goto unreg;
577         }
578
579         list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
580                 /* Save and then restore vlan_dev in the grp array,
581                  * since the slave's driver might clear it.
582                  */
583                 vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id);
584                 slave_dev->vlan_rx_kill_vid(slave_dev, vlan->vlan_id);
585                 vlan_group_set_device(bond->vlgrp, vlan->vlan_id, vlan_dev);
586         }
587
588 unreg:
589         if ((slave_dev->features & NETIF_F_HW_VLAN_RX) &&
590             slave_dev->vlan_rx_register) {
591                 slave_dev->vlan_rx_register(slave_dev, NULL);
592         }
593
594 out:
595         write_unlock_bh(&bond->lock);
596 }
597
598 /*------------------------------- Link status -------------------------------*/
599
600 /*
601  * Set the carrier state for the master according to the state of its
602  * slaves.  If any slaves are up, the master is up.  In 802.3ad mode,
603  * do special 802.3ad magic.
604  *
605  * Returns zero if carrier state does not change, nonzero if it does.
606  */
607 static int bond_set_carrier(struct bonding *bond)
608 {
609         struct slave *slave;
610         int i;
611
612         if (bond->slave_cnt == 0)
613                 goto down;
614
615         if (bond->params.mode == BOND_MODE_8023AD)
616                 return bond_3ad_set_carrier(bond);
617
618         bond_for_each_slave(bond, slave, i) {
619                 if (slave->link == BOND_LINK_UP) {
620                         if (!netif_carrier_ok(bond->dev)) {
621                                 netif_carrier_on(bond->dev);
622                                 return 1;
623                         }
624                         return 0;
625                 }
626         }
627
628 down:
629         if (netif_carrier_ok(bond->dev)) {
630                 netif_carrier_off(bond->dev);
631                 return 1;
632         }
633         return 0;
634 }
635
636 /*
637  * Get link speed and duplex from the slave's base driver
638  * using ethtool. If for some reason the call fails or the
639  * values are invalid, fake speed and duplex to 100/Full
640  * and return error.
641  */
642 static int bond_update_speed_duplex(struct slave *slave)
643 {
644         struct net_device *slave_dev = slave->dev;
645         struct ethtool_cmd etool;
646         int res;
647
648         /* Fake speed and duplex */
649         slave->speed = SPEED_100;
650         slave->duplex = DUPLEX_FULL;
651
652         if (!slave_dev->ethtool_ops || !slave_dev->ethtool_ops->get_settings)
653                 return -1;
654
655         res = slave_dev->ethtool_ops->get_settings(slave_dev, &etool);
656         if (res < 0)
657                 return -1;
658
659         switch (etool.speed) {
660         case SPEED_10:
661         case SPEED_100:
662         case SPEED_1000:
663         case SPEED_10000:
664                 break;
665         default:
666                 return -1;
667         }
668
669         switch (etool.duplex) {
670         case DUPLEX_FULL:
671         case DUPLEX_HALF:
672                 break;
673         default:
674                 return -1;
675         }
676
677         slave->speed = etool.speed;
678         slave->duplex = etool.duplex;
679
680         return 0;
681 }
682
683 /*
684  * if <dev> supports MII link status reporting, check its link status.
685  *
686  * We either do MII/ETHTOOL ioctls, or check netif_carrier_ok(),
687  * depening upon the setting of the use_carrier parameter.
688  *
689  * Return either BMSR_LSTATUS, meaning that the link is up (or we
690  * can't tell and just pretend it is), or 0, meaning that the link is
691  * down.
692  *
693  * If reporting is non-zero, instead of faking link up, return -1 if
694  * both ETHTOOL and MII ioctls fail (meaning the device does not
695  * support them).  If use_carrier is set, return whatever it says.
696  * It'd be nice if there was a good way to tell if a driver supports
697  * netif_carrier, but there really isn't.
698  */
699 static int bond_check_dev_link(struct bonding *bond, struct net_device *slave_dev, int reporting)
700 {
701         static int (* ioctl)(struct net_device *, struct ifreq *, int);
702         struct ifreq ifr;
703         struct mii_ioctl_data *mii;
704
705         if (bond->params.use_carrier) {
706                 return netif_carrier_ok(slave_dev) ? BMSR_LSTATUS : 0;
707         }
708
709         ioctl = slave_dev->do_ioctl;
710         if (ioctl) {
711                 /* TODO: set pointer to correct ioctl on a per team member */
712                 /*       bases to make this more efficient. that is, once  */
713                 /*       we determine the correct ioctl, we will always    */
714                 /*       call it and not the others for that team          */
715                 /*       member.                                           */
716
717                 /*
718                  * We cannot assume that SIOCGMIIPHY will also read a
719                  * register; not all network drivers (e.g., e100)
720                  * support that.
721                  */
722
723                 /* Yes, the mii is overlaid on the ifreq.ifr_ifru */
724                 strncpy(ifr.ifr_name, slave_dev->name, IFNAMSIZ);
725                 mii = if_mii(&ifr);
726                 if (IOCTL(slave_dev, &ifr, SIOCGMIIPHY) == 0) {
727                         mii->reg_num = MII_BMSR;
728                         if (IOCTL(slave_dev, &ifr, SIOCGMIIREG) == 0) {
729                                 return (mii->val_out & BMSR_LSTATUS);
730                         }
731                 }
732         }
733
734         /*
735          * Some drivers cache ETHTOOL_GLINK for a period of time so we only
736          * attempt to get link status from it if the above MII ioctls fail.
737          */
738         if (slave_dev->ethtool_ops) {
739                 if (slave_dev->ethtool_ops->get_link) {
740                         u32 link;
741
742                         link = slave_dev->ethtool_ops->get_link(slave_dev);
743
744                         return link ? BMSR_LSTATUS : 0;
745                 }
746         }
747
748         /*
749          * If reporting, report that either there's no dev->do_ioctl,
750          * or both SIOCGMIIREG and get_link failed (meaning that we
751          * cannot report link status).  If not reporting, pretend
752          * we're ok.
753          */
754         return (reporting ? -1 : BMSR_LSTATUS);
755 }
756
757 /*----------------------------- Multicast list ------------------------------*/
758
759 /*
760  * Returns 0 if dmi1 and dmi2 are the same, non-0 otherwise
761  */
762 static inline int bond_is_dmi_same(struct dev_mc_list *dmi1, struct dev_mc_list *dmi2)
763 {
764         return memcmp(dmi1->dmi_addr, dmi2->dmi_addr, dmi1->dmi_addrlen) == 0 &&
765                         dmi1->dmi_addrlen == dmi2->dmi_addrlen;
766 }
767
768 /*
769  * returns dmi entry if found, NULL otherwise
770  */
771 static struct dev_mc_list *bond_mc_list_find_dmi(struct dev_mc_list *dmi, struct dev_mc_list *mc_list)
772 {
773         struct dev_mc_list *idmi;
774
775         for (idmi = mc_list; idmi; idmi = idmi->next) {
776                 if (bond_is_dmi_same(dmi, idmi)) {
777                         return idmi;
778                 }
779         }
780
781         return NULL;
782 }
783
784 /*
785  * Push the promiscuity flag down to appropriate slaves
786  */
787 static int bond_set_promiscuity(struct bonding *bond, int inc)
788 {
789         int err = 0;
790         if (USES_PRIMARY(bond->params.mode)) {
791                 /* write lock already acquired */
792                 if (bond->curr_active_slave) {
793                         err = dev_set_promiscuity(bond->curr_active_slave->dev,
794                                                   inc);
795                 }
796         } else {
797                 struct slave *slave;
798                 int i;
799                 bond_for_each_slave(bond, slave, i) {
800                         err = dev_set_promiscuity(slave->dev, inc);
801                         if (err)
802                                 return err;
803                 }
804         }
805         return err;
806 }
807
808 /*
809  * Push the allmulti flag down to all slaves
810  */
811 static int bond_set_allmulti(struct bonding *bond, int inc)
812 {
813         int err = 0;
814         if (USES_PRIMARY(bond->params.mode)) {
815                 /* write lock already acquired */
816                 if (bond->curr_active_slave) {
817                         err = dev_set_allmulti(bond->curr_active_slave->dev,
818                                                inc);
819                 }
820         } else {
821                 struct slave *slave;
822                 int i;
823                 bond_for_each_slave(bond, slave, i) {
824                         err = dev_set_allmulti(slave->dev, inc);
825                         if (err)
826                                 return err;
827                 }
828         }
829         return err;
830 }
831
832 /*
833  * Add a Multicast address to slaves
834  * according to mode
835  */
836 static void bond_mc_add(struct bonding *bond, void *addr, int alen)
837 {
838         if (USES_PRIMARY(bond->params.mode)) {
839                 /* write lock already acquired */
840                 if (bond->curr_active_slave) {
841                         dev_mc_add(bond->curr_active_slave->dev, addr, alen, 0);
842                 }
843         } else {
844                 struct slave *slave;
845                 int i;
846                 bond_for_each_slave(bond, slave, i) {
847                         dev_mc_add(slave->dev, addr, alen, 0);
848                 }
849         }
850 }
851
852 /*
853  * Remove a multicast address from slave
854  * according to mode
855  */
856 static void bond_mc_delete(struct bonding *bond, void *addr, int alen)
857 {
858         if (USES_PRIMARY(bond->params.mode)) {
859                 /* write lock already acquired */
860                 if (bond->curr_active_slave) {
861                         dev_mc_delete(bond->curr_active_slave->dev, addr, alen, 0);
862                 }
863         } else {
864                 struct slave *slave;
865                 int i;
866                 bond_for_each_slave(bond, slave, i) {
867                         dev_mc_delete(slave->dev, addr, alen, 0);
868                 }
869         }
870 }
871
872
873 /*
874  * Retrieve the list of registered multicast addresses for the bonding
875  * device and retransmit an IGMP JOIN request to the current active
876  * slave.
877  */
878 static void bond_resend_igmp_join_requests(struct bonding *bond)
879 {
880         struct in_device *in_dev;
881         struct ip_mc_list *im;
882
883         rcu_read_lock();
884         in_dev = __in_dev_get_rcu(bond->dev);
885         if (in_dev) {
886                 for (im = in_dev->mc_list; im; im = im->next) {
887                         ip_mc_rejoin_group(im);
888                 }
889         }
890
891         rcu_read_unlock();
892 }
893
894 /*
895  * Totally destroys the mc_list in bond
896  */
897 static void bond_mc_list_destroy(struct bonding *bond)
898 {
899         struct dev_mc_list *dmi;
900
901         dmi = bond->mc_list;
902         while (dmi) {
903                 bond->mc_list = dmi->next;
904                 kfree(dmi);
905                 dmi = bond->mc_list;
906         }
907         bond->mc_list = NULL;
908 }
909
910 /*
911  * Copy all the Multicast addresses from src to the bonding device dst
912  */
913 static int bond_mc_list_copy(struct dev_mc_list *mc_list, struct bonding *bond,
914                              gfp_t gfp_flag)
915 {
916         struct dev_mc_list *dmi, *new_dmi;
917
918         for (dmi = mc_list; dmi; dmi = dmi->next) {
919                 new_dmi = kmalloc(sizeof(struct dev_mc_list), gfp_flag);
920
921                 if (!new_dmi) {
922                         /* FIXME: Potential memory leak !!! */
923                         return -ENOMEM;
924                 }
925
926                 new_dmi->next = bond->mc_list;
927                 bond->mc_list = new_dmi;
928                 new_dmi->dmi_addrlen = dmi->dmi_addrlen;
929                 memcpy(new_dmi->dmi_addr, dmi->dmi_addr, dmi->dmi_addrlen);
930                 new_dmi->dmi_users = dmi->dmi_users;
931                 new_dmi->dmi_gusers = dmi->dmi_gusers;
932         }
933
934         return 0;
935 }
936
937 /*
938  * flush all members of flush->mc_list from device dev->mc_list
939  */
940 static void bond_mc_list_flush(struct net_device *bond_dev, struct net_device *slave_dev)
941 {
942         struct bonding *bond = bond_dev->priv;
943         struct dev_mc_list *dmi;
944
945         for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) {
946                 dev_mc_delete(slave_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0);
947         }
948
949         if (bond->params.mode == BOND_MODE_8023AD) {
950                 /* del lacpdu mc addr from mc list */
951                 u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
952
953                 dev_mc_delete(slave_dev, lacpdu_multicast, ETH_ALEN, 0);
954         }
955 }
956
957 /*--------------------------- Active slave change ---------------------------*/
958
959 /*
960  * Update the mc list and multicast-related flags for the new and
961  * old active slaves (if any) according to the multicast mode, and
962  * promiscuous flags unconditionally.
963  */
964 static void bond_mc_swap(struct bonding *bond, struct slave *new_active, struct slave *old_active)
965 {
966         struct dev_mc_list *dmi;
967
968         if (!USES_PRIMARY(bond->params.mode)) {
969                 /* nothing to do -  mc list is already up-to-date on
970                  * all slaves
971                  */
972                 return;
973         }
974
975         if (old_active) {
976                 if (bond->dev->flags & IFF_PROMISC) {
977                         dev_set_promiscuity(old_active->dev, -1);
978                 }
979
980                 if (bond->dev->flags & IFF_ALLMULTI) {
981                         dev_set_allmulti(old_active->dev, -1);
982                 }
983
984                 for (dmi = bond->dev->mc_list; dmi; dmi = dmi->next) {
985                         dev_mc_delete(old_active->dev, dmi->dmi_addr, dmi->dmi_addrlen, 0);
986                 }
987         }
988
989         if (new_active) {
990                 /* FIXME: Signal errors upstream. */
991                 if (bond->dev->flags & IFF_PROMISC) {
992                         dev_set_promiscuity(new_active->dev, 1);
993                 }
994
995                 if (bond->dev->flags & IFF_ALLMULTI) {
996                         dev_set_allmulti(new_active->dev, 1);
997                 }
998
999                 for (dmi = bond->dev->mc_list; dmi; dmi = dmi->next) {
1000                         dev_mc_add(new_active->dev, dmi->dmi_addr, dmi->dmi_addrlen, 0);
1001                 }
1002                 bond_resend_igmp_join_requests(bond);
1003         }
1004 }
1005
1006 /*
1007  * bond_do_fail_over_mac
1008  *
1009  * Perform special MAC address swapping for fail_over_mac settings
1010  *
1011  * Called with RTNL, bond->lock for read, curr_slave_lock for write_bh.
1012  */
1013 static void bond_do_fail_over_mac(struct bonding *bond,
1014                                   struct slave *new_active,
1015                                   struct slave *old_active)
1016 {
1017         u8 tmp_mac[ETH_ALEN];
1018         struct sockaddr saddr;
1019         int rv;
1020
1021         switch (bond->params.fail_over_mac) {
1022         case BOND_FOM_ACTIVE:
1023                 if (new_active)
1024                         memcpy(bond->dev->dev_addr,  new_active->dev->dev_addr,
1025                                new_active->dev->addr_len);
1026                 break;
1027         case BOND_FOM_FOLLOW:
1028                 /*
1029                  * if new_active && old_active, swap them
1030                  * if just old_active, do nothing (going to no active slave)
1031                  * if just new_active, set new_active to bond's MAC
1032                  */
1033                 if (!new_active)
1034                         return;
1035
1036                 write_unlock_bh(&bond->curr_slave_lock);
1037                 read_unlock(&bond->lock);
1038
1039                 if (old_active) {
1040                         memcpy(tmp_mac, new_active->dev->dev_addr, ETH_ALEN);
1041                         memcpy(saddr.sa_data, old_active->dev->dev_addr,
1042                                ETH_ALEN);
1043                         saddr.sa_family = new_active->dev->type;
1044                 } else {
1045                         memcpy(saddr.sa_data, bond->dev->dev_addr, ETH_ALEN);
1046                         saddr.sa_family = bond->dev->type;
1047                 }
1048
1049                 rv = dev_set_mac_address(new_active->dev, &saddr);
1050                 if (rv) {
1051                         printk(KERN_ERR DRV_NAME
1052                                ": %s: Error %d setting MAC of slave %s\n",
1053                                bond->dev->name, -rv, new_active->dev->name);
1054                         goto out;
1055                 }
1056
1057                 if (!old_active)
1058                         goto out;
1059
1060                 memcpy(saddr.sa_data, tmp_mac, ETH_ALEN);
1061                 saddr.sa_family = old_active->dev->type;
1062
1063                 rv = dev_set_mac_address(old_active->dev, &saddr);
1064                 if (rv)
1065                         printk(KERN_ERR DRV_NAME
1066                                ": %s: Error %d setting MAC of slave %s\n",
1067                                bond->dev->name, -rv, new_active->dev->name);
1068 out:
1069                 read_lock(&bond->lock);
1070                 write_lock_bh(&bond->curr_slave_lock);
1071                 break;
1072         default:
1073                 printk(KERN_ERR DRV_NAME
1074                        ": %s: bond_do_fail_over_mac impossible: bad policy %d\n",
1075                        bond->dev->name, bond->params.fail_over_mac);
1076                 break;
1077         }
1078
1079 }
1080
1081
1082 /**
1083  * find_best_interface - select the best available slave to be the active one
1084  * @bond: our bonding struct
1085  *
1086  * Warning: Caller must hold curr_slave_lock for writing.
1087  */
1088 static struct slave *bond_find_best_slave(struct bonding *bond)
1089 {
1090         struct slave *new_active, *old_active;
1091         struct slave *bestslave = NULL;
1092         int mintime = bond->params.updelay;
1093         int i;
1094
1095         new_active = old_active = bond->curr_active_slave;
1096
1097         if (!new_active) { /* there were no active slaves left */
1098                 if (bond->slave_cnt > 0) {  /* found one slave */
1099                         new_active = bond->first_slave;
1100                 } else {
1101                         return NULL; /* still no slave, return NULL */
1102                 }
1103         }
1104
1105         /* first try the primary link; if arping, a link must tx/rx traffic
1106          * before it can be considered the curr_active_slave - also, we would skip
1107          * slaves between the curr_active_slave and primary_slave that may be up
1108          * and able to arp
1109          */
1110         if ((bond->primary_slave) &&
1111             (!bond->params.arp_interval) &&
1112             (IS_UP(bond->primary_slave->dev))) {
1113                 new_active = bond->primary_slave;
1114         }
1115
1116         /* remember where to stop iterating over the slaves */
1117         old_active = new_active;
1118
1119         bond_for_each_slave_from(bond, new_active, i, old_active) {
1120                 if (IS_UP(new_active->dev)) {
1121                         if (new_active->link == BOND_LINK_UP) {
1122                                 return new_active;
1123                         } else if (new_active->link == BOND_LINK_BACK) {
1124                                 /* link up, but waiting for stabilization */
1125                                 if (new_active->delay < mintime) {
1126                                         mintime = new_active->delay;
1127                                         bestslave = new_active;
1128                                 }
1129                         }
1130                 }
1131         }
1132
1133         return bestslave;
1134 }
1135
/**
 * bond_change_active_slave - change the active slave into the specified one
 * @bond: our bonding struct
 * @new_active: the new slave to make the active one (may be NULL)
 *
 * Set the new slave to the bond's settings and unset them on the old
 * curr_active_slave.
 * Setting include flags, mc-list, promiscuity, allmulti, etc.
 *
 * If @new_active's link state is %BOND_LINK_BACK we'll set it to
 * %BOND_LINK_UP, because it is apparently the best available slave we have,
 * even though its updelay hasn't timed out yet.
 *
 * Nothing is done when @new_active already is the current active slave.
 *
 * If new_active is not NULL, caller must hold bond->lock for read and
 * curr_slave_lock for write_bh.  In active-backup mode with a non-NULL
 * @new_active both locks are dropped and re-acquired around
 * netdev_bonding_change().
 */
void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
{
        struct slave *old_active = bond->curr_active_slave;

        if (old_active == new_active) {
                return;
        }

        if (new_active) {
                /* record the time at which this slave was selected */
                new_active->jiffies = jiffies;

                if (new_active->link == BOND_LINK_BACK) {
                        if (USES_PRIMARY(bond->params.mode)) {
                                printk(KERN_INFO DRV_NAME
                                       ": %s: making interface %s the new "
                                       "active one %d ms earlier.\n",
                                       bond->dev->name, new_active->dev->name,
                                       (bond->params.updelay - new_active->delay) * bond->params.miimon);
                        }

                        /* cut the updelay short and promote the link to UP */
                        new_active->delay = 0;
                        new_active->link = BOND_LINK_UP;

                        /* let the mode-specific code know about the promotion */
                        if (bond->params.mode == BOND_MODE_8023AD) {
                                bond_3ad_handle_link_change(new_active, BOND_LINK_UP);
                        }

                        if ((bond->params.mode == BOND_MODE_TLB) ||
                            (bond->params.mode == BOND_MODE_ALB)) {
                                bond_alb_handle_link_change(bond, new_active, BOND_LINK_UP);
                        }
                } else {
                        if (USES_PRIMARY(bond->params.mode)) {
                                printk(KERN_INFO DRV_NAME
                                       ": %s: making interface %s the new "
                                       "active one.\n",
                                       bond->dev->name, new_active->dev->name);
                        }
                }
        }

        /* move promisc/allmulti/mc-list state from the old to the new slave */
        if (USES_PRIMARY(bond->params.mode)) {
                bond_mc_swap(bond, new_active, old_active);
        }

        if ((bond->params.mode == BOND_MODE_TLB) ||
            (bond->params.mode == BOND_MODE_ALB)) {
                /* NOTE(review): curr_active_slave is not assigned on this
                 * path; presumably bond_alb_handle_active_change() does it -
                 * confirm against bond_alb.c
                 */
                bond_alb_handle_active_change(bond, new_active);
                if (old_active)
                        bond_set_slave_inactive_flags(old_active);
                if (new_active)
                        bond_set_slave_active_flags(new_active);
        } else {
                bond->curr_active_slave = new_active;
        }

        if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) {
                if (old_active) {
                        bond_set_slave_inactive_flags(old_active);
                }

                if (new_active) {
                        bond_set_slave_active_flags(new_active);

                        if (bond->params.fail_over_mac)
                                bond_do_fail_over_mac(bond, new_active,
                                                      old_active);

                        /* arm the gratuitous ARP counter and send one now */
                        bond->send_grat_arp = bond->params.num_grat_arp;
                        bond_send_gratuitous_arp(bond);

                        /* likewise for unsolicited neighbour advertisements */
                        bond->send_unsol_na = bond->params.num_unsol_na;
                        bond_send_unsolicited_na(bond);

                        /* netdev_bonding_change() is called with both
                         * bonding locks released; they are retaken in the
                         * original order afterwards
                         */
                        write_unlock_bh(&bond->curr_slave_lock);
                        read_unlock(&bond->lock);

                        netdev_bonding_change(bond->dev);

                        read_lock(&bond->lock);
                        write_lock_bh(&bond->curr_slave_lock);
                }
        }
}
1236
1237 /**
1238  * bond_select_active_slave - select a new active slave, if needed
1239  * @bond: our bonding struct
1240  *
1241  * This functions shoud be called when one of the following occurs:
1242  * - The old curr_active_slave has been released or lost its link.
1243  * - The primary_slave has got its link back.
1244  * - A slave has got its link back and there's no old curr_active_slave.
1245  *
1246  * Caller must hold bond->lock for read and curr_slave_lock for write_bh.
1247  */
1248 void bond_select_active_slave(struct bonding *bond)
1249 {
1250         struct slave *best_slave;
1251         int rv;
1252
1253         best_slave = bond_find_best_slave(bond);
1254         if (best_slave != bond->curr_active_slave) {
1255                 bond_change_active_slave(bond, best_slave);
1256                 rv = bond_set_carrier(bond);
1257                 if (!rv)
1258                         return;
1259
1260                 if (netif_carrier_ok(bond->dev)) {
1261                         printk(KERN_INFO DRV_NAME
1262                                ": %s: first active interface up!\n",
1263                                bond->dev->name);
1264                 } else {
1265                         printk(KERN_INFO DRV_NAME ": %s: "
1266                                "now running without any active interface !\n",
1267                                bond->dev->name);
1268                 }
1269         }
1270 }
1271
1272 /*--------------------------- slave list handling ---------------------------*/
1273
1274 /*
1275  * This function attaches the slave to the end of list.
1276  *
1277  * bond->lock held for writing by caller.
1278  */
1279 static void bond_attach_slave(struct bonding *bond, struct slave *new_slave)
1280 {
1281         if (bond->first_slave == NULL) { /* attaching the first slave */
1282                 new_slave->next = new_slave;
1283                 new_slave->prev = new_slave;
1284                 bond->first_slave = new_slave;
1285         } else {
1286                 new_slave->next = bond->first_slave;
1287                 new_slave->prev = bond->first_slave->prev;
1288                 new_slave->next->prev = new_slave;
1289                 new_slave->prev->next = new_slave;
1290         }
1291
1292         bond->slave_cnt++;
1293 }
1294
1295 /*
1296  * This function detaches the slave from the list.
1297  * WARNING: no check is made to verify if the slave effectively
1298  * belongs to <bond>.
1299  * Nothing is freed on return, structures are just unchained.
1300  * If any slave pointer in bond was pointing to <slave>,
1301  * it should be changed by the calling function.
1302  *
1303  * bond->lock held for writing by caller.
1304  */
1305 static void bond_detach_slave(struct bonding *bond, struct slave *slave)
1306 {
1307         if (slave->next) {
1308                 slave->next->prev = slave->prev;
1309         }
1310
1311         if (slave->prev) {
1312                 slave->prev->next = slave->next;
1313         }
1314
1315         if (bond->first_slave == slave) { /* slave is the first slave */
1316                 if (bond->slave_cnt > 1) { /* there are more slave */
1317                         bond->first_slave = slave->next;
1318                 } else {
1319                         bond->first_slave = NULL; /* slave was the last one */
1320                 }
1321         }
1322
1323         slave->next = NULL;
1324         slave->prev = NULL;
1325         bond->slave_cnt--;
1326 }
1327
1328 /*---------------------------------- IOCTL ----------------------------------*/
1329
1330 static int bond_sethwaddr(struct net_device *bond_dev,
1331                           struct net_device *slave_dev)
1332 {
1333         dprintk("bond_dev=%p\n", bond_dev);
1334         dprintk("slave_dev=%p\n", slave_dev);
1335         dprintk("slave_dev->addr_len=%d\n", slave_dev->addr_len);
1336         memcpy(bond_dev->dev_addr, slave_dev->dev_addr, slave_dev->addr_len);
1337         return 0;
1338 }
1339
1340 #define BOND_VLAN_FEATURES \
1341         (NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX | \
1342          NETIF_F_HW_VLAN_FILTER)
1343
1344 /* 
1345  * Compute the common dev->feature set available to all slaves.  Some
1346  * feature bits are managed elsewhere, so preserve those feature bits
1347  * on the master device.
1348  */
1349 static int bond_compute_features(struct bonding *bond)
1350 {
1351         struct slave *slave;
1352         struct net_device *bond_dev = bond->dev;
1353         unsigned long features = bond_dev->features;
1354         unsigned short max_hard_header_len = max((u16)ETH_HLEN,
1355                                                 bond_dev->hard_header_len);
1356         int i;
1357
1358         features &= ~(NETIF_F_ALL_CSUM | BOND_VLAN_FEATURES);
1359         features |=  NETIF_F_GSO_MASK | NETIF_F_NO_CSUM;
1360
1361         if (!bond->first_slave)
1362                 goto done;
1363
1364         features &= ~NETIF_F_ONE_FOR_ALL;
1365
1366         bond_for_each_slave(bond, slave, i) {
1367                 features = netdev_increment_features(features,
1368                                                      slave->dev->features,
1369                                                      NETIF_F_ONE_FOR_ALL);
1370                 if (slave->dev->hard_header_len > max_hard_header_len)
1371                         max_hard_header_len = slave->dev->hard_header_len;
1372         }
1373
1374 done:
1375         features |= (bond_dev->features & BOND_VLAN_FEATURES);
1376         bond_dev->features = netdev_fix_features(features, NULL);
1377         bond_dev->hard_header_len = max_hard_header_len;
1378
1379         return 0;
1380 }
1381
1382
1383 static void bond_setup_by_slave(struct net_device *bond_dev,
1384                                 struct net_device *slave_dev)
1385 {
1386         struct bonding *bond = bond_dev->priv;
1387
1388         bond_dev->neigh_setup           = slave_dev->neigh_setup;
1389         bond_dev->header_ops            = slave_dev->header_ops;
1390
1391         bond_dev->type              = slave_dev->type;
1392         bond_dev->hard_header_len   = slave_dev->hard_header_len;
1393         bond_dev->addr_len          = slave_dev->addr_len;
1394
1395         memcpy(bond_dev->broadcast, slave_dev->broadcast,
1396                 slave_dev->addr_len);
1397         bond->setup_by_slave = 1;
1398 }
1399
1400 /* enslave device <slave> to bond device <master> */
1401 int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
1402 {
1403         struct bonding *bond = bond_dev->priv;
1404         struct slave *new_slave = NULL;
1405         struct dev_mc_list *dmi;
1406         struct sockaddr addr;
1407         int link_reporting;
1408         int old_features = bond_dev->features;
1409         int res = 0;
1410
1411         if (!bond->params.use_carrier && slave_dev->ethtool_ops == NULL &&
1412                 slave_dev->do_ioctl == NULL) {
1413                 printk(KERN_WARNING DRV_NAME
1414                        ": %s: Warning: no link monitoring support for %s\n",
1415                        bond_dev->name, slave_dev->name);
1416         }
1417
1418         /* bond must be initialized by bond_open() before enslaving */
1419         if (!(bond_dev->flags & IFF_UP)) {
1420                 printk(KERN_WARNING DRV_NAME
1421                         " %s: master_dev is not up in bond_enslave\n",
1422                         bond_dev->name);
1423         }
1424
1425         /* already enslaved */
1426         if (slave_dev->flags & IFF_SLAVE) {
1427                 dprintk("Error, Device was already enslaved\n");
1428                 return -EBUSY;
1429         }
1430
1431         /* vlan challenged mutual exclusion */
1432         /* no need to lock since we're protected by rtnl_lock */
1433         if (slave_dev->features & NETIF_F_VLAN_CHALLENGED) {
1434                 dprintk("%s: NETIF_F_VLAN_CHALLENGED\n", slave_dev->name);
1435                 if (!list_empty(&bond->vlan_list)) {
1436                         printk(KERN_ERR DRV_NAME
1437                                ": %s: Error: cannot enslave VLAN "
1438                                "challenged slave %s on VLAN enabled "
1439                                "bond %s\n", bond_dev->name, slave_dev->name,
1440                                bond_dev->name);
1441                         return -EPERM;
1442                 } else {
1443                         printk(KERN_WARNING DRV_NAME
1444                                ": %s: Warning: enslaved VLAN challenged "
1445                                "slave %s. Adding VLANs will be blocked as "
1446                                "long as %s is part of bond %s\n",
1447                                bond_dev->name, slave_dev->name, slave_dev->name,
1448                                bond_dev->name);
1449                         bond_dev->features |= NETIF_F_VLAN_CHALLENGED;
1450                 }
1451         } else {
1452                 dprintk("%s: ! NETIF_F_VLAN_CHALLENGED\n", slave_dev->name);
1453                 if (bond->slave_cnt == 0) {
1454                         /* First slave, and it is not VLAN challenged,
1455                          * so remove the block of adding VLANs over the bond.
1456                          */
1457                         bond_dev->features &= ~NETIF_F_VLAN_CHALLENGED;
1458                 }
1459         }
1460
1461         /*
1462          * Old ifenslave binaries are no longer supported.  These can
1463          * be identified with moderate accurary by the state of the slave:
1464          * the current ifenslave will set the interface down prior to
1465          * enslaving it; the old ifenslave will not.
1466          */
1467         if ((slave_dev->flags & IFF_UP)) {
1468                 printk(KERN_ERR DRV_NAME ": %s is up. "
1469                        "This may be due to an out of date ifenslave.\n",
1470                        slave_dev->name);
1471                 res = -EPERM;
1472                 goto err_undo_flags;
1473         }
1474
1475         /* set bonding device ether type by slave - bonding netdevices are
1476          * created with ether_setup, so when the slave type is not ARPHRD_ETHER
1477          * there is a need to override some of the type dependent attribs/funcs.
1478          *
1479          * bond ether type mutual exclusion - don't allow slaves of dissimilar
1480          * ether type (eg ARPHRD_ETHER and ARPHRD_INFINIBAND) share the same bond
1481          */
1482         if (bond->slave_cnt == 0) {
1483                 if (slave_dev->type != ARPHRD_ETHER)
1484                         bond_setup_by_slave(bond_dev, slave_dev);
1485         } else if (bond_dev->type != slave_dev->type) {
1486                 printk(KERN_ERR DRV_NAME ": %s ether type (%d) is different "
1487                         "from other slaves (%d), can not enslave it.\n",
1488                         slave_dev->name,
1489                         slave_dev->type, bond_dev->type);
1490                         res = -EINVAL;
1491                         goto err_undo_flags;
1492         }
1493
1494         if (slave_dev->set_mac_address == NULL) {
1495                 if (bond->slave_cnt == 0) {
1496                         printk(KERN_WARNING DRV_NAME
1497                                ": %s: Warning: The first slave device "
1498                                "specified does not support setting the MAC "
1499                                "address. Setting fail_over_mac to active.",
1500                                bond_dev->name);
1501                         bond->params.fail_over_mac = BOND_FOM_ACTIVE;
1502                 } else if (bond->params.fail_over_mac != BOND_FOM_ACTIVE) {
1503                         printk(KERN_ERR DRV_NAME
1504                                 ": %s: Error: The slave device specified "
1505                                 "does not support setting the MAC address, "
1506                                 "but fail_over_mac is not set to active.\n"
1507                                 , bond_dev->name);
1508                         res = -EOPNOTSUPP;
1509                         goto err_undo_flags;
1510                 }
1511         }
1512
1513         new_slave = kzalloc(sizeof(struct slave), GFP_KERNEL);
1514         if (!new_slave) {
1515                 res = -ENOMEM;
1516                 goto err_undo_flags;
1517         }
1518
1519         /* save slave's original flags before calling
1520          * netdev_set_master and dev_open
1521          */
1522         new_slave->original_flags = slave_dev->flags;
1523
1524         /*
1525          * Save slave's original ("permanent") mac address for modes
1526          * that need it, and for restoring it upon release, and then
1527          * set it to the master's address
1528          */
1529         memcpy(new_slave->perm_hwaddr, slave_dev->dev_addr, ETH_ALEN);
1530
1531         if (!bond->params.fail_over_mac) {
1532                 /*
1533                  * Set slave to master's mac address.  The application already
1534                  * set the master's mac address to that of the first slave
1535                  */
1536                 memcpy(addr.sa_data, bond_dev->dev_addr, bond_dev->addr_len);
1537                 addr.sa_family = slave_dev->type;
1538                 res = dev_set_mac_address(slave_dev, &addr);
1539                 if (res) {
1540                         dprintk("Error %d calling set_mac_address\n", res);
1541                         goto err_free;
1542                 }
1543         }
1544
1545         res = netdev_set_master(slave_dev, bond_dev);
1546         if (res) {
1547                 dprintk("Error %d calling netdev_set_master\n", res);
1548                 goto err_restore_mac;
1549         }
1550         /* open the slave since the application closed it */
1551         res = dev_open(slave_dev);
1552         if (res) {
1553                 dprintk("Openning slave %s failed\n", slave_dev->name);
1554                 goto err_unset_master;
1555         }
1556
1557         new_slave->dev = slave_dev;
1558         slave_dev->priv_flags |= IFF_BONDING;
1559
1560         if ((bond->params.mode == BOND_MODE_TLB) ||
1561             (bond->params.mode == BOND_MODE_ALB)) {
1562                 /* bond_alb_init_slave() must be called before all other stages since
1563                  * it might fail and we do not want to have to undo everything
1564                  */
1565                 res = bond_alb_init_slave(bond, new_slave);
1566                 if (res) {
1567                         goto err_close;
1568                 }
1569         }
1570
1571         /* If the mode USES_PRIMARY, then the new slave gets the
1572          * master's promisc (and mc) settings only if it becomes the
1573          * curr_active_slave, and that is taken care of later when calling
1574          * bond_change_active()
1575          */
1576         if (!USES_PRIMARY(bond->params.mode)) {
1577                 /* set promiscuity level to new slave */
1578                 if (bond_dev->flags & IFF_PROMISC) {
1579                         res = dev_set_promiscuity(slave_dev, 1);
1580                         if (res)
1581                                 goto err_close;
1582                 }
1583
1584                 /* set allmulti level to new slave */
1585                 if (bond_dev->flags & IFF_ALLMULTI) {
1586                         res = dev_set_allmulti(slave_dev, 1);
1587                         if (res)
1588                                 goto err_close;
1589                 }
1590
1591                 netif_addr_lock_bh(bond_dev);
1592                 /* upload master's mc_list to new slave */
1593                 for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) {
1594                         dev_mc_add (slave_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0);
1595                 }
1596                 netif_addr_unlock_bh(bond_dev);
1597         }
1598
1599         if (bond->params.mode == BOND_MODE_8023AD) {
1600                 /* add lacpdu mc addr to mc list */
1601                 u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
1602
1603                 dev_mc_add(slave_dev, lacpdu_multicast, ETH_ALEN, 0);
1604         }
1605
1606         bond_add_vlans_on_slave(bond, slave_dev);
1607
1608         write_lock_bh(&bond->lock);
1609
1610         bond_attach_slave(bond, new_slave);
1611
1612         new_slave->delay = 0;
1613         new_slave->link_failure_count = 0;
1614
1615         bond_compute_features(bond);
1616
1617         write_unlock_bh(&bond->lock);
1618
1619         read_lock(&bond->lock);
1620
1621         new_slave->last_arp_rx = jiffies;
1622
1623         if (bond->params.miimon && !bond->params.use_carrier) {
1624                 link_reporting = bond_check_dev_link(bond, slave_dev, 1);
1625
1626                 if ((link_reporting == -1) && !bond->params.arp_interval) {
1627                         /*
1628                          * miimon is set but a bonded network driver
1629                          * does not support ETHTOOL/MII and
1630                          * arp_interval is not set.  Note: if
1631                          * use_carrier is enabled, we will never go
1632                          * here (because netif_carrier is always
1633                          * supported); thus, we don't need to change
1634                          * the messages for netif_carrier.
1635                          */
1636                         printk(KERN_WARNING DRV_NAME
1637                                ": %s: Warning: MII and ETHTOOL support not "
1638                                "available for interface %s, and "
1639                                "arp_interval/arp_ip_target module parameters "
1640                                "not specified, thus bonding will not detect "
1641                                "link failures! see bonding.txt for details.\n",
1642                                bond_dev->name, slave_dev->name);
1643                 } else if (link_reporting == -1) {
1644                         /* unable get link status using mii/ethtool */
1645                         printk(KERN_WARNING DRV_NAME
1646                                ": %s: Warning: can't get link status from "
1647                                "interface %s; the network driver associated "
1648                                "with this interface does not support MII or "
1649                                "ETHTOOL link status reporting, thus miimon "
1650                                "has no effect on this interface.\n",
1651                                bond_dev->name, slave_dev->name);
1652                 }
1653         }
1654
1655         /* check for initial state */
1656         if (!bond->params.miimon ||
1657             (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS)) {
1658                 if (bond->params.updelay) {
1659                         dprintk("Initial state of slave_dev is "
1660                                 "BOND_LINK_BACK\n");
1661                         new_slave->link  = BOND_LINK_BACK;
1662                         new_slave->delay = bond->params.updelay;
1663                 } else {
1664                         dprintk("Initial state of slave_dev is "
1665                                 "BOND_LINK_UP\n");
1666                         new_slave->link  = BOND_LINK_UP;
1667                 }
1668                 new_slave->jiffies = jiffies;
1669         } else {
1670                 dprintk("Initial state of slave_dev is "
1671                         "BOND_LINK_DOWN\n");
1672                 new_slave->link  = BOND_LINK_DOWN;
1673         }
1674
1675         if (bond_update_speed_duplex(new_slave) &&
1676             (new_slave->link != BOND_LINK_DOWN)) {
1677                 printk(KERN_WARNING DRV_NAME
1678                        ": %s: Warning: failed to get speed and duplex from %s, "
1679                        "assumed to be 100Mb/sec and Full.\n",
1680                        bond_dev->name, new_slave->dev->name);
1681
1682                 if (bond->params.mode == BOND_MODE_8023AD) {
1683                         printk(KERN_WARNING DRV_NAME
1684                                ": %s: Warning: Operation of 802.3ad mode requires ETHTOOL "
1685                                "support in base driver for proper aggregator "
1686                                "selection.\n", bond_dev->name);
1687                 }
1688         }
1689
1690         if (USES_PRIMARY(bond->params.mode) && bond->params.primary[0]) {
1691                 /* if there is a primary slave, remember it */
1692                 if (strcmp(bond->params.primary, new_slave->dev->name) == 0) {
1693                         bond->primary_slave = new_slave;
1694                 }
1695         }
1696
1697         write_lock_bh(&bond->curr_slave_lock);
1698
1699         switch (bond->params.mode) {
1700         case BOND_MODE_ACTIVEBACKUP:
1701                 bond_set_slave_inactive_flags(new_slave);
1702                 bond_select_active_slave(bond);
1703                 break;
1704         case BOND_MODE_8023AD:
1705                 /* in 802.3ad mode, the internal mechanism
1706                  * will activate the slaves in the selected
1707                  * aggregator
1708                  */
1709                 bond_set_slave_inactive_flags(new_slave);
1710                 /* if this is the first slave */
1711                 if (bond->slave_cnt == 1) {
1712                         SLAVE_AD_INFO(new_slave).id = 1;
1713                         /* Initialize AD with the number of times that the AD timer is called in 1 second
1714                          * can be called only after the mac address of the bond is set
1715                          */
1716                         bond_3ad_initialize(bond, 1000/AD_TIMER_INTERVAL,
1717                                             bond->params.lacp_fast);
1718                 } else {
1719                         SLAVE_AD_INFO(new_slave).id =
1720                                 SLAVE_AD_INFO(new_slave->prev).id + 1;
1721                 }
1722
1723                 bond_3ad_bind_slave(new_slave);
1724                 break;
1725         case BOND_MODE_TLB:
1726         case BOND_MODE_ALB:
1727                 new_slave->state = BOND_STATE_ACTIVE;
1728                 bond_set_slave_inactive_flags(new_slave);
1729                 break;
1730         default:
1731                 dprintk("This slave is always active in trunk mode\n");
1732
1733                 /* always active in trunk mode */
1734                 new_slave->state = BOND_STATE_ACTIVE;
1735
1736                 /* In trunking mode there is little meaning to curr_active_slave
1737                  * anyway (it holds no special properties of the bond device),
1738                  * so we can change it without calling change_active_interface()
1739                  */
1740                 if (!bond->curr_active_slave) {
1741                         bond->curr_active_slave = new_slave;
1742                 }
1743                 break;
1744         } /* switch(bond_mode) */
1745
1746         write_unlock_bh(&bond->curr_slave_lock);
1747
1748         bond_set_carrier(bond);
1749
1750         read_unlock(&bond->lock);
1751
1752         res = bond_create_slave_symlinks(bond_dev, slave_dev);
1753         if (res)
1754                 goto err_close;
1755
1756         printk(KERN_INFO DRV_NAME
1757                ": %s: enslaving %s as a%s interface with a%s link.\n",
1758                bond_dev->name, slave_dev->name,
1759                new_slave->state == BOND_STATE_ACTIVE ? "n active" : " backup",
1760                new_slave->link != BOND_LINK_DOWN ? "n up" : " down");
1761
1762         /* enslave is successful */
1763         return 0;
1764
1765 /* Undo stages on error */
1766 err_close:
1767         dev_close(slave_dev);
1768
1769 err_unset_master:
1770         netdev_set_master(slave_dev, NULL);
1771
1772 err_restore_mac:
1773         if (!bond->params.fail_over_mac) {
1774                 /* XXX TODO - fom follow mode needs to change master's
1775                  * MAC if this slave's MAC is in use by the bond, or at
1776                  * least print a warning.
1777                  */
1778                 memcpy(addr.sa_data, new_slave->perm_hwaddr, ETH_ALEN);
1779                 addr.sa_family = slave_dev->type;
1780                 dev_set_mac_address(slave_dev, &addr);
1781         }
1782
1783 err_free:
1784         kfree(new_slave);
1785
1786 err_undo_flags:
1787         bond_dev->features = old_features;
1788  
1789         return res;
1790 }
1791
1792 /*
1793  * Try to release the slave device <slave> from the bond device <master>
1794  * It is legal to access curr_active_slave without a lock because all the function
1795  * is write-locked.
1796  *
1797  * The rules for slave state should be:
1798  *   for Active/Backup:
1799  *     Active stays on all backups go down
1800  *   for Bonded connections:
1801  *     The first up interface should be left on and all others downed.
1802  */
1803 int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
1804 {
1805         struct bonding *bond = bond_dev->priv;
1806         struct slave *slave, *oldcurrent;
1807         struct sockaddr addr;
1808         int mac_addr_differ;
1809
1810         /* slave is not a slave or master is not master of this slave */
1811         if (!(slave_dev->flags & IFF_SLAVE) ||
1812             (slave_dev->master != bond_dev)) {
1813                 printk(KERN_ERR DRV_NAME
1814                        ": %s: Error: cannot release %s.\n",
1815                        bond_dev->name, slave_dev->name);
1816                 return -EINVAL;
1817         }
1818
1819         write_lock_bh(&bond->lock);
1820
1821         slave = bond_get_slave_by_dev(bond, slave_dev);
1822         if (!slave) {
1823                 /* not a slave of this bond */
1824                 printk(KERN_INFO DRV_NAME
1825                        ": %s: %s not enslaved\n",
1826                        bond_dev->name, slave_dev->name);
1827                 write_unlock_bh(&bond->lock);
1828                 return -EINVAL;
1829         }
1830
1831         if (!bond->params.fail_over_mac) {
1832                 mac_addr_differ = memcmp(bond_dev->dev_addr, slave->perm_hwaddr,
1833                                          ETH_ALEN);
1834                 if (!mac_addr_differ && (bond->slave_cnt > 1))
1835                         printk(KERN_WARNING DRV_NAME
1836                                ": %s: Warning: the permanent HWaddr of %s - "
1837                                "%pM - is still in use by %s. "
1838                                "Set the HWaddr of %s to a different address "
1839                                "to avoid conflicts.\n",
1840                                bond_dev->name, slave_dev->name,
1841                                slave->perm_hwaddr,
1842                                bond_dev->name, slave_dev->name);
1843         }
1844
1845         /* Inform AD package of unbinding of slave. */
1846         if (bond->params.mode == BOND_MODE_8023AD) {
1847                 /* must be called before the slave is
1848                  * detached from the list
1849                  */
1850                 bond_3ad_unbind_slave(slave);
1851         }
1852
1853         printk(KERN_INFO DRV_NAME
1854                ": %s: releasing %s interface %s\n",
1855                bond_dev->name,
1856                (slave->state == BOND_STATE_ACTIVE)
1857                ? "active" : "backup",
1858                slave_dev->name);
1859
1860         oldcurrent = bond->curr_active_slave;
1861
1862         bond->current_arp_slave = NULL;
1863
1864         /* release the slave from its bond */
1865         bond_detach_slave(bond, slave);
1866
1867         bond_compute_features(bond);
1868
1869         if (bond->primary_slave == slave) {
1870                 bond->primary_slave = NULL;
1871         }
1872
1873         if (oldcurrent == slave) {
1874                 bond_change_active_slave(bond, NULL);
1875         }
1876
1877         if ((bond->params.mode == BOND_MODE_TLB) ||
1878             (bond->params.mode == BOND_MODE_ALB)) {
1879                 /* Must be called only after the slave has been
1880                  * detached from the list and the curr_active_slave
1881                  * has been cleared (if our_slave == old_current),
1882                  * but before a new active slave is selected.
1883                  */
1884                 write_unlock_bh(&bond->lock);
1885                 bond_alb_deinit_slave(bond, slave);
1886                 write_lock_bh(&bond->lock);
1887         }
1888
1889         if (oldcurrent == slave) {
1890                 /*
1891                  * Note that we hold RTNL over this sequence, so there
1892                  * is no concern that another slave add/remove event
1893                  * will interfere.
1894                  */
1895                 write_unlock_bh(&bond->lock);
1896                 read_lock(&bond->lock);
1897                 write_lock_bh(&bond->curr_slave_lock);
1898
1899                 bond_select_active_slave(bond);
1900
1901                 write_unlock_bh(&bond->curr_slave_lock);
1902                 read_unlock(&bond->lock);
1903                 write_lock_bh(&bond->lock);
1904         }
1905
1906         if (bond->slave_cnt == 0) {
1907                 bond_set_carrier(bond);
1908
1909                 /* if the last slave was removed, zero the mac address
1910                  * of the master so it will be set by the application
1911                  * to the mac address of the first slave
1912                  */
1913                 memset(bond_dev->dev_addr, 0, bond_dev->addr_len);
1914
1915                 if (list_empty(&bond->vlan_list)) {
1916                         bond_dev->features |= NETIF_F_VLAN_CHALLENGED;
1917                 } else {
1918                         printk(KERN_WARNING DRV_NAME
1919                                ": %s: Warning: clearing HW address of %s while it "
1920                                "still has VLANs.\n",
1921                                bond_dev->name, bond_dev->name);
1922                         printk(KERN_WARNING DRV_NAME
1923                                ": %s: When re-adding slaves, make sure the bond's "
1924                                "HW address matches its VLANs'.\n",
1925                                bond_dev->name);
1926                 }
1927         } else if ((bond_dev->features & NETIF_F_VLAN_CHALLENGED) &&
1928                    !bond_has_challenged_slaves(bond)) {
1929                 printk(KERN_INFO DRV_NAME
1930                        ": %s: last VLAN challenged slave %s "
1931                        "left bond %s. VLAN blocking is removed\n",
1932                        bond_dev->name, slave_dev->name, bond_dev->name);
1933                 bond_dev->features &= ~NETIF_F_VLAN_CHALLENGED;
1934         }
1935
1936         write_unlock_bh(&bond->lock);
1937
1938         /* must do this from outside any spinlocks */
1939         bond_destroy_slave_symlinks(bond_dev, slave_dev);
1940
1941         bond_del_vlans_from_slave(bond, slave_dev);
1942
1943         /* If the mode USES_PRIMARY, then we should only remove its
1944          * promisc and mc settings if it was the curr_active_slave, but that was
1945          * already taken care of above when we detached the slave
1946          */
1947         if (!USES_PRIMARY(bond->params.mode)) {
1948                 /* unset promiscuity level from slave */
1949                 if (bond_dev->flags & IFF_PROMISC) {
1950                         dev_set_promiscuity(slave_dev, -1);
1951                 }
1952
1953                 /* unset allmulti level from slave */
1954                 if (bond_dev->flags & IFF_ALLMULTI) {
1955                         dev_set_allmulti(slave_dev, -1);
1956                 }
1957
1958                 /* flush master's mc_list from slave */
1959                 netif_addr_lock_bh(bond_dev);
1960                 bond_mc_list_flush(bond_dev, slave_dev);
1961                 netif_addr_unlock_bh(bond_dev);
1962         }
1963
1964         netdev_set_master(slave_dev, NULL);
1965
1966         /* close slave before restoring its mac address */
1967         dev_close(slave_dev);
1968
1969         if (bond->params.fail_over_mac != BOND_FOM_ACTIVE) {
1970                 /* restore original ("permanent") mac address */
1971                 memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN);
1972                 addr.sa_family = slave_dev->type;
1973                 dev_set_mac_address(slave_dev, &addr);
1974         }
1975
1976         slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB |
1977                                    IFF_SLAVE_INACTIVE | IFF_BONDING |
1978                                    IFF_SLAVE_NEEDARP);
1979
1980         kfree(slave);
1981
1982         return 0;  /* deletion OK */
1983 }
1984
1985 /*
1986 * Destroy a bonding device.
1987 * Must be under rtnl_lock when this function is called.
1988 */
1989 void bond_destroy(struct bonding *bond)
1990 {
1991         bond_deinit(bond->dev);
1992         bond_destroy_sysfs_entry(bond);
1993         unregister_netdevice(bond->dev);
1994 }
1995
1996 static void bond_destructor(struct net_device *bond_dev)
1997 {
1998         struct bonding *bond = bond_dev->priv;
1999
2000         if (bond->wq)
2001                 destroy_workqueue(bond->wq);
2002
2003         netif_addr_lock_bh(bond_dev);
2004         bond_mc_list_destroy(bond);
2005         netif_addr_unlock_bh(bond_dev);
2006
2007         free_netdev(bond_dev);
2008 }
2009
2010 /*
2011 * First release a slave and than destroy the bond if no more slaves iare left.
2012 * Must be under rtnl_lock when this function is called.
2013 */
2014 int  bond_release_and_destroy(struct net_device *bond_dev, struct net_device *slave_dev)
2015 {
2016         struct bonding *bond = bond_dev->priv;
2017         int ret;
2018
2019         ret = bond_release(bond_dev, slave_dev);
2020         if ((ret == 0) && (bond->slave_cnt == 0)) {
2021                 printk(KERN_INFO DRV_NAME ": %s: destroying bond %s.\n",
2022                        bond_dev->name, bond_dev->name);
2023                 bond_destroy(bond);
2024         }
2025         return ret;
2026 }
2027
2028 /*
2029  * This function releases all slaves.
2030  */
2031 static int bond_release_all(struct net_device *bond_dev)
2032 {
2033         struct bonding *bond = bond_dev->priv;
2034         struct slave *slave;
2035         struct net_device *slave_dev;
2036         struct sockaddr addr;
2037
2038         write_lock_bh(&bond->lock);
2039
2040         netif_carrier_off(bond_dev);
2041
2042         if (bond->slave_cnt == 0) {
2043                 goto out;
2044         }
2045
2046         bond->current_arp_slave = NULL;
2047         bond->primary_slave = NULL;
2048         bond_change_active_slave(bond, NULL);
2049
2050         while ((slave = bond->first_slave) != NULL) {
2051                 /* Inform AD package of unbinding of slave
2052                  * before slave is detached from the list.
2053                  */
2054                 if (bond->params.mode == BOND_MODE_8023AD) {
2055                         bond_3ad_unbind_slave(slave);
2056                 }
2057
2058                 slave_dev = slave->dev;
2059                 bond_detach_slave(bond, slave);
2060
2061                 /* now that the slave is detached, unlock and perform
2062                  * all the undo steps that should not be called from
2063                  * within a lock.
2064                  */
2065                 write_unlock_bh(&bond->lock);
2066
2067                 if ((bond->params.mode == BOND_MODE_TLB) ||
2068                     (bond->params.mode == BOND_MODE_ALB)) {
2069                         /* must be called only after the slave
2070                          * has been detached from the list
2071                          */
2072                         bond_alb_deinit_slave(bond, slave);
2073                 }
2074
2075                 bond_compute_features(bond);
2076
2077                 bond_destroy_slave_symlinks(bond_dev, slave_dev);
2078                 bond_del_vlans_from_slave(bond, slave_dev);
2079
2080                 /* If the mode USES_PRIMARY, then we should only remove its
2081                  * promisc and mc settings if it was the curr_active_slave, but that was
2082                  * already taken care of above when we detached the slave
2083                  */
2084                 if (!USES_PRIMARY(bond->params.mode)) {
2085                         /* unset promiscuity level from slave */
2086                         if (bond_dev->flags & IFF_PROMISC) {
2087                                 dev_set_promiscuity(slave_dev, -1);
2088                         }
2089
2090                         /* unset allmulti level from slave */
2091                         if (bond_dev->flags & IFF_ALLMULTI) {
2092                                 dev_set_allmulti(slave_dev, -1);
2093                         }
2094
2095                         /* flush master's mc_list from slave */
2096                         netif_addr_lock_bh(bond_dev);
2097                         bond_mc_list_flush(bond_dev, slave_dev);
2098                         netif_addr_unlock_bh(bond_dev);
2099                 }
2100
2101                 netdev_set_master(slave_dev, NULL);
2102
2103                 /* close slave before restoring its mac address */
2104                 dev_close(slave_dev);
2105
2106                 if (!bond->params.fail_over_mac) {
2107                         /* restore original ("permanent") mac address*/
2108                         memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN);
2109                         addr.sa_family = slave_dev->type;
2110                         dev_set_mac_address(slave_dev, &addr);
2111                 }
2112
2113                 slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB |
2114                                            IFF_SLAVE_INACTIVE);
2115
2116                 kfree(slave);
2117
2118                 /* re-acquire the lock before getting the next slave */
2119                 write_lock_bh(&bond->lock);
2120         }
2121
2122         /* zero the mac address of the master so it will be
2123          * set by the application to the mac address of the
2124          * first slave
2125          */
2126         memset(bond_dev->dev_addr, 0, bond_dev->addr_len);
2127
2128         if (list_empty(&bond->vlan_list)) {
2129                 bond_dev->features |= NETIF_F_VLAN_CHALLENGED;
2130         } else {
2131                 printk(KERN_WARNING DRV_NAME
2132                        ": %s: Warning: clearing HW address of %s while it "
2133                        "still has VLANs.\n",
2134                        bond_dev->name, bond_dev->name);
2135                 printk(KERN_WARNING DRV_NAME
2136                        ": %s: When re-adding slaves, make sure the bond's "
2137                        "HW address matches its VLANs'.\n",
2138                        bond_dev->name);
2139         }
2140
2141         printk(KERN_INFO DRV_NAME
2142                ": %s: released all slaves\n",
2143                bond_dev->name);
2144
2145 out:
2146         write_unlock_bh(&bond->lock);
2147
2148         return 0;
2149 }
2150
2151 /*
2152  * This function changes the active slave to slave <slave_dev>.
2153  * It returns -EINVAL in the following cases.
2154  *  - <slave_dev> is not found in the list.
2155  *  - There is not active slave now.
2156  *  - <slave_dev> is already active.
2157  *  - The link state of <slave_dev> is not BOND_LINK_UP.
2158  *  - <slave_dev> is not running.
2159  * In these cases, this fuction does nothing.
2160  * In the other cases, currnt_slave pointer is changed and 0 is returned.
2161  */
2162 static int bond_ioctl_change_active(struct net_device *bond_dev, struct net_device *slave_dev)
2163 {
2164         struct bonding *bond = bond_dev->priv;
2165         struct slave *old_active = NULL;
2166         struct slave *new_active = NULL;
2167         int res = 0;
2168
2169         if (!USES_PRIMARY(bond->params.mode)) {
2170                 return -EINVAL;
2171         }
2172
2173         /* Verify that master_dev is indeed the master of slave_dev */
2174         if (!(slave_dev->flags & IFF_SLAVE) ||
2175             (slave_dev->master != bond_dev)) {
2176                 return -EINVAL;
2177         }
2178
2179         read_lock(&bond->lock);
2180
2181         read_lock(&bond->curr_slave_lock);
2182         old_active = bond->curr_active_slave;
2183         read_unlock(&bond->curr_slave_lock);
2184
2185         new_active = bond_get_slave_by_dev(bond, slave_dev);
2186
2187         /*
2188          * Changing to the current active: do nothing; return success.
2189          */
2190         if (new_active && (new_active == old_active)) {
2191                 read_unlock(&bond->lock);
2192                 return 0;
2193         }
2194
2195         if ((new_active) &&
2196             (old_active) &&
2197             (new_active->link == BOND_LINK_UP) &&
2198             IS_UP(new_active->dev)) {
2199                 write_lock_bh(&bond->curr_slave_lock);
2200                 bond_change_active_slave(bond, new_active);
2201                 write_unlock_bh(&bond->curr_slave_lock);
2202         } else {
2203                 res = -EINVAL;
2204         }
2205
2206         read_unlock(&bond->lock);
2207
2208         return res;
2209 }
2210
2211 static int bond_info_query(struct net_device *bond_dev, struct ifbond *info)
2212 {
2213         struct bonding *bond = bond_dev->priv;
2214
2215         info->bond_mode = bond->params.mode;
2216         info->miimon = bond->params.miimon;
2217
2218         read_lock(&bond->lock);
2219         info->num_slaves = bond->slave_cnt;
2220         read_unlock(&bond->lock);
2221
2222         return 0;
2223 }
2224
2225 static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *info)
2226 {
2227         struct bonding *bond = bond_dev->priv;
2228         struct slave *slave;
2229         int i, found = 0;
2230
2231         if (info->slave_id < 0) {
2232                 return -ENODEV;
2233         }
2234
2235         read_lock(&bond->lock);
2236
2237         bond_for_each_slave(bond, slave, i) {
2238                 if (i == (int)info->slave_id) {
2239                         found = 1;
2240                         break;
2241                 }
2242         }
2243
2244         read_unlock(&bond->lock);
2245
2246         if (found) {
2247                 strcpy(info->slave_name, slave->dev->name);
2248                 info->link = slave->link;
2249                 info->state = slave->state;
2250                 info->link_failure_count = slave->link_failure_count;
2251         } else {
2252                 return -ENODEV;
2253         }
2254
2255         return 0;
2256 }
2257
2258 /*-------------------------------- Monitoring -------------------------------*/
2259
2260
2261 static int bond_miimon_inspect(struct bonding *bond)
2262 {
2263         struct slave *slave;
2264         int i, link_state, commit = 0;
2265
2266         bond_for_each_slave(bond, slave, i) {
2267                 slave->new_link = BOND_LINK_NOCHANGE;
2268
2269                 link_state = bond_check_dev_link(bond, slave->dev, 0);
2270
2271                 switch (slave->link) {
2272                 case BOND_LINK_UP:
2273                         if (link_state)
2274                                 continue;
2275
2276                         slave->link = BOND_LINK_FAIL;
2277                         slave->delay = bond->params.downdelay;
2278                         if (slave->delay) {
2279                                 printk(KERN_INFO DRV_NAME
2280                                        ": %s: link status down for %s"
2281                                        "interface %s, disabling it in %d ms.\n",
2282                                        bond->dev->name,
2283                                        (bond->params.mode ==
2284                                         BOND_MODE_ACTIVEBACKUP) ?
2285                                        ((slave->state == BOND_STATE_ACTIVE) ?
2286                                         "active " : "backup ") : "",
2287                                        slave->dev->name,
2288                                        bond->params.downdelay * bond->params.miimon);
2289                         }
2290                         /*FALLTHRU*/
2291                 case BOND_LINK_FAIL:
2292                         if (link_state) {
2293                                 /*
2294                                  * recovered before downdelay expired
2295                                  */
2296                                 slave->link = BOND_LINK_UP;
2297                                 slave->jiffies = jiffies;
2298                                 printk(KERN_INFO DRV_NAME
2299                                        ": %s: link status up again after %d "
2300                                        "ms for interface %s.\n",
2301                                        bond->dev->name,
2302                                        (bond->params.downdelay - slave->delay) *
2303                                        bond->params.miimon,
2304                                        slave->dev->name);
2305                                 continue;
2306                         }
2307
2308                         if (slave->delay <= 0) {
2309                                 slave->new_link = BOND_LINK_DOWN;
2310                                 commit++;
2311                                 continue;
2312                         }
2313
2314                         slave->delay--;
2315                         break;
2316
2317                 case BOND_LINK_DOWN:
2318                         if (!link_state)
2319                                 continue;
2320
2321                         slave->link = BOND_LINK_BACK;
2322                         slave->delay = bond->params.updelay;
2323
2324                         if (slave->delay) {
2325                                 printk(KERN_INFO DRV_NAME
2326                                        ": %s: link status up for "
2327                                        "interface %s, enabling it in %d ms.\n",
2328                                        bond->dev->name, slave->dev->name,
2329                                        bond->params.updelay *
2330                                        bond->params.miimon);
2331                         }
2332                         /*FALLTHRU*/
2333                 case BOND_LINK_BACK:
2334                         if (!link_state) {
2335                                 slave->link = BOND_LINK_DOWN;
2336                                 printk(KERN_INFO DRV_NAME
2337                                        ": %s: link status down again after %d "
2338                                        "ms for interface %s.\n",
2339                                        bond->dev->name,
2340                                        (bond->params.updelay - slave->delay) *
2341                                        bond->params.miimon,
2342                                        slave->dev->name);
2343
2344                                 continue;
2345                         }
2346
2347                         if (slave->delay <= 0) {
2348                                 slave->new_link = BOND_LINK_UP;
2349                                 commit++;
2350                                 continue;
2351                         }
2352
2353                         slave->delay--;
2354                         break;
2355                 }
2356         }
2357
2358         return commit;
2359 }
2360
2361 static void bond_miimon_commit(struct bonding *bond)
2362 {
2363         struct slave *slave;
2364         int i;
2365
2366         bond_for_each_slave(bond, slave, i) {
2367                 switch (slave->new_link) {
2368                 case BOND_LINK_NOCHANGE:
2369                         continue;
2370
2371                 case BOND_LINK_UP:
2372                         slave->link = BOND_LINK_UP;
2373                         slave->jiffies = jiffies;
2374
2375                         if (bond->params.mode == BOND_MODE_8023AD) {
2376                                 /* prevent it from being the active one */
2377                                 slave->state = BOND_STATE_BACKUP;
2378                         } else if (bond->params.mode != BOND_MODE_ACTIVEBACKUP) {
2379                                 /* make it immediately active */
2380                                 slave->state = BOND_STATE_ACTIVE;
2381                         } else if (slave != bond->primary_slave) {
2382                                 /* prevent it from being the active one */
2383                                 slave->state = BOND_STATE_BACKUP;
2384                         }
2385
2386                         printk(KERN_INFO DRV_NAME
2387                                ": %s: link status definitely "
2388                                "up for interface %s.\n",
2389                                bond->dev->name, slave->dev->name);
2390
2391                         /* notify ad that the link status has changed */
2392                         if (bond->params.mode == BOND_MODE_8023AD)
2393                                 bond_3ad_handle_link_change(slave, BOND_LINK_UP);
2394
2395                         if ((bond->params.mode == BOND_MODE_TLB) ||
2396                             (bond->params.mode == BOND_MODE_ALB))
2397                                 bond_alb_handle_link_change(bond, slave,
2398                                                             BOND_LINK_UP);
2399
2400                         if (!bond->curr_active_slave ||
2401                             (slave == bond->primary_slave))
2402                                 goto do_failover;
2403
2404                         continue;
2405
2406                 case BOND_LINK_DOWN:
2407                         if (slave->link_failure_count < UINT_MAX)
2408                                 slave->link_failure_count++;
2409
2410                         slave->link = BOND_LINK_DOWN;
2411
2412                         if (bond->params.mode == BOND_MODE_ACTIVEBACKUP ||
2413                             bond->params.mode == BOND_MODE_8023AD)
2414                                 bond_set_slave_inactive_flags(slave);
2415
2416                         printk(KERN_INFO DRV_NAME
2417                                ": %s: link status definitely down for "
2418                                "interface %s, disabling it\n",
2419                                bond->dev->name, slave->dev->name);
2420
2421                         if (bond->params.mode == BOND_MODE_8023AD)
2422                                 bond_3ad_handle_link_change(slave,
2423                                                             BOND_LINK_DOWN);
2424
2425                         if (bond->params.mode == BOND_MODE_TLB ||
2426                             bond->params.mode == BOND_MODE_ALB)
2427                                 bond_alb_handle_link_change(bond, slave,
2428                                                             BOND_LINK_DOWN);
2429
2430                         if (slave == bond->curr_active_slave)
2431                                 goto do_failover;
2432
2433                         continue;
2434
2435                 default:
2436                         printk(KERN_ERR DRV_NAME
2437                                ": %s: invalid new link %d on slave %s\n",
2438                                bond->dev->name, slave->new_link,
2439                                slave->dev->name);
2440                         slave->new_link = BOND_LINK_NOCHANGE;
2441
2442                         continue;
2443                 }
2444
2445 do_failover:
2446                 ASSERT_RTNL();
2447                 write_lock_bh(&bond->curr_slave_lock);
2448                 bond_select_active_slave(bond);
2449                 write_unlock_bh(&bond->curr_slave_lock);
2450         }
2451
2452         bond_set_carrier(bond);
2453 }
2454
2455 /*
2456  * bond_mii_monitor
2457  *
2458  * Really a wrapper that splits the mii monitor into two phases: an
2459  * inspection, then (if inspection indicates something needs to be done)
2460  * an acquisition of appropriate locks followed by a commit phase to
2461  * implement whatever link state changes are indicated.
2462  */
2463 void bond_mii_monitor(struct work_struct *work)
2464 {
2465         struct bonding *bond = container_of(work, struct bonding,
2466                                             mii_work.work);
2467
2468         read_lock(&bond->lock);
2469         if (bond->kill_timers)
2470                 goto out;
2471
2472         if (bond->slave_cnt == 0)
2473                 goto re_arm;
2474
2475         if (bond->send_grat_arp) {
2476                 read_lock(&bond->curr_slave_lock);
2477                 bond_send_gratuitous_arp(bond);
2478                 read_unlock(&bond->curr_slave_lock);
2479         }
2480
2481         if (bond->send_unsol_na) {
2482                 read_lock(&bond->curr_slave_lock);
2483                 bond_send_unsolicited_na(bond);
2484                 read_unlock(&bond->curr_slave_lock);
2485         }
2486
2487         if (bond_miimon_inspect(bond)) {
2488                 read_unlock(&bond->lock);
2489                 rtnl_lock();
2490                 read_lock(&bond->lock);
2491
2492                 bond_miimon_commit(bond);
2493
2494                 read_unlock(&bond->lock);
2495                 rtnl_unlock();  /* might sleep, hold no other locks */
2496                 read_lock(&bond->lock);
2497         }
2498
2499 re_arm:
2500         if (bond->params.miimon)
2501                 queue_delayed_work(bond->wq, &bond->mii_work,
2502                                    msecs_to_jiffies(bond->params.miimon));
2503 out:
2504         read_unlock(&bond->lock);
2505 }
2506
2507 static __be32 bond_glean_dev_ip(struct net_device *dev)
2508 {
2509         struct in_device *idev;
2510         struct in_ifaddr *ifa;
2511         __be32 addr = 0;
2512
2513         if (!dev)
2514                 return 0;
2515
2516         rcu_read_lock();
2517         idev = __in_dev_get_rcu(dev);
2518         if (!idev)
2519                 goto out;
2520
2521         ifa = idev->ifa_list;
2522         if (!ifa)
2523                 goto out;
2524
2525         addr = ifa->ifa_local;
2526 out:
2527         rcu_read_unlock();
2528         return addr;
2529 }
2530
2531 static int bond_has_this_ip(struct bonding *bond, __be32 ip)
2532 {
2533         struct vlan_entry *vlan;
2534
2535         if (ip == bond->master_ip)
2536                 return 1;
2537
2538         list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
2539                 if (ip == vlan->vlan_ip)
2540                         return 1;
2541         }
2542
2543         return 0;
2544 }
2545
2546 /*
2547  * We go to the (large) trouble of VLAN tagging ARP frames because
2548  * switches in VLAN mode (especially if ports are configured as
2549  * "native" to a VLAN) might not pass non-tagged frames.
2550  */
2551 static void bond_arp_send(struct net_device *slave_dev, int arp_op, __be32 dest_ip, __be32 src_ip, unsigned short vlan_id)
2552 {
2553         struct sk_buff *skb;
2554
2555         dprintk("arp %d on slave %s: dst %x src %x vid %d\n", arp_op,
2556                slave_dev->name, dest_ip, src_ip, vlan_id);
2557                
2558         skb = arp_create(arp_op, ETH_P_ARP, dest_ip, slave_dev, src_ip,
2559                          NULL, slave_dev->dev_addr, NULL);
2560
2561         if (!skb) {
2562                 printk(KERN_ERR DRV_NAME ": ARP packet allocation failed\n");
2563                 return;
2564         }
2565         if (vlan_id) {
2566                 skb = vlan_put_tag(skb, vlan_id);
2567                 if (!skb) {
2568                         printk(KERN_ERR DRV_NAME ": failed to insert VLAN tag\n");
2569                         return;
2570                 }
2571         }
2572         arp_xmit(skb);
2573 }
2574
2575
2576 static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
2577 {
2578         int i, vlan_id, rv;
2579         __be32 *targets = bond->params.arp_targets;
2580         struct vlan_entry *vlan;
2581         struct net_device *vlan_dev;
2582         struct flowi fl;
2583         struct rtable *rt;
2584
2585         for (i = 0; (i < BOND_MAX_ARP_TARGETS); i++) {
2586                 if (!targets[i])
2587                         continue;
2588                 dprintk("basa: target %x\n", targets[i]);
2589                 if (list_empty(&bond->vlan_list)) {
2590                         dprintk("basa: empty vlan: arp_send\n");
2591                         bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
2592                                       bond->master_ip, 0);
2593                         continue;
2594                 }
2595
2596                 /*
2597                  * If VLANs are configured, we do a route lookup to
2598                  * determine which VLAN interface would be used, so we
2599                  * can tag the ARP with the proper VLAN tag.
2600                  */
2601                 memset(&fl, 0, sizeof(fl));
2602                 fl.fl4_dst = targets[i];
2603                 fl.fl4_tos = RTO_ONLINK;
2604
2605                 rv = ip_route_output_key(&init_net, &rt, &fl);
2606                 if (rv) {
2607                         if (net_ratelimit()) {
2608                                 printk(KERN_WARNING DRV_NAME
2609                              ": %s: no route to arp_ip_target %pI4\n",
2610                                        bond->dev->name, &fl.fl4_dst);
2611                         }
2612                         continue;
2613                 }
2614
2615                 /*
2616                  * This target is not on a VLAN
2617                  */
2618                 if (rt->u.dst.dev == bond->dev) {
2619                         ip_rt_put(rt);
2620                         dprintk("basa: rtdev == bond->dev: arp_send\n");
2621                         bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
2622                                       bond->master_ip, 0);
2623                         continue;
2624                 }
2625
2626                 vlan_id = 0;
2627                 list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
2628                         vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id);
2629                         if (vlan_dev == rt->u.dst.dev) {
2630                                 vlan_id = vlan->vlan_id;
2631                                 dprintk("basa: vlan match on %s %d\n",
2632                                        vlan_dev->name, vlan_id);
2633                                 break;
2634                         }
2635                 }
2636
2637                 if (vlan_id) {
2638                         ip_rt_put(rt);
2639                         bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
2640                                       vlan->vlan_ip, vlan_id);
2641                         continue;
2642                 }
2643
2644                 if (net_ratelimit()) {
2645                         printk(KERN_WARNING DRV_NAME
2646                ": %s: no path to arp_ip_target %pI4 via rt.dev %s\n",
2647                                bond->dev->name, &fl.fl4_dst,
2648                                rt->u.dst.dev ? rt->u.dst.dev->name : "NULL");
2649                 }
2650                 ip_rt_put(rt);
2651         }
2652 }
2653
2654 /*
2655  * Kick out a gratuitous ARP for an IP on the bonding master plus one
2656  * for each VLAN above us.
2657  *
2658  * Caller must hold curr_slave_lock for read or better
2659  */
2660 static void bond_send_gratuitous_arp(struct bonding *bond)
2661 {
2662         struct slave *slave = bond->curr_active_slave;
2663         struct vlan_entry *vlan;
2664         struct net_device *vlan_dev;
2665
2666         dprintk("bond_send_grat_arp: bond %s slave %s\n", bond->dev->name,
2667                                 slave ? slave->dev->name : "NULL");
2668
2669         if (!slave || !bond->send_grat_arp ||
2670             test_bit(__LINK_STATE_LINKWATCH_PENDING, &slave->dev->state))
2671                 return;
2672
2673         bond->send_grat_arp--;
2674
2675         if (bond->master_ip) {
2676                 bond_arp_send(slave->dev, ARPOP_REPLY, bond->master_ip,
2677                                 bond->master_ip, 0);
2678         }
2679
2680         list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
2681                 vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id);
2682                 if (vlan->vlan_ip) {
2683                         bond_arp_send(slave->dev, ARPOP_REPLY, vlan->vlan_ip,
2684                                       vlan->vlan_ip, vlan->vlan_id);
2685                 }
2686         }
2687 }
2688
2689 static void bond_validate_arp(struct bonding *bond, struct slave *slave, __be32 sip, __be32 tip)
2690 {
2691         int i;
2692         __be32 *targets = bond->params.arp_targets;
2693
2694         targets = bond->params.arp_targets;
2695         for (i = 0; (i < BOND_MAX_ARP_TARGETS) && targets[i]; i++) {
2696                 dprintk("bva: sip %pI4 tip %pI4 t[%d] %pI4 bhti(tip) %d\n",
2697                         &sip, &tip, i, &targets[i], bond_has_this_ip(bond, tip));
2698                 if (sip == targets[i]) {
2699                         if (bond_has_this_ip(bond, tip))
2700                                 slave->last_arp_rx = jiffies;
2701                         return;
2702                 }
2703         }
2704 }
2705
2706 static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
2707 {
2708         struct arphdr *arp;
2709         struct slave *slave;
2710         struct bonding *bond;
2711         unsigned char *arp_ptr;
2712         __be32 sip, tip;
2713
2714         if (dev_net(dev) != &init_net)
2715                 goto out;
2716
2717         if (!(dev->priv_flags & IFF_BONDING) || !(dev->flags & IFF_MASTER))
2718                 goto out;
2719
2720         bond = dev->priv;
2721         read_lock(&bond->lock);
2722
2723         dprintk("bond_arp_rcv: bond %s skb->dev %s orig_dev %s\n",
2724                 bond->dev->name, skb->dev ? skb->dev->name : "NULL",
2725                 orig_dev ? orig_dev->name : "NULL");
2726
2727         slave = bond_get_slave_by_dev(bond, orig_dev);
2728         if (!slave || !slave_do_arp_validate(bond, slave))
2729                 goto out_unlock;
2730
2731         if (!pskb_may_pull(skb, arp_hdr_len(dev)))
2732                 goto out_unlock;
2733
2734         arp = arp_hdr(skb);
2735         if (arp->ar_hln != dev->addr_len ||
2736             skb->pkt_type == PACKET_OTHERHOST ||
2737             skb->pkt_type == PACKET_LOOPBACK ||
2738             arp->ar_hrd != htons(ARPHRD_ETHER) ||
2739             arp->ar_pro != htons(ETH_P_IP) ||
2740             arp->ar_pln != 4)
2741                 goto out_unlock;
2742
2743         arp_ptr = (unsigned char *)(arp + 1);
2744         arp_ptr += dev->addr_len;
2745         memcpy(&sip, arp_ptr, 4);
2746         arp_ptr += 4 + dev->addr_len;
2747         memcpy(&tip, arp_ptr, 4);
2748
2749         dprintk("bond_arp_rcv: %s %s/%d av %d sv %d sip %pI4 tip %pI4\n",
2750                 bond->dev->name, slave->dev->name, slave->state,
2751                 bond->params.arp_validate, slave_do_arp_validate(bond, slave),
2752                 &sip, &tip);
2753
2754         /*
2755          * Backup slaves won't see the ARP reply, but do come through
2756          * here for each ARP probe (so we swap the sip/tip to validate
2757          * the probe).  In a "redundant switch, common router" type of
2758          * configuration, the ARP probe will (hopefully) travel from
2759          * the active, through one switch, the router, then the other
2760          * switch before reaching the backup.
2761          */
2762         if (slave->state == BOND_STATE_ACTIVE)
2763                 bond_validate_arp(bond, slave, sip, tip);
2764         else
2765                 bond_validate_arp(bond, slave, tip, sip);
2766
2767 out_unlock:
2768         read_unlock(&bond->lock);
2769 out:
2770         dev_kfree_skb(skb);
2771         return NET_RX_SUCCESS;
2772 }
2773
2774 /*
2775  * this function is called regularly to monitor each slave's link
2776  * ensuring that traffic is being sent and received when arp monitoring
2777  * is used in load-balancing mode. if the adapter has been dormant, then an
2778  * arp is transmitted to generate traffic. see activebackup_arp_monitor for
2779  * arp monitoring in active backup mode.
2780  */
2781 void bond_loadbalance_arp_mon(struct work_struct *work)
2782 {
2783         struct bonding *bond = container_of(work, struct bonding,
2784                                             arp_work.work);
2785         struct slave *slave, *oldcurrent;
2786         int do_failover = 0;
2787         int delta_in_ticks;
2788         int i;
2789
2790         read_lock(&bond->lock);
2791
2792         delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);
2793
2794         if (bond->kill_timers) {
2795                 goto out;
2796         }
2797
2798         if (bond->slave_cnt == 0) {
2799                 goto re_arm;
2800         }
2801
2802         read_lock(&bond->curr_slave_lock);
2803         oldcurrent = bond->curr_active_slave;
2804         read_unlock(&bond->curr_slave_lock);
2805
2806         /* see if any of the previous devices are up now (i.e. they have
2807          * xmt and rcv traffic). the curr_active_slave does not come into
2808          * the picture unless it is null. also, slave->jiffies is not needed
2809          * here because we send an arp on each slave and give a slave as
2810          * long as it needs to get the tx/rx within the delta.
2811          * TODO: what about up/down delay in arp mode? it wasn't here before
2812          *       so it can wait
2813          */
2814         bond_for_each_slave(bond, slave, i) {
2815                 if (slave->link != BOND_LINK_UP) {
2816                         if (time_before_eq(jiffies, slave->dev->trans_start + delta_in_ticks) &&
2817                             time_before_eq(jiffies, slave->dev->last_rx + delta_in_ticks)) {
2818
2819                                 slave->link  = BOND_LINK_UP;
2820                                 slave->state = BOND_STATE_ACTIVE;
2821
2822                                 /* primary_slave has no meaning in round-robin
2823                                  * mode. the window of a slave being up and
2824                                  * curr_active_slave being null after enslaving
2825                                  * is closed.
2826                                  */
2827                                 if (!oldcurrent) {
2828                                         printk(KERN_INFO DRV_NAME
2829                                                ": %s: link status definitely "
2830                                                "up for interface %s, ",
2831                                                bond->dev->name,
2832                                                slave->dev->name);
2833                                         do_failover = 1;
2834                                 } else {
2835                                         printk(KERN_INFO DRV_NAME
2836                                                ": %s: interface %s is now up\n",
2837                                                bond->dev->name,
2838                                                slave->dev->name);
2839                                 }
2840                         }
2841                 } else {
2842                         /* slave->link == BOND_LINK_UP */
2843
2844                         /* not all switches will respond to an arp request
2845                          * when the source ip is 0, so don't take the link down
2846                          * if we don't know our ip yet
2847                          */
2848                         if (time_after_eq(jiffies, slave->dev->trans_start + 2*delta_in_ticks) ||
2849                             (time_after_eq(jiffies, slave->dev->last_rx + 2*delta_in_ticks))) {
2850
2851                                 slave->link  = BOND_LINK_DOWN;
2852                                 slave->state = BOND_STATE_BACKUP;
2853
2854                                 if (slave->link_failure_count < UINT_MAX) {
2855                                         slave->link_failure_count++;
2856                                 }
2857
2858                                 printk(KERN_INFO DRV_NAME
2859                                        ": %s: interface %s is now down.\n",
2860                                        bond->dev->name,
2861                                        slave->dev->name);
2862
2863                                 if (slave == oldcurrent) {
2864                                         do_failover = 1;
2865                                 }
2866                         }
2867                 }
2868
2869                 /* note: if switch is in round-robin mode, all links
2870                  * must tx arp to ensure all links rx an arp - otherwise
2871                  * links may oscillate or not come up at all; if switch is
2872                  * in something like xor mode, there is nothing we can
2873                  * do - all replies will be rx'ed on same link causing slaves
2874                  * to be unstable during low/no traffic periods
2875                  */
2876                 if (IS_UP(slave->dev)) {
2877                         bond_arp_send_all(bond, slave);
2878                 }
2879         }
2880
2881         if (do_failover) {
2882                 write_lock_bh(&bond->curr_slave_lock);
2883
2884                 bond_select_active_slave(bond);
2885
2886                 write_unlock_bh(&bond->curr_slave_lock);
2887         }
2888
2889 re_arm:
2890         if (bond->params.arp_interval)
2891                 queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks);
2892 out:
2893         read_unlock(&bond->lock);
2894 }
2895
2896 /*
2897  * Called to inspect slaves for active-backup mode ARP monitor link state
2898  * changes.  Sets new_link in slaves to specify what action should take
2899  * place for the slave.  Returns 0 if no changes are found, >0 if changes
2900  * to link states must be committed.
2901  *
2902  * Called with bond->lock held for read.
2903  */
2904 static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
2905 {
2906         struct slave *slave;
2907         int i, commit = 0;
2908
2909         bond_for_each_slave(bond, slave, i) {
2910                 slave->new_link = BOND_LINK_NOCHANGE;
2911
2912                 if (slave->link != BOND_LINK_UP) {
2913                         if (time_before_eq(jiffies, slave_last_rx(bond, slave) +
2914                                            delta_in_ticks)) {
2915                                 slave->new_link = BOND_LINK_UP;
2916                                 commit++;
2917                         }
2918
2919                         continue;
2920                 }
2921
2922                 /*
2923                  * Give slaves 2*delta after being enslaved or made
2924                  * active.  This avoids bouncing, as the last receive
2925                  * times need a full ARP monitor cycle to be updated.
2926                  */
2927                 if (!time_after_eq(jiffies, slave->jiffies +
2928                                    2 * delta_in_ticks))
2929                         continue;
2930
2931                 /*
2932                  * Backup slave is down if:
2933                  * - No current_arp_slave AND
2934                  * - more than 3*delta since last receive AND
2935                  * - the bond has an IP address
2936                  *
2937                  * Note: a non-null current_arp_slave indicates
2938                  * the curr_active_slave went down and we are
2939                  * searching for a new one; under this condition
2940                  * we only take the curr_active_slave down - this
2941                  * gives each slave a chance to tx/rx traffic
2942                  * before being taken out
2943                  */
2944                 if (slave->state == BOND_STATE_BACKUP &&
2945                     !bond->current_arp_slave &&
2946                     time_after(jiffies, slave_last_rx(bond, slave) +
2947                                3 * delta_in_ticks)) {
2948                         slave->new_link = BOND_LINK_DOWN;
2949                         commit++;
2950                 }
2951
2952                 /*
2953                  * Active slave is down if:
2954                  * - more than 2*delta since transmitting OR
2955                  * - (more than 2*delta since receive AND
2956                  *    the bond has an IP address)
2957                  */
2958                 if ((slave->state == BOND_STATE_ACTIVE) &&
2959                     (time_after_eq(jiffies, slave->dev->trans_start +
2960                                     2 * delta_in_ticks) ||
2961                       (time_after_eq(jiffies, slave_last_rx(bond, slave)
2962                                      + 2 * delta_in_ticks)))) {
2963                         slave->new_link = BOND_LINK_DOWN;
2964                         commit++;
2965                 }
2966         }
2967
2968         read_lock(&bond->curr_slave_lock);
2969
2970         /*
2971          * Trigger a commit if the primary option setting has changed.
2972          */
2973         if (bond->primary_slave &&
2974             (bond->primary_slave != bond->curr_active_slave) &&
2975             (bond->primary_slave->link == BOND_LINK_UP))
2976                 commit++;
2977
2978         read_unlock(&bond->curr_slave_lock);
2979
2980         return commit;
2981 }
2982
2983 /*
2984  * Called to commit link state changes noted by inspection step of
2985  * active-backup mode ARP monitor.
2986  *
2987  * Called with RTNL and bond->lock for read.
2988  */
2989 static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks)
2990 {
2991         struct slave *slave;
2992         int i;
2993
2994         bond_for_each_slave(bond, slave, i) {
2995                 switch (slave->new_link) {
2996                 case BOND_LINK_NOCHANGE:
2997                         continue;
2998
2999                 case BOND_LINK_UP:
3000                         write_lock_bh(&bond->curr_slave_lock);
3001
3002                         if (!bond->curr_active_slave &&
3003                             time_before_eq(jiffies, slave->dev->trans_start +
3004                                            delta_in_ticks)) {
3005                                 slave->link = BOND_LINK_UP;
3006                                 bond_change_active_slave(bond, slave);
3007                                 bond->current_arp_slave = NULL;
3008
3009                                 printk(KERN_INFO DRV_NAME
3010                                        ": %s: %s is up and now the "
3011                                        "active interface\n",
3012                                        bond->dev->name, slave->dev->name);
3013
3014                         } else if (bond->curr_active_slave != slave) {
3015                                 /* this slave has just come up but we
3016                                  * already have a current slave; this can
3017                                  * also happen if bond_enslave adds a new
3018                                  * slave that is up while we are searching
3019                                  * for a new slave
3020                                  */
3021                                 slave->link = BOND_LINK_UP;
3022                                 bond_set_slave_inactive_flags(slave);
3023                                 bond->current_arp_slave = NULL;
3024
3025                                 printk(KERN_INFO DRV_NAME
3026                                        ": %s: backup interface %s is now up\n",
3027                                        bond->dev->name, slave->dev->name);
3028                         }
3029
3030                         write_unlock_bh(&bond->curr_slave_lock);
3031
3032                         break;
3033
3034                 case BOND_LINK_DOWN:
3035                         if (slave->link_failure_count < UINT_MAX)
3036                                 slave->link_failure_count++;
3037
3038                         slave->link = BOND_LINK_DOWN;
3039
3040                         if (slave == bond->curr_active_slave) {
3041                                 printk(KERN_INFO DRV_NAME
3042                                        ": %s: link status down for active "
3043                                        "interface %s, disabling it\n",
3044                                        bond->dev->name, slave->dev->name);
3045
3046                                 bond_set_slave_inactive_flags(slave);
3047
3048                                 write_lock_bh(&bond->curr_slave_lock);
3049
3050                                 bond_select_active_slave(bond);
3051                                 if (bond->curr_active_slave)
3052                                         bond->curr_active_slave->jiffies =
3053                                                 jiffies;
3054
3055                                 write_unlock_bh(&bond->curr_slave_lock);
3056
3057                                 bond->current_arp_slave = NULL;
3058
3059                         } else if (slave->state == BOND_STATE_BACKUP) {
3060                                 printk(KERN_INFO DRV_NAME
3061                                        ": %s: backup interface %s is now down\n",
3062                                        bond->dev->name, slave->dev->name);
3063
3064                                 bond_set_slave_inactive_flags(slave);
3065                         }
3066                         break;
3067
3068                 default:
3069                         printk(KERN_ERR DRV_NAME
3070                                ": %s: impossible: new_link %d on slave %s\n",
3071                                bond->dev->name, slave->new_link,
3072                                slave->dev->name);
3073                 }
3074         }
3075
3076         /*
3077          * No race with changes to primary via sysfs, as we hold rtnl.
3078          */
3079         if (bond->primary_slave &&
3080             (bond->primary_slave != bond->curr_active_slave) &&
3081             (bond->primary_slave->link == BOND_LINK_UP)) {
3082                 write_lock_bh(&bond->curr_slave_lock);
3083                 bond_change_active_slave(bond, bond->primary_slave);
3084                 write_unlock_bh(&bond->curr_slave_lock);
3085         }
3086
3087         bond_set_carrier(bond);
3088 }
3089
3090 /*
3091  * Send ARP probes for active-backup mode ARP monitor.
3092  *
3093  * Called with bond->lock held for read.
3094  */
3095 static void bond_ab_arp_probe(struct bonding *bond)
3096 {
3097         struct slave *slave;
3098         int i;
3099
3100         read_lock(&bond->curr_slave_lock);
3101
3102         if (bond->current_arp_slave && bond->curr_active_slave)
3103                 printk("PROBE: c_arp %s && cas %s BAD\n",
3104                        bond->current_arp_slave->dev->name,
3105                        bond->curr_active_slave->dev->name);
3106
3107         if (bond->curr_active_slave) {
3108                 bond_arp_send_all(bond, bond->curr_active_slave);
3109                 read_unlock(&bond->curr_slave_lock);
3110                 return;
3111         }
3112
3113         read_unlock(&bond->curr_slave_lock);
3114
3115         /* if we don't have a curr_active_slave, search for the next available
3116          * backup slave from the current_arp_slave and make it the candidate
3117          * for becoming the curr_active_slave
3118          */
3119
3120         if (!bond->current_arp_slave) {
3121                 bond->current_arp_slave = bond->first_slave;
3122                 if (!bond->current_arp_slave)
3123                         return;
3124         }
3125
3126         bond_set_slave_inactive_flags(bond->current_arp_slave);
3127
3128         /* search for next candidate */
3129         bond_for_each_slave_from(bond, slave, i, bond->current_arp_slave->next) {
3130                 if (IS_UP(slave->dev)) {
3131                         slave->link = BOND_LINK_BACK;
3132                         bond_set_slave_active_flags(slave);
3133                         bond_arp_send_all(bond, slave);
3134                         slave->jiffies = jiffies;
3135                         bond->current_arp_slave = slave;
3136                         break;
3137                 }
3138
3139                 /* if the link state is up at this point, we
3140                  * mark it down - this can happen if we have
3141                  * simultaneous link failures and
3142                  * reselect_active_interface doesn't make this
3143                  * one the current slave so it is still marked
3144                  * up when it is actually down
3145                  */
3146                 if (slave->link == BOND_LINK_UP) {
3147                         slave->link = BOND_LINK_DOWN;
3148                         if (slave->link_failure_count < UINT_MAX)
3149                                 slave->link_failure_count++;
3150
3151                         bond_set_slave_inactive_flags(slave);
3152
3153                         printk(KERN_INFO DRV_NAME
3154                                ": %s: backup interface %s is now down.\n",
3155                                bond->dev->name, slave->dev->name);
3156                 }
3157         }
3158 }
3159
3160 void bond_activebackup_arp_mon(struct work_struct *work)
3161 {
3162         struct bonding *bond = container_of(work, struct bonding,
3163                                             arp_work.work);
3164         int delta_in_ticks;
3165
3166         read_lock(&bond->lock);
3167
3168         if (bond->kill_timers)
3169                 goto out;
3170
3171         delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);
3172
3173         if (bond->slave_cnt == 0)
3174                 goto re_arm;
3175
3176         if (bond->send_grat_arp) {
3177                 read_lock(&bond->curr_slave_lock);
3178                 bond_send_gratuitous_arp(bond);
3179                 read_unlock(&bond->curr_slave_lock);
3180         }
3181
3182         if (bond->send_unsol_na) {
3183                 read_lock(&bond->curr_slave_lock);
3184                 bond_send_unsolicited_na(bond);
3185                 read_unlock(&bond->curr_slave_lock);
3186         }
3187
3188         if (bond_ab_arp_inspect(bond, delta_in_ticks)) {
3189                 read_unlock(&bond->lock);
3190                 rtnl_lock();
3191                 read_lock(&bond->lock);
3192
3193                 bond_ab_arp_commit(bond, delta_in_ticks);
3194
3195                 read_unlock(&bond->lock);
3196                 rtnl_unlock();
3197                 read_lock(&bond->lock);
3198         }
3199
3200         bond_ab_arp_probe(bond);
3201
3202 re_arm:
3203         if (bond->params.arp_interval) {
3204                 queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks);
3205         }
3206 out:
3207         read_unlock(&bond->lock);
3208 }
3209
3210 /*------------------------------ proc/seq_file-------------------------------*/
3211
3212 #ifdef CONFIG_PROC_FS
3213
3214 static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos)
3215 {
3216         struct bonding *bond = seq->private;
3217         loff_t off = 0;
3218         struct slave *slave;
3219         int i;
3220
3221         /* make sure the bond won't be taken away */
3222         read_lock(&dev_base_lock);
3223         read_lock(&bond->lock);
3224
3225         if (*pos == 0) {
3226                 return SEQ_START_TOKEN;
3227         }
3228
3229         bond_for_each_slave(bond, slave, i) {
3230                 if (++off == *pos) {
3231                         return slave;
3232                 }
3233         }
3234
3235         return NULL;
3236 }
3237
3238 static void *bond_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3239 {
3240         struct bonding *bond = seq->private;
3241         struct slave *slave = v;
3242
3243         ++*pos;
3244         if (v == SEQ_START_TOKEN) {
3245                 return bond->first_slave;
3246         }
3247
3248         slave = slave->next;
3249
3250         return (slave == bond->first_slave) ? NULL : slave;
3251 }
3252
3253 static void bond_info_seq_stop(struct seq_file *seq, void *v)
3254 {
3255         struct bonding *bond = seq->private;
3256
3257         read_unlock(&bond->lock);
3258         read_unlock(&dev_base_lock);
3259 }
3260
3261 static void bond_info_show_master(struct seq_file *seq)
3262 {
3263         struct bonding *bond = seq->private;
3264         struct slave *curr;
3265         int i;
3266
3267         read_lock(&bond->curr_slave_lock);
3268         curr = bond->curr_active_slave;
3269         read_unlock(&bond->curr_slave_lock);
3270
3271         seq_printf(seq, "Bonding Mode: %s",
3272                    bond_mode_name(bond->params.mode));
3273
3274         if (bond->params.mode == BOND_MODE_ACTIVEBACKUP &&
3275             bond->params.fail_over_mac)
3276                 seq_printf(seq, " (fail_over_mac %s)",
3277                    fail_over_mac_tbl[bond->params.fail_over_mac].modename);
3278
3279         seq_printf(seq, "\n");
3280
3281         if (bond->params.mode == BOND_MODE_XOR ||
3282                 bond->params.mode == BOND_MODE_8023AD) {
3283                 seq_printf(seq, "Transmit Hash Policy: %s (%d)\n",
3284                         xmit_hashtype_tbl[bond->params.xmit_policy].modename,
3285                         bond->params.xmit_policy);
3286         }
3287
3288         if (USES_PRIMARY(bond->params.mode)) {
3289                 seq_printf(seq, "Primary Slave: %s\n",
3290                            (bond->primary_slave) ?
3291                            bond->primary_slave->dev->name : "None");
3292
3293                 seq_printf(seq, "Currently Active Slave: %s\n",
3294                            (curr) ? curr->dev->name : "None");
3295         }
3296
3297         seq_printf(seq, "MII Status: %s\n", netif_carrier_ok(bond->dev) ?
3298                    "up" : "down");
3299         seq_printf(seq, "MII Polling Interval (ms): %d\n", bond->params.miimon);
3300         seq_printf(seq, "Up Delay (ms): %d\n",
3301                    bond->params.updelay * bond->params.miimon);
3302         seq_printf(seq, "Down Delay (ms): %d\n",
3303                    bond->params.downdelay * bond->params.miimon);
3304
3305
3306         /* ARP information */
3307         if(bond->params.arp_interval > 0) {
3308                 int printed=0;
3309                 seq_printf(seq, "ARP Polling Interval (ms): %d\n",
3310                                 bond->params.arp_interval);
3311
3312                 seq_printf(seq, "ARP IP target/s (n.n.n.n form):");
3313
3314                 for(i = 0; (i < BOND_MAX_ARP_TARGETS) ;i++) {
3315                         if (!bond->params.arp_targets[i])
3316                                 continue;
3317                         if (printed)
3318                                 seq_printf(seq, ",");
3319                         seq_printf(seq, " %pI4", &bond->params.arp_targets[i]);
3320                         printed = 1;
3321                 }
3322                 seq_printf(seq, "\n");
3323         }
3324
3325         if (bond->params.mode == BOND_MODE_8023AD) {
3326                 struct ad_info ad_info;
3327
3328                 seq_puts(seq, "\n802.3ad info\n");
3329                 seq_printf(seq, "LACP rate: %s\n",
3330                            (bond->params.lacp_fast) ? "fast" : "slow");
3331                 seq_printf(seq, "Aggregator selection policy (ad_select): %s\n",
3332                            ad_select_tbl[bond->params.ad_select].modename);
3333
3334                 if (bond_3ad_get_active_agg_info(bond, &ad_info)) {
3335                         seq_printf(seq, "bond %s has no active aggregator\n",
3336                                    bond->dev->name);
3337                 } else {
3338                         seq_printf(seq, "Active Aggregator Info:\n");
3339
3340                         seq_printf(seq, "\tAggregator ID: %d\n",
3341                                    ad_info.aggregator_id);
3342                         seq_printf(seq, "\tNumber of ports: %d\n",
3343                                    ad_info.ports);
3344                         seq_printf(seq, "\tActor Key: %d\n",
3345                                    ad_info.actor_key);
3346                         seq_printf(seq, "\tPartner Key: %d\n",
3347                                    ad_info.partner_key);
3348                         seq_printf(seq, "\tPartner Mac Address: %pM\n",
3349                                    ad_info.partner_system);
3350                 }
3351         }
3352 }
3353
3354 static void bond_info_show_slave(struct seq_file *seq, const struct slave *slave)
3355 {
3356         struct bonding *bond = seq->private;
3357
3358         seq_printf(seq, "\nSlave Interface: %s\n", slave->dev->name);
3359         seq_printf(seq, "MII Status: %s\n",
3360                    (slave->link == BOND_LINK_UP) ?  "up" : "down");
3361         seq_printf(seq, "Link Failure Count: %u\n",
3362                    slave->link_failure_count);
3363
3364         seq_printf(seq, "Permanent HW addr: %pM\n", slave->perm_hwaddr);
3365
3366         if (bond->params.mode == BOND_MODE_8023AD) {
3367                 const struct aggregator *agg
3368                         = SLAVE_AD_INFO(slave).port.aggregator;
3369
3370                 if (agg) {
3371                         seq_printf(seq, "Aggregator ID: %d\n",
3372                                    agg->aggregator_identifier);
3373                 } else {
3374                         seq_puts(seq, "Aggregator ID: N/A\n");
3375                 }
3376         }
3377 }
3378
3379 static int bond_info_seq_show(struct seq_file *seq, void *v)
3380 {
3381         if (v == SEQ_START_TOKEN) {
3382                 seq_printf(seq, "%s\n", version);
3383                 bond_info_show_master(seq);
3384         } else {
3385                 bond_info_show_slave(seq, v);
3386         }
3387
3388         return 0;
3389 }
3390
3391 static struct seq_operations bond_info_seq_ops = {
3392         .start = bond_info_seq_start,
3393         .next  = bond_info_seq_next,
3394         .stop  = bond_info_seq_stop,
3395         .show  = bond_info_seq_show,
3396 };
3397
3398 static int bond_info_open(struct inode *inode, struct file *file)
3399 {
3400         struct seq_file *seq;
3401         struct proc_dir_entry *proc;
3402         int res;
3403
3404         res = seq_open(file, &bond_info_seq_ops);
3405         if (!res) {
3406                 /* recover the pointer buried in proc_dir_entry data */
3407                 seq = file->private_data;
3408                 proc = PDE(inode);
3409                 seq->private = proc->data;
3410         }
3411
3412         return res;
3413 }
3414
/*
 * File operations for the per-bond proc file.  Only open is
 * bonding-specific; read, llseek and release are the generic seq_file
 * helpers.
 */
static const struct file_operations bond_info_fops = {
        .owner   = THIS_MODULE,
        .open    = bond_info_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release,
};
3422
3423 static int bond_create_proc_entry(struct bonding *bond)
3424 {
3425         struct net_device *bond_dev = bond->dev;
3426
3427         if (bond_proc_dir) {
3428                 bond->proc_entry = proc_create_data(bond_dev->name,
3429                                                     S_IRUGO, bond_proc_dir,
3430                                                     &bond_info_fops, bond);
3431                 if (bond->proc_entry == NULL) {
3432                         printk(KERN_WARNING DRV_NAME
3433                                ": Warning: Cannot create /proc/net/%s/%s\n",
3434                                DRV_NAME, bond_dev->name);
3435                 } else {
3436                         memcpy(bond->proc_file_name, bond_dev->name, IFNAMSIZ);
3437                 }
3438         }
3439
3440         return 0;
3441 }
3442
3443 static void bond_remove_proc_entry(struct bonding *bond)
3444 {
3445         if (bond_proc_dir && bond->proc_entry) {
3446                 remove_proc_entry(bond->proc_file_name, bond_proc_dir);
3447                 memset(bond->proc_file_name, 0, IFNAMSIZ);
3448                 bond->proc_entry = NULL;
3449         }
3450 }
3451
3452 /* Create the bonding directory under /proc/net, if doesn't exist yet.
3453  * Caller must hold rtnl_lock.
3454  */
3455 static void bond_create_proc_dir(void)
3456 {
3457         int len = strlen(DRV_NAME);
3458
3459         for (bond_proc_dir = init_net.proc_net->subdir; bond_proc_dir;
3460              bond_proc_dir = bond_proc_dir->next) {
3461                 if ((bond_proc_dir->namelen == len) &&
3462                     !memcmp(bond_proc_dir->name, DRV_NAME, len)) {
3463                         break;
3464                 }
3465         }
3466
3467         if (!bond_proc_dir) {
3468                 bond_proc_dir = proc_mkdir(DRV_NAME, init_net.proc_net);
3469                 if (bond_proc_dir) {
3470                         bond_proc_dir->owner = THIS_MODULE;
3471                 } else {
3472                         printk(KERN_WARNING DRV_NAME
3473                                 ": Warning: cannot create /proc/net/%s\n",
3474                                 DRV_NAME);
3475                 }
3476         }
3477 }
3478
3479 /* Destroy the bonding directory under /proc/net, if empty.
3480  * Caller must hold rtnl_lock.
3481  */
3482 static void bond_destroy_proc_dir(void)
3483 {
3484         struct proc_dir_entry *de;
3485
3486         if (!bond_proc_dir) {
3487                 return;
3488         }
3489
3490         /* verify that the /proc dir is empty */
3491         for (de = bond_proc_dir->subdir; de; de = de->next) {
3492                 /* ignore . and .. */
3493                 if (*(de->name) != '.') {
3494                         break;
3495                 }
3496         }
3497
3498         if (de) {
3499                 if (bond_proc_dir->owner == THIS_MODULE) {
3500                         bond_proc_dir->owner = NULL;
3501                 }
3502         } else {
3503                 remove_proc_entry(DRV_NAME, init_net.proc_net);
3504                 bond_proc_dir = NULL;
3505         }
3506 }
3507 #endif /* CONFIG_PROC_FS */
3508
3509 /*-------------------------- netdev event handling --------------------------*/
3510
3511 /*
3512  * Change device name
3513  */
3514 static int bond_event_changename(struct bonding *bond)
3515 {
3516 #ifdef CONFIG_PROC_FS
3517         bond_remove_proc_entry(bond);
3518         bond_create_proc_entry(bond);
3519 #endif
3520         down_write(&(bonding_rwsem));
3521         bond_destroy_sysfs_entry(bond);
3522         bond_create_sysfs_entry(bond);
3523         up_write(&(bonding_rwsem));
3524         return NOTIFY_DONE;
3525 }
3526
3527 static int bond_master_netdev_event(unsigned long event, struct net_device *bond_dev)
3528 {
3529         struct bonding *event_bond = bond_dev->priv;
3530
3531         switch (event) {
3532         case NETDEV_CHANGENAME:
3533                 return bond_event_changename(event_bond);
3534         case NETDEV_UNREGISTER:
3535                 bond_release_all(event_bond->dev);
3536                 break;
3537         default:
3538                 break;
3539         }
3540
3541         return NOTIFY_DONE;
3542 }
3543
3544 static int bond_slave_netdev_event(unsigned long event, struct net_device *slave_dev)
3545 {
3546         struct net_device *bond_dev = slave_dev->master;
3547         struct bonding *bond = bond_dev->priv;
3548
3549         switch (event) {
3550         case NETDEV_UNREGISTER:
3551                 if (bond_dev) {
3552                         if (bond->setup_by_slave)
3553                                 bond_release_and_destroy(bond_dev, slave_dev);
3554                         else
3555                                 bond_release(bond_dev, slave_dev);
3556                 }
3557                 break;
3558         case NETDEV_CHANGE:
3559                 /*
3560                  * TODO: is this what we get if somebody
3561                  * sets up a hierarchical bond, then rmmod's
3562                  * one of the slave bonding devices?
3563                  */
3564                 break;
3565         case NETDEV_DOWN:
3566                 /*
3567                  * ... Or is it this?
3568                  */
3569                 break;
3570         case NETDEV_CHANGEMTU:
3571                 /*
3572                  * TODO: Should slaves be allowed to
3573                  * independently alter their MTU?  For
3574                  * an active-backup bond, slaves need
3575                  * not be the same type of device, so
3576                  * MTUs may vary.  For other modes,
3577                  * slaves arguably should have the
3578                  * same MTUs. To do this, we'd need to
3579                  * take over the slave's change_mtu
3580                  * function for the duration of their
3581                  * servitude.
3582                  */
3583                 break;
3584         case NETDEV_CHANGENAME:
3585                 /*
3586                  * TODO: handle changing the primary's name
3587                  */
3588                 break;
3589         case NETDEV_FEAT_CHANGE:
3590                 bond_compute_features(bond);
3591                 break;
3592         default:
3593                 break;
3594         }
3595
3596         return NOTIFY_DONE;
3597 }
3598
3599 /*
3600  * bond_netdev_event: handle netdev notifier chain events.
3601  *
3602  * This function receives events for the netdev chain.  The caller (an
3603  * ioctl handler calling blocking_notifier_call_chain) holds the necessary
3604  * locks for us to safely manipulate the slave devices (RTNL lock,
3605  * dev_probe_lock).
3606  */
3607 static int bond_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
3608 {
3609         struct net_device *event_dev = (struct net_device *)ptr;
3610
3611         if (dev_net(event_dev) != &init_net)
3612                 return NOTIFY_DONE;
3613
3614         dprintk("event_dev: %s, event: %lx\n",
3615                 (event_dev ? event_dev->name : "None"),
3616                 event);
3617
3618         if (!(event_dev->priv_flags & IFF_BONDING))
3619                 return NOTIFY_DONE;
3620
3621         if (event_dev->flags & IFF_MASTER) {
3622                 dprintk("IFF_MASTER\n");
3623                 return bond_master_netdev_event(event, event_dev);
3624         }
3625
3626         if (event_dev->flags & IFF_SLAVE) {
3627                 dprintk("IFF_SLAVE\n");
3628                 return bond_slave_netdev_event(event, event_dev);
3629         }
3630
3631         return NOTIFY_DONE;
3632 }
3633
3634 /*
3635  * bond_inetaddr_event: handle inetaddr notifier chain events.
3636  *
3637  * We keep track of device IPs primarily to use as source addresses in
3638  * ARP monitor probes (rather than spewing out broadcasts all the time).
3639  *
3640  * We track one IP for the main device (if it has one), plus one per VLAN.
3641  */
3642 static int bond_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
3643 {
3644         struct in_ifaddr *ifa = ptr;
3645         struct net_device *vlan_dev, *event_dev = ifa->ifa_dev->dev;
3646         struct bonding *bond;
3647         struct vlan_entry *vlan;
3648
3649         if (dev_net(ifa->ifa_dev->dev) != &init_net)
3650                 return NOTIFY_DONE;
3651
3652         list_for_each_entry(bond, &bond_dev_list, bond_list) {
3653                 if (bond->dev == event_dev) {
3654                         switch (event) {
3655                         case NETDEV_UP:
3656                                 bond->master_ip = ifa->ifa_local;
3657                                 return NOTIFY_OK;
3658                         case NETDEV_DOWN:
3659                                 bond->master_ip = bond_glean_dev_ip(bond->dev);
3660                                 return NOTIFY_OK;
3661                         default:
3662                                 return NOTIFY_DONE;
3663                         }
3664                 }
3665
3666                 list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
3667                         vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id);
3668                         if (vlan_dev == event_dev) {
3669                                 switch (event) {
3670                                 case NETDEV_UP:
3671                                         vlan->vlan_ip = ifa->ifa_local;
3672                                         return NOTIFY_OK;
3673                                 case NETDEV_DOWN:
3674                                         vlan->vlan_ip =
3675                                                 bond_glean_dev_ip(vlan_dev);
3676                                         return NOTIFY_OK;
3677                                 default:
3678                                         return NOTIFY_DONE;
3679                                 }
3680                         }
3681                 }
3682         }
3683         return NOTIFY_DONE;
3684 }
3685
/* notifier block whose callback is bond_netdev_event() */
static struct notifier_block bond_netdev_notifier = {
        .notifier_call = bond_netdev_event,
};

/* notifier block whose callback is bond_inetaddr_event() */
static struct notifier_block bond_inetaddr_notifier = {
        .notifier_call = bond_inetaddr_event,
};
3693
3694 /*-------------------------- Packet type handling ---------------------------*/
3695
3696 /* register to receive lacpdus on a bond */
3697 static void bond_register_lacpdu(struct bonding *bond)
3698 {
3699         struct packet_type *pk_type = &(BOND_AD_INFO(bond).ad_pkt_type);
3700
3701         /* initialize packet type */
3702         pk_type->type = PKT_TYPE_LACPDU;
3703         pk_type->dev = bond->dev;
3704         pk_type->func = bond_3ad_lacpdu_recv;
3705
3706         dev_add_pack(pk_type);
3707 }
3708
3709 /* unregister to receive lacpdus on a bond */
3710 static void bond_unregister_lacpdu(struct bonding *bond)
3711 {
3712         dev_remove_pack(&(BOND_AD_INFO(bond).ad_pkt_type));
3713 }
3714
3715 void bond_register_arp(struct bonding *bond)
3716 {
3717         struct packet_type *pt = &bond->arp_mon_pt;
3718
3719         if (pt->type)
3720                 return;
3721
3722         pt->type = htons(ETH_P_ARP);
3723         pt->dev = bond->dev;
3724         pt->func = bond_arp_rcv;
3725         dev_add_pack(pt);
3726 }
3727
3728 void bond_unregister_arp(struct bonding *bond)
3729 {
3730         struct packet_type *pt = &bond->arp_mon_pt;
3731
3732         dev_remove_pack(pt);
3733         pt->type = 0;
3734 }
3735
3736 /*---------------------------- Hashing Policies -----------------------------*/
3737
3738 /*
3739  * Hash for the output device based upon layer 2 and layer 3 data. If
3740  * the packet is not IP mimic bond_xmit_hash_policy_l2()
3741  */
3742 static int bond_xmit_hash_policy_l23(struct sk_buff *skb,
3743                                      struct net_device *bond_dev, int count)
3744 {
3745         struct ethhdr *data = (struct ethhdr *)skb->data;
3746         struct iphdr *iph = ip_hdr(skb);
3747
3748         if (skb->protocol == htons(ETH_P_IP)) {
3749                 return ((ntohl(iph->saddr ^ iph->daddr) & 0xffff) ^
3750                         (data->h_dest[5] ^ bond_dev->dev_addr[5])) % count;
3751         }
3752
3753         return (data->h_dest[5] ^ bond_dev->dev_addr[5]) % count;
3754 }
3755
3756 /*
3757  * Hash for the output device based upon layer 3 and layer 4 data. If
3758  * the packet is a frag or not TCP or UDP, just use layer 3 data.  If it is
3759  * altogether not IP, mimic bond_xmit_hash_policy_l2()
3760  */
3761 static int bond_xmit_hash_policy_l34(struct sk_buff *skb,
3762                                     struct net_device *bond_dev, int count)
3763 {
3764         struct ethhdr *data = (struct ethhdr *)skb->data;
3765         struct iphdr *iph = ip_hdr(skb);
3766         __be16 *layer4hdr = (__be16 *)((u32 *)iph + iph->ihl);
3767         int layer4_xor = 0;
3768
3769         if (skb->protocol == htons(ETH_P_IP)) {
3770                 if (!(iph->frag_off & htons(IP_MF|IP_OFFSET)) &&
3771                     (iph->protocol == IPPROTO_TCP ||
3772                      iph->protocol == IPPROTO_UDP)) {
3773                         layer4_xor = ntohs((*layer4hdr ^ *(layer4hdr + 1)));
3774                 }
3775                 return (layer4_xor ^
3776                         ((ntohl(iph->saddr ^ iph->daddr)) & 0xffff)) % count;
3777
3778         }
3779
3780         return (data->h_dest[5] ^ bond_dev->dev_addr[5]) % count;
3781 }
3782
3783 /*
3784  * Hash for the output device based upon layer 2 data
3785  */
3786 static int bond_xmit_hash_policy_l2(struct sk_buff *skb,
3787                                    struct net_device *bond_dev, int count)
3788 {
3789         struct ethhdr *data = (struct ethhdr *)skb->data;
3790
3791         return (data->h_dest[5] ^ bond_dev->dev_addr[5]) % count;
3792 }
3793
3794 /*-------------------------- Device entry points ----------------------------*/
3795
3796 static int bond_open(struct net_device *bond_dev)
3797 {
3798         struct bonding *bond = bond_dev->priv;
3799
3800         bond->kill_timers = 0;
3801
3802         if ((bond->params.mode == BOND_MODE_TLB) ||
3803             (bond->params.mode == BOND_MODE_ALB)) {
3804                 /* bond_alb_initialize must be called before the timer
3805                  * is started.
3806                  */
3807                 if (bond_alb_initialize(bond, (bond->params.mode == BOND_MODE_ALB))) {
3808                         /* something went wrong - fail the open operation */
3809                         return -1;
3810                 }
3811
3812                 INIT_DELAYED_WORK(&bond->alb_work, bond_alb_monitor);
3813                 queue_delayed_work(bond->wq, &bond->alb_work, 0);
3814         }
3815
3816         if (bond->params.miimon) {  /* link check interval, in milliseconds. */
3817                 INIT_DELAYED_WORK(&bond->mii_work, bond_mii_monitor);
3818                 queue_delayed_work(bond->wq, &bond->mii_work, 0);
3819         }
3820
3821         if (bond->params.arp_interval) {  /* arp interval, in milliseconds. */
3822                 if (bond->params.mode == BOND_MODE_ACTIVEBACKUP)
3823                         INIT_DELAYED_WORK(&bond->arp_work,
3824                                           bond_activebackup_arp_mon);
3825                 else
3826                         INIT_DELAYED_WORK(&bond->arp_work,
3827                                           bond_loadbalance_arp_mon);
3828
3829                 queue_delayed_work(bond->wq, &bond->arp_work, 0);
3830                 if (bond->params.arp_validate)
3831                         bond_register_arp(bond);
3832         }
3833
3834         if (bond->params.mode == BOND_MODE_8023AD) {
3835                 INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler);
3836                 queue_delayed_work(bond->wq, &bond->ad_work, 0);
3837                 /* register to receive LACPDUs */
3838                 bond_register_lacpdu(bond);
3839                 bond_3ad_initiate_agg_selection(bond, 1);
3840         }
3841
3842         return 0;
3843 }
3844
3845 static int bond_close(struct net_device *bond_dev)
3846 {
3847         struct bonding *bond = bond_dev->priv;
3848
3849         if (bond->params.mode == BOND_MODE_8023AD) {
3850                 /* Unregister the receive of LACPDUs */
3851                 bond_unregister_lacpdu(bond);
3852         }
3853
3854         if (bond->params.arp_validate)
3855                 bond_unregister_arp(bond);
3856
3857         write_lock_bh(&bond->lock);
3858
3859         bond->send_grat_arp = 0;
3860         bond->send_unsol_na = 0;
3861
3862         /* signal timers not to re-arm */
3863         bond->kill_timers = 1;
3864
3865         write_unlock_bh(&bond->lock);
3866
3867         if (bond->params.miimon) {  /* link check interval, in milliseconds. */
3868                 cancel_delayed_work(&bond->mii_work);
3869         }
3870
3871         if (bond->params.arp_interval) {  /* arp interval, in milliseconds. */
3872                 cancel_delayed_work(&bond->arp_work);
3873         }
3874
3875         switch (bond->params.mode) {
3876         case BOND_MODE_8023AD:
3877                 cancel_delayed_work(&bond->ad_work);
3878                 break;
3879         case BOND_MODE_TLB:
3880         case BOND_MODE_ALB:
3881                 cancel_delayed_work(&bond->alb_work);
3882                 break;
3883         default:
3884                 break;
3885         }
3886
3887
3888         if ((bond->params.mode == BOND_MODE_TLB) ||
3889             (bond->params.mode == BOND_MODE_ALB)) {
3890                 /* Must be called only after all
3891                  * slaves have been released
3892                  */
3893                 bond_alb_deinitialize(bond);
3894         }
3895
3896         return 0;
3897 }
3898
3899 static struct net_device_stats *bond_get_stats(struct net_device *bond_dev)
3900 {
3901         struct bonding *bond = bond_dev->priv;
3902         struct net_device_stats *stats = &(bond->stats), *sstats;
3903         struct net_device_stats local_stats;
3904         struct slave *slave;
3905         int i;
3906
3907         memset(&local_stats, 0, sizeof(struct net_device_stats));
3908
3909         read_lock_bh(&bond->lock);
3910
3911         bond_for_each_slave(bond, slave, i) {
3912                 sstats = slave->dev->get_stats(slave->dev);
3913                 local_stats.rx_packets += sstats->rx_packets;
3914                 local_stats.rx_bytes += sstats->rx_bytes;
3915                 local_stats.rx_errors += sstats->rx_errors;
3916                 local_stats.rx_dropped += sstats->rx_dropped;
3917
3918                 local_stats.tx_packets += sstats->tx_packets;
3919                 local_stats.tx_bytes += sstats->tx_bytes;
3920                 local_stats.tx_errors += sstats->tx_errors;
3921                 local_stats.tx_dropped += sstats->tx_dropped;
3922
3923                 local_stats.multicast += sstats->multicast;
3924                 local_stats.collisions += sstats->collisions;
3925
3926                 local_stats.rx_length_errors += sstats->rx_length_errors;
3927                 local_stats.rx_over_errors += sstats->rx_over_errors;
3928                 local_stats.rx_crc_errors += sstats->rx_crc_errors;
3929                 local_stats.rx_frame_errors += sstats->rx_frame_errors;
3930                 local_stats.rx_fifo_errors += sstats->rx_fifo_errors;
3931                 local_stats.rx_missed_errors += sstats->rx_missed_errors;
3932
3933                 local_stats.tx_aborted_errors += sstats->tx_aborted_errors;
3934                 local_stats.tx_carrier_errors += sstats->tx_carrier_errors;
3935                 local_stats.tx_fifo_errors += sstats->tx_fifo_errors;
3936                 local_stats.tx_heartbeat_errors += sstats->tx_heartbeat_errors;
3937                 local_stats.tx_window_errors += sstats->tx_window_errors;
3938         }
3939
3940         memcpy(stats, &local_stats, sizeof(struct net_device_stats));
3941
3942         read_unlock_bh(&bond->lock);
3943
3944         return stats;
3945 }
3946
3947 static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd)
3948 {
3949         struct net_device *slave_dev = NULL;
3950         struct ifbond k_binfo;
3951         struct ifbond __user *u_binfo = NULL;
3952         struct ifslave k_sinfo;
3953         struct ifslave __user *u_sinfo = NULL;
3954         struct mii_ioctl_data *mii = NULL;
3955         int res = 0;
3956
3957         dprintk("bond_ioctl: master=%s, cmd=%d\n",
3958                 bond_dev->name, cmd);
3959
3960         switch (cmd) {
3961         case SIOCGMIIPHY:
3962                 mii = if_mii(ifr);
3963                 if (!mii) {
3964                         return -EINVAL;
3965                 }
3966                 mii->phy_id = 0;
3967                 /* Fall Through */
3968         case SIOCGMIIREG:
3969                 /*
3970                  * We do this again just in case we were called by SIOCGMIIREG
3971                  * instead of SIOCGMIIPHY.
3972                  */
3973                 mii = if_mii(ifr);
3974                 if (!mii) {
3975                         return -EINVAL;
3976                 }
3977
3978                 if (mii->reg_num == 1) {
3979                         struct bonding *bond = bond_dev->priv;
3980                         mii->val_out = 0;
3981                         read_lock(&bond->lock);
3982                         read_lock(&bond->curr_slave_lock);
3983                         if (netif_carrier_ok(bond->dev)) {
3984                                 mii->val_out = BMSR_LSTATUS;
3985                         }
3986                         read_unlock(&bond->curr_slave_lock);
3987                         read_unlock(&bond->lock);
3988                 }
3989
3990                 return 0;
3991         case BOND_INFO_QUERY_OLD:
3992         case SIOCBONDINFOQUERY:
3993                 u_binfo = (struct ifbond __user *)ifr->ifr_data;
3994
3995                 if (copy_from_user(&k_binfo, u_binfo, sizeof(ifbond))) {
3996                         return -EFAULT;
3997                 }
3998
3999                 res = bond_info_query(bond_dev, &k_binfo);
4000                 if (res == 0) {
4001                         if (copy_to_user(u_binfo, &k_binfo, sizeof(ifbond))) {
4002                                 return -EFAULT;
4003                         }
4004                 }
4005
4006                 return res;
4007         case BOND_SLAVE_INFO_QUERY_OLD:
4008         case SIOCBONDSLAVEINFOQUERY:
4009                 u_sinfo = (struct ifslave __user *)ifr->ifr_data;
4010
4011                 if (copy_from_user(&k_sinfo, u_sinfo, sizeof(ifslave))) {
4012                         return -EFAULT;
4013                 }
4014
4015                 res = bond_slave_info_query(bond_dev, &k_sinfo);
4016                 if (res == 0) {
4017                         if (copy_to_user(u_sinfo, &k_sinfo, sizeof(ifslave))) {
4018                                 return -EFAULT;
4019                         }
4020                 }
4021
4022                 return res;
4023         default:
4024                 /* Go on */
4025                 break;
4026         }
4027
4028         if (!capable(CAP_NET_ADMIN)) {
4029                 return -EPERM;
4030         }
4031
4032         down_write(&(bonding_rwsem));
4033         slave_dev = dev_get_by_name(&init_net, ifr->ifr_slave);
4034
4035         dprintk("slave_dev=%p: \n", slave_dev);
4036
4037         if (!slave_dev) {
4038                 res = -ENODEV;
4039         } else {
4040                 dprintk("slave_dev->name=%s: \n", slave_dev->name);
4041                 switch (cmd) {
4042                 case BOND_ENSLAVE_OLD:
4043                 case SIOCBONDENSLAVE:
4044                         res = bond_enslave(bond_dev, slave_dev);
4045                         break;
4046                 case BOND_RELEASE_OLD:
4047                 case SIOCBONDRELEASE:
4048                         res = bond_release(bond_dev, slave_dev);
4049                         break;
4050                 case BOND_SETHWADDR_OLD:
4051                 case SIOCBONDSETHWADDR:
4052                         res = bond_sethwaddr(bond_dev, slave_dev);
4053                         break;
4054                 case BOND_CHANGE_ACTIVE_OLD:
4055                 case SIOCBONDCHANGEACTIVE:
4056                         res = bond_ioctl_change_active(bond_dev, slave_dev);
4057                         break;
4058                 default:
4059                         res = -EOPNOTSUPP;
4060                 }
4061
4062                 dev_put(slave_dev);
4063         }
4064
4065         up_write(&(bonding_rwsem));
4066         return res;
4067 }
4068
4069 static void bond_set_multicast_list(struct net_device *bond_dev)
4070 {
4071         struct bonding *bond = bond_dev->priv;
4072         struct dev_mc_list *dmi;
4073
4074         /*
4075          * Do promisc before checking multicast_mode
4076          */
4077         if ((bond_dev->flags & IFF_PROMISC) && !(bond->flags & IFF_PROMISC)) {
4078                 /*
4079                  * FIXME: Need to handle the error when one of the multi-slaves
4080                  * encounters error.
4081                  */
4082                 bond_set_promiscuity(bond, 1);
4083         }
4084
4085         if (!(bond_dev->flags & IFF_PROMISC) && (bond->flags & IFF_PROMISC)) {
4086                 bond_set_promiscuity(bond, -1);
4087         }
4088
4089         /* set allmulti flag to slaves */
4090         if ((bond_dev->flags & IFF_ALLMULTI) && !(bond->flags & IFF_ALLMULTI)) {
4091                 /*
4092                  * FIXME: Need to handle the error when one of the multi-slaves
4093                  * encounters error.
4094                  */
4095                 bond_set_allmulti(bond, 1);
4096         }
4097
4098         if (!(bond_dev->flags & IFF_ALLMULTI) && (bond->flags & IFF_ALLMULTI)) {
4099                 bond_set_allmulti(bond, -1);
4100         }
4101
4102         read_lock(&bond->lock);
4103
4104         bond->flags = bond_dev->flags;
4105
4106         /* looking for addresses to add to slaves' mc list */
4107         for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) {
4108                 if (!bond_mc_list_find_dmi(dmi, bond->mc_list)) {
4109                         bond_mc_add(bond, dmi->dmi_addr, dmi->dmi_addrlen);
4110                 }
4111         }
4112
4113         /* looking for addresses to delete from slaves' list */
4114         for (dmi = bond->mc_list; dmi; dmi = dmi->next) {
4115                 if (!bond_mc_list_find_dmi(dmi, bond_dev->mc_list)) {
4116                         bond_mc_delete(bond, dmi->dmi_addr, dmi->dmi_addrlen);
4117                 }
4118         }
4119
4120         /* save master's multicast list */
4121         bond_mc_list_destroy(bond);
4122         bond_mc_list_copy(bond_dev->mc_list, bond, GFP_ATOMIC);
4123
4124         read_unlock(&bond->lock);
4125 }
4126
4127 /*
4128  * Change the MTU of all of a master's slaves to match the master
4129  */
4130 static int bond_change_mtu(struct net_device *bond_dev, int new_mtu)
4131 {
4132         struct bonding *bond = bond_dev->priv;
4133         struct slave *slave, *stop_at;
4134         int res = 0;
4135         int i;
4136
4137         dprintk("bond=%p, name=%s, new_mtu=%d\n", bond,
4138                 (bond_dev ? bond_dev->name : "None"), new_mtu);
4139
4140         /* Can't hold bond->lock with bh disabled here since
4141          * some base drivers panic. On the other hand we can't
4142          * hold bond->lock without bh disabled because we'll
4143          * deadlock. The only solution is to rely on the fact
4144          * that we're under rtnl_lock here, and the slaves
4145          * list won't change. This doesn't solve the problem
4146          * of setting the slave's MTU while it is
4147          * transmitting, but the assumption is that the base
4148          * driver can handle that.
4149          *
4150          * TODO: figure out a way to safely iterate the slaves
4151          * list, but without holding a lock around the actual
4152          * call to the base driver.
4153          */
4154
4155         bond_for_each_slave(bond, slave, i) {
4156                 dprintk("s %p s->p %p c_m %p\n", slave,
4157                         slave->prev, slave->dev->change_mtu);
4158
4159                 res = dev_set_mtu(slave->dev, new_mtu);
4160
4161                 if (res) {
4162                         /* If we failed to set the slave's mtu to the new value
4163                          * we must abort the operation even in ACTIVE_BACKUP
4164                          * mode, because if we allow the backup slaves to have
4165                          * different mtu values than the active slave we'll
4166                          * need to change their mtu when doing a failover. That
4167                          * means changing their mtu from timer context, which
4168                          * is probably not a good idea.
4169                          */
4170                         dprintk("err %d %s\n", res, slave->dev->name);
4171                         goto unwind;
4172                 }
4173         }
4174
4175         bond_dev->mtu = new_mtu;
4176
4177         return 0;
4178
4179 unwind:
4180         /* unwind from head to the slave that failed */
4181         stop_at = slave;
4182         bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) {
4183                 int tmp_res;
4184
4185                 tmp_res = dev_set_mtu(slave->dev, bond_dev->mtu);
4186                 if (tmp_res) {
4187                         dprintk("unwind err %d dev %s\n", tmp_res,
4188                                 slave->dev->name);
4189                 }
4190         }
4191
4192         return res;
4193 }
4194
4195 /*
4196  * Change HW address
4197  *
4198  * Note that many devices must be down to change the HW address, and
4199  * downing the master releases all slaves.  We can make bonds full of
4200  * bonding devices to test this, however.
4201  */
4202 static int bond_set_mac_address(struct net_device *bond_dev, void *addr)
4203 {
4204         struct bonding *bond = bond_dev->priv;
4205         struct sockaddr *sa = addr, tmp_sa;
4206         struct slave *slave, *stop_at;
4207         int res = 0;
4208         int i;
4209
4210         dprintk("bond=%p, name=%s\n", bond, (bond_dev ? bond_dev->name : "None"));
4211
4212         /*
4213          * If fail_over_mac is set to active, do nothing and return
4214          * success.  Returning an error causes ifenslave to fail.
4215          */
4216         if (bond->params.fail_over_mac == BOND_FOM_ACTIVE)
4217                 return 0;
4218
4219         if (!is_valid_ether_addr(sa->sa_data)) {
4220                 return -EADDRNOTAVAIL;
4221         }
4222
4223         /* Can't hold bond->lock with bh disabled here since
4224          * some base drivers panic. On the other hand we can't
4225          * hold bond->lock without bh disabled because we'll
4226          * deadlock. The only solution is to rely on the fact
4227          * that we're under rtnl_lock here, and the slaves
4228          * list won't change. This doesn't solve the problem
4229          * of setting the slave's hw address while it is
4230          * transmitting, but the assumption is that the base
4231          * driver can handle that.
4232          *
4233          * TODO: figure out a way to safely iterate the slaves
4234          * list, but without holding a lock around the actual
4235          * call to the base driver.
4236          */
4237
4238         bond_for_each_slave(bond, slave, i) {
4239                 dprintk("slave %p %s\n", slave, slave->dev->name);
4240
4241                 if (slave->dev->set_mac_address == NULL) {
4242                         res = -EOPNOTSUPP;
4243                         dprintk("EOPNOTSUPP %s\n", slave->dev->name);
4244                         goto unwind;
4245                 }
4246
4247                 res = dev_set_mac_address(slave->dev, addr);
4248                 if (res) {
4249                         /* TODO: consider downing the slave
4250                          * and retry ?
4251                          * User should expect communications
4252                          * breakage anyway until ARP finish
4253                          * updating, so...
4254                          */
4255                         dprintk("err %d %s\n", res, slave->dev->name);
4256                         goto unwind;
4257                 }
4258         }
4259
4260         /* success */
4261         memcpy(bond_dev->dev_addr, sa->sa_data, bond_dev->addr_len);
4262         return 0;
4263
4264 unwind:
4265         memcpy(tmp_sa.sa_data, bond_dev->dev_addr, bond_dev->addr_len);
4266         tmp_sa.sa_family = bond_dev->type;
4267
4268         /* unwind from head to the slave that failed */
4269         stop_at = slave;
4270         bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) {
4271                 int tmp_res;
4272
4273                 tmp_res = dev_set_mac_address(slave->dev, &tmp_sa);
4274                 if (tmp_res) {
4275                         dprintk("unwind err %d dev %s\n", tmp_res,
4276                                 slave->dev->name);
4277                 }
4278         }
4279
4280         return res;
4281 }
4282
4283 static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev)
4284 {
4285         struct bonding *bond = bond_dev->priv;
4286         struct slave *slave, *start_at;
4287         int i, slave_no, res = 1;
4288
4289         read_lock(&bond->lock);
4290
4291         if (!BOND_IS_OK(bond)) {
4292                 goto out;
4293         }
4294
4295         /*
4296          * Concurrent TX may collide on rr_tx_counter; we accept that
4297          * as being rare enough not to justify using an atomic op here
4298          */
4299         slave_no = bond->rr_tx_counter++ % bond->slave_cnt;
4300
4301         bond_for_each_slave(bond, slave, i) {
4302                 slave_no--;
4303                 if (slave_no < 0) {
4304                         break;
4305                 }
4306         }
4307
4308         start_at = slave;
4309         bond_for_each_slave_from(bond, slave, i, start_at) {
4310                 if (IS_UP(slave->dev) &&
4311                     (slave->link == BOND_LINK_UP) &&
4312                     (slave->state == BOND_STATE_ACTIVE)) {
4313                         res = bond_dev_queue_xmit(bond, skb, slave->dev);
4314                         break;
4315                 }
4316         }
4317
4318 out:
4319         if (res) {
4320                 /* no suitable interface, frame not sent */
4321                 dev_kfree_skb(skb);
4322         }
4323         read_unlock(&bond->lock);
4324         return 0;
4325 }
4326
4327
4328 /*
4329  * in active-backup mode, we know that bond->curr_active_slave is always valid if
4330  * the bond has a usable interface.
4331  */
4332 static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_dev)
4333 {
4334         struct bonding *bond = bond_dev->priv;
4335         int res = 1;
4336
4337         read_lock(&bond->lock);
4338         read_lock(&bond->curr_slave_lock);
4339
4340         if (!BOND_IS_OK(bond)) {
4341                 goto out;
4342         }
4343
4344         if (!bond->curr_active_slave)
4345                 goto out;
4346
4347         res = bond_dev_queue_xmit(bond, skb, bond->curr_active_slave->dev);
4348
4349 out:
4350         if (res) {
4351                 /* no suitable interface, frame not sent */
4352                 dev_kfree_skb(skb);
4353         }
4354         read_unlock(&bond->curr_slave_lock);
4355         read_unlock(&bond->lock);
4356         return 0;
4357 }
4358
4359 /*
4360  * In bond_xmit_xor() , we determine the output device by using a pre-
4361  * determined xmit_hash_policy(), If the selected device is not enabled,
4362  * find the next active slave.
4363  */
4364 static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev)
4365 {
4366         struct bonding *bond = bond_dev->priv;
4367         struct slave *slave, *start_at;
4368         int slave_no;
4369         int i;
4370         int res = 1;
4371
4372         read_lock(&bond->lock);
4373
4374         if (!BOND_IS_OK(bond)) {
4375                 goto out;
4376         }
4377
4378         slave_no = bond->xmit_hash_policy(skb, bond_dev, bond->slave_cnt);
4379
4380         bond_for_each_slave(bond, slave, i) {
4381                 slave_no--;
4382                 if (slave_no < 0) {
4383                         break;
4384                 }
4385         }
4386
4387         start_at = slave;
4388
4389         bond_for_each_slave_from(bond, slave, i, start_at) {
4390                 if (IS_UP(slave->dev) &&
4391                     (slave->link == BOND_LINK_UP) &&
4392                     (slave->state == BOND_STATE_ACTIVE)) {
4393                         res = bond_dev_queue_xmit(bond, skb, slave->dev);
4394                         break;
4395                 }
4396         }
4397
4398 out:
4399         if (res) {
4400                 /* no suitable interface, frame not sent */
4401                 dev_kfree_skb(skb);
4402         }
4403         read_unlock(&bond->lock);
4404         return 0;
4405 }
4406
4407 /*
4408  * in broadcast mode, we send everything to all usable interfaces.
4409  */
4410 static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev)
4411 {
4412         struct bonding *bond = bond_dev->priv;
4413         struct slave *slave, *start_at;
4414         struct net_device *tx_dev = NULL;
4415         int i;
4416         int res = 1;
4417
4418         read_lock(&bond->lock);
4419
4420         if (!BOND_IS_OK(bond)) {
4421                 goto out;
4422         }
4423
4424         read_lock(&bond->curr_slave_lock);
4425         start_at = bond->curr_active_slave;
4426         read_unlock(&bond->curr_slave_lock);
4427
4428         if (!start_at) {
4429                 goto out;
4430         }
4431
4432         bond_for_each_slave_from(bond, slave, i, start_at) {
4433                 if (IS_UP(slave->dev) &&
4434                     (slave->link == BOND_LINK_UP) &&
4435                     (slave->state == BOND_STATE_ACTIVE)) {
4436                         if (tx_dev) {
4437                                 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
4438                                 if (!skb2) {
4439                                         printk(KERN_ERR DRV_NAME
4440                                                ": %s: Error: bond_xmit_broadcast(): "
4441                                                "skb_clone() failed\n",
4442                                                bond_dev->name);
4443                                         continue;
4444                                 }
4445
4446                                 res = bond_dev_queue_xmit(bond, skb2, tx_dev);
4447                                 if (res) {
4448                                         dev_kfree_skb(skb2);
4449                                         continue;
4450                                 }
4451                         }
4452                         tx_dev = slave->dev;
4453                 }
4454         }
4455
4456         if (tx_dev) {
4457                 res = bond_dev_queue_xmit(bond, skb, tx_dev);
4458         }
4459
4460 out:
4461         if (res) {
4462                 /* no suitable interface, frame not sent */
4463                 dev_kfree_skb(skb);
4464         }
4465         /* frame sent to all suitable interfaces */
4466         read_unlock(&bond->lock);
4467         return 0;
4468 }
4469
4470 /*------------------------- Device initialization ---------------------------*/
4471
4472 static void bond_set_xmit_hash_policy(struct bonding *bond)
4473 {
4474         switch (bond->params.xmit_policy) {
4475         case BOND_XMIT_POLICY_LAYER23:
4476                 bond->xmit_hash_policy = bond_xmit_hash_policy_l23;
4477                 break;
4478         case BOND_XMIT_POLICY_LAYER34:
4479                 bond->xmit_hash_policy = bond_xmit_hash_policy_l34;
4480                 break;
4481         case BOND_XMIT_POLICY_LAYER2:
4482         default:
4483                 bond->xmit_hash_policy = bond_xmit_hash_policy_l2;
4484                 break;
4485         }
4486 }
4487
4488 /*
4489  * set bond mode specific net device operations
4490  */
4491 void bond_set_mode_ops(struct bonding *bond, int mode)
4492 {
4493         struct net_device *bond_dev = bond->dev;
4494
4495         switch (mode) {
4496         case BOND_MODE_ROUNDROBIN:
4497                 bond_dev->hard_start_xmit = bond_xmit_roundrobin;
4498                 break;
4499         case BOND_MODE_ACTIVEBACKUP:
4500                 bond_dev->hard_start_xmit = bond_xmit_activebackup;
4501                 break;
4502         case BOND_MODE_XOR:
4503                 bond_dev->hard_start_xmit = bond_xmit_xor;
4504                 bond_set_xmit_hash_policy(bond);
4505                 break;
4506         case BOND_MODE_BROADCAST:
4507                 bond_dev->hard_start_xmit = bond_xmit_broadcast;
4508                 break;
4509         case BOND_MODE_8023AD:
4510                 bond_set_master_3ad_flags(bond);
4511                 bond_dev->hard_start_xmit = bond_3ad_xmit_xor;
4512                 bond_set_xmit_hash_policy(bond);
4513                 break;
4514         case BOND_MODE_ALB:
4515                 bond_set_master_alb_flags(bond);
4516                 /* FALLTHRU */
4517         case BOND_MODE_TLB:
4518                 bond_dev->hard_start_xmit = bond_alb_xmit;
4519                 bond_dev->set_mac_address = bond_alb_set_mac_address;
4520                 break;
4521         default:
4522                 /* Should never happen, mode already checked */
4523                 printk(KERN_ERR DRV_NAME
4524                        ": %s: Error: Unknown bonding mode %d\n",
4525                        bond_dev->name,
4526                        mode);
4527                 break;
4528         }
4529 }
4530
4531 static void bond_ethtool_get_drvinfo(struct net_device *bond_dev,
4532                                     struct ethtool_drvinfo *drvinfo)
4533 {
4534         strncpy(drvinfo->driver, DRV_NAME, 32);
4535         strncpy(drvinfo->version, DRV_VERSION, 32);
4536         snprintf(drvinfo->fw_version, 32, "%d", BOND_ABI_VERSION);
4537 }
4538
4539 static const struct ethtool_ops bond_ethtool_ops = {
4540         .get_drvinfo            = bond_ethtool_get_drvinfo,
4541         .get_link               = ethtool_op_get_link,
4542         .get_tx_csum            = ethtool_op_get_tx_csum,
4543         .get_sg                 = ethtool_op_get_sg,
4544         .get_tso                = ethtool_op_get_tso,
4545         .get_ufo                = ethtool_op_get_ufo,
4546         .get_flags              = ethtool_op_get_flags,
4547 };
4548
4549 /*
4550  * Does not allocate but creates a /proc entry.
4551  * Allowed to fail.
4552  */
4553 static int bond_init(struct net_device *bond_dev, struct bond_params *params)
4554 {
4555         struct bonding *bond = bond_dev->priv;
4556
4557         dprintk("Begin bond_init for %s\n", bond_dev->name);
4558
4559         /* initialize rwlocks */
4560         rwlock_init(&bond->lock);
4561         rwlock_init(&bond->curr_slave_lock);
4562
4563         bond->params = *params; /* copy params struct */
4564
4565         bond->wq = create_singlethread_workqueue(bond_dev->name);
4566         if (!bond->wq)
4567                 return -ENOMEM;
4568
4569         /* Initialize pointers */
4570         bond->first_slave = NULL;
4571         bond->curr_active_slave = NULL;
4572         bond->current_arp_slave = NULL;
4573         bond->primary_slave = NULL;
4574         bond->dev = bond_dev;
4575         bond->send_grat_arp = 0;
4576         bond->send_unsol_na = 0;
4577         bond->setup_by_slave = 0;
4578         INIT_LIST_HEAD(&bond->vlan_list);
4579
4580         /* Initialize the device entry points */
4581         bond_dev->open = bond_open;
4582         bond_dev->stop = bond_close;
4583         bond_dev->get_stats = bond_get_stats;
4584         bond_dev->do_ioctl = bond_do_ioctl;
4585         bond_dev->ethtool_ops = &bond_ethtool_ops;
4586         bond_dev->set_multicast_list = bond_set_multicast_list;
4587         bond_dev->change_mtu = bond_change_mtu;
4588         bond_dev->set_mac_address = bond_set_mac_address;
4589         bond_dev->validate_addr = NULL;
4590
4591         bond_set_mode_ops(bond, bond->params.mode);
4592
4593         bond_dev->destructor = bond_destructor;
4594
4595         /* Initialize the device options */
4596         bond_dev->tx_queue_len = 0;
4597         bond_dev->flags |= IFF_MASTER|IFF_MULTICAST;
4598         bond_dev->priv_flags |= IFF_BONDING;
4599         if (bond->params.arp_interval)
4600                 bond_dev->priv_flags |= IFF_MASTER_ARPMON;
4601
4602         /* At first, we block adding VLANs. That's the only way to
4603          * prevent problems that occur when adding VLANs over an
4604          * empty bond. The block will be removed once non-challenged
4605          * slaves are enslaved.
4606          */
4607         bond_dev->features |= NETIF_F_VLAN_CHALLENGED;
4608
4609         /* don't acquire bond device's netif_tx_lock when
4610          * transmitting */
4611         bond_dev->features |= NETIF_F_LLTX;
4612
4613         /* By default, we declare the bond to be fully
4614          * VLAN hardware accelerated capable. Special
4615          * care is taken in the various xmit functions
4616          * when there are slaves that are not hw accel
4617          * capable
4618          */
4619         bond_dev->vlan_rx_register = bond_vlan_rx_register;
4620         bond_dev->vlan_rx_add_vid  = bond_vlan_rx_add_vid;
4621         bond_dev->vlan_rx_kill_vid = bond_vlan_rx_kill_vid;
4622         bond_dev->features |= (NETIF_F_HW_VLAN_TX |
4623                                NETIF_F_HW_VLAN_RX |
4624                                NETIF_F_HW_VLAN_FILTER);
4625
4626 #ifdef CONFIG_PROC_FS
4627         bond_create_proc_entry(bond);
4628 #endif
4629         list_add_tail(&bond->bond_list, &bond_dev_list);
4630
4631         return 0;
4632 }
4633
4634 static void bond_work_cancel_all(struct bonding *bond)
4635 {
4636         write_lock_bh(&bond->lock);
4637         bond->kill_timers = 1;
4638         write_unlock_bh(&bond->lock);
4639
4640         if (bond->params.miimon && delayed_work_pending(&bond->mii_work))
4641                 cancel_delayed_work(&bond->mii_work);
4642
4643         if (bond->params.arp_interval && delayed_work_pending(&bond->arp_work))
4644                 cancel_delayed_work(&bond->arp_work);
4645
4646         if (bond->params.mode == BOND_MODE_ALB &&
4647             delayed_work_pending(&bond->alb_work))
4648                 cancel_delayed_work(&bond->alb_work);
4649
4650         if (bond->params.mode == BOND_MODE_8023AD &&
4651             delayed_work_pending(&bond->ad_work))
4652                 cancel_delayed_work(&bond->ad_work);
4653 }
4654
4655 /* De-initialize device specific data.
4656  * Caller must hold rtnl_lock.
4657  */
4658 static void bond_deinit(struct net_device *bond_dev)
4659 {
4660         struct bonding *bond = bond_dev->priv;
4661
4662         list_del(&bond->bond_list);
4663
4664         bond_work_cancel_all(bond);
4665
4666 #ifdef CONFIG_PROC_FS
4667         bond_remove_proc_entry(bond);
4668 #endif
4669 }
4670
4671 /* Unregister and free all bond devices.
4672  * Caller must hold rtnl_lock.
4673  */
4674 static void bond_free_all(void)
4675 {
4676         struct bonding *bond, *nxt;
4677
4678         list_for_each_entry_safe(bond, nxt, &bond_dev_list, bond_list) {
4679                 struct net_device *bond_dev = bond->dev;
4680
4681                 bond_work_cancel_all(bond);
4682                 /* Release the bonded slaves */
4683                 bond_release_all(bond_dev);
4684                 bond_destroy(bond);
4685         }
4686
4687 #ifdef CONFIG_PROC_FS
4688         bond_destroy_proc_dir();
4689 #endif
4690 }
4691
4692 /*------------------------- Module initialization ---------------------------*/
4693
4694 /*
4695  * Convert string input module parms.  Accept either the
4696  * number of the mode or its string name.  A bit complicated because
4697  * some mode names are substrings of other names, and calls from sysfs
4698  * may have whitespace in the name (trailing newlines, for example).
4699  */
4700 int bond_parse_parm(const char *buf, struct bond_parm_tbl *tbl)
4701 {
4702         int mode = -1, i, rv;
4703         char *p, modestr[BOND_MAX_MODENAME_LEN + 1] = { 0, };
4704
4705         for (p = (char *)buf; *p; p++)
4706                 if (!(isdigit(*p) || isspace(*p)))
4707                         break;
4708
4709         if (*p)
4710                 rv = sscanf(buf, "%20s", modestr);
4711         else
4712                 rv = sscanf(buf, "%d", &mode);
4713
4714         if (!rv)
4715                 return -1;
4716
4717         for (i = 0; tbl[i].modename; i++) {
4718                 if (mode == tbl[i].mode)
4719                         return tbl[i].mode;
4720                 if (strcmp(modestr, tbl[i].modename) == 0)
4721                         return tbl[i].mode;
4722         }
4723
4724         return -1;
4725 }
4726
4727 static int bond_check_params(struct bond_params *params)
4728 {
4729         int arp_validate_value, fail_over_mac_value;
4730
4731         /*
4732          * Convert string parameters.
4733          */
4734         if (mode) {
4735                 bond_mode = bond_parse_parm(mode, bond_mode_tbl);
4736                 if (bond_mode == -1) {
4737                         printk(KERN_ERR DRV_NAME
4738                                ": Error: Invalid bonding mode \"%s\"\n",
4739                                mode == NULL ? "NULL" : mode);
4740                         return -EINVAL;
4741                 }
4742         }
4743
4744         if (xmit_hash_policy) {
4745                 if ((bond_mode != BOND_MODE_XOR) &&
4746                     (bond_mode != BOND_MODE_8023AD)) {
4747                         printk(KERN_INFO DRV_NAME
4748                                ": xor_mode param is irrelevant in mode %s\n",
4749                                bond_mode_name(bond_mode));
4750                 } else {
4751                         xmit_hashtype = bond_parse_parm(xmit_hash_policy,
4752                                                         xmit_hashtype_tbl);
4753                         if (xmit_hashtype == -1) {
4754                                 printk(KERN_ERR DRV_NAME
4755                                 ": Error: Invalid xmit_hash_policy \"%s\"\n",
4756                                 xmit_hash_policy == NULL ? "NULL" :
4757                                        xmit_hash_policy);
4758                                 return -EINVAL;
4759                         }
4760                 }
4761         }
4762
4763         if (lacp_rate) {
4764                 if (bond_mode != BOND_MODE_8023AD) {
4765                         printk(KERN_INFO DRV_NAME
4766                                ": lacp_rate param is irrelevant in mode %s\n",
4767                                bond_mode_name(bond_mode));
4768                 } else {
4769                         lacp_fast = bond_parse_parm(lacp_rate, bond_lacp_tbl);
4770                         if (lacp_fast == -1) {
4771                                 printk(KERN_ERR DRV_NAME
4772                                        ": Error: Invalid lacp rate \"%s\"\n",
4773                                        lacp_rate == NULL ? "NULL" : lacp_rate);
4774                                 return -EINVAL;
4775                         }
4776                 }
4777         }
4778
4779         if (ad_select) {
4780                 params->ad_select = bond_parse_parm(ad_select, ad_select_tbl);
4781                 if (params->ad_select == -1) {
4782                         printk(KERN_ERR DRV_NAME
4783                                ": Error: Invalid ad_select \"%s\"\n",
4784                                ad_select == NULL ? "NULL" : ad_select);
4785                         return -EINVAL;
4786                 }
4787
4788                 if (bond_mode != BOND_MODE_8023AD) {
4789                         printk(KERN_WARNING DRV_NAME
4790                                ": ad_select param only affects 802.3ad mode\n");
4791                 }
4792         } else {
4793                 params->ad_select = BOND_AD_STABLE;
4794         }
4795
4796         if (max_bonds < 0 || max_bonds > INT_MAX) {
4797                 printk(KERN_WARNING DRV_NAME
4798                        ": Warning: max_bonds (%d) not in range %d-%d, so it "
4799                        "was reset to BOND_DEFAULT_MAX_BONDS (%d)\n",
4800                        max_bonds, 0, INT_MAX, BOND_DEFAULT_MAX_BONDS);
4801                 max_bonds = BOND_DEFAULT_MAX_BONDS;
4802         }
4803
4804         if (miimon < 0) {
4805                 printk(KERN_WARNING DRV_NAME
4806                        ": Warning: miimon module parameter (%d), "
4807                        "not in range 0-%d, so it was reset to %d\n",
4808                        miimon, INT_MAX, BOND_LINK_MON_INTERV);
4809                 miimon = BOND_LINK_MON_INTERV;
4810         }
4811
4812         if (updelay < 0) {
4813                 printk(KERN_WARNING DRV_NAME
4814                        ": Warning: updelay module parameter (%d), "
4815                        "not in range 0-%d, so it was reset to 0\n",
4816                        updelay, INT_MAX);
4817                 updelay = 0;
4818         }
4819
4820         if (downdelay < 0) {
4821                 printk(KERN_WARNING DRV_NAME
4822                        ": Warning: downdelay module parameter (%d), "
4823                        "not in range 0-%d, so it was reset to 0\n",
4824                        downdelay, INT_MAX);
4825                 downdelay = 0;
4826         }
4827
4828         if ((use_carrier != 0) && (use_carrier != 1)) {
4829                 printk(KERN_WARNING DRV_NAME
4830                        ": Warning: use_carrier module parameter (%d), "
4831                        "not of valid value (0/1), so it was set to 1\n",
4832                        use_carrier);
4833                 use_carrier = 1;
4834         }
4835
4836         if (num_grat_arp < 0 || num_grat_arp > 255) {
4837                 printk(KERN_WARNING DRV_NAME
4838                        ": Warning: num_grat_arp (%d) not in range 0-255 so it "
4839                        "was reset to 1 \n", num_grat_arp);
4840                 num_grat_arp = 1;
4841         }
4842
4843         if (num_unsol_na < 0 || num_unsol_na > 255) {
4844                 printk(KERN_WARNING DRV_NAME
4845                        ": Warning: num_unsol_na (%d) not in range 0-255 so it "
4846                        "was reset to 1 \n", num_unsol_na);
4847                 num_unsol_na = 1;
4848         }
4849
4850         /* reset values for 802.3ad */
4851         if (bond_mode == BOND_MODE_8023AD) {
4852                 if (!miimon) {
4853                         printk(KERN_WARNING DRV_NAME
4854                                ": Warning: miimon must be specified, "
4855                                "otherwise bonding will not detect link "
4856                                "failure, speed and duplex which are "
4857                                "essential for 802.3ad operation\n");
4858                         printk(KERN_WARNING "Forcing miimon to 100msec\n");
4859                         miimon = 100;
4860                 }
4861         }
4862
4863         /* reset values for TLB/ALB */
4864         if ((bond_mode == BOND_MODE_TLB) ||
4865             (bond_mode == BOND_MODE_ALB)) {
4866                 if (!miimon) {
4867                         printk(KERN_WARNING DRV_NAME
4868                                ": Warning: miimon must be specified, "
4869                                "otherwise bonding will not detect link "
4870                                "failure and link speed which are essential "
4871                                "for TLB/ALB load balancing\n");
4872                         printk(KERN_WARNING "Forcing miimon to 100msec\n");
4873                         miimon = 100;
4874                 }
4875         }
4876
4877         if (bond_mode == BOND_MODE_ALB) {
4878                 printk(KERN_NOTICE DRV_NAME
4879                        ": In ALB mode you might experience client "
4880                        "disconnections upon reconnection of a link if the "
4881                        "bonding module updelay parameter (%d msec) is "
4882                        "incompatible with the forwarding delay time of the "
4883                        "switch\n",
4884                        updelay);
4885         }
4886
4887         if (!miimon) {
4888                 if (updelay || downdelay) {
4889                         /* just warn the user the up/down delay will have
4890                          * no effect since miimon is zero...
4891                          */
4892                         printk(KERN_WARNING DRV_NAME
4893                                ": Warning: miimon module parameter not set "
4894                                "and updelay (%d) or downdelay (%d) module "
4895                                "parameter is set; updelay and downdelay have "
4896                                "no effect unless miimon is set\n",
4897                                updelay, downdelay);
4898                 }
4899         } else {
4900                 /* don't allow arp monitoring */
4901                 if (arp_interval) {
4902                         printk(KERN_WARNING DRV_NAME
4903                                ": Warning: miimon (%d) and arp_interval (%d) "
4904                                "can't be used simultaneously, disabling ARP "
4905                                "monitoring\n",
4906                                miimon, arp_interval);
4907                         arp_interval = 0;
4908                 }
4909
4910                 if ((updelay % miimon) != 0) {
4911                         printk(KERN_WARNING DRV_NAME
4912                                ": Warning: updelay (%d) is not a multiple "
4913                                "of miimon (%d), updelay rounded to %d ms\n",
4914                                updelay, miimon, (updelay / miimon) * miimon);
4915                 }
4916
4917                 updelay /= miimon;
4918
4919                 if ((downdelay % miimon) != 0) {
4920                         printk(KERN_WARNING DRV_NAME
4921                                ": Warning: downdelay (%d) is not a multiple "
4922                                "of miimon (%d), downdelay rounded to %d ms\n",
4923                                downdelay, miimon,
4924                                (downdelay / miimon) * miimon);
4925                 }
4926
4927                 downdelay /= miimon;
4928         }
4929
4930         if (arp_interval < 0) {
4931                 printk(KERN_WARNING DRV_NAME
4932                        ": Warning: arp_interval module parameter (%d) "
4933                        ", not in range 0-%d, so it was reset to %d\n",
4934                        arp_interval, INT_MAX, BOND_LINK_ARP_INTERV);
4935                 arp_interval = BOND_LINK_ARP_INTERV;
4936         }
4937
4938         for (arp_ip_count = 0;
4939              (arp_ip_count < BOND_MAX_ARP_TARGETS) && arp_ip_target[arp_ip_count];
4940              arp_ip_count++) {
4941                 /* not complete check, but should be good enough to
4942                    catch mistakes */
4943                 if (!isdigit(arp_ip_target[arp_ip_count][0])) {
4944                         printk(KERN_WARNING DRV_NAME
4945                                ": Warning: bad arp_ip_target module parameter "
4946                                "(%s), ARP monitoring will not be performed\n",
4947                                arp_ip_target[arp_ip_count]);
4948                         arp_interval = 0;
4949                 } else {
4950                         __be32 ip = in_aton(arp_ip_target[arp_ip_count]);
4951                         arp_target[arp_ip_count] = ip;
4952                 }
4953         }
4954
4955         if (arp_interval && !arp_ip_count) {
4956                 /* don't allow arping if no arp_ip_target given... */
4957                 printk(KERN_WARNING DRV_NAME
4958                        ": Warning: arp_interval module parameter (%d) "
4959                        "specified without providing an arp_ip_target "
4960                        "parameter, arp_interval was reset to 0\n",
4961                        arp_interval);
4962                 arp_interval = 0;
4963         }
4964
4965         if (arp_validate) {
4966                 if (bond_mode != BOND_MODE_ACTIVEBACKUP) {
4967                         printk(KERN_ERR DRV_NAME
4968                ": arp_validate only supported in active-backup mode\n");
4969                         return -EINVAL;
4970                 }
4971                 if (!arp_interval) {
4972                         printk(KERN_ERR DRV_NAME
4973                                ": arp_validate requires arp_interval\n");
4974                         return -EINVAL;
4975                 }
4976
4977                 arp_validate_value = bond_parse_parm(arp_validate,
4978                                                      arp_validate_tbl);
4979                 if (arp_validate_value == -1) {
4980                         printk(KERN_ERR DRV_NAME
4981                                ": Error: invalid arp_validate \"%s\"\n",
4982                                arp_validate == NULL ? "NULL" : arp_validate);
4983                         return -EINVAL;
4984                 }
4985         } else
4986                 arp_validate_value = 0;
4987
4988         if (miimon) {
4989                 printk(KERN_INFO DRV_NAME
4990                        ": MII link monitoring set to %d ms\n",
4991                        miimon);
4992         } else if (arp_interval) {
4993                 int i;
4994
4995                 printk(KERN_INFO DRV_NAME
4996                        ": ARP monitoring set to %d ms, validate %s, with %d target(s):",
4997                        arp_interval,
4998                        arp_validate_tbl[arp_validate_value].modename,
4999                        arp_ip_count);
5000
5001                 for (i = 0; i < arp_ip_count; i++)
5002                         printk (" %s", arp_ip_target[i]);
5003
5004                 printk("\n");
5005
5006         } else if (max_bonds) {
5007                 /* miimon and arp_interval not set, we need one so things
5008                  * work as expected, see bonding.txt for details
5009                  */
5010                 printk(KERN_WARNING DRV_NAME
5011                        ": Warning: either miimon or arp_interval and "
5012                        "arp_ip_target module parameters must be specified, "
5013                        "otherwise bonding will not detect link failures! see "
5014                        "bonding.txt for details.\n");
5015         }
5016
5017         if (primary && !USES_PRIMARY(bond_mode)) {
5018                 /* currently, using a primary only makes sense
5019                  * in active backup, TLB or ALB modes
5020                  */
5021                 printk(KERN_WARNING DRV_NAME
5022                        ": Warning: %s primary device specified but has no "
5023                        "effect in %s mode\n",
5024                        primary, bond_mode_name(bond_mode));
5025                 primary = NULL;
5026         }
5027
5028         if (fail_over_mac) {
5029                 fail_over_mac_value = bond_parse_parm(fail_over_mac,
5030                                                       fail_over_mac_tbl);
5031                 if (fail_over_mac_value == -1) {
5032                         printk(KERN_ERR DRV_NAME
5033                                ": Error: invalid fail_over_mac \"%s\"\n",
5034                                arp_validate == NULL ? "NULL" : arp_validate);
5035                         return -EINVAL;
5036                 }
5037
5038                 if (bond_mode != BOND_MODE_ACTIVEBACKUP)
5039                         printk(KERN_WARNING DRV_NAME
5040                                ": Warning: fail_over_mac only affects "
5041                                "active-backup mode.\n");
5042         } else {
5043                 fail_over_mac_value = BOND_FOM_NONE;
5044         }
5045
5046         /* fill params struct with the proper values */
5047         params->mode = bond_mode;
5048         params->xmit_policy = xmit_hashtype;
5049         params->miimon = miimon;
5050         params->num_grat_arp = num_grat_arp;
5051         params->num_unsol_na = num_unsol_na;
5052         params->arp_interval = arp_interval;
5053         params->arp_validate = arp_validate_value;
5054         params->updelay = updelay;
5055         params->downdelay = downdelay;
5056         params->use_carrier = use_carrier;
5057         params->lacp_fast = lacp_fast;
5058         params->primary[0] = 0;
5059         params->fail_over_mac = fail_over_mac_value;
5060
5061         if (primary) {
5062                 strncpy(params->primary, primary, IFNAMSIZ);
5063                 params->primary[IFNAMSIZ - 1] = 0;
5064         }
5065
5066         memcpy(params->arp_targets, arp_target, sizeof(arp_target));
5067
5068         return 0;
5069 }
5070
/*
 * Lockdep class keys for bonding devices: one is applied to the _xmit_lock
 * of each tx queue, the other to the device's addr_list_lock (see
 * bond_set_lockdep_class() and bond_set_lockdep_class_one()).
 */
5071 static struct lock_class_key bonding_netdev_xmit_lock_key;
5072 static struct lock_class_key bonding_netdev_addr_lock_key;
5073
5074 static void bond_set_lockdep_class_one(struct net_device *dev,
5075                                        struct netdev_queue *txq,
5076                                        void *_unused)
5077 {
5078         lockdep_set_class(&txq->_xmit_lock,
5079                           &bonding_netdev_xmit_lock_key);
5080 }
5081
5082 static void bond_set_lockdep_class(struct net_device *dev)
5083 {
5084         lockdep_set_class(&dev->addr_list_lock,
5085                           &bonding_netdev_addr_lock_key);
5086         netdev_for_each_tx_queue(dev, bond_set_lockdep_class_one, NULL);
5087 }
5088
5089 /* Create a new bond based on the specified name and bonding parameters.
5090  * If name is NULL, obtain a suitable "bond%d" name for us.
5091  * Caller must NOT hold rtnl_lock; we need to release it here before we
5092  * set up our sysfs entries.
5093  */
5094 int bond_create(char *name, struct bond_params *params)
5095 {
5096         struct net_device *bond_dev;
5097         struct bonding *bond;
5098         int res;
5099
5100         rtnl_lock();
5101         down_write(&bonding_rwsem);
5102
5103         /* Check to see if the bond already exists. */
5104         if (name) {
5105                 list_for_each_entry(bond, &bond_dev_list, bond_list)
5106                         if (strnicmp(bond->dev->name, name, IFNAMSIZ) == 0) {
5107                                 printk(KERN_ERR DRV_NAME
5108                                ": cannot add bond %s; it already exists\n",
5109                                        name);
5110                                 res = -EPERM;
5111                                 goto out_rtnl;
5112                         }
5113         }
5114
5115         bond_dev = alloc_netdev(sizeof(struct bonding), name ? name : "",
5116                                 ether_setup);
5117         if (!bond_dev) {
5118                 printk(KERN_ERR DRV_NAME
5119                        ": %s: eek! can't alloc netdev!\n",
5120                        name);
5121                 res = -ENOMEM;
5122                 goto out_rtnl;
5123         }
5124
5125         if (!name) {
5126                 res = dev_alloc_name(bond_dev, "bond%d");
5127                 if (res < 0)
5128                         goto out_netdev;
5129         }
5130
5131         /* bond_init() must be called after dev_alloc_name() (for the
5132          * /proc files), but before register_netdevice(), because we
5133          * need to set function pointers.
5134          */
5135
5136         res = bond_init(bond_dev, params);
5137         if (res < 0) {
5138                 goto out_netdev;
5139         }
5140
5141         res = register_netdevice(bond_dev);
5142         if (res < 0) {
5143                 goto out_bond;
5144         }
5145
5146         bond_set_lockdep_class(bond_dev);
5147
5148         netif_carrier_off(bond_dev);
5149
5150         up_write(&bonding_rwsem);
5151         rtnl_unlock(); /* allows sysfs registration of net device */
5152         res = bond_create_sysfs_entry(bond_dev->priv);
5153         if (res < 0) {
5154                 rtnl_lock();
5155                 down_write(&bonding_rwsem);
5156                 bond_deinit(bond_dev);
5157                 unregister_netdevice(bond_dev);
5158                 goto out_rtnl;
5159         }
5160
5161         return 0;
5162
5163 out_bond:
5164         bond_deinit(bond_dev);
5165 out_netdev:
5166         free_netdev(bond_dev);
5167 out_rtnl:
5168         up_write(&bonding_rwsem);
5169         rtnl_unlock();
5170         return res;
5171 }
5172
5173 static int __init bonding_init(void)
5174 {
5175         int i;
5176         int res;
5177         struct bonding *bond;
5178
5179         printk(KERN_INFO "%s", version);
5180
5181         res = bond_check_params(&bonding_defaults);
5182         if (res) {
5183                 goto out;
5184         }
5185
5186 #ifdef CONFIG_PROC_FS
5187         bond_create_proc_dir();
5188 #endif
5189
5190         init_rwsem(&bonding_rwsem);
5191
5192         for (i = 0; i < max_bonds; i++) {
5193                 res = bond_create(NULL, &bonding_defaults);
5194                 if (res)
5195                         goto err;
5196         }
5197
5198         res = bond_create_sysfs();
5199         if (res)
5200                 goto err;
5201
5202         register_netdevice_notifier(&bond_netdev_notifier);
5203         register_inetaddr_notifier(&bond_inetaddr_notifier);
5204         bond_register_ipv6_notifier();
5205
5206         goto out;
5207 err:
5208         list_for_each_entry(bond, &bond_dev_list, bond_list) {
5209                 bond_work_cancel_all(bond);
5210                 destroy_workqueue(bond->wq);
5211         }
5212
5213         bond_destroy_sysfs();
5214
5215         rtnl_lock();
5216         bond_free_all();
5217         rtnl_unlock();
5218 out:
5219         return res;
5220
5221 }
5222
5223 static void __exit bonding_exit(void)
5224 {
5225         unregister_netdevice_notifier(&bond_netdev_notifier);
5226         unregister_inetaddr_notifier(&bond_inetaddr_notifier);
5227         bond_unregister_ipv6_notifier();
5228
5229         bond_destroy_sysfs();
5230
5231         rtnl_lock();
5232         bond_free_all();
5233         rtnl_unlock();
5234 }
5235
/*
 * Module glue: bonding_init()/bonding_exit() are the load/unload entry
 * points; the MODULE_* macros below record licence, version, description,
 * author and supported-device strings for the module.
 */
5236 module_init(bonding_init);
5237 module_exit(bonding_exit);
5238 MODULE_LICENSE("GPL");
5239 MODULE_VERSION(DRV_VERSION);
5240 MODULE_DESCRIPTION(DRV_DESCRIPTION ", v" DRV_VERSION);
5241 MODULE_AUTHOR("Thomas Davis, tadavis@lbl.gov and many others");
5242 MODULE_SUPPORTED_DEVICE("most ethernet devices");
5243
5244 /*
5245  * Local variables:
5246  *  c-indent-level: 8
5247  *  c-basic-offset: 8
5248  *  tab-width: 8
5249  * End:
5250  */
5251