net/bonding: Optionally allow ethernet slaves to keep own MAC
[safe/jmp/linux-2.6] / drivers / net / bonding / bond_main.c
index 8f77db2..db80f24 100644 (file)
@@ -98,6 +98,7 @@ static char *xmit_hash_policy = NULL;
 static int arp_interval = BOND_LINK_ARP_INTERV;
 static char *arp_ip_target[BOND_MAX_ARP_TARGETS] = { NULL, };
 static char *arp_validate = NULL;
+static int fail_over_mac = 0;
 struct bond_params bonding_defaults;
 
 module_param(max_bonds, int, 0);
@@ -131,6 +132,8 @@ module_param_array(arp_ip_target, charp, NULL, 0);
 MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form");
 module_param(arp_validate, charp, 0);
 MODULE_PARM_DESC(arp_validate, "validate src/dst of ARP probes: none (default), active, backup or all");
+module_param(fail_over_mac, int, 0);
+MODULE_PARM_DESC(fail_over_mac, "For active-backup, do not set all slaves to the same MAC.  0 for off (default), 1 for on.");
 
 /*----------------------------- Global variables ----------------------------*/
 
@@ -144,7 +147,7 @@ static struct proc_dir_entry *bond_proc_dir = NULL;
 #endif
 
 extern struct rw_semaphore bonding_rwsem;
-static u32 arp_target[BOND_MAX_ARP_TARGETS] = { 0, } ;
+static __be32 arp_target[BOND_MAX_ARP_TARGETS] = { 0, } ;
 static int arp_ip_count        = 0;
 static int bond_mode   = BOND_MODE_ROUNDROBIN;
 static int xmit_hashtype= BOND_XMIT_POLICY_LAYER2;
@@ -1096,7 +1099,21 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
                if (new_active) {
                        bond_set_slave_active_flags(new_active);
                }
-               bond_send_gratuitous_arp(bond);
+
+               /* when bonding does not set the slave MAC address, the bond MAC
+                * address is the one of the active slave.
+                */
+               if (new_active && bond->params.fail_over_mac)
+                       memcpy(bond->dev->dev_addr,  new_active->dev->dev_addr,
+                               new_active->dev->addr_len);
+               if (bond->curr_active_slave &&
+                       test_bit(__LINK_STATE_LINKWATCH_PENDING,
+                                       &bond->curr_active_slave->dev->state)) {
+                       dprintk("delaying gratuitous arp on %s\n",
+                               bond->curr_active_slave->dev->name);
+                       bond->send_grat_arp = 1;
+               } else
+                       bond_send_gratuitous_arp(bond);
        }
 }
 
@@ -1217,7 +1234,8 @@ static int bond_compute_features(struct bonding *bond)
        struct slave *slave;
        struct net_device *bond_dev = bond->dev;
        unsigned long features = bond_dev->features;
-       unsigned short max_hard_header_len = ETH_HLEN;
+       unsigned short max_hard_header_len = max((u16)ETH_HLEN,
+                                               bond_dev->hard_header_len);
        int i;
 
        features &= ~(NETIF_F_ALL_CSUM | BOND_VLAN_FEATURES);
@@ -1238,6 +1256,23 @@ static int bond_compute_features(struct bonding *bond)
        return 0;
 }
 
+/* Make bond_dev take on slave_dev's device type and type-dependent attributes. */
+static void bond_setup_by_slave(struct net_device *bond_dev,
+                               struct net_device *slave_dev)
+{
+       struct bonding *bond = bond_dev->priv;
+
+       bond_dev->neigh_setup           = slave_dev->neigh_setup;
+
+       bond_dev->type              = slave_dev->type;
+       bond_dev->hard_header_len   = slave_dev->hard_header_len;
+       bond_dev->addr_len          = slave_dev->addr_len;
+       /* broadcast address is copied at the slave's address length */
+       memcpy(bond_dev->broadcast, slave_dev->broadcast,
+               slave_dev->addr_len);
+       bond->setup_by_slave = 1;       /* checked on NETDEV_GOING_DOWN of a slave */
+}
+
 /* enslave device <slave> to bond device <master> */
 int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 {
@@ -1258,8 +1293,9 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 
        /* bond must be initialized by bond_open() before enslaving */
        if (!(bond_dev->flags & IFF_UP)) {
-               dprintk("Error, master_dev is not up\n");
-               return -EPERM;
+               printk(KERN_WARNING DRV_NAME
+                       " %s: master_dev is not up in bond_enslave\n",
+                       bond_dev->name);
        }
 
        /* already enslaved */
@@ -1312,14 +1348,42 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
                goto err_undo_flags;
        }
 
+       /* set bonding device ether type by slave - bonding netdevices are
+        * created with ether_setup, so when the slave type is not ARPHRD_ETHER
+        * there is a need to override some of the type dependent attribs/funcs.
+        *
+        * bond ether type mutual exclusion - don't allow slaves of dissimilar
+        * ether type (eg ARPHRD_ETHER and ARPHRD_INFINIBAND) share the same bond
+        */
+       if (bond->slave_cnt == 0) {
+               if (slave_dev->type != ARPHRD_ETHER)
+                       bond_setup_by_slave(bond_dev, slave_dev);
+       } else if (bond_dev->type != slave_dev->type) {
+               printk(KERN_ERR DRV_NAME ": %s ether type (%d) is different "
+                       "from other slaves (%d), can not enslave it.\n",
+                       slave_dev->name,
+                       slave_dev->type, bond_dev->type);
+                       res = -EINVAL;
+                       goto err_undo_flags;
+       }
+
        if (slave_dev->set_mac_address == NULL) {
-               printk(KERN_ERR DRV_NAME
-                       ": %s: Error: The slave device you specified does "
-                       "not support setting the MAC address. "
-                       "Your kernel likely does not support slave "
-                       "devices.\n", bond_dev->name);
-               res = -EOPNOTSUPP;
-               goto err_undo_flags;
+               if (bond->slave_cnt == 0) {
+                       printk(KERN_WARNING DRV_NAME
+                              ": %s: Warning: The first slave device "
+                              "specified does not support setting the MAC "
+                              "address. Enabling the fail_over_mac option.",
+                              bond_dev->name);
+                       bond->params.fail_over_mac = 1;
+               } else if (!bond->params.fail_over_mac) {
+                       printk(KERN_ERR DRV_NAME
+                               ": %s: Error: The slave device specified "
+                               "does not support setting the MAC address, "
+                               "but fail_over_mac is not enabled.\n"
+                               , bond_dev->name);
+                       res = -EOPNOTSUPP;
+                       goto err_undo_flags;
+               }
        }
 
        new_slave = kzalloc(sizeof(struct slave), GFP_KERNEL);
@@ -1340,16 +1404,18 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
         */
        memcpy(new_slave->perm_hwaddr, slave_dev->dev_addr, ETH_ALEN);
 
-       /*
-        * Set slave to master's mac address.  The application already
-        * set the master's mac address to that of the first slave
-        */
-       memcpy(addr.sa_data, bond_dev->dev_addr, bond_dev->addr_len);
-       addr.sa_family = slave_dev->type;
-       res = dev_set_mac_address(slave_dev, &addr);
-       if (res) {
-               dprintk("Error %d calling set_mac_address\n", res);
-               goto err_free;
+       if (!bond->params.fail_over_mac) {
+               /*
+                * Set slave to master's mac address.  The application already
+                * set the master's mac address to that of the first slave
+                */
+               memcpy(addr.sa_data, bond_dev->dev_addr, bond_dev->addr_len);
+               addr.sa_family = slave_dev->type;
+               res = dev_set_mac_address(slave_dev, &addr);
+               if (res) {
+                       dprintk("Error %d calling set_mac_address\n", res);
+                       goto err_free;
+               }
        }
 
        res = netdev_set_master(slave_dev, bond_dev);
@@ -1574,9 +1640,11 @@ err_close:
        dev_close(slave_dev);
 
 err_restore_mac:
-       memcpy(addr.sa_data, new_slave->perm_hwaddr, ETH_ALEN);
-       addr.sa_family = slave_dev->type;
-       dev_set_mac_address(slave_dev, &addr);
+       if (!bond->params.fail_over_mac) {
+               memcpy(addr.sa_data, new_slave->perm_hwaddr, ETH_ALEN);
+               addr.sa_family = slave_dev->type;
+               dev_set_mac_address(slave_dev, &addr);
+       }
 
 err_free:
        kfree(new_slave);
@@ -1749,10 +1817,12 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
        /* close slave before restoring its mac address */
        dev_close(slave_dev);
 
-       /* restore original ("permanent") mac address */
-       memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN);
-       addr.sa_family = slave_dev->type;
-       dev_set_mac_address(slave_dev, &addr);
+       if (!bond->params.fail_over_mac) {
+               /* restore original ("permanent") mac address */
+               memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN);
+               addr.sa_family = slave_dev->type;
+               dev_set_mac_address(slave_dev, &addr);
+       }
 
        slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB |
                                   IFF_SLAVE_INACTIVE | IFF_BONDING |
@@ -1764,6 +1834,35 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
 }
 
 /*
+* Destroy a bonding device: deinit, drop its sysfs entry, unregister the netdev.
+* Must be under rtnl_lock when this function is called.
+*/
+void bond_destroy(struct bonding *bond)
+{
+       bond_deinit(bond->dev);
+       bond_destroy_sysfs_entry(bond);
+       unregister_netdevice(bond->dev);
+}
+
+/*
+* First release a slave and then destroy the bond if no more slaves are left.
+* Returns bond_release()'s result. Must be under rtnl_lock when called.
+*/
+int  bond_release_and_destroy(struct net_device *bond_dev, struct net_device *slave_dev)
+{
+       struct bonding *bond = bond_dev->priv;
+       int ret;
+
+       ret = bond_release(bond_dev, slave_dev);
+       if ((ret == 0) && (bond->slave_cnt == 0)) {     /* released OK and bond is now empty */
+               printk(KERN_INFO DRV_NAME ": %s: destroying bond %s.\n",
+                      bond_dev->name, bond_dev->name);
+               bond_destroy(bond);
+       }
+       return ret;
+}
+
+/*
  * This function releases all slaves.
  */
 static int bond_release_all(struct net_device *bond_dev)
@@ -1839,10 +1938,12 @@ static int bond_release_all(struct net_device *bond_dev)
                /* close slave before restoring its mac address */
                dev_close(slave_dev);
 
-               /* restore original ("permanent") mac address*/
-               memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN);
-               addr.sa_family = slave_dev->type;
-               dev_set_mac_address(slave_dev, &addr);
+               if (!bond->params.fail_over_mac) {
+                       /* restore original ("permanent") mac address*/
+                       memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN);
+                       addr.sa_family = slave_dev->type;
+                       dev_set_mac_address(slave_dev, &addr);
+               }
 
                slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB |
                                           IFF_SLAVE_INACTIVE);
@@ -2013,6 +2114,17 @@ void bond_mii_monitor(struct net_device *bond_dev)
         * program could monitor the link itself if needed.
         */
 
+       if (bond->send_grat_arp) {
+               if (bond->curr_active_slave && test_bit(__LINK_STATE_LINKWATCH_PENDING,
+                               &bond->curr_active_slave->dev->state))
+                       dprintk("Needs to send gratuitous arp but not yet\n");
+               else {
+                       dprintk("sending delayed gratuitous arp on on %s\n",
+                               bond->curr_active_slave->dev->name);
+                       bond_send_gratuitous_arp(bond);
+                       bond->send_grat_arp = 0;
+               }
+       }
        read_lock(&bond->curr_slave_lock);
        oldcurrent = bond->curr_active_slave;
        read_unlock(&bond->curr_slave_lock);
@@ -2226,7 +2338,7 @@ out:
 }
 
 
-static u32 bond_glean_dev_ip(struct net_device *dev)
+static __be32 bond_glean_dev_ip(struct net_device *dev)
 {
        struct in_device *idev;
        struct in_ifaddr *ifa;
@@ -2269,7 +2381,7 @@ static int bond_has_ip(struct bonding *bond)
        return 0;
 }
 
-static int bond_has_this_ip(struct bonding *bond, u32 ip)
+static int bond_has_this_ip(struct bonding *bond, __be32 ip)
 {
        struct vlan_entry *vlan, *vlan_next;
 
@@ -2293,7 +2405,7 @@ static int bond_has_this_ip(struct bonding *bond, u32 ip)
  * switches in VLAN mode (especially if ports are configured as
  * "native" to a VLAN) might not pass non-tagged frames.
  */
-static void bond_arp_send(struct net_device *slave_dev, int arp_op, u32 dest_ip, u32 src_ip, unsigned short vlan_id)
+static void bond_arp_send(struct net_device *slave_dev, int arp_op, __be32 dest_ip, __be32 src_ip, unsigned short vlan_id)
 {
        struct sk_buff *skb;
 
@@ -2321,7 +2433,7 @@ static void bond_arp_send(struct net_device *slave_dev, int arp_op, u32 dest_ip,
 static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
 {
        int i, vlan_id, rv;
-       u32 *targets = bond->params.arp_targets;
+       __be32 *targets = bond->params.arp_targets;
        struct vlan_entry *vlan, *vlan_next;
        struct net_device *vlan_dev;
        struct flowi fl;
@@ -2414,7 +2526,7 @@ static void bond_send_gratuitous_arp(struct bonding *bond)
 
        if (bond->master_ip) {
                bond_arp_send(slave->dev, ARPOP_REPLY, bond->master_ip,
-                                 bond->master_ip, 0);
+                               bond->master_ip, 0);
        }
 
        list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
@@ -2426,10 +2538,10 @@ static void bond_send_gratuitous_arp(struct bonding *bond)
        }
 }
 
-static void bond_validate_arp(struct bonding *bond, struct slave *slave, u32 sip, u32 tip)
+static void bond_validate_arp(struct bonding *bond, struct slave *slave, __be32 sip, __be32 tip)
 {
        int i;
-       u32 *targets = bond->params.arp_targets;
+       __be32 *targets = bond->params.arp_targets;
 
        targets = bond->params.arp_targets;
        for (i = 0; (i < BOND_MAX_ARP_TARGETS) && targets[i]; i++) {
@@ -2451,7 +2563,7 @@ static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct pack
        struct slave *slave;
        struct bonding *bond;
        unsigned char *arp_ptr;
-       u32 sip, tip;
+       __be32 sip, tip;
 
        if (dev->nd_net != &init_net)
                goto out;
@@ -2951,9 +3063,15 @@ static void bond_info_show_master(struct seq_file *seq)
        curr = bond->curr_active_slave;
        read_unlock(&bond->curr_slave_lock);
 
-       seq_printf(seq, "Bonding Mode: %s\n",
+       seq_printf(seq, "Bonding Mode: %s",
                   bond_mode_name(bond->params.mode));
 
+       if (bond->params.mode == BOND_MODE_ACTIVEBACKUP &&
+           bond->params.fail_over_mac)
+               seq_printf(seq, " (fail_over_mac)");
+
+       seq_printf(seq, "\n");
+
        if (bond->params.mode == BOND_MODE_XOR ||
                bond->params.mode == BOND_MODE_8023AD) {
                seq_printf(seq, "Transmit Hash Policy: %s (%d)\n",
@@ -3248,6 +3366,11 @@ static int bond_slave_netdev_event(unsigned long event, struct net_device *slave
                 * ... Or is it this?
                 */
                break;
+       case NETDEV_GOING_DOWN:
+               dprintk("slave %s is going down\n", slave_dev->name);
+               if (bond->setup_by_slave)
+                       bond_release_and_destroy(bond_dev, slave_dev);
+               break;
        case NETDEV_CHANGEMTU:
                /*
                 * TODO: Should slaves be allowed to
@@ -3427,14 +3550,14 @@ static int bond_xmit_hash_policy_l34(struct sk_buff *skb,
 {
        struct ethhdr *data = (struct ethhdr *)skb->data;
        struct iphdr *iph = ip_hdr(skb);
-       u16 *layer4hdr = (u16 *)((u32 *)iph + iph->ihl);
+       __be16 *layer4hdr = (__be16 *)((u32 *)iph + iph->ihl);
        int layer4_xor = 0;
 
        if (skb->protocol == __constant_htons(ETH_P_IP)) {
                if (!(iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) &&
                    (iph->protocol == IPPROTO_TCP ||
                     iph->protocol == IPPROTO_UDP)) {
-                       layer4_xor = htons((*layer4hdr ^ *(layer4hdr + 1)));
+                       layer4_xor = ntohs((*layer4hdr ^ *(layer4hdr + 1)));
                }
                return (layer4_xor ^
                        ((ntohl(iph->saddr ^ iph->daddr)) & 0xffff)) % count;
@@ -3880,6 +4003,13 @@ static int bond_set_mac_address(struct net_device *bond_dev, void *addr)
 
        dprintk("bond=%p, name=%s\n", bond, (bond_dev ? bond_dev->name : "None"));
 
+       /*
+        * If fail_over_mac is enabled, do nothing and return success.
+        * Returning an error causes ifenslave to fail.
+        */
+       if (bond->params.fail_over_mac)
+               return 0;
+
        if (!is_valid_ether_addr(sa->sa_data)) {
                return -EADDRNOTAVAIL;
        }
@@ -4217,6 +4347,8 @@ static int bond_init(struct net_device *bond_dev, struct bond_params *params)
        bond->current_arp_slave = NULL;
        bond->primary_slave = NULL;
        bond->dev = bond_dev;
+       bond->send_grat_arp = 0;
+       bond->setup_by_slave = 0;
        INIT_LIST_HEAD(&bond->vlan_list);
 
        /* Initialize the device entry points */
@@ -4265,7 +4397,6 @@ static int bond_init(struct net_device *bond_dev, struct bond_params *params)
 #ifdef CONFIG_PROC_FS
        bond_create_proc_entry(bond);
 #endif
-
        list_add_tail(&bond->bond_list, &bond_dev_list);
 
        return 0;
@@ -4521,7 +4652,7 @@ static int bond_check_params(struct bond_params *params)
                               arp_ip_target[arp_ip_count]);
                        arp_interval = 0;
                } else {
-                       u32 ip = in_aton(arp_ip_target[arp_ip_count]);
+                       __be32 ip = in_aton(arp_ip_target[arp_ip_count]);
                        arp_target[arp_ip_count] = ip;
                }
        }
@@ -4599,6 +4730,11 @@ static int bond_check_params(struct bond_params *params)
                primary = NULL;
        }
 
+       if (fail_over_mac && (bond_mode != BOND_MODE_ACTIVEBACKUP))
+               printk(KERN_WARNING DRV_NAME
+                      ": Warning: fail_over_mac only affects "
+                      "active-backup mode.\n");
+
        /* fill params struct with the proper values */
        params->mode = bond_mode;
        params->xmit_policy = xmit_hashtype;
@@ -4610,6 +4746,7 @@ static int bond_check_params(struct bond_params *params)
        params->use_carrier = use_carrier;
        params->lacp_fast = lacp_fast;
        params->primary[0] = 0;
+       params->fail_over_mac = fail_over_mac;
 
        if (primary) {
                strncpy(params->primary, primary, IFNAMSIZ);