IB/mad: Ignore iWARP devices on device removal
[safe/jmp/linux-2.6] / drivers / infiniband / core / mad.c
index 5ad41a6..e351b15 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -1,7 +1,8 @@
 /*
- * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved.
  * Copyright (c) 2005 Intel Corporation.  All rights reserved.
  * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.
+ * Copyright (c) 2009 HNR Consulting. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -31,9 +32,9 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
- * $Id: mad.c 5596 2006-03-03 01:00:07Z sean.hefty $
  */
 #include <linux/dma-mapping.h>
+#include <rdma/ib_cache.h>
 
 #include "mad_priv.h"
 #include "mad_rmpp.h"
@@ -45,15 +46,21 @@ MODULE_DESCRIPTION("kernel IB MAD API");
 MODULE_AUTHOR("Hal Rosenstock");
 MODULE_AUTHOR("Sean Hefty");
 
+int mad_sendq_size = IB_MAD_QP_SEND_SIZE;
+int mad_recvq_size = IB_MAD_QP_RECV_SIZE;
 
-kmem_cache_t *ib_mad_cache;
+module_param_named(send_queue_size, mad_sendq_size, int, 0444);
+MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests");
+module_param_named(recv_queue_size, mad_recvq_size, int, 0444);
+MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests");
+
+static struct kmem_cache *ib_mad_cache;
 
 static struct list_head ib_mad_port_list;
 static u32 ib_mad_client_id = 0;
 
 /* Port list lock */
-static spinlock_t ib_mad_port_list_lock;
-
+static DEFINE_SPINLOCK(ib_mad_port_list_lock);
 
 /* Forward declarations */
 static int method_in_use(struct ib_mad_mgmt_method_table **method,
@@ -65,8 +72,8 @@ static struct ib_mad_agent_private *find_mad_agent(
 static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
                                    struct ib_mad_private *mad);
 static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv);
-static void timeout_sends(void *data);
-static void local_completions(void *data);
+static void timeout_sends(struct work_struct *work);
+static void local_completions(struct work_struct *work);
 static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
                              struct ib_mad_agent_private *agent_priv,
                              u8 mgmt_class);
@@ -167,6 +174,15 @@ static int is_vendor_method_in_use(
        return 0;
 }
 
+int ib_response_mad(struct ib_mad *mad)
+{
+       return ((mad->mad_hdr.method & IB_MGMT_METHOD_RESP) ||
+               (mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) ||
+               ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_BM) &&
+                (mad->mad_hdr.attr_mod & IB_BM_ATTR_MOD_RESP)));
+}
+EXPORT_SYMBOL(ib_response_mad);
+
 /*
  * ib_register_mad_agent - Register to send/receive MADs
  */
@@ -293,6 +309,16 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
        mad_agent_priv->agent.context = context;
        mad_agent_priv->agent.qp = port_priv->qp_info[qpn].qp;
        mad_agent_priv->agent.port_num = port_num;
+       spin_lock_init(&mad_agent_priv->lock);
+       INIT_LIST_HEAD(&mad_agent_priv->send_list);
+       INIT_LIST_HEAD(&mad_agent_priv->wait_list);
+       INIT_LIST_HEAD(&mad_agent_priv->done_list);
+       INIT_LIST_HEAD(&mad_agent_priv->rmpp_list);
+       INIT_DELAYED_WORK(&mad_agent_priv->timed_work, timeout_sends);
+       INIT_LIST_HEAD(&mad_agent_priv->local_list);
+       INIT_WORK(&mad_agent_priv->local_work, local_completions);
+       atomic_set(&mad_agent_priv->refcount, 1);
+       init_completion(&mad_agent_priv->comp);
 
        spin_lock_irqsave(&port_priv->reg_lock, flags);
        mad_agent_priv->agent.hi_tid = ++ib_mad_client_id;
@@ -342,18 +368,6 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
        list_add_tail(&mad_agent_priv->agent_list, &port_priv->agent_list);
        spin_unlock_irqrestore(&port_priv->reg_lock, flags);
 
-       spin_lock_init(&mad_agent_priv->lock);
-       INIT_LIST_HEAD(&mad_agent_priv->send_list);
-       INIT_LIST_HEAD(&mad_agent_priv->wait_list);
-       INIT_LIST_HEAD(&mad_agent_priv->done_list);
-       INIT_LIST_HEAD(&mad_agent_priv->rmpp_list);
-       INIT_WORK(&mad_agent_priv->timed_work, timeout_sends, mad_agent_priv);
-       INIT_LIST_HEAD(&mad_agent_priv->local_list);
-       INIT_WORK(&mad_agent_priv->local_work, local_completions,
-                  mad_agent_priv);
-       atomic_set(&mad_agent_priv->refcount, 1);
-       init_completion(&mad_agent_priv->comp);
-
        return &mad_agent_priv->agent;
 
 error4:
@@ -399,19 +413,15 @@ static int register_snoop_agent(struct ib_mad_qp_info *qp_info,
 
        if (i == qp_info->snoop_table_size) {
                /* Grow table. */
-               new_snoop_table = kmalloc(sizeof mad_snoop_priv *
-                                         qp_info->snoop_table_size + 1,
-                                         GFP_ATOMIC);
+               new_snoop_table = krealloc(qp_info->snoop_table,
+                                          sizeof mad_snoop_priv *
+                                          (qp_info->snoop_table_size + 1),
+                                          GFP_ATOMIC);
                if (!new_snoop_table) {
                        i = -ENOMEM;
                        goto out;
                }
-               if (qp_info->snoop_table) {
-                       memcpy(new_snoop_table, qp_info->snoop_table,
-                              sizeof mad_snoop_priv *
-                              qp_info->snoop_table_size);
-                       kfree(qp_info->snoop_table);
-               }
+
                qp_info->snoop_table = new_snoop_table;
                qp_info->snoop_table_size++;
        }
@@ -570,13 +580,6 @@ int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
 }
 EXPORT_SYMBOL(ib_unregister_mad_agent);
 
-static inline int response_mad(struct ib_mad *mad)
-{
-       /* Trap represses are responses although response bit is reset */
-       return ((mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) ||
-               (mad->mad_hdr.method & IB_MGMT_METHOD_RESP));
-}
-
 static void dequeue_mad(struct ib_mad_list_head *mad_list)
 {
        struct ib_mad_queue *mad_queue;
@@ -641,7 +644,8 @@ static void snoop_recv(struct ib_mad_qp_info *qp_info,
        spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
 }
 
-static void build_smp_wc(u64 wr_id, u16 slid, u16 pkey_index, u8 port_num,
+static void build_smp_wc(struct ib_qp *qp,
+                        u64 wr_id, u16 slid, u16 pkey_index, u8 port_num,
                         struct ib_wc *wc)
 {
        memset(wc, 0, sizeof *wc);
@@ -651,7 +655,7 @@ static void build_smp_wc(u64 wr_id, u16 slid, u16 pkey_index, u8 port_num,
        wc->pkey_index = pkey_index;
        wc->byte_len = sizeof(struct ib_mad) + sizeof(struct ib_grh);
        wc->src_qp = IB_QP0;
-       wc->qp_num = IB_QP0;
+       wc->qp = qp;
        wc->slid = slid;
        wc->sl = 0;
        wc->dlid_path_bits = 0;
@@ -666,7 +670,7 @@ static void build_smp_wc(u64 wr_id, u16 slid, u16 pkey_index, u8 port_num,
 static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
                                  struct ib_mad_send_wr_private *mad_send_wr)
 {
-       int ret;
+       int ret = 0;
        struct ib_smp *smp = mad_send_wr->send_buf.mad;
        unsigned long flags;
        struct ib_mad_local_private *local;
@@ -674,10 +678,16 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
        struct ib_mad_port_private *port_priv;
        struct ib_mad_agent_private *recv_mad_agent = NULL;
        struct ib_device *device = mad_agent_priv->agent.device;
-       u8 port_num = mad_agent_priv->agent.port_num;
+       u8 port_num;
        struct ib_wc mad_wc;
        struct ib_send_wr *send_wr = &mad_send_wr->send_wr;
 
+       if (device->node_type == RDMA_NODE_IB_SWITCH &&
+           smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+               port_num = send_wr->wr.ud.port_num;
+       else
+               port_num = mad_agent_priv->agent.port_num;
+
        /*
         * Directed route handling starts if the initial LID routed part of
         * a request or the ending LID routed part of a response is empty.
@@ -686,14 +696,16 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
         */
        if ((ib_get_smp_direction(smp) ? smp->dr_dlid : smp->dr_slid) ==
             IB_LID_PERMISSIVE &&
-           !smi_handle_dr_smp_send(smp, device->node_type, port_num)) {
+            smi_handle_dr_smp_send(smp, device->node_type, port_num) ==
+            IB_SMI_DISCARD) {
                ret = -EINVAL;
                printk(KERN_ERR PFX "Invalid directed route\n");
                goto out;
        }
+
        /* Check to post send on QP or process locally */
-       ret = smi_check_local_smp(smp, device);
-       if (!ret)
+       if (smi_check_local_smp(smp, device) == IB_SMI_DISCARD &&
+           smi_check_local_returning_smp(smp, device) == IB_SMI_DISCARD)
                goto out;
 
        local = kmalloc(sizeof *local, GFP_ATOMIC);
@@ -712,7 +724,8 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
                goto out;
        }
 
-       build_smp_wc(send_wr->wr_id, be16_to_cpu(smp->dr_slid),
+       build_smp_wc(mad_agent_priv->agent.qp,
+                    send_wr->wr_id, be16_to_cpu(smp->dr_slid),
                     send_wr->wr.ud.pkey_index,
                     send_wr->wr.ud.port_num, &mad_wc);
 
@@ -723,7 +736,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
        switch (ret)
        {
        case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY:
-               if (response_mad(&mad_priv->mad.mad) &&
+               if (ib_response_mad(&mad_priv->mad.mad) &&
                    mad_agent_priv->agent.recv_handler) {
                        local->mad_priv = mad_priv;
                        local->recv_mad_agent = mad_agent_priv;
@@ -743,16 +756,17 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
                port_priv = ib_get_mad_port(mad_agent_priv->agent.device,
                                            mad_agent_priv->agent.port_num);
                if (port_priv) {
-                       mad_priv->mad.mad.mad_hdr.tid =
-                               ((struct ib_mad *)smp)->mad_hdr.tid;
+                       memcpy(&mad_priv->mad.mad, smp, sizeof(struct ib_mad));
                        recv_mad_agent = find_mad_agent(port_priv,
                                                        &mad_priv->mad.mad);
                }
                if (!port_priv || !recv_mad_agent) {
+                       /*
+                        * No receiving agent so drop packet and
+                        * generate send completion.
+                        */
                        kmem_cache_free(ib_mad_cache, mad_priv);
-                       kfree(local);
-                       ret = 0;
-                       goto out;
+                       break;
                }
                local->mad_priv = mad_priv;
                local->recv_mad_agent = recv_mad_agent;
@@ -997,17 +1011,17 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
 
        mad_agent = mad_send_wr->send_buf.mad_agent;
        sge = mad_send_wr->sg_list;
-       sge[0].addr = dma_map_single(mad_agent->device->dma_device,
-                                    mad_send_wr->send_buf.mad,
-                                    sge[0].length,
-                                    DMA_TO_DEVICE);
-       pci_unmap_addr_set(mad_send_wr, header_mapping, sge[0].addr);
-
-       sge[1].addr = dma_map_single(mad_agent->device->dma_device,
-                                    ib_get_payload(mad_send_wr),
-                                    sge[1].length,
-                                    DMA_TO_DEVICE);
-       pci_unmap_addr_set(mad_send_wr, payload_mapping, sge[1].addr);
+       sge[0].addr = ib_dma_map_single(mad_agent->device,
+                                       mad_send_wr->send_buf.mad,
+                                       sge[0].length,
+                                       DMA_TO_DEVICE);
+       mad_send_wr->header_mapping = sge[0].addr;
+
+       sge[1].addr = ib_dma_map_single(mad_agent->device,
+                                       ib_get_payload(mad_send_wr),
+                                       sge[1].length,
+                                       DMA_TO_DEVICE);
+       mad_send_wr->payload_mapping = sge[1].addr;
 
        spin_lock_irqsave(&qp_info->send_queue.lock, flags);
        if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
@@ -1025,12 +1039,12 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
        }
        spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
        if (ret) {
-               dma_unmap_single(mad_agent->device->dma_device,
-                                pci_unmap_addr(mad_send_wr, header_mapping),
-                                sge[0].length, DMA_TO_DEVICE);
-               dma_unmap_single(mad_agent->device->dma_device,
-                                pci_unmap_addr(mad_send_wr, payload_mapping),
-                                sge[1].length, DMA_TO_DEVICE);
+               ib_dma_unmap_single(mad_agent->device,
+                                   mad_send_wr->header_mapping,
+                                   sge[0].length, DMA_TO_DEVICE);
+               ib_dma_unmap_single(mad_agent->device,
+                                   mad_send_wr->payload_mapping,
+                                   sge[1].length, DMA_TO_DEVICE);
        }
        return ret;
 }
@@ -1091,7 +1105,9 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
                mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid;
                /* Timeout will be updated after send completes */
                mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms);
-               mad_send_wr->retries = send_buf->retries;
+               mad_send_wr->max_retries = send_buf->retries;
+               mad_send_wr->retries_left = send_buf->retries;
+               send_buf->retries = 0;
                /* Reference for work request to QP + response */
                mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
                mad_send_wr->status = IB_WC_SUCCESS;
@@ -1177,10 +1193,7 @@ static int method_in_use(struct ib_mad_mgmt_method_table **method,
 {
        int i;
 
-       for (i = find_first_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS);
-            i < IB_MGMT_MAX_METHODS;
-            i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS,
-                              1+i)) {
+       for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) {
                if ((*method)->agent[i]) {
                        printk(KERN_ERR PFX "Method %d already in use\n", i);
                        return -EINVAL;
@@ -1244,8 +1257,8 @@ static int find_vendor_oui(struct ib_mad_mgmt_vendor_class *vendor_class,
        int i;
 
        for (i = 0; i < MAX_MGMT_OUI; i++)
-                /* Is there matching OUI for this vendor class ? */
-                if (!memcmp(vendor_class->oui[i], oui, 3))
+               /* Is there matching OUI for this vendor class ? */
+               if (!memcmp(vendor_class->oui[i], oui, 3))
                        return i;
 
        return -1;
@@ -1314,13 +1327,9 @@ static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
                goto error3;
 
        /* Finally, add in methods being registered */
-       for (i = find_first_bit(mad_reg_req->method_mask,
-                               IB_MGMT_MAX_METHODS);
-            i < IB_MGMT_MAX_METHODS;
-            i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS,
-                              1+i)) {
+       for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS)
                (*method)->agent[i] = agent_priv;
-       }
+
        return 0;
 
 error3:
@@ -1413,13 +1422,9 @@ check_in_use:
                goto error4;
 
        /* Finally, add in methods being registered */
-       for (i = find_first_bit(mad_reg_req->method_mask,
-                               IB_MGMT_MAX_METHODS);
-            i < IB_MGMT_MAX_METHODS;
-            i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS,
-                              1+i)) {
+       for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS)
                (*method)->agent[i] = agent_priv;
-       }
+
        return 0;
 
 error4:
@@ -1551,7 +1556,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
        unsigned long flags;
 
        spin_lock_irqsave(&port_priv->reg_lock, flags);
-       if (response_mad(mad)) {
+       if (ib_response_mad(mad)) {
                u32 hi_tid;
                struct ib_mad_agent_private *entry;
 
@@ -1673,19 +1678,19 @@ static inline int rcv_has_same_class(struct ib_mad_send_wr_private *wr,
                rwc->recv_buf.mad->mad_hdr.mgmt_class;
 }
 
-static inline int rcv_has_same_gid(struct ib_mad_send_wr_private *wr,
+static inline int rcv_has_same_gid(struct ib_mad_agent_private *mad_agent_priv,
+                                  struct ib_mad_send_wr_private *wr,
                                   struct ib_mad_recv_wc *rwc )
 {
        struct ib_ah_attr attr;
        u8 send_resp, rcv_resp;
+       union ib_gid sgid;
+       struct ib_device *device = mad_agent_priv->agent.device;
+       u8 port_num = mad_agent_priv->agent.port_num;
+       u8 lmc;
 
-       send_resp = ((struct ib_mad *)(wr->send_buf.mad))->
-                    mad_hdr.method & IB_MGMT_METHOD_RESP;
-       rcv_resp = rwc->recv_buf.mad->mad_hdr.method & IB_MGMT_METHOD_RESP;
-
-       if (!send_resp && rcv_resp)
-               /* is request/response. GID/LIDs are both local (same). */
-               return 1;
+       send_resp = ib_response_mad((struct ib_mad *)wr->send_buf.mad);
+       rcv_resp = ib_response_mad(rwc->recv_buf.mad);
 
        if (send_resp == rcv_resp)
                /* both requests, or both responses. GIDs different */
@@ -1695,48 +1700,78 @@ static inline int rcv_has_same_gid(struct ib_mad_send_wr_private *wr,
                /* Assume not equal, to avoid false positives. */
                return 0;
 
-       if (!(attr.ah_flags & IB_AH_GRH) && !(rwc->wc->wc_flags & IB_WC_GRH))
-               return attr.dlid == rwc->wc->slid;
-       else if ((attr.ah_flags & IB_AH_GRH) &&
-                (rwc->wc->wc_flags & IB_WC_GRH))
-               return memcmp(attr.grh.dgid.raw,
-                             rwc->recv_buf.grh->sgid.raw, 16) == 0;
-       else
+       if (!!(attr.ah_flags & IB_AH_GRH) !=
+           !!(rwc->wc->wc_flags & IB_WC_GRH))
                /* one has GID, other does not.  Assume different */
                return 0;
+
+       if (!send_resp && rcv_resp) {
+               /* is request/response. */
+               if (!(attr.ah_flags & IB_AH_GRH)) {
+                       if (ib_get_cached_lmc(device, port_num, &lmc))
+                               return 0;
+                       return (!lmc || !((attr.src_path_bits ^
+                                          rwc->wc->dlid_path_bits) &
+                                         ((1 << lmc) - 1)));
+               } else {
+                       if (ib_get_cached_gid(device, port_num,
+                                             attr.grh.sgid_index, &sgid))
+                               return 0;
+                       return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw,
+                                      16);
+               }
+       }
+
+       if (!(attr.ah_flags & IB_AH_GRH))
+               return attr.dlid == rwc->wc->slid;
+       else
+               return !memcmp(attr.grh.dgid.raw, rwc->recv_buf.grh->sgid.raw,
+                              16);
+}
+
+static inline int is_direct(u8 class)
+{
+       return (class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE);
 }
+
 struct ib_mad_send_wr_private*
 ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv,
-                struct ib_mad_recv_wc *mad_recv_wc)
+                struct ib_mad_recv_wc *wc)
 {
-       struct ib_mad_send_wr_private *mad_send_wr;
+       struct ib_mad_send_wr_private *wr;
        struct ib_mad *mad;
 
-       mad = (struct ib_mad *)mad_recv_wc->recv_buf.mad;
+       mad = (struct ib_mad *)wc->recv_buf.mad;
 
-       list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list,
-                           agent_list) {
-               if ((mad_send_wr->tid == mad->mad_hdr.tid) &&
-                   rcv_has_same_class(mad_send_wr, mad_recv_wc) &&
-                   rcv_has_same_gid(mad_send_wr, mad_recv_wc))
-                       return mad_send_wr;
+       list_for_each_entry(wr, &mad_agent_priv->wait_list, agent_list) {
+               if ((wr->tid == mad->mad_hdr.tid) &&
+                   rcv_has_same_class(wr, wc) &&
+                   /*
+                    * Don't check GID for direct routed MADs.
+                    * These might have permissive LIDs.
+                    */
+                   (is_direct(wc->recv_buf.mad->mad_hdr.mgmt_class) ||
+                    rcv_has_same_gid(mad_agent_priv, wr, wc)))
+                       return (wr->status == IB_WC_SUCCESS) ? wr : NULL;
        }
 
        /*
         * It's possible to receive the response before we've
         * been notified that the send has completed
         */
-       list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list,
-                           agent_list) {
-               if (is_data_mad(mad_agent_priv, mad_send_wr->send_buf.mad) &&
-                   mad_send_wr->tid == mad->mad_hdr.tid &&
-                   mad_send_wr->timeout &&
-                   rcv_has_same_class(mad_send_wr, mad_recv_wc) &&
-                   rcv_has_same_gid(mad_send_wr, mad_recv_wc)) {
+       list_for_each_entry(wr, &mad_agent_priv->send_list, agent_list) {
+               if (is_data_mad(mad_agent_priv, wr->send_buf.mad) &&
+                   wr->tid == mad->mad_hdr.tid &&
+                   wr->timeout &&
+                   rcv_has_same_class(wr, wc) &&
+                   /*
+                    * Don't check GID for direct routed MADs.
+                    * These might have permissive LIDs.
+                    */
+                   (is_direct(wc->recv_buf.mad->mad_hdr.mgmt_class) ||
+                    rcv_has_same_gid(mad_agent_priv, wr, wc)))
                        /* Verify request has not been canceled */
-                       return (mad_send_wr->status == IB_WC_SUCCESS) ?
-                               mad_send_wr : NULL;
-               }
+                       return (wr->status == IB_WC_SUCCESS) ? wr : NULL;
        }
        return NULL;
 }
@@ -1744,11 +1779,9 @@ ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv,
 void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr)
 {
        mad_send_wr->timeout = 0;
-       if (mad_send_wr->refcount == 1) {
-               list_del(&mad_send_wr->agent_list);
-               list_add_tail(&mad_send_wr->agent_list,
+       if (mad_send_wr->refcount == 1)
+               list_move_tail(&mad_send_wr->agent_list,
                              &mad_send_wr->mad_agent_priv->done_list);
-       }
 }
 
 static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
@@ -1770,7 +1803,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
        }
 
        /* Complete corresponding request */
-       if (response_mad(mad_recv_wc->recv_buf.mad)) {
+       if (ib_response_mad(mad_recv_wc->recv_buf.mad)) {
                spin_lock_irqsave(&mad_agent_priv->lock, flags);
                mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc);
                if (!mad_send_wr) {
@@ -1804,14 +1837,10 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
 {
        struct ib_mad_qp_info *qp_info;
        struct ib_mad_private_header *mad_priv_hdr;
-       struct ib_mad_private *recv, *response;
+       struct ib_mad_private *recv, *response = NULL;
        struct ib_mad_list_head *mad_list;
        struct ib_mad_agent_private *mad_agent;
-
-       response = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL);
-       if (!response)
-               printk(KERN_ERR PFX "ib_mad_recv_done_handler no memory "
-                      "for response buffer\n");
+       int port_num;
 
        mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
        qp_info = mad_list->mad_queue->qp_info;
@@ -1820,11 +1849,11 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
        mad_priv_hdr = container_of(mad_list, struct ib_mad_private_header,
                                    mad_list);
        recv = container_of(mad_priv_hdr, struct ib_mad_private, header);
-       dma_unmap_single(port_priv->device->dma_device,
-                        pci_unmap_addr(&recv->header, mapping),
-                        sizeof(struct ib_mad_private) -
-                        sizeof(struct ib_mad_private_header),
-                        DMA_FROM_DEVICE);
+       ib_dma_unmap_single(port_priv->device,
+                           recv->header.mapping,
+                           sizeof(struct ib_mad_private) -
+                             sizeof(struct ib_mad_private_header),
+                           DMA_FROM_DEVICE);
 
        /* Setup MAD receive work completion from "normal" work completion */
        recv->header.wc = *wc;
@@ -1840,21 +1869,56 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
        if (!validate_mad(&recv->mad.mad, qp_info->qp->qp_num))
                goto out;
 
+       response = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL);
+       if (!response) {
+               printk(KERN_ERR PFX "ib_mad_recv_done_handler no memory "
+                      "for response buffer\n");
+               goto out;
+       }
+
+       if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH)
+               port_num = wc->port_num;
+       else
+               port_num = port_priv->port_num;
+
        if (recv->mad.mad.mad_hdr.mgmt_class ==
            IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
-               if (!smi_handle_dr_smp_recv(&recv->mad.smp,
-                                           port_priv->device->node_type,
-                                           port_priv->port_num,
-                                           port_priv->device->phys_port_cnt))
+               enum smi_forward_action retsmi;
+
+               if (smi_handle_dr_smp_recv(&recv->mad.smp,
+                                          port_priv->device->node_type,
+                                          port_num,
+                                          port_priv->device->phys_port_cnt) ==
+                                          IB_SMI_DISCARD)
                        goto out;
-               if (!smi_check_forward_dr_smp(&recv->mad.smp))
+
+               retsmi = smi_check_forward_dr_smp(&recv->mad.smp);
+               if (retsmi == IB_SMI_LOCAL)
                        goto local;
-               if (!smi_handle_dr_smp_send(&recv->mad.smp,
-                                           port_priv->device->node_type,
-                                           port_priv->port_num))
-                       goto out;
-               if (!smi_check_local_smp(&recv->mad.smp, port_priv->device))
+
+               if (retsmi == IB_SMI_SEND) { /* don't forward */
+                       if (smi_handle_dr_smp_send(&recv->mad.smp,
+                                                  port_priv->device->node_type,
+                                                  port_num) == IB_SMI_DISCARD)
+                               goto out;
+
+                       if (smi_check_local_smp(&recv->mad.smp, port_priv->device) == IB_SMI_DISCARD)
+                               goto out;
+               } else if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH) {
+                       /* forward case for switches */
+                       memcpy(response, recv, sizeof(*response));
+                       response->header.recv_wc.wc = &response->header.wc;
+                       response->header.recv_wc.recv_buf.mad = &response->mad.mad;
+                       response->header.recv_wc.recv_buf.grh = &response->grh;
+
+                       agent_send_response(&response->mad.mad,
+                                           &response->grh, wc,
+                                           port_priv->device,
+                                           smi_get_fwd_port(&recv->mad.smp),
+                                           qp_info->qp->qp_num);
+
                        goto out;
+               }
        }
 
 local:
@@ -1862,15 +1926,6 @@ local:
        if (port_priv->device->process_mad) {
                int ret;
 
-               if (!response) {
-                       printk(KERN_ERR PFX "No memory for response MAD\n");
-                       /*
-                        * Is it better to assume that
-                        * it wouldn't be processed ?
-                        */
-                       goto out;
-               }
-
                ret = port_priv->device->process_mad(port_priv->device, 0,
                                                     port_priv->port_num,
                                                     wc, &recv->grh,
@@ -1883,7 +1938,7 @@ local:
                                agent_send_response(&response->mad.mad,
                                                    &recv->grh, wc,
                                                    port_priv->device,
-                                                   port_priv->port_num,
+                                                   port_num,
                                                    qp_info->qp->qp_num);
                                goto out;
                        }
@@ -1916,7 +1971,7 @@ static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
        unsigned long delay;
 
        if (list_empty(&mad_agent_priv->wait_list)) {
-               cancel_delayed_work(&mad_agent_priv->timed_work);
+               __cancel_delayed_work(&mad_agent_priv->timed_work);
        } else {
                mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
                                         struct ib_mad_send_wr_private,
@@ -1925,7 +1980,7 @@ static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
                if (time_after(mad_agent_priv->timeout,
                               mad_send_wr->timeout)) {
                        mad_agent_priv->timeout = mad_send_wr->timeout;
-                       cancel_delayed_work(&mad_agent_priv->timed_work);
+                       __cancel_delayed_work(&mad_agent_priv->timed_work);
                        delay = mad_send_wr->timeout - jiffies;
                        if ((long)delay <= 0)
                                delay = 1;
@@ -1965,7 +2020,7 @@ static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)
 
        /* Reschedule a work item if we have a shorter timeout */
        if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list) {
-               cancel_delayed_work(&mad_agent_priv->timed_work);
+               __cancel_delayed_work(&mad_agent_priv->timed_work);
                queue_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
                                   &mad_agent_priv->timed_work, delay);
        }
@@ -2050,12 +2105,12 @@ static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
        qp_info = send_queue->qp_info;
 
 retry:
-       dma_unmap_single(mad_send_wr->send_buf.mad_agent->device->dma_device,
-                        pci_unmap_addr(mad_send_wr, header_mapping),
-                        mad_send_wr->sg_list[0].length, DMA_TO_DEVICE);
-       dma_unmap_single(mad_send_wr->send_buf.mad_agent->device->dma_device,
-                        pci_unmap_addr(mad_send_wr, payload_mapping),
-                        mad_send_wr->sg_list[1].length, DMA_TO_DEVICE);
+       ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device,
+                           mad_send_wr->header_mapping,
+                           mad_send_wr->sg_list[0].length, DMA_TO_DEVICE);
+       ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device,
+                           mad_send_wr->payload_mapping,
+                           mad_send_wr->sg_list[1].length, DMA_TO_DEVICE);
        queued_send_wr = NULL;
        spin_lock_irqsave(&send_queue->lock, flags);
        list_del(&mad_list->list);
@@ -2067,8 +2122,7 @@ retry:
                queued_send_wr = container_of(mad_list,
                                        struct ib_mad_send_wr_private,
                                        mad_list);
-               list_del(&mad_list->list);
-               list_add_tail(&mad_list->list, &send_queue->list);
+               list_move_tail(&mad_list->list, &send_queue->list);
        }
        spin_unlock_irqrestore(&send_queue->lock, flags);
 
@@ -2168,12 +2222,12 @@ static void mad_error_handler(struct ib_mad_port_private *port_priv,
 /*
  * IB MAD completion callback
  */
-static void ib_mad_completion_handler(void *data)
+static void ib_mad_completion_handler(struct work_struct *work)
 {
        struct ib_mad_port_private *port_priv;
        struct ib_wc wc;
 
-       port_priv = (struct ib_mad_port_private *)data;
+       port_priv = container_of(work, struct ib_mad_port_private, work);
        ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
 
        while (ib_poll_cq(port_priv->cq, 1, &wc) == 1) {
@@ -2207,15 +2261,13 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
        list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
                                 &mad_agent_priv->send_list, agent_list) {
                if (mad_send_wr->status == IB_WC_SUCCESS) {
-                       mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
+                       mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
                        mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
                }
        }
 
        /* Empty wait list to prevent receives from finding a request */
        list_splice_init(&mad_agent_priv->wait_list, &cancel_list);
-       /* Empty local completion list as well */
-       list_splice_init(&mad_agent_priv->local_list, &cancel_list);
        spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
 
        /* Report all cancelled requests */
@@ -2294,17 +2346,18 @@ void ib_cancel_mad(struct ib_mad_agent *mad_agent,
 }
 EXPORT_SYMBOL(ib_cancel_mad);
 
-static void local_completions(void *data)
+static void local_completions(struct work_struct *work)
 {
        struct ib_mad_agent_private *mad_agent_priv;
        struct ib_mad_local_private *local;
        struct ib_mad_agent_private *recv_mad_agent;
        unsigned long flags;
-       int recv = 0;
+       int free_mad;
        struct ib_wc wc;
        struct ib_mad_send_wc mad_send_wc;
 
-       mad_agent_priv = (struct ib_mad_agent_private *)data;
+       mad_agent_priv =
+               container_of(work, struct ib_mad_agent_private, local_work);
 
        spin_lock_irqsave(&mad_agent_priv->lock, flags);
        while (!list_empty(&mad_agent_priv->local_list)) {
@@ -2313,19 +2366,21 @@ static void local_completions(void *data)
                                   completion_list);
                list_del(&local->completion_list);
                spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+               free_mad = 0;
                if (local->mad_priv) {
                        recv_mad_agent = local->recv_mad_agent;
                        if (!recv_mad_agent) {
                                printk(KERN_ERR PFX "No receive MAD agent for local completion\n");
+                               free_mad = 1;
                                goto local_send_completion;
                        }
 
-                       recv = 1;
                        /*
                         * Defined behavior is to complete response
                         * before request
                         */
-                       build_smp_wc((unsigned long) local->mad_send_wr,
+                       build_smp_wc(recv_mad_agent->agent.qp,
+                                    (unsigned long) local->mad_send_wr,
                                     be16_to_cpu(IB_LID_PERMISSIVE),
                                     0, recv_mad_agent->agent.port_num, &wc);
 
@@ -2364,7 +2419,7 @@ local_send_completion:
 
                spin_lock_irqsave(&mad_agent_priv->lock, flags);
                atomic_dec(&mad_agent_priv->refcount);
-               if (!recv)
+               if (free_mad)
                        kmem_cache_free(ib_mad_cache, local->mad_priv);
                kfree(local);
        }
@@ -2375,9 +2430,12 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
 {
        int ret;
 
-       if (!mad_send_wr->retries--)
+       if (!mad_send_wr->retries_left)
                return -ETIMEDOUT;
 
+       mad_send_wr->retries_left--;
+       mad_send_wr->send_buf.retries++;
+
        mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
 
        if (mad_send_wr->mad_agent_priv->agent.rmpp_version) {
@@ -2404,14 +2462,15 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
        return ret;
 }
 
-static void timeout_sends(void *data)
+static void timeout_sends(struct work_struct *work)
 {
        struct ib_mad_agent_private *mad_agent_priv;
        struct ib_mad_send_wr_private *mad_send_wr;
        struct ib_mad_send_wc mad_send_wc;
        unsigned long flags, delay;
 
-       mad_agent_priv = (struct ib_mad_agent_private *)data;
+       mad_agent_priv = container_of(work, struct ib_mad_agent_private,
+                                     timed_work.work);
        mad_send_wc.vendor_err = 0;
 
        spin_lock_irqsave(&mad_agent_priv->lock, flags);
@@ -2497,13 +2556,12 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
                                break;
                        }
                }
-               sg_list.addr = dma_map_single(qp_info->port_priv->
-                                               device->dma_device,
-                                             &mad_priv->grh,
-                                             sizeof *mad_priv -
-                                               sizeof mad_priv->header,
-                                             DMA_FROM_DEVICE);
-               pci_unmap_addr_set(&mad_priv->header, mapping, sg_list.addr);
+               sg_list.addr = ib_dma_map_single(qp_info->port_priv->device,
+                                                &mad_priv->grh,
+                                                sizeof *mad_priv -
+                                                  sizeof mad_priv->header,
+                                                DMA_FROM_DEVICE);
+               mad_priv->header.mapping = sg_list.addr;
                recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list;
                mad_priv->header.mad_list.mad_queue = recv_queue;
 
@@ -2518,12 +2576,11 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
                        list_del(&mad_priv->header.mad_list.list);
                        recv_queue->count--;
                        spin_unlock_irqrestore(&recv_queue->lock, flags);
-                       dma_unmap_single(qp_info->port_priv->device->dma_device,
-                                        pci_unmap_addr(&mad_priv->header,
-                                                       mapping),
-                                        sizeof *mad_priv -
-                                          sizeof mad_priv->header,
-                                        DMA_FROM_DEVICE);
+                       ib_dma_unmap_single(qp_info->port_priv->device,
+                                           mad_priv->header.mapping,
+                                           sizeof *mad_priv -
+                                             sizeof mad_priv->header,
+                                           DMA_FROM_DEVICE);
                        kmem_cache_free(ib_mad_cache, mad_priv);
                        printk(KERN_ERR PFX "ib_post_recv failed: %d\n", ret);
                        break;
@@ -2555,11 +2612,11 @@ static void cleanup_recv_queue(struct ib_mad_qp_info *qp_info)
                /* Remove from posted receive MAD list */
                list_del(&mad_list->list);
 
-               dma_unmap_single(qp_info->port_priv->device->dma_device,
-                                pci_unmap_addr(&recv->header, mapping),
-                                sizeof(struct ib_mad_private) -
-                                sizeof(struct ib_mad_private_header),
-                                DMA_FROM_DEVICE);
+               ib_dma_unmap_single(qp_info->port_priv->device,
+                                   recv->header.mapping,
+                                   sizeof(struct ib_mad_private) -
+                                     sizeof(struct ib_mad_private_header),
+                                   DMA_FROM_DEVICE);
                kmem_cache_free(ib_mad_cache, recv);
        }
 
@@ -2576,7 +2633,7 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
        struct ib_qp *qp;
 
        attr = kmalloc(sizeof *attr, GFP_KERNEL);
-       if (!attr) {
+       if (!attr) {
                printk(KERN_ERR PFX "Couldn't kmalloc ib_qp_attr\n");
                return -ENOMEM;
        }
@@ -2676,8 +2733,8 @@ static int create_mad_qp(struct ib_mad_qp_info *qp_info,
        qp_init_attr.send_cq = qp_info->port_priv->cq;
        qp_init_attr.recv_cq = qp_info->port_priv->cq;
        qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
-       qp_init_attr.cap.max_send_wr = IB_MAD_QP_SEND_SIZE;
-       qp_init_attr.cap.max_recv_wr = IB_MAD_QP_RECV_SIZE;
+       qp_init_attr.cap.max_send_wr = mad_sendq_size;
+       qp_init_attr.cap.max_recv_wr = mad_recvq_size;
        qp_init_attr.cap.max_send_sge = IB_MAD_SEND_REQ_MAX_SG;
        qp_init_attr.cap.max_recv_sge = IB_MAD_RECV_REQ_MAX_SG;
        qp_init_attr.qp_type = qp_type;
@@ -2692,8 +2749,8 @@ static int create_mad_qp(struct ib_mad_qp_info *qp_info,
                goto error;
        }
        /* Use minimum queue sizes unless the CQ is resized */
-       qp_info->send_queue.max_active = IB_MAD_QP_SEND_SIZE;
-       qp_info->recv_queue.max_active = IB_MAD_QP_RECV_SIZE;
+       qp_info->send_queue.max_active = mad_sendq_size;
+       qp_info->recv_queue.max_active = mad_recvq_size;
        return 0;
 
 error:
@@ -2732,10 +2789,10 @@ static int ib_mad_port_open(struct ib_device *device,
        init_mad_qp(port_priv, &port_priv->qp_info[0]);
        init_mad_qp(port_priv, &port_priv->qp_info[1]);
 
-       cq_size = (IB_MAD_QP_SEND_SIZE + IB_MAD_QP_RECV_SIZE) * 2;
+       cq_size = (mad_sendq_size + mad_recvq_size) * 2;
        port_priv->cq = ib_create_cq(port_priv->device,
                                     ib_mad_thread_completion_handler,
-                                    NULL, port_priv, cq_size);
+                                    NULL, port_priv, cq_size, 0);
        if (IS_ERR(port_priv->cq)) {
                printk(KERN_ERR PFX "Couldn't create ib_mad CQ\n");
                ret = PTR_ERR(port_priv->cq);
@@ -2769,7 +2826,7 @@ static int ib_mad_port_open(struct ib_device *device,
                ret = -ENOMEM;
                goto error8;
        }
-       INIT_WORK(&port_priv->work, ib_mad_completion_handler, port_priv);
+       INIT_WORK(&port_priv->work, ib_mad_completion_handler);
 
        spin_lock_irqsave(&ib_mad_port_list_lock, flags);
        list_add_tail(&port_priv->port_list, &ib_mad_port_list);
@@ -2846,7 +2903,10 @@ static void ib_mad_init_device(struct ib_device *device)
 {
        int start, end, i;
 
-       if (device->node_type == IB_NODE_SWITCH) {
+       if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+               return;
+
+       if (device->node_type == RDMA_NODE_IB_SWITCH) {
                start = 0;
                end   = 0;
        } else {
@@ -2893,7 +2953,10 @@ static void ib_mad_remove_device(struct ib_device *device)
 {
        int i, num_ports, cur_port;
 
-       if (device->node_type == IB_NODE_SWITCH) {
+       if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+               return;
+
+       if (device->node_type == RDMA_NODE_IB_SWITCH) {
                num_ports = 1;
                cur_port = 0;
        } else {
@@ -2921,13 +2984,16 @@ static int __init ib_mad_init_module(void)
 {
        int ret;
 
-       spin_lock_init(&ib_mad_port_list_lock);
+       mad_recvq_size = min(mad_recvq_size, IB_MAD_QP_MAX_SIZE);
+       mad_recvq_size = max(mad_recvq_size, IB_MAD_QP_MIN_SIZE);
+
+       mad_sendq_size = min(mad_sendq_size, IB_MAD_QP_MAX_SIZE);
+       mad_sendq_size = max(mad_sendq_size, IB_MAD_QP_MIN_SIZE);
 
        ib_mad_cache = kmem_cache_create("ib_mad",
                                         sizeof(struct ib_mad_private),
                                         0,
                                         SLAB_HWCACHE_ALIGN,
-                                        NULL,
                                         NULL);
        if (!ib_mad_cache) {
                printk(KERN_ERR PFX "Couldn't create ib_mad cache\n");
@@ -2954,12 +3020,8 @@ error1:
 static void __exit ib_mad_cleanup_module(void)
 {
        ib_unregister_client(&mad_client);
-
-       if (kmem_cache_destroy(ib_mad_cache)) {
-               printk(KERN_DEBUG PFX "Failed to destroy ib_mad cache\n");
-       }
+       kmem_cache_destroy(ib_mad_cache);
 }
 
 module_init(ib_mad_init_module);
 module_exit(ib_mad_cleanup_module);
-