Merge branches 'amso1100', 'cma', 'cxgb3', 'ehca', 'ipath', 'ipoib', 'iser', 'misc...
author Roland Dreier <rolandd@cisco.com>
Wed, 16 Dec 2009 07:39:25 +0000 (23:39 -0800)
committer Roland Dreier <rolandd@cisco.com>
Wed, 16 Dec 2009 07:39:25 +0000 (23:39 -0800)
38 files changed:
Documentation/infiniband/ipoib.txt
drivers/infiniband/core/addr.c
drivers/infiniband/core/cma.c
drivers/infiniband/core/sa_query.c
drivers/infiniband/core/ucma.c
drivers/infiniband/core/uverbs_cmd.c
drivers/infiniband/hw/cxgb3/iwch_qp.c
drivers/infiniband/hw/ehca/ehca_classes.h
drivers/infiniband/hw/ehca/ehca_eq.c
drivers/infiniband/hw/ehca/ehca_main.c
drivers/infiniband/hw/ehca/ehca_reqs.c
drivers/infiniband/hw/ipath/ipath_driver.c
drivers/infiniband/hw/mlx4/main.c
drivers/infiniband/hw/mlx4/qp.c
drivers/infiniband/hw/nes/Kconfig
drivers/infiniband/hw/nes/nes.c
drivers/infiniband/hw/nes/nes.h
drivers/infiniband/hw/nes/nes_cm.c
drivers/infiniband/hw/nes/nes_cm.h
drivers/infiniband/hw/nes/nes_context.h
drivers/infiniband/hw/nes/nes_hw.c
drivers/infiniband/hw/nes/nes_hw.h
drivers/infiniband/hw/nes/nes_nic.c
drivers/infiniband/hw/nes/nes_user.h
drivers/infiniband/hw/nes/nes_utils.c
drivers/infiniband/hw/nes/nes_verbs.c
drivers/infiniband/hw/nes/nes_verbs.h
drivers/infiniband/ulp/ipoib/ipoib_main.c
drivers/infiniband/ulp/iser/iser_memory.c
drivers/net/mlx4/fw.c
include/linux/mlx4/device.h
include/rdma/ib_addr.h
include/rdma/ib_sa.h
include/rdma/ib_user_sa.h
include/rdma/ib_verbs.h
include/rdma/rdma_user_cm.h
net/rds/ib.c
net/rds/iw.c

index 6d40f00..64eeb55 100644 (file)
@@ -36,11 +36,11 @@ Datagram vs Connected modes
   fabric with a 2K MTU, the IPoIB MTU will be 2048 - 4 = 2044 bytes.
 
   In connected mode, the IB RC (Reliable Connected) transport is used.
-  Connected mode is to takes advantage of the connected nature of the
-  IB transport and allows an MTU up to the maximal IP packet size of
-  64K, which reduces the number of IP packets needed for handling
-  large UDP datagrams, TCP segments, etc and increases the performance
-  for large messages.
+  Connected mode takes advantage of the connected nature of the IB
+  transport and allows an MTU up to the maximal IP packet size of 64K,
+  which reduces the number of IP packets needed for handling large UDP
+  datagrams, TCP segments, etc and increases the performance for large
+  messages.
 
   In connected mode, the interface's UD QP is still used for multicast
   and communication with peers that don't support connected mode. In
index bd07803..abbb069 100644 (file)
@@ -36,7 +36,6 @@
 #include <linux/mutex.h>
 #include <linux/inetdevice.h>
 #include <linux/workqueue.h>
-#include <linux/if_arp.h>
 #include <net/arp.h>
 #include <net/neighbour.h>
 #include <net/route.h>
@@ -92,22 +91,12 @@ EXPORT_SYMBOL(rdma_addr_unregister_client);
 int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
                     const unsigned char *dst_dev_addr)
 {
-       switch (dev->type) {
-       case ARPHRD_INFINIBAND:
-               dev_addr->dev_type = RDMA_NODE_IB_CA;
-               break;
-       case ARPHRD_ETHER:
-               dev_addr->dev_type = RDMA_NODE_RNIC;
-               break;
-       default:
-               return -EADDRNOTAVAIL;
-       }
-
+       dev_addr->dev_type = dev->type;
        memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
        memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
        if (dst_dev_addr)
                memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
-       dev_addr->src_dev = dev;
+       dev_addr->bound_dev_if = dev->ifindex;
        return 0;
 }
 EXPORT_SYMBOL(rdma_copy_addr);
@@ -117,6 +106,15 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
        struct net_device *dev;
        int ret = -EADDRNOTAVAIL;
 
+       if (dev_addr->bound_dev_if) {
+               dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
+               if (!dev)
+                       return -ENODEV;
+               ret = rdma_copy_addr(dev_addr, dev, NULL);
+               dev_put(dev);
+               return ret;
+       }
+
        switch (addr->sa_family) {
        case AF_INET:
                dev = ip_dev_find(&init_net,
@@ -131,6 +129,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
        case AF_INET6:
+               read_lock(&dev_base_lock);
                for_each_netdev(&init_net, dev) {
                        if (ipv6_chk_addr(&init_net,
                                          &((struct sockaddr_in6 *) addr)->sin6_addr,
@@ -139,6 +138,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
                                break;
                        }
                }
+               read_unlock(&dev_base_lock);
                break;
 #endif
        }
@@ -176,48 +176,9 @@ static void queue_req(struct addr_req *req)
        mutex_unlock(&lock);
 }
 
-static void addr_send_arp(struct sockaddr *dst_in)
-{
-       struct rtable *rt;
-       struct flowi fl;
-
-       memset(&fl, 0, sizeof fl);
-
-       switch (dst_in->sa_family) {
-       case AF_INET:
-               fl.nl_u.ip4_u.daddr =
-                       ((struct sockaddr_in *) dst_in)->sin_addr.s_addr;
-
-               if (ip_route_output_key(&init_net, &rt, &fl))
-                       return;
-
-               neigh_event_send(rt->u.dst.neighbour, NULL);
-               ip_rt_put(rt);
-               break;
-
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-       case AF_INET6:
-       {
-               struct dst_entry *dst;
-
-               fl.nl_u.ip6_u.daddr =
-                       ((struct sockaddr_in6 *) dst_in)->sin6_addr;
-
-               dst = ip6_route_output(&init_net, NULL, &fl);
-               if (!dst)
-                       return;
-
-               neigh_event_send(dst->neighbour, NULL);
-               dst_release(dst);
-               break;
-       }
-#endif
-       }
-}
-
-static int addr4_resolve_remote(struct sockaddr_in *src_in,
-                              struct sockaddr_in *dst_in,
-                              struct rdma_dev_addr *addr)
+static int addr4_resolve(struct sockaddr_in *src_in,
+                        struct sockaddr_in *dst_in,
+                        struct rdma_dev_addr *addr)
 {
        __be32 src_ip = src_in->sin_addr.s_addr;
        __be32 dst_ip = dst_in->sin_addr.s_addr;
@@ -229,10 +190,22 @@ static int addr4_resolve_remote(struct sockaddr_in *src_in,
        memset(&fl, 0, sizeof fl);
        fl.nl_u.ip4_u.daddr = dst_ip;
        fl.nl_u.ip4_u.saddr = src_ip;
+       fl.oif = addr->bound_dev_if;
+
        ret = ip_route_output_key(&init_net, &rt, &fl);
        if (ret)
                goto out;
 
+       src_in->sin_family = AF_INET;
+       src_in->sin_addr.s_addr = rt->rt_src;
+
+       if (rt->idev->dev->flags & IFF_LOOPBACK) {
+               ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
+               if (!ret)
+                       memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
+               goto put;
+       }
+
        /* If the device does ARP internally, return 'done' */
        if (rt->idev->dev->flags & IFF_NOARP) {
                rdma_copy_addr(addr, rt->idev->dev, NULL);
@@ -240,21 +213,14 @@ static int addr4_resolve_remote(struct sockaddr_in *src_in,
        }
 
        neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev);
-       if (!neigh) {
+       if (!neigh || !(neigh->nud_state & NUD_VALID)) {
+               neigh_event_send(rt->u.dst.neighbour, NULL);
                ret = -ENODATA;
+               if (neigh)
+                       goto release;
                goto put;
        }
 
-       if (!(neigh->nud_state & NUD_VALID)) {
-               ret = -ENODATA;
-               goto release;
-       }
-
-       if (!src_ip) {
-               src_in->sin_family = dst_in->sin_family;
-               src_in->sin_addr.s_addr = rt->rt_src;
-       }
-
        ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
 release:
        neigh_release(neigh);
@@ -265,52 +231,77 @@ out:
 }
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-static int addr6_resolve_remote(struct sockaddr_in6 *src_in,
-                              struct sockaddr_in6 *dst_in,
-                              struct rdma_dev_addr *addr)
+static int addr6_resolve(struct sockaddr_in6 *src_in,
+                        struct sockaddr_in6 *dst_in,
+                        struct rdma_dev_addr *addr)
 {
        struct flowi fl;
        struct neighbour *neigh;
        struct dst_entry *dst;
-       int ret = -ENODATA;
+       int ret;
 
        memset(&fl, 0, sizeof fl);
-       fl.nl_u.ip6_u.daddr = dst_in->sin6_addr;
-       fl.nl_u.ip6_u.saddr = src_in->sin6_addr;
+       ipv6_addr_copy(&fl.fl6_dst, &dst_in->sin6_addr);
+       ipv6_addr_copy(&fl.fl6_src, &src_in->sin6_addr);
+       fl.oif = addr->bound_dev_if;
 
        dst = ip6_route_output(&init_net, NULL, &fl);
-       if (!dst)
-               return ret;
+       if ((ret = dst->error))
+               goto put;
+
+       if (ipv6_addr_any(&fl.fl6_src)) {
+               ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev,
+                                        &fl.fl6_dst, 0, &fl.fl6_src);
+               if (ret)
+                       goto put;
+
+               src_in->sin6_family = AF_INET6;
+               ipv6_addr_copy(&src_in->sin6_addr, &fl.fl6_src);
+       }
+
+       if (dst->dev->flags & IFF_LOOPBACK) {
+               ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
+               if (!ret)
+                       memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
+               goto put;
+       }
 
+       /* If the device does ARP internally, return 'done' */
        if (dst->dev->flags & IFF_NOARP) {
                ret = rdma_copy_addr(addr, dst->dev, NULL);
-       } else {
-               neigh = dst->neighbour;
-               if (neigh && (neigh->nud_state & NUD_VALID))
-                       ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
+               goto put;
+       }
+
+       neigh = dst->neighbour;
+       if (!neigh || !(neigh->nud_state & NUD_VALID)) {
+               neigh_event_send(dst->neighbour, NULL);
+               ret = -ENODATA;
+               goto put;
        }
 
+       ret = rdma_copy_addr(addr, dst->dev, neigh->ha);
+put:
        dst_release(dst);
        return ret;
 }
 #else
-static int addr6_resolve_remote(struct sockaddr_in6 *src_in,
-                              struct sockaddr_in6 *dst_in,
-                              struct rdma_dev_addr *addr)
+static int addr6_resolve(struct sockaddr_in6 *src_in,
+                        struct sockaddr_in6 *dst_in,
+                        struct rdma_dev_addr *addr)
 {
        return -EADDRNOTAVAIL;
 }
 #endif
 
-static int addr_resolve_remote(struct sockaddr *src_in,
-                               struct sockaddr *dst_in,
-                               struct rdma_dev_addr *addr)
+static int addr_resolve(struct sockaddr *src_in,
+                       struct sockaddr *dst_in,
+                       struct rdma_dev_addr *addr)
 {
        if (src_in->sa_family == AF_INET) {
-               return addr4_resolve_remote((struct sockaddr_in *) src_in,
+               return addr4_resolve((struct sockaddr_in *) src_in,
                        (struct sockaddr_in *) dst_in, addr);
        } else
-               return addr6_resolve_remote((struct sockaddr_in6 *) src_in,
+               return addr6_resolve((struct sockaddr_in6 *) src_in,
                        (struct sockaddr_in6 *) dst_in, addr);
 }
 
@@ -327,8 +318,7 @@ static void process_req(struct work_struct *work)
                if (req->status == -ENODATA) {
                        src_in = (struct sockaddr *) &req->src_addr;
                        dst_in = (struct sockaddr *) &req->dst_addr;
-                       req->status = addr_resolve_remote(src_in, dst_in,
-                                                         req->addr);
+                       req->status = addr_resolve(src_in, dst_in, req->addr);
                        if (req->status && time_after_eq(jiffies, req->timeout))
                                req->status = -ETIMEDOUT;
                        else if (req->status == -ENODATA)
@@ -352,82 +342,6 @@ static void process_req(struct work_struct *work)
        }
 }
 
-static int addr_resolve_local(struct sockaddr *src_in,
-                             struct sockaddr *dst_in,
-                             struct rdma_dev_addr *addr)
-{
-       struct net_device *dev;
-       int ret;
-
-       switch (dst_in->sa_family) {
-       case AF_INET:
-       {
-               __be32 src_ip = ((struct sockaddr_in *) src_in)->sin_addr.s_addr;
-               __be32 dst_ip = ((struct sockaddr_in *) dst_in)->sin_addr.s_addr;
-
-               dev = ip_dev_find(&init_net, dst_ip);
-               if (!dev)
-                       return -EADDRNOTAVAIL;
-
-               if (ipv4_is_zeronet(src_ip)) {
-                       src_in->sa_family = dst_in->sa_family;
-                       ((struct sockaddr_in *) src_in)->sin_addr.s_addr = dst_ip;
-                       ret = rdma_copy_addr(addr, dev, dev->dev_addr);
-               } else if (ipv4_is_loopback(src_ip)) {
-                       ret = rdma_translate_ip(dst_in, addr);
-                       if (!ret)
-                               memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
-               } else {
-                       ret = rdma_translate_ip(src_in, addr);
-                       if (!ret)
-                               memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
-               }
-               dev_put(dev);
-               break;
-       }
-
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-       case AF_INET6:
-       {
-               struct in6_addr *a;
-
-               for_each_netdev(&init_net, dev)
-                       if (ipv6_chk_addr(&init_net,
-                                         &((struct sockaddr_in6 *) dst_in)->sin6_addr,
-                                         dev, 1))
-                               break;
-
-               if (!dev)
-                       return -EADDRNOTAVAIL;
-
-               a = &((struct sockaddr_in6 *) src_in)->sin6_addr;
-
-               if (ipv6_addr_any(a)) {
-                       src_in->sa_family = dst_in->sa_family;
-                       ((struct sockaddr_in6 *) src_in)->sin6_addr =
-                               ((struct sockaddr_in6 *) dst_in)->sin6_addr;
-                       ret = rdma_copy_addr(addr, dev, dev->dev_addr);
-               } else if (ipv6_addr_loopback(a)) {
-                       ret = rdma_translate_ip(dst_in, addr);
-                       if (!ret)
-                               memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
-               } else  {
-                       ret = rdma_translate_ip(src_in, addr);
-                       if (!ret)
-                               memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
-               }
-               break;
-       }
-#endif
-
-       default:
-               ret = -EADDRNOTAVAIL;
-               break;
-       }
-
-       return ret;
-}
-
 int rdma_resolve_ip(struct rdma_addr_client *client,
                    struct sockaddr *src_addr, struct sockaddr *dst_addr,
                    struct rdma_dev_addr *addr, int timeout_ms,
@@ -443,22 +357,28 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
        if (!req)
                return -ENOMEM;
 
-       if (src_addr)
-               memcpy(&req->src_addr, src_addr, ip_addr_size(src_addr));
-       memcpy(&req->dst_addr, dst_addr, ip_addr_size(dst_addr));
+       src_in = (struct sockaddr *) &req->src_addr;
+       dst_in = (struct sockaddr *) &req->dst_addr;
+
+       if (src_addr) {
+               if (src_addr->sa_family != dst_addr->sa_family) {
+                       ret = -EINVAL;
+                       goto err;
+               }
+
+               memcpy(src_in, src_addr, ip_addr_size(src_addr));
+       } else {
+               src_in->sa_family = dst_addr->sa_family;
+       }
+
+       memcpy(dst_in, dst_addr, ip_addr_size(dst_addr));
        req->addr = addr;
        req->callback = callback;
        req->context = context;
        req->client = client;
        atomic_inc(&client->refcount);
 
-       src_in = (struct sockaddr *) &req->src_addr;
-       dst_in = (struct sockaddr *) &req->dst_addr;
-
-       req->status = addr_resolve_local(src_in, dst_in, addr);
-       if (req->status == -EADDRNOTAVAIL)
-               req->status = addr_resolve_remote(src_in, dst_in, addr);
-
+       req->status = addr_resolve(src_in, dst_in, addr);
        switch (req->status) {
        case 0:
                req->timeout = jiffies;
@@ -467,15 +387,16 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
        case -ENODATA:
                req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
                queue_req(req);
-               addr_send_arp(dst_in);
                break;
        default:
                ret = req->status;
                atomic_dec(&client->refcount);
-               kfree(req);
-               break;
+               goto err;
        }
        return ret;
+err:
+       kfree(req);
+       return ret;
 }
 EXPORT_SYMBOL(rdma_resolve_ip);
 
index 0753178..fbdd731 100644 (file)
@@ -330,17 +330,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv)
        union ib_gid gid;
        int ret = -ENODEV;
 
-       switch (rdma_node_get_transport(dev_addr->dev_type)) {
-       case RDMA_TRANSPORT_IB:
-               ib_addr_get_sgid(dev_addr, &gid);
-               break;
-       case RDMA_TRANSPORT_IWARP:
-               iw_addr_get_sgid(dev_addr, &gid);
-               break;
-       default:
-               return -ENODEV;
-       }
-
+       rdma_addr_get_sgid(dev_addr, &gid);
        list_for_each_entry(cma_dev, &dev_list, list) {
                ret = ib_find_cached_gid(cma_dev->device, &gid,
                                         &id_priv->id.port_num, NULL);
@@ -1032,11 +1022,17 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
        if (rt->num_paths == 2)
                rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
 
-       ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
-       ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
-                               &id->route.addr.dev_addr);
-       if (ret)
-               goto destroy_id;
+       if (cma_any_addr((struct sockaddr *) &rt->addr.src_addr)) {
+               rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND;
+               rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
+               ib_addr_set_pkey(&rt->addr.dev_addr, rt->path_rec[0].pkey);
+       } else {
+               ret = rdma_translate_ip((struct sockaddr *) &rt->addr.src_addr,
+                                       &rt->addr.dev_addr);
+               if (ret)
+                       goto destroy_id;
+       }
+       rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
 
        id_priv = container_of(id, struct rdma_id_private, id);
        id_priv->state = CMA_CONNECT;
@@ -1071,10 +1067,12 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
        cma_save_net_info(&id->route.addr, &listen_id->route.addr,
                          ip_ver, port, src, dst);
 
-       ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
-                               &id->route.addr.dev_addr);
-       if (ret)
-               goto err;
+       if (!cma_any_addr((struct sockaddr *) &id->route.addr.src_addr)) {
+               ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
+                                       &id->route.addr.dev_addr);
+               if (ret)
+                       goto err;
+       }
 
        id_priv = container_of(id, struct rdma_id_private, id);
        id_priv->state = CMA_CONNECT;
@@ -1474,15 +1472,6 @@ static void cma_listen_on_all(struct rdma_id_private *id_priv)
        mutex_unlock(&lock);
 }
 
-static int cma_bind_any(struct rdma_cm_id *id, sa_family_t af)
-{
-       struct sockaddr_storage addr_in;
-
-       memset(&addr_in, 0, sizeof addr_in);
-       addr_in.ss_family = af;
-       return rdma_bind_addr(id, (struct sockaddr *) &addr_in);
-}
-
 int rdma_listen(struct rdma_cm_id *id, int backlog)
 {
        struct rdma_id_private *id_priv;
@@ -1490,7 +1479,8 @@ int rdma_listen(struct rdma_cm_id *id, int backlog)
 
        id_priv = container_of(id, struct rdma_id_private, id);
        if (id_priv->state == CMA_IDLE) {
-               ret = cma_bind_any(id, AF_INET);
+               ((struct sockaddr *) &id->route.addr.src_addr)->sa_family = AF_INET;
+               ret = rdma_bind_addr(id, (struct sockaddr *) &id->route.addr.src_addr);
                if (ret)
                        return ret;
        }
@@ -1565,8 +1555,8 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
        struct sockaddr_in6 *sin6;
 
        memset(&path_rec, 0, sizeof path_rec);
-       ib_addr_get_sgid(&addr->dev_addr, &path_rec.sgid);
-       ib_addr_get_dgid(&addr->dev_addr, &path_rec.dgid);
+       rdma_addr_get_sgid(&addr->dev_addr, &path_rec.sgid);
+       rdma_addr_get_dgid(&addr->dev_addr, &path_rec.dgid);
        path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr));
        path_rec.numb_path = 1;
        path_rec.reversible = 1;
@@ -1781,7 +1771,11 @@ port_found:
        if (ret)
                goto out;
 
-       ib_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
+       id_priv->id.route.addr.dev_addr.dev_type =
+               (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB) ?
+               ARPHRD_INFINIBAND : ARPHRD_ETHER;
+
+       rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
        ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
        id_priv->id.port_num = p;
        cma_attach_to_dev(id_priv, cma_dev);
@@ -1839,7 +1833,7 @@ out:
 static int cma_resolve_loopback(struct rdma_id_private *id_priv)
 {
        struct cma_work *work;
-       struct sockaddr_in *src_in, *dst_in;
+       struct sockaddr *src, *dst;
        union ib_gid gid;
        int ret;
 
@@ -1853,14 +1847,19 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv)
                        goto err;
        }
 
-       ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
-       ib_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
+       rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
+       rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
 
-       if (cma_zero_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr)) {
-               src_in = (struct sockaddr_in *)&id_priv->id.route.addr.src_addr;
-               dst_in = (struct sockaddr_in *)&id_priv->id.route.addr.dst_addr;
-               src_in->sin_family = dst_in->sin_family;
-               src_in->sin_addr.s_addr = dst_in->sin_addr.s_addr;
+       src = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
+       if (cma_zero_addr(src)) {
+               dst = (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
+               if ((src->sa_family = dst->sa_family) == AF_INET) {
+                       ((struct sockaddr_in *) src)->sin_addr.s_addr =
+                               ((struct sockaddr_in *) dst)->sin_addr.s_addr;
+               } else {
+                       ipv6_addr_copy(&((struct sockaddr_in6 *) src)->sin6_addr,
+                                      &((struct sockaddr_in6 *) dst)->sin6_addr);
+               }
        }
 
        work->id = id_priv;
@@ -1878,10 +1877,14 @@ err:
 static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
                         struct sockaddr *dst_addr)
 {
-       if (src_addr && src_addr->sa_family)
-               return rdma_bind_addr(id, src_addr);
-       else
-               return cma_bind_any(id, dst_addr->sa_family);
+       if (!src_addr || !src_addr->sa_family) {
+               src_addr = (struct sockaddr *) &id->route.addr.src_addr;
+               if ((src_addr->sa_family = dst_addr->sa_family) == AF_INET6) {
+                       ((struct sockaddr_in6 *) src_addr)->sin6_scope_id =
+                               ((struct sockaddr_in6 *) dst_addr)->sin6_scope_id;
+               }
+       }
+       return rdma_bind_addr(id, src_addr);
 }
 
 int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
@@ -2077,6 +2080,25 @@ static int cma_get_port(struct rdma_id_private *id_priv)
        return ret;
 }
 
+static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
+                              struct sockaddr *addr)
+{
+#if defined(CONFIG_IPv6) || defined(CONFIG_IPV6_MODULE)
+       struct sockaddr_in6 *sin6;
+
+       if (addr->sa_family != AF_INET6)
+               return 0;
+
+       sin6 = (struct sockaddr_in6 *) addr;
+       if ((ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) &&
+           !sin6->sin6_scope_id)
+                       return -EINVAL;
+
+       dev_addr->bound_dev_if = sin6->sin6_scope_id;
+#endif
+       return 0;
+}
+
 int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
 {
        struct rdma_id_private *id_priv;
@@ -2089,7 +2111,13 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
        if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND))
                return -EINVAL;
 
-       if (!cma_any_addr(addr)) {
+       ret = cma_check_linklocal(&id->route.addr.dev_addr, addr);
+       if (ret)
+               goto err1;
+
+       if (cma_loopback_addr(addr)) {
+               ret = cma_bind_loopback(id_priv);
+       } else if (!cma_zero_addr(addr)) {
                ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
                if (ret)
                        goto err1;
@@ -2108,7 +2136,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
 
        return 0;
 err2:
-       if (!cma_any_addr(addr)) {
+       if (id_priv->cma_dev) {
                mutex_lock(&lock);
                cma_detach_from_dev(id_priv);
                mutex_unlock(&lock);
@@ -2687,10 +2715,15 @@ static void cma_set_mgid(struct rdma_id_private *id_priv,
        if (cma_any_addr(addr)) {
                memset(mgid, 0, sizeof *mgid);
        } else if ((addr->sa_family == AF_INET6) &&
-                  ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFF10A01B) ==
+                  ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) ==
                                                                 0xFF10A01B)) {
                /* IPv6 address is an SA assigned MGID. */
                memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
+       } else if ((addr->sa_family == AF_INET6)) {
+               ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);
+               if (id_priv->id.ps == RDMA_PS_UDP)
+                       mc_map[7] = 0x01;       /* Use RDMA CM signature */
+               *mgid = *(union ib_gid *) (mc_map + 4);
        } else {
                ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map);
                if (id_priv->id.ps == RDMA_PS_UDP)
@@ -2716,7 +2749,7 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
        cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
        if (id_priv->id.ps == RDMA_PS_UDP)
                rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
-       ib_addr_get_sgid(dev_addr, &rec.port_gid);
+       rdma_addr_get_sgid(dev_addr, &rec.port_gid);
        rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
        rec.join_state = 1;
 
@@ -2815,7 +2848,7 @@ static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id
 
        dev_addr = &id_priv->id.route.addr.dev_addr;
 
-       if ((dev_addr->src_dev == ndev) &&
+       if ((dev_addr->bound_dev_if == ndev->ifindex) &&
            memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) {
                printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n",
                       ndev->name, &id_priv->id);
index 8254371..7e1ffd8 100644 (file)
@@ -604,6 +604,12 @@ retry:
        return ret ? ret : id;
 }
 
+void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec)
+{
+       ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), attribute, rec);
+}
+EXPORT_SYMBOL(ib_sa_unpack_path);
+
 static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
                                    int status,
                                    struct ib_sa_mad *mad)
index bb96d3c..b2e16c3 100644 (file)
@@ -43,6 +43,7 @@
 #include <rdma/rdma_user_cm.h>
 #include <rdma/ib_marshall.h>
 #include <rdma/rdma_cm.h>
+#include <rdma/rdma_cm_ib.h>
 
 MODULE_AUTHOR("Sean Hefty");
 MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
@@ -562,10 +563,10 @@ static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,
        switch (route->num_paths) {
        case 0:
                dev_addr = &route->addr.dev_addr;
-               ib_addr_get_dgid(dev_addr,
-                                (union ib_gid *) &resp->ib_route[0].dgid);
-               ib_addr_get_sgid(dev_addr,
-                                (union ib_gid *) &resp->ib_route[0].sgid);
+               rdma_addr_get_dgid(dev_addr,
+                                  (union ib_gid *) &resp->ib_route[0].dgid);
+               rdma_addr_get_sgid(dev_addr,
+                                  (union ib_gid *) &resp->ib_route[0].sgid);
                resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
                break;
        case 2:
@@ -812,6 +813,51 @@ static int ucma_set_option_id(struct ucma_context *ctx, int optname,
        return ret;
 }
 
+static int ucma_set_ib_path(struct ucma_context *ctx,
+                           struct ib_path_rec_data *path_data, size_t optlen)
+{
+       struct ib_sa_path_rec sa_path;
+       struct rdma_cm_event event;
+       int ret;
+
+       if (optlen % sizeof(*path_data))
+               return -EINVAL;
+
+       for (; optlen; optlen -= sizeof(*path_data), path_data++) {
+               if (path_data->flags == (IB_PATH_GMP | IB_PATH_PRIMARY |
+                                        IB_PATH_BIDIRECTIONAL))
+                       break;
+       }
+
+       if (!optlen)
+               return -EINVAL;
+
+       ib_sa_unpack_path(path_data->path_rec, &sa_path);
+       ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1);
+       if (ret)
+               return ret;
+
+       memset(&event, 0, sizeof event);
+       event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
+       return ucma_event_handler(ctx->cm_id, &event);
+}
+
+static int ucma_set_option_ib(struct ucma_context *ctx, int optname,
+                             void *optval, size_t optlen)
+{
+       int ret;
+
+       switch (optname) {
+       case RDMA_OPTION_IB_PATH:
+               ret = ucma_set_ib_path(ctx, optval, optlen);
+               break;
+       default:
+               ret = -ENOSYS;
+       }
+
+       return ret;
+}
+
 static int ucma_set_option_level(struct ucma_context *ctx, int level,
                                 int optname, void *optval, size_t optlen)
 {
@@ -821,6 +867,9 @@ static int ucma_set_option_level(struct ucma_context *ctx, int level,
        case RDMA_OPTION_ID:
                ret = ucma_set_option_id(ctx, optname, optval, optlen);
                break;
+       case RDMA_OPTION_IB:
+               ret = ucma_set_option_ib(ctx, optname, optval, optlen);
+               break;
        default:
                ret = -ENOSYS;
        }
index 56feab6..112d397 100644 (file)
@@ -285,7 +285,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
 
        ucontext = ibdev->alloc_ucontext(ibdev, &udata);
        if (IS_ERR(ucontext)) {
-               ret = PTR_ERR(file->ucontext);
+               ret = PTR_ERR(ucontext);
                goto err;
        }
 
index 1cecf98..3eb8cec 100644 (file)
@@ -365,18 +365,19 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
        spin_lock_irqsave(&qhp->lock, flag);
        if (qhp->attr.state > IWCH_QP_STATE_RTS) {
                spin_unlock_irqrestore(&qhp->lock, flag);
-               return -EINVAL;
+               err = -EINVAL;
+               goto out;
        }
        num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
                  qhp->wq.sq_size_log2);
        if (num_wrs <= 0) {
                spin_unlock_irqrestore(&qhp->lock, flag);
-               return -ENOMEM;
+               err = -ENOMEM;
+               goto out;
        }
        while (wr) {
                if (num_wrs == 0) {
                        err = -ENOMEM;
-                       *bad_wr = wr;
                        break;
                }
                idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
@@ -428,10 +429,8 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                             wr->opcode);
                        err = -EINVAL;
                }
-               if (err) {
-                       *bad_wr = wr;
+               if (err)
                        break;
-               }
                wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
                sqp->wr_id = wr->wr_id;
                sqp->opcode = wr2opcode(t3_wr_opcode);
@@ -454,6 +453,10 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
        }
        spin_unlock_irqrestore(&qhp->lock, flag);
        ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
+
+out:
+       if (err)
+               *bad_wr = wr;
        return err;
 }
 
@@ -471,18 +474,19 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
        spin_lock_irqsave(&qhp->lock, flag);
        if (qhp->attr.state > IWCH_QP_STATE_RTS) {
                spin_unlock_irqrestore(&qhp->lock, flag);
-               return -EINVAL;
+               err = -EINVAL;
+               goto out;
        }
        num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr,
                            qhp->wq.rq_size_log2) - 1;
        if (!wr) {
                spin_unlock_irqrestore(&qhp->lock, flag);
-               return -EINVAL;
+               err = -ENOMEM;
+               goto out;
        }
        while (wr) {
                if (wr->num_sge > T3_MAX_SGE) {
                        err = -EINVAL;
-                       *bad_wr = wr;
                        break;
                }
                idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
@@ -494,10 +498,10 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
                                err = build_zero_stag_recv(qhp, wqe, wr);
                else
                        err = -ENOMEM;
-               if (err) {
-                       *bad_wr = wr;
+
+               if (err)
                        break;
-               }
+
                build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG,
                               Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
                               0, sizeof(struct t3_receive_wr) >> 3, T3_SOPEOP);
@@ -511,6 +515,10 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
        }
        spin_unlock_irqrestore(&qhp->lock, flag);
        ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
+
+out:
+       if (err)
+               *bad_wr = wr;
        return err;
 }
 
index c825142..0136abd 100644 (file)
@@ -375,6 +375,7 @@ extern rwlock_t ehca_qp_idr_lock;
 extern rwlock_t ehca_cq_idr_lock;
 extern struct idr ehca_qp_idr;
 extern struct idr ehca_cq_idr;
+extern spinlock_t shca_list_lock;
 
 extern int ehca_static_rate;
 extern int ehca_port_act_time;
index 523e733..3b87589 100644 (file)
@@ -169,12 +169,15 @@ int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq)
        unsigned long flags;
        u64 h_ret;
 
-       spin_lock_irqsave(&eq->spinlock, flags);
        ibmebus_free_irq(eq->ist, (void *)shca);
 
-       h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
+       spin_lock_irqsave(&shca_list_lock, flags);
+       eq->is_initialized = 0;
+       spin_unlock_irqrestore(&shca_list_lock, flags);
 
-       spin_unlock_irqrestore(&eq->spinlock, flags);
+       tasklet_kill(&eq->interrupt_task);
+
+       h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
 
        if (h_ret != H_SUCCESS) {
                ehca_err(&shca->ib_device, "Can't free EQ resources.");
index fb2d83c..129a6be 100644 (file)
@@ -123,7 +123,7 @@ DEFINE_IDR(ehca_qp_idr);
 DEFINE_IDR(ehca_cq_idr);
 
 static LIST_HEAD(shca_list); /* list of all registered ehcas */
-static DEFINE_SPINLOCK(shca_list_lock);
+DEFINE_SPINLOCK(shca_list_lock);
 
 static struct timer_list poll_eqs_timer;
 
index 8fd88cd..e3ec7fd 100644 (file)
@@ -400,7 +400,6 @@ static inline void map_ib_wc_status(u32 cqe_status,
 
 static inline int post_one_send(struct ehca_qp *my_qp,
                         struct ib_send_wr *cur_send_wr,
-                        struct ib_send_wr **bad_send_wr,
                         int hidden)
 {
        struct ehca_wqe *wqe_p;
@@ -412,8 +411,6 @@ static inline int post_one_send(struct ehca_qp *my_qp,
        wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue);
        if (unlikely(!wqe_p)) {
                /* too many posted work requests: queue overflow */
-               if (bad_send_wr)
-                       *bad_send_wr = cur_send_wr;
                ehca_err(my_qp->ib_qp.device, "Too many posted WQEs "
                         "qp_num=%x", my_qp->ib_qp.qp_num);
                return -ENOMEM;
@@ -433,8 +430,6 @@ static inline int post_one_send(struct ehca_qp *my_qp,
         */
        if (unlikely(ret)) {
                my_qp->ipz_squeue.current_q_offset = start_offset;
-               if (bad_send_wr)
-                       *bad_send_wr = cur_send_wr;
                ehca_err(my_qp->ib_qp.device, "Could not write WQE "
                         "qp_num=%x", my_qp->ib_qp.qp_num);
                return -EINVAL;
@@ -448,7 +443,6 @@ int ehca_post_send(struct ib_qp *qp,
                   struct ib_send_wr **bad_send_wr)
 {
        struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
-       struct ib_send_wr *cur_send_wr;
        int wqe_cnt = 0;
        int ret = 0;
        unsigned long flags;
@@ -457,7 +451,8 @@ int ehca_post_send(struct ib_qp *qp,
        if (unlikely(my_qp->state < IB_QPS_RTS)) {
                ehca_err(qp->device, "Invalid QP state  qp_state=%d qpn=%x",
                         my_qp->state, qp->qp_num);
-               return -EINVAL;
+               ret = -EINVAL;
+               goto out;
        }
 
        /* LOCK the QUEUE */
@@ -476,24 +471,21 @@ int ehca_post_send(struct ib_qp *qp,
                struct ib_send_wr circ_wr;
                memset(&circ_wr, 0, sizeof(circ_wr));
                circ_wr.opcode = IB_WR_RDMA_READ;
-               post_one_send(my_qp, &circ_wr, NULL, 1); /* ignore retcode */
+               post_one_send(my_qp, &circ_wr, 1); /* ignore retcode */
                wqe_cnt++;
                ehca_dbg(qp->device, "posted circ wr  qp_num=%x", qp->qp_num);
                my_qp->message_count = my_qp->packet_count = 0;
        }
 
        /* loop processes list of send reqs */
-       for (cur_send_wr = send_wr; cur_send_wr != NULL;
-            cur_send_wr = cur_send_wr->next) {
-               ret = post_one_send(my_qp, cur_send_wr, bad_send_wr, 0);
+       while (send_wr) {
+               ret = post_one_send(my_qp, send_wr, 0);
                if (unlikely(ret)) {
-                       /* if one or more WQEs were successful, don't fail */
-                       if (wqe_cnt)
-                               ret = 0;
                        goto post_send_exit0;
                }
                wqe_cnt++;
-       } /* eof for cur_send_wr */
+               send_wr = send_wr->next;
+       }
 
 post_send_exit0:
        iosync(); /* serialize GAL register access */
@@ -503,6 +495,10 @@ post_send_exit0:
                         my_qp, qp->qp_num, wqe_cnt, ret);
        my_qp->message_count += wqe_cnt;
        spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
+
+out:
+       if (ret)
+               *bad_send_wr = send_wr;
        return ret;
 }
 
@@ -511,7 +507,6 @@ static int internal_post_recv(struct ehca_qp *my_qp,
                              struct ib_recv_wr *recv_wr,
                              struct ib_recv_wr **bad_recv_wr)
 {
-       struct ib_recv_wr *cur_recv_wr;
        struct ehca_wqe *wqe_p;
        int wqe_cnt = 0;
        int ret = 0;
@@ -522,27 +517,23 @@ static int internal_post_recv(struct ehca_qp *my_qp,
        if (unlikely(!HAS_RQ(my_qp))) {
                ehca_err(dev, "QP has no RQ  ehca_qp=%p qp_num=%x ext_type=%d",
                         my_qp, my_qp->real_qp_num, my_qp->ext_type);
-               return -ENODEV;
+               ret = -ENODEV;
+               goto out;
        }
 
        /* LOCK the QUEUE */
        spin_lock_irqsave(&my_qp->spinlock_r, flags);
 
-       /* loop processes list of send reqs */
-       for (cur_recv_wr = recv_wr; cur_recv_wr != NULL;
-            cur_recv_wr = cur_recv_wr->next) {
+       /* loop processes list of recv reqs */
+       while (recv_wr) {
                u64 start_offset = my_qp->ipz_rqueue.current_q_offset;
                /* get pointer next to free WQE */
                wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue);
                if (unlikely(!wqe_p)) {
                        /* too many posted work requests: queue overflow */
-                       if (bad_recv_wr)
-                               *bad_recv_wr = cur_recv_wr;
-                       if (wqe_cnt == 0) {
-                               ret = -ENOMEM;
-                               ehca_err(dev, "Too many posted WQEs "
-                                        "qp_num=%x", my_qp->real_qp_num);
-                       }
+                       ret = -ENOMEM;
+                       ehca_err(dev, "Too many posted WQEs "
+                               "qp_num=%x", my_qp->real_qp_num);
                        goto post_recv_exit0;
                }
                /*
@@ -552,7 +543,7 @@ static int internal_post_recv(struct ehca_qp *my_qp,
                rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size;
 
                /* write a RECV WQE into the QUEUE */
-               ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr,
+               ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, recv_wr,
                                rq_map_idx);
                /*
                 * if something failed,
@@ -560,22 +551,20 @@ static int internal_post_recv(struct ehca_qp *my_qp,
                 */
                if (unlikely(ret)) {
                        my_qp->ipz_rqueue.current_q_offset = start_offset;
-                       *bad_recv_wr = cur_recv_wr;
-                       if (wqe_cnt == 0) {
-                               ret = -EINVAL;
-                               ehca_err(dev, "Could not write WQE "
-                                        "qp_num=%x", my_qp->real_qp_num);
-                       }
+                       ret = -EINVAL;
+                       ehca_err(dev, "Could not write WQE "
+                               "qp_num=%x", my_qp->real_qp_num);
                        goto post_recv_exit0;
                }
 
                qmap_entry = &my_qp->rq_map.map[rq_map_idx];
-               qmap_entry->app_wr_id = get_app_wr_id(cur_recv_wr->wr_id);
+               qmap_entry->app_wr_id = get_app_wr_id(recv_wr->wr_id);
                qmap_entry->reported = 0;
                qmap_entry->cqe_req = 1;
 
                wqe_cnt++;
-       } /* eof for cur_recv_wr */
+               recv_wr = recv_wr->next;
+       } /* eof for recv_wr */
 
 post_recv_exit0:
        iosync(); /* serialize GAL register access */
@@ -584,6 +573,11 @@ post_recv_exit0:
            ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i",
                     my_qp, my_qp->real_qp_num, wqe_cnt, ret);
        spin_unlock_irqrestore(&my_qp->spinlock_r, flags);
+
+out:
+       if (ret)
+               *bad_recv_wr = recv_wr;
+
        return ret;
 }
 
@@ -597,6 +591,7 @@ int ehca_post_recv(struct ib_qp *qp,
        if (unlikely(my_qp->state == IB_QPS_RESET)) {
                ehca_err(qp->device, "Invalid QP state  qp_state=%d qpn=%x",
                         my_qp->state, qp->qp_num);
+               *bad_recv_wr = recv_wr;
                return -EINVAL;
        }
 
index 013d138..d2787fe 100644 (file)
@@ -39,6 +39,7 @@
 #include <linux/delay.h>
 #include <linux/netdevice.h>
 #include <linux/vmalloc.h>
+#include <linux/bitmap.h>
 
 #include "ipath_kernel.h"
 #include "ipath_verbs.h"
@@ -1697,7 +1698,7 @@ void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
                              unsigned len, int avail)
 {
        unsigned long flags;
-       unsigned end, cnt = 0, next;
+       unsigned end, cnt = 0;
 
        /* There are two bits per send buffer (busy and generation) */
        start *= 2;
@@ -1748,12 +1749,7 @@ void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
 
        if (dd->ipath_pioupd_thresh) {
                end = 2 * (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
-               next = find_first_bit(dd->ipath_pioavailkernel, end);
-               while (next < end) {
-                       cnt++;
-                       next = find_next_bit(dd->ipath_pioavailkernel, end,
-                                       next + 1);
-               }
+               cnt = bitmap_weight(dd->ipath_pioavailkernel, end);
        }
        spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
 
index 3cb3f47..e596537 100644 (file)
@@ -103,7 +103,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
                props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
        if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
                props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
-       if (dev->dev->caps.max_gso_sz)
+       if (dev->dev->caps.max_gso_sz && dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH)
                props->device_cap_flags |= IB_DEVICE_UD_TSO;
        if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
                props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
index 219b103..847030c 100644 (file)
@@ -54,7 +54,8 @@ enum {
        /*
         * Largest possible UD header: send with GRH and immediate data.
         */
-       MLX4_IB_UD_HEADER_SIZE          = 72
+       MLX4_IB_UD_HEADER_SIZE          = 72,
+       MLX4_IB_LSO_HEADER_SPARE        = 128,
 };
 
 struct mlx4_ib_sqp {
@@ -67,7 +68,8 @@ struct mlx4_ib_sqp {
 };
 
 enum {
-       MLX4_IB_MIN_SQ_STRIDE = 6
+       MLX4_IB_MIN_SQ_STRIDE   = 6,
+       MLX4_IB_CACHE_LINE_SIZE = 64,
 };
 
 static const __be32 mlx4_ib_opcode[] = {
@@ -261,7 +263,7 @@ static int send_wqe_overhead(enum ib_qp_type type, u32 flags)
        case IB_QPT_UD:
                return sizeof (struct mlx4_wqe_ctrl_seg) +
                        sizeof (struct mlx4_wqe_datagram_seg) +
-                       ((flags & MLX4_IB_QP_LSO) ? 64 : 0);
+                       ((flags & MLX4_IB_QP_LSO) ? MLX4_IB_LSO_HEADER_SPARE : 0);
        case IB_QPT_UC:
                return sizeof (struct mlx4_wqe_ctrl_seg) +
                        sizeof (struct mlx4_wqe_raddr_seg);
@@ -897,7 +899,6 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
 
        context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
                                     (to_mlx4_st(ibqp->qp_type) << 16));
-       context->flags     |= cpu_to_be32(1 << 8); /* DE? */
 
        if (!(attr_mask & IB_QP_PATH_MIG_STATE))
                context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
@@ -1467,16 +1468,12 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
 
 static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
                         struct mlx4_ib_qp *qp, unsigned *lso_seg_len,
-                        __be32 *lso_hdr_sz)
+                        __be32 *lso_hdr_sz, __be32 *blh)
 {
        unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16);
 
-       /*
-        * This is a temporary limitation and will be removed in
-        * a forthcoming FW release:
-        */
-       if (unlikely(halign > 64))
-               return -EINVAL;
+       if (unlikely(halign > MLX4_IB_CACHE_LINE_SIZE))
+               *blh = cpu_to_be32(1 << 6);
 
        if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) &&
                     wr->num_sge > qp->sq.max_gs - (halign >> 4)))
@@ -1522,6 +1519,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
        __be32 dummy;
        __be32 *lso_wqe;
        __be32 uninitialized_var(lso_hdr_sz);
+       __be32 blh;
        int i;
 
        spin_lock_irqsave(&qp->sq.lock, flags);
@@ -1530,6 +1528,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
        for (nreq = 0; wr; ++nreq, wr = wr->next) {
                lso_wqe = &dummy;
+               blh = 0;
 
                if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
                        err = -ENOMEM;
@@ -1616,7 +1615,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
 
                        if (wr->opcode == IB_WR_LSO) {
-                               err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz);
+                               err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz, &blh);
                                if (unlikely(err)) {
                                        *bad_wr = wr;
                                        goto out;
@@ -1687,7 +1686,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                }
 
                ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] |
-                       (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0);
+                       (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh;
 
                stamp = ind + qp->sq_spare_wqes;
                ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift);
index d449eb6..846dc97 100644 (file)
@@ -4,14 +4,13 @@ config INFINIBAND_NES
        select LIBCRC32C
        select INET_LRO
        ---help---
-         This is a low-level driver for NetEffect RDMA enabled
-         Network Interface Cards (RNIC).
+         This is the RDMA Network Interface Card (RNIC) driver for
+         NetEffect Ethernet Cluster Server Adapters.
 
 config INFINIBAND_NES_DEBUG
        bool "Verbose debugging output"
        depends on INFINIBAND_NES
        default n
        ---help---
-         This option causes the NetEffect RNIC driver to produce debug
-         messages.  Select this if you are developing the driver
-         or trying to diagnose a problem.
+         This option enables debug messages from the NetEffect RNIC
+         driver.  Select this if you are diagnosing a problem.
index cbde0cf..b9d09ba 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 - 2009 Intel-NE, Inc.  All rights reserved.
+ * Copyright (c) 2006 - 2009 Intel Corporation.  All rights reserved.
  * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -521,7 +521,8 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i
        spin_lock_init(&nesdev->indexed_regs_lock);
 
        /* Remap the PCI registers in adapter BAR0 to kernel VA space */
-       mmio_regs = ioremap_nocache(pci_resource_start(pcidev, BAR_0), sizeof(mmio_regs));
+       mmio_regs = ioremap_nocache(pci_resource_start(pcidev, BAR_0),
+                                   pci_resource_len(pcidev, BAR_0));
        if (mmio_regs == NULL) {
                printk(KERN_ERR PFX "Unable to remap BAR0\n");
                ret = -EIO;
index bcc6abc..9884056 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 - 2009 Intel-NE, Inc.  All rights reserved.
+ * Copyright (c) 2006 - 2009 Intel Corporation.  All rights reserved.
  * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
index 73473db..39468c2 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 - 2009 Intel-NE, Inc.  All rights reserved.
+ * Copyright (c) 2006 - 2009 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -52,6 +52,7 @@
 #include <linux/random.h>
 #include <linux/list.h>
 #include <linux/threads.h>
+#include <linux/highmem.h>
 #include <net/arp.h>
 #include <net/neighbour.h>
 #include <net/route.h>
@@ -251,6 +252,33 @@ static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type,
 
        mpa_frame = (struct ietf_mpa_frame *)buffer;
        cm_node->mpa_frame_size = ntohs(mpa_frame->priv_data_len);
+       /* make sure mpa private data len does not exceed 512 bytes */
+       if (cm_node->mpa_frame_size > IETF_MAX_PRIV_DATA_LEN) {
+               nes_debug(NES_DBG_CM, "The received Length of Private"
+                       " Data field exceeds 512 octets\n");
+               return -EINVAL;
+       }
+       /*
+        * make sure the MPA receiver can interoperate with the
+        * received MPA version and MPA key information
+        *
+        */
+       if (mpa_frame->rev != mpa_version) {
+               nes_debug(NES_DBG_CM, "The received mpa version"
+                               " can not be interoperated\n");
+               return -EINVAL;
+       }
+       if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) {
+               if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE)) {
+                       nes_debug(NES_DBG_CM, "Unexpected MPA Key received \n");
+                       return -EINVAL;
+               }
+       } else {
+               if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE)) {
+                       nes_debug(NES_DBG_CM, "Unexpected MPA Key received \n");
+                       return -EINVAL;
+               }
+       }
 
        if (cm_node->mpa_frame_size + sizeof(struct ietf_mpa_frame) != len) {
                nes_debug(NES_DBG_CM, "The received ietf buffer was not right"
@@ -486,6 +514,8 @@ static void nes_retrans_expired(struct nes_cm_node *cm_node)
                send_reset(cm_node, NULL);
                break;
        default:
+               add_ref_cm_node(cm_node);
+               send_reset(cm_node, NULL);
                create_event(cm_node, NES_CM_EVENT_ABORTED);
        }
 }
@@ -949,6 +979,7 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
                                reset_entry);
                {
                        struct nes_cm_node *loopback = cm_node->loopbackpartner;
+                       enum nes_cm_node_state old_state;
                        if (NES_CM_STATE_FIN_WAIT1 <= cm_node->state) {
                                rem_ref_cm_node(cm_node->cm_core, cm_node);
                        } else {
@@ -960,11 +991,12 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
                                                         NES_CM_STATE_CLOSED;
                                                WARN_ON(1);
                                        } else {
-                                               cm_node->state =
-                                                       NES_CM_STATE_CLOSED;
-                                               rem_ref_cm_node(
-                                                       cm_node->cm_core,
-                                                       cm_node);
+                                               old_state = cm_node->state;
+                                               cm_node->state = NES_CM_STATE_LISTENER_DESTROYED;
+                                               if (old_state != NES_CM_STATE_MPAREQ_RCVD)
+                                                       rem_ref_cm_node(
+                                                               cm_node->cm_core,
+                                                               cm_node);
                                        }
                                } else {
                                        struct nes_cm_event event;
@@ -980,20 +1012,9 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
                                                         loopback->loc_port;
                                        event.cm_info.cm_id = loopback->cm_id;
                                        cm_event_connect_error(&event);
+                                       cm_node->state = NES_CM_STATE_LISTENER_DESTROYED;
                                        loopback->state = NES_CM_STATE_CLOSED;
 
-                                       event.cm_node = cm_node;
-                                       event.cm_info.rem_addr =
-                                                        cm_node->rem_addr;
-                                       event.cm_info.loc_addr =
-                                                        cm_node->loc_addr;
-                                       event.cm_info.rem_port =
-                                                        cm_node->rem_port;
-                                       event.cm_info.loc_port =
-                                                        cm_node->loc_port;
-                                       event.cm_info.cm_id = cm_node->cm_id;
-                                       cm_event_reset(&event);
-
                                        rem_ref_cm_node(cm_node->cm_core,
                                                         cm_node);
 
@@ -1077,12 +1098,13 @@ static inline int mini_cm_accelerated(struct nes_cm_core *cm_core,
 /**
  * nes_addr_resolve_neigh
  */
-static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip)
+static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpindex)
 {
        struct rtable *rt;
        struct flowi fl;
        struct neighbour *neigh;
-       int rc = -1;
+       int rc = arpindex;
+       struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter;
 
        memset(&fl, 0, sizeof fl);
        fl.nl_u.ip4_u.daddr = htonl(dst_ip);
@@ -1098,6 +1120,21 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip)
                        nes_debug(NES_DBG_CM, "Neighbor MAC address for 0x%08X"
                                  " is %pM, Gateway is 0x%08X \n", dst_ip,
                                  neigh->ha, ntohl(rt->rt_gateway));
+
+                       if (arpindex >= 0) {
+                               if (!memcmp(nesadapter->arp_table[arpindex].mac_addr,
+                                                       neigh->ha, ETH_ALEN)){
+                                       /* Mac address same as in nes_arp_table */
+                                       neigh_release(neigh);
+                                       ip_rt_put(rt);
+                                       return rc;
+                               }
+
+                               nes_manage_arp_cache(nesvnic->netdev,
+                                               nesadapter->arp_table[arpindex].mac_addr,
+                                               dst_ip, NES_ARP_DELETE);
+                       }
+
                        nes_manage_arp_cache(nesvnic->netdev, neigh->ha,
                                             dst_ip, NES_ARP_ADD);
                        rc = nes_arp_table(nesvnic->nesdev, dst_ip, NULL,
@@ -1113,7 +1150,6 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip)
        return rc;
 }
 
-
 /**
  * make_cm_node - create a new instance of a cm node
  */
@@ -1123,6 +1159,7 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
 {
        struct nes_cm_node *cm_node;
        struct timespec ts;
+       int oldarpindex = 0;
        int arpindex = 0;
        struct nes_device *nesdev;
        struct nes_adapter *nesadapter;
@@ -1176,17 +1213,18 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
        nesadapter = nesdev->nesadapter;
 
        cm_node->loopbackpartner = NULL;
+
        /* get the mac addr for the remote node */
        if (ipv4_is_loopback(htonl(cm_node->rem_addr)))
                arpindex = nes_arp_table(nesdev, ntohl(nesvnic->local_ipaddr), NULL, NES_ARP_RESOLVE);
-       else
-               arpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE);
+       else {
+               oldarpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE);
+               arpindex = nes_addr_resolve_neigh(nesvnic, cm_info->rem_addr, oldarpindex);
+
+       }
        if (arpindex < 0) {
-               arpindex = nes_addr_resolve_neigh(nesvnic, cm_info->rem_addr);
-               if (arpindex < 0) {
-                       kfree(cm_node);
-                       return NULL;
-               }
+               kfree(cm_node);
+               return NULL;
        }
 
        /* copy the mac addr to node context */
@@ -1333,13 +1371,20 @@ static void handle_fin_pkt(struct nes_cm_node *cm_node)
        case NES_CM_STATE_SYN_RCVD:
        case NES_CM_STATE_SYN_SENT:
        case NES_CM_STATE_ESTABLISHED:
-       case NES_CM_STATE_MPAREQ_SENT:
        case NES_CM_STATE_MPAREJ_RCVD:
                cm_node->tcp_cntxt.rcv_nxt++;
                cleanup_retrans_entry(cm_node);
                cm_node->state = NES_CM_STATE_LAST_ACK;
                send_fin(cm_node, NULL);
                break;
+       case NES_CM_STATE_MPAREQ_SENT:
+               create_event(cm_node, NES_CM_EVENT_ABORTED);
+               cm_node->tcp_cntxt.rcv_nxt++;
+               cleanup_retrans_entry(cm_node);
+               cm_node->state = NES_CM_STATE_CLOSED;
+               add_ref_cm_node(cm_node);
+               send_reset(cm_node, NULL);
+               break;
        case NES_CM_STATE_FIN_WAIT1:
                cm_node->tcp_cntxt.rcv_nxt++;
                cleanup_retrans_entry(cm_node);
@@ -1590,6 +1635,7 @@ static void handle_syn_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
                break;
        case NES_CM_STATE_CLOSED:
                cleanup_retrans_entry(cm_node);
+               add_ref_cm_node(cm_node);
                send_reset(cm_node, skb);
                break;
        case NES_CM_STATE_TSA:
@@ -1641,9 +1687,15 @@ static void handle_synack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
                passive_open_err(cm_node, skb, 1);
                break;
        case NES_CM_STATE_LISTENING:
+               cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
+               cleanup_retrans_entry(cm_node);
+               cm_node->state = NES_CM_STATE_CLOSED;
+               send_reset(cm_node, skb);
+               break;
        case NES_CM_STATE_CLOSED:
                cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
                cleanup_retrans_entry(cm_node);
+               add_ref_cm_node(cm_node);
                send_reset(cm_node, skb);
                break;
        case NES_CM_STATE_ESTABLISHED:
@@ -1712,8 +1764,13 @@ static int handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
                        dev_kfree_skb_any(skb);
                break;
        case NES_CM_STATE_LISTENING:
+               cleanup_retrans_entry(cm_node);
+               cm_node->state = NES_CM_STATE_CLOSED;
+               send_reset(cm_node, skb);
+               break;
        case NES_CM_STATE_CLOSED:
                cleanup_retrans_entry(cm_node);
+               add_ref_cm_node(cm_node);
                send_reset(cm_node, skb);
                break;
        case NES_CM_STATE_LAST_ACK:
@@ -1974,7 +2031,7 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
        if (!cm_node)
                return NULL;
        mpa_frame = &cm_node->mpa_frame;
-       strcpy(mpa_frame->key, IEFT_MPA_KEY_REQ);
+       memcpy(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE);
        mpa_frame->flags = IETF_MPA_FLAGS_CRC;
        mpa_frame->rev =  IETF_MPA_VERSION;
        mpa_frame->priv_data_len = htons(private_data_len);
@@ -2102,30 +2159,39 @@ static int mini_cm_reject(struct nes_cm_core *cm_core,
                        cm_node->state = NES_CM_STATE_CLOSED;
                        rem_ref_cm_node(cm_core, cm_node);
                } else {
-                       ret = send_mpa_reject(cm_node);
-                       if (ret) {
-                               cm_node->state = NES_CM_STATE_CLOSED;
-                               err = send_reset(cm_node, NULL);
-                               if (err)
-                                       WARN_ON(1);
-                       } else
-                               cm_id->add_ref(cm_id);
+                       if (cm_node->state == NES_CM_STATE_LISTENER_DESTROYED) {
+                               rem_ref_cm_node(cm_core, cm_node);
+                       } else {
+                               ret = send_mpa_reject(cm_node);
+                               if (ret) {
+                                       cm_node->state = NES_CM_STATE_CLOSED;
+                                       err = send_reset(cm_node, NULL);
+                                       if (err)
+                                               WARN_ON(1);
+                               } else
+                                       cm_id->add_ref(cm_id);
+                       }
                }
        } else {
                cm_node->cm_id = NULL;
-               event.cm_node = loopback;
-               event.cm_info.rem_addr = loopback->rem_addr;
-               event.cm_info.loc_addr = loopback->loc_addr;
-               event.cm_info.rem_port = loopback->rem_port;
-               event.cm_info.loc_port = loopback->loc_port;
-               event.cm_info.cm_id = loopback->cm_id;
-               cm_event_mpa_reject(&event);
-               rem_ref_cm_node(cm_core, cm_node);
-               loopback->state = NES_CM_STATE_CLOSING;
+               if (cm_node->state == NES_CM_STATE_LISTENER_DESTROYED) {
+                       rem_ref_cm_node(cm_core, cm_node);
+                       rem_ref_cm_node(cm_core, loopback);
+               } else {
+                       event.cm_node = loopback;
+                       event.cm_info.rem_addr = loopback->rem_addr;
+                       event.cm_info.loc_addr = loopback->loc_addr;
+                       event.cm_info.rem_port = loopback->rem_port;
+                       event.cm_info.loc_port = loopback->loc_port;
+                       event.cm_info.cm_id = loopback->cm_id;
+                       cm_event_mpa_reject(&event);
+                       rem_ref_cm_node(cm_core, cm_node);
+                       loopback->state = NES_CM_STATE_CLOSING;
 
-               cm_id = loopback->cm_id;
-               rem_ref_cm_node(cm_core, loopback);
-               cm_id->rem_ref(cm_id);
+                       cm_id = loopback->cm_id;
+                       rem_ref_cm_node(cm_core, loopback);
+                       cm_id->rem_ref(cm_id);
+               }
        }
 
        return ret;
@@ -2164,11 +2230,15 @@ static int mini_cm_close(struct nes_cm_core *cm_core, struct nes_cm_node *cm_nod
        case NES_CM_STATE_CLOSING:
                ret = -1;
                break;
-       case NES_CM_STATE_MPAREJ_RCVD:
        case NES_CM_STATE_LISTENING:
+               cleanup_retrans_entry(cm_node);
+               send_reset(cm_node, NULL);
+               break;
+       case NES_CM_STATE_MPAREJ_RCVD:
        case NES_CM_STATE_UNKNOWN:
        case NES_CM_STATE_INITED:
        case NES_CM_STATE_CLOSED:
+       case NES_CM_STATE_LISTENER_DESTROYED:
                ret = rem_ref_cm_node(cm_core, cm_node);
                break;
        case NES_CM_STATE_TSA:
@@ -2687,8 +2757,6 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        struct nes_pd *nespd;
        u64 tagged_offset;
 
-
-
        ibqp = nes_get_qp(cm_id->device, conn_param->qpn);
        if (!ibqp)
                return -EINVAL;
@@ -2704,6 +2772,13 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
                "%s\n", cm_node, nesvnic, nesvnic->netdev,
                nesvnic->netdev->name);
 
+       if (NES_CM_STATE_LISTENER_DESTROYED == cm_node->state) {
+               if (cm_node->loopbackpartner)
+                       rem_ref_cm_node(cm_node->cm_core, cm_node->loopbackpartner);
+               rem_ref_cm_node(cm_node->cm_core, cm_node);
+               return -EINVAL;
+       }
+
        /* associate the node with the QP */
        nesqp->cm_node = (void *)cm_node;
        cm_node->nesqp = nesqp;
@@ -2786,6 +2861,10 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
                        cpu_to_le32(conn_param->private_data_len +
                        sizeof(struct ietf_mpa_frame));
                wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = ibmr->lkey;
+               if (nesqp->sq_kmapped) {
+                       nesqp->sq_kmapped = 0;
+                       kunmap(nesqp->page);
+               }
 
                nesqp->nesqp_context->ird_ord_sizes |=
                        cpu_to_le32(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT |
@@ -2929,7 +3008,7 @@ int nes_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
        if (cm_node->mpa_frame_size > MAX_CM_BUFFER)
                return -EINVAL;
 
-       strcpy(&cm_node->mpa_frame.key[0], IEFT_MPA_KEY_REP);
+       memcpy(&cm_node->mpa_frame.key[0], IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE);
        if (loopback) {
                memcpy(&loopback->mpa_frame.priv_data, pdata, pdata_len);
                loopback->mpa_frame.priv_data_len = pdata_len;
@@ -2974,6 +3053,9 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        if (!nesdev)
                return -EINVAL;
 
+       if (!(cm_id->local_addr.sin_port) || !(cm_id->remote_addr.sin_port))
+               return -EINVAL;
+
        nes_debug(NES_DBG_CM, "QP%u, current IP = 0x%08X, Destination IP = "
                "0x%08X:0x%04X, local = 0x%08X:0x%04X.\n", nesqp->hwqp.qp_id,
                ntohl(nesvnic->local_ipaddr),
@@ -3251,6 +3333,11 @@ static void cm_event_connected(struct nes_cm_event *event)
                wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = 0;
                wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0;
 
+               if (nesqp->sq_kmapped) {
+                       nesqp->sq_kmapped = 0;
+                       kunmap(nesqp->page);
+               }
+
                /* use the reserved spot on the WQ for the extra first WQE */
                nesqp->nesqp_context->ird_ord_sizes &=
                        cpu_to_le32(~(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT |
@@ -3346,7 +3433,7 @@ static void cm_event_connect_error(struct nes_cm_event *event)
        nesqp->cm_id = NULL;
        cm_id->provider_data = NULL;
        cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
-       cm_event.status = IW_CM_EVENT_STATUS_REJECTED;
+       cm_event.status = -ECONNRESET;
        cm_event.provider_data = cm_id->provider_data;
        cm_event.local_addr = cm_id->local_addr;
        cm_event.remote_addr = cm_id->remote_addr;
@@ -3390,6 +3477,8 @@ static void cm_event_reset(struct nes_cm_event *event)
 
        nes_debug(NES_DBG_CM, "%p - cm_id = %p\n", event->cm_node, cm_id);
        nesqp = cm_id->provider_data;
+       if (!nesqp)
+               return;
 
        nesqp->cm_id = NULL;
        /* cm_id->provider_data = NULL; */
@@ -3401,8 +3490,8 @@ static void cm_event_reset(struct nes_cm_event *event)
        cm_event.private_data = NULL;
        cm_event.private_data_len = 0;
 
-       ret = cm_id->event_handler(cm_id, &cm_event);
        cm_id->add_ref(cm_id);
+       ret = cm_id->event_handler(cm_id, &cm_event);
        atomic_inc(&cm_closes);
        cm_event.event = IW_CM_EVENT_CLOSE;
        cm_event.status = IW_CM_EVENT_STATUS_OK;
index 90e8e4d..d9825fd 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 - 2009 Intel-NE, Inc.  All rights reserved.
+ * Copyright (c) 2006 - 2009 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -47,6 +47,8 @@
 #define IEFT_MPA_KEY_REP  "MPA ID Rep Frame"
 #define IETF_MPA_KEY_SIZE 16
 #define IETF_MPA_VERSION  1
+#define IETF_MAX_PRIV_DATA_LEN 512
+#define IETF_MPA_FRAME_SIZE     20
 
 enum ietf_mpa_flags {
        IETF_MPA_FLAGS_MARKERS = 0x80,  /* receive Markers */
@@ -169,7 +171,7 @@ struct nes_timer_entry {
 
 #define NES_CM_DEF_SEQ2      0x18ed5740
 #define NES_CM_DEF_LOCAL_ID2 0xb807
-#define        MAX_CM_BUFFER   512
+#define        MAX_CM_BUFFER   (IETF_MPA_FRAME_SIZE + IETF_MAX_PRIV_DATA_LEN)
 
 
 typedef u32 nes_addr_t;
@@ -198,6 +200,7 @@ enum nes_cm_node_state {
        NES_CM_STATE_TIME_WAIT,
        NES_CM_STATE_LAST_ACK,
        NES_CM_STATE_CLOSING,
+       NES_CM_STATE_LISTENER_DESTROYED,
        NES_CM_STATE_CLOSED
 };
 
index 0fb8d81..b4393a1 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 - 2009 Intel-NE, Inc.  All rights reserved.
+ * Copyright (c) 2006 - 2009 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
index 3512d6d..b1c2cbb 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 - 2009 Intel-NE, Inc.  All rights reserved.
+ * Copyright (c) 2006 - 2009 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -424,8 +424,9 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) {
 
        nesadapter->base_pd = 1;
 
-       nesadapter->device_cap_flags =
-               IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW;
+       nesadapter->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY |
+                                      IB_DEVICE_MEM_WINDOW |
+                                      IB_DEVICE_MEM_MGT_EXTENSIONS;
 
        nesadapter->allocated_qps = (unsigned long *)&(((unsigned char *)nesadapter)
                        [(sizeof(struct nes_adapter)+(sizeof(unsigned long)-1))&(~(sizeof(unsigned long)-1))]);
@@ -436,11 +437,12 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) {
        nesadapter->qp_table = (struct nes_qp **)(&nesadapter->allocated_arps[BITS_TO_LONGS(arp_table_size)]);
 
 
-       /* mark the usual suspect QPs and CQs as in use */
+       /* mark the usual suspect QPs and CQs, plus MR 0, as in use */
        for (u32temp = 0; u32temp < NES_FIRST_QPN; u32temp++) {
                set_bit(u32temp, nesadapter->allocated_qps);
                set_bit(u32temp, nesadapter->allocated_cqs);
        }
+       set_bit(0, nesadapter->allocated_mrs);
 
        for (u32temp = 0; u32temp < 20; u32temp++)
                set_bit(u32temp, nesadapter->allocated_pds);
@@ -481,7 +483,7 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) {
        nesadapter->max_irrq_wr = (u32temp >> 16) & 3;
 
        nesadapter->max_sge = 4;
-       nesadapter->max_cqe = 32767;
+       nesadapter->max_cqe = 32766;
 
        if (nes_read_eeprom_values(nesdev, nesadapter)) {
                printk(KERN_ERR PFX "Unable to read EEPROM data.\n");
@@ -1355,6 +1357,8 @@ int nes_init_phy(struct nes_device *nesdev)
        }
        if ((phy_type == NES_PHY_TYPE_ARGUS) ||
            (phy_type == NES_PHY_TYPE_SFP_D)) {
+               u32 first_time = 1;
+
                /* Check firmware heartbeat */
                nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
                temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
@@ -1362,8 +1366,13 @@ int nes_init_phy(struct nes_device *nesdev)
                nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
                temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
 
-               if (temp_phy_data != temp_phy_data2)
-                       return 0;
+               if (temp_phy_data != temp_phy_data2) {
+                       nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd);
+                       temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+                       if ((temp_phy_data & 0xff) > 0x20)
+                               return 0;
+                       printk(PFX "Reinitializing PHY\n");
+               }
 
                /* no heartbeat, configure the PHY */
                nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0x0000, 0x8000);
@@ -1399,7 +1408,7 @@ int nes_init_phy(struct nes_device *nesdev)
                temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
                do {
                        if (counter++ > 150) {
-                               nes_debug(NES_DBG_PHY, "No PHY heartbeat\n");
+                               printk(PFX "No PHY heartbeat\n");
                                break;
                        }
                        mdelay(1);
@@ -1413,11 +1422,20 @@ int nes_init_phy(struct nes_device *nesdev)
                        nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd);
                        temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
                        if (counter++ > 300) {
-                               nes_debug(NES_DBG_PHY, "PHY did not track\n");
-                               break;
+                               if (((temp_phy_data & 0xff) == 0x0) && first_time) {
+                                       first_time = 0;
+                                       counter = 0;
+                                       /* reset AMCC PHY and try again */
+                                       nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x00c0);
+                                       nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x0040);
+                                       continue;
+                               } else {
+                                       printk(PFX "PHY did not track\n");
+                                       break;
+                               }
                        }
                        mdelay(10);
-               } while (((temp_phy_data & 0xff) != 0x50) && ((temp_phy_data & 0xff) != 0x70));
+               } while ((temp_phy_data & 0xff) < 0x30);
 
                /* setup signal integrity */
                nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd003, 0x0000);
index f28a41b..084be0e 100644 (file)
@@ -1,5 +1,5 @@
 /*
-* Copyright (c) 2006 - 2009 Intel-NE, Inc.  All rights reserved.
+* Copyright (c) 2006 - 2009 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
@@ -546,11 +546,23 @@ enum nes_iwarp_sq_fmr_wqe_word_idx {
        NES_IWARP_SQ_FMR_WQE_PBL_LENGTH_IDX = 14,
 };
 
+enum nes_iwarp_sq_fmr_opcodes {
+       NES_IWARP_SQ_FMR_WQE_ZERO_BASED                 = (1<<6),
+       NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_4K               = (0<<7),
+       NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_2M               = (1<<7),
+       NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_READ   = (1<<16),
+       NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_WRITE  = (1<<17),
+       NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_READ  = (1<<18),
+       NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_WRITE = (1<<19),
+       NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_WINDOW_BIND  = (1<<20),
+};
+
+#define NES_IWARP_SQ_FMR_WQE_MR_LENGTH_HIGH_MASK       0xFF;
+
 enum nes_iwarp_sq_locinv_wqe_word_idx {
        NES_IWARP_SQ_LOCINV_WQE_INV_STAG_IDX = 6,
 };
 
-
 enum nes_iwarp_rq_wqe_word_idx {
        NES_IWARP_RQ_WQE_TOTAL_PAYLOAD_IDX = 1,
        NES_IWARP_RQ_WQE_COMP_CTX_LOW_IDX = 2,
@@ -1153,6 +1165,19 @@ struct nes_pbl {
        /* TODO: need to add list for two level tables */
 };
 
+#define NES_4K_PBL_CHUNK_SIZE  4096
+
+struct nes_fast_mr_wqe_pbl {
+       u64             *kva;
+       dma_addr_t      paddr;
+};
+
+struct nes_ib_fast_reg_page_list {
+       struct ib_fast_reg_page_list    ibfrpl;
+       struct nes_fast_mr_wqe_pbl      nes_wqe_pbl;
+       u64                             pbl;
+};
+
 struct nes_listener {
        struct work_struct      work;
        struct workqueue_struct *wq;
index e593af3..5a7b554 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 - 2009 Intel-NE, Inc.  All rights reserved.
+ * Copyright (c) 2006 - 2009 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
index cc90c14..71e133a 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 - 2009 Intel-NE, Inc.  All rights reserved.
+ * Copyright (c) 2006 - 2009 Intel Corporation.  All rights reserved.
  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
  * Copyright (c) 2005 Cisco Systems.  All rights reserved.
  * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
@@ -86,6 +86,7 @@ enum iwnes_memreg_type {
        IWNES_MEMREG_TYPE_CQ = 0x0002,
        IWNES_MEMREG_TYPE_MW = 0x0003,
        IWNES_MEMREG_TYPE_FMR = 0x0004,
+       IWNES_MEMREG_TYPE_FMEM = 0x0005,
 };
 
 struct nes_mem_reg_req {
index 9687c39..729d525 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 - 2009 Intel-NE, Inc.  All rights reserved.
+ * Copyright (c) 2006 - 2009 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
index a680c42..64d3136 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 - 2009 Intel-NE, Inc.  All rights reserved.
+ * Copyright (c) 2006 - 2009 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -275,342 +275,236 @@ static int nes_bind_mw(struct ib_qp *ibqp, struct ib_mw *ibmw,
 }
 
 
-/**
- * nes_alloc_fmr
+/*
+ * alloc_fast_reg_mr
  */
-static struct ib_fmr *nes_alloc_fmr(struct ib_pd *ibpd,
-               int ibmr_access_flags,
-               struct ib_fmr_attr *ibfmr_attr)
+static int alloc_fast_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd,
+                            u32 stag, u32 page_count)
 {
-       unsigned long flags;
-       struct nes_pd *nespd = to_nespd(ibpd);
-       struct nes_vnic *nesvnic = to_nesvnic(ibpd->device);
-       struct nes_device *nesdev = nesvnic->nesdev;
-       struct nes_adapter *nesadapter = nesdev->nesadapter;
-       struct nes_fmr *nesfmr;
-       struct nes_cqp_request *cqp_request;
        struct nes_hw_cqp_wqe *cqp_wqe;
+       struct nes_cqp_request *cqp_request;
+       unsigned long flags;
        int ret;
-       u32 stag;
-       u32 stag_index = 0;
-       u32 next_stag_index = 0;
-       u32 driver_key = 0;
+       struct nes_adapter *nesadapter = nesdev->nesadapter;
        u32 opcode = 0;
-       u8 stag_key = 0;
-       int i=0;
-       struct nes_vpbl vpbl;
-
-       get_random_bytes(&next_stag_index, sizeof(next_stag_index));
-       stag_key = (u8)next_stag_index;
-
-       driver_key = 0;
-
-       next_stag_index >>= 8;
-       next_stag_index %= nesadapter->max_mr;
-
-       ret = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs,
-                       nesadapter->max_mr, &stag_index, &next_stag_index);
-       if (ret) {
-               goto failed_resource_alloc;
-       }
-
-       nesfmr = kzalloc(sizeof(*nesfmr), GFP_KERNEL);
-       if (!nesfmr) {
-               ret = -ENOMEM;
-               goto failed_fmr_alloc;
-       }
-
-       nesfmr->nesmr.mode = IWNES_MEMREG_TYPE_FMR;
-       if (ibfmr_attr->max_pages == 1) {
-               /* use zero length PBL */
-               nesfmr->nesmr.pbl_4k = 0;
-               nesfmr->nesmr.pbls_used = 0;
-       } else if (ibfmr_attr->max_pages <= 32) {
-               /* use PBL 256 */
-               nesfmr->nesmr.pbl_4k = 0;
-               nesfmr->nesmr.pbls_used = 1;
-       } else if (ibfmr_attr->max_pages <= 512) {
-               /* use 4K PBLs */
-               nesfmr->nesmr.pbl_4k = 1;
-               nesfmr->nesmr.pbls_used = 1;
-       } else {
-               /* use two level 4K PBLs */
-               /* add support for two level 256B PBLs */
-               nesfmr->nesmr.pbl_4k = 1;
-               nesfmr->nesmr.pbls_used = 1 + (ibfmr_attr->max_pages >> 9) +
-                               ((ibfmr_attr->max_pages & 511) ? 1 : 0);
-       }
-       /* Register the region with the adapter */
-       spin_lock_irqsave(&nesadapter->pbl_lock, flags);
-
-       /* track PBL resources */
-       if (nesfmr->nesmr.pbls_used != 0) {
-               if (nesfmr->nesmr.pbl_4k) {
-                       if (nesfmr->nesmr.pbls_used > nesadapter->free_4kpbl) {
-                               spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-                               ret = -ENOMEM;
-                               goto failed_vpbl_avail;
-                       } else {
-                               nesadapter->free_4kpbl -= nesfmr->nesmr.pbls_used;
-                       }
-               } else {
-                       if (nesfmr->nesmr.pbls_used > nesadapter->free_256pbl) {
-                               spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-                               ret = -ENOMEM;
-                               goto failed_vpbl_avail;
-                       } else {
-                               nesadapter->free_256pbl -= nesfmr->nesmr.pbls_used;
-                       }
-               }
-       }
-
-       /* one level pbl */
-       if (nesfmr->nesmr.pbls_used == 0) {
-               nesfmr->root_vpbl.pbl_vbase = NULL;
-               nes_debug(NES_DBG_MR,  "zero level pbl \n");
-       } else if (nesfmr->nesmr.pbls_used == 1) {
-               /* can change it to kmalloc & dma_map_single */
-               nesfmr->root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
-                               &nesfmr->root_vpbl.pbl_pbase);
-               if (!nesfmr->root_vpbl.pbl_vbase) {
-                       spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-                       ret = -ENOMEM;
-                       goto failed_vpbl_alloc;
-               }
-               nesfmr->leaf_pbl_cnt = 0;
-               nes_debug(NES_DBG_MR, "one level pbl, root_vpbl.pbl_vbase=%p \n",
-                               nesfmr->root_vpbl.pbl_vbase);
-       }
-       /* two level pbl */
-       else {
-               nesfmr->root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 8192,
-                               &nesfmr->root_vpbl.pbl_pbase);
-               if (!nesfmr->root_vpbl.pbl_vbase) {
-                       spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-                       ret = -ENOMEM;
-                       goto failed_vpbl_alloc;
-               }
-
-               nesfmr->leaf_pbl_cnt = nesfmr->nesmr.pbls_used-1;
-               nesfmr->root_vpbl.leaf_vpbl = kzalloc(sizeof(*nesfmr->root_vpbl.leaf_vpbl)*1024, GFP_ATOMIC);
-               if (!nesfmr->root_vpbl.leaf_vpbl) {
-                       spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-                       ret = -ENOMEM;
-                       goto failed_leaf_vpbl_alloc;
-               }
-
-               nes_debug(NES_DBG_MR, "two level pbl, root_vpbl.pbl_vbase=%p"
-                               " leaf_pbl_cnt=%d root_vpbl.leaf_vpbl=%p\n",
-                               nesfmr->root_vpbl.pbl_vbase, nesfmr->leaf_pbl_cnt, nesfmr->root_vpbl.leaf_vpbl);
-
-               for (i=0; i<nesfmr->leaf_pbl_cnt; i++)
-                       nesfmr->root_vpbl.leaf_vpbl[i].pbl_vbase = NULL;
-
-               for (i=0; i<nesfmr->leaf_pbl_cnt; i++) {
-                       vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
-                                       &vpbl.pbl_pbase);
-
-                       if (!vpbl.pbl_vbase) {
-                               ret = -ENOMEM;
-                               spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-                               goto failed_leaf_vpbl_pages_alloc;
-                       }
-
-                       nesfmr->root_vpbl.pbl_vbase[i].pa_low = cpu_to_le32((u32)vpbl.pbl_pbase);
-                       nesfmr->root_vpbl.pbl_vbase[i].pa_high = cpu_to_le32((u32)((((u64)vpbl.pbl_pbase)>>32)));
-                       nesfmr->root_vpbl.leaf_vpbl[i] = vpbl;
-
-                       nes_debug(NES_DBG_MR, "pbase_low=0x%x, pbase_high=0x%x, vpbl=%p\n",
-                                       nesfmr->root_vpbl.pbl_vbase[i].pa_low,
-                                       nesfmr->root_vpbl.pbl_vbase[i].pa_high,
-                                       &nesfmr->root_vpbl.leaf_vpbl[i]);
-               }
-       }
-       nesfmr->ib_qp = NULL;
-       nesfmr->access_rights =0;
+       u16 major_code;
+       u64 region_length = page_count * PAGE_SIZE;
 
-       stag = stag_index << 8;
-       stag |= driver_key;
-       stag += (u32)stag_key;
 
-       spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
        cqp_request = nes_get_cqp_request(nesdev);
        if (cqp_request == NULL) {
                nes_debug(NES_DBG_MR, "Failed to get a cqp_request.\n");
-               ret = -ENOMEM;
-               goto failed_leaf_vpbl_pages_alloc;
+               return -ENOMEM;
        }
+       nes_debug(NES_DBG_MR, "alloc_fast_reg_mr: page_count = %d, "
+                             "region_length = %llu\n",
+                             page_count, region_length);
        cqp_request->waiting = 1;
        cqp_wqe = &cqp_request->cqp_wqe;
 
-       nes_debug(NES_DBG_MR, "Registering STag 0x%08X, index = 0x%08X\n",
-                       stag, stag_index);
-
-       opcode = NES_CQP_ALLOCATE_STAG | NES_CQP_STAG_VA_TO | NES_CQP_STAG_MR;
-
-       if (nesfmr->nesmr.pbl_4k == 1)
-               opcode |= NES_CQP_STAG_PBL_BLK_SIZE;
-
-       if (ibmr_access_flags & IB_ACCESS_REMOTE_WRITE) {
-               opcode |= NES_CQP_STAG_RIGHTS_REMOTE_WRITE |
-                               NES_CQP_STAG_RIGHTS_LOCAL_WRITE | NES_CQP_STAG_REM_ACC_EN;
-               nesfmr->access_rights |=
-                               NES_CQP_STAG_RIGHTS_REMOTE_WRITE | NES_CQP_STAG_RIGHTS_LOCAL_WRITE |
-                               NES_CQP_STAG_REM_ACC_EN;
+       spin_lock_irqsave(&nesadapter->pbl_lock, flags);
+       if (nesadapter->free_4kpbl > 0) {
+               nesadapter->free_4kpbl--;
+               spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+       } else {
+               /* No 4kpbl's available: */
+               spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+               nes_debug(NES_DBG_MR, "Out of Pbls\n");
+               nes_free_cqp_request(nesdev, cqp_request);
+               return -ENOMEM;
        }
 
-       if (ibmr_access_flags & IB_ACCESS_REMOTE_READ) {
-               opcode |= NES_CQP_STAG_RIGHTS_REMOTE_READ |
-                               NES_CQP_STAG_RIGHTS_LOCAL_READ | NES_CQP_STAG_REM_ACC_EN;
-               nesfmr->access_rights |=
-                               NES_CQP_STAG_RIGHTS_REMOTE_READ | NES_CQP_STAG_RIGHTS_LOCAL_READ |
-                               NES_CQP_STAG_REM_ACC_EN;
-       }
+       opcode = NES_CQP_ALLOCATE_STAG | NES_CQP_STAG_MR |
+                NES_CQP_STAG_PBL_BLK_SIZE | NES_CQP_STAG_VA_TO |
+                NES_CQP_STAG_REM_ACC_EN;
+       /*
+        * The current OFED API does not support the zero-based TO option.
+        * If it is added, the NES_CQP_STAG_VA* option needs to change.  Also,
+        * the API does not support the ability to have the MR set for local
+        * access only when created and not allow the SQ op to override. Given
+        * this, the remote enable must be set here.
+        */
 
        nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
        set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode);
-       set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX, (nespd->pd_id & 0x00007fff));
-       set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag);
+       set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX, 1);
 
-       cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX] =
-                       cpu_to_le32((nesfmr->nesmr.pbls_used>1) ?
-                       (nesfmr->nesmr.pbls_used-1) : nesfmr->nesmr.pbls_used);
+       cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX] =
+                       cpu_to_le32((u32)(region_length >> 8) & 0xff000000);
+       cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX] |=
+                       cpu_to_le32(nespd->pd_id & 0x00007fff);
+
+       set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag);
+       set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_VA_LOW_IDX, 0);
+       set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_LEN_LOW_IDX, 0);
+       set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PA_LOW_IDX, 0);
+       set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_LEN_IDX, (page_count * 8));
+       cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(NES_CQP_STAG_PBL_BLK_SIZE);
+       barrier();
 
        atomic_set(&cqp_request->refcount, 2);
        nes_post_cqp_request(nesdev, cqp_request);
 
        /* Wait for CQP */
-       ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0),
-                       NES_EVENT_TIMEOUT);
-       nes_debug(NES_DBG_MR, "Register STag 0x%08X completed, wait_event_timeout ret = %u,"
-                       " CQP Major:Minor codes = 0x%04X:0x%04X.\n",
-                       stag, ret, cqp_request->major_code, cqp_request->minor_code);
-
-       if ((!ret) || (cqp_request->major_code)) {
-               nes_put_cqp_request(nesdev, cqp_request);
-               ret = (!ret) ? -ETIME : -EIO;
-               goto failed_leaf_vpbl_pages_alloc;
-       }
+       ret = wait_event_timeout(cqp_request->waitq,
+                                (0 != cqp_request->request_done),
+                                NES_EVENT_TIMEOUT);
+
+       nes_debug(NES_DBG_MR, "Allocate STag 0x%08X completed, "
+                 "wait_event_timeout ret = %u, CQP Major:Minor codes = "
+                 "0x%04X:0x%04X.\n", stag, ret, cqp_request->major_code,
+                 cqp_request->minor_code);
+       major_code = cqp_request->major_code;
        nes_put_cqp_request(nesdev, cqp_request);
-       nesfmr->nesmr.ibfmr.lkey = stag;
-       nesfmr->nesmr.ibfmr.rkey = stag;
-       nesfmr->attr = *ibfmr_attr;
-
-       return &nesfmr->nesmr.ibfmr;
-
-       failed_leaf_vpbl_pages_alloc:
-       /* unroll all allocated pages */
-       for (i=0; i<nesfmr->leaf_pbl_cnt; i++) {
-               if (nesfmr->root_vpbl.leaf_vpbl[i].pbl_vbase) {
-                       pci_free_consistent(nesdev->pcidev, 4096, nesfmr->root_vpbl.leaf_vpbl[i].pbl_vbase,
-                                       nesfmr->root_vpbl.leaf_vpbl[i].pbl_pbase);
-               }
-       }
-       if (nesfmr->root_vpbl.leaf_vpbl)
-               kfree(nesfmr->root_vpbl.leaf_vpbl);
 
-       failed_leaf_vpbl_alloc:
-       if (nesfmr->leaf_pbl_cnt == 0) {
-               if (nesfmr->root_vpbl.pbl_vbase)
-                       pci_free_consistent(nesdev->pcidev, 4096, nesfmr->root_vpbl.pbl_vbase,
-                                       nesfmr->root_vpbl.pbl_pbase);
-       } else
-               pci_free_consistent(nesdev->pcidev, 8192, nesfmr->root_vpbl.pbl_vbase,
-                               nesfmr->root_vpbl.pbl_pbase);
-
-       failed_vpbl_alloc:
-       if (nesfmr->nesmr.pbls_used != 0) {
+       if (!ret || major_code) {
                spin_lock_irqsave(&nesadapter->pbl_lock, flags);
-               if (nesfmr->nesmr.pbl_4k)
-                       nesadapter->free_4kpbl += nesfmr->nesmr.pbls_used;
-               else
-                       nesadapter->free_256pbl += nesfmr->nesmr.pbls_used;
+               nesadapter->free_4kpbl++;
                spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
        }
 
-failed_vpbl_avail:
-       kfree(nesfmr);
-
-       failed_fmr_alloc:
-       nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
-
-       failed_resource_alloc:
-       return ERR_PTR(ret);
+       if (!ret)
+               return -ETIME;
+       else if (major_code)
+               return -EIO;
+       return 0;
 }
 
-
-/**
- * nes_dealloc_fmr
+/*
+ * nes_alloc_fast_reg_mr - allocate an STag and a memory region that can
+ * later be registered through IB_WR_FAST_REG_MR work requests.
+ *
+ * Returns the new ib_mr on success or an ERR_PTR():  -E2BIG when
+ * max_page_list_len does not fit in a single 4K PBL, -ENOMEM when the
+ * nes_mr cannot be allocated, or the error reported by
+ * nes_alloc_resource() / alloc_fast_reg_mr().
  */
-static int nes_dealloc_fmr(struct ib_fmr *ibfmr)
+struct ib_mr *nes_alloc_fast_reg_mr(struct ib_pd *ibpd, int max_page_list_len)
 {
-       unsigned long flags;
-       struct nes_mr *nesmr = to_nesmr_from_ibfmr(ibfmr);
-       struct nes_fmr *nesfmr = to_nesfmr(nesmr);
-       struct nes_vnic *nesvnic = to_nesvnic(ibfmr->device);
+       struct nes_pd *nespd = to_nespd(ibpd);
+       struct nes_vnic *nesvnic = to_nesvnic(ibpd->device);
        struct nes_device *nesdev = nesvnic->nesdev;
        struct nes_adapter *nesadapter = nesdev->nesadapter;
-       int i = 0;
-       int rc;
 
-       /* free the resources */
-       if (nesfmr->leaf_pbl_cnt == 0) {
-               /* single PBL case */
-               if (nesfmr->root_vpbl.pbl_vbase)
-                       pci_free_consistent(nesdev->pcidev, 4096, nesfmr->root_vpbl.pbl_vbase,
-                                       nesfmr->root_vpbl.pbl_pbase);
-       } else {
-               for (i = 0; i < nesfmr->leaf_pbl_cnt; i++) {
-                       pci_free_consistent(nesdev->pcidev, 4096, nesfmr->root_vpbl.leaf_vpbl[i].pbl_vbase,
-                                       nesfmr->root_vpbl.leaf_vpbl[i].pbl_pbase);
-               }
-               kfree(nesfmr->root_vpbl.leaf_vpbl);
-               pci_free_consistent(nesdev->pcidev, 8192, nesfmr->root_vpbl.pbl_vbase,
-                               nesfmr->root_vpbl.pbl_pbase);
-       }
-       nesmr->ibmw.device = ibfmr->device;
-       nesmr->ibmw.pd = ibfmr->pd;
-       nesmr->ibmw.rkey = ibfmr->rkey;
-       nesmr->ibmw.uobject = NULL;
+       u32 next_stag_index;
+       u8 stag_key = 0;
+       u32 driver_key = 0;
+       int err = 0;
+       u32 stag_index = 0;
+       struct nes_mr *nesmr;
+       u32 stag;
+       int ret;
+       struct ib_mr *ibmr;
+/*
+ * Note:  Set to always use a fixed length single page entry PBL.  This is to allow
+ *      for the fast_reg_mr operation to always know the size of the PBL.
+ */
+       if (max_page_list_len > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64)))
+               return ERR_PTR(-E2BIG);
 
-       rc = nes_dealloc_mw(&nesmr->ibmw);
+       /* Random start index; its low byte becomes the key part of the STag. */
+       get_random_bytes(&next_stag_index, sizeof(next_stag_index));
+       stag_key = (u8)next_stag_index;
+       next_stag_index >>= 8;
+       next_stag_index %= nesadapter->max_mr;
 
-       if ((rc == 0) && (nesfmr->nesmr.pbls_used != 0)) {
-               spin_lock_irqsave(&nesadapter->pbl_lock, flags);
-               if (nesfmr->nesmr.pbl_4k) {
-                       nesadapter->free_4kpbl += nesfmr->nesmr.pbls_used;
-                       WARN_ON(nesadapter->free_4kpbl > nesadapter->max_4kpbl);
-               } else {
-                       nesadapter->free_256pbl += nesfmr->nesmr.pbls_used;
-                       WARN_ON(nesadapter->free_256pbl > nesadapter->max_256pbl);
-               }
-               spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+       err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs,
+                                nesadapter->max_mr, &stag_index,
+                                &next_stag_index);
+       if (err)
+               return ERR_PTR(err);
+
+       nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL);
+       if (!nesmr) {
+               nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+               return ERR_PTR(-ENOMEM);
        }
 
-       return rc;
-}
+       /* STag layout: MR index in the upper 24 bits, key in the low byte. */
+       stag = stag_index << 8;
+       stag |= driver_key;
+       stag += (u32)stag_key;
 
+       nes_debug(NES_DBG_MR, "Allocating STag 0x%08X index = 0x%08X\n",
+                 stag, stag_index);
 
-/**
- * nes_map_phys_fmr
+       ret = alloc_fast_reg_mr(nesdev, nespd, stag, max_page_list_len);
+
+       if (ret == 0) {
+               nesmr->ibmr.rkey = stag;
+               nesmr->ibmr.lkey = stag;
+               nesmr->mode = IWNES_MEMREG_TYPE_FMEM;
+               ibmr = &nesmr->ibmr;
+       } else {
+               kfree(nesmr);
+               nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+               /*
+                * Report the real failure (e.g. -ETIME or -EIO from the CQP
+                * request) instead of masking it as -ENOMEM.  Fall back to
+                * -ENOMEM only if the helper did not return a negative errno,
+                * so the result is always a valid ERR_PTR().
+                */
+               ibmr = ERR_PTR(ret < 0 ? ret : -ENOMEM);
+       }
+       return ibmr;
+}
+
+/*
+ * nes_alloc_fast_reg_page_list
  */
-static int nes_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
-               int list_len, u64 iova)
+static struct ib_fast_reg_page_list *nes_alloc_fast_reg_page_list(
+                                                       struct ib_device *ibdev,
+                                                       int page_list_len)
 {
-       return 0;
-}
+       struct nes_vnic *nesvnic = to_nesvnic(ibdev);
+       struct nes_device *nesdev = nesvnic->nesdev;
+       struct ib_fast_reg_page_list *pifrpl;
+       struct nes_ib_fast_reg_page_list *pnesfrpl;
 
+       if (page_list_len > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64)))
+               return ERR_PTR(-E2BIG);
+       /*
+        * Allocate the ib_fast_reg_page_list structure, the
+        * nes_fast_bpl structure, and the PLB table.
+        */
+       pnesfrpl = kmalloc(sizeof(struct nes_ib_fast_reg_page_list) +
+                          page_list_len * sizeof(u64), GFP_KERNEL);
+
+       if (!pnesfrpl)
+               return ERR_PTR(-ENOMEM);
 
-/**
- * nes_unmap_frm
+       pifrpl = &pnesfrpl->ibfrpl;
+       pifrpl->page_list = &pnesfrpl->pbl;
+       pifrpl->max_page_list_len = page_list_len;
+       /*
+        * Allocate the WQE PBL
+        */
+       pnesfrpl->nes_wqe_pbl.kva = pci_alloc_consistent(nesdev->pcidev,
+                                                        page_list_len * sizeof(u64),
+                                                        &pnesfrpl->nes_wqe_pbl.paddr);
+
+       if (!pnesfrpl->nes_wqe_pbl.kva) {
+               kfree(pnesfrpl);
+               return ERR_PTR(-ENOMEM);
+       }
+       nes_debug(NES_DBG_MR, "nes_alloc_fast_reg_pbl: nes_frpl = %p, "
+                 "ibfrpl = %p, ibfrpl.page_list = %p, pbl.kva = %p, "
+                 "pbl.paddr= %p\n", pnesfrpl, &pnesfrpl->ibfrpl,
+                 pnesfrpl->ibfrpl.page_list, pnesfrpl->nes_wqe_pbl.kva,
+                 (void *)pnesfrpl->nes_wqe_pbl.paddr);
+
+       return pifrpl;
+}
+
+/*
+ * nes_free_fast_reg_page_list - release a page list obtained from
+ * nes_alloc_fast_reg_page_list(): first the WQE PBL buffer, then the
+ * enclosing nes_ib_fast_reg_page_list.
  */
-static int nes_unmap_fmr(struct list_head *ibfmr_list)
+static void nes_free_fast_reg_page_list(struct ib_fast_reg_page_list *pifrpl)
 {
-       return 0;
+       struct nes_vnic *nesvnic = to_nesvnic(pifrpl->device);
+       struct nes_device *nesdev = nesvnic->nesdev;
+       struct nes_ib_fast_reg_page_list *pnesfrpl;
+
+       pnesfrpl = container_of(pifrpl, struct nes_ib_fast_reg_page_list, ibfrpl);
+       /*
+        * Free the WQE PBL.  The size is recomputed from
+        * max_page_list_len, which the allocation routine set to the
+        * page_list_len it used for pci_alloc_consistent().
+        */
+       pci_free_consistent(nesdev->pcidev,
+                           pifrpl->max_page_list_len * sizeof(u64),
+                           pnesfrpl->nes_wqe_pbl.kva,
+                           pnesfrpl->nes_wqe_pbl.paddr);
+       /*
+        * Free the enclosing structure; pifrpl lives inside it and must
+        * not be used after this point.
+        */
+       kfree(pnesfrpl);
 }
 
-
-
 /**
  * nes_query_device
  */
@@ -633,23 +527,23 @@ static int nes_query_device(struct ib_device *ibdev, struct ib_device_attr *prop
        props->max_qp_wr = nesdev->nesadapter->max_qp_wr - 2;
        props->max_sge = nesdev->nesadapter->max_sge;
        props->max_cq = nesibdev->max_cq;
-       props->max_cqe = nesdev->nesadapter->max_cqe - 1;
+       props->max_cqe = nesdev->nesadapter->max_cqe;
        props->max_mr = nesibdev->max_mr;
        props->max_mw = nesibdev->max_mr;
        props->max_pd = nesibdev->max_pd;
        props->max_sge_rd = 1;
        switch (nesdev->nesadapter->max_irrq_wr) {
                case 0:
-                       props->max_qp_rd_atom = 1;
+                       props->max_qp_rd_atom = 2;
                        break;
                case 1:
-                       props->max_qp_rd_atom = 4;
+                       props->max_qp_rd_atom = 8;
                        break;
                case 2:
-                       props->max_qp_rd_atom = 16;
+                       props->max_qp_rd_atom = 32;
                        break;
                case 3:
-                       props->max_qp_rd_atom = 32;
+                       props->max_qp_rd_atom = 64;
                        break;
                default:
                        props->max_qp_rd_atom = 0;
@@ -1121,6 +1015,7 @@ static int nes_setup_virt_qp(struct nes_qp *nesqp, struct nes_pbl *nespbl,
                kunmap(nesqp->page);
                return -ENOMEM;
        }
+       nesqp->sq_kmapped = 1;
        nesqp->hwqp.q2_vbase = mem;
        mem += 256;
        memset(nesqp->hwqp.q2_vbase, 0, 256);
@@ -1198,7 +1093,10 @@ static inline void nes_free_qp_mem(struct nes_device *nesdev,
                pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size, nesqp->hwqp.q2_vbase, nesqp->hwqp.q2_pbase);
                pci_free_consistent(nesdev->pcidev, 256, nesqp->pbl_vbase, nesqp->pbl_pbase );
                nesqp->pbl_vbase = NULL;
-               kunmap(nesqp->page);
+               if (nesqp->sq_kmapped) {
+                       nesqp->sq_kmapped = 0;
+                       kunmap(nesqp->page);
+               }
        }
 }
 
@@ -1504,8 +1402,6 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
                        nes_debug(NES_DBG_QP, "QP%u structure located @%p.Size = %u.\n",
                                        nesqp->hwqp.qp_id, nesqp, (u32)sizeof(*nesqp));
                        spin_lock_init(&nesqp->lock);
-                       init_waitqueue_head(&nesqp->state_waitq);
-                       init_waitqueue_head(&nesqp->kick_waitq);
                        nes_add_ref(&nesqp->ibqp);
                        break;
                default:
@@ -1513,6 +1409,8 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
                        return ERR_PTR(-EINVAL);
        }
 
+       nesqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR);
+
        /* update the QP table */
        nesdev->nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = nesqp;
        nes_debug(NES_DBG_QP, "netdev refcnt=%u\n",
@@ -1607,8 +1505,10 @@ static int nes_destroy_qp(struct ib_qp *ibqp)
                                nes_ucontext->first_free_wq = nesqp->mmap_sq_db_index;
                        }
                }
-               if (nesqp->pbl_pbase)
+               if (nesqp->pbl_pbase && nesqp->sq_kmapped) {
+                       nesqp->sq_kmapped = 0;
                        kunmap(nesqp->page);
+               }
        } else {
                /* Clean any pending completions from the cq(s) */
                if (nesqp->nesscq)
@@ -1649,6 +1549,9 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries,
        unsigned long flags;
        int ret;
 
+       if (entries > nesadapter->max_cqe)
+               return ERR_PTR(-EINVAL);
+
        err = nes_alloc_resource(nesadapter, nesadapter->allocated_cqs,
                        nesadapter->max_cq, &cq_num, &nesadapter->next_cq);
        if (err) {
@@ -2606,9 +2509,6 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                        stag = stag_index << 8;
                        stag |= driver_key;
                        stag += (u32)stag_key;
-                       if (stag == 0) {
-                               stag = 1;
-                       }
 
                        iova_start = virt;
                        /* Make the leaf PBL the root if only one PBL */
@@ -3109,7 +3009,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                                                                " already done based on hw state.\n",
                                                                nesqp->hwqp.qp_id);
                                                issue_modify_qp = 0;
-                                               nesqp->in_disconnect = 0;
                                        }
                                        switch (nesqp->hw_iwarp_state) {
                                                case NES_AEQE_IWARP_STATE_CLOSING:
@@ -3122,7 +3021,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                                                        break;
                                                default:
                                                        next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING;
-                                                       nesqp->in_disconnect = 1;
                                                        nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING;
                                                        break;
                                        }
@@ -3139,7 +3037,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                                next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE;
                                nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_TERMINATE;
                                issue_modify_qp = 1;
-                               nesqp->in_disconnect = 1;
                                break;
                        case IB_QPS_ERR:
                        case IB_QPS_RESET:
@@ -3162,7 +3059,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                                if ((nesqp->hw_tcp_state > NES_AEQE_TCP_STATE_CLOSED) &&
                                                (nesqp->hw_tcp_state != NES_AEQE_TCP_STATE_TIME_WAIT)) {
                                        next_iwarp_state |= NES_CQP_QP_RESET;
-                                       nesqp->in_disconnect = 1;
                                } else {
                                        nes_debug(NES_DBG_MOD_QP, "QP%u NOT setting NES_CQP_QP_RESET since TCP state = %u\n",
                                                        nesqp->hwqp.qp_id, nesqp->hw_tcp_state);
@@ -3373,21 +3269,17 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
        struct nes_device *nesdev = nesvnic->nesdev;
        struct nes_qp *nesqp = to_nesqp(ibqp);
        struct nes_hw_qp_wqe *wqe;
-       int err;
+       int err = 0;
        u32 qsize = nesqp->hwqp.sq_size;
        u32 head;
-       u32 wqe_misc;
-       u32 wqe_count;
+       u32 wqe_misc = 0;
+       u32 wqe_count = 0;
        u32 counter;
-       u32 total_payload_length;
-
-       err = 0;
-       wqe_misc = 0;
-       wqe_count = 0;
-       total_payload_length = 0;
 
-       if (nesqp->ibqp_state > IB_QPS_RTS)
-               return -EINVAL;
+       if (nesqp->ibqp_state > IB_QPS_RTS) {
+               err = -EINVAL;
+               goto out;
+       }
 
        spin_lock_irqsave(&nesqp->lock, flags);
 
@@ -3413,94 +3305,208 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
                u64temp = (u64)(ib_wr->wr_id);
                set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX,
                                        u64temp);
-                       switch (ib_wr->opcode) {
-                               case IB_WR_SEND:
-                                       if (ib_wr->send_flags & IB_SEND_SOLICITED) {
-                                               wqe_misc = NES_IWARP_SQ_OP_SENDSE;
-                                       } else {
-                                               wqe_misc = NES_IWARP_SQ_OP_SEND;
-                                       }
-                                       if (ib_wr->num_sge > nesdev->nesadapter->max_sge) {
-                                               err = -EINVAL;
-                                               break;
-                                       }
-                                       if (ib_wr->send_flags & IB_SEND_FENCE) {
-                                               wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
-                                       }
-                                       if ((ib_wr->send_flags & IB_SEND_INLINE) &&
-                                                       ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) &&
-                                                       (ib_wr->sg_list[0].length <= 64)) {
-                                               memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX],
-                                                              (void *)(unsigned long)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length);
-                                               set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX,
-                                                               ib_wr->sg_list[0].length);
-                                               wqe_misc |= NES_IWARP_SQ_WQE_IMM_DATA;
-                                       } else {
-                                               fill_wqe_sg_send(wqe, ib_wr, 1);
-                                       }
+               switch (ib_wr->opcode) {
+               case IB_WR_SEND:
+               case IB_WR_SEND_WITH_INV:
+                       if (IB_WR_SEND == ib_wr->opcode) {
+                               if (ib_wr->send_flags & IB_SEND_SOLICITED)
+                                       wqe_misc = NES_IWARP_SQ_OP_SENDSE;
+                               else
+                                       wqe_misc = NES_IWARP_SQ_OP_SEND;
+                       } else {
+                               if (ib_wr->send_flags & IB_SEND_SOLICITED)
+                                       wqe_misc = NES_IWARP_SQ_OP_SENDSEINV;
+                               else
+                                       wqe_misc = NES_IWARP_SQ_OP_SENDINV;
 
-                                       break;
-                               case IB_WR_RDMA_WRITE:
-                                       wqe_misc = NES_IWARP_SQ_OP_RDMAW;
-                                       if (ib_wr->num_sge > nesdev->nesadapter->max_sge) {
-                                               nes_debug(NES_DBG_IW_TX, "Exceeded max sge, ib_wr=%u, max=%u\n",
-                                                               ib_wr->num_sge,
-                                                               nesdev->nesadapter->max_sge);
-                                               err = -EINVAL;
-                                               break;
-                                       }
-                                       if (ib_wr->send_flags & IB_SEND_FENCE) {
-                                               wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
-                                       }
+                               set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_INV_STAG_LOW_IDX,
+                                                   ib_wr->ex.invalidate_rkey);
+                       }
 
-                                       set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX,
-                                                       ib_wr->wr.rdma.rkey);
-                                       set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX,
-                                                       ib_wr->wr.rdma.remote_addr);
-
-                                       if ((ib_wr->send_flags & IB_SEND_INLINE) &&
-                                                       ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) &&
-                                                       (ib_wr->sg_list[0].length <= 64)) {
-                                               memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX],
-                                                              (void *)(unsigned long)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length);
-                                               set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX,
-                                                               ib_wr->sg_list[0].length);
-                                               wqe_misc |= NES_IWARP_SQ_WQE_IMM_DATA;
-                                       } else {
-                                               fill_wqe_sg_send(wqe, ib_wr, 1);
-                                       }
-                                       wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX] =
-                                                       wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX];
-                                       break;
-                               case IB_WR_RDMA_READ:
-                                       /* iWARP only supports 1 sge for RDMA reads */
-                                       if (ib_wr->num_sge > 1) {
-                                               nes_debug(NES_DBG_IW_TX, "Exceeded max sge, ib_wr=%u, max=1\n",
-                                                               ib_wr->num_sge);
-                                               err = -EINVAL;
-                                               break;
-                                       }
-                                       wqe_misc = NES_IWARP_SQ_OP_RDMAR;
-                                       set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX,
-                                                       ib_wr->wr.rdma.remote_addr);
-                                       set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX,
-                                                       ib_wr->wr.rdma.rkey);
-                                       set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX,
-                                                       ib_wr->sg_list->length);
-                                       set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_FRAG0_LOW_IDX,
-                                                       ib_wr->sg_list->addr);
-                                       set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_STAG0_IDX,
-                                                       ib_wr->sg_list->lkey);
-                                       break;
-                               default:
-                                       /* error */
-                                       err = -EINVAL;
-                                       break;
+                       if (ib_wr->num_sge > nesdev->nesadapter->max_sge) {
+                               err = -EINVAL;
+                               break;
                        }
 
-               if (ib_wr->send_flags & IB_SEND_SIGNALED) {
-                       wqe_misc |= NES_IWARP_SQ_WQE_SIGNALED_COMPL;
+                       if (ib_wr->send_flags & IB_SEND_FENCE)
+                               wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
+
+                       if ((ib_wr->send_flags & IB_SEND_INLINE) &&
+                           ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) &&
+                            (ib_wr->sg_list[0].length <= 64)) {
+                               memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX],
+                                      (void *)(unsigned long)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length);
+                               set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX,
+                                                   ib_wr->sg_list[0].length);
+                               wqe_misc |= NES_IWARP_SQ_WQE_IMM_DATA;
+                       } else {
+                               fill_wqe_sg_send(wqe, ib_wr, 1);
+                       }
+
+                       break;
+               case IB_WR_RDMA_WRITE:
+                       wqe_misc = NES_IWARP_SQ_OP_RDMAW;
+                       if (ib_wr->num_sge > nesdev->nesadapter->max_sge) {
+                               nes_debug(NES_DBG_IW_TX, "Exceeded max sge, ib_wr=%u, max=%u\n",
+                                         ib_wr->num_sge, nesdev->nesadapter->max_sge);
+                               err = -EINVAL;
+                               break;
+                       }
+
+                       if (ib_wr->send_flags & IB_SEND_FENCE)
+                               wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
+
+                       set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX,
+                                           ib_wr->wr.rdma.rkey);
+                       set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX,
+                                           ib_wr->wr.rdma.remote_addr);
+
+                       if ((ib_wr->send_flags & IB_SEND_INLINE) &&
+                           ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) &&
+                            (ib_wr->sg_list[0].length <= 64)) {
+                               memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX],
+                                      (void *)(unsigned long)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length);
+                               set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX,
+                                                   ib_wr->sg_list[0].length);
+                               wqe_misc |= NES_IWARP_SQ_WQE_IMM_DATA;
+                       } else {
+                               fill_wqe_sg_send(wqe, ib_wr, 1);
+                       }
+
+                       wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX] =
+                               wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX];
+                       break;
+               case IB_WR_RDMA_READ:
+               case IB_WR_RDMA_READ_WITH_INV:
+                       /* iWARP only supports 1 sge for RDMA reads */
+                       if (ib_wr->num_sge > 1) {
+                               nes_debug(NES_DBG_IW_TX, "Exceeded max sge, ib_wr=%u, max=1\n",
+                                         ib_wr->num_sge);
+                               err = -EINVAL;
+                               break;
+                       }
+                       if (ib_wr->opcode == IB_WR_RDMA_READ) {
+                               wqe_misc = NES_IWARP_SQ_OP_RDMAR;
+                       } else {
+                               wqe_misc = NES_IWARP_SQ_OP_RDMAR_LOCINV;
+                               set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_INV_STAG_LOW_IDX,
+                                                   ib_wr->ex.invalidate_rkey);
+                       }
+
+                       set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX,
+                                           ib_wr->wr.rdma.remote_addr);
+                       set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX,
+                                           ib_wr->wr.rdma.rkey);
+                       set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX,
+                                           ib_wr->sg_list->length);
+                       set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_FRAG0_LOW_IDX,
+                                           ib_wr->sg_list->addr);
+                       set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_STAG0_IDX,
+                                           ib_wr->sg_list->lkey);
+                       break;
+               case IB_WR_LOCAL_INV:
+                       wqe_misc = NES_IWARP_SQ_OP_LOCINV;
+                       set_wqe_32bit_value(wqe->wqe_words,
+                                           NES_IWARP_SQ_LOCINV_WQE_INV_STAG_IDX,
+                                           ib_wr->ex.invalidate_rkey);
+                       break;
+               case IB_WR_FAST_REG_MR:
+               {
+                       /*
+                        * Fast-register an MR: describe the region in the
+                        * WQE and copy the caller's page list into the
+                        * DMA-visible WQE PBL in little-endian form.
+                        */
+                       int i;
+                       /* Named to avoid shadowing the IRQ 'flags' of this function. */
+                       int acc_flags = ib_wr->wr.fast_reg.access_flags;
+                       struct nes_ib_fast_reg_page_list *pnesfrpl =
+                               container_of(ib_wr->wr.fast_reg.page_list,
+                                            struct nes_ib_fast_reg_page_list,
+                                            ibfrpl);
+                       u64 *src_page_list = pnesfrpl->ibfrpl.page_list;
+                       u64 *dst_page_list = pnesfrpl->nes_wqe_pbl.kva;
+
+                       if (ib_wr->wr.fast_reg.page_list_len >
+                           (NES_4K_PBL_CHUNK_SIZE / sizeof(u64))) {
+                               nes_debug(NES_DBG_IW_TX, "SQ_FMR: bad page_list_len\n");
+                               err = -EINVAL;
+                               break;
+                       }
+                       wqe_misc = NES_IWARP_SQ_OP_FAST_REG;
+                       set_wqe_64bit_value(wqe->wqe_words,
+                                           NES_IWARP_SQ_FMR_WQE_VA_FBO_LOW_IDX,
+                                           ib_wr->wr.fast_reg.iova_start);
+                       set_wqe_32bit_value(wqe->wqe_words,
+                                           NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX,
+                                           ib_wr->wr.fast_reg.length);
+                       set_wqe_32bit_value(wqe->wqe_words,
+                                           NES_IWARP_SQ_FMR_WQE_MR_STAG_IDX,
+                                           ib_wr->wr.fast_reg.rkey);
+                       /* Set page size: only 4K (shift 12) and 2M (shift 21) are accepted. */
+                       if (ib_wr->wr.fast_reg.page_shift == 12) {
+                               wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_4K;
+                       } else if (ib_wr->wr.fast_reg.page_shift == 21) {
+                               wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_2M;
+                       } else {
+                               nes_debug(NES_DBG_IW_TX, "Invalid page shift,"
+                                         " ib_wr=%p, page_shift=%u\n", ib_wr,
+                                         ib_wr->wr.fast_reg.page_shift);
+                               err = -EINVAL;
+                               break;
+                       }
+                       /* Set access_flags; local read is always enabled. */
+                       wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_READ;
+                       if (acc_flags & IB_ACCESS_LOCAL_WRITE)
+                               wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_WRITE;
+
+                       if (acc_flags & IB_ACCESS_REMOTE_WRITE)
+                               wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_WRITE;
+
+                       if (acc_flags & IB_ACCESS_REMOTE_READ)
+                               wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_READ;
+
+                       if (acc_flags & IB_ACCESS_MW_BIND)
+                               wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_WINDOW_BIND;
+
+                       /* Fill in PBL info: */
+                       if (ib_wr->wr.fast_reg.page_list_len >
+                           pnesfrpl->ibfrpl.max_page_list_len) {
+                               nes_debug(NES_DBG_IW_TX, "Invalid page list length,"
+                                         " ib_wr=%p, value=%u, max=%u\n",
+                                         ib_wr, ib_wr->wr.fast_reg.page_list_len,
+                                         pnesfrpl->ibfrpl.max_page_list_len);
+                               err = -EINVAL;
+                               break;
+                       }
+
+                       set_wqe_64bit_value(wqe->wqe_words,
+                                           NES_IWARP_SQ_FMR_WQE_PBL_ADDR_LOW_IDX,
+                                           pnesfrpl->nes_wqe_pbl.paddr);
+
+                       set_wqe_32bit_value(wqe->wqe_words,
+                                           NES_IWARP_SQ_FMR_WQE_PBL_LENGTH_IDX,
+                                           ib_wr->wr.fast_reg.page_list_len * 8);
+
+                       for (i = 0; i < ib_wr->wr.fast_reg.page_list_len; i++)
+                               dst_page_list[i] = cpu_to_le64(src_page_list[i]);
+
+                       /* iova_start (u64) and paddr (dma_addr_t) are integers, not pointers. */
+                       nes_debug(NES_DBG_IW_TX, "SQ_FMR: iova_start: %llx, "
+                                 "length: %d, rkey: %0x, pgl_paddr: %llx, "
+                                 "page_list_len: %u, wqe_misc: %x\n",
+                                 (unsigned long long)ib_wr->wr.fast_reg.iova_start,
+                                 ib_wr->wr.fast_reg.length,
+                                 ib_wr->wr.fast_reg.rkey,
+                                 (unsigned long long)pnesfrpl->nes_wqe_pbl.paddr,
+                                 ib_wr->wr.fast_reg.page_list_len,
+                                 wqe_misc);
+                       break;
+               }
+               default:
+                       /* error */
+                       err = -EINVAL;
+                       break;
                }
+
+               if (err)
+                       break;
+
+               if ((ib_wr->send_flags & IB_SEND_SIGNALED) || nesqp->sig_all)
+                       wqe_misc |= NES_IWARP_SQ_WQE_SIGNALED_COMPL;
+
                wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = cpu_to_le32(wqe_misc);
 
                ib_wr = ib_wr->next;
@@ -3522,6 +3528,7 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
 
        spin_unlock_irqrestore(&nesqp->lock, flags);
 
+out:
        if (err)
                *bad_wr = ib_wr;
        return err;
@@ -3548,8 +3555,10 @@ static int nes_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
        u32 counter;
        u32 total_payload_length;
 
-       if (nesqp->ibqp_state > IB_QPS_RTS)
-               return -EINVAL;
+       if (nesqp->ibqp_state > IB_QPS_RTS) {
+               err = -EINVAL;
+               goto out;
+       }
 
        spin_lock_irqsave(&nesqp->lock, flags);
 
@@ -3612,6 +3621,7 @@ static int nes_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
 
        spin_unlock_irqrestore(&nesqp->lock, flags);
 
+out:
        if (err)
                *bad_wr = ib_wr;
        return err;
@@ -3720,6 +3730,12 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
                                                nes_debug(NES_DBG_CQ, "Operation = Send.\n");
                                                entry->opcode = IB_WC_SEND;
                                                break;
+                                       case NES_IWARP_SQ_OP_LOCINV:
+                                               entry->opcode = IB_WC_LOCAL_INV;
+                                               break;
+                                       case NES_IWARP_SQ_OP_FAST_REG:
+                                               entry->opcode = IB_WC_FAST_REG_MR;
+                                               break;
                                }
 
                                nesqp->hwqp.sq_tail = (wqe_index+1)&(nesqp->hwqp.sq_size - 1);
@@ -3890,10 +3906,9 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
        nesibdev->ibdev.dealloc_mw = nes_dealloc_mw;
        nesibdev->ibdev.bind_mw = nes_bind_mw;
 
-       nesibdev->ibdev.alloc_fmr = nes_alloc_fmr;
-       nesibdev->ibdev.unmap_fmr = nes_unmap_fmr;
-       nesibdev->ibdev.dealloc_fmr = nes_dealloc_fmr;
-       nesibdev->ibdev.map_phys_fmr = nes_map_phys_fmr;
+       nesibdev->ibdev.alloc_fast_reg_mr = nes_alloc_fast_reg_mr;
+       nesibdev->ibdev.alloc_fast_reg_page_list = nes_alloc_fast_reg_page_list;
+       nesibdev->ibdev.free_fast_reg_page_list = nes_free_fast_reg_page_list;
 
        nesibdev->ibdev.attach_mcast = nes_multicast_attach;
        nesibdev->ibdev.detach_mcast = nes_multicast_detach;
index 89822d7..2df9993 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 - 2009 Intel-NE, Inc.  All rights reserved.
+ * Copyright (c) 2006 - 2009 Intel Corporation.  All rights reserved.
  * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -135,19 +135,15 @@ struct nes_qp {
        struct ib_qp          ibqp;
        void                  *allocated_buffer;
        struct iw_cm_id       *cm_id;
-       struct workqueue_struct *wq;
        struct nes_cq         *nesscq;
        struct nes_cq         *nesrcq;
        struct nes_pd         *nespd;
        void *cm_node; /* handle of the node this QP is associated with */
        struct ietf_mpa_frame *ietf_frame;
        dma_addr_t            ietf_frame_pbase;
-       wait_queue_head_t     state_waitq;
        struct ib_mr          *lsmm_mr;
-       unsigned long         socket;
        struct nes_hw_qp      hwqp;
        struct work_struct    work;
-       struct work_struct    ae_work;
        enum ib_qp_state      ibqp_state;
        u32                   iwarp_state;
        u32                   hte_index;
@@ -165,19 +161,20 @@ struct nes_qp {
        struct page           *page;
        struct timer_list     terminate_timer;
        enum ib_event_type    terminate_eventtype;
-       wait_queue_head_t     kick_waitq;
-       u16                   in_disconnect;
+       u16                   active_conn:1;
+       u16                   skip_lsmm:1;
+       u16                   user_mode:1;
+       u16                   hte_added:1;
+       u16                   flush_issued:1;
+       u16                   destroyed:1;
+       u16                   sig_all:1;
+       u16                   rsvd:9;
        u16                   private_data_len;
        u16                   term_sq_flush_code;
        u16                   term_rq_flush_code;
-       u8                    active_conn;
-       u8                    skip_lsmm;
-       u8                    user_mode;
-       u8                    hte_added;
        u8                    hw_iwarp_state;
-       u8                    flush_issued;
        u8                    hw_tcp_state;
        u8                    term_flags;
-       u8                    destroyed;
+       u8                    sq_kmapped;
 };
 #endif                 /* NES_VERBS_H */
index 2bf5116..df3eb8c 100644 (file)
@@ -884,6 +884,7 @@ struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour,
 
        neigh->neighbour = neighbour;
        neigh->dev = dev;
+       memset(&neigh->dgid.raw, 0, sizeof (union ib_gid));
        *to_ipoib_neigh(neighbour) = neigh;
        skb_queue_head_init(&neigh->queue);
        ipoib_cm_set(neigh, NULL);
index b9453d0..274c883 100644 (file)
@@ -209,6 +209,8 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
        mem_copy->copy_buf = NULL;
 }
 
+#define IS_4K_ALIGNED(addr)    ((((unsigned long)(addr)) & ~MASK_4K) == 0)
+
 /**
  * iser_sg_to_page_vec - Translates scatterlist entries to physical addresses
  * and returns the length of resulting physical address array (may be less than
@@ -221,62 +223,52 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
  * where --few fragments of the same page-- are present in the SG as
  * consecutive elements. Also, it handles one entry SG.
  */
+
 static int iser_sg_to_page_vec(struct iser_data_buf *data,
                               struct iser_page_vec *page_vec,
                               struct ib_device *ibdev)
 {
-       struct scatterlist *sgl = (struct scatterlist *)data->buf;
-       struct scatterlist *sg;
-       u64 first_addr, last_addr, page;
-       int end_aligned;
-       unsigned int cur_page = 0;
+       struct scatterlist *sg, *sgl = (struct scatterlist *)data->buf;
+       u64 start_addr, end_addr, page, chunk_start = 0;
        unsigned long total_sz = 0;
-       int i;
+       unsigned int dma_len;
+       int i, new_chunk, cur_page, last_ent = data->dma_nents - 1;
 
        /* compute the offset of first element */
        page_vec->offset = (u64) sgl[0].offset & ~MASK_4K;
 
+       new_chunk = 1;
+       cur_page  = 0;
        for_each_sg(sgl, sg, data->dma_nents, i) {
-               unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
-
+               start_addr = ib_sg_dma_address(ibdev, sg);
+               if (new_chunk)
+                       chunk_start = start_addr;
+               dma_len = ib_sg_dma_len(ibdev, sg);
+               end_addr = start_addr + dma_len;
                total_sz += dma_len;
 
-               first_addr = ib_sg_dma_address(ibdev, sg);
-               last_addr  = first_addr + dma_len;
-
-               end_aligned   = !(last_addr  & ~MASK_4K);
-
-               /* continue to collect page fragments till aligned or SG ends */
-               while (!end_aligned && (i + 1 < data->dma_nents)) {
-                       sg = sg_next(sg);
-                       i++;
-                       dma_len = ib_sg_dma_len(ibdev, sg);
-                       total_sz += dma_len;
-                       last_addr = ib_sg_dma_address(ibdev, sg) + dma_len;
-                       end_aligned = !(last_addr  & ~MASK_4K);
+               /* collect page fragments until aligned or end of SG list */
+               if (!IS_4K_ALIGNED(end_addr) && i < last_ent) {
+                       new_chunk = 0;
+                       continue;
                }
-
-               /* handle the 1st page in the 1st DMA element */
-               if (cur_page == 0) {
-                       page = first_addr & MASK_4K;
-                       page_vec->pages[cur_page] = page;
-                       cur_page++;
+               new_chunk = 1;
+
+               /* address of the first page in the contiguous chunk;
+                  masking relevant for the very first SG entry,
+                  which might be unaligned */
+               page = chunk_start & MASK_4K;
+               do {
+                       page_vec->pages[cur_page++] = page;
                        page += SIZE_4K;
-               } else
-                       page = first_addr;
-
-               for (; page < last_addr; page += SIZE_4K) {
-                       page_vec->pages[cur_page] = page;
-                       cur_page++;
-               }
-
+               } while (page < end_addr);
        }
+
        page_vec->data_size = total_sz;
        iser_dbg("page_vec->data_size:%d cur_page %d\n", page_vec->data_size,cur_page);
        return cur_page;
 }
 
-#define IS_4K_ALIGNED(addr)    ((((unsigned long)addr) & ~MASK_4K) == 0)
 
 /**
  * iser_data_buf_aligned_len - Tries to determine the maximal correctly aligned
@@ -284,42 +276,40 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
  * the number of entries which are aligned correctly. Supports the case where
  * consecutive SG elements are actually fragments of the same physcial page.
  */
-static unsigned int iser_data_buf_aligned_len(struct iser_data_buf *data,
-                                             struct ib_device *ibdev)
+static int iser_data_buf_aligned_len(struct iser_data_buf *data,
+                                     struct ib_device *ibdev)
 {
-       struct scatterlist *sgl, *sg;
-       u64 end_addr, next_addr;
-       int i, cnt;
-       unsigned int ret_len = 0;
+       struct scatterlist *sgl, *sg, *next_sg = NULL;
+       u64 start_addr, end_addr;
+       int i, ret_len, start_check = 0;
+
+       if (data->dma_nents == 1)
+               return 1;
 
        sgl = (struct scatterlist *)data->buf;
+       start_addr  = ib_sg_dma_address(ibdev, sgl);
 
-       cnt = 0;
        for_each_sg(sgl, sg, data->dma_nents, i) {
-               /* iser_dbg("Checking sg iobuf [%d]: phys=0x%08lX "
-                  "offset: %ld sz: %ld\n", i,
-                  (unsigned long)sg_phys(sg),
-                  (unsigned long)sg->offset,
-                  (unsigned long)sg->length); */
-               end_addr = ib_sg_dma_address(ibdev, sg) +
-                          ib_sg_dma_len(ibdev, sg);
-               /* iser_dbg("Checking sg iobuf end address "
-                      "0x%08lX\n", end_addr); */
-               if (i + 1 < data->dma_nents) {
-                       next_addr = ib_sg_dma_address(ibdev, sg_next(sg));
-                       /* are i, i+1 fragments of the same page? */
-                       if (end_addr == next_addr) {
-                               cnt++;
-                               continue;
-                       } else if (!IS_4K_ALIGNED(end_addr)) {
-                               ret_len = cnt + 1;
-                               break;
-                       }
-               }
-               cnt++;
+               if (start_check && !IS_4K_ALIGNED(start_addr))
+                       break;
+
+               next_sg = sg_next(sg);
+               if (!next_sg)
+                       break;
+
+               end_addr    = start_addr + ib_sg_dma_len(ibdev, sg);
+               start_addr  = ib_sg_dma_address(ibdev, next_sg);
+
+               if (end_addr == start_addr) {
+                       start_check = 0;
+                       continue;
+               } else
+                       start_check = 1;
+
+               if (!IS_4K_ALIGNED(end_addr))
+                       break;
        }
-       if (i == data->dma_nents)
-               ret_len = cnt;  /* loop ended */
+       ret_len = (next_sg) ? i : i+1;
        iser_dbg("Found %d aligned entries out of %d in sg:0x%p\n",
                 ret_len, data->dma_nents, data);
        return ret_len;
index 3c16602..04f42ae 100644 (file)
@@ -90,6 +90,7 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u32 flags)
                [ 9] = "Q_Key violation counter",
                [10] = "VMM",
                [12] = "DPDP",
+               [15] = "Big LSO headers",
                [16] = "MW support",
                [17] = "APM support",
                [18] = "Atomic ops support",
@@ -235,7 +236,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
        MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MPT_OFFSET);
        dev_cap->max_mpts = 1 << (field & 0x3f);
        MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_EQ_OFFSET);
-       dev_cap->reserved_eqs = 1 << (field & 0xf);
+       dev_cap->reserved_eqs = field & 0xf;
        MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_EQ_OFFSET);
        dev_cap->max_eqs = 1 << (field & 0xf);
        MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MTT_OFFSET);
index ce7cc6c..e92d1bf 100644 (file)
@@ -61,6 +61,7 @@ enum {
        MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR = 1 <<  8,
        MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR = 1 <<  9,
        MLX4_DEV_CAP_FLAG_DPDP          = 1 << 12,
+       MLX4_DEV_CAP_FLAG_BLH           = 1 << 15,
        MLX4_DEV_CAP_FLAG_MEM_WINDOW    = 1 << 16,
        MLX4_DEV_CAP_FLAG_APM           = 1 << 17,
        MLX4_DEV_CAP_FLAG_ATOMIC        = 1 << 18,
index 483057b..fa0d52b 100644 (file)
@@ -36,6 +36,7 @@
 
 #include <linux/in.h>
 #include <linux/in6.h>
+#include <linux/if_arp.h>
 #include <linux/netdevice.h>
 #include <linux/socket.h>
 #include <rdma/ib_verbs.h>
@@ -60,8 +61,8 @@ struct rdma_dev_addr {
        unsigned char src_dev_addr[MAX_ADDR_LEN];
        unsigned char dst_dev_addr[MAX_ADDR_LEN];
        unsigned char broadcast[MAX_ADDR_LEN];
-       enum rdma_node_type dev_type;
-       struct net_device *src_dev;
+       unsigned short dev_type;
+       int bound_dev_if;
 };
 
 /**
@@ -121,40 +122,29 @@ static inline void ib_addr_get_mgid(struct rdma_dev_addr *dev_addr,
        memcpy(gid, dev_addr->broadcast + 4, sizeof *gid);
 }
 
-static inline void ib_addr_get_sgid(struct rdma_dev_addr *dev_addr,
-                                   union ib_gid *gid)
+static inline int rdma_addr_gid_offset(struct rdma_dev_addr *dev_addr)
 {
-       memcpy(gid, dev_addr->src_dev_addr + 4, sizeof *gid);
+       return dev_addr->dev_type == ARPHRD_INFINIBAND ? 4 : 0;
 }
 
-static inline void ib_addr_set_sgid(struct rdma_dev_addr *dev_addr,
-                                   union ib_gid *gid)
+static inline void rdma_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
 {
-       memcpy(dev_addr->src_dev_addr + 4, gid, sizeof *gid);
+       memcpy(gid, dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof *gid);
 }
 
-static inline void ib_addr_get_dgid(struct rdma_dev_addr *dev_addr,
-                                   union ib_gid *gid)
+static inline void rdma_addr_set_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
 {
-       memcpy(gid, dev_addr->dst_dev_addr + 4, sizeof *gid);
+       memcpy(dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid);
 }
 
-static inline void ib_addr_set_dgid(struct rdma_dev_addr *dev_addr,
-                                   union ib_gid *gid)
+static inline void rdma_addr_get_dgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
 {
-       memcpy(dev_addr->dst_dev_addr + 4, gid, sizeof *gid);
+       memcpy(gid, dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof *gid);
 }
 
-static inline void iw_addr_get_sgid(struct rdma_dev_addr *dev_addr,
-                                   union ib_gid *gid)
-{
-       memcpy(gid, dev_addr->src_dev_addr, sizeof *gid);
-}
-
-static inline void iw_addr_get_dgid(struct rdma_dev_addr *dev_addr,
-                                   union ib_gid *gid)
+static inline void rdma_addr_set_dgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
 {
-       memcpy(gid, dev_addr->dst_dev_addr, sizeof *gid);
+       memcpy(dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid);
 }
 
 #endif /* IB_ADDR_H */
index 3841c1a..1082afa 100644 (file)
@@ -379,4 +379,10 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
                         struct ib_sa_path_rec *rec,
                         struct ib_ah_attr *ah_attr);
 
+/**
+ * ib_sa_unpack_path - Convert a path record from MAD format to struct
+ * ib_sa_path_rec.
+ */
+void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec);
+
 #endif /* IB_SA_H */
index 6591201..cfc7c9b 100644 (file)
 
 #include <linux/types.h>
 
+enum {
+       IB_PATH_GMP             = 1,
+       IB_PATH_PRIMARY         = (1<<1),
+       IB_PATH_ALTERNATE       = (1<<2),
+       IB_PATH_OUTBOUND        = (1<<3),
+       IB_PATH_INBOUND         = (1<<4),
+       IB_PATH_INBOUND_REVERSE = (1<<5),
+       IB_PATH_BIDIRECTIONAL   = IB_PATH_OUTBOUND | IB_PATH_INBOUND_REVERSE
+};
+
+struct ib_path_rec_data {
+       __u32   flags;
+       __u32   reserved;
+       __u32   path_rec[16];
+};
+
 struct ib_user_path_rec {
        __u8    dgid[16];
        __u8    sgid[16];
index c179318..09509ed 100644 (file)
@@ -1425,6 +1425,11 @@ int ib_destroy_qp(struct ib_qp *qp);
  * @send_wr: A list of work requests to post on the send queue.
  * @bad_send_wr: On an immediate failure, this parameter will reference
  *   the work request that failed to be posted on the QP.
+ *
+ * While IBA Vol. 1 section 11.4.1.1 specifies that if an immediate
+ * error is returned, the QP state shall not be affected,
+ * ib_post_send() will return an immediate error after queueing any
+ * earlier work requests in the list.
  */
 static inline int ib_post_send(struct ib_qp *qp,
                               struct ib_send_wr *send_wr,
index c557054..1d16502 100644 (file)
@@ -215,12 +215,14 @@ struct rdma_ucm_event_resp {
 
 /* Option levels */
 enum {
-       RDMA_OPTION_ID          = 0
+       RDMA_OPTION_ID          = 0,
+       RDMA_OPTION_IB          = 1
 };
 
 /* Option details */
 enum {
-       RDMA_OPTION_ID_TOS      = 0
+       RDMA_OPTION_ID_TOS      = 0,
+       RDMA_OPTION_IB_PATH     = 1
 };
 
 struct rdma_ucm_set_option {
index 536ebe5..3b89923 100644 (file)
@@ -182,8 +182,8 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn,
                ic = conn->c_transport_data;
                dev_addr = &ic->i_cm_id->route.addr.dev_addr;
 
-               ib_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
-               ib_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
+               rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
+               rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
 
                rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client);
                iinfo->max_send_wr = ic->i_send_ring.w_nr;
index db224f7..b28fa85 100644 (file)
@@ -184,8 +184,8 @@ static int rds_iw_conn_info_visitor(struct rds_connection *conn,
                ic = conn->c_transport_data;
                dev_addr = &ic->i_cm_id->route.addr.dev_addr;
 
-               ib_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
-               ib_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
+               rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
+               rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
 
                rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);
                iinfo->max_send_wr = ic->i_send_ring.w_nr;