IB/mlx4: Add support for masked atomic operations
author     Vladimir Sokolovsky <vlad@mellanox.co.il>
           Wed, 14 Apr 2010 14:23:39 +0000 (17:23 +0300)
committer  Roland Dreier <rolandd@cisco.com>
           Wed, 21 Apr 2010 23:37:49 +0000 (16:37 -0700)
Add support for masked atomic operations (masked compare and swap,
masked fetch and add).

Signed-off-by: Vladimir Sokolovsky <vlad@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
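
For reference, a consumer reaches these operations through the normal verbs
work-request path. Below is a minimal sketch, not part of the patch: it
assumes the masked-atomic request fields (swap_mask, compare_add_mask) that
the companion IB/core patch adds to struct ib_send_wr, and the helper name
and its parameters are hypothetical.

    /* Sketch: post one signaled masked compare & swap on an RC QP.
     * 'sge' must describe an 8-byte local buffer; the original value of
     * the remote quadword is written there on completion. */
    static int post_masked_cmp_swap(struct ib_qp *qp, struct ib_sge *sge,
                                    u64 remote_addr, u32 rkey,
                                    u64 compare, u64 compare_mask,
                                    u64 swap, u64 swap_mask)
    {
            struct ib_send_wr wr, *bad_wr;

            memset(&wr, 0, sizeof wr);
            wr.opcode                     = IB_WR_MASKED_ATOMIC_CMP_AND_SWP;
            wr.sg_list                    = sge;
            wr.num_sge                    = 1;
            wr.send_flags                 = IB_SEND_SIGNALED;
            wr.wr.atomic.remote_addr      = remote_addr;
            wr.wr.atomic.rkey             = rkey;
            wr.wr.atomic.compare_add      = compare;
            wr.wr.atomic.compare_add_mask = compare_mask;
            wr.wr.atomic.swap             = swap;
            wr.wr.atomic.swap_mask        = swap_mask;

            return ib_post_send(qp, &wr, &bad_wr);
    }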
drivers/infiniband/hw/mlx4/cq.c
drivers/infiniband/hw/mlx4/main.c
drivers/infiniband/hw/mlx4/qp.c
include/linux/mlx4/device.h
include/linux/mlx4/qp.h

diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index cc2ddd2..5a219a2 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -661,6 +661,14 @@ repoll:
                        wc->opcode    = IB_WC_FETCH_ADD;
                        wc->byte_len  = 8;
                        break;
+               case MLX4_OPCODE_MASKED_ATOMIC_CS:
+                       wc->opcode    = IB_WC_MASKED_COMP_SWAP;
+                       wc->byte_len  = 8;
+                       break;
+               case MLX4_OPCODE_MASKED_ATOMIC_FA:
+                       wc->opcode    = IB_WC_MASKED_FETCH_ADD;
+                       wc->byte_len  = 8;
+                       break;
                case MLX4_OPCODE_BIND_MW:
                        wc->opcode    = IB_WC_BIND_MW;
                        break;
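
The two new completion cases mirror the unmasked atomics above them: the
response is always the original 8-byte value, hence byte_len = 8. A hedged
sketch of recognizing these completions when polling (the handler name is
illustrative):

    struct ib_wc wc;

    while (ib_poll_cq(cq, 1, &wc) > 0) {
            if (wc.status != IB_WC_SUCCESS)
                    continue;
            if (wc.opcode == IB_WC_MASKED_COMP_SWAP ||
                wc.opcode == IB_WC_MASKED_FETCH_ADD)
                    /* wc.byte_len == 8: the original remote quadword is
                     * now in the request's local buffer. */
                    handle_masked_atomic_completion(&wc);
    }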

diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 01f2a3f..3905141 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -139,6 +139,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
        props->local_ca_ack_delay  = dev->dev->caps.local_ca_ack_delay;
        props->atomic_cap          = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
                IB_ATOMIC_HCA : IB_ATOMIC_NONE;
+       props->masked_atomic_cap   = IB_ATOMIC_HCA;
        props->max_pkeys           = dev->dev->caps.pkey_table_len[1];
        props->max_mcast_grp       = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms;
        props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm;
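
Note that masked_atomic_cap is reported as IB_ATOMIC_HCA unconditionally,
while atomic_cap just above it stays gated on MLX4_DEV_CAP_FLAG_ATOMIC. A
consumer should still query the capability before issuing the new opcodes;
a minimal sketch ('use_masked_atomics' is an illustrative flag):

    struct ib_device_attr attr;

    if (!ib_query_device(ibdev, &attr) &&
        attr.masked_atomic_cap != IB_ATOMIC_NONE)
            use_masked_atomics = true;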

diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 5643f4a..6a60827 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -74,17 +74,19 @@ enum {
 };
 
 static const __be32 mlx4_ib_opcode[] = {
-       [IB_WR_SEND]                    = cpu_to_be32(MLX4_OPCODE_SEND),
-       [IB_WR_LSO]                     = cpu_to_be32(MLX4_OPCODE_LSO),
-       [IB_WR_SEND_WITH_IMM]           = cpu_to_be32(MLX4_OPCODE_SEND_IMM),
-       [IB_WR_RDMA_WRITE]              = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE),
-       [IB_WR_RDMA_WRITE_WITH_IMM]     = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM),
-       [IB_WR_RDMA_READ]               = cpu_to_be32(MLX4_OPCODE_RDMA_READ),
-       [IB_WR_ATOMIC_CMP_AND_SWP]      = cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),
-       [IB_WR_ATOMIC_FETCH_AND_ADD]    = cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
-       [IB_WR_SEND_WITH_INV]           = cpu_to_be32(MLX4_OPCODE_SEND_INVAL),
-       [IB_WR_LOCAL_INV]               = cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL),
-       [IB_WR_FAST_REG_MR]             = cpu_to_be32(MLX4_OPCODE_FMR),
+       [IB_WR_SEND]                            = cpu_to_be32(MLX4_OPCODE_SEND),
+       [IB_WR_LSO]                             = cpu_to_be32(MLX4_OPCODE_LSO),
+       [IB_WR_SEND_WITH_IMM]                   = cpu_to_be32(MLX4_OPCODE_SEND_IMM),
+       [IB_WR_RDMA_WRITE]                      = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE),
+       [IB_WR_RDMA_WRITE_WITH_IMM]             = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM),
+       [IB_WR_RDMA_READ]                       = cpu_to_be32(MLX4_OPCODE_RDMA_READ),
+       [IB_WR_ATOMIC_CMP_AND_SWP]              = cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),
+       [IB_WR_ATOMIC_FETCH_AND_ADD]            = cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
+       [IB_WR_SEND_WITH_INV]                   = cpu_to_be32(MLX4_OPCODE_SEND_INVAL),
+       [IB_WR_LOCAL_INV]                       = cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL),
+       [IB_WR_FAST_REG_MR]                     = cpu_to_be32(MLX4_OPCODE_FMR),
+       [IB_WR_MASKED_ATOMIC_CMP_AND_SWP]       = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS),
+       [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD]     = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA),
 };
 
 static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
@@ -1407,6 +1409,9 @@ static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr *
        if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
                aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
                aseg->compare  = cpu_to_be64(wr->wr.atomic.compare_add);
+       } else if (wr->opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) {
+               aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
+               aseg->compare  = cpu_to_be64(wr->wr.atomic.compare_add_mask);
        } else {
                aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
                aseg->compare  = 0;
@@ -1414,6 +1419,15 @@ static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr *
 
 }
 
+static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg,
+                                 struct ib_send_wr *wr)
+{
+       aseg->swap_add          = cpu_to_be64(wr->wr.atomic.swap);
+       aseg->swap_add_mask     = cpu_to_be64(wr->wr.atomic.swap_mask);
+       aseg->compare           = cpu_to_be64(wr->wr.atomic.compare_add);
+       aseg->compare_mask      = cpu_to_be64(wr->wr.atomic.compare_add_mask);
+}
+
 static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
                             struct ib_send_wr *wr)
 {
@@ -1567,6 +1581,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        switch (wr->opcode) {
                        case IB_WR_ATOMIC_CMP_AND_SWP:
                        case IB_WR_ATOMIC_FETCH_AND_ADD:
+                       case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD:
                                set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
                                              wr->wr.atomic.rkey);
                                wqe  += sizeof (struct mlx4_wqe_raddr_seg);
@@ -1579,6 +1594,19 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
                                break;
 
+                       case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
+                               set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
+                                             wr->wr.atomic.rkey);
+                               wqe  += sizeof (struct mlx4_wqe_raddr_seg);
+
+                               set_masked_atomic_seg(wqe, wr);
+                               wqe  += sizeof (struct mlx4_wqe_masked_atomic_seg);
+
+                               size += (sizeof (struct mlx4_wqe_raddr_seg) +
+                                        sizeof (struct mlx4_wqe_masked_atomic_seg)) / 16;
+
+                               break;
+
                        case IB_WR_RDMA_READ:
                        case IB_WR_RDMA_WRITE:
                        case IB_WR_RDMA_WRITE_WITH_IMM:
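
Two details in the work-request path above are worth spelling out. Masked
fetch & add needs only two 8-byte operands (the add value and the field
mask), so it reuses the existing 16-byte mlx4_wqe_atomic_seg and shares the
case with the unmasked atomics; only masked compare & swap carries all four
operands and needs the new 32-byte segment. The operation masked compare &
swap performs can be sketched roughly as below; this is one reading of the
masked-atomics semantics, not code from this patch. Masked fetch & add
behaves like an ordinary fetch & add except that carries do not propagate
across the field boundaries marked in the mask.

    /* Sketch: only bits selected by compare_mask participate in the
     * compare, and only bits selected by swap_mask are replaced; the
     * original value is returned either way. */
    static u64 masked_cmp_swap(u64 *dst, u64 compare, u64 compare_mask,
                               u64 swap, u64 swap_mask)
    {
            u64 old = *dst;

            if (!((old ^ compare) & compare_mask))
                    *dst = (old & ~swap_mask) | (swap & swap_mask);
            return old;
    }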

diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index e92d1bf..7a7f9c1 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -123,8 +123,8 @@ enum {
        MLX4_OPCODE_RDMA_READ           = 0x10,
        MLX4_OPCODE_ATOMIC_CS           = 0x11,
        MLX4_OPCODE_ATOMIC_FA           = 0x12,
-       MLX4_OPCODE_ATOMIC_MASK_CS      = 0x14,
-       MLX4_OPCODE_ATOMIC_MASK_FA      = 0x15,
+       MLX4_OPCODE_MASKED_ATOMIC_CS    = 0x14,
+       MLX4_OPCODE_MASKED_ATOMIC_FA    = 0x15,
        MLX4_OPCODE_BIND_MW             = 0x18,
        MLX4_OPCODE_FMR                 = 0x19,
        MLX4_OPCODE_LOCAL_INVAL         = 0x1b,

diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index 9f29d86..7abe643 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -285,6 +285,13 @@ struct mlx4_wqe_atomic_seg {
        __be64                  compare;
 };
 
+struct mlx4_wqe_masked_atomic_seg {
+       __be64                  swap_add;
+       __be64                  swap_add_mask;
+       __be64                  compare;
+       __be64                  compare_mask;
+};
+
 struct mlx4_wqe_data_seg {
        __be32                  byte_count;
        __be32                  lkey;
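
The new segment is 32 bytes, twice the plain atomic segment, which matches
the WQE size accounting in mlx4_ib_post_send above (sizes are charged in
16-byte units). A hypothetical compile-time check, not part of the patch,
would pin down that layout assumption:

    /* Both segments are packed arrays of __be64, so the post-send size
     * arithmetic stays in whole 16-byte units. */
    BUILD_BUG_ON(sizeof(struct mlx4_wqe_atomic_seg) != 16);
    BUILD_BUG_ON(sizeof(struct mlx4_wqe_masked_atomic_seg) != 32);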