IB/mlx4: Add IPoIB checksum offload support
author: Eli Cohen <eli@dev.mellanox.co.il>
Thu, 17 Apr 2008 04:01:10 +0000 (21:01 -0700)
committer: Roland Dreier <rolandd@cisco.com>
Thu, 17 Apr 2008 04:01:10 +0000 (21:01 -0700)
ConnectX devices support checksum generation and verification of TCP
and UDP packets for UD IPoIB messages.  This patch checks if the HCA
supports this and sets the IB_DEVICE_UD_IP_CSUM capability flag if it
does.  It implements support for handling the IB_SEND_IP_CSUM send
flag and setting the csum_ok field in receive work completions.

Signed-off-by: Eli Cohen <eli@mellanox.co.il>
Signed-off-by: Ali Ayub <ali@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
drivers/infiniband/hw/mlx4/cq.c
drivers/infiniband/hw/mlx4/main.c
drivers/infiniband/hw/mlx4/qp.c
drivers/net/mlx4/fw.c
include/linux/mlx4/cq.h
include/linux/mlx4/qp.h

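diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 7360bba..d2e32b0 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c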
@@ -297,6 +297,20 @@ static void mlx4_ib_handle_error_cqe(struct mlx4_err_cqe *cqe,
        wc->vendor_err = cqe->vendor_err_syndrome;
 }
 
+static int mlx4_ib_ipoib_csum_ok(__be32 status, __be16 checksum)
+{
+       return ((status & cpu_to_be32(MLX4_CQE_IPOIB_STATUS_IPV4        |
+                                     MLX4_CQE_IPOIB_STATUS_IPV4F       |
+                                     MLX4_CQE_IPOIB_STATUS_IPV4OPT     |
+                                     MLX4_CQE_IPOIB_STATUS_IPV6        |
+                                     MLX4_CQE_IPOIB_STATUS_IPOK)) ==
+               cpu_to_be32(MLX4_CQE_IPOIB_STATUS_IPV4  |
+                           MLX4_CQE_IPOIB_STATUS_IPOK))                &&
+               (status & cpu_to_be32(MLX4_CQE_IPOIB_STATUS_UDP |
+                                     MLX4_CQE_IPOIB_STATUS_TCP))       &&
+               checksum == cpu_to_be16(0xffff);
+}
+
 static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
                            struct mlx4_ib_qp **cur_qp,
                            struct ib_wc *wc)
@@ -434,6 +448,8 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
                wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f;
                wc->wc_flags      |= g_mlpath_rqpn & 0x80000000 ? IB_WC_GRH : 0;
                wc->pkey_index     = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f;
+               wc->csum_ok        = mlx4_ib_ipoib_csum_ok(cqe->ipoib_status,
+                                                          cqe->checksum);
        }
 
        return 0;
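diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index d551201..6ea4746 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c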
@@ -99,6 +99,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
                props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
        if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UD_AV_PORT)
                props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
+       if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
+               props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
 
        props->vendor_id           = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
                0xffffff;
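diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index ac965ab..31b2b5b 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c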
@@ -1436,6 +1436,9 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                         cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) : 0) |
                        (wr->send_flags & IB_SEND_SOLICITED ?
                         cpu_to_be32(MLX4_WQE_CTRL_SOLICITED) : 0) |
+                       ((wr->send_flags & IB_SEND_IP_CSUM) ?
+                        cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM |
+                                    MLX4_WQE_CTRL_TCP_UDP_CSUM) : 0) |
                        qp->sq_signal_bits;
 
                if (wr->opcode == IB_WR_SEND_WITH_IMM ||
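diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c
index 61dc495..f494c3e 100644
--- a/drivers/net/mlx4/fw.c
+++ b/drivers/net/mlx4/fw.c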
@@ -696,6 +696,10 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
        /* Check port for UD address vector: */
        *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1);
 
+       /* Enable IPoIB checksumming if we can: */
+       if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
+               *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 3);
+
        /* QPC/EEC/CQC/EQC/RDMARC attributes */
 
        MLX4_PUT(inbox, param->qpc_base,      INIT_HCA_QPC_BASE_OFFSET);
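diff --git a/include/linux/mlx4/cq.h b/include/linux/mlx4/cq.h
index 0181e0a..1243eba 100644
--- a/include/linux/mlx4/cq.h
+++ b/include/linux/mlx4/cq.h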
@@ -45,11 +45,11 @@ struct mlx4_cqe {
        u8                      sl;
        u8                      reserved1;
        __be16                  rlid;
-       u32                     reserved2;
+       __be32                  ipoib_status;
        __be32                  byte_cnt;
        __be16                  wqe_index;
        __be16                  checksum;
-       u8                      reserved3[3];
+       u8                      reserved2[3];
        u8                      owner_sr_opcode;
 };
 
@@ -85,6 +85,16 @@ enum {
        MLX4_CQE_SYNDROME_REMOTE_ABORTED_ERR            = 0x22,
 };
 
+enum {
+       MLX4_CQE_IPOIB_STATUS_IPV4                      = 1 << 22,
+       MLX4_CQE_IPOIB_STATUS_IPV4F                     = 1 << 23,
+       MLX4_CQE_IPOIB_STATUS_IPV6                      = 1 << 24,
+       MLX4_CQE_IPOIB_STATUS_IPV4OPT                   = 1 << 25,
+       MLX4_CQE_IPOIB_STATUS_TCP                       = 1 << 26,
+       MLX4_CQE_IPOIB_STATUS_UDP                       = 1 << 27,
+       MLX4_CQE_IPOIB_STATUS_IPOK                      = 1 << 28,
+};
+
 static inline void mlx4_cq_arm(struct mlx4_cq *cq, u32 cmd,
                               void __iomem *uar_page,
                               spinlock_t *doorbell_lock)
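diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index 09a2230..31f9eb3 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h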
@@ -158,10 +158,12 @@ struct mlx4_qp_context {
 #define MLX4_FW_VER_WQE_CTRL_NEC mlx4_fw_ver(2, 2, 232)
 
 enum {
-       MLX4_WQE_CTRL_NEC       = 1 << 29,
-       MLX4_WQE_CTRL_FENCE     = 1 << 6,
-       MLX4_WQE_CTRL_CQ_UPDATE = 3 << 2,
-       MLX4_WQE_CTRL_SOLICITED = 1 << 1,
+       MLX4_WQE_CTRL_NEC               = 1 << 29,
+       MLX4_WQE_CTRL_FENCE             = 1 << 6,
+       MLX4_WQE_CTRL_CQ_UPDATE         = 3 << 2,
+       MLX4_WQE_CTRL_SOLICITED         = 1 << 1,
+       MLX4_WQE_CTRL_IP_CSUM           = 1 << 4,
+       MLX4_WQE_CTRL_TCP_UDP_CSUM      = 1 << 5,
 };
 
 struct mlx4_wqe_ctrl_seg {