tree-wide: fix assorted typos all over the place

[safe/jmp/linux-2.6] / drivers / infiniband / hw / mlx4 / qp.c
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c

index 39167a7..256a00c 100644 (file)
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -71,17 +71,17 @@ enum {
  };
  
  static const __be32 mlx4_ib_opcode[] = {
-       [IB_WR_SEND]                    = __constant_cpu_to_be32(MLX4_OPCODE_SEND),
-       [IB_WR_LSO]                     = __constant_cpu_to_be32(MLX4_OPCODE_LSO),
-       [IB_WR_SEND_WITH_IMM]           = __constant_cpu_to_be32(MLX4_OPCODE_SEND_IMM),
-       [IB_WR_RDMA_WRITE]              = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE),
-       [IB_WR_RDMA_WRITE_WITH_IMM]     = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM),
-       [IB_WR_RDMA_READ]               = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_READ),
-       [IB_WR_ATOMIC_CMP_AND_SWP]      = __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),
-       [IB_WR_ATOMIC_FETCH_AND_ADD]    = __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
-       [IB_WR_SEND_WITH_INV]           = __constant_cpu_to_be32(MLX4_OPCODE_SEND_INVAL),
-       [IB_WR_LOCAL_INV]               = __constant_cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL),
-       [IB_WR_FAST_REG_MR]             = __constant_cpu_to_be32(MLX4_OPCODE_FMR),
+       [IB_WR_SEND]                    = cpu_to_be32(MLX4_OPCODE_SEND),
+       [IB_WR_LSO]                     = cpu_to_be32(MLX4_OPCODE_LSO),
+       [IB_WR_SEND_WITH_IMM]           = cpu_to_be32(MLX4_OPCODE_SEND_IMM),
+       [IB_WR_RDMA_WRITE]              = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE),
+       [IB_WR_RDMA_WRITE_WITH_IMM]     = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM),
+       [IB_WR_RDMA_READ]               = cpu_to_be32(MLX4_OPCODE_RDMA_READ),
+       [IB_WR_ATOMIC_CMP_AND_SWP]      = cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),
+       [IB_WR_ATOMIC_FETCH_AND_ADD]    = cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
+       [IB_WR_SEND_WITH_INV]           = cpu_to_be32(MLX4_OPCODE_SEND_INVAL),
+       [IB_WR_LOCAL_INV]               = cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL),
+       [IB_WR_FAST_REG_MR]             = cpu_to_be32(MLX4_OPCODE_FMR),
  };
  
  static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
@@ -352,7 +352,7 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
          * anymore, so we do this only if selective signaling is off.
          *
          * Further, on 32-bit platforms, we can't use vmap() to make
-        * the QP buffer virtually contigious.  Thus we have to use
+        * the QP buffer virtually contiguous.  Thus we have to use
          * constant-sized WRs to make sure a WR is always fully within
          * a single page-sized chunk.
          *
@@ -615,10 +615,12 @@ static enum mlx4_qp_state to_mlx4_state(enum ib_qp_state state)
  }
  
  static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
+       __acquires(&send_cq->lock) __acquires(&recv_cq->lock)
  {
-       if (send_cq == recv_cq)
+       if (send_cq == recv_cq) {
                 spin_lock_irq(&send_cq->lock);
-       else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
+               __acquire(&recv_cq->lock);
+       } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
                 spin_lock_irq(&send_cq->lock);
                 spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
         } else {
@@ -628,10 +630,12 @@ static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv
  }
  
  static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
+       __releases(&send_cq->lock) __releases(&recv_cq->lock)
  {
-       if (send_cq == recv_cq)
+       if (send_cq == recv_cq) {
+               __release(&recv_cq->lock);
                 spin_unlock_irq(&send_cq->lock);
-       else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
+       } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
                 spin_unlock(&recv_cq->lock);
                 spin_unlock_irq(&send_cq->lock);
         } else {
@@ -1365,7 +1369,7 @@ static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr)
         int i;
  
         for (i = 0; i < wr->wr.fast_reg.page_list_len; ++i)
-               wr->wr.fast_reg.page_list->page_list[i] =
+               mfrpl->mapped_page_list[i] =
                         cpu_to_be64(wr->wr.fast_reg.page_list->page_list[i] |
                                     MLX4_MTT_FLAG_PRESENT);
  
@@ -1462,7 +1466,8 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
  }
  
  static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
-                        struct mlx4_ib_qp *qp, unsigned *lso_seg_len)
+                        struct mlx4_ib_qp *qp, unsigned *lso_seg_len,
+                        __be32 *lso_hdr_sz)
  {
         unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16);
  
@@ -1479,12 +1484,8 @@ static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
  
         memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen);
  
-       /* make sure LSO header is written before overwriting stamping */
-       wmb();
-
-       wqe->mss_hdr_size = cpu_to_be32((wr->wr.ud.mss - wr->wr.ud.hlen) << 16 |
-                                       wr->wr.ud.hlen);
-
+       *lso_hdr_sz  = cpu_to_be32((wr->wr.ud.mss - wr->wr.ud.hlen) << 16 |
+                                  wr->wr.ud.hlen);
         *lso_seg_len = halign;
         return 0;
  }
@@ -1518,6 +1519,9 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
         int uninitialized_var(stamp);
         int uninitialized_var(size);
         unsigned uninitialized_var(seglen);
+       __be32 dummy;
+       __be32 *lso_wqe;
+       __be32 uninitialized_var(lso_hdr_sz);
         int i;
  
         spin_lock_irqsave(&qp->sq.lock, flags);
@@ -1525,6 +1529,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
         ind = qp->sq_next_wqe;
  
         for (nreq = 0; wr; ++nreq, wr = wr->next) {
+               lso_wqe = &dummy;
+
                 if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
                         err = -ENOMEM;
                         *bad_wr = wr;
@@ -1583,12 +1589,16 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                                 break;
  
                         case IB_WR_LOCAL_INV:
+                               ctrl->srcrb_flags |=
+                                       cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
                                 set_local_inv_seg(wqe, wr->ex.invalidate_rkey);
                                 wqe  += sizeof (struct mlx4_wqe_local_inval_seg);
                                 size += sizeof (struct mlx4_wqe_local_inval_seg) / 16;
                                 break;
  
                         case IB_WR_FAST_REG_MR:
+                               ctrl->srcrb_flags |=
+                                       cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
                                 set_fmr_seg(wqe, wr);
                                 wqe  += sizeof (struct mlx4_wqe_fmr_seg);
                                 size += sizeof (struct mlx4_wqe_fmr_seg) / 16;
@@ -1606,11 +1616,12 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                         size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
  
                         if (wr->opcode == IB_WR_LSO) {
-                               err = build_lso_seg(wqe, wr, qp, &seglen);
+                               err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz);
                                 if (unlikely(err)) {
                                         *bad_wr = wr;
                                         goto out;
                                 }
+                               lso_wqe = (__be32 *) wqe;
                                 wqe  += seglen;
                                 size += seglen / 16;
                         }
@@ -1652,6 +1663,14 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                 for (i = wr->num_sge - 1; i >= 0; --i, --dseg)
                         set_data_seg(dseg, wr->sg_list + i);
  
+               /*
+                * Possibly overwrite stamping in cacheline with LSO
+                * segment only after making sure all data segments
+                * are written.
+                */
+               wmb();
+               *lso_wqe = lso_hdr_sz;
+
                 ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ?
                                     MLX4_WQE_CTRL_FENCE : 0) | size;
  
@@ -1686,7 +1705,6 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                         stamp_send_wqe(qp, stamp, size * 16);
                         ind = pad_wraparound(qp, ind);
                 }
-
         }
  
  out: