* anymore, so we do this only if selective signaling is off.
*
* Further, on 32-bit platforms, we can't use vmap() to make
- * the QP buffer virtually contigious. Thus we have to use
+ * the QP buffer virtually contiguous. Thus we have to use
* constant-sized WRs to make sure a WR is always fully within
* a single page-sized chunk.
*
}
static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
+ __acquires(&send_cq->lock) __acquires(&recv_cq->lock)
{
- if (send_cq == recv_cq)
+ if (send_cq == recv_cq) {
spin_lock_irq(&send_cq->lock);
- else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
+ __acquire(&recv_cq->lock);
+ } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
spin_lock_irq(&send_cq->lock);
spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
} else {
}
static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
+ __releases(&send_cq->lock) __releases(&recv_cq->lock)
{
- if (send_cq == recv_cq)
+ if (send_cq == recv_cq) {
+ __release(&recv_cq->lock);
spin_unlock_irq(&send_cq->lock);
- else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
+ } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
spin_unlock(&recv_cq->lock);
spin_unlock_irq(&send_cq->lock);
} else {
break;
case IB_WR_LOCAL_INV:
+ ctrl->srcrb_flags |=
+ cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
set_local_inv_seg(wqe, wr->ex.invalidate_rkey);
wqe += sizeof (struct mlx4_wqe_local_inval_seg);
size += sizeof (struct mlx4_wqe_local_inval_seg) / 16;
break;
case IB_WR_FAST_REG_MR:
+ ctrl->srcrb_flags |=
+ cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
set_fmr_seg(wqe, wr);
wqe += sizeof (struct mlx4_wqe_fmr_seg);
size += sizeof (struct mlx4_wqe_fmr_seg) / 16;