X-Git-Url: http://ftp.safe.ca/?p=safe%2Fjmp%2Flinux-2.6;a=blobdiff_plain;f=drivers%2Finfiniband%2Fhw%2Fipath%2Fipath_qp.c;h=cb2d3ef2ae12426a2ee3da2937118de3145c0c61;hp=0924a21fec13ad447f6ec42c833f5b6cae46b423;hb=d43c36dc6b357fa1806800f18aa30123c747a6d1;hpb=94b8d9f98d7f535037eb9845b81396f667b4f727 diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index 0924a21..cb2d3ef 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -31,10 +32,11 @@ */ #include +#include #include #include "ipath_verbs.h" -#include "ips_common.h" +#include "ipath_kernel.h" #define BITS_PER_PAGE (PAGE_SIZE*BITS_PER_BYTE) #define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1) @@ -43,19 +45,6 @@ #define find_next_offset(map, off) find_next_zero_bit((map)->page, \ BITS_PER_PAGE, off) -#define TRANS_INVALID 0 -#define TRANS_ANY2RST 1 -#define TRANS_RST2INIT 2 -#define TRANS_INIT2INIT 3 -#define TRANS_INIT2RTR 4 -#define TRANS_RTR2RTS 5 -#define TRANS_RTS2RTS 6 -#define TRANS_SQERR2RTS 7 -#define TRANS_ANY2ERR 8 -#define TRANS_RTS2SQD 9 /* XXX Wait for expected ACKs & signal event */ -#define TRANS_SQD2SQD 10 /* error if not drained & parameter change */ -#define TRANS_SQD2RTS 11 /* error if not drained */ - /* * Convert the AETH credit code into the number of credits. */ @@ -93,11 +82,51 @@ static u32 credit_table[31] = { 32768 /* 1E */ }; -static u32 alloc_qpn(struct ipath_qp_table *qpt) + +static void get_map_page(struct ipath_qp_table *qpt, struct qpn_map *map) +{ + unsigned long page = get_zeroed_page(GFP_KERNEL); + unsigned long flags; + + /* + * Free the page if someone raced with us installing it. + */ + + spin_lock_irqsave(&qpt->lock, flags); + if (map->page) + free_page(page); + else + map->page = (void *)page; + spin_unlock_irqrestore(&qpt->lock, flags); +} + + +static int alloc_qpn(struct ipath_qp_table *qpt, enum ib_qp_type type) { u32 i, offset, max_scan, qpn; struct qpn_map *map; - u32 ret; + u32 ret = -1; + + if (type == IB_QPT_SMI) + ret = 0; + else if (type == IB_QPT_GSI) + ret = 1; + + if (ret != -1) { + map = &qpt->map[0]; + if (unlikely(!map->page)) { + get_map_page(qpt, map); + if (unlikely(!map->page)) { + ret = -ENOMEM; + goto bail; + } + } + if (!test_and_set_bit(ret, map->page)) + atomic_dec(&map->n_free); + else + ret = -EBUSY; + goto bail; + } qpn = qpt->last + 1; if (qpn >= QPN_MAX) @@ -107,19 +136,7 @@ static u32 alloc_qpn(struct ipath_qp_table *qpt) max_scan = qpt->nmaps - !offset; for (i = 0;;) { if (unlikely(!map->page)) { - unsigned long page = get_zeroed_page(GFP_KERNEL); - unsigned long flags; - - /* - * Free the page if someone raced with us - * installing it: - */ - spin_lock_irqsave(&qpt->lock, flags); - if (map->page) - free_page(page); - else - map->page = (void *)page; - spin_unlock_irqrestore(&qpt->lock, flags); + get_map_page(qpt, map); if (unlikely(!map->page)) break; } @@ -163,7 +180,7 @@ static u32 alloc_qpn(struct ipath_qp_table *qpt) qpn = mk_qpn(qpt, map, offset); } - ret = 0; + ret = -ENOMEM; bail: return ret; @@ -192,29 +209,19 @@ static int ipath_alloc_qpn(struct ipath_qp_table *qpt, struct ipath_qp *qp, enum ib_qp_type type) { unsigned long flags; - u32 qpn; int ret; - if (type == IB_QPT_SMI) - qpn = 0; - else if (type == IB_QPT_GSI) - qpn = 1; - else { - /* Allocate the next available QPN */ - qpn = alloc_qpn(qpt); - if (qpn == 0) { - ret = -ENOMEM; - goto bail; - } - } - qp->ibqp.qp_num = qpn; + ret = alloc_qpn(qpt, type); + if (ret < 0) + goto bail; + qp->ibqp.qp_num = ret; /* Add the QP to the hash table. */ spin_lock_irqsave(&qpt->lock, flags); - qpn %= qpt->max; - qp->next = qpt->table[qpn]; - qpt->table[qpn] = qp; + ret %= qpt->max; + qp->next = qpt->table[ret]; + qpt->table[ret] = qp; atomic_inc(&qp->refcount); spin_unlock_irqrestore(&qpt->lock, flags); @@ -236,7 +243,6 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp) { struct ipath_qp *q, **qpp; unsigned long flags; - int fnd = 0; spin_lock_irqsave(&qpt->lock, flags); @@ -247,54 +253,40 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp) *qpp = qp->next; qp->next = NULL; atomic_dec(&qp->refcount); - fnd = 1; break; } } spin_unlock_irqrestore(&qpt->lock, flags); - - if (!fnd) - return; - - /* If QPN is not reserved, mark QPN free in the bitmap. */ - if (qp->ibqp.qp_num > 1) - free_qpn(qpt, qp->ibqp.qp_num); - - wait_event(qp->wait, !atomic_read(&qp->refcount)); } /** - * ipath_free_all_qps - remove all QPs from the table + * ipath_free_all_qps - check for QPs still in use * @qpt: the QP table to empty + * + * There should not be any QPs still in use. + * Free memory for table. */ -void ipath_free_all_qps(struct ipath_qp_table *qpt) +unsigned ipath_free_all_qps(struct ipath_qp_table *qpt) { unsigned long flags; - struct ipath_qp *qp, *nqp; - u32 n; + struct ipath_qp *qp; + u32 n, qp_inuse = 0; + spin_lock_irqsave(&qpt->lock, flags); for (n = 0; n < qpt->max; n++) { - spin_lock_irqsave(&qpt->lock, flags); qp = qpt->table[n]; qpt->table[n] = NULL; - spin_unlock_irqrestore(&qpt->lock, flags); - - while (qp) { - nqp = qp->next; - if (qp->ibqp.qp_num > 1) - free_qpn(qpt, qp->ibqp.qp_num); - if (!atomic_dec_and_test(&qp->refcount) || - !ipath_destroy_qp(&qp->ibqp)) - _VERBS_INFO("QP memory leak!\n"); - qp = nqp; - } + + for (; qp; qp = qp->next) + qp_inuse++; } + spin_unlock_irqrestore(&qpt->lock, flags); - for (n = 0; n < ARRAY_SIZE(qpt->map); n++) { + for (n = 0; n < ARRAY_SIZE(qpt->map); n++) if (qpt->map[n].page) - free_page((unsigned long)qpt->map[n].page); - } + free_page((unsigned long) qpt->map[n].page); + return qp_inuse; } /** @@ -326,17 +318,23 @@ struct ipath_qp *ipath_lookup_qpn(struct ipath_qp_table *qpt, u32 qpn) /** * ipath_reset_qp - initialize the QP state to the reset state * @qp: the QP to reset + * @type: the QP type */ -static void ipath_reset_qp(struct ipath_qp *qp) +static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type) { qp->remote_qpn = 0; qp->qkey = 0; qp->qp_access_flags = 0; + atomic_set(&qp->s_dma_busy, 0); + qp->s_flags &= IPATH_S_SIGNAL_REQ_WR; qp->s_hdrwords = 0; + qp->s_wqe = NULL; + qp->s_pkt_delay = 0; + qp->s_draining = 0; qp->s_psn = 0; qp->r_psn = 0; - atomic_set(&qp->msn, 0); - if (qp->ibqp.qp_type == IB_QPT_RC) { + qp->r_msn = 0; + if (type == IB_QPT_RC) { qp->s_state = IB_OPCODE_RC_SEND_LAST; qp->r_state = IB_OPCODE_RC_SEND_LAST; } else { @@ -344,7 +342,9 @@ static void ipath_reset_qp(struct ipath_qp *qp) qp->r_state = IB_OPCODE_UC_SEND_LAST; } qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; - qp->s_nak_state = 0; + qp->r_nak_state = 0; + qp->r_aflags = 0; + qp->r_flags = 0; qp->s_rnr_timeout = 0; qp->s_head = 0; qp->s_tail = 0; @@ -352,68 +352,89 @@ static void ipath_reset_qp(struct ipath_qp *qp) qp->s_last = 0; qp->s_ssn = 1; qp->s_lsn = 0; - qp->r_rq.head = 0; - qp->r_rq.tail = 0; - qp->r_reuse_sge = 0; + memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue)); + qp->r_head_ack_queue = 0; + qp->s_tail_ack_queue = 0; + qp->s_num_rd_atomic = 0; + if (qp->r_rq.wq) { + qp->r_rq.wq->head = 0; + qp->r_rq.wq->tail = 0; + } } /** - * ipath_error_qp - put a QP into an error state - * @qp: the QP to put into an error state + * ipath_error_qp - put a QP into the error state + * @qp: the QP to put into the error state + * @err: the receive completion error to signal if a RWQE is active * * Flushes both send and receive work queues. - * QP r_rq.lock and s_lock should be held. + * Returns true if last WQE event should be generated. + * The QP s_lock should be held and interrupts disabled. + * If we are already in error state, just return. */ -static void ipath_error_qp(struct ipath_qp *qp) +int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) { struct ipath_ibdev *dev = to_idev(qp->ibqp.device); struct ib_wc wc; + int ret = 0; + + if (qp->state == IB_QPS_ERR) + goto bail; - _VERBS_INFO("QP%d/%d in error state\n", - qp->ibqp.qp_num, qp->remote_qpn); + qp->state = IB_QPS_ERR; spin_lock(&dev->pending_lock); - /* XXX What if its already removed by the timeout code? */ if (!list_empty(&qp->timerwait)) list_del_init(&qp->timerwait); if (!list_empty(&qp->piowait)) list_del_init(&qp->piowait); spin_unlock(&dev->pending_lock); - wc.status = IB_WC_WR_FLUSH_ERR; - wc.vendor_err = 0; - wc.byte_len = 0; - wc.imm_data = 0; - wc.qp_num = qp->ibqp.qp_num; - wc.src_qp = 0; - wc.wc_flags = 0; - wc.pkey_index = 0; - wc.slid = 0; - wc.sl = 0; - wc.dlid_path_bits = 0; - wc.port_num = 0; - - while (qp->s_last != qp->s_head) { - struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last); - - wc.wr_id = wqe->wr.wr_id; - wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; - if (++qp->s_last >= qp->s_size) - qp->s_last = 0; - ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1); - } - qp->s_cur = qp->s_tail = qp->s_head; - qp->s_hdrwords = 0; - qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; + /* Schedule the sending tasklet to drain the send work queue. */ + if (qp->s_last != qp->s_head) + ipath_schedule_send(qp); + memset(&wc, 0, sizeof(wc)); + wc.qp = &qp->ibqp; wc.opcode = IB_WC_RECV; - while (qp->r_rq.tail != qp->r_rq.head) { - wc.wr_id = get_rwqe_ptr(&qp->r_rq, qp->r_rq.tail)->wr_id; - if (++qp->r_rq.tail >= qp->r_rq.size) - qp->r_rq.tail = 0; + + if (test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) { + wc.wr_id = qp->r_wr_id; + wc.status = err; ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); } + wc.status = IB_WC_WR_FLUSH_ERR; + + if (qp->r_rq.wq) { + struct ipath_rwq *wq; + u32 head; + u32 tail; + + spin_lock(&qp->r_rq.lock); + + /* sanity check pointers before trusting them */ + wq = qp->r_rq.wq; + head = wq->head; + if (head >= qp->r_rq.size) + head = 0; + tail = wq->tail; + if (tail >= qp->r_rq.size) + tail = 0; + while (tail != head) { + wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id; + if (++tail >= qp->r_rq.size) + tail = 0; + ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); + } + wq->tail = tail; + + spin_unlock(&qp->r_rq.lock); + } else if (qp->ibqp.event_handler) + ret = 1; + +bail: + return ret; } /** @@ -421,20 +442,20 @@ static void ipath_error_qp(struct ipath_qp *qp) * @ibqp: the queue pair who's attributes we're modifying * @attr: the new attributes * @attr_mask: the mask of attributes to modify + * @udata: user data for ipathverbs.so * * Returns 0 on success, otherwise returns an errno. */ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, - int attr_mask) + int attr_mask, struct ib_udata *udata) { struct ipath_ibdev *dev = to_idev(ibqp->device); struct ipath_qp *qp = to_iqp(ibqp); enum ib_qp_state cur_state, new_state; - unsigned long flags; + int lastwqe = 0; int ret; - spin_lock_irqsave(&qp->r_rq.lock, flags); - spin_lock(&qp->s_lock); + spin_lock_irq(&qp->s_lock); cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; @@ -444,31 +465,85 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, attr_mask)) goto inval; - if (attr_mask & IB_QP_AV) + if (attr_mask & IB_QP_AV) { if (attr->ah_attr.dlid == 0 || - attr->ah_attr.dlid >= IPS_MULTICAST_LID_BASE) + attr->ah_attr.dlid >= IPATH_MULTICAST_LID_BASE) goto inval; + if ((attr->ah_attr.ah_flags & IB_AH_GRH) && + (attr->ah_attr.grh.sgid_index > 1)) + goto inval; + } + if (attr_mask & IB_QP_PKEY_INDEX) - if (attr->pkey_index >= ipath_layer_get_npkeys(dev->dd)) + if (attr->pkey_index >= ipath_get_npkeys(dev->dd)) goto inval; if (attr_mask & IB_QP_MIN_RNR_TIMER) if (attr->min_rnr_timer > 31) goto inval; + if (attr_mask & IB_QP_PORT) + if (attr->port_num == 0 || + attr->port_num > ibqp->device->phys_port_cnt) + goto inval; + + /* + * don't allow invalid Path MTU values or greater than 2048 + * unless we are configured for a 4KB MTU + */ + if ((attr_mask & IB_QP_PATH_MTU) && + (ib_mtu_enum_to_int(attr->path_mtu) == -1 || + (attr->path_mtu > IB_MTU_2048 && !ipath_mtu4096))) + goto inval; + + if (attr_mask & IB_QP_PATH_MIG_STATE) + if (attr->path_mig_state != IB_MIG_MIGRATED && + attr->path_mig_state != IB_MIG_REARM) + goto inval; + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + if (attr->max_dest_rd_atomic > IPATH_MAX_RDMA_ATOMIC) + goto inval; + switch (new_state) { case IB_QPS_RESET: - ipath_reset_qp(qp); + if (qp->state != IB_QPS_RESET) { + qp->state = IB_QPS_RESET; + spin_lock(&dev->pending_lock); + if (!list_empty(&qp->timerwait)) + list_del_init(&qp->timerwait); + if (!list_empty(&qp->piowait)) + list_del_init(&qp->piowait); + spin_unlock(&dev->pending_lock); + qp->s_flags &= ~IPATH_S_ANY_WAIT; + spin_unlock_irq(&qp->s_lock); + /* Stop the sending tasklet */ + tasklet_kill(&qp->s_task); + wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy)); + spin_lock_irq(&qp->s_lock); + } + ipath_reset_qp(qp, ibqp->qp_type); + break; + + case IB_QPS_SQD: + qp->s_draining = qp->s_last != qp->s_cur; + qp->state = new_state; + break; + + case IB_QPS_SQE: + if (qp->ibqp.qp_type == IB_QPT_RC) + goto inval; + qp->state = new_state; break; case IB_QPS_ERR: - ipath_error_qp(qp); + lastwqe = ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); break; default: + qp->state = new_state; break; - } if (attr_mask & IB_QP_PKEY_INDEX) @@ -478,7 +553,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, qp->remote_qpn = attr->dest_qp_num; if (attr_mask & IB_QP_SQ_PSN) { - qp->s_next_psn = attr->sq_psn; + qp->s_psn = qp->s_next_psn = attr->sq_psn; qp->s_last_psn = qp->s_next_psn - 1; } @@ -488,8 +563,10 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_ACCESS_FLAGS) qp->qp_access_flags = attr->qp_access_flags; - if (attr_mask & IB_QP_AV) + if (attr_mask & IB_QP_AV) { qp->remote_ah_attr = attr->ah_attr; + qp->s_dmult = ipath_ib_rate_to_mult(attr->ah_attr.static_rate); + } if (attr_mask & IB_QP_PATH_MTU) qp->path_mtu = attr->path_mtu; @@ -505,34 +582,35 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, } if (attr_mask & IB_QP_MIN_RNR_TIMER) - qp->s_min_rnr_timer = attr->min_rnr_timer; + qp->r_min_rnr_timer = attr->min_rnr_timer; + + if (attr_mask & IB_QP_TIMEOUT) + qp->timeout = attr->timeout; if (attr_mask & IB_QP_QKEY) qp->qkey = attr->qkey; - if (attr_mask & IB_QP_PKEY_INDEX) - qp->s_pkey_index = attr->pkey_index; + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + qp->r_max_rd_atomic = attr->max_dest_rd_atomic; - qp->state = new_state; - spin_unlock(&qp->s_lock); - spin_unlock_irqrestore(&qp->r_rq.lock, flags); + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) + qp->s_max_rd_atomic = attr->max_rd_atomic; - /* - * If QP1 changed to the RTS state, try to move to the link to INIT - * even if it was ACTIVE so the SM will reinitialize the SMA's - * state. - */ - if (qp->ibqp.qp_num == 1 && new_state == IB_QPS_RTS) { - struct ipath_ibdev *dev = to_idev(ibqp->device); + spin_unlock_irq(&qp->s_lock); + + if (lastwqe) { + struct ib_event ev; - ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKDOWN); + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = IB_EVENT_QP_LAST_WQE_REACHED; + qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); } ret = 0; goto bail; inval: - spin_unlock(&qp->s_lock); - spin_unlock_irqrestore(&qp->r_rq.lock, flags); + spin_unlock_irq(&qp->s_lock); ret = -EINVAL; bail: @@ -554,7 +632,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, attr->dest_qp_num = qp->remote_qpn; attr->qp_access_flags = qp->qp_access_flags; attr->cap.max_send_wr = qp->s_size - 1; - attr->cap.max_recv_wr = qp->r_rq.size - 1; + attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1; attr->cap.max_send_sge = qp->s_max_sge; attr->cap.max_recv_sge = qp->r_rq.max_sge; attr->cap.max_inline_data = 0; @@ -563,14 +641,14 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, attr->pkey_index = qp->s_pkey_index; attr->alt_pkey_index = 0; attr->en_sqd_async_notify = 0; - attr->sq_draining = 0; - attr->max_rd_atomic = 1; - attr->max_dest_rd_atomic = 1; - attr->min_rnr_timer = qp->s_min_rnr_timer; + attr->sq_draining = qp->s_draining; + attr->max_rd_atomic = qp->s_max_rd_atomic; + attr->max_dest_rd_atomic = qp->r_max_rd_atomic; + attr->min_rnr_timer = qp->r_min_rnr_timer; attr->port_num = 1; - attr->timeout = 0; + attr->timeout = qp->timeout; attr->retry_cnt = qp->s_retry_cnt; - attr->rnr_retry = qp->s_rnr_retry; + attr->rnr_retry = qp->s_rnr_retry_cnt; attr->alt_port_num = 0; attr->alt_timeout = 0; @@ -580,9 +658,10 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, init_attr->recv_cq = qp->ibqp.recv_cq; init_attr->srq = qp->ibqp.srq; init_attr->cap = attr->cap; - init_attr->sq_sig_type = - (qp->s_flags & (1 << IPATH_S_SIGNAL_REQ_WR)) - ? IB_SIGNAL_REQ_WR : 0; + if (qp->s_flags & IPATH_S_SIGNAL_REQ_WR) + init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; + else + init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; init_attr->qp_type = qp->ibqp.qp_type; init_attr->port_num = 1; return 0; @@ -593,31 +672,37 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, * @qp: the queue pair to compute the AETH for * * Returns the AETH. - * - * The QP s_lock should be held. */ __be32 ipath_compute_aeth(struct ipath_qp *qp) { - u32 aeth = atomic_read(&qp->msn) & IPS_MSN_MASK; + u32 aeth = qp->r_msn & IPATH_MSN_MASK; - if (qp->s_nak_state) { - aeth |= qp->s_nak_state << IPS_AETH_CREDIT_SHIFT; - } else if (qp->ibqp.srq) { + if (qp->ibqp.srq) { /* * Shared receive queues don't generate credits. * Set the credit field to the invalid value. */ - aeth |= IPS_AETH_CREDIT_INVAL << IPS_AETH_CREDIT_SHIFT; + aeth |= IPATH_AETH_CREDIT_INVAL << IPATH_AETH_CREDIT_SHIFT; } else { u32 min, max, x; u32 credits; - + struct ipath_rwq *wq = qp->r_rq.wq; + u32 head; + u32 tail; + + /* sanity check pointers before trusting them */ + head = wq->head; + if (head >= qp->r_rq.size) + head = 0; + tail = wq->tail; + if (tail >= qp->r_rq.size) + tail = 0; /* * Compute the number of credits available (RWQEs). * XXX Not holding the r_rq.lock here so there is a small * chance that the pair of reads are not atomic. */ - credits = qp->r_rq.head - qp->r_rq.tail; + credits = head - tail; if ((int)credits < 0) credits += qp->r_rq.size; /* @@ -637,7 +722,7 @@ __be32 ipath_compute_aeth(struct ipath_qp *qp) else min = x; } - aeth |= x << IPS_AETH_CREDIT_SHIFT; + aeth |= x << IPATH_AETH_CREDIT_SHIFT; } return cpu_to_be32(aeth); } @@ -661,17 +746,42 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, struct ipath_swqe *swq = NULL; struct ipath_ibdev *dev; size_t sz; + size_t sg_list_sz; struct ib_qp *ret; - if (init_attr->cap.max_send_sge > 255 || - init_attr->cap.max_recv_sge > 255) { - ret = ERR_PTR(-ENOMEM); + if (init_attr->create_flags) { + ret = ERR_PTR(-EINVAL); goto bail; } + if (init_attr->cap.max_send_sge > ib_ipath_max_sges || + init_attr->cap.max_send_wr > ib_ipath_max_qp_wrs) { + ret = ERR_PTR(-EINVAL); + goto bail; + } + + /* Check receive queue parameters if no SRQ is specified. */ + if (!init_attr->srq) { + if (init_attr->cap.max_recv_sge > ib_ipath_max_sges || + init_attr->cap.max_recv_wr > ib_ipath_max_qp_wrs) { + ret = ERR_PTR(-EINVAL); + goto bail; + } + if (init_attr->cap.max_send_sge + + init_attr->cap.max_send_wr + + init_attr->cap.max_recv_sge + + init_attr->cap.max_recv_wr == 0) { + ret = ERR_PTR(-EINVAL); + goto bail; + } + } + switch (init_attr->qp_type) { case IB_QPT_UC: case IB_QPT_RC: + case IB_QPT_UD: + case IB_QPT_SMI: + case IB_QPT_GSI: sz = sizeof(struct ipath_sge) * init_attr->cap.max_send_sge + sizeof(struct ipath_swqe); @@ -680,24 +790,50 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, ret = ERR_PTR(-ENOMEM); goto bail; } - /* FALLTHROUGH */ - case IB_QPT_UD: - case IB_QPT_SMI: - case IB_QPT_GSI: - qp = kmalloc(sizeof(*qp), GFP_KERNEL); + sz = sizeof(*qp); + sg_list_sz = 0; + if (init_attr->srq) { + struct ipath_srq *srq = to_isrq(init_attr->srq); + + if (srq->rq.max_sge > 1) + sg_list_sz = sizeof(*qp->r_sg_list) * + (srq->rq.max_sge - 1); + } else if (init_attr->cap.max_recv_sge > 1) + sg_list_sz = sizeof(*qp->r_sg_list) * + (init_attr->cap.max_recv_sge - 1); + qp = kmalloc(sz + sg_list_sz, GFP_KERNEL); if (!qp) { ret = ERR_PTR(-ENOMEM); - goto bail; + goto bail_swq; } - qp->r_rq.size = init_attr->cap.max_recv_wr + 1; - sz = sizeof(struct ipath_sge) * - init_attr->cap.max_recv_sge + - sizeof(struct ipath_rwqe); - qp->r_rq.wq = vmalloc(qp->r_rq.size * sz); - if (!qp->r_rq.wq) { - kfree(qp); - ret = ERR_PTR(-ENOMEM); - goto bail; + if (sg_list_sz && (init_attr->qp_type == IB_QPT_UD || + init_attr->qp_type == IB_QPT_SMI || + init_attr->qp_type == IB_QPT_GSI)) { + qp->r_ud_sg_list = kmalloc(sg_list_sz, GFP_KERNEL); + if (!qp->r_ud_sg_list) { + ret = ERR_PTR(-ENOMEM); + goto bail_qp; + } + } else + qp->r_ud_sg_list = NULL; + if (init_attr->srq) { + sz = 0; + qp->r_rq.size = 0; + qp->r_rq.max_sge = 0; + qp->r_rq.wq = NULL; + init_attr->cap.max_recv_wr = 0; + init_attr->cap.max_recv_sge = 0; + } else { + qp->r_rq.size = init_attr->cap.max_recv_wr + 1; + qp->r_rq.max_sge = init_attr->cap.max_recv_sge; + sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) + + sizeof(struct ipath_rwqe); + qp->r_rq.wq = vmalloc_user(sizeof(struct ipath_rwq) + + qp->r_rq.size * sz); + if (!qp->r_rq.wq) { + ret = ERR_PTR(-ENOMEM); + goto bail_sg_list; + } } /* @@ -708,35 +844,29 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, spin_lock_init(&qp->r_rq.lock); atomic_set(&qp->refcount, 0); init_waitqueue_head(&qp->wait); - tasklet_init(&qp->s_task, - init_attr->qp_type == IB_QPT_RC ? - ipath_do_rc_send : ipath_do_uc_send, - (unsigned long)qp); + init_waitqueue_head(&qp->wait_dma); + tasklet_init(&qp->s_task, ipath_do_send, (unsigned long)qp); INIT_LIST_HEAD(&qp->piowait); INIT_LIST_HEAD(&qp->timerwait); qp->state = IB_QPS_RESET; qp->s_wq = swq; qp->s_size = init_attr->cap.max_send_wr + 1; qp->s_max_sge = init_attr->cap.max_send_sge; - qp->r_rq.max_sge = init_attr->cap.max_recv_sge; - qp->s_flags = init_attr->sq_sig_type == IB_SIGNAL_REQ_WR ? - 1 << IPATH_S_SIGNAL_REQ_WR : 0; + if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) + qp->s_flags = IPATH_S_SIGNAL_REQ_WR; + else + qp->s_flags = 0; dev = to_idev(ibpd->device); err = ipath_alloc_qpn(&dev->qp_table, qp, init_attr->qp_type); if (err) { - vfree(swq); - vfree(qp->r_rq.wq); - kfree(qp); ret = ERR_PTR(err); - goto bail; + vfree(qp->r_rq.wq); + goto bail_sg_list; } - ipath_reset_qp(qp); - - /* Tell the core driver that the kernel SMA is present. */ - if (qp->ibqp.qp_type == IB_QPT_SMI) - ipath_layer_set_verbs_flags(dev->dd, - IPATH_VERBS_KERNEL_SMA); + qp->ip = NULL; + qp->s_tx = NULL; + ipath_reset_qp(qp, init_attr->qp_type); break; default: @@ -747,8 +877,74 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, init_attr->cap.max_inline_data = 0; + /* + * Return the address of the RWQ as the offset to mmap. + * See ipath_mmap() for details. + */ + if (udata && udata->outlen >= sizeof(__u64)) { + if (!qp->r_rq.wq) { + __u64 offset = 0; + + err = ib_copy_to_udata(udata, &offset, + sizeof(offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_ip; + } + } else { + u32 s = sizeof(struct ipath_rwq) + + qp->r_rq.size * sz; + + qp->ip = + ipath_create_mmap_info(dev, s, + ibpd->uobject->context, + qp->r_rq.wq); + if (!qp->ip) { + ret = ERR_PTR(-ENOMEM); + goto bail_ip; + } + + err = ib_copy_to_udata(udata, &(qp->ip->offset), + sizeof(qp->ip->offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_ip; + } + } + } + + spin_lock(&dev->n_qps_lock); + if (dev->n_qps_allocated == ib_ipath_max_qps) { + spin_unlock(&dev->n_qps_lock); + ret = ERR_PTR(-ENOMEM); + goto bail_ip; + } + + dev->n_qps_allocated++; + spin_unlock(&dev->n_qps_lock); + + if (qp->ip) { + spin_lock_irq(&dev->pending_lock); + list_add(&qp->ip->pending_mmaps, &dev->pending_mmaps); + spin_unlock_irq(&dev->pending_lock); + } + ret = &qp->ibqp; + goto bail; +bail_ip: + if (qp->ip) + kref_put(&qp->ip->ref, ipath_release_mmap_info); + else + vfree(qp->r_rq.wq); + ipath_free_qp(&dev->qp_table, qp); + free_qpn(&dev->qp_table, qp->ibqp.qp_num); +bail_sg_list: + kfree(qp->r_ud_sg_list); +bail_qp: + kfree(qp); +bail_swq: + vfree(swq); bail: return ret; } @@ -766,39 +962,51 @@ int ipath_destroy_qp(struct ib_qp *ibqp) { struct ipath_qp *qp = to_iqp(ibqp); struct ipath_ibdev *dev = to_idev(ibqp->device); - unsigned long flags; - - /* Tell the core driver that the kernel SMA is gone. */ - if (qp->ibqp.qp_type == IB_QPT_SMI) - ipath_layer_set_verbs_flags(dev->dd, 0); - - spin_lock_irqsave(&qp->r_rq.lock, flags); - spin_lock(&qp->s_lock); - qp->state = IB_QPS_ERR; - spin_unlock(&qp->s_lock); - spin_unlock_irqrestore(&qp->r_rq.lock, flags); - - /* Stop the sending tasklet. */ - tasklet_kill(&qp->s_task); - /* Make sure the QP isn't on the timeout list. */ - spin_lock_irqsave(&dev->pending_lock, flags); - if (!list_empty(&qp->timerwait)) - list_del_init(&qp->timerwait); - if (!list_empty(&qp->piowait)) - list_del_init(&qp->piowait); - spin_unlock_irqrestore(&dev->pending_lock, flags); + /* Make sure HW and driver activity is stopped. */ + spin_lock_irq(&qp->s_lock); + if (qp->state != IB_QPS_RESET) { + qp->state = IB_QPS_RESET; + spin_lock(&dev->pending_lock); + if (!list_empty(&qp->timerwait)) + list_del_init(&qp->timerwait); + if (!list_empty(&qp->piowait)) + list_del_init(&qp->piowait); + spin_unlock(&dev->pending_lock); + qp->s_flags &= ~IPATH_S_ANY_WAIT; + spin_unlock_irq(&qp->s_lock); + /* Stop the sending tasklet */ + tasklet_kill(&qp->s_task); + wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy)); + } else + spin_unlock_irq(&qp->s_lock); + + ipath_free_qp(&dev->qp_table, qp); + + if (qp->s_tx) { + atomic_dec(&qp->refcount); + if (qp->s_tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF) + kfree(qp->s_tx->txreq.map_addr); + spin_lock_irq(&dev->pending_lock); + list_add(&qp->s_tx->txreq.list, &dev->txreq_free); + spin_unlock_irq(&dev->pending_lock); + qp->s_tx = NULL; + } - /* - * Make sure that the QP is not in the QPN table so receive - * interrupts will discard packets for this QP. XXX Also remove QP - * from multicast table. - */ - if (atomic_read(&qp->refcount) != 0) - ipath_free_qp(&dev->qp_table, qp); + wait_event(qp->wait, !atomic_read(&qp->refcount)); + /* all user's cleaned up, mark it available */ + free_qpn(&dev->qp_table, qp->ibqp.qp_num); + spin_lock(&dev->n_qps_lock); + dev->n_qps_allocated--; + spin_unlock(&dev->n_qps_lock); + + if (qp->ip) + kref_put(&qp->ip->ref, ipath_release_mmap_info); + else + vfree(qp->r_rq.wq); + kfree(qp->r_ud_sg_list); vfree(qp->s_wq); - vfree(qp->r_rq.wq); kfree(qp); return 0; } @@ -837,49 +1045,6 @@ bail: } /** - * ipath_sqerror_qp - put a QP's send queue into an error state - * @qp: QP who's send queue will be put into an error state - * @wc: the WC responsible for putting the QP in this state - * - * Flushes the send work queue. - * The QP s_lock should be held. - */ - -void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc) -{ - struct ipath_ibdev *dev = to_idev(qp->ibqp.device); - struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last); - - _VERBS_INFO("Send queue error on QP%d/%d: err: %d\n", - qp->ibqp.qp_num, qp->remote_qpn, wc->status); - - spin_lock(&dev->pending_lock); - /* XXX What if its already removed by the timeout code? */ - if (!list_empty(&qp->timerwait)) - list_del_init(&qp->timerwait); - if (!list_empty(&qp->piowait)) - list_del_init(&qp->piowait); - spin_unlock(&dev->pending_lock); - - ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1); - if (++qp->s_last >= qp->s_size) - qp->s_last = 0; - - wc->status = IB_WC_WR_FLUSH_ERR; - - while (qp->s_last != qp->s_head) { - wc->wr_id = wqe->wr.wr_id; - wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; - ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1); - if (++qp->s_last >= qp->s_size) - qp->s_last = 0; - wqe = get_swqe_ptr(qp, qp->s_last); - } - qp->s_cur = qp->s_tail = qp->s_head; - qp->state = IB_QPS_SQE; -} - -/** * ipath_get_credit - flush the send work queue of a QP * @qp: the qp who's send work queue to flush * @aeth: the Acknowledge Extended Transport Header @@ -888,26 +1053,27 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc) */ void ipath_get_credit(struct ipath_qp *qp, u32 aeth) { - u32 credit = (aeth >> IPS_AETH_CREDIT_SHIFT) & IPS_AETH_CREDIT_MASK; + u32 credit = (aeth >> IPATH_AETH_CREDIT_SHIFT) & IPATH_AETH_CREDIT_MASK; /* * If the credit is invalid, we can send * as many packets as we like. Otherwise, we have to * honor the credit field. */ - if (credit == IPS_AETH_CREDIT_INVAL) { + if (credit == IPATH_AETH_CREDIT_INVAL) qp->s_lsn = (u32) -1; - } else if (qp->s_lsn != (u32) -1) { + else if (qp->s_lsn != (u32) -1) { /* Compute new LSN (i.e., MSN + credit) */ - credit = (aeth + credit_table[credit]) & IPS_MSN_MASK; + credit = (aeth + credit_table[credit]) & IPATH_MSN_MASK; if (ipath_cmp24(credit, qp->s_lsn) > 0) qp->s_lsn = credit; } /* Restart sending if it was blocked due to lack of credits. */ - if (qp->s_cur != qp->s_head && + if ((qp->s_flags & IPATH_S_WAIT_SSN_CREDIT) && + qp->s_cur != qp->s_head && (qp->s_lsn == (u32) -1 || ipath_cmp24(get_swqe_ptr(qp, qp->s_cur)->ssn, qp->s_lsn + 1) <= 0)) - tasklet_hi_schedule(&qp->s_task); + ipath_schedule_send(qp); }