X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=drivers%2Finfiniband%2Fcore%2Fcm.c;h=e5dc4530808aac8c8e8d410ca32f47ae60cf5cfe;hb=e1444b5a163e81138754cab27c4fa1637b5a2239;hp=3a611fe5497e06c0b9e2de2cc9eaf75d2abf0caa;hpb=de1bb1a64c29bae4f5330c70bd1dc6a62954c9f4;p=safe%2Fjmp%2Flinux-2.6 diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 3a611fe..e5dc453 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2004-2006 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. @@ -32,13 +32,16 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - * $Id: cm.c 2821 2005-07-08 17:07:28Z sean.hefty $ + * $Id: cm.c 4311 2005-12-05 18:42:01Z sean.hefty $ */ + +#include #include #include #include #include #include +#include #include #include #include @@ -71,6 +74,8 @@ static struct ib_cm { struct rb_root remote_id_table; struct rb_root remote_sidr_table; struct idr local_id_table; + __be32 random_id_operand; + struct list_head timewait_list; struct workqueue_struct *wq; } cm; @@ -108,6 +113,7 @@ struct cm_work { struct cm_timewait_info { struct cm_work work; /* Must be first. */ + struct list_head list; struct rb_node remote_qp_node; struct rb_node remote_id_node; __be64 remote_ca_guid; @@ -121,8 +127,8 @@ struct cm_id_private { struct rb_node service_node; struct rb_node sidr_id_node; - spinlock_t lock; - wait_queue_head_t wait; + spinlock_t lock; /* Do not acquire inside cm.lock */ + struct completion comp; atomic_t refcount; struct ib_mad_send_buf *msg; @@ -130,6 +136,7 @@ struct cm_id_private { /* todo: use alternate port on send failure */ struct cm_av av; struct cm_av alt_av; + struct ib_cm_compare_data *compare_data; void *private_data; __be64 tid; @@ -140,12 +147,12 @@ struct cm_id_private { __be32 rq_psn; int timeout_ms; enum ib_mtu path_mtu; + __be16 pkey; u8 private_data_len; u8 max_cm_retries; u8 peer_to_peer; u8 responder_resources; u8 initiator_depth; - u8 local_ack_timeout; u8 retry_count; u8 rnr_retry_count; u8 service_timeout; @@ -159,7 +166,7 @@ static void cm_work_handler(void *data); static inline void cm_deref_id(struct cm_id_private *cm_id_priv) { if (atomic_dec_and_test(&cm_id_priv->refcount)) - wake_up(&cm_id_priv->wait); + complete(&cm_id_priv->comp); } static int cm_alloc_msg(struct cm_id_private *cm_id_priv, @@ -174,7 +181,7 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv, if (IS_ERR(ah)) return PTR_ERR(ah); - m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn, + m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn, cm_id_priv->av.pkey_index, 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, GFP_ATOMIC); @@ -233,11 +240,10 @@ static void * cm_copy_private_data(const void *private_data, if (!private_data || !private_data_len) return NULL; - data = kmalloc(private_data_len, GFP_KERNEL); + data = kmemdup(private_data, private_data_len, GFP_KERNEL); if (!data) return ERR_PTR(-ENOMEM); - memcpy(data, private_data, private_data_len); return data; } @@ -251,23 +257,13 @@ static void cm_set_private_data(struct cm_id_private *cm_id_priv, cm_id_priv->private_data_len = private_data_len; } -static void cm_set_ah_attr(struct ib_ah_attr *ah_attr, u8 port_num, - u16 dlid, u8 sl, u16 src_path_bits) -{ - memset(ah_attr, 0, sizeof ah_attr); - ah_attr->dlid = dlid; - ah_attr->sl = sl; - ah_attr->src_path_bits = src_path_bits; - ah_attr->port_num = port_num; -} - -static void cm_init_av_for_response(struct cm_port *port, - struct ib_wc *wc, struct cm_av *av) +static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc, + struct ib_grh *grh, struct cm_av *av) { av->port = port; av->pkey_index = wc->pkey_index; - cm_set_ah_attr(&av->ah_attr, port->port_num, wc->slid, - wc->sl, wc->dlid_path_bits); + ib_init_ah_from_wc(port->cm_dev->device, port->port_num, wc, + grh, &av->ah_attr); } static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) @@ -297,9 +293,8 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) return ret; av->port = port; - cm_set_ah_attr(&av->ah_attr, av->port->port_num, - be16_to_cpu(path->dlid), path->sl, - be16_to_cpu(path->slid) & 0x7F); + ib_init_ah_from_path(cm_dev->device, port->port_num, path, + &av->ah_attr); av->packet_life_time = path->packet_life_time; return 0; } @@ -307,15 +302,17 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) static int cm_alloc_id(struct cm_id_private *cm_id_priv) { unsigned long flags; - int ret; + int ret, id; static int next_id; do { spin_lock_irqsave(&cm.lock, flags); - ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, next_id++, - (__force int *) &cm_id_priv->id.local_id); + ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, + next_id++, &id); spin_unlock_irqrestore(&cm.lock, flags); } while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) ); + + cm_id_priv->id.local_id = (__force __be32) (id ^ cm.random_id_operand); return ret; } @@ -324,7 +321,8 @@ static void cm_free_id(__be32 local_id) unsigned long flags; spin_lock_irqsave(&cm.lock, flags); - idr_remove(&cm.local_id_table, (__force int) local_id); + idr_remove(&cm.local_id_table, + (__force int) (local_id ^ cm.random_id_operand)); spin_unlock_irqrestore(&cm.lock, flags); } @@ -332,7 +330,8 @@ static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id) { struct cm_id_private *cm_id_priv; - cm_id_priv = idr_find(&cm.local_id_table, (__force int) local_id); + cm_id_priv = idr_find(&cm.local_id_table, + (__force int) (local_id ^ cm.random_id_operand)); if (cm_id_priv) { if (cm_id_priv->id.remote_id == remote_id) atomic_inc(&cm_id_priv->refcount); @@ -355,6 +354,41 @@ static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id) return cm_id_priv; } +static void cm_mask_copy(u8 *dst, u8 *src, u8 *mask) +{ + int i; + + for (i = 0; i < IB_CM_COMPARE_SIZE / sizeof(unsigned long); i++) + ((unsigned long *) dst)[i] = ((unsigned long *) src)[i] & + ((unsigned long *) mask)[i]; +} + +static int cm_compare_data(struct ib_cm_compare_data *src_data, + struct ib_cm_compare_data *dst_data) +{ + u8 src[IB_CM_COMPARE_SIZE]; + u8 dst[IB_CM_COMPARE_SIZE]; + + if (!src_data || !dst_data) + return 0; + + cm_mask_copy(src, src_data->data, dst_data->mask); + cm_mask_copy(dst, dst_data->data, src_data->mask); + return memcmp(src, dst, IB_CM_COMPARE_SIZE); +} + +static int cm_compare_private_data(u8 *private_data, + struct ib_cm_compare_data *dst_data) +{ + u8 src[IB_CM_COMPARE_SIZE]; + + if (!dst_data) + return 0; + + cm_mask_copy(src, private_data, dst_data->mask); + return memcmp(src, dst_data->data, IB_CM_COMPARE_SIZE); +} + static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) { struct rb_node **link = &cm.listen_service_table.rb_node; @@ -362,14 +396,18 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) struct cm_id_private *cur_cm_id_priv; __be64 service_id = cm_id_priv->id.service_id; __be64 service_mask = cm_id_priv->id.service_mask; + int data_cmp; while (*link) { parent = *link; cur_cm_id_priv = rb_entry(parent, struct cm_id_private, service_node); + data_cmp = cm_compare_data(cm_id_priv->compare_data, + cur_cm_id_priv->compare_data); if ((cur_cm_id_priv->id.service_mask & service_id) == (service_mask & cur_cm_id_priv->id.service_id) && - (cm_id_priv->id.device == cur_cm_id_priv->id.device)) + (cm_id_priv->id.device == cur_cm_id_priv->id.device) && + !data_cmp) return cur_cm_id_priv; if (cm_id_priv->id.device < cur_cm_id_priv->id.device) @@ -378,6 +416,10 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) link = &(*link)->rb_right; else if (service_id < cur_cm_id_priv->id.service_id) link = &(*link)->rb_left; + else if (service_id > cur_cm_id_priv->id.service_id) + link = &(*link)->rb_right; + else if (data_cmp < 0) + link = &(*link)->rb_left; else link = &(*link)->rb_right; } @@ -387,16 +429,20 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) } static struct cm_id_private * cm_find_listen(struct ib_device *device, - __be64 service_id) + __be64 service_id, + u8 *private_data) { struct rb_node *node = cm.listen_service_table.rb_node; struct cm_id_private *cm_id_priv; + int data_cmp; while (node) { cm_id_priv = rb_entry(node, struct cm_id_private, service_node); + data_cmp = cm_compare_private_data(private_data, + cm_id_priv->compare_data); if ((cm_id_priv->id.service_mask & service_id) == cm_id_priv->id.service_id && - (cm_id_priv->id.device == device)) + (cm_id_priv->id.device == device) && !data_cmp) return cm_id_priv; if (device < cm_id_priv->id.device) @@ -405,6 +451,10 @@ static struct cm_id_private * cm_find_listen(struct ib_device *device, node = node->rb_right; else if (service_id < cm_id_priv->id.service_id) node = node->rb_left; + else if (service_id > cm_id_priv->id.service_id) + node = node->rb_right; + else if (data_cmp < 0) + node = node->rb_left; else node = node->rb_right; } @@ -559,7 +609,7 @@ struct ib_cm_id *ib_create_cm_id(struct ib_device *device, goto error; spin_lock_init(&cm_id_priv->lock); - init_waitqueue_head(&cm_id_priv->wait); + init_completion(&cm_id_priv->comp); INIT_LIST_HEAD(&cm_id_priv->work_list); atomic_set(&cm_id_priv->work_count, -1); atomic_set(&cm_id_priv->refcount, 1); @@ -598,13 +648,6 @@ static inline int cm_convert_to_ms(int iba_time) static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info) { - unsigned long flags; - - if (!timewait_info->inserted_remote_id && - !timewait_info->inserted_remote_qp) - return; - - spin_lock_irqsave(&cm.lock, flags); if (timewait_info->inserted_remote_id) { rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table); timewait_info->inserted_remote_id = 0; @@ -614,7 +657,6 @@ static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info) rb_erase(&timewait_info->remote_qp_node, &cm.remote_qp_table); timewait_info->inserted_remote_qp = 0; } - spin_unlock_irqrestore(&cm.lock, flags); } static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id) @@ -635,6 +677,12 @@ static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id) static void cm_enter_timewait(struct cm_id_private *cm_id_priv) { int wait_time; + unsigned long flags; + + spin_lock_irqsave(&cm.lock, flags); + cm_cleanup_timewait(cm_id_priv->timewait_info); + list_add_tail(&cm_id_priv->timewait_info->list, &cm.timewait_list); + spin_unlock_irqrestore(&cm.lock, flags); /* * The cm_id could be destroyed by the user before we exit timewait. @@ -642,7 +690,7 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv) * timewait before notifying the user that we've exited timewait. */ cm_id_priv->id.state = IB_CM_TIMEWAIT; - wait_time = cm_convert_to_ms(cm_id_priv->local_ack_timeout); + wait_time = cm_convert_to_ms(cm_id_priv->av.packet_life_time + 1); queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work, msecs_to_jiffies(wait_time)); cm_id_priv->timewait_info = NULL; @@ -650,15 +698,19 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv) static void cm_reset_to_idle(struct cm_id_private *cm_id_priv) { + unsigned long flags; + cm_id_priv->id.state = IB_CM_IDLE; if (cm_id_priv->timewait_info) { + spin_lock_irqsave(&cm.lock, flags); cm_cleanup_timewait(cm_id_priv->timewait_info); + spin_unlock_irqrestore(&cm.lock, flags); kfree(cm_id_priv->timewait_info); cm_id_priv->timewait_info = NULL; } } -void ib_destroy_cm_id(struct ib_cm_id *cm_id) +static void cm_destroy_id(struct ib_cm_id *cm_id, int err) { struct cm_id_private *cm_id_priv; struct cm_work *work; @@ -692,12 +744,22 @@ retest: sizeof cm_id_priv->av.port->cm_dev->ca_guid, NULL, 0); break; + case IB_CM_REQ_RCVD: + if (err == -ENOMEM) { + /* Do not reject to allow future retries. */ + cm_reset_to_idle(cm_id_priv); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + } else { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, + NULL, 0, NULL, 0); + } + break; case IB_CM_MRA_REQ_RCVD: case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); /* Fall through */ - case IB_CM_REQ_RCVD: case IB_CM_MRA_REQ_SENT: case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: @@ -724,19 +786,23 @@ retest: } cm_free_id(cm_id->local_id); - atomic_dec(&cm_id_priv->refcount); - wait_event(cm_id_priv->wait, !atomic_read(&cm_id_priv->refcount)); + cm_deref_id(cm_id_priv); + wait_for_completion(&cm_id_priv->comp); while ((work = cm_dequeue_work(cm_id_priv)) != NULL) cm_free_work(work); - if (cm_id_priv->private_data && cm_id_priv->private_data_len) - kfree(cm_id_priv->private_data); + kfree(cm_id_priv->compare_data); + kfree(cm_id_priv->private_data); kfree(cm_id_priv); } + +void ib_destroy_cm_id(struct ib_cm_id *cm_id) +{ + cm_destroy_id(cm_id, 0); +} EXPORT_SYMBOL(ib_destroy_cm_id); -int ib_cm_listen(struct ib_cm_id *cm_id, - __be64 service_id, - __be64 service_mask) +int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask, + struct ib_cm_compare_data *compare_data) { struct cm_id_private *cm_id_priv, *cur_cm_id_priv; unsigned long flags; @@ -750,7 +816,19 @@ int ib_cm_listen(struct ib_cm_id *cm_id, return -EINVAL; cm_id_priv = container_of(cm_id, struct cm_id_private, id); - BUG_ON(cm_id->state != IB_CM_IDLE); + if (cm_id->state != IB_CM_IDLE) + return -EINVAL; + + if (compare_data) { + cm_id_priv->compare_data = kzalloc(sizeof *compare_data, + GFP_KERNEL); + if (!cm_id_priv->compare_data) + return -ENOMEM; + cm_mask_copy(cm_id_priv->compare_data->data, + compare_data->data, compare_data->mask); + memcpy(cm_id_priv->compare_data->mask, compare_data->mask, + IB_CM_COMPARE_SIZE); + } cm_id->state = IB_CM_LISTEN; @@ -767,6 +845,8 @@ int ib_cm_listen(struct ib_cm_id *cm_id, if (cur_cm_id_priv) { cm_id->state = IB_CM_IDLE; + kfree(cm_id_priv->compare_data); + cm_id_priv->compare_data = NULL; ret = -EBUSY; } return ret; @@ -856,7 +936,7 @@ static void cm_format_req(struct cm_req_msg *req_msg, param->private_data_len); } -static inline int cm_validate_req_param(struct ib_cm_req_param *param) +static int cm_validate_req_param(struct ib_cm_req_param *param) { /* peer-to-peer not supported */ if (param->peer_to_peer) @@ -904,8 +984,10 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv-> id.local_id); - if (IS_ERR(cm_id_priv->timewait_info)) + if (IS_ERR(cm_id_priv->timewait_info)) { + ret = PTR_ERR(cm_id_priv->timewait_info); goto out; + } ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av); if (ret) @@ -927,6 +1009,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, cm_id_priv->responder_resources = param->responder_resources; cm_id_priv->retry_count = param->retry_count; cm_id_priv->path_mtu = param->primary_path->mtu; + cm_id_priv->pkey = param->primary_path->pkey; cm_id_priv->qp_type = param->qp_type; ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg); @@ -941,8 +1024,6 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg); cm_id_priv->rq_psn = cm_req_get_starting_psn(req_msg); - cm_id_priv->local_ack_timeout = - cm_req_get_primary_local_ack_timeout(req_msg); spin_lock_irqsave(&cm_id_priv->lock, flags); ret = ib_post_send_mad(cm_id_priv->msg, NULL); @@ -1005,7 +1086,7 @@ static inline int cm_is_active_peer(__be64 local_ca_guid, __be64 remote_ca_guid, (be32_to_cpu(local_qpn) > be32_to_cpu(remote_qpn)))); } -static inline void cm_format_paths_from_req(struct cm_req_msg *req_msg, +static void cm_format_paths_from_req(struct cm_req_msg *req_msg, struct ib_sa_path_rec *primary_path, struct ib_sa_path_rec *alt_path) { @@ -1107,7 +1188,7 @@ static void cm_process_work(struct cm_id_private *cm_id_priv, } cm_deref_id(cm_id_priv); if (ret) - ib_destroy_cm_id(&cm_id_priv->id); + cm_destroy_id(&cm_id_priv->id, ret); } static void cm_format_mra(struct cm_mra_msg *mra_msg, @@ -1226,6 +1307,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, if (timewait_info) { cur_cm_id_priv = cm_get_id(timewait_info->work.local_id, timewait_info->work.remote_id); + cm_cleanup_timewait(cm_id_priv->timewait_info); spin_unlock_irqrestore(&cm.lock, flags); if (cur_cm_id_priv) { cm_dup_req_handler(work, cur_cm_id_priv); @@ -1234,28 +1316,29 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, cm_issue_rej(work->port, work->mad_recv_wc, IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ, NULL, 0); - goto error; + listen_cm_id_priv = NULL; + goto out; } /* Find matching listen request. */ listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device, - req_msg->service_id); + req_msg->service_id, + req_msg->private_data); if (!listen_cm_id_priv) { + cm_cleanup_timewait(cm_id_priv->timewait_info); spin_unlock_irqrestore(&cm.lock, flags); cm_issue_rej(work->port, work->mad_recv_wc, IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ, NULL, 0); - goto error; + goto out; } atomic_inc(&listen_cm_id_priv->refcount); atomic_inc(&cm_id_priv->refcount); cm_id_priv->id.state = IB_CM_REQ_RCVD; atomic_inc(&cm_id_priv->work_count); spin_unlock_irqrestore(&cm.lock, flags); +out: return listen_cm_id_priv; - -error: cm_cleanup_timewait(cm_id_priv->timewait_info); - return NULL; } static int cm_req_handler(struct cm_work *work) @@ -1274,12 +1357,13 @@ static int cm_req_handler(struct cm_work *work) cm_id_priv = container_of(cm_id, struct cm_id_private, id); cm_id_priv->id.remote_id = req_msg->local_comm_id; cm_init_av_for_response(work->port, work->mad_recv_wc->wc, + work->mad_recv_wc->recv_buf.grh, &cm_id_priv->av); cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv-> id.local_id); if (IS_ERR(cm_id_priv->timewait_info)) { ret = PTR_ERR(cm_id_priv->timewait_info); - goto error1; + goto destroy; } cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id; cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid; @@ -1288,7 +1372,8 @@ static int cm_req_handler(struct cm_work *work) listen_cm_id_priv = cm_match_req(work, cm_id_priv); if (!listen_cm_id_priv) { ret = -EINVAL; - goto error2; + kfree(cm_id_priv->timewait_info); + goto destroy; } cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler; @@ -1298,12 +1383,22 @@ static int cm_req_handler(struct cm_work *work) cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]); ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av); - if (ret) - goto error3; + if (ret) { + ib_get_cached_gid(work->port->cm_dev->device, + work->port->port_num, 0, &work->path[0].sgid); + ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID, + &work->path[0].sgid, sizeof work->path[0].sgid, + NULL, 0); + goto rejected; + } if (req_msg->alt_local_lid) { ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av); - if (ret) - goto error3; + if (ret) { + ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID, + &work->path[0].sgid, + sizeof work->path[0].sgid, NULL, 0); + goto rejected; + } } cm_id_priv->tid = req_msg->hdr.tid; cm_id_priv->timeout_ms = cm_convert_to_ms( @@ -1313,9 +1408,8 @@ static int cm_req_handler(struct cm_work *work) cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg); cm_id_priv->responder_resources = cm_req_get_init_depth(req_msg); cm_id_priv->path_mtu = cm_req_get_path_mtu(req_msg); + cm_id_priv->pkey = req_msg->pkey; cm_id_priv->sq_psn = cm_req_get_starting_psn(req_msg); - cm_id_priv->local_ack_timeout = - cm_req_get_primary_local_ack_timeout(req_msg); cm_id_priv->retry_count = cm_req_get_retry_count(req_msg); cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg); cm_id_priv->qp_type = cm_req_get_qp_type(req_msg); @@ -1325,12 +1419,11 @@ static int cm_req_handler(struct cm_work *work) cm_deref_id(listen_cm_id_priv); return 0; -error3: atomic_dec(&cm_id_priv->refcount); +rejected: + atomic_dec(&cm_id_priv->refcount); cm_deref_id(listen_cm_id_priv); - cm_cleanup_timewait(cm_id_priv->timewait_info); -error2: kfree(cm_id_priv->timewait_info); - cm_id_priv->timewait_info = NULL; -error1: ib_destroy_cm_id(&cm_id_priv->id); +destroy: + ib_destroy_cm_id(cm_id); return ret; } @@ -1547,40 +1640,46 @@ static int cm_rep_handler(struct cm_work *work) return -EINVAL; } + cm_format_rep_event(work); + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id_priv->id.state) { + case IB_CM_REQ_SENT: + case IB_CM_MRA_REQ_RCVD: + break; + default: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + ret = -EINVAL; + goto error; + } + cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id; cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid; cm_id_priv->timewait_info->remote_qpn = cm_rep_get_local_qpn(rep_msg); - spin_lock_irqsave(&cm.lock, flags); + spin_lock(&cm.lock); /* Check for duplicate REP. */ if (cm_insert_remote_id(cm_id_priv->timewait_info)) { - spin_unlock_irqrestore(&cm.lock, flags); + spin_unlock(&cm.lock); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); ret = -EINVAL; goto error; } /* Check for a stale connection. */ if (cm_insert_remote_qpn(cm_id_priv->timewait_info)) { - spin_unlock_irqrestore(&cm.lock, flags); + rb_erase(&cm_id_priv->timewait_info->remote_id_node, + &cm.remote_id_table); + cm_id_priv->timewait_info->inserted_remote_id = 0; + spin_unlock(&cm.lock); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_issue_rej(work->port, work->mad_recv_wc, IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP, NULL, 0); ret = -EINVAL; goto error; } - spin_unlock_irqrestore(&cm.lock, flags); - - cm_format_rep_event(work); + spin_unlock(&cm.lock); - spin_lock_irqsave(&cm_id_priv->lock, flags); - switch (cm_id_priv->id.state) { - case IB_CM_REQ_SENT: - case IB_CM_MRA_REQ_RCVD: - break; - default: - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - ret = -EINVAL; - goto error; - } cm_id_priv->id.state = IB_CM_REP_RCVD; cm_id_priv->id.remote_id = rep_msg->local_comm_id; cm_id_priv->remote_qpn = cm_rep_get_local_qpn(rep_msg); @@ -1603,7 +1702,7 @@ static int cm_rep_handler(struct cm_work *work) cm_deref_id(cm_id_priv); return 0; -error: cm_cleanup_timewait(cm_id_priv->timewait_info); +error: cm_deref_id(cm_id_priv); return ret; } @@ -1614,7 +1713,7 @@ static int cm_establish_handler(struct cm_work *work) unsigned long flags; int ret; - /* See comment in ib_cm_establish about lookup. */ + /* See comment in cm_establish about lookup. */ cm_id_priv = cm_acquire_id(work->local_id, work->remote_id); if (!cm_id_priv) return -EINVAL; @@ -1800,6 +1899,32 @@ out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); } EXPORT_SYMBOL(ib_send_cm_drep); +static int cm_issue_drep(struct cm_port *port, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct ib_mad_send_buf *msg = NULL; + struct cm_dreq_msg *dreq_msg; + struct cm_drep_msg *drep_msg; + int ret; + + ret = cm_alloc_response_msg(port, mad_recv_wc, &msg); + if (ret) + return ret; + + dreq_msg = (struct cm_dreq_msg *) mad_recv_wc->recv_buf.mad; + drep_msg = (struct cm_drep_msg *) msg->mad; + + cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, dreq_msg->hdr.tid); + drep_msg->remote_comm_id = dreq_msg->local_comm_id; + drep_msg->local_comm_id = dreq_msg->remote_comm_id; + + ret = ib_post_send_mad(msg, NULL); + if (ret) + cm_free_msg(msg); + + return ret; +} + static int cm_dreq_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; @@ -1811,8 +1936,10 @@ static int cm_dreq_handler(struct cm_work *work) dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id, dreq_msg->local_comm_id); - if (!cm_id_priv) + if (!cm_id_priv) { + cm_issue_drep(work->port, work->mad_recv_wc); return -EINVAL; + } work->cm_event.private_data = &dreq_msg->private_data; @@ -1991,8 +2118,9 @@ static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg) spin_unlock_irqrestore(&cm.lock, flags); return NULL; } - cm_id_priv = idr_find(&cm.local_id_table, - (__force int) timewait_info->work.local_id); + cm_id_priv = idr_find(&cm.local_id_table, (__force int) + (timewait_info->work.local_id ^ + cm.random_id_operand)); if (cm_id_priv) { if (cm_id_priv->id.remote_id == remote_id) atomic_inc(&cm_id_priv->refcount); @@ -2271,11 +2399,16 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id, cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state != IB_CM_ESTABLISHED || - cm_id->lap_state != IB_CM_LAP_IDLE) { + (cm_id->lap_state != IB_CM_LAP_UNINIT && + cm_id->lap_state != IB_CM_LAP_IDLE)) { ret = -EINVAL; goto out; } + ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av); + if (ret) + goto out; + ret = cm_alloc_msg(cm_id_priv, &msg); if (ret) goto out; @@ -2300,7 +2433,8 @@ out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); } EXPORT_SYMBOL(ib_send_cm_lap); -static void cm_format_path_from_lap(struct ib_sa_path_rec *path, +static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv, + struct ib_sa_path_rec *path, struct cm_lap_msg *lap_msg) { memset(path, 0, sizeof *path); @@ -2312,10 +2446,10 @@ static void cm_format_path_from_lap(struct ib_sa_path_rec *path, path->hop_limit = lap_msg->alt_hop_limit; path->traffic_class = cm_lap_get_traffic_class(lap_msg); path->reversible = 1; - /* pkey is same as in REQ */ + path->pkey = cm_id_priv->pkey; path->sl = cm_lap_get_sl(lap_msg); path->mtu_selector = IB_SA_EQ; - /* mtu is same as in REQ */ + path->mtu = cm_id_priv->path_mtu; path->rate_selector = IB_SA_EQ; path->rate = cm_lap_get_packet_rate(lap_msg); path->packet_life_time_selector = IB_SA_EQ; @@ -2341,7 +2475,7 @@ static int cm_lap_handler(struct cm_work *work) param = &work->cm_event.param.lap_rcvd; param->alternate_path = &work->path[0]; - cm_format_path_from_lap(param->alternate_path, lap_msg); + cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg); work->cm_event.private_data = &lap_msg->private_data; spin_lock_irqsave(&cm_id_priv->lock, flags); @@ -2349,6 +2483,7 @@ static int cm_lap_handler(struct cm_work *work) goto unlock; switch (cm_id_priv->id.lap_state) { + case IB_CM_LAP_UNINIT: case IB_CM_LAP_IDLE: break; case IB_CM_MRA_LAP_SENT: @@ -2371,6 +2506,10 @@ static int cm_lap_handler(struct cm_work *work) cm_id_priv->id.lap_state = IB_CM_LAP_RCVD; cm_id_priv->tid = lap_msg->hdr.tid; + cm_init_av_for_response(work->port, work->mad_recv_wc->wc, + work->mad_recv_wc->recv_buf.grh, + &cm_id_priv->av); + cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av); ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); @@ -2501,28 +2640,29 @@ static int cm_timewait_handler(struct cm_work *work) { struct cm_timewait_info *timewait_info; struct cm_id_private *cm_id_priv; - unsigned long flags; int ret; timewait_info = (struct cm_timewait_info *)work; - cm_cleanup_timewait(timewait_info); + spin_lock_irq(&cm.lock); + list_del(&timewait_info->list); + spin_unlock_irq(&cm.lock); cm_id_priv = cm_acquire_id(timewait_info->work.local_id, timewait_info->work.remote_id); if (!cm_id_priv) return -EINVAL; - spin_lock_irqsave(&cm_id_priv->lock, flags); + spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_TIMEWAIT || cm_id_priv->remote_qpn != timewait_info->remote_qpn) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); goto out; } cm_id_priv->id.state = IB_CM_IDLE; ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); @@ -2541,7 +2681,7 @@ static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg, cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID, cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR)); sidr_req_msg->request_id = cm_id_priv->id.local_id; - sidr_req_msg->pkey = cpu_to_be16(param->pkey); + sidr_req_msg->pkey = cpu_to_be16(param->path->pkey); sidr_req_msg->service_id = param->service_id; if (param->private_data && param->private_data_len) @@ -2633,6 +2773,7 @@ static int cm_sidr_req_handler(struct cm_work *work) cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid); cm_id_priv->av.dgid.global.interface_id = 0; cm_init_av_for_response(work->port, work->mad_recv_wc->wc, + work->mad_recv_wc->recv_buf.grh, &cm_id_priv->av); cm_id_priv->id.remote_id = sidr_req_msg->request_id; cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD; @@ -2646,7 +2787,8 @@ static int cm_sidr_req_handler(struct cm_work *work) goto out; /* Duplicate message. */ } cur_cm_id_priv = cm_find_listen(cm_id->device, - sidr_req_msg->service_id); + sidr_req_msg->service_id, + sidr_req_msg->private_data); if (!cur_cm_id_priv) { rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); spin_unlock_irqrestore(&cm.lock, flags); @@ -2906,7 +3048,7 @@ static void cm_work_handler(void *data) cm_free_work(work); } -int ib_cm_establish(struct ib_cm_id *cm_id) +static int cm_establish(struct ib_cm_id *cm_id) { struct cm_id_private *cm_id_priv; struct cm_work *work; @@ -2954,7 +3096,44 @@ int ib_cm_establish(struct ib_cm_id *cm_id) out: return ret; } -EXPORT_SYMBOL(ib_cm_establish); + +static int cm_migrate(struct ib_cm_id *cm_id) +{ + struct cm_id_private *cm_id_priv; + unsigned long flags; + int ret = 0; + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id->state == IB_CM_ESTABLISHED && + (cm_id->lap_state == IB_CM_LAP_UNINIT || + cm_id->lap_state == IB_CM_LAP_IDLE)) { + cm_id->lap_state = IB_CM_LAP_IDLE; + cm_id_priv->av = cm_id_priv->alt_av; + } else + ret = -EINVAL; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + return ret; +} + +int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event) +{ + int ret; + + switch (event) { + case IB_EVENT_COMM_EST: + ret = cm_establish(cm_id); + break; + case IB_EVENT_PATH_MIG: + ret = cm_migrate(cm_id); + break; + default: + ret = -EINVAL; + } + return ret; +} +EXPORT_SYMBOL(ib_cm_notify); static void cm_recv_handler(struct ib_mad_agent *mad_agent, struct ib_mad_recv_wc *mad_recv_wc) @@ -3039,10 +3218,10 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv, case IB_CM_ESTABLISHED: *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT; - qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_WRITE; + qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE; if (cm_id_priv->responder_resources) - qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ; + qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ | + IB_ACCESS_REMOTE_ATOMIC; qp_attr->pkey_index = cm_id_priv->av.pkey_index; qp_attr->port_num = cm_id_priv->av.port->port_num; ret = 0; @@ -3086,6 +3265,10 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv, } if (cm_id_priv->alt_av.ah_attr.dlid) { *qp_attr_mask |= IB_QP_ALT_PATH; + qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num; + qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index; + qp_attr->alt_timeout = + cm_id_priv->alt_av.packet_life_time + 1; qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr; } ret = 0; @@ -3112,19 +3295,31 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv, case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: case IB_CM_ESTABLISHED: - *qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN; - qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn); - if (cm_id_priv->qp_type == IB_QPT_RC) { - *qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT | - IB_QP_RNR_RETRY | - IB_QP_MAX_QP_RD_ATOMIC; - qp_attr->timeout = cm_id_priv->local_ack_timeout; - qp_attr->retry_cnt = cm_id_priv->retry_count; - qp_attr->rnr_retry = cm_id_priv->rnr_retry_count; - qp_attr->max_rd_atomic = cm_id_priv->initiator_depth; - } - if (cm_id_priv->alt_av.ah_attr.dlid) { - *qp_attr_mask |= IB_QP_PATH_MIG_STATE; + if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) { + *qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN; + qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn); + if (cm_id_priv->qp_type == IB_QPT_RC) { + *qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT | + IB_QP_RNR_RETRY | + IB_QP_MAX_QP_RD_ATOMIC; + qp_attr->timeout = + cm_id_priv->av.packet_life_time + 1; + qp_attr->retry_cnt = cm_id_priv->retry_count; + qp_attr->rnr_retry = cm_id_priv->rnr_retry_count; + qp_attr->max_rd_atomic = + cm_id_priv->initiator_depth; + } + if (cm_id_priv->alt_av.ah_attr.dlid) { + *qp_attr_mask |= IB_QP_PATH_MIG_STATE; + qp_attr->path_mig_state = IB_MIG_REARM; + } + } else { + *qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE; + qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num; + qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index; + qp_attr->alt_timeout = + cm_id_priv->alt_av.packet_life_time + 1; + qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr; qp_attr->path_mig_state = IB_MIG_REARM; } ret = 0; @@ -3163,22 +3358,6 @@ int ib_cm_init_qp_attr(struct ib_cm_id *cm_id, } EXPORT_SYMBOL(ib_cm_init_qp_attr); -static __be64 cm_get_ca_guid(struct ib_device *device) -{ - struct ib_device_attr *device_attr; - __be64 guid; - int ret; - - device_attr = kmalloc(sizeof *device_attr, GFP_KERNEL); - if (!device_attr) - return 0; - - ret = ib_query_device(device, device_attr); - guid = ret ? 0 : device_attr->node_guid; - kfree(device_attr); - return guid; -} - static void cm_add_one(struct ib_device *device) { struct cm_device *cm_dev; @@ -3194,15 +3373,16 @@ static void cm_add_one(struct ib_device *device) int ret; u8 i; + if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) + return; + cm_dev = kmalloc(sizeof(*cm_dev) + sizeof(*port) * device->phys_port_cnt, GFP_KERNEL); if (!cm_dev) return; cm_dev->device = device; - cm_dev->ca_guid = cm_get_ca_guid(device); - if (!cm_dev->ca_guid) - goto error1; + cm_dev->ca_guid = device->node_guid; set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask); for (i = 1; i <= device->phys_port_cnt; i++) { @@ -3217,11 +3397,11 @@ static void cm_add_one(struct ib_device *device) cm_recv_handler, port); if (IS_ERR(port->mad_agent)) - goto error2; + goto error1; ret = ib_modify_port(device, i, 0, &port_modify); if (ret) - goto error3; + goto error2; } ib_set_client_data(device, &cm_client, cm_dev); @@ -3230,9 +3410,9 @@ static void cm_add_one(struct ib_device *device) write_unlock_irqrestore(&cm.device_lock, flags); return; -error3: - ib_unregister_mad_agent(port->mad_agent); error2: + ib_unregister_mad_agent(port->mad_agent); +error1: port_modify.set_port_cap_mask = 0; port_modify.clr_port_cap_mask = IB_PORT_CM_SUP; while (--i) { @@ -3240,7 +3420,6 @@ error2: ib_modify_port(device, port->port_num, 0, &port_modify); ib_unregister_mad_agent(port->mad_agent); } -error1: kfree(cm_dev); } @@ -3284,7 +3463,9 @@ static int __init ib_cm_init(void) cm.remote_qp_table = RB_ROOT; cm.remote_sidr_table = RB_ROOT; idr_init(&cm.local_id_table); + get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand); idr_pre_get(&cm.local_id_table, GFP_KERNEL); + INIT_LIST_HEAD(&cm.timewait_list); cm.wq = create_workqueue("ib_cm"); if (!cm.wq) @@ -3302,8 +3483,20 @@ error: static void __exit ib_cm_cleanup(void) { - flush_workqueue(cm.wq); + struct cm_timewait_info *timewait_info, *tmp; + + spin_lock_irq(&cm.lock); + list_for_each_entry(timewait_info, &cm.timewait_list, list) + cancel_delayed_work(&timewait_info->work.work); + spin_unlock_irq(&cm.lock); + destroy_workqueue(cm.wq); + + list_for_each_entry_safe(timewait_info, tmp, &cm.timewait_list, list) { + list_del(&timewait_info->list); + kfree(timewait_info); + } + ib_unregister_client(&cm_client); idr_destroy(&cm.local_id_table); }