[PATCH] IB: move include files to include/rdma
drivers/infiniband/hw/mthca/mthca_provider.c
/*
 * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005 Cisco Systems. All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id: mthca_provider.c 1397 2004-12-28 05:09:00Z roland $
 */

#include <rdma/ib_smi.h>
#include <linux/mm.h>

#include "mthca_dev.h"
#include "mthca_cmd.h"
#include "mthca_user.h"
#include "mthca_memfree.h"

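/*
 * Device attribute queries are serviced through the MAD_IFC firmware
 * command: the driver builds an in-band subnet management packet
 * (here a NodeInfo Get) and parses the reply.  The fixed byte offsets
 * below follow the IB NodeInfo layout (vendor ID, part ID, hardware
 * revision, system image GUID and node GUID); the remaining limits
 * come straight from the values probed at driver init time.
 */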
static int mthca_query_device(struct ib_device *ibdev,
                              struct ib_device_attr *props)
{
        struct ib_smp *in_mad  = NULL;
        struct ib_smp *out_mad = NULL;
        int err = -ENOMEM;
        struct mthca_dev *mdev = to_mdev(ibdev);

        u8 status;

        in_mad  = kmalloc(sizeof *in_mad, GFP_KERNEL);
        out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
        if (!in_mad || !out_mad)
                goto out;

        memset(props, 0, sizeof *props);

        props->fw_ver              = mdev->fw_ver;

        memset(in_mad, 0, sizeof *in_mad);
        in_mad->base_version       = 1;
        in_mad->mgmt_class         = IB_MGMT_CLASS_SUBN_LID_ROUTED;
        in_mad->class_version      = 1;
        in_mad->method             = IB_MGMT_METHOD_GET;
        in_mad->attr_id            = IB_SMP_ATTR_NODE_INFO;

        err = mthca_MAD_IFC(mdev, 1, 1,
                            1, NULL, NULL, in_mad, out_mad,
                            &status);
        if (err)
                goto out;
        if (status) {
                err = -EINVAL;
                goto out;
        }

        props->device_cap_flags    = mdev->device_cap_flags;
        props->vendor_id           = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
                0xffffff;
        props->vendor_part_id      = be16_to_cpup((__be16 *) (out_mad->data + 30));
        props->hw_ver              = be16_to_cpup((__be16 *) (out_mad->data + 32));
        memcpy(&props->sys_image_guid, out_mad->data +  4, 8);
        memcpy(&props->node_guid,      out_mad->data + 12, 8);

        props->max_mr_size         = ~0ull;
        props->max_qp              = mdev->limits.num_qps - mdev->limits.reserved_qps;
        props->max_qp_wr           = 0xffff;
        props->max_sge             = mdev->limits.max_sg;
        props->max_cq              = mdev->limits.num_cqs - mdev->limits.reserved_cqs;
        props->max_cqe             = 0xffff;
        props->max_mr              = mdev->limits.num_mpts - mdev->limits.reserved_mrws;
        props->max_pd              = mdev->limits.num_pds - mdev->limits.reserved_pds;
        props->max_qp_rd_atom      = 1 << mdev->qp_table.rdb_shift;
        props->max_qp_init_rd_atom = 1 << mdev->qp_table.rdb_shift;
        props->local_ca_ack_delay  = mdev->limits.local_ca_ack_delay;

        err = 0;
 out:
        kfree(in_mad);
        kfree(out_mad);
        return err;
}

static int mthca_query_port(struct ib_device *ibdev,
                            u8 port, struct ib_port_attr *props)
{
        struct ib_smp *in_mad  = NULL;
        struct ib_smp *out_mad = NULL;
        int err = -ENOMEM;
        u8 status;

        in_mad  = kmalloc(sizeof *in_mad, GFP_KERNEL);
        out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
        if (!in_mad || !out_mad)
                goto out;

        memset(props, 0, sizeof *props);

        memset(in_mad, 0, sizeof *in_mad);
        in_mad->base_version       = 1;
        in_mad->mgmt_class         = IB_MGMT_CLASS_SUBN_LID_ROUTED;
        in_mad->class_version      = 1;
        in_mad->method             = IB_MGMT_METHOD_GET;
        in_mad->attr_id            = IB_SMP_ATTR_PORT_INFO;
        in_mad->attr_mod           = cpu_to_be32(port);

        err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
                            port, NULL, NULL, in_mad, out_mad,
                            &status);
        if (err)
                goto out;
        if (status) {
                err = -EINVAL;
                goto out;
        }

        props->lid               = be16_to_cpup((__be16 *) (out_mad->data + 16));
        props->lmc               = out_mad->data[34] & 0x7;
        props->sm_lid            = be16_to_cpup((__be16 *) (out_mad->data + 18));
        props->sm_sl             = out_mad->data[36] & 0xf;
        props->state             = out_mad->data[32] & 0xf;
        props->phys_state        = out_mad->data[33] >> 4;
        props->port_cap_flags    = be32_to_cpup((__be32 *) (out_mad->data + 20));
        props->gid_tbl_len       = to_mdev(ibdev)->limits.gid_table_len;
        props->max_msg_sz        = 0x80000000;
        props->pkey_tbl_len      = to_mdev(ibdev)->limits.pkey_table_len;
        props->qkey_viol_cntr    = be16_to_cpup((__be16 *) (out_mad->data + 48));
        props->active_width      = out_mad->data[31] & 0xf;
        props->active_speed      = out_mad->data[35] >> 4;

 out:
        kfree(in_mad);
        kfree(out_mad);
        return err;
}

static int mthca_modify_port(struct ib_device *ibdev,
                             u8 port, int port_modify_mask,
                             struct ib_port_modify *props)
{
        struct mthca_set_ib_param set_ib;
        struct ib_port_attr attr;
        int err;
        u8 status;

        if (down_interruptible(&to_mdev(ibdev)->cap_mask_mutex))
                return -ERESTARTSYS;

        err = mthca_query_port(ibdev, port, &attr);
        if (err)
                goto out;

        set_ib.set_si_guid     = 0;
        set_ib.reset_qkey_viol = !!(port_modify_mask & IB_PORT_RESET_QKEY_CNTR);

        set_ib.cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) &
                ~props->clr_port_cap_mask;

        err = mthca_SET_IB(to_mdev(ibdev), &set_ib, port, &status);
        if (err)
                goto out;
        if (status) {
                err = -EINVAL;
                goto out;
        }

out:
        up(&to_mdev(ibdev)->cap_mask_mutex);
        return err;
}

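/*
 * The P_Key table is read with a PKeyTable SMP: the attribute
 * modifier selects a block of 32 P_Keys, so the table index is split
 * into a block number (index / 32) and an offset within the returned
 * block (index % 32).
 */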
static int mthca_query_pkey(struct ib_device *ibdev,
                            u8 port, u16 index, u16 *pkey)
{
        struct ib_smp *in_mad  = NULL;
        struct ib_smp *out_mad = NULL;
        int err = -ENOMEM;
        u8 status;

        in_mad  = kmalloc(sizeof *in_mad, GFP_KERNEL);
        out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
        if (!in_mad || !out_mad)
                goto out;

        memset(in_mad, 0, sizeof *in_mad);
        in_mad->base_version       = 1;
        in_mad->mgmt_class         = IB_MGMT_CLASS_SUBN_LID_ROUTED;
        in_mad->class_version      = 1;
        in_mad->method             = IB_MGMT_METHOD_GET;
        in_mad->attr_id            = IB_SMP_ATTR_PKEY_TABLE;
        in_mad->attr_mod           = cpu_to_be32(index / 32);

        err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
                            port, NULL, NULL, in_mad, out_mad,
                            &status);
        if (err)
                goto out;
        if (status) {
                err = -EINVAL;
                goto out;
        }

        *pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]);

 out:
        kfree(in_mad);
        kfree(out_mad);
        return err;
}

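/*
 * A full GID is assembled from two queries: the 8-byte subnet prefix
 * comes from the port's PortInfo attribute, and the 8-byte port GUID
 * comes from the GuidInfo attribute, which returns GUIDs in blocks of
 * eight (attribute modifier index / 8, offset index % 8 within the
 * block).
 */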
static int mthca_query_gid(struct ib_device *ibdev, u8 port,
                           int index, union ib_gid *gid)
{
        struct ib_smp *in_mad  = NULL;
        struct ib_smp *out_mad = NULL;
        int err = -ENOMEM;
        u8 status;

        in_mad  = kmalloc(sizeof *in_mad, GFP_KERNEL);
        out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
        if (!in_mad || !out_mad)
                goto out;

        memset(in_mad, 0, sizeof *in_mad);
        in_mad->base_version       = 1;
        in_mad->mgmt_class         = IB_MGMT_CLASS_SUBN_LID_ROUTED;
        in_mad->class_version      = 1;
        in_mad->method             = IB_MGMT_METHOD_GET;
        in_mad->attr_id            = IB_SMP_ATTR_PORT_INFO;
        in_mad->attr_mod           = cpu_to_be32(port);

        err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
                            port, NULL, NULL, in_mad, out_mad,
                            &status);
        if (err)
                goto out;
        if (status) {
                err = -EINVAL;
                goto out;
        }

        memcpy(gid->raw, out_mad->data + 8, 8);

        memset(in_mad, 0, sizeof *in_mad);
        in_mad->base_version       = 1;
        in_mad->mgmt_class         = IB_MGMT_CLASS_SUBN_LID_ROUTED;
        in_mad->class_version      = 1;
        in_mad->method             = IB_MGMT_METHOD_GET;
        in_mad->attr_id            = IB_SMP_ATTR_GUID_INFO;
        in_mad->attr_mod           = cpu_to_be32(index / 8);

        err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
                            port, NULL, NULL, in_mad, out_mad,
                            &status);
        if (err)
                goto out;
        if (status) {
                err = -EINVAL;
                goto out;
        }

        memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);

 out:
        kfree(in_mad);
        kfree(out_mad);
        return err;
}

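/*
 * Per-process context setup: each user context gets its own UAR
 * (doorbell page) and a table for userspace doorbell records.  The QP
 * table size and UAR context size are reported back through udata so
 * userspace can size its own mappings.
 */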
static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev,
                                                struct ib_udata *udata)
{
        struct mthca_alloc_ucontext_resp uresp;
        struct mthca_ucontext           *context;
        int                              err;

        memset(&uresp, 0, sizeof uresp);

        uresp.qp_tab_size = to_mdev(ibdev)->limits.num_qps;
        if (mthca_is_memfree(to_mdev(ibdev)))
                uresp.uarc_size = to_mdev(ibdev)->uar_table.uarc_size;
        else
                uresp.uarc_size = 0;

        context = kmalloc(sizeof *context, GFP_KERNEL);
        if (!context)
                return ERR_PTR(-ENOMEM);

        err = mthca_uar_alloc(to_mdev(ibdev), &context->uar);
        if (err) {
                kfree(context);
                return ERR_PTR(err);
        }

        context->db_tab = mthca_init_user_db_tab(to_mdev(ibdev));
        if (IS_ERR(context->db_tab)) {
                err = PTR_ERR(context->db_tab);
                mthca_uar_free(to_mdev(ibdev), &context->uar);
                kfree(context);
                return ERR_PTR(err);
        }

        if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) {
                mthca_cleanup_user_db_tab(to_mdev(ibdev), &context->uar, context->db_tab);
                mthca_uar_free(to_mdev(ibdev), &context->uar);
                kfree(context);
                return ERR_PTR(-EFAULT);
        }

        return &context->ibucontext;
}

static int mthca_dealloc_ucontext(struct ib_ucontext *context)
{
        mthca_cleanup_user_db_tab(to_mdev(context->device), &to_mucontext(context)->uar,
                                  to_mucontext(context)->db_tab);
        mthca_uar_free(to_mdev(context->device), &to_mucontext(context)->uar);
        kfree(to_mucontext(context));

        return 0;
}

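/*
 * mmap of exactly one page maps the context's UAR into the process as
 * uncached I/O memory, so doorbells can be rung directly from
 * userspace.
 */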
static int mthca_mmap_uar(struct ib_ucontext *context,
                          struct vm_area_struct *vma)
{
        if (vma->vm_end - vma->vm_start != PAGE_SIZE)
                return -EINVAL;

        vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

        if (io_remap_pfn_range(vma, vma->vm_start,
                               to_mucontext(context)->uar.pfn,
                               PAGE_SIZE, vma->vm_page_prot))
                return -EAGAIN;

        return 0;
}

static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev,
                                    struct ib_ucontext *context,
                                    struct ib_udata *udata)
{
        struct mthca_pd *pd;
        int err;

        pd = kmalloc(sizeof *pd, GFP_KERNEL);
        if (!pd)
                return ERR_PTR(-ENOMEM);

        err = mthca_pd_alloc(to_mdev(ibdev), !context, pd);
        if (err) {
                kfree(pd);
                return ERR_PTR(err);
        }

        if (context) {
                if (ib_copy_to_udata(udata, &pd->pd_num, sizeof (__u32))) {
                        mthca_pd_free(to_mdev(ibdev), pd);
                        kfree(pd);
                        return ERR_PTR(-EFAULT);
                }
        }

        return &pd->ibpd;
}

static int mthca_dealloc_pd(struct ib_pd *pd)
{
        mthca_pd_free(to_mdev(pd->device), to_mpd(pd));
        kfree(pd);

        return 0;
}

static struct ib_ah *mthca_ah_create(struct ib_pd *pd,
                                     struct ib_ah_attr *ah_attr)
{
        int err;
        struct mthca_ah *ah;

        ah = kmalloc(sizeof *ah, GFP_ATOMIC);
        if (!ah)
                return ERR_PTR(-ENOMEM);

        err = mthca_create_ah(to_mdev(pd->device), to_mpd(pd), ah_attr, ah);
        if (err) {
                kfree(ah);
                return ERR_PTR(err);
        }

        return &ah->ibah;
}

static int mthca_ah_destroy(struct ib_ah *ah)
{
        mthca_destroy_ah(to_mdev(ah->device), to_mah(ah));
        kfree(ah);

        return 0;
}

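/*
 * SRQ creation: for userspace SRQs the caller passes its lkey,
 * doorbell index and doorbell page in the udata command, and the
 * user's doorbell record is mapped before the SRQ itself is
 * allocated; the SRQ number is then copied back so userspace can post
 * receives to it.
 */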
static struct ib_srq *mthca_create_srq(struct ib_pd *pd,
                                       struct ib_srq_init_attr *init_attr,
                                       struct ib_udata *udata)
{
        struct mthca_create_srq ucmd;
        struct mthca_ucontext *context = NULL;
        struct mthca_srq *srq;
        int err;

        srq = kmalloc(sizeof *srq, GFP_KERNEL);
        if (!srq)
                return ERR_PTR(-ENOMEM);

        if (pd->uobject) {
                context = to_mucontext(pd->uobject->context);

                if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
                        err = -EFAULT;
                        goto err_free;
                }

                err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
                                        context->db_tab, ucmd.db_index,
                                        ucmd.db_page);

                if (err)
                        goto err_free;

                srq->mr.ibmr.lkey = ucmd.lkey;
                srq->db_index     = ucmd.db_index;
        }

        err = mthca_alloc_srq(to_mdev(pd->device), to_mpd(pd),
                              &init_attr->attr, srq);

        if (err && pd->uobject)
                mthca_unmap_user_db(to_mdev(pd->device), &context->uar,
                                    context->db_tab, ucmd.db_index);

        if (err)
                goto err_free;

        if (context && ib_copy_to_udata(udata, &srq->srqn, sizeof (__u32))) {
                mthca_free_srq(to_mdev(pd->device), srq);
                err = -EFAULT;
                goto err_free;
        }

        return &srq->ibsrq;

err_free:
        kfree(srq);

        return ERR_PTR(err);
}

static int mthca_destroy_srq(struct ib_srq *srq)
{
        struct mthca_ucontext *context;

        if (srq->uobject) {
                context = to_mucontext(srq->uobject->context);

                mthca_unmap_user_db(to_mdev(srq->device), &context->uar,
                                    context->db_tab, to_msrq(srq)->db_index);
        }

        mthca_free_srq(to_mdev(srq->device), to_msrq(srq));
        kfree(srq);

        return 0;
}

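/*
 * QP creation: RC, UC and UD QPs map the user's send and receive
 * doorbell records when created from userspace, while the special
 * SMI/GSI QPs (QP0/QP1) may only be created in the kernel and get a
 * fixed QP number per port.  The capabilities actually allocated are
 * written back into init_attr->cap.
 */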
static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
                                     struct ib_qp_init_attr *init_attr,
                                     struct ib_udata *udata)
{
        struct mthca_create_qp ucmd;
        struct mthca_qp *qp;
        int err;

        switch (init_attr->qp_type) {
        case IB_QPT_RC:
        case IB_QPT_UC:
        case IB_QPT_UD:
        {
                struct mthca_ucontext *context;

                qp = kmalloc(sizeof *qp, GFP_KERNEL);
                if (!qp)
                        return ERR_PTR(-ENOMEM);

                if (pd->uobject) {
                        context = to_mucontext(pd->uobject->context);

                        if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
                                kfree(qp);
                                return ERR_PTR(-EFAULT);
                        }

                        err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
                                                context->db_tab,
                                                ucmd.sq_db_index, ucmd.sq_db_page);
                        if (err) {
                                kfree(qp);
                                return ERR_PTR(err);
                        }

                        err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
                                                context->db_tab,
                                                ucmd.rq_db_index, ucmd.rq_db_page);
                        if (err) {
                                mthca_unmap_user_db(to_mdev(pd->device),
                                                    &context->uar,
                                                    context->db_tab,
                                                    ucmd.sq_db_index);
                                kfree(qp);
                                return ERR_PTR(err);
                        }

                        qp->mr.ibmr.lkey = ucmd.lkey;
                        qp->sq.db_index  = ucmd.sq_db_index;
                        qp->rq.db_index  = ucmd.rq_db_index;
                }

                err = mthca_alloc_qp(to_mdev(pd->device), to_mpd(pd),
                                     to_mcq(init_attr->send_cq),
                                     to_mcq(init_attr->recv_cq),
                                     init_attr->qp_type, init_attr->sq_sig_type,
                                     &init_attr->cap, qp);

                if (err && pd->uobject) {
                        context = to_mucontext(pd->uobject->context);

                        mthca_unmap_user_db(to_mdev(pd->device),
                                            &context->uar,
                                            context->db_tab,
                                            ucmd.sq_db_index);
                        mthca_unmap_user_db(to_mdev(pd->device),
                                            &context->uar,
                                            context->db_tab,
                                            ucmd.rq_db_index);
                }

                qp->ibqp.qp_num = qp->qpn;
                break;
        }
        case IB_QPT_SMI:
        case IB_QPT_GSI:
        {
                /* Don't allow userspace to create special QPs */
                if (pd->uobject)
                        return ERR_PTR(-EINVAL);

                qp = kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL);
                if (!qp)
                        return ERR_PTR(-ENOMEM);

                qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;

                err = mthca_alloc_sqp(to_mdev(pd->device), to_mpd(pd),
                                      to_mcq(init_attr->send_cq),
                                      to_mcq(init_attr->recv_cq),
                                      init_attr->sq_sig_type, &init_attr->cap,
                                      qp->ibqp.qp_num, init_attr->port_num,
                                      to_msqp(qp));
                break;
        }
        default:
                /* Don't support raw QPs */
                return ERR_PTR(-ENOSYS);
        }

        if (err) {
                kfree(qp);
                return ERR_PTR(err);
        }

        init_attr->cap.max_inline_data = 0;
        init_attr->cap.max_send_wr     = qp->sq.max;
        init_attr->cap.max_recv_wr     = qp->rq.max;
        init_attr->cap.max_send_sge    = qp->sq.max_gs;
        init_attr->cap.max_recv_sge    = qp->rq.max_gs;

        return &qp->ibqp;
}

static int mthca_destroy_qp(struct ib_qp *qp)
{
        if (qp->uobject) {
                mthca_unmap_user_db(to_mdev(qp->device),
                                    &to_mucontext(qp->uobject->context)->uar,
                                    to_mucontext(qp->uobject->context)->db_tab,
                                    to_mqp(qp)->sq.db_index);
                mthca_unmap_user_db(to_mdev(qp->device),
                                    &to_mucontext(qp->uobject->context)->uar,
                                    to_mucontext(qp->uobject->context)->db_tab,
                                    to_mqp(qp)->rq.db_index);
        }
        mthca_free_qp(to_mdev(qp->device), to_mqp(qp));
        kfree(qp);
        return 0;
}

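/*
 * CQ creation rounds the requested number of entries up to the next
 * power of two (the CQE ring size must be a power of two), and for
 * userspace CQs maps the consumer-index and arm doorbell records that
 * the caller passed in before initializing the CQ in the appropriate
 * protection domain.
 */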
static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries,
                                     struct ib_ucontext *context,
                                     struct ib_udata *udata)
{
        struct mthca_create_cq ucmd;
        struct mthca_cq *cq;
        int nent;
        int err;

        if (context) {
                if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
                        return ERR_PTR(-EFAULT);

                err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
                                        to_mucontext(context)->db_tab,
                                        ucmd.set_db_index, ucmd.set_db_page);
                if (err)
                        return ERR_PTR(err);

                err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
                                        to_mucontext(context)->db_tab,
                                        ucmd.arm_db_index, ucmd.arm_db_page);
                if (err)
                        goto err_unmap_set;
        }

        cq = kmalloc(sizeof *cq, GFP_KERNEL);
        if (!cq) {
                err = -ENOMEM;
                goto err_unmap_arm;
        }

        if (context) {
                cq->mr.ibmr.lkey    = ucmd.lkey;
                cq->set_ci_db_index = ucmd.set_db_index;
                cq->arm_db_index    = ucmd.arm_db_index;
        }

        for (nent = 1; nent <= entries; nent <<= 1)
                ; /* nothing */

        err = mthca_init_cq(to_mdev(ibdev), nent,
                            context ? to_mucontext(context) : NULL,
                            context ? ucmd.pdn : to_mdev(ibdev)->driver_pd.pd_num,
                            cq);
        if (err)
                goto err_free;

        if (context && ib_copy_to_udata(udata, &cq->cqn, sizeof (__u32))) {
                err = -EFAULT;
                mthca_free_cq(to_mdev(ibdev), cq);
                goto err_free;
        }

        return &cq->ibcq;

err_free:
        kfree(cq);

err_unmap_arm:
        if (context)
                mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
                                    to_mucontext(context)->db_tab, ucmd.arm_db_index);

err_unmap_set:
        if (context)
                mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
                                    to_mucontext(context)->db_tab, ucmd.set_db_index);

        return ERR_PTR(err);
}

static int mthca_destroy_cq(struct ib_cq *cq)
{
        if (cq->uobject) {
                mthca_unmap_user_db(to_mdev(cq->device),
                                    &to_mucontext(cq->uobject->context)->uar,
                                    to_mucontext(cq->uobject->context)->db_tab,
                                    to_mcq(cq)->arm_db_index);
                mthca_unmap_user_db(to_mdev(cq->device),
                                    &to_mucontext(cq->uobject->context)->uar,
                                    to_mucontext(cq->uobject->context)->db_tab,
                                    to_mcq(cq)->set_ci_db_index);
        }
        mthca_free_cq(to_mdev(cq->device), to_mcq(cq));
        kfree(cq);

        return 0;
}

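/*
 * Translate IB verbs access flags into the MPT flag bits the hardware
 * understands; local read access is always granted.
 */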
static inline u32 convert_access(int acc)
{
        return (acc & IB_ACCESS_REMOTE_ATOMIC ? MTHCA_MPT_FLAG_ATOMIC       : 0) |
               (acc & IB_ACCESS_REMOTE_WRITE  ? MTHCA_MPT_FLAG_REMOTE_WRITE : 0) |
               (acc & IB_ACCESS_REMOTE_READ   ? MTHCA_MPT_FLAG_REMOTE_READ  : 0) |
               (acc & IB_ACCESS_LOCAL_WRITE   ? MTHCA_MPT_FLAG_LOCAL_WRITE  : 0) |
               MTHCA_MPT_FLAG_LOCAL_READ;
}

static struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, int acc)
{
        struct mthca_mr *mr;
        int err;

        mr = kmalloc(sizeof *mr, GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        err = mthca_mr_alloc_notrans(to_mdev(pd->device),
                                     to_mpd(pd)->pd_num,
                                     convert_access(acc), mr);

        if (err) {
                kfree(mr);
                return ERR_PTR(err);
        }

        return &mr->ibmr;
}

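/*
 * Physical MR registration: verify that all buffers share the same
 * offset within a page, pick the largest page shift that still covers
 * them (bounded by the alignment of the follow-on buffers, or by the
 * span of a single buffer), then build a flat page list at that
 * granularity and register it with the HCA.
 */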
static struct ib_mr *mthca_reg_phys_mr(struct ib_pd       *pd,
                                       struct ib_phys_buf *buffer_list,
                                       int                 num_phys_buf,
                                       int                 acc,
                                       u64                *iova_start)
{
        struct mthca_mr *mr;
        u64 *page_list;
        u64 total_size;
        u64 mask;
        int shift;
        int npages;
        int err;
        int i, j, n;

        /* First check that we have enough alignment */
        if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK))
                return ERR_PTR(-EINVAL);

        if (num_phys_buf > 1 &&
            ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK))
                return ERR_PTR(-EINVAL);

        mask = 0;
        total_size = 0;
        for (i = 0; i < num_phys_buf; ++i) {
                if (i != 0 && buffer_list[i].addr & ~PAGE_MASK)
                        return ERR_PTR(-EINVAL);
                if (i != 0 && i != num_phys_buf - 1 &&
                    (buffer_list[i].size & ~PAGE_MASK))
                        return ERR_PTR(-EINVAL);

                total_size += buffer_list[i].size;
                if (i > 0)
                        mask |= buffer_list[i].addr;
        }

        /* Find largest page shift we can use to cover buffers */
        for (shift = PAGE_SHIFT; shift < 31; ++shift)
                if (num_phys_buf > 1) {
                        if ((1ULL << shift) & mask)
                                break;
                } else {
                        if (1ULL << shift >=
                            buffer_list[0].size +
                            (buffer_list[0].addr & ((1ULL << shift) - 1)))
                                break;
                }

        buffer_list[0].size += buffer_list[0].addr & ((1ULL << shift) - 1);
        buffer_list[0].addr &= ~0ull << shift;

        mr = kmalloc(sizeof *mr, GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        npages = 0;
        for (i = 0; i < num_phys_buf; ++i)
                npages += (buffer_list[i].size + (1ULL << shift) - 1) >> shift;

        if (!npages)
                return &mr->ibmr;

        page_list = kmalloc(npages * sizeof *page_list, GFP_KERNEL);
        if (!page_list) {
                kfree(mr);
                return ERR_PTR(-ENOMEM);
        }

        n = 0;
        for (i = 0; i < num_phys_buf; ++i)
                for (j = 0;
                     j < (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
                     ++j)
                        page_list[n++] = buffer_list[i].addr + ((u64) j << shift);

        mthca_dbg(to_mdev(pd->device), "Registering memory at %llx (iova %llx) "
                  "in PD %x; shift %d, npages %d.\n",
                  (unsigned long long) buffer_list[0].addr,
                  (unsigned long long) *iova_start,
                  to_mpd(pd)->pd_num,
                  shift, npages);

        err = mthca_mr_alloc_phys(to_mdev(pd->device),
                                  to_mpd(pd)->pd_num,
                                  page_list, shift, npages,
                                  *iova_start, total_size,
                                  convert_access(acc), mr);

        if (err) {
                kfree(page_list);
                kfree(mr);
                return ERR_PTR(err);
        }

        kfree(page_list);
        return &mr->ibmr;
}

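/*
 * Userspace MR registration walks the pinned umem chunk list and
 * writes MTT entries from a scratch page.  The scratch page is
 * flushed two entries short of its capacity because the WRITE_MTT
 * mailbox reserves its first two 64-bit slots for the index and a
 * reserved field.
 */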
static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
                                       int acc, struct ib_udata *udata)
{
        struct mthca_dev *dev = to_mdev(pd->device);
        struct ib_umem_chunk *chunk;
        struct mthca_mr *mr;
        u64 *pages;
        int shift, n, len;
        int i, j, k;
        int err = 0;

        shift = ffs(region->page_size) - 1;

        mr = kmalloc(sizeof *mr, GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        n = 0;
        list_for_each_entry(chunk, &region->chunk_list, list)
                n += chunk->nents;

        mr->mtt = mthca_alloc_mtt(dev, n);
        if (IS_ERR(mr->mtt)) {
                err = PTR_ERR(mr->mtt);
                goto err;
        }

        pages = (u64 *) __get_free_page(GFP_KERNEL);
        if (!pages) {
                err = -ENOMEM;
                goto err_mtt;
        }

        i = n = 0;

        list_for_each_entry(chunk, &region->chunk_list, list)
                for (j = 0; j < chunk->nmap; ++j) {
                        len = sg_dma_len(&chunk->page_list[j]) >> shift;
                        for (k = 0; k < len; ++k) {
                                pages[i++] = sg_dma_address(&chunk->page_list[j]) +
                                        region->page_size * k;
                                /*
                                 * Be friendly to WRITE_MTT command
                                 * and leave two empty slots for the
                                 * index and reserved fields of the
                                 * mailbox.
                                 */
                                if (i == PAGE_SIZE / sizeof (u64) - 2) {
                                        err = mthca_write_mtt(dev, mr->mtt,
                                                              n, pages, i);
                                        if (err)
                                                goto mtt_done;
                                        n += i;
                                        i = 0;
                                }
                        }
                }

        if (i)
                err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
mtt_done:
        free_page((unsigned long) pages);
        if (err)
                goto err_mtt;

        err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, region->virt_base,
                             region->length, convert_access(acc), mr);

        if (err)
                goto err_mtt;

        return &mr->ibmr;

err_mtt:
        mthca_free_mtt(dev, mr->mtt);

err:
        kfree(mr);
        return ERR_PTR(err);
}

static int mthca_dereg_mr(struct ib_mr *mr)
{
        struct mthca_mr *mmr = to_mmr(mr);
        mthca_free_mr(to_mdev(mr->device), mmr);
        kfree(mmr);
        return 0;
}

static struct ib_fmr *mthca_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
                                      struct ib_fmr_attr *fmr_attr)
{
        struct mthca_fmr *fmr;
        int err;

        fmr = kmalloc(sizeof *fmr, GFP_KERNEL);
        if (!fmr)
                return ERR_PTR(-ENOMEM);

        memcpy(&fmr->attr, fmr_attr, sizeof *fmr_attr);
        err = mthca_fmr_alloc(to_mdev(pd->device), to_mpd(pd)->pd_num,
                             convert_access(mr_access_flags), fmr);

        if (err) {
                kfree(fmr);
                return ERR_PTR(err);
        }

        return &fmr->ibmr;
}

static int mthca_dealloc_fmr(struct ib_fmr *fmr)
{
        struct mthca_fmr *mfmr = to_mfmr(fmr);
        int err;

        err = mthca_free_fmr(to_mdev(fmr->device), mfmr);
        if (err)
                return err;

        kfree(mfmr);
        return 0;
}

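/*
 * All FMRs in the list must belong to the same device.  After the
 * per-FMR unmap (with a write barrier on mem-free HCAs), a single
 * SYNC_TPT firmware command synchronizes the HCA's translation
 * tables.
 */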
static int mthca_unmap_fmr(struct list_head *fmr_list)
{
        struct ib_fmr *fmr;
        int err;
        u8 status;
        struct mthca_dev *mdev = NULL;

        list_for_each_entry(fmr, fmr_list, list) {
                if (mdev && to_mdev(fmr->device) != mdev)
                        return -EINVAL;
                mdev = to_mdev(fmr->device);
        }

        if (!mdev)
                return 0;

        if (mthca_is_memfree(mdev)) {
                list_for_each_entry(fmr, fmr_list, list)
                        mthca_arbel_fmr_unmap(mdev, to_mfmr(fmr));

                wmb();
        } else
                list_for_each_entry(fmr, fmr_list, list)
                        mthca_tavor_fmr_unmap(mdev, to_mfmr(fmr));

        err = mthca_SYNC_TPT(mdev, &status);
        if (err)
                return err;
        if (status)
                return -EINVAL;
        return 0;
}

static ssize_t show_rev(struct class_device *cdev, char *buf)
{
        struct mthca_dev *dev = container_of(cdev, struct mthca_dev, ib_dev.class_dev);
        return sprintf(buf, "%x\n", dev->rev_id);
}

static ssize_t show_fw_ver(struct class_device *cdev, char *buf)
{
        struct mthca_dev *dev = container_of(cdev, struct mthca_dev, ib_dev.class_dev);
        return sprintf(buf, "%x.%x.%x\n", (int) (dev->fw_ver >> 32),
                       (int) (dev->fw_ver >> 16) & 0xffff,
                       (int) dev->fw_ver & 0xffff);
}

static ssize_t show_hca(struct class_device *cdev, char *buf)
{
        struct mthca_dev *dev = container_of(cdev, struct mthca_dev, ib_dev.class_dev);
        switch (dev->pdev->device) {
        case PCI_DEVICE_ID_MELLANOX_TAVOR:
                return sprintf(buf, "MT23108\n");
        case PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT:
                return sprintf(buf, "MT25208 (MT23108 compat mode)\n");
        case PCI_DEVICE_ID_MELLANOX_ARBEL:
                return sprintf(buf, "MT25208\n");
        case PCI_DEVICE_ID_MELLANOX_SINAI:
        case PCI_DEVICE_ID_MELLANOX_SINAI_OLD:
                return sprintf(buf, "MT25204\n");
        default:
                return sprintf(buf, "unknown\n");
        }
}

static ssize_t show_board(struct class_device *cdev, char *buf)
{
        struct mthca_dev *dev = container_of(cdev, struct mthca_dev, ib_dev.class_dev);
        return sprintf(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id);
}

static CLASS_DEVICE_ATTR(hw_rev,   S_IRUGO, show_rev,    NULL);
static CLASS_DEVICE_ATTR(fw_ver,   S_IRUGO, show_fw_ver, NULL);
static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca,    NULL);
static CLASS_DEVICE_ATTR(board_id, S_IRUGO, show_board,  NULL);

static struct class_device_attribute *mthca_class_attributes[] = {
        &class_device_attr_hw_rev,
        &class_device_attr_fw_ver,
        &class_device_attr_hca_type,
        &class_device_attr_board_id
};

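/*
 * mthca_register_device() fills in the ib_device method table (SRQ
 * and FMR entry points only when the HCA advertises those features,
 * and Arbel- or Tavor-specific fast-path handlers depending on
 * whether the HCA is mem-free), registers the device with the IB
 * core, and creates the sysfs attribute files declared above.
 */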
int mthca_register_device(struct mthca_dev *dev)
{
        int ret;
        int i;

        strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX);
        dev->ib_dev.owner                = THIS_MODULE;

        dev->ib_dev.node_type            = IB_NODE_CA;
        dev->ib_dev.phys_port_cnt        = dev->limits.num_ports;
        dev->ib_dev.dma_device           = &dev->pdev->dev;
        dev->ib_dev.class_dev.dev        = &dev->pdev->dev;
        dev->ib_dev.query_device         = mthca_query_device;
        dev->ib_dev.query_port           = mthca_query_port;
        dev->ib_dev.modify_port          = mthca_modify_port;
        dev->ib_dev.query_pkey           = mthca_query_pkey;
        dev->ib_dev.query_gid            = mthca_query_gid;
        dev->ib_dev.alloc_ucontext       = mthca_alloc_ucontext;
        dev->ib_dev.dealloc_ucontext     = mthca_dealloc_ucontext;
        dev->ib_dev.mmap                 = mthca_mmap_uar;
        dev->ib_dev.alloc_pd             = mthca_alloc_pd;
        dev->ib_dev.dealloc_pd           = mthca_dealloc_pd;
        dev->ib_dev.create_ah            = mthca_ah_create;
        dev->ib_dev.destroy_ah           = mthca_ah_destroy;

        if (dev->mthca_flags & MTHCA_FLAG_SRQ) {
                dev->ib_dev.create_srq           = mthca_create_srq;
                dev->ib_dev.destroy_srq          = mthca_destroy_srq;

                if (mthca_is_memfree(dev))
                        dev->ib_dev.post_srq_recv = mthca_arbel_post_srq_recv;
                else
                        dev->ib_dev.post_srq_recv = mthca_tavor_post_srq_recv;
        }

        dev->ib_dev.create_qp            = mthca_create_qp;
        dev->ib_dev.modify_qp            = mthca_modify_qp;
        dev->ib_dev.destroy_qp           = mthca_destroy_qp;
        dev->ib_dev.create_cq            = mthca_create_cq;
        dev->ib_dev.destroy_cq           = mthca_destroy_cq;
        dev->ib_dev.poll_cq              = mthca_poll_cq;
        dev->ib_dev.get_dma_mr           = mthca_get_dma_mr;
        dev->ib_dev.reg_phys_mr          = mthca_reg_phys_mr;
        dev->ib_dev.reg_user_mr          = mthca_reg_user_mr;
        dev->ib_dev.dereg_mr             = mthca_dereg_mr;

        if (dev->mthca_flags & MTHCA_FLAG_FMR) {
                dev->ib_dev.alloc_fmr            = mthca_alloc_fmr;
                dev->ib_dev.unmap_fmr            = mthca_unmap_fmr;
                dev->ib_dev.dealloc_fmr          = mthca_dealloc_fmr;
                if (mthca_is_memfree(dev))
                        dev->ib_dev.map_phys_fmr = mthca_arbel_map_phys_fmr;
                else
                        dev->ib_dev.map_phys_fmr = mthca_tavor_map_phys_fmr;
        }

        dev->ib_dev.attach_mcast         = mthca_multicast_attach;
        dev->ib_dev.detach_mcast         = mthca_multicast_detach;
        dev->ib_dev.process_mad          = mthca_process_mad;

        if (mthca_is_memfree(dev)) {
                dev->ib_dev.req_notify_cq = mthca_arbel_arm_cq;
                dev->ib_dev.post_send     = mthca_arbel_post_send;
                dev->ib_dev.post_recv     = mthca_arbel_post_receive;
        } else {
                dev->ib_dev.req_notify_cq = mthca_tavor_arm_cq;
                dev->ib_dev.post_send     = mthca_tavor_post_send;
                dev->ib_dev.post_recv     = mthca_tavor_post_receive;
        }

        init_MUTEX(&dev->cap_mask_mutex);

        ret = ib_register_device(&dev->ib_dev);
        if (ret)
                return ret;

        for (i = 0; i < ARRAY_SIZE(mthca_class_attributes); ++i) {
                ret = class_device_create_file(&dev->ib_dev.class_dev,
                                               mthca_class_attributes[i]);
                if (ret) {
                        ib_unregister_device(&dev->ib_dev);
                        return ret;
                }
        }

        return 0;
}

void mthca_unregister_device(struct mthca_dev *dev)
{
        ib_unregister_device(&dev->ib_dev);
}