IB/ipath: Enable 4KB MTU
author Dave Olson <dave.olson@qlogic.com>
Thu, 17 Apr 2008 04:01:12 +0000 (21:01 -0700)
committer Roland Dreier <rolandd@cisco.com>
Thu, 17 Apr 2008 04:01:12 +0000 (21:01 -0700)
Enable use of 4KB MTU.  Since the driver uses more pinned memory for
receive buffers when the 4KB MTU is enabled, whether or not the fabric
supports that MTU, add a "mtu4096" module parameter that can be used to
limit the MTU to 2KB when it is known that 4KB MTUs can't be used
anyway.

Signed-off-by: Dave Olson <dave.olson@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
drivers/infiniband/hw/ipath/ipath_driver.c
drivers/infiniband/hw/ipath/ipath_file_ops.c
drivers/infiniband/hw/ipath/ipath_iba6120.c
drivers/infiniband/hw/ipath/ipath_init_chip.c
drivers/infiniband/hw/ipath/ipath_kernel.h
drivers/infiniband/hw/ipath/ipath_mad.c
drivers/infiniband/hw/ipath/ipath_qp.c
drivers/infiniband/hw/ipath/ipath_verbs.c

index 367f2a3..7121fe8 100644 (file)
@@ -73,6 +73,10 @@ module_param_named(debug, ipath_debug, uint, S_IWUSR | S_IRUGO);
 MODULE_PARM_DESC(debug, "mask for debug prints");
 EXPORT_SYMBOL_GPL(ipath_debug);
 
+unsigned ipath_mtu4096 = 1; /* max 4KB IB mtu by default, if supported */
+module_param_named(mtu4096, ipath_mtu4096, uint, S_IRUGO);
+MODULE_PARM_DESC(mtu4096, "enable MTU of 4096 bytes, if supported");
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("QLogic <support@pathscale.com>");
 MODULE_DESCRIPTION("QLogic InfiniPath driver");
@@ -1800,7 +1804,7 @@ int ipath_set_mtu(struct ipath_devdata *dd, u16 arg)
         * piosize).  We check that it's one of the valid IB sizes.
         */
        if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
-           arg != 4096) {
+           (arg != 4096 || !ipath_mtu4096)) {
                ipath_dbg("Trying to set invalid mtu %u, failing\n", arg);
                ret = -EINVAL;
                goto bail;
@@ -1816,6 +1820,8 @@ int ipath_set_mtu(struct ipath_devdata *dd, u16 arg)
        if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) {
                /* Only if it's not the initial value (or reset to it) */
                if (piosize != dd->ipath_init_ibmaxlen) {
+                       if (arg > piosize && arg <= dd->ipath_init_ibmaxlen)
+                               piosize = dd->ipath_init_ibmaxlen;
                        dd->ipath_ibmaxlen = piosize;
                        changed = 1;
                }
@@ -1829,24 +1835,17 @@ int ipath_set_mtu(struct ipath_devdata *dd, u16 arg)
        }
 
        if (changed) {
+               u64 ibc = dd->ipath_ibcctrl, ibdw;
                /*
-                * set the IBC maxpktlength to the size of our pio
-                * buffers in words
+                * update our housekeeping variables, and set IBC max
+                * size, same as init code; max IBC is max we allow in
+                * buffer, less the qword pbc, plus 1 for ICRC, in dwords
                 */
-               u64 ibc = dd->ipath_ibcctrl;
+               dd->ipath_ibmaxlen = piosize - 2 * sizeof(u32);
+               ibdw = (dd->ipath_ibmaxlen >> 2) + 1;
                ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK <<
-                        INFINIPATH_IBCC_MAXPKTLEN_SHIFT);
-
-               piosize = piosize - 2 * sizeof(u32);    /* ignore pbc */
-               dd->ipath_ibmaxlen = piosize;
-               piosize /= sizeof(u32); /* in words */
-               /*
-                * for ICRC, which we only send in diag test pkt mode, and
-                * we don't need to worry about that for mtu
-                */
-               piosize += 1;
-
-               ibc |= piosize << INFINIPATH_IBCC_MAXPKTLEN_SHIFT;
+                        dd->ibcc_mpl_shift);
+               ibc |= ibdw << dd->ibcc_mpl_shift;
                dd->ipath_ibcctrl = ibc;
                ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
                                 dd->ipath_ibcctrl);
index 219b62d..cddf29b 100644 (file)
@@ -219,7 +219,12 @@ static int ipath_get_base_info(struct file *fp,
        kinfo->spi_pioalign = dd->ipath_palign;
 
        kinfo->spi_qpair = IPATH_KD_QP;
-       kinfo->spi_piosize = dd->ipath_ibmaxlen;
+       /*
+        * user mode PIO buffers are always 2KB, even when 4KB can
+        * be received, and sent via the kernel; this is ibmaxlen
+        * for 2K MTU.
+        */
+       kinfo->spi_piosize = dd->ipath_piosize2k - 2 * sizeof(u32);
        kinfo->spi_mtu = dd->ipath_ibmaxlen;    /* maxlen, not ibmtu */
        kinfo->spi_port = pd->port_port;
        kinfo->spi_subport = subport_fp(fp);
index 828066e..a9fc804 100644 (file)
@@ -1441,17 +1441,13 @@ static int ipath_pe_early_init(struct ipath_devdata *dd)
        dd->ipath_egrtidbase = (u64 __iomem *)
                ((char __iomem *) dd->ipath_kregbase + dd->ipath_rcvegrbase);
 
-       /*
-        * To truly support a 4KB MTU (for usermode), we need to
-        * bump this to a larger value.  For now, we use them for
-        * the kernel only.
-        */
-       dd->ipath_rcvegrbufsize = 2048;
+       dd->ipath_rcvegrbufsize = ipath_mtu4096 ? 4096 : 2048;
        /*
         * the min() check here is currently a nop, but it may not always
         * be, depending on just how we do ipath_rcvegrbufsize
         */
-       dd->ipath_ibmaxlen = min(dd->ipath_piosize2k,
+       dd->ipath_ibmaxlen = min(ipath_mtu4096 ? dd->ipath_piosize4k :
+                                dd->ipath_piosize2k,
                                 dd->ipath_rcvegrbufsize +
                                 (dd->ipath_rcvhdrentsize << 2));
        dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen;
index 5428aff..f0d7848 100644 (file)
@@ -155,24 +155,13 @@ static int bringup_link(struct ipath_devdata *dd)
                         dd->ipath_control);
 
        /*
-        * Note that prior to try 14 or 15 of IB, the credit scaling
-        * wasn't working, because it was swapped for writes with the
-        * 1 bit default linkstate field
+        * set initial max size pkt IBC will send, including ICRC; it's the
+        * PIO buffer size in dwords, less 1; also see ipath_set_mtu()
         */
+       val = (dd->ipath_ibmaxlen >> 2) + 1;
+       ibc = val << dd->ibcc_mpl_shift;
 
-       /* ignore pbc and align word */
-       val = dd->ipath_piosize2k - 2 * sizeof(u32);
-       /*
-        * for ICRC, which we only send in diag test pkt mode, and we
-        * don't need to worry about that for mtu
-        */
-       val += 1;
-       /*
-        * Set the IBC maxpktlength to the size of our pio buffers the
-        * maxpktlength is in words.  This is *not* the IB data MTU.
-        */
-       ibc = (val / sizeof(u32)) << INFINIPATH_IBCC_MAXPKTLEN_SHIFT;
-       /* in KB */
+       /* flowcontrolwatermark is in units of KBytes */
        ibc |= 0x5ULL << INFINIPATH_IBCC_FLOWCTRLWATERMARK_SHIFT;
        /*
         * How often flowctrl sent.  More or less in usecs; balance against
@@ -295,12 +284,9 @@ static int init_chip_first(struct ipath_devdata *dd,
        val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiosize);
        dd->ipath_piosize2k = val & ~0U;
        dd->ipath_piosize4k = val >> 32;
-       /*
-        * Note: the chips support a maximum MTU of 4096, but the driver
-        * hasn't implemented this feature yet, so set the initial value
-        * to 2048.
-        */
-       dd->ipath_ibmtu = 2048;
+       if (dd->ipath_piosize4k == 0 && ipath_mtu4096)
+               ipath_mtu4096 = 0; /* 4KB not supported by this chip */
+       dd->ipath_ibmtu = ipath_mtu4096 ? 4096 : 2048;
        val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiobufcnt);
        dd->ipath_piobcnt2k = val & ~0U;
        dd->ipath_piobcnt4k = val >> 32;
index 59dc895..70c0a0d 100644 (file)
@@ -1066,6 +1066,7 @@ dma_addr_t ipath_map_single(struct pci_dev *, void *, size_t, int);
 #endif
 
 extern unsigned ipath_debug; /* debugging bit mask */
+extern unsigned ipath_mtu4096;
 
 #define IPATH_MAX_PARITY_ATTEMPTS 10000 /* max times to try recovery */
 
index b34b91d..aca876b 100644 (file)
@@ -292,13 +292,9 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
        /* pip->vl_arb_high_cap; // only one VL */
        /* pip->vl_arb_low_cap; // only one VL */
        /* InitTypeReply = 0 */
-       /*
-        * Note: the chips support a maximum MTU of 4096, but the driver
-        * hasn't implemented this feature yet, so set the maximum value
-        * to 2048.
-        */
-       pip->inittypereply_mtucap = IB_MTU_2048;
-       // HCAs ignore VLStallCount and HOQLife
+       /* our mtu cap depends on whether 4K MTU enabled or not */
+       pip->inittypereply_mtucap = ipath_mtu4096 ? IB_MTU_4096 : IB_MTU_2048;
+       /* HCAs ignore VLStallCount and HOQLife */
        /* pip->vlstallcnt_hoqlife; */
        pip->operationalvl_pei_peo_fpi_fpo = 0x10;      /* OVLs = 1 */
        pip->mkey_violations = cpu_to_be16(dev->mkey_violations);
@@ -491,6 +487,8 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
                mtu = 2048;
                break;
        case IB_MTU_4096:
+               if (!ipath_mtu4096)
+                       goto err;
                mtu = 4096;
                break;
        default:
index 087ed31..5c8094a 100644 (file)
@@ -516,13 +516,13 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                        goto inval;
 
        /*
-        * Note: the chips support a maximum MTU of 4096, but the driver
-        * hasn't implemented this feature yet, so don't allow Path MTU
-        * values greater than 2048.
+        * don't allow invalid Path MTU values or greater than 2048
+        * unless we are configured for a 4KB MTU
         */
-       if (attr_mask & IB_QP_PATH_MTU)
-               if (attr->path_mtu > IB_MTU_2048)
-                       goto inval;
+       if ((attr_mask & IB_QP_PATH_MTU) &&
+               (ib_mtu_enum_to_int(attr->path_mtu) == -1 ||
+               (attr->path_mtu > IB_MTU_2048 && !ipath_mtu4096)))
+               goto inval;
 
        if (attr_mask & IB_QP_PATH_MIG_STATE)
                if (attr->path_mig_state != IB_MIG_MIGRATED &&
index 32d8f88..012ccb4 100644 (file)
@@ -1201,12 +1201,7 @@ static int ipath_query_port(struct ib_device *ibdev,
        props->max_vl_num = 1;          /* VLCap = VL0 */
        props->init_type_reply = 0;
 
-       /*
-        * Note: the chip supports a maximum MTU of 4096, but the driver
-        * hasn't implemented this feature yet, so set the maximum value
-        * to 2048.
-        */
-       props->max_mtu = IB_MTU_2048;
+       props->max_mtu = ipath_mtu4096 ? IB_MTU_4096 : IB_MTU_2048;
        switch (dd->ipath_ibmtu) {
        case 4096:
                mtu = IB_MTU_4096;