[PATCH] IB/ipath: fixes to performance get counters for IB compliance
author Bryan O'Sullivan <bos@pathscale.com>
Sat, 1 Jul 2006 11:36:09 +0000 (04:36 -0700)
committer Linus Torvalds <torvalds@g5.osdl.org>
Sat, 1 Jul 2006 16:56:01 +0000 (09:56 -0700)
This patch fixes some problems uncovered during IB compliance testing so
that the error counters reported in the Performance Get Counters packet
have the right values.

Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com>
Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Cc: "Michael S. Tsirkin" <mst@mellanox.co.il>
Cc: Roland Dreier <rolandd@cisco.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
drivers/infiniband/hw/ipath/ipath_driver.c
drivers/infiniband/hw/ipath/ipath_intr.c
drivers/infiniband/hw/ipath/ipath_kernel.h
drivers/infiniband/hw/ipath/ipath_layer.c
drivers/infiniband/hw/ipath/ipath_layer.h
drivers/infiniband/hw/ipath/ipath_mad.c
drivers/infiniband/hw/ipath/ipath_ud.c
drivers/infiniband/hw/ipath/ipath_verbs.c
drivers/infiniband/hw/ipath/ipath_verbs.h

index 979ae29..4109913 100644 (file)
@@ -460,6 +460,8 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
         * by ipath_setup_htconfig.
         */
        dd->ipath_flags = 0;
+       dd->ipath_lli_counter = 0;
+       dd->ipath_lli_errors = 0;
 
        if (dd->ipath_f_bus(dd, pdev))
                ipath_dev_err(dd, "Failed to setup config space; "
@@ -942,6 +944,18 @@ reloop:
                                   "tlen=%x opcode=%x egridx=%x: %s\n",
                                   eflags, l, etype, tlen, bthbytes[0],
                                   ips_get_index((__le32 *) rc), emsg);
+                       /* Count local link integrity errors. */
+                       if (eflags & (INFINIPATH_RHF_H_ICRCERR |
+                                     INFINIPATH_RHF_H_VCRCERR)) {
+                               u8 n = (dd->ipath_ibcctrl >>
+                                       INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
+                                       INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
+
+                               if (++dd->ipath_lli_counter > n) {
+                                       dd->ipath_lli_counter = 0;
+                                       dd->ipath_lli_errors++;
+                               }
+                       }
                } else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
                                int ret = __ipath_verbs_rcv(dd, rc + 1,
                                                            ebuf, tlen);
@@ -949,6 +963,9 @@ reloop:
                                        ipath_cdbg(VERBOSE,
                                                   "received IB packet, "
                                                   "not SMA (QP=%x)\n", qp);
+                               if (dd->ipath_lli_counter)
+                                       dd->ipath_lli_counter--;
+
                } else if (etype == RCVHQ_RCV_TYPE_EAGER) {
                        if (qp == IPATH_KD_QP &&
                            bthbytes[0] == ipath_layer_rcv_opcode &&
index f6ca59b..30160bc 100644 (file)
@@ -262,6 +262,7 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
                                     | IPATH_LINKACTIVE |
                                     IPATH_LINKARMED);
                *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
+               dd->ipath_lli_counter = 0;
                if (!noprint) {
                        if (((dd->ipath_lastibcstat >>
                              INFINIPATH_IBCS_LINKSTATE_SHIFT) &
index 493100d..fe3c862 100644 (file)
@@ -507,6 +507,11 @@ struct ipath_devdata {
        u8 ipath_pci_cacheline;
        /* LID mask control */
        u8 ipath_lmc;
+
+       /* local link integrity counter */
+       u32 ipath_lli_counter;
+       /* local link integrity errors */
+       u32 ipath_lli_errors;
 };
 
 extern struct list_head ipath_dev_list;
index 0f8b529..6e3d9bf 100644 (file)
@@ -1032,19 +1032,22 @@ int ipath_layer_get_counters(struct ipath_devdata *dd,
                ipath_snap_cntr(dd, dd->ipath_cregs->cr_ibsymbolerrcnt);
        cntrs->link_error_recovery_counter =
                ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt);
+       /*
+        * The link downed counter counts when the other side downs the
+        * connection.  NOTE(review): link downs we cause through local link
+        * integrity errors are not added in here yet -- TODO confirm intent.
+        */
        cntrs->link_downed_counter =
                ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkdowncnt);
        cntrs->port_rcv_errors =
                ipath_snap_cntr(dd, dd->ipath_cregs->cr_rxdroppktcnt) +
                ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvovflcnt) +
                ipath_snap_cntr(dd, dd->ipath_cregs->cr_portovflcnt) +
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_errrcvflowctrlcnt) +
                ipath_snap_cntr(dd, dd->ipath_cregs->cr_err_rlencnt) +
                ipath_snap_cntr(dd, dd->ipath_cregs->cr_invalidrlencnt) +
                ipath_snap_cntr(dd, dd->ipath_cregs->cr_erricrccnt) +
                ipath_snap_cntr(dd, dd->ipath_cregs->cr_errvcrccnt) +
                ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlpcrccnt) +
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlinkcnt) +
                ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt);
        cntrs->port_rcv_remphys_errors =
                ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvebpcnt);
@@ -1058,6 +1061,8 @@ int ipath_layer_get_counters(struct ipath_devdata *dd,
                ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
        cntrs->port_rcv_packets =
                ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
+       cntrs->local_link_integrity_errors = dd->ipath_lli_errors;
+       cntrs->excessive_buffer_overrun_errors = 0; /* XXX */
 
        ret = 0;
 
index 5dcffc7..ee617e8 100644 (file)
@@ -55,6 +55,8 @@ struct ipath_layer_counters {
        u64 port_rcv_data;
        u64 port_xmit_packets;
        u64 port_rcv_packets;
+       u32 local_link_integrity_errors;
+       u32 excessive_buffer_overrun_errors;
 };
 
 /*
index 8f76534..9575692 100644 (file)
@@ -613,6 +613,9 @@ struct ib_pma_portcounters {
 #define IB_PMA_SEL_PORT_RCV_ERRORS             __constant_htons(0x0008)
 #define IB_PMA_SEL_PORT_RCV_REMPHYS_ERRORS     __constant_htons(0x0010)
 #define IB_PMA_SEL_PORT_XMIT_DISCARDS          __constant_htons(0x0040)
+#define IB_PMA_SEL_LOCAL_LINK_INTEGRITY_ERRORS __constant_htons(0x0200)
+#define IB_PMA_SEL_EXCESSIVE_BUFFER_OVERRUNS   __constant_htons(0x0400)
+#define IB_PMA_SEL_PORT_VL15_DROPPED           __constant_htons(0x0800)
 #define IB_PMA_SEL_PORT_XMIT_DATA              __constant_htons(0x1000)
 #define IB_PMA_SEL_PORT_RCV_DATA               __constant_htons(0x2000)
 #define IB_PMA_SEL_PORT_XMIT_PACKETS           __constant_htons(0x4000)
@@ -859,6 +862,10 @@ static int recv_pma_get_portcounters(struct ib_perf *pmp,
        cntrs.port_rcv_data -= dev->z_port_rcv_data;
        cntrs.port_xmit_packets -= dev->z_port_xmit_packets;
        cntrs.port_rcv_packets -= dev->z_port_rcv_packets;
+       cntrs.local_link_integrity_errors -=
+               dev->z_local_link_integrity_errors;
+       cntrs.excessive_buffer_overrun_errors -=
+               dev->z_excessive_buffer_overrun_errors;
 
        memset(pmp->data, 0, sizeof(pmp->data));
 
@@ -896,6 +903,16 @@ static int recv_pma_get_portcounters(struct ib_perf *pmp,
        else
                p->port_xmit_discards =
                        cpu_to_be16((u16)cntrs.port_xmit_discards);
+       if (cntrs.local_link_integrity_errors > 0xFUL)
+               cntrs.local_link_integrity_errors = 0xFUL;
+       if (cntrs.excessive_buffer_overrun_errors > 0xFUL)
+               cntrs.excessive_buffer_overrun_errors = 0xFUL;
+       p->lli_ebor_errors = (cntrs.local_link_integrity_errors << 4) |
+               cntrs.excessive_buffer_overrun_errors;
+       if (dev->n_vl15_dropped > 0xFFFFUL)
+               p->vl15_dropped = __constant_cpu_to_be16(0xFFFF);
+       else
+               p->vl15_dropped = cpu_to_be16((u16)dev->n_vl15_dropped);
        if (cntrs.port_xmit_data > 0xFFFFFFFFUL)
                p->port_xmit_data = __constant_cpu_to_be32(0xFFFFFFFF);
        else
@@ -990,6 +1007,17 @@ static int recv_pma_set_portcounters(struct ib_perf *pmp,
        if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DISCARDS)
                dev->z_port_xmit_discards = cntrs.port_xmit_discards;
 
+       if (p->counter_select & IB_PMA_SEL_LOCAL_LINK_INTEGRITY_ERRORS)
+               dev->z_local_link_integrity_errors =
+                       cntrs.local_link_integrity_errors;
+
+       if (p->counter_select & IB_PMA_SEL_EXCESSIVE_BUFFER_OVERRUNS)
+               dev->z_excessive_buffer_overrun_errors =
+                       cntrs.excessive_buffer_overrun_errors;
+
+       if (p->counter_select & IB_PMA_SEL_PORT_VL15_DROPPED)
+               dev->n_vl15_dropped = 0;
+
        if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DATA)
                dev->z_port_xmit_data = cntrs.port_xmit_data;
 
@@ -1275,32 +1303,8 @@ int ipath_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
                      struct ib_wc *in_wc, struct ib_grh *in_grh,
                      struct ib_mad *in_mad, struct ib_mad *out_mad)
 {
-       struct ipath_ibdev *dev = to_idev(ibdev);
        int ret;
 
-       /*
-        * Snapshot current HW counters to "clear" them.
-        * This should be done when the driver is loaded except that for
-        * some reason we get a zillion errors when brining up the link.
-        */
-       if (dev->rcv_errors == 0) {
-               struct ipath_layer_counters cntrs;
-
-               ipath_layer_get_counters(to_idev(ibdev)->dd, &cntrs);
-               dev->rcv_errors++;
-               dev->z_symbol_error_counter = cntrs.symbol_error_counter;
-               dev->z_link_error_recovery_counter =
-                       cntrs.link_error_recovery_counter;
-               dev->z_link_downed_counter = cntrs.link_downed_counter;
-               dev->z_port_rcv_errors = cntrs.port_rcv_errors + 1;
-               dev->z_port_rcv_remphys_errors =
-                       cntrs.port_rcv_remphys_errors;
-               dev->z_port_xmit_discards = cntrs.port_xmit_discards;
-               dev->z_port_xmit_data = cntrs.port_xmit_data;
-               dev->z_port_rcv_data = cntrs.port_rcv_data;
-               dev->z_port_xmit_packets = cntrs.port_xmit_packets;
-               dev->z_port_rcv_packets = cntrs.port_rcv_packets;
-       }
        switch (in_mad->mad_hdr.mgmt_class) {
        case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
        case IB_MGMT_CLASS_SUBN_LID_ROUTED:
index 3b6d00b..4b05029 100644 (file)
@@ -560,7 +560,16 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
        spin_lock_irqsave(&rq->lock, flags);
        if (rq->tail == rq->head) {
                spin_unlock_irqrestore(&rq->lock, flags);
-               dev->n_pkt_drops++;
+               /*
+                * Count VL15 packets dropped due to no receive buffer.
+                * Otherwise, count them as buffer overruns since usually,
+                * the HW will be able to receive packets even if there are
+                * no QPs with posted receive buffers.
+                */
+               if (qp->ibqp.qp_num == 0)
+                       dev->n_vl15_dropped++;
+               else
+                       dev->rcv_errors++;
                goto bail;
        }
        /* Silently drop packets which are too big. */
index 059701d..ab4d533 100644 (file)
@@ -981,6 +981,7 @@ static int ipath_verbs_register_sysfs(struct ib_device *dev);
  */
 static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
 {
+       struct ipath_layer_counters cntrs;
        struct ipath_ibdev *idev;
        struct ib_device *dev;
        int ret;
@@ -1031,6 +1032,25 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
        idev->pma_counter_select[5] = IB_PMA_PORT_XMIT_WAIT;
        idev->link_width_enabled = 3;   /* 1x or 4x */
 
+       /* Snapshot current HW counters to "clear" them. */
+       ipath_layer_get_counters(dd, &cntrs);
+       idev->z_symbol_error_counter = cntrs.symbol_error_counter;
+       idev->z_link_error_recovery_counter =
+               cntrs.link_error_recovery_counter;
+       idev->z_link_downed_counter = cntrs.link_downed_counter;
+       idev->z_port_rcv_errors = cntrs.port_rcv_errors;
+       idev->z_port_rcv_remphys_errors =
+               cntrs.port_rcv_remphys_errors;
+       idev->z_port_xmit_discards = cntrs.port_xmit_discards;
+       idev->z_port_xmit_data = cntrs.port_xmit_data;
+       idev->z_port_rcv_data = cntrs.port_rcv_data;
+       idev->z_port_xmit_packets = cntrs.port_xmit_packets;
+       idev->z_port_rcv_packets = cntrs.port_rcv_packets;
+       idev->z_local_link_integrity_errors =
+               cntrs.local_link_integrity_errors;
+       idev->z_excessive_buffer_overrun_errors =
+               cntrs.excessive_buffer_overrun_errors;
+
        /*
         * The system image GUID is supposed to be the same for all
         * IB HCAs in a single system but since there can be other
index c57058f..1cb7970 100644 (file)
@@ -460,6 +460,8 @@ struct ipath_ibdev {
        u64 z_port_xmit_packets;                /* starting count for PMA */
        u64 z_port_rcv_packets;                 /* starting count for PMA */
        u32 z_pkey_violations;                  /* starting count for PMA */
+       u32 z_local_link_integrity_errors;      /* starting count for PMA */
+       u32 z_excessive_buffer_overrun_errors;  /* starting count for PMA */
        u32 n_rc_resends;
        u32 n_rc_acks;
        u32 n_rc_qacks;
@@ -469,6 +471,7 @@ struct ipath_ibdev {
        u32 n_other_naks;
        u32 n_timeouts;
        u32 n_pkt_drops;
+       u32 n_vl15_dropped;
        u32 n_wqe_errs;
        u32 n_rdma_dup_busy;
        u32 n_piowait;