omap: i2c: add a timeout to the busy waiting
[safe/jmp/linux-2.6] / drivers / edac / i5000_edac.c
index 5a85201..adc10a2 100644 (file)
 #define                        FERR_NF_UNCORRECTABLE   (FERR_NF_M12ERR | \
                                                        FERR_NF_M11ERR | \
                                                        FERR_NF_M10ERR | \
+                                                       FERR_NF_M9ERR | \
                                                        FERR_NF_M8ERR | \
                                                        FERR_NF_M7ERR | \
                                                        FERR_NF_M6ERR | \
@@ -301,6 +302,9 @@ static char *numcol_toString[] = {
 };
 #endif
 
+/* Enables reporting of miscellaneous messages as CE errors - default off */
+static int misc_messages;
+
 /* Enumeration of supported devices */
 enum i5000_chips {
        I5000P = 0,
@@ -466,7 +470,8 @@ static void i5000_process_fatal_error_info(struct mem_ctl_info *mci,
                                        struct i5000_error_info *info,
                                        int handle_errors)
 {
-       char msg[EDAC_MC_LABEL_LEN + 1 + 90];
+       char msg[EDAC_MC_LABEL_LEN + 1 + 160];
+       char *specific = NULL;
        u32 allErrors;
        int branch;
        int channel;
@@ -480,11 +485,6 @@ static void i5000_process_fatal_error_info(struct mem_ctl_info *mci,
        if (!allErrors)
                return;         /* if no error, return now */
 
-       /* ONLY ONE of the possible error bits will be set, as per the docs */
-       i5000_mc_printk(mci, KERN_ERR,
-                       "FATAL ERRORS Found!!! 1st FATAL Err Reg= 0x%x\n",
-                       allErrors);
-
        branch = EXTRACT_FBDCHAN_INDX(info->ferr_fat_fbd);
        channel = branch;
 
@@ -501,28 +501,42 @@ static void i5000_process_fatal_error_info(struct mem_ctl_info *mci,
                rdwr ? "Write" : "Read", ras, cas);
 
        /* Only 1 bit will be on */
-       if (allErrors & FERR_FAT_M1ERR) {
-               i5000_mc_printk(mci, KERN_ERR,
-                               "Alert on non-redundant retry or fast "
-                               "reset timeout\n");
-
-       } else if (allErrors & FERR_FAT_M2ERR) {
-               i5000_mc_printk(mci, KERN_ERR,
-                               "Northbound CRC error on non-redundant "
-                               "retry\n");
-
-       } else if (allErrors & FERR_FAT_M3ERR) {
-               i5000_mc_printk(mci, KERN_ERR,
-                               ">Tmid Thermal event with intelligent "
-                               "throttling disabled\n");
+       switch (allErrors) {
+       case FERR_FAT_M1ERR:
+               specific = "Alert on non-redundant retry or fast "
+                               "reset timeout";
+               break;
+       case FERR_FAT_M2ERR:
+               specific = "Northbound CRC error on non-redundant "
+                               "retry";
+               break;
+       case FERR_FAT_M3ERR:
+               {
+               static int done;
+
+               /*
+                * This error is generated to inform that the intelligent
+                * throttling is disabled and the temperature passed the
+                * specified middle point. Since this is something the BIOS
+                * should take care of, we'll warn only once to avoid
+                * worthlessly flooding the log.
+                */
+               if (done)
+                       return;
+               done++;
+
+               specific = ">Tmid Thermal event with intelligent "
+                          "throttling disabled";
+               }
+               break;
        }
 
        /* Form out message */
        snprintf(msg, sizeof(msg),
                 "(Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d CAS=%d "
-                "FATAL Err=0x%x)",
+                "FATAL Err=0x%x (%s))",
                 branch >> 1, bank, rdwr ? "Write" : "Read", ras, cas,
-                allErrors);
+                allErrors, specific);
 
        /* Call the helper to output message */
        edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg);
@@ -539,7 +553,8 @@ static void i5000_process_nonfatal_error_info(struct mem_ctl_info *mci,
                                        struct i5000_error_info *info,
                                        int handle_errors)
 {
-       char msg[EDAC_MC_LABEL_LEN + 1 + 90];
+       char msg[EDAC_MC_LABEL_LEN + 1 + 170];
+       char *specific = NULL;
        u32 allErrors;
        u32 ue_errors;
        u32 ce_errors;
@@ -557,16 +572,18 @@ static void i5000_process_nonfatal_error_info(struct mem_ctl_info *mci,
                return;         /* if no error, return now */
 
        /* ONLY ONE of the possible error bits will be set, as per the docs */
-       i5000_mc_printk(mci, KERN_WARNING,
-                       "NON-FATAL ERRORS Found!!! 1st NON-FATAL Err "
-                       "Reg= 0x%x\n", allErrors);
-
        ue_errors = allErrors & FERR_NF_UNCORRECTABLE;
        if (ue_errors) {
                debugf0("\tUncorrected bits= 0x%x\n", ue_errors);
 
                branch = EXTRACT_FBDCHAN_INDX(info->ferr_nf_fbd);
-               channel = branch;
+
+               /*
+                * According to the i5000 datasheet, bit 28 has no significance
+                * for errors M4Err-M12Err and M17Err-M21Err, on FERR_NF_FBD
+                */
+               channel = branch & 2;
+
                bank = NREC_BANK(info->nrecmema);
                rank = NREC_RANK(info->nrecmema);
                rdwr = NREC_RDWR(info->nrecmema);
@@ -579,12 +596,47 @@ static void i5000_process_nonfatal_error_info(struct mem_ctl_info *mci,
                        rank, channel, channel + 1, branch >> 1, bank,
                        rdwr ? "Write" : "Read", ras, cas);
 
+               switch (ue_errors) {
+               case FERR_NF_M12ERR:
+                       specific = "Non-Aliased Uncorrectable Patrol Data ECC";
+                       break;
+               case FERR_NF_M11ERR:
+                       specific = "Non-Aliased Uncorrectable Spare-Copy "
+                                       "Data ECC";
+                       break;
+               case FERR_NF_M10ERR:
+                       specific = "Non-Aliased Uncorrectable Mirrored Demand "
+                                       "Data ECC";
+                       break;
+               case FERR_NF_M9ERR:
+                       specific = "Non-Aliased Uncorrectable Non-Mirrored "
+                                       "Demand Data ECC";
+                       break;
+               case FERR_NF_M8ERR:
+                       specific = "Aliased Uncorrectable Patrol Data ECC";
+                       break;
+               case FERR_NF_M7ERR:
+                       specific = "Aliased Uncorrectable Spare-Copy Data ECC";
+                       break;
+               case FERR_NF_M6ERR:
+                       specific = "Aliased Uncorrectable Mirrored Demand "
+                                       "Data ECC";
+                       break;
+               case FERR_NF_M5ERR:
+                       specific = "Aliased Uncorrectable Non-Mirrored Demand "
+                                       "Data ECC";
+                       break;
+               case FERR_NF_M4ERR:
+                       specific = "Uncorrectable Data ECC on Replay";
+                       break;
+               }
+
                /* Form out message */
                snprintf(msg, sizeof(msg),
                         "(Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d "
-                        "CAS=%d, UE Err=0x%x)",
+                        "CAS=%d, UE Err=0x%x (%s))",
                         branch >> 1, bank, rdwr ? "Write" : "Read", ras, cas,
-                        ue_errors);
+                        ue_errors, specific);
 
                /* Call the helper to output message */
                edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg);
@@ -616,51 +668,74 @@ static void i5000_process_nonfatal_error_info(struct mem_ctl_info *mci,
                        rank, channel, branch >> 1, bank,
                        rdwr ? "Write" : "Read", ras, cas);
 
+               switch (ce_errors) {
+               case FERR_NF_M17ERR:
+                       specific = "Correctable Non-Mirrored Demand Data ECC";
+                       break;
+               case FERR_NF_M18ERR:
+                       specific = "Correctable Mirrored Demand Data ECC";
+                       break;
+               case FERR_NF_M19ERR:
+                       specific = "Correctable Spare-Copy Data ECC";
+                       break;
+               case FERR_NF_M20ERR:
+                       specific = "Correctable Patrol Data ECC";
+                       break;
+               }
+
                /* Form out message */
                snprintf(msg, sizeof(msg),
                         "(Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d "
-                        "CAS=%d, CE Err=0x%x)", branch >> 1, bank,
-                        rdwr ? "Write" : "Read", ras, cas, ce_errors);
+                        "CAS=%d, CE Err=0x%x (%s))", branch >> 1, bank,
+                        rdwr ? "Write" : "Read", ras, cas, ce_errors,
+                        specific);
 
                /* Call the helper to output message */
                edac_mc_handle_fbd_ce(mci, rank, channel, msg);
        }
 
-       /* See if any of the thermal errors have fired */
-       misc_errors = allErrors & FERR_NF_THERMAL;
-       if (misc_errors) {
-               i5000_printk(KERN_WARNING, "\tTHERMAL Error, bits= 0x%x\n",
-                       misc_errors);
-       }
-
-       /* See if any of the thermal errors have fired */
-       misc_errors = allErrors & FERR_NF_NON_RETRY;
-       if (misc_errors) {
-               i5000_printk(KERN_WARNING, "\tNON-Retry  Errors, bits= 0x%x\n",
-                       misc_errors);
-       }
+       if (!misc_messages)
+               return;
 
-       /* See if any of the thermal errors have fired */
-       misc_errors = allErrors & FERR_NF_NORTH_CRC;
+       misc_errors = allErrors & (FERR_NF_NON_RETRY | FERR_NF_NORTH_CRC |
+                                  FERR_NF_SPD_PROTOCOL | FERR_NF_DIMM_SPARE);
        if (misc_errors) {
-               i5000_printk(KERN_WARNING,
-                       "\tNORTHBOUND CRC  Error, bits= 0x%x\n",
-                       misc_errors);
-       }
+               switch (misc_errors) {
+               case FERR_NF_M13ERR:
+                       specific = "Non-Retry or Redundant Retry FBD Memory "
+                                       "Alert or Redundant Fast Reset Timeout";
+                       break;
+               case FERR_NF_M14ERR:
+                       specific = "Non-Retry or Redundant Retry FBD "
+                                       "Configuration Alert";
+                       break;
+               case FERR_NF_M15ERR:
+                       specific = "Non-Retry or Redundant Retry FBD "
+                                       "Northbound CRC error on read data";
+                       break;
+               case FERR_NF_M21ERR:
+                       specific = "FBD Northbound CRC error on "
+                                       "FBD Sync Status";
+                       break;
+               case FERR_NF_M22ERR:
+                       specific = "SPD protocol error";
+                       break;
+               case FERR_NF_M27ERR:
+                       specific = "DIMM-spare copy started";
+                       break;
+               case FERR_NF_M28ERR:
+                       specific = "DIMM-spare copy completed";
+                       break;
+               }
+               branch = EXTRACT_FBDCHAN_INDX(info->ferr_nf_fbd);
 
-       /* See if any of the thermal errors have fired */
-       misc_errors = allErrors & FERR_NF_SPD_PROTOCOL;
-       if (misc_errors) {
-               i5000_printk(KERN_WARNING,
-                       "\tSPD Protocol  Error, bits= 0x%x\n",
-                       misc_errors);
-       }
+               /* Form out message */
+               snprintf(msg, sizeof(msg),
+                        "(Branch=%d Err=%#x (%s))", branch >> 1,
+                        misc_errors, specific);
 
-       /* See if any of the thermal errors have fired */
-       misc_errors = allErrors & FERR_NF_DIMM_SPARE;
-       if (misc_errors) {
-               i5000_printk(KERN_WARNING, "\tDIMM-Spare  Error, bits= 0x%x\n",
-                       misc_errors);
+               /* Call the helper to output message */
+               edac_mc_handle_fbd_ce(mci, 0, 0, msg);
        }
 }
 
@@ -1104,7 +1179,7 @@ static void i5000_get_mc_regs(struct mem_ctl_info *mci)
                        pci_read_config_word(pvt->branch_1, where,
                                        &pvt->b1_mtr[slot_row]);
                        debugf2("MTR%d where=0x%x B1 value=0x%x\n", slot_row,
-                               where, pvt->b0_mtr[slot_row]);
+                               where, pvt->b1_mtr[slot_row]);
                } else {
                        pvt->b1_mtr[slot_row] = 0;
                }
@@ -1163,7 +1238,7 @@ static int i5000_init_csrows(struct mem_ctl_info *mci)
        struct csrow_info *p_csrow;
        int empty, channel_count;
        int max_csrows;
-       int mtr;
+       int mtr, mtr1;
        int csrow_megs;
        int channel;
        int csrow;
@@ -1182,9 +1257,10 @@ static int i5000_init_csrows(struct mem_ctl_info *mci)
 
                /* use branch 0 for the basis */
                mtr = pvt->b0_mtr[csrow >> 1];
+               mtr1 = pvt->b1_mtr[csrow >> 1];
 
                /* if no DIMMS on this row, continue */
-               if (!MTR_DIMMS_PRESENT(mtr))
+               if (!MTR_DIMMS_PRESENT(mtr) && !MTR_DIMMS_PRESENT(mtr1))
                        continue;
 
                /* FAKE OUT VALUES, FIXME */
@@ -1286,16 +1362,6 @@ static int i5000_probe1(struct pci_dev *pdev, int dev_idx)
        if (PCI_FUNC(pdev->devfn) != 0)
                return -ENODEV;
 
-       /* make sure error reporting method is sane */
-       switch (edac_op_state) {
-       case EDAC_OPSTATE_POLL:
-       case EDAC_OPSTATE_NMI:
-               break;
-       default:
-               edac_op_state = EDAC_OPSTATE_POLL;
-               break;
-       }
-
        /* Ask the devices for the number of CSROWS and CHANNELS so
         * that we can calculate the memory resources, etc
         *
@@ -1322,6 +1388,7 @@ static int i5000_probe1(struct pci_dev *pdev, int dev_idx)
        if (mci == NULL)
                return -ENOMEM;
 
+       kobject_get(&mci->edac_mci_kobj);
        debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci);
 
        mci->dev = &pdev->dev;  /* record ptr  to the generic device */
@@ -1394,6 +1461,7 @@ fail1:
        i5000_put_devices(mci);
 
 fail0:
+       kobject_put(&mci->edac_mci_kobj);
        edac_mc_free(mci);
        return -ENODEV;
 }
@@ -1439,7 +1507,7 @@ static void __devexit i5000_remove_one(struct pci_dev *pdev)
 
        /* retrieve references to resources, and free those resources */
        i5000_put_devices(mci);
-
+       kobject_put(&mci->edac_mci_kobj);
        edac_mc_free(mci);
 }
 
@@ -1478,6 +1546,9 @@ static int __init i5000_init(void)
 
        debugf2("MC: " __FILE__ ": %s()\n", __func__);
 
+       /* Ensure that the OPSTATE is set correctly for POLL or NMI */
+       opstate_init();
+
        pci_rc = pci_register_driver(&i5000_driver);
 
        return (pci_rc < 0) ? pci_rc : 0;
@@ -1501,5 +1572,9 @@ MODULE_AUTHOR
     ("Linux Networx (http://lnxi.com) Doug Thompson <norsk5@xmission.com>");
 MODULE_DESCRIPTION("MC Driver for Intel I5000 memory controllers - "
                I5000_REVISION);
+
 module_param(edac_op_state, int, 0444);
 MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
+module_param(misc_messages, int, 0444);
+MODULE_PARM_DESC(misc_messages, "Log miscellaneous non fatal messages");
+