[PATCH] EDAC: protect memory controller list
[safe/jmp/linux-2.6] / drivers / edac / i82875p_edac.c
1 /*
2  * Intel D82875P Memory Controller kernel module
3  * (C) 2003 Linux Networx (http://lnxi.com)
4  * This file may be distributed under the terms of the
5  * GNU General Public License.
6  *
7  * Written by Thayne Harbaugh
8  * Contributors:
9  *      Wang Zhenyu at intel.com
10  *
11  * $Id: edac_i82875p.c,v 1.5.2.11 2005/10/05 00:43:44 dsp_llnl Exp $
12  *
13  * Note: E7210 appears same as D82875P - zhenyu.z.wang at intel.com
14  */
15
16
17 #include <linux/config.h>
18 #include <linux/module.h>
19 #include <linux/init.h>
20
21 #include <linux/pci.h>
22 #include <linux/pci_ids.h>
23
24 #include <linux/slab.h>
25
26 #include "edac_mc.h"
27
28
/*
 * Driver-level log helper: forwards to edac_printk() with the fixed
 * "i82875p" prefix string inserted after the log level.
 */
#define i82875p_printk(level, fmt, args...) \
	edac_printk(level, "i82875p", fmt, ##args)
31
32
/*
 * Per-controller log helper: forwards to edac_mc_chipset_printk() for the
 * given mci with the fixed "i82875p" chipset string.
 */
#define i82875p_mc_printk(mci, level, fmt, args...) \
	edac_mc_chipset_printk(mci, level, "i82875p", fmt, ##args)
35
36
/*
 * PCI device IDs of the two 82875P devices this driver uses.  Each one is
 * defined here only when no previously included header already provides it.
 */
#if !defined(PCI_DEVICE_ID_INTEL_82875_0)
#define PCI_DEVICE_ID_INTEL_82875_0	0x2578
#endif

#if !defined(PCI_DEVICE_ID_INTEL_82875_6)
#define PCI_DEVICE_ID_INTEL_82875_6	0x257e
#endif
44
45
/*
 * Number of chip-select rows for a given channel count: eight rows in
 * single channel mode, four in dual channel mode.
 */
#define I82875P_NR_CSROWS(nr_chans)	(8 / (nr_chans))
48
49
/* Intel 82875p register addresses - device 0 function 0 - DRAM Controller */

/*
 * Error Address Pointer (32b)
 *
 * 31:12 block address
 * 11:0  reserved
 */
#define I82875P_EAP		0x58

/*
 * DRAM Error Syndrome (8b)
 *
 *  7:0  DRAM ECC Syndrome
 */
#define I82875P_DERRSYN		0x5c

/*
 * DRAM Error Status (8b)
 *
 *  7:1  reserved
 *  0    Error channel 0/1
 */
#define I82875P_DES		0x5d

/*
 * Error Status Register (16b)
 *
 * 15:10 reserved
 *  9    non-DRAM lock error (ndlock)
 *  8    Sftwr Generated SMI
 *  7    ECC UE
 *  6    reserved
 *  5    MCH detects unimplemented cycle
 *  4    AGP access outside GA
 *  3    Invalid AGP access
 *  2    Invalid GA translation table
 *  1    Unsupported AGP command
 *  0    ECC CE
 */
#define I82875P_ERRSTS		0xc8

/*
 * Error Command (16b)
 *
 * 15:10 reserved
 *  9    SERR on non-DRAM lock
 *  8    SERR on ECC UE
 *  7    SERR on ECC CE
 *  6    target abort on high exception
 *  5    detect unimplemented cyc
 *  4    AGP access outside of GA
 *  3    SERR on invalid AGP access
 *  2    invalid translation table
 *  1    SERR on unsupported AGP command
 *  0    reserved
 */
#define I82875P_ERRCMD		0xca
97
98
/* Intel 82875p register addresses - device 6 function 0 - DRAM Controller */

/*
 * PCI Command Register (16b)
 *
 * 15:10 reserved
 *  9    fast back-to-back - ro 0
 *  8    SERR enable - ro 0
 *  7    addr/data stepping - ro 0
 *  6    parity err enable - ro 0
 *  5    VGA palette snoop - ro 0
 *  4    mem wr & invalidate - ro 0
 *  3    special cycle - ro 0
 *  2    bus master - ro 0
 *  1    mem access dev6 - 0(dis),1(en)
 *  0    IO access dev3 - 0(dis),1(en)
 *
 * NOTE(review): "dev3" in bit 0 is carried over from the original table;
 * it may be a typo for dev6 - confirm against the datasheet.
 */
#define I82875P_PCICMD6		0x04

/*
 * Mem Delays Base ADDR Reg (32b)
 *
 * 31:12 mem base addr [31:12]
 * 11:4  address mask - ro 0
 *  3    prefetchable - ro 0(non),1(pre)
 *  2:1  mem type - ro 0
 *  0    mem space - ro 0
 */
#define I82875P_BAR6		0x10
123
/* Intel 82875p MMIO register space - device 0 function 0 - MMR space */

/* DRB values count 64MiB units, i.e. a shift of 26 bits to get bytes */
#define I82875P_DRB_SHIFT	26

/*
 * DRAM Row Boundary (8b x 8)
 *
 *  7    reserved
 *  6:0  64MiB row boundary addr
 */
#define I82875P_DRB		0x00

/*
 * DRAM Row Attribute (4b x 8)
 *
 *  7    reserved
 *  6:4  row attr row 1
 *  3    reserved
 *  2:0  row attr row 0
 *
 * 000 =  4KiB
 * 001 =  8KiB
 * 010 = 16KiB
 * 011 = 32KiB
 */
#define I82875P_DRA		0x10

/*
 * DRAM Controller Mode (32b)
 *
 * 31:30 reserved
 * 29    init complete
 * 28:23 reserved
 * 22:21 nr chan 00=1,01=2
 * 20    reserved
 * 19:18 Data Integ Mode 00=none,01=ecc
 * 17:11 reserved
 * 10:8  refresh mode
 *  7    reserved
 *  6:4  mode select
 *  3:2  reserved
 *  1:0  DRAM type 01=DDR
 */
#define I82875P_DRC		0x68
161
162
/* Supported chip variants; the values index the i82875p_devs[] table. */
enum i82875p_chips {
	I82875P = 0
};
166
167
/*
 * Driver-private data kept in mci->pvt_info.
 *
 * ovrfl_pdev:   the overflow device (PCI device 6) located or exposed
 *               by i82875p_probe1()
 * ovrfl_window: ioremapped view of that device's resource 0, used to
 *               read the DRC and DRB registers
 */
168 struct i82875p_pvt {
169         struct pci_dev *ovrfl_pdev;
170         void __iomem *ovrfl_window;
171 };
172
173
/*
 * Static per-chip description.  ctl_name is the string that
 * i82875p_probe1() assigns to mci->ctl_name.
 */
174 struct i82875p_dev_info {
175         const char *ctl_name;
176 };
177
178
/*
 * Snapshot of the error registers filled in by i82875p_get_error_info().
 *
 * errsts:  first read of I82875P_ERRSTS
 * eap:     I82875P_EAP (error address pointer)
 * des:     I82875P_DES (bit 0 selects the error channel)
 * derrsyn: I82875P_DERRSYN (ECC syndrome)
 * errsts2: second read of I82875P_ERRSTS, taken after the other
 *          registers so a change in the CE/UE bits can be detected
 */
179 struct i82875p_error_info {
180         u16 errsts;
181         u32 eap;
182         u8 des;
183         u8 derrsyn;
184         u16 errsts2;
185 };
186
187
188 static const struct i82875p_dev_info i82875p_devs[] = {
189         [I82875P] = {
190                      .ctl_name = "i82875p"},
191 };
192
/*
 * init dev: in case that AGP code has already registered driver.
 * Starts out NULL (static storage) and is filled in by
 * i82875p_init_one() or i82875p_init().
 */
static struct pci_dev *mci_pdev;

/*
 * Tested by i82875p_exit(): the manual remove/put of mci_pdev is done
 * only when this is zero.
 */
static int i82875p_registered = 1;
196
197 static void i82875p_get_error_info (struct mem_ctl_info *mci,
198                 struct i82875p_error_info *info)
199 {
200         /*
201          * This is a mess because there is no atomic way to read all the
202          * registers at once and the registers can transition from CE being
203          * overwritten by UE.
204          */
205         pci_read_config_word(mci->pdev, I82875P_ERRSTS, &info->errsts);
206         pci_read_config_dword(mci->pdev, I82875P_EAP, &info->eap);
207         pci_read_config_byte(mci->pdev, I82875P_DES, &info->des);
208         pci_read_config_byte(mci->pdev, I82875P_DERRSYN, &info->derrsyn);
209         pci_read_config_word(mci->pdev, I82875P_ERRSTS, &info->errsts2);
210
211         pci_write_bits16(mci->pdev, I82875P_ERRSTS, 0x0081, 0x0081);
212
213         /*
214          * If the error is the same then we can for both reads then
215          * the first set of reads is valid.  If there is a change then
216          * there is a CE no info and the second set of reads is valid
217          * and should be UE info.
218          */
219         if (!(info->errsts2 & 0x0081))
220                 return;
221         if ((info->errsts ^ info->errsts2) & 0x0081) {
222                 pci_read_config_dword(mci->pdev, I82875P_EAP, &info->eap);
223                 pci_read_config_byte(mci->pdev, I82875P_DES, &info->des);
224                 pci_read_config_byte(mci->pdev, I82875P_DERRSYN,
225                     &info->derrsyn);
226         }
227 }
228
229 static int i82875p_process_error_info (struct mem_ctl_info *mci,
230                 struct i82875p_error_info *info, int handle_errors)
231 {
232         int row, multi_chan;
233
234         multi_chan = mci->csrows[0].nr_channels - 1;
235
236         if (!(info->errsts2 & 0x0081))
237                 return 0;
238
239         if (!handle_errors)
240                 return 1;
241
242         if ((info->errsts ^ info->errsts2) & 0x0081) {
243                 edac_mc_handle_ce_no_info(mci, "UE overwrote CE");
244                 info->errsts = info->errsts2;
245         }
246
247         info->eap >>= PAGE_SHIFT;
248         row = edac_mc_find_csrow_by_page(mci, info->eap);
249
250         if (info->errsts & 0x0080)
251                 edac_mc_handle_ue(mci, info->eap, 0, row, "i82875p UE");
252         else
253                 edac_mc_handle_ce(mci, info->eap, 0, info->derrsyn, row,
254                                        multi_chan ? (info->des & 0x1) : 0,
255                                        "i82875p CE");
256
257         return 1;
258 }
259
260
261 static void i82875p_check(struct mem_ctl_info *mci)
262 {
263         struct i82875p_error_info info;
264
265         debugf1("MC%d: %s()\n", mci->mc_idx, __func__);
266         i82875p_get_error_info(mci, &info);
267         i82875p_process_error_info(mci, &info, 1);
268 }
269
270
271 #ifdef CONFIG_PROC_FS
272 extern int pci_proc_attach_device(struct pci_dev *);
273 #endif
274
275 static int i82875p_probe1(struct pci_dev *pdev, int dev_idx)
276 {
277         int rc = -ENODEV;
278         int index;
279         struct mem_ctl_info *mci = NULL;
280         struct i82875p_pvt *pvt = NULL;
281         unsigned long last_cumul_size;
282         struct pci_dev *ovrfl_pdev;
283         void __iomem *ovrfl_window = NULL;
284
285         u32 drc;
286         u32 drc_chan;           /* Number of channels 0=1chan,1=2chan */
287         u32 nr_chans;
288         u32 drc_ddim;           /* DRAM Data Integrity Mode 0=none,2=edac */
289         struct i82875p_error_info discard;
290
291         debugf0("%s()\n", __func__);
292
293         ovrfl_pdev = pci_get_device(PCI_VEND_DEV(INTEL, 82875_6), NULL);
294
295         if (!ovrfl_pdev) {
296                 /*
297                  * Intel tells BIOS developers to hide device 6 which
298                  * configures the overflow device access containing
299                  * the DRBs - this is where we expose device 6.
300                  * http://www.x86-secret.com/articles/tweak/pat/patsecrets-2.htm
301                  */
302                 pci_write_bits8(pdev, 0xf4, 0x2, 0x2);
303                 ovrfl_pdev =
304                     pci_scan_single_device(pdev->bus, PCI_DEVFN(6, 0));
305                 if (!ovrfl_pdev)
306                         return -ENODEV;
307         }
308 #ifdef CONFIG_PROC_FS
309         if (!ovrfl_pdev->procent && pci_proc_attach_device(ovrfl_pdev)) {
310                 i82875p_printk(KERN_ERR,
311                                "%s(): Failed to attach overflow device\n",
312                                __func__);
313                 return -ENODEV;
314         }
315 #endif                          /* CONFIG_PROC_FS */
316         if (pci_enable_device(ovrfl_pdev)) {
317                 i82875p_printk(KERN_ERR,
318                                "%s(): Failed to enable overflow device\n",
319                                __func__);
320                 return -ENODEV;
321         }
322
323         if (pci_request_regions(ovrfl_pdev, pci_name(ovrfl_pdev))) {
324 #ifdef CORRECT_BIOS
325                 goto fail0;
326 #endif
327         }
328         /* cache is irrelevant for PCI bus reads/writes */
329         ovrfl_window = ioremap_nocache(pci_resource_start(ovrfl_pdev, 0),
330                                        pci_resource_len(ovrfl_pdev, 0));
331
332         if (!ovrfl_window) {
333                 i82875p_printk(KERN_ERR, "%s(): Failed to ioremap bar6\n",
334                                __func__);
335                 goto fail1;
336         }
337
338         /* need to find out the number of channels */
339         drc = readl(ovrfl_window + I82875P_DRC);
340         drc_chan = ((drc >> 21) & 0x1);
341         nr_chans = drc_chan + 1;
342         drc_ddim = (drc >> 18) & 0x1;
343
344         mci = edac_mc_alloc(sizeof(*pvt), I82875P_NR_CSROWS(nr_chans),
345                                  nr_chans);
346
347         if (!mci) {
348                 rc = -ENOMEM;
349                 goto fail2;
350         }
351
352         debugf3("%s(): init mci\n", __func__);
353
354         mci->pdev = pdev;
355         mci->mtype_cap = MEM_FLAG_DDR;
356
357         mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
358         mci->edac_cap = EDAC_FLAG_UNKNOWN;
359         /* adjust FLAGS */
360
361         mci->mod_name = EDAC_MOD_STR;
362         mci->mod_ver = "$Revision: 1.5.2.11 $";
363         mci->ctl_name = i82875p_devs[dev_idx].ctl_name;
364         mci->edac_check = i82875p_check;
365         mci->ctl_page_to_phys = NULL;
366
367         debugf3("%s(): init pvt\n", __func__);
368
369         pvt = (struct i82875p_pvt *) mci->pvt_info;
370         pvt->ovrfl_pdev = ovrfl_pdev;
371         pvt->ovrfl_window = ovrfl_window;
372
373         /*
374          * The dram row boundary (DRB) reg values are boundary address
375          * for each DRAM row with a granularity of 32 or 64MB (single/dual
376          * channel operation).  DRB regs are cumulative; therefore DRB7 will
377          * contain the total memory contained in all eight rows.
378          */
379         for (last_cumul_size = index = 0; index < mci->nr_csrows; index++) {
380                 u8 value;
381                 u32 cumul_size;
382                 struct csrow_info *csrow = &mci->csrows[index];
383
384                 value = readb(ovrfl_window + I82875P_DRB + index);
385                 cumul_size = value << (I82875P_DRB_SHIFT - PAGE_SHIFT);
386                 debugf3("%s(): (%d) cumul_size 0x%x\n", __func__, index,
387                         cumul_size);
388                 if (cumul_size == last_cumul_size)
389                         continue;       /* not populated */
390
391                 csrow->first_page = last_cumul_size;
392                 csrow->last_page = cumul_size - 1;
393                 csrow->nr_pages = cumul_size - last_cumul_size;
394                 last_cumul_size = cumul_size;
395                 csrow->grain = 1 << 12; /* I82875P_EAP has 4KiB reolution */
396                 csrow->mtype = MEM_DDR;
397                 csrow->dtype = DEV_UNKNOWN;
398                 csrow->edac_mode = drc_ddim ? EDAC_SECDED : EDAC_NONE;
399         }
400
401         i82875p_get_error_info(mci, &discard);  /* clear counters */
402
403         if (edac_mc_add_mc(mci)) {
404                 debugf3("%s(): failed edac_mc_add_mc()\n", __func__);
405                 goto fail3;
406         }
407
408         /* get this far and it's successful */
409         debugf3("%s(): success\n", __func__);
410         return 0;
411
412 fail3:
413         edac_mc_free(mci);
414
415 fail2:
416         iounmap(ovrfl_window);
417
418 fail1:
419         pci_release_regions(ovrfl_pdev);
420
421 #ifdef CORRECT_BIOS
422 fail0:
423 #endif
424         pci_disable_device(ovrfl_pdev);
425         /* NOTE: the ovrfl proc entry and pci_dev are intentionally left */
426         return rc;
427 }
428
429
430 /* returns count (>= 0), or negative on error */
431 static int __devinit i82875p_init_one(struct pci_dev *pdev,
432                                       const struct pci_device_id *ent)
433 {
434         int rc;
435
436         debugf0("%s()\n", __func__);
437
438         i82875p_printk(KERN_INFO, "i82875p init one\n");
439         if(pci_enable_device(pdev) < 0)
440                 return -EIO;
441         rc = i82875p_probe1(pdev, ent->driver_data);
442         if (mci_pdev == NULL)
443                 mci_pdev = pci_dev_get(pdev);
444         return rc;
445 }
446
447
448 static void __devexit i82875p_remove_one(struct pci_dev *pdev)
449 {
450         struct mem_ctl_info *mci;
451         struct i82875p_pvt *pvt = NULL;
452
453         debugf0("%s()\n", __func__);
454
455         if ((mci = edac_mc_del_mc(pdev)) == NULL)
456                 return;
457
458         pvt = (struct i82875p_pvt *) mci->pvt_info;
459         if (pvt->ovrfl_window)
460                 iounmap(pvt->ovrfl_window);
461
462         if (pvt->ovrfl_pdev) {
463 #ifdef CORRECT_BIOS
464                 pci_release_regions(pvt->ovrfl_pdev);
465 #endif                          /*CORRECT_BIOS */
466                 pci_disable_device(pvt->ovrfl_pdev);
467                 pci_dev_put(pvt->ovrfl_pdev);
468         }
469
470         edac_mc_free(mci);
471 }
472
473
/*
 * PCI IDs handled by this driver: the 82875P device 0 with any
 * subsystem IDs.  The trailing I82875P is presumably the driver_data
 * value that i82875p_init_one() forwards to i82875p_probe1() as the
 * chip index - confirm against struct pci_device_id.
 * Also used directly by i82875p_init() as the "ent" argument.
 */
474 static const struct pci_device_id i82875p_pci_tbl[] __devinitdata = {
475         {PCI_VEND_DEV(INTEL, 82875_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
476          I82875P},
477         {0,}                    /* 0 terminated list. */
478 };
479
/* Publish the ID table as this module's PCI device table. */
480 MODULE_DEVICE_TABLE(pci, i82875p_pci_tbl);
481
482
/*
 * PCI driver descriptor registered in i82875p_init() and unregistered in
 * i82875p_exit(); ties the ID table to the probe and remove callbacks.
 */
483 static struct pci_driver i82875p_driver = {
484         .name = EDAC_MOD_STR,
485         .probe = i82875p_init_one,
486         .remove = __devexit_p(i82875p_remove_one),
487         .id_table = i82875p_pci_tbl,
488 };
489
490
491 static int __init i82875p_init(void)
492 {
493         int pci_rc;
494
495         debugf3("%s()\n", __func__);
496         pci_rc = pci_register_driver(&i82875p_driver);
497         if (pci_rc < 0)
498                 goto fail0;
499         if (mci_pdev == NULL) {
500                 mci_pdev =
501                     pci_get_device(PCI_VENDOR_ID_INTEL,
502                                    PCI_DEVICE_ID_INTEL_82875_0, NULL);
503                 if (!mci_pdev) {
504                         debugf0("875p pci_get_device fail\n");
505                         pci_rc = -ENODEV;
506                         goto fail1;
507                 }
508                 pci_rc = i82875p_init_one(mci_pdev, i82875p_pci_tbl);
509                 if (pci_rc < 0) {
510                         debugf0("875p init fail\n");
511                         pci_rc = -ENODEV;
512                         goto fail1;
513                 }
514         }
515         return 0;
516
517 fail1:
518         pci_unregister_driver(&i82875p_driver);
519
520 fail0:
521         if (mci_pdev != NULL)
522                 pci_dev_put(mci_pdev);
523
524         return pci_rc;
525 }
526
527
528 static void __exit i82875p_exit(void)
529 {
530         debugf3("%s()\n", __func__);
531
532         pci_unregister_driver(&i82875p_driver);
533         if (!i82875p_registered) {
534                 i82875p_remove_one(mci_pdev);
535                 pci_dev_put(mci_pdev);
536         }
537 }
538
539
/* Hook the init/exit routines above into module load and unload. */
540 module_init(i82875p_init);
541 module_exit(i82875p_exit);
542
543
/* Module metadata. */
544 MODULE_LICENSE("GPL");
545 MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh");
546 MODULE_DESCRIPTION("MC support for Intel 82875 memory hub controllers");