1 /* Intel 7 core Memory Controller kernel module (Nehalem)
3 * This file may be distributed under the terms of the
4 * GNU General Public License version 2 only.
6 * Copyright (c) 2009 by:
7 * Mauro Carvalho Chehab <mchehab@redhat.com>
9 * Red Hat Inc. http://www.redhat.com
11 * Forked and adapted from the i5400_edac driver
13 * Based on the following public Intel datasheets:
14 * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
15 * Datasheet, Volume 2:
16 * http://download.intel.com/design/processor/datashts/320835.pdf
17 * Intel Xeon Processor 5500 Series Datasheet Volume 2
18 * http://www.intel.com/Assets/PDF/datasheet/321322.pdf
20 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
23 #include <linux/module.h>
24 #include <linux/init.h>
25 #include <linux/pci.h>
26 #include <linux/pci_ids.h>
27 #include <linux/slab.h>
28 #include <linux/edac.h>
29 #include <linux/mmzone.h>
30 #include <linux/edac_mce.h>
31 #include <linux/spinlock.h>
33 #include "edac_core.h"
36 * Alter this version for the module when modifications are made
/*
 * Module version string. __DATE__ was dropped: it defeats reproducible
 * builds (and trips -Wdate-time); bump the version explicitly instead.
 */
#define I7CORE_REVISION    " Ver: 1.0.0 "
39 #define EDAC_MOD_STR "i7core_edac"
41 /* HACK: temporary, just to enable all logs, for now */
43 #define debugf0(fmt, arg...) edac_printk(KERN_INFO, "i7core", fmt, ##arg)
48 #define i7core_printk(level, fmt, arg...) \
49 edac_printk(level, "i7core", fmt, ##arg)
51 #define i7core_mc_printk(mci, level, fmt, arg...) \
52 edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)
55 * i7core Memory Controller Registers
58 /* OFFSETS for Device 0 Function 0 */
60 #define MC_CFG_CONTROL 0x90
62 /* OFFSETS for Device 3 Function 0 */
64 #define MC_CONTROL 0x48
65 #define MC_STATUS 0x4c
66 #define MC_MAX_DOD 0x64
 * OFFSETS for Device 3 Function 4, as indicated on Xeon 5500 datasheet:
70 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
73 #define MC_TEST_ERR_RCV1 0x60
74 #define DIMM2_COR_ERR(r) ((r) & 0x7fff)
76 #define MC_TEST_ERR_RCV0 0x64
77 #define DIMM1_COR_ERR(r) (((r) >> 16) & 0x7fff)
78 #define DIMM0_COR_ERR(r) ((r) & 0x7fff)
80 /* OFFSETS for Devices 4,5 and 6 Function 0 */
82 #define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
83 #define THREE_DIMMS_PRESENT (1 << 24)
84 #define SINGLE_QUAD_RANK_PRESENT (1 << 23)
85 #define QUAD_RANK_PRESENT (1 << 22)
86 #define REGISTERED_DIMM (1 << 15)
#define MC_CHANNEL_MAPPER	0x60
/*
 * Read/write logical channel mapping: one 3-bit field per channel, stored
 * biased by +1 (0 means unmapped), hence the trailing -1.  The ch argument
 * is fully parenthesized so expressions like RDLCH(r, i + 1) expand safely.
 */
#define RDLCH(r, ch)	((((r) >> (3 + ((ch) * 6))) & 0x07) - 1)
#define WRLCH(r, ch)	((((r) >> ((ch) * 6)) & 0x07) - 1)
92 #define MC_CHANNEL_RANK_PRESENT 0x7c
93 #define RANK_PRESENT_MASK 0xffff
95 #define MC_CHANNEL_ADDR_MATCH 0xf0
96 #define MC_CHANNEL_ERROR_MASK 0xf8
97 #define MC_CHANNEL_ERROR_INJECT 0xfc
98 #define INJECT_ADDR_PARITY 0x10
99 #define INJECT_ECC 0x08
100 #define MASK_CACHELINE 0x06
101 #define MASK_FULL_CACHELINE 0x06
102 #define MASK_MSB32_CACHELINE 0x04
103 #define MASK_LSB32_CACHELINE 0x02
104 #define NO_MASK_CACHELINE 0x00
105 #define REPEAT_EN 0x01
107 /* OFFSETS for Devices 4,5 and 6 Function 1 */
108 #define MC_DOD_CH_DIMM0 0x48
109 #define MC_DOD_CH_DIMM1 0x4c
110 #define MC_DOD_CH_DIMM2 0x50
/* MC_DOD_CH_DIMMx fields: rank offset in bits 12:10, presence in bit 9.
 * Macro arguments are parenthesized to survive expression operands. */
#define RANKOFFSET_MASK		((1 << 12) | (1 << 11) | (1 << 10))
#define RANKOFFSET(x)		(((x) & RANKOFFSET_MASK) >> 10)
#define DIMM_PRESENT_MASK	(1 << 9)
#define DIMM_PRESENT(x)		(((x) & DIMM_PRESENT_MASK) >> 9)
115 #define MC_DOD_NUMBANK_MASK ((1 << 8) | (1 << 7))
116 #define MC_DOD_NUMBANK(x) (((x) & MC_DOD_NUMBANK_MASK) >> 7)
117 #define MC_DOD_NUMRANK_MASK ((1 << 6) | (1 << 5))
118 #define MC_DOD_NUMRANK(x) (((x) & MC_DOD_NUMRANK_MASK) >> 5)
119 #define MC_DOD_NUMROW_MASK ((1 << 4) | (1 << 3) | (1 << 2))
120 #define MC_DOD_NUMROW(x) (((x) & MC_DOD_NUMROW_MASK) >> 2)
121 #define MC_DOD_NUMCOL_MASK 3
122 #define MC_DOD_NUMCOL(x) ((x) & MC_DOD_NUMCOL_MASK)
124 #define MC_RANK_PRESENT 0x7c
126 #define MC_SAG_CH_0 0x80
127 #define MC_SAG_CH_1 0x84
128 #define MC_SAG_CH_2 0x88
129 #define MC_SAG_CH_3 0x8c
130 #define MC_SAG_CH_4 0x90
131 #define MC_SAG_CH_5 0x94
132 #define MC_SAG_CH_6 0x98
133 #define MC_SAG_CH_7 0x9c
135 #define MC_RIR_LIMIT_CH_0 0x40
136 #define MC_RIR_LIMIT_CH_1 0x44
137 #define MC_RIR_LIMIT_CH_2 0x48
138 #define MC_RIR_LIMIT_CH_3 0x4C
139 #define MC_RIR_LIMIT_CH_4 0x50
140 #define MC_RIR_LIMIT_CH_5 0x54
141 #define MC_RIR_LIMIT_CH_6 0x58
142 #define MC_RIR_LIMIT_CH_7 0x5C
143 #define MC_RIR_LIMIT_MASK ((1 << 10) - 1)
145 #define MC_RIR_WAY_CH 0x80
146 #define MC_RIR_WAY_OFFSET_MASK (((1 << 14) - 1) & ~0x7)
147 #define MC_RIR_WAY_RANK_MASK 0x7
154 #define MAX_DIMMS 3 /* Max DIMMS per channel */
155 #define NUM_SOCKETS 2 /* Max number of MC sockets */
156 #define MAX_MCR_FUNC 4
157 #define MAX_CHAN_FUNC 3
167 struct i7core_inject {
175 /* Error address mask */
176 int channel, dimm, rank, bank, page, col;
179 struct i7core_channel {
184 struct pci_id_descr {
188 struct pci_dev *pdev[NUM_SOCKETS];
192 struct pci_dev *pci_noncore[NUM_SOCKETS];
193 struct pci_dev *pci_mcr[NUM_SOCKETS][MAX_MCR_FUNC + 1];
194 struct pci_dev *pci_ch[NUM_SOCKETS][NUM_CHANS][MAX_CHAN_FUNC + 1];
196 struct i7core_info info;
197 struct i7core_inject inject;
198 struct i7core_channel channel[NUM_SOCKETS][NUM_CHANS];
200 int sockets; /* Number of sockets */
201 int channels; /* Number of active channels */
203 int ce_count_available[NUM_SOCKETS];
204 /* ECC corrected errors counts per dimm */
205 unsigned long ce_count[NUM_SOCKETS][MAX_DIMMS];
206 int last_ce_count[NUM_SOCKETS][MAX_DIMMS];
209 struct edac_mce edac_mce;
210 struct mce mce_entry[MCE_LOG_LEN];
215 /* Device name and register DID (Device ID) */
216 struct i7core_dev_info {
217 const char *ctl_name; /* name for this device */
218 u16 fsb_mapping_errors; /* DID for the branchmap,control */
221 #define PCI_DESCR(device, function, device_id) \
223 .func = (function), \
224 .dev_id = (device_id)
226 struct pci_id_descr pci_devs[] = {
227 /* Memory controller */
228 { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR) },
229 { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD) },
230 { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS) }, /* if RDIMM is supported */
231 { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },
234 { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
235 { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
236 { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
237 { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC) },
240 { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
241 { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
242 { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
243 { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC) },
246 { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
247 { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
248 { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
249 { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC) },
251 /* Generic Non-core registers */
253 * This is the PCI device on i7core and on Xeon 35xx (8086:2c41)
254 * On Xeon 55xx, however, it has a different id (8086:2c40). So,
255 * the probing code needs to test for the other address in case of
256 * failure of this one
258 { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NOCORE) },
261 #define N_DEVS ARRAY_SIZE(pci_devs)
264 * pci_device_id table for which devices we are looking for
265 * This should match the first device at pci_devs table
267 static const struct pci_device_id i7core_pci_tbl[] __devinitdata = {
268 {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
269 {0,} /* 0 terminated list. */
273 /* Table of devices attributes supported by this driver */
274 static const struct i7core_dev_info i7core_devs[] = {
276 .ctl_name = "i7 Core",
277 .fsb_mapping_errors = PCI_DEVICE_ID_INTEL_I7_MCR,
281 static struct edac_pci_ctl_info *i7core_pci;
283 /****************************************************************************
 Ancillary status routines
285 ****************************************************************************/
287 /* MC_CONTROL bits */
288 #define CH_ACTIVE(pvt, ch) ((pvt)->info.mc_control & (1 << (8 + ch)))
289 #define ECCx8(pvt) ((pvt)->info.mc_control & (1 << 1))
292 #define ECC_ENABLED(pvt) ((pvt)->info.mc_status & (1 << 3))
293 #define CH_DISABLED(pvt, ch) ((pvt)->info.mc_status & (1 << ch))
295 /* MC_MAX_DOD read functions */
296 static inline int numdimms(u32 dimms)
298 return (dimms & 0x3) + 1;
301 static inline int numrank(u32 rank)
303 static int ranks[4] = { 1, 2, 4, -EINVAL };
305 return ranks[rank & 0x3];
308 static inline int numbank(u32 bank)
310 static int banks[4] = { 4, 8, 16, -EINVAL };
312 return banks[bank & 0x3];
315 static inline int numrow(u32 row)
317 static int rows[8] = {
318 1 << 12, 1 << 13, 1 << 14, 1 << 15,
319 1 << 16, -EINVAL, -EINVAL, -EINVAL,
322 return rows[row & 0x7];
325 static inline int numcol(u32 col)
327 static int cols[8] = {
328 1 << 10, 1 << 11, 1 << 12, -EINVAL,
330 return cols[col & 0x3];
333 /****************************************************************************
334 Memory check routines
335 ****************************************************************************/
336 static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot,
341 for (i = 0; i < N_DEVS; i++) {
342 if (!pci_devs[i].pdev[socket])
345 if (PCI_SLOT(pci_devs[i].pdev[socket]->devfn) == slot &&
346 PCI_FUNC(pci_devs[i].pdev[socket]->devfn) == func) {
347 return pci_devs[i].pdev[socket];
355 * i7core_get_active_channels() - gets the number of channels and csrows
356 * @socket: Quick Path Interconnect socket
357 * @channels: Number of channels that will be returned
358 * @csrows: Number of csrows found
360 * Since EDAC core needs to know in advance the number of available channels
361 * and csrows, in order to allocate memory for csrows/channels, it is needed
362 * to run two similar steps. At the first step, implemented on this function,
363 * it checks the number of csrows/channels present at one socket.
364 * this is used in order to properly allocate the size of mci components.
366 * It should be noticed that none of the current available datasheets explain
367 * or even mention how csrows are seen by the memory controller. So, we need
368 * to add a fake description for csrows.
369 * So, this driver is attributing one DIMM memory for one csrow.
/*
 * i7core_get_active_channels() - counts active channels and (fake) csrows
 * on @socket so the EDAC core can size the mci structures up front;
 * get_dimm_config() fills in the details later.  One present DIMM is
 * modelled as one csrow, since no public datasheet describes how csrows
 * map onto this controller.
 *
 * NOTE(review): this view of the function is truncated - the csrows output
 * parameter, local declarations, error returns and closing braces are not
 * visible.  Code bytes below are untouched; only comments were added.
 */
static int i7core_get_active_channels(u8 socket, unsigned *channels,
	struct pci_dev *pdev = NULL;

	/* Global MC registers live on device 3, function 0 */
	pdev = get_pdev_slot_func(socket, 3, 0);
		i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n",

	/* Device 3 function 0 reads */
	pci_read_config_dword(pdev, MC_STATUS, &status);
	pci_read_config_dword(pdev, MC_CONTROL, &control);

	for (i = 0; i < NUM_CHANS; i++) {
		/* Check if the channel is active (MC_CONTROL bit 8 + i) */
		if (!(control & (1 << (8 + i))))

		/* Check if the channel is disabled (MC_STATUS bit i) */
		if (status & (1 << i))

		/* Per-channel DOD registers: devices 4..6, function 1 */
		pdev = get_pdev_slot_func(socket, i + 4, 1);
			i7core_printk(KERN_ERR, "Couldn't find socket %d "

		/* Devices 4-6 function 1 */
		pci_read_config_dword(pdev,
				MC_DOD_CH_DIMM0, &dimm_dod[0]);
		pci_read_config_dword(pdev,
				MC_DOD_CH_DIMM1, &dimm_dod[1]);
		pci_read_config_dword(pdev,
				MC_DOD_CH_DIMM2, &dimm_dod[2]);

		/* One fake csrow per present DIMM */
		for (j = 0; j < 3; j++) {
			if (!DIMM_PRESENT(dimm_dod[j]))

	debugf0("Number of active channels on socket %d: %d\n",
/*
 * get_dimm_config() - reads the memory controller geometry of @socket and
 * populates one fake csrow per detected DIMM (one DIMM == one csrow),
 * advancing *csrow as it goes.
 *
 * NOTE(review): this block is truncated in this view (else arms, error
 * returns and closing braces are missing).  Code bytes are untouched;
 * comments flag the suspect lines.
 */
static int get_dimm_config(struct mem_ctl_info *mci, int *csrow, u8 socket)
	struct i7core_pvt *pvt = mci->pvt_info;
	struct csrow_info *csr;
	struct pci_dev *pdev;
	unsigned long last_page = 0;

	/* Get data from the MC register, function 0 */
	pdev = pvt->pci_mcr[socket][0];

	/* Device 3 function 0 reads */
	pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
	pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
	pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
	pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);

	debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
		socket, pvt->info.mc_control, pvt->info.mc_status,
		pvt->info.max_dod, pvt->info.ch_map);

	if (ECC_ENABLED(pvt)) {
		debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
			mode = EDAC_S8ECD8ED;
			mode = EDAC_S4ECD4ED;
		debugf0("ECC disabled\n");

	/* FIXME: need to handle the error codes */
	debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
		numdimms(pvt->info.max_dod),
		numrank(pvt->info.max_dod >> 2),
		numbank(pvt->info.max_dod >> 4),
		numrow(pvt->info.max_dod >> 6),
		numcol(pvt->info.max_dod >> 9));

	for (i = 0; i < NUM_CHANS; i++) {
		u32 data, dimm_dod[3], value[8];

		if (!CH_ACTIVE(pvt, i)) {
			debugf0("Channel %i is not active\n", i);
		if (CH_DISABLED(pvt, i)) {
			debugf0("Channel %i is disabled\n", i);

		/* Devices 4-6 function 0 */
		pci_read_config_dword(pvt->pci_ch[socket][i][0],
				MC_CHANNEL_DIMM_INIT_PARAMS, &data);

		pvt->channel[socket][i].ranks = (data & QUAD_RANK_PRESENT) ?

		if (data & REGISTERED_DIMM)

		/*
		 * NOTE(review): the three "dimms" stores below index
		 * pvt->channel[i], while every other access in this function
		 * uses pvt->channel[socket][i] - the [socket] index looks
		 * missing here; confirm against struct i7core_pvt.
		 */
		if (data & THREE_DIMMS_PRESENT)
			pvt->channel[i].dimms = 3;
		else if (data & SINGLE_QUAD_RANK_PRESENT)
			pvt->channel[i].dimms = 1;
			pvt->channel[i].dimms = 2;

		/* Devices 4-6 function 1 */
		pci_read_config_dword(pvt->pci_ch[socket][i][1],
				MC_DOD_CH_DIMM0, &dimm_dod[0]);
		pci_read_config_dword(pvt->pci_ch[socket][i][1],
				MC_DOD_CH_DIMM1, &dimm_dod[1]);
		pci_read_config_dword(pvt->pci_ch[socket][i][1],
				MC_DOD_CH_DIMM2, &dimm_dod[2]);

		debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
			"%d ranks, %cDIMMs\n",
			RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
			pvt->channel[socket][i].ranks,
			(data & REGISTERED_DIMM) ? 'R' : 'U');

		for (j = 0; j < 3; j++) {
			u32 banks, ranks, rows, cols;

			if (!DIMM_PRESENT(dimm_dod[j]))

			banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
			ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
			rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
			cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));

			/* DDR3 has 8 I/O banks */
			size = (rows * cols * banks * ranks) >> (20 - 3);

			pvt->channel[socket][i].dimms++;

			debugf0("\tdimm %d %d Mb offset: %x, "
				"bank: %d, rank: %d, row: %#x, col: %#x\n",
				RANKOFFSET(dimm_dod[j]),
				banks, ranks, rows, cols);

				npages = size >> (PAGE_SHIFT - 20);
				npages = size << (20 - PAGE_SHIFT);

			csr = &mci->csrows[*csrow];
			/*
			 * NOTE(review): with last_page starting at 0 this
			 * makes the very first csrow begin at page 1, not 0 -
			 * verify against how last_page is advanced (that code
			 * is truncated in this view).
			 */
			csr->first_page = last_page + 1;
			csr->last_page = last_page;
			csr->nr_pages = npages;

			csr->csrow_idx = *csrow;
			csr->nr_channels = 1;

			csr->channels[0].chan_idx = i;
			csr->channels[0].ce_count = 0;

				csr->dtype = DEV_X16;
				csr->dtype = DEV_UNKNOWN;

			csr->edac_mode = mode;

		pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
		pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
		pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
		pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
		pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
		pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
		pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
		pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
		debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
		for (j = 0; j < 8; j++)
			debugf1("\t\t%#x\t%#x\t%#x\n",
				(value[j] >> 27) & 0x1,
				(value[j] >> 24) & 0x7,
				/*
				 * NOTE(review): "&&" yields only 0/1; a
				 * bitwise "&" with ((1 << 24) - 1) was almost
				 * certainly intended to print the OFFSET
				 * field here.
				 */
				(value[j] && ((1 << 24) - 1)));
607 /****************************************************************************
608 Error insertion routines
609 ****************************************************************************/
611 /* The i7core has independent error injection features per channel.
612 However, to have a simpler code, we don't allow enabling error injection
613 on more than one channel.
614 Also, since a change at an inject parameter will be applied only at enable,
615 we're disabling error injection on all write calls to the sysfs nodes that
616 controls the error code injection.
618 static int disable_inject(struct mem_ctl_info *mci)
620 struct i7core_pvt *pvt = mci->pvt_info;
622 pvt->inject.enable = 0;
624 if (!pvt->pci_ch[pvt->inject.socket][pvt->inject.channel][0])
627 pci_write_config_dword(pvt->pci_ch[pvt->inject.socket][pvt->inject.channel][0],
628 MC_CHANNEL_ERROR_MASK, 0);
634 * i7core inject inject.socket
636 * accept and store error injection inject.socket value
638 static ssize_t i7core_inject_socket_store(struct mem_ctl_info *mci,
639 const char *data, size_t count)
641 struct i7core_pvt *pvt = mci->pvt_info;
645 rc = strict_strtoul(data, 10, &value);
646 if ((rc < 0) || (value >= pvt->sockets))
649 pvt->inject.section = (u32) value;
653 static ssize_t i7core_inject_socket_show(struct mem_ctl_info *mci,
656 struct i7core_pvt *pvt = mci->pvt_info;
657 return sprintf(data, "%d\n", pvt->inject.socket);
661 * i7core inject inject.section
663 * accept and store error injection inject.section value
664 * bit 0 - refers to the lower 32-byte half cacheline
665 * bit 1 - refers to the upper 32-byte half cacheline
667 static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
668 const char *data, size_t count)
670 struct i7core_pvt *pvt = mci->pvt_info;
674 if (pvt->inject.enable)
677 rc = strict_strtoul(data, 10, &value);
678 if ((rc < 0) || (value > 3))
681 pvt->inject.section = (u32) value;
685 static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
688 struct i7core_pvt *pvt = mci->pvt_info;
689 return sprintf(data, "0x%08x\n", pvt->inject.section);
695 * accept and store error injection inject.section value
696 * bit 0 - repeat enable - Enable error repetition
697 * bit 1 - inject ECC error
698 * bit 2 - inject parity error
700 static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
701 const char *data, size_t count)
703 struct i7core_pvt *pvt = mci->pvt_info;
707 if (pvt->inject.enable)
710 rc = strict_strtoul(data, 10, &value);
711 if ((rc < 0) || (value > 7))
714 pvt->inject.type = (u32) value;
718 static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
721 struct i7core_pvt *pvt = mci->pvt_info;
722 return sprintf(data, "0x%08x\n", pvt->inject.type);
726 * i7core_inject_inject.eccmask_store
728 * The type of error (UE/CE) will depend on the inject.eccmask value:
729 * Any bits set to a 1 will flip the corresponding ECC bit
730 * Correctable errors can be injected by flipping 1 bit or the bits within
731 * a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
732 * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an
733 * uncorrectable error to be injected.
735 static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
736 const char *data, size_t count)
738 struct i7core_pvt *pvt = mci->pvt_info;
742 if (pvt->inject.enable)
745 rc = strict_strtoul(data, 10, &value);
749 pvt->inject.eccmask = (u32) value;
753 static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
756 struct i7core_pvt *pvt = mci->pvt_info;
757 return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
763 * The type of error (UE/CE) will depend on the inject.eccmask value:
764 * Any bits set to a 1 will flip the corresponding ECC bit
765 * Correctable errors can be injected by flipping 1 bit or the bits within
766 * a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
767 * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an
768 * uncorrectable error to be injected.
/*
 * i7core_inject_addrmatch_store() - parses a "cmd:value" pair selecting the
 * address-match criteria for injection.  cmd is one of channel, dimm, rank,
 * bank, page, col/column; value is "any" (don't care, stored as -1) or a
 * non-negative decimal number.
 *
 * NOTE(review): strsep() writes a NUL into the buffer, yet @data arrives as
 * const char * and the const is cast away - confirm the sysfs buffer is
 * writable here.
 * NOTE(review): truncated view - error returns, the "any" assignment and
 * the final "return count;" are not visible.  Code bytes are untouched.
 */
static ssize_t i7core_inject_addrmatch_store(struct mem_ctl_info *mci,
		const char *data, size_t count)
	struct i7core_pvt *pvt = mci->pvt_info;

	/* Parameter changes only apply at enable time */
	if (pvt->inject.enable)

	/* Split "cmd:value" */
	cmd = strsep((char **) &data, ":");

	val = strsep((char **) &data, " \n\t");

	/* "any" clears the criterion */
	if (!strcasecmp(val, "any"))

	rc = strict_strtol(val, 10, &value);
	if ((rc < 0) || (value < 0))

	if (!strcasecmp(cmd, "channel")) {
		pvt->inject.channel = value;
	} else if (!strcasecmp(cmd, "dimm")) {
		pvt->inject.dimm = value;
	} else if (!strcasecmp(cmd, "rank")) {
		pvt->inject.rank = value;
	} else if (!strcasecmp(cmd, "bank")) {
		pvt->inject.bank = value;
	} else if (!strcasecmp(cmd, "page")) {
		pvt->inject.page = value;
	} else if (!strcasecmp(cmd, "col") ||
		   !strcasecmp(cmd, "column")) {
		pvt->inject.col = value;
/*
 * i7core_inject_addrmatch_show() - prints the current address-match
 * criteria, one "name: value" line per field; -1 renders as "any".
 *
 * NOTE(review): page[7]/col[7] fit "0x%04x" plus NUL only while the value
 * is <= 0xffff; a larger int would overflow the stack buffers - consider
 * snprintf().
 * NOTE(review): truncated view - the "else" lines pairing each sprintf and
 * the col "any" branch are not visible.  Code bytes are untouched.
 */
static ssize_t i7core_inject_addrmatch_show(struct mem_ctl_info *mci,
	struct i7core_pvt *pvt = mci->pvt_info;
	char channel[4], dimm[4], bank[4], rank[4], page[7], col[7];

	if (pvt->inject.channel < 0)
		sprintf(channel, "any");
		sprintf(channel, "%d", pvt->inject.channel);
	if (pvt->inject.dimm < 0)
		sprintf(dimm, "any");
		sprintf(dimm, "%d", pvt->inject.dimm);
	if (pvt->inject.bank < 0)
		sprintf(bank, "any");
		sprintf(bank, "%d", pvt->inject.bank);
	if (pvt->inject.rank < 0)
		sprintf(rank, "any");
		sprintf(rank, "%d", pvt->inject.rank);
	if (pvt->inject.page < 0)
		sprintf(page, "any");
		sprintf(page, "0x%04x", pvt->inject.page);
	if (pvt->inject.col < 0)
		sprintf(col, "0x%04x", pvt->inject.col);

	return sprintf(data, "channel: %s\ndimm: %s\nbank: %s\n"
		       "rank: %s\npage: %s\ncolumn: %s\n",
		       channel, dimm, bank, rank, page, col);
870 static int write_and_test(struct pci_dev *dev, int where, u32 val)
875 for (count = 0; count < 10; count++) {
878 pci_write_config_dword(dev, where, val);
879 pci_read_config_dword(dev, where, &read);
885 debugf0("Error Injection Register 0x%02x: Tried to write 0x%08x, "
886 "but read: 0x%08x\n", where, val, read);
893 * This routine prepares the Memory Controller for error injection.
894 * The error will be injected when some process tries to write to the
895 * memory that matches the given criteria.
896 * The criteria can be set in terms of a mask where dimm, rank, bank, page
897 * and col can be specified.
898 * A -1 value for any of the mask items will make the MCU to ignore
899 * that matching criteria for error injection.
901 * It should be noticed that the error will only happen after a write operation
902 * on a memory that matches the condition. if REPEAT_EN is not enabled at
903 * inject mask, then it will produce just one error. Otherwise, it will repeat
904 * until the injectmask would be cleaned.
906 * FIXME: This routine assumes that MAXNUMDIMMS value of MC_MAX_DOD
907 * is reliable enough to check if the MC is using the
908 * three channels. However, this is not clear at the datasheet.
/*
 * i7core_inject_enable_store() - arms error injection on the selected
 * socket/channel using the previously stored criteria.  The error fires on
 * a matching write: once, or repeatedly while REPEAT_EN is set.
 *
 * NOTE(review): truncated view - strtoul error handling, the mask default
 * initialisation and several braces are not visible.  Code bytes are
 * untouched; comments flag the suspect lines.
 */
static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
		const char *data, size_t count)
	struct i7core_pvt *pvt = mci->pvt_info;

	/* Nothing to program without the per-channel fn 0 device */
	if (!pvt->pci_ch[pvt->inject.socket][pvt->inject.channel][0])

	rc = strict_strtoul(data, 10, &enable);

	pvt->inject.enable = 1;

	/* Sets pvt->inject.dimm mask */
	if (pvt->inject.dimm < 0)
		if (pvt->channel[pvt->inject.socket][pvt->inject.channel].dimms > 2)
			mask |= (pvt->inject.dimm & 0x3L) << 35;
			mask |= (pvt->inject.dimm & 0x1L) << 36;

	/* Sets pvt->inject.rank mask */
	if (pvt->inject.rank < 0)
		if (pvt->channel[pvt->inject.socket][pvt->inject.channel].dimms > 2)
			mask |= (pvt->inject.rank & 0x1L) << 34;
			mask |= (pvt->inject.rank & 0x3L) << 34;

	/* Sets pvt->inject.bank mask */
	if (pvt->inject.bank < 0)
		/*
		 * NOTE(review): 0x15 masks bits 0, 2 and 4 only; a
		 * contiguous 4-bit bank field would want 0xf - confirm
		 * against the Xeon 5500 datasheet before changing.
		 */
		mask |= (pvt->inject.bank & 0x15L) << 30;

	/* Sets pvt->inject.page mask */
	if (pvt->inject.page < 0)
		mask |= (pvt->inject.page & 0xffffL) << 14;

	/* Sets pvt->inject.column mask */
	if (pvt->inject.col < 0)
		mask |= (pvt->inject.col & 0x3fffL);

	 * bits 1-2: MASK_HALF_CACHELINE
	 * bit 4: INJECT_ADDR_PARITY
	injectmask = (pvt->inject.type & 1) |
		     (pvt->inject.section & 0x3) << 1 |
		     (pvt->inject.type & 0x6) << (3 - 1);

	/* Unlock writes to registers - this register is write only */
	pci_write_config_dword(pvt->pci_noncore[pvt->inject.socket],
			       MC_CFG_CONTROL, 0x2);

	/* Zeroes error count registers */
	pci_write_config_dword(pvt->pci_mcr[pvt->inject.socket][4],
			       MC_TEST_ERR_RCV1, 0);
	pci_write_config_dword(pvt->pci_mcr[pvt->inject.socket][4],
			       MC_TEST_ERR_RCV0, 0);
	pvt->ce_count_available[pvt->inject.socket] = 0;

	/* Program the address-match mask, low dword then high dword */
	write_and_test(pvt->pci_ch[pvt->inject.socket][pvt->inject.channel][0],
		       MC_CHANNEL_ADDR_MATCH, mask);
	write_and_test(pvt->pci_ch[pvt->inject.socket][pvt->inject.channel][0],
		       MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);

	write_and_test(pvt->pci_ch[pvt->inject.socket][pvt->inject.channel][0],
		       MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);

	/*
	 * NOTE(review): this second write targets MC_CHANNEL_ERROR_MASK
	 * (0xf8) again, clobbering the eccmask just written above;
	 * MC_CHANNEL_ERROR_INJECT (0xfc) looks intended for injectmask -
	 * confirm against the datasheet.
	 */
	write_and_test(pvt->pci_ch[pvt->inject.socket][pvt->inject.channel][0],
		       MC_CHANNEL_ERROR_MASK, injectmask);

	 * This is something undocumented, based on my tests
	 * Without writing 8 to this register, errors aren't injected. Not sure
	pci_write_config_dword(pvt->pci_noncore[pvt->inject.socket],

	debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
		mask, pvt->inject.eccmask, injectmask);
1022 static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
1025 struct i7core_pvt *pvt = mci->pvt_info;
1028 pci_read_config_dword(pvt->pci_ch[pvt->inject.socket][pvt->inject.channel][0],
1029 MC_CHANNEL_ERROR_MASK, &injectmask);
1031 debugf0("Inject error read: 0x%018x\n", injectmask);
1033 if (injectmask & 0x0c)
1034 pvt->inject.enable = 1;
1036 return sprintf(data, "%d\n", pvt->inject.enable);
1039 static ssize_t i7core_ce_regs_show(struct mem_ctl_info *mci, char *data)
1041 unsigned i, count, total = 0;
1042 struct i7core_pvt *pvt = mci->pvt_info;
1044 for (i = 0; i < pvt->sockets; i++) {
1045 if (!pvt->ce_count_available[i])
1046 count = sprintf(data, "socket 0 data unavailable\n");
1048 count = sprintf(data, "socket %d, dimm0: %lu\n"
1049 "dimm1: %lu\ndimm2: %lu\n",
1051 pvt->ce_count[i][0],
1052 pvt->ce_count[i][1],
1053 pvt->ce_count[i][2]);
1064 static struct mcidev_sysfs_attribute i7core_inj_attrs[] = {
1067 .name = "inject_socket",
1068 .mode = (S_IRUGO | S_IWUSR)
1070 .show = i7core_inject_socket_show,
1071 .store = i7core_inject_socket_store,
1074 .name = "inject_section",
1075 .mode = (S_IRUGO | S_IWUSR)
1077 .show = i7core_inject_section_show,
1078 .store = i7core_inject_section_store,
1081 .name = "inject_type",
1082 .mode = (S_IRUGO | S_IWUSR)
1084 .show = i7core_inject_type_show,
1085 .store = i7core_inject_type_store,
1088 .name = "inject_eccmask",
1089 .mode = (S_IRUGO | S_IWUSR)
1091 .show = i7core_inject_eccmask_show,
1092 .store = i7core_inject_eccmask_store,
1095 .name = "inject_addrmatch",
1096 .mode = (S_IRUGO | S_IWUSR)
1098 .show = i7core_inject_addrmatch_show,
1099 .store = i7core_inject_addrmatch_store,
1102 .name = "inject_enable",
1103 .mode = (S_IRUGO | S_IWUSR)
1105 .show = i7core_inject_enable_show,
1106 .store = i7core_inject_enable_store,
1109 .name = "corrected_error_counts",
1110 .mode = (S_IRUGO | S_IWUSR)
1112 .show = i7core_ce_regs_show,
1117 /****************************************************************************
1118 Device initialization routines: put/get, init/exit
1119 ****************************************************************************/
1122 * i7core_put_devices 'put' all the devices that we have
1123 * reserved via 'get'
1125 static void i7core_put_devices(void)
1129 for (i = 0; i < NUM_SOCKETS; i++)
1130 for (j = 0; j < N_DEVS; j++)
1131 pci_dev_put(pci_devs[j].pdev[i]);
1135 * i7core_get_devices Find and perform 'get' operation on the MCH's
1136 * device/functions we want to reference for this driver
1138 * Need to 'get' device 16 func 1 and func 2
1140 int i7core_get_onedevice(struct pci_dev **prev, int devno)
1142 struct pci_dev *pdev = NULL;
1146 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1147 pci_devs[devno].dev_id, *prev);
 * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core PCI buses
 * aren't announced by ACPI, so we need to use a legacy scan probing
1154 if (unlikely(!pdev && !devno && !prev)) {
1155 pcibios_scan_specific_bus(254);
1156 pcibios_scan_specific_bus(255);
1158 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1159 pci_devs[devno].dev_id, *prev);
 * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core regs
1164 * is at addr 8086:2c40, instead of 8086:2c41. So, we need
1165 * to probe for the alternate address in case of failure
1167 if (pci_devs[devno].dev_id == PCI_DEVICE_ID_INTEL_I7_NOCORE && !pdev)
1168 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1169 PCI_DEVICE_ID_INTEL_I7_NOCORE_ALT, *prev);
1178 * Dev 3 function 2 only exists on chips with RDIMMs
 * so it is OK not to find it
1181 if ((pci_devs[devno].dev == 3) && (pci_devs[devno].func == 2)) {
1186 i7core_printk(KERN_ERR,
1187 "Device not found: dev %02x.%d PCI ID %04x:%04x\n",
1188 pci_devs[devno].dev, pci_devs[devno].func,
1189 PCI_VENDOR_ID_INTEL, pci_devs[devno].dev_id);
1191 /* End of list, leave */
1194 bus = pdev->bus->number;
1201 if (socket >= NUM_SOCKETS) {
1202 i7core_printk(KERN_ERR,
1203 "Unexpected socket for "
1204 "dev %02x:%02x.%d PCI ID %04x:%04x\n",
1205 bus, pci_devs[devno].dev, pci_devs[devno].func,
1206 PCI_VENDOR_ID_INTEL, pci_devs[devno].dev_id);
1211 if (pci_devs[devno].pdev[socket]) {
1212 i7core_printk(KERN_ERR,
1213 "Duplicated device for "
1214 "dev %02x:%02x.%d PCI ID %04x:%04x\n",
1215 bus, pci_devs[devno].dev, pci_devs[devno].func,
1216 PCI_VENDOR_ID_INTEL, pci_devs[devno].dev_id);
1221 pci_devs[devno].pdev[socket] = pdev;
1224 if (unlikely(PCI_SLOT(pdev->devfn) != pci_devs[devno].dev ||
1225 PCI_FUNC(pdev->devfn) != pci_devs[devno].func)) {
1226 i7core_printk(KERN_ERR,
1227 "Device PCI ID %04x:%04x "
1228 "has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
1229 PCI_VENDOR_ID_INTEL, pci_devs[devno].dev_id,
1230 bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1231 bus, pci_devs[devno].dev, pci_devs[devno].func);
1235 /* Be sure that the device is enabled */
1236 if (unlikely(pci_enable_device(pdev) < 0)) {
1237 i7core_printk(KERN_ERR,
1239 "dev %02x:%02x.%d PCI ID %04x:%04x\n",
1240 bus, pci_devs[devno].dev, pci_devs[devno].func,
1241 PCI_VENDOR_ID_INTEL, pci_devs[devno].dev_id);
1245 i7core_printk(KERN_INFO,
1246 "Registered socket %d "
1247 "dev %02x:%02x.%d PCI ID %04x:%04x\n",
1248 socket, bus, pci_devs[devno].dev, pci_devs[devno].func,
1249 PCI_VENDOR_ID_INTEL, pci_devs[devno].dev_id);
1256 static int i7core_get_devices(void)
1259 struct pci_dev *pdev = NULL;
1261 for (i = 0; i < N_DEVS; i++) {
1264 if (i7core_get_onedevice(&pdev, i) < 0) {
1265 i7core_put_devices();
1273 static int mci_bind_devs(struct mem_ctl_info *mci)
1275 struct i7core_pvt *pvt = mci->pvt_info;
1276 struct pci_dev *pdev;
1277 int i, j, func, slot;
1279 for (i = 0; i < pvt->sockets; i++) {
1280 for (j = 0; j < N_DEVS; j++) {
1281 pdev = pci_devs[j].pdev[i];
1285 func = PCI_FUNC(pdev->devfn);
1286 slot = PCI_SLOT(pdev->devfn);
1288 if (unlikely(func > MAX_MCR_FUNC))
1290 pvt->pci_mcr[i][func] = pdev;
1291 } else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
1292 if (unlikely(func > MAX_CHAN_FUNC))
1294 pvt->pci_ch[i][slot - 4][func] = pdev;
1295 } else if (!slot && !func)
1296 pvt->pci_noncore[i] = pdev;
1300 debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
1301 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1309 i7core_printk(KERN_ERR, "Device %d, function %d "
1310 "is out of the expected range\n",
1315 /****************************************************************************
1316 Error check routines
1317 ****************************************************************************/
1319 /* This function is based on the device 3 function 4 registers as described on:
1320 * Intel Xeon Processor 5500 Series Datasheet Volume 2
1321 * http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1322 * also available at:
1323 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
/*
 * check_mc_test_err() - polls the corrected-error counters of @socket
 * (device 3 function 4, MC_TEST_ERR_RCV0/1 - 15-bit per-DIMM fields) and
 * accumulates the deltas against last_ce_count[] into ce_count[].
 *
 * NOTE(review): truncated view - local declarations of rcv0/rcv1, wrap
 * fixups for negative deltas and some braces are not visible.  Code bytes
 * are untouched.
 */
static void check_mc_test_err(struct mem_ctl_info *mci, u8 socket)
	struct i7core_pvt *pvt = mci->pvt_info;
	int new0, new1, new2;

	if (!pvt->pci_mcr[socket][4]) {
		debugf0("%s MCR registers not found\n",__func__);

	/* Corrected error reads */
	pci_read_config_dword(pvt->pci_mcr[socket][4], MC_TEST_ERR_RCV1, &rcv1);
	pci_read_config_dword(pvt->pci_mcr[socket][4], MC_TEST_ERR_RCV0, &rcv0);

	/* Store the new values */
	new2 = DIMM2_COR_ERR(rcv1);
	new1 = DIMM1_COR_ERR(rcv0);
	new0 = DIMM0_COR_ERR(rcv0);

	/*
	 * NOTE(review): "pvt->ce_count_available" below is an array used as
	 * a boolean, which decays to a non-NULL pointer and is therefore
	 * always true; "pvt->ce_count_available[socket]" (as used a few
	 * lines down) looks intended.
	 */
	debugf2("%s CE rcv1=0x%08x rcv0=0x%08x, %d %d %d\n",
		(pvt->ce_count_available ? "UPDATE" : "READ"),
		rcv1, rcv0, new0, new1, new2);

	/* Updates CE counters if it is not the first time here */
	if (pvt->ce_count_available[socket]) {
		/* Updates CE counters */
		int add0, add1, add2;

		add2 = new2 - pvt->last_ce_count[socket][2];
		add1 = new1 - pvt->last_ce_count[socket][1];
		add0 = new0 - pvt->last_ce_count[socket][0];

		pvt->ce_count[socket][2] += add2;

		pvt->ce_count[socket][1] += add1;

		pvt->ce_count[socket][0] += add0;

		pvt->ce_count_available[socket] = 1;

	/* Store the new values */
	pvt->last_ce_count[socket][2] = new2;
	pvt->last_ce_count[socket][1] = new1;
	pvt->last_ce_count[socket][0] = new0;
1381 * According to tables E-11 and E-12 of chapter E.3.3 of Intel 64 and IA-32
1382 * Architectures Software Developer’s Manual Volume 3B.
1383 * Nehalem are defined as family 0x06, model 0x1a
1385 * The MCA registers used here are the following ones:
1386 * struct mce field MCA Register
1387 * m->status MSR_IA32_MC8_STATUS
1388 * m->addr MSR_IA32_MC8_ADDR
1389 * m->misc MSR_IA32_MC8_MISC
1390 * In the case of Nehalem, the error information is masked at .status and .misc
1393 static void i7core_mce_output_error(struct mem_ctl_info *mci,
/*
 * Decode one queued machine-check record (memory-controller bank)
 * into a human-readable message and hand it to the EDAC core.
 */
1396 char *type, *optype, *err, *msg;
/* Bits 16..24 of MCi_STATUS select the memory error sub-type. */
1397 unsigned long error = m->status & 0x1ff0000l;
1398 u32 optypenum = (m->status >> 4) & 0x07;
/* BUG(review): "&&" is almost certainly a typo for bitwise "&" —
 * as written core_err_cnt is only ever 0 or 1, never the corrected
 * error count held in MCi_STATUS bits 52:38.  Needs a code fix. */
1399 u32 core_err_cnt = (m->status >> 38) && 0x7fff;
1400 u32 dimm = (m->misc >> 16) & 0x3;
1401 u32 channel = (m->misc >> 18) & 0x3;
/* Upper 32 bits of MCi_MISC carry the ECC syndrome. */
1402 u32 syndrome = m->misc >> 32;
1403 u32 errnum = find_first_bit(&error, 32);
/* MCG_STATUS bit 0 selects the severity label below — presumably
 * distinguishing fatal from non-fatal; confirm against the SDM. */
1405 if (m->mcgstatus & 1)
/* Request-type decode (MCi_STATUS bits 6:4). */
1410 switch (optypenum) {
1412 optype = "generic undef request";
1415 optype = "read error";
1418 optype = "write error";
1421 optype = "addr/cmd error";
1424 optype = "scrubbing error";
1427 optype = "reserved";
/* Error-cause decode keyed on the first set bit of 'error'. */
1433 err = "read ECC error";
1436 err = "RAS ECC error";
1439 err = "write parity error";
/* NOTE(review): "redundacy" is a typo ("redundancy") in a runtime
 * log string — fixing it changes emitted output, left as-is here. */
1442 err = "redundacy loss";
1448 err = "memory range error";
1451 err = "RTID out of range";
1454 err = "address parity error";
1457 err = "byte enable parity error";
1463 /* FIXME: should convert addr into bank and rank information */
1464 msg = kasprintf(GFP_ATOMIC,
1465 "%s (addr = 0x%08llx, socket=%d, Dimm=%d, Channel=%d, "
1466 "syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n",
1467 type, (long long) m->addr, m->cpu, dimm, channel,
1468 syndrome, core_err_cnt, (long long)m->status,
1469 (long long)m->misc, optype, err);
1473 /* Call the helper to output message */
1474 edac_mc_handle_fbd_ue(mci, 0 /* FIXME: should be rank here */,
1475 0, 0 /* FIXME: should be channel here */, msg);
1481 * i7core_check_error Retrieve and process errors reported by the
1482 * hardware. Called by the Core module.
1484 static void i7core_check_error(struct mem_ctl_info *mci)
/*
 * Polled entry point (installed as mci->edac_check): drain the MCE
 * records queued by i7core_mce_check_error() under the lock, decode
 * them, then refresh the per-socket corrected-error counters.
 */
1486 struct i7core_pvt *pvt = mci->pvt_info;
1489 struct mce *m = NULL;
1490 unsigned long flags;
1492 /* Copy all mce errors into a temporary buffer */
1493 spin_lock_irqsave(&pvt->mce_lock, flags);
1494 if (pvt->mce_count) {
/* GFP_ATOMIC: we allocate while holding a spinlock.
 * NOTE(review): the kmalloc result looks unchecked before the
 * memcpy below — confirm the elided lines guard against NULL. */
1495 m = kmalloc(sizeof(*m) * pvt->mce_count, GFP_ATOMIC);
1497 count = pvt->mce_count;
1498 memcpy(m, &pvt->mce_entry, sizeof(*m) * count);
1502 spin_unlock_irqrestore(&pvt->mce_lock, flags);
1504 /* process mcelog errors outside the lock */
1505 for (i = 0; i < count; i++)
1506 i7core_mce_output_error(mci, &m[i]);
1510 /* check memory count errors on every socket */
1511 for (i = 0; i < pvt->sockets; i++)
1512 check_mc_test_err(mci, i);
1516 * i7core_mce_check_error Replicates mcelog routine to get errors
1517 * This routine simply queues mcelog errors, and
1518 * returns. The errors themselves should be handled later
1519 * by i7core_check_error.
1521 static int i7core_mce_check_error(void *priv, struct mce *mce)
/*
 * edac_mce callback: queue memory-controller machine checks for later
 * decoding by i7core_check_error(); everything else stays with mcelog.
 */
1523 struct mem_ctl_info *mci = priv;
1524 struct i7core_pvt *pvt = mci->pvt_info;
1525 unsigned long flags;
1528 * Just let mcelog handle it if the error is
1529 * outside the memory controller
/* MCACOD test on MCi_STATUS bits 15:0 — presumably selecting
 * memory-channel error codes; confirm against the SDM encoding. */
1531 if (((mce->status & 0xffff) >> 7) != 1)
1534 /* Bank 8 registers are the only ones that we know how to handle */
1538 spin_lock_irqsave(&pvt->mce_lock, flags);
/* Record is silently dropped when the ring buffer is full. */
1539 if (pvt->mce_count < MCE_LOG_LEN) {
1540 memcpy(&pvt->mce_entry[pvt->mce_count], mce, sizeof(*mce));
1543 spin_unlock_irqrestore(&pvt->mce_lock, flags);
1545 /* Handle fatal errors immediately */
1546 if (mce->mcgstatus & 1)
1547 i7core_check_error(mci);
1549 /* Advise mcelog that the errors were handled */
1554 * i7core_probe Probe for ONE instance of device to see if it is
1557 * 0 for FOUND a device
1558 * < 0 for error code
1560 static int __devinit i7core_probe(struct pci_dev *pdev,
1561 const struct pci_device_id *id)
/*
 * Probe one i7core memory controller: reserve its PCI devices, count
 * populated sockets/channels/csrows, allocate and register the EDAC
 * MC control structure, and hook into edac_mce so machine checks are
 * delivered to this driver.  Returns 0 on success, < 0 on error.
 */
1563 struct mem_ctl_info *mci;
1564 struct i7core_pvt *pvt;
1565 int num_channels = 0;
1568 int dev_idx = id->driver_data;
/* Sanity check the table index handed to us via driver_data. */
1572 if (unlikely(dev_idx >= ARRAY_SIZE(i7core_devs)))
1575 /* get the pci devices we want to reserve for our use */
1576 rc = i7core_get_devices();
1577 if (unlikely(rc < 0))
/* Find the highest populated socket to size the controller. */
1581 for (i = NUM_SOCKETS - 1; i > 0; i--)
1582 if (pci_devs[0].pdev[i]) {
1587 for (i = 0; i < sockets; i++) {
1591 /* Check the number of active and not disabled channels */
1592 rc = i7core_get_active_channels(i, &channels, &csrows);
1593 if (unlikely(rc < 0))
1596 num_channels += channels;
1597 num_csrows += csrows;
1600 /* allocate a new MC control structure */
1601 mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels, 0);
1602 if (unlikely(!mci)) {
1607 debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci);
1609 mci->dev = &pdev->dev; /* record ptr to the generic device */
1610 pvt = mci->pvt_info;
1611 memset(pvt, 0, sizeof(*pvt));
1612 pvt->sockets = sockets;
1616 * FIXME: how to handle RDDR3 at MCI level? It is possible to have
1617 * Mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
/* Describe the controller's capabilities to the EDAC core. */
1620 mci->mtype_cap = MEM_FLAG_DDR3;
1621 mci->edac_ctl_cap = EDAC_FLAG_NONE;
1622 mci->edac_cap = EDAC_FLAG_NONE;
1623 mci->mod_name = "i7core_edac.c";
1624 mci->mod_ver = I7CORE_REVISION;
1625 mci->ctl_name = i7core_devs[dev_idx].ctl_name;
1626 mci->dev_name = pci_name(pdev);
1627 mci->ctl_page_to_phys = NULL;
1628 mci->mc_driver_sysfs_attributes = i7core_inj_attrs;
1629 /* Set the function pointer to an actual operation function */
1630 mci->edac_check = i7core_check_error;
1632 /* Store pci devices at mci for faster access */
1633 rc = mci_bind_devs(mci);
1634 if (unlikely(rc < 0))
1637 /* Get dimm basic config */
1638 for (i = 0; i < sockets; i++)
1639 get_dimm_config(mci, &csrow, i);
1641 /* add this new MC control structure to EDAC's list of MCs */
1642 if (unlikely(edac_mc_add_mc(mci))) {
1643 debugf0("MC: " __FILE__
1644 ": %s(): failed edac_mc_add_mc()\n", __func__);
1645 /* FIXME: perhaps some code should go here that disables error
1646 * reporting if we just enabled it
1653 /* allocating generic PCI control info */
1654 i7core_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR);
/* Non-fatal: probe continues without EDAC PCI error reporting. */
1655 if (unlikely(!i7core_pci)) {
1657 "%s(): Unable to create PCI control\n",
1660 "%s(): PCI error report via EDAC not setup\n",
1664 /* Default error mask is any memory */
1665 pvt->inject.channel = 0;
1666 pvt->inject.dimm = -1;
1667 pvt->inject.rank = -1;
1668 pvt->inject.bank = -1;
1669 pvt->inject.page = -1;
1670 pvt->inject.col = -1;
1672 /* Registers on edac_mce in order to receive memory errors */
1673 pvt->edac_mce.priv = mci;
1674 pvt->edac_mce.check_error = i7core_mce_check_error;
1675 spin_lock_init(&pvt->mce_lock);
1677 rc = edac_mce_register(&pvt->edac_mce);
1678 if (unlikely (rc < 0)) {
1679 debugf0("MC: " __FILE__
1680 ": %s(): failed edac_mce_register()\n", __func__);
1684 i7core_printk(KERN_INFO, "Driver loaded.\n");
/* Error unwind: release the PCI devices reserved earlier. */
1692 i7core_put_devices();
1697 * i7core_remove destructor for one instance of device
1700 static void __devexit i7core_remove(struct pci_dev *pdev)
/*
 * Tear down one controller instance: drop the EDAC PCI control, pull
 * the MC out of the EDAC core, unhook from edac_mce and release the
 * reserved PCI devices — the reverse of i7core_probe().
 */
1702 struct mem_ctl_info *mci;
1703 struct i7core_pvt *pvt;
1705 debugf0(__FILE__ ": %s()\n", __func__);
1708 edac_pci_release_generic_ctl(i7core_pci);
1711 mci = edac_mc_del_mc(&pdev->dev);
1715 /* Unregisters on edac_mce so we stop receiving memory errors */
1716 pvt = mci->pvt_info;
1717 edac_mce_unregister(&pvt->edac_mce);
1719 /* retrieve references to resources, and free those resources */
1720 i7core_put_devices();
/* Export the PCI id table so the module auto-loads on matching HW. */
1725 MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);
1728 * i7core_driver pci_driver structure for this module
/* PCI driver glue: binds probe/remove to the id table above. */
1731 static struct pci_driver i7core_driver = {
1732 .name = "i7core_edac",
1733 .probe = i7core_probe,
1734 .remove = __devexit_p(i7core_remove),
1735 .id_table = i7core_pci_tbl,
1739 * i7core_init Module entry function
1740 * Try to initialize this module for its devices
1742 static int __init i7core_init(void)
1746 debugf2("MC: " __FILE__ ": %s()\n", __func__);
1748 /* Ensure that the OPSTATE is set correctly for POLL or NMI */
/* Register with the PCI core; i7core_probe() runs per match. */
1751 pci_rc = pci_register_driver(&i7core_driver);
/* Normalize any positive return into 0 (success). */
1753 return (pci_rc < 0) ? pci_rc : 0;
1757 * i7core_exit() Module exit function
1758 * Unregister the driver
1760 static void __exit i7core_exit(void)
1762 debugf2("MC: " __FILE__ ": %s()\n", __func__);
/* Unregistering triggers i7core_remove() for each bound device. */
1763 pci_unregister_driver(&i7core_driver);
1766 module_init(i7core_init);
1767 module_exit(i7core_exit);
1769 MODULE_LICENSE("GPL");
1770 MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
1771 MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
1772 MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
1775 module_param(edac_op_state, int, 0444);
1776 MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");