/*
 * Intel e7xxx Memory Controller kernel module
 * (C) 2003 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * See "enum e7xxx_chips" below for supported chipsets
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 *	http://www.anime.net/~goemon/linux-ecc/
 *
 * Contributors:
 *	Eric Biederman (Linux Networx)
 *	Tom Zimmerman (Linux Networx)
 *	Jim Garlick (Lawrence Livermore National Labs)
 *	Dave Peterson (Lawrence Livermore National Labs)
 *	That One Guy (Some other place)
 *	Wang Zhenyu (intel.com)
 *
 * $Id: e7xxx_edac.c,v 1.1.1.1 2007/08/03 18:52:30 Exp $
 *
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <linux/slab.h>
#include "edac_mc.h"

#define	E7XXX_REVISION " Ver: 2.0.1 " __DATE__
#define	EDAC_MOD_STR	"e7xxx_edac"

#define e7xxx_printk(level, fmt, arg...) \
	edac_printk(level, "e7xxx", fmt, ##arg)

#define e7xxx_mc_printk(mci, level, fmt, arg...) \
	edac_mc_chipset_printk(mci, level, "e7xxx", fmt, ##arg)

#ifndef PCI_DEVICE_ID_INTEL_7205_0
#define PCI_DEVICE_ID_INTEL_7205_0	0x255d
#endif				/* PCI_DEVICE_ID_INTEL_7205_0 */

#ifndef PCI_DEVICE_ID_INTEL_7205_1_ERR
#define PCI_DEVICE_ID_INTEL_7205_1_ERR	0x2551
#endif				/* PCI_DEVICE_ID_INTEL_7205_1_ERR */

#ifndef PCI_DEVICE_ID_INTEL_7500_0
#define PCI_DEVICE_ID_INTEL_7500_0	0x2540
#endif				/* PCI_DEVICE_ID_INTEL_7500_0 */

#ifndef PCI_DEVICE_ID_INTEL_7500_1_ERR
#define PCI_DEVICE_ID_INTEL_7500_1_ERR	0x2541
#endif				/* PCI_DEVICE_ID_INTEL_7500_1_ERR */

#ifndef PCI_DEVICE_ID_INTEL_7501_0
#define PCI_DEVICE_ID_INTEL_7501_0	0x254c
#endif				/* PCI_DEVICE_ID_INTEL_7501_0 */

#ifndef PCI_DEVICE_ID_INTEL_7501_1_ERR
#define PCI_DEVICE_ID_INTEL_7501_1_ERR	0x2541
#endif				/* PCI_DEVICE_ID_INTEL_7501_1_ERR */

#ifndef PCI_DEVICE_ID_INTEL_7505_0
#define PCI_DEVICE_ID_INTEL_7505_0	0x2550
#endif				/* PCI_DEVICE_ID_INTEL_7505_0 */

#ifndef PCI_DEVICE_ID_INTEL_7505_1_ERR
#define PCI_DEVICE_ID_INTEL_7505_1_ERR	0x2551
#endif				/* PCI_DEVICE_ID_INTEL_7505_1_ERR */

#define E7XXX_NR_CSROWS		8	/* number of csrows */
#define E7XXX_NR_DIMMS		8

/* E7XXX register addresses - device 0 function 0 */
#define E7XXX_DRB		0x60	/* DRAM row boundary register (8b) */
#define E7XXX_DRA		0x70	/* DRAM row attribute register (8b) */
					/*
					 * 31   Device width row 7 0=x8 1=x4
					 * 27   Device width row 6
					 * 23   Device width row 5
					 * 19   Device width row 4
					 * 15   Device width row 3
					 * 11   Device width row 2
					 *  7   Device width row 1
					 *  3   Device width row 0
					 */
#define E7XXX_DRC		0x7C	/* DRAM controller mode reg (32b) */
					/*
					 * 22    Number channels 0=1,1=2
					 * 19:18 DRB Granularity 32/64MB
					 */
#define E7XXX_TOLM		0xC4	/* DRAM top of low memory reg (16b) */
#define E7XXX_REMAPBASE		0xC6	/* DRAM remap base address reg (16b) */
#define E7XXX_REMAPLIMIT	0xC8	/* DRAM remap limit address reg (16b) */

/* E7XXX register addresses - device 0 function 1 */
#define E7XXX_DRAM_FERR		0x80	/* DRAM first error register (8b) */
#define E7XXX_DRAM_NERR		0x82	/* DRAM next error register (8b) */
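					/*
					 * As decoded by this driver (see
					 * e7xxx_get_error_info below):
					 *  bit 0  correctable error logged
					 *  bit 1  uncorrectable error logged
					 */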
#define E7XXX_DRAM_CELOG_ADD	0xA0	/* DRAM first correctable memory */
					/*     error address register (32b) */
					/*
					 * 31:28 Reserved
					 * 27:6  CE address (4k block 33:12)
					 *  5:0  Reserved
					 */
#define E7XXX_DRAM_UELOG_ADD	0xB0	/* DRAM first uncorrectable memory */
					/*     error address register (32b) */
					/*
					 * 31:28 Reserved
					 * 27:6  UE address (4k block 33:12)
					 *  5:0  Reserved
					 */
#define E7XXX_DRAM_CELOG_SYNDROME 0xD0	/* DRAM first correctable memory */
					/*     error syndrome register (16b) */

enum e7xxx_chips {
	E7500 = 0,
	E7501,
	E7505,
	E7205,
};

struct e7xxx_pvt {
	struct pci_dev *bridge_ck;
	u32 tolm;
	u32 remapbase;
	u32 remaplimit;
	const struct e7xxx_dev_info *dev_info;
};

struct e7xxx_dev_info {
	u16 err_dev;
	const char *ctl_name;
};

struct e7xxx_error_info {
	u8 dram_ferr;
	u8 dram_nerr;
	u32 dram_celog_add;
	u16 dram_celog_syndrome;
	u32 dram_uelog_add;
};

static const struct e7xxx_dev_info e7xxx_devs[] = {
	[E7500] = {
		.err_dev = PCI_DEVICE_ID_INTEL_7500_1_ERR,
		.ctl_name = "E7500"
	},
	[E7501] = {
		.err_dev = PCI_DEVICE_ID_INTEL_7501_1_ERR,
		.ctl_name = "E7501"
	},
	[E7505] = {
		.err_dev = PCI_DEVICE_ID_INTEL_7505_1_ERR,
		.ctl_name = "E7505"
	},
	[E7205] = {
		.err_dev = PCI_DEVICE_ID_INTEL_7205_1_ERR,
		.ctl_name = "E7205"
	},
};

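/*
 * Map a CE syndrome to a channel.  Heuristic as implemented below: a
 * syndrome confined to the low byte is taken as channel 0, one confined
 * to the high byte as channel 1; otherwise a clear nibble in the upper
 * byte selects channel 0.
 */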
static inline int e7xxx_find_channel(u16 syndrome)
{
	debugf3("%s()\n", __func__);

	if ((syndrome & 0xff00) == 0)
		return 0;

	if ((syndrome & 0x00ff) == 0)
		return 1;

	if ((syndrome & 0xf000) == 0 || (syndrome & 0x0f00) == 0)
		return 0;

	return 1;
}

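/*
 * Translate a memory-controller page to a system physical page.  Pages
 * below TOLM, and pages at or above 4 GiB but below the remap base, map
 * 1:1; pages between TOLM and 4 GiB are moved into the remap window.
 * All quantities here are in units of 4 KiB pages.
 */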
static unsigned long ctl_page_to_phys(struct mem_ctl_info *mci,
		unsigned long page)
{
	u32 remap;
	struct e7xxx_pvt *pvt = (struct e7xxx_pvt *) mci->pvt_info;

	debugf3("%s()\n", __func__);

	if ((page < pvt->tolm) ||
			((page >= 0x100000) && (page < pvt->remapbase)))
		return page;

	remap = (page - pvt->tolm) + pvt->remapbase;

	if (remap < pvt->remaplimit)
		return remap;

	e7xxx_printk(KERN_ERR, "Invalid page %lx - out of range\n", page);
	return pvt->tolm - 1;
}

static void process_ce(struct mem_ctl_info *mci,
		struct e7xxx_error_info *info)
{
	u32 error_1b, page;
	u16 syndrome;
	int row;
	int channel;

	debugf3("%s()\n", __func__);
	/* read the error address */
	error_1b = info->dram_celog_add;
	page = error_1b >> 6;  /* convert the address to 4k page */
	/* read the syndrome */
	syndrome = info->dram_celog_syndrome;
	row = edac_mc_find_csrow_by_page(mci, page);
	/* convert syndrome to channel */
	channel = e7xxx_find_channel(syndrome);
	edac_mc_handle_ce(mci, page, 0, syndrome, row, channel, "e7xxx CE");
}

static void process_ce_no_info(struct mem_ctl_info *mci)
{
	debugf3("%s()\n", __func__);
	edac_mc_handle_ce_no_info(mci, "e7xxx CE log register overflow");
}

static void process_ue(struct mem_ctl_info *mci,
		struct e7xxx_error_info *info)
{
	u32 error_2b, block_page;
	int row;

	debugf3("%s()\n", __func__);
	/* read the error address */
	error_2b = info->dram_uelog_add;
	block_page = error_2b >> 6;  /* convert to 4k address */
	row = edac_mc_find_csrow_by_page(mci, block_page);
	edac_mc_handle_ue(mci, block_page, 0, row, "e7xxx UE");
}

static void process_ue_no_info(struct mem_ctl_info *mci)
{
	debugf3("%s()\n", __func__);
	edac_mc_handle_ue_no_info(mci, "e7xxx UE log register overflow");
}

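/*
 * Snapshot the error registers from the companion error-reporting PCI
 * function.  FERR/NERR bit 0 flags a logged CE and bit 1 a logged UE
 * (per the decoding below); the address/syndrome logs are only read
 * when the corresponding bit is set, and the set bits are then written
 * back to acknowledge them.
 */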
static void e7xxx_get_error_info(struct mem_ctl_info *mci,
		struct e7xxx_error_info *info)
{
	struct e7xxx_pvt *pvt;

	pvt = (struct e7xxx_pvt *) mci->pvt_info;
	pci_read_config_byte(pvt->bridge_ck, E7XXX_DRAM_FERR,
			&info->dram_ferr);
	pci_read_config_byte(pvt->bridge_ck, E7XXX_DRAM_NERR,
			&info->dram_nerr);

	if ((info->dram_ferr & 1) || (info->dram_nerr & 1)) {
		pci_read_config_dword(pvt->bridge_ck, E7XXX_DRAM_CELOG_ADD,
				&info->dram_celog_add);
		pci_read_config_word(pvt->bridge_ck,
				E7XXX_DRAM_CELOG_SYNDROME,
				&info->dram_celog_syndrome);
	}

	if ((info->dram_ferr & 2) || (info->dram_nerr & 2))
		pci_read_config_dword(pvt->bridge_ck, E7XXX_DRAM_UELOG_ADD,
				&info->dram_uelog_add);

	if (info->dram_ferr & 3)
		pci_write_bits8(pvt->bridge_ck, E7XXX_DRAM_FERR, 0x03, 0x03);

	if (info->dram_nerr & 3)
		pci_write_bits8(pvt->bridge_ck, E7XXX_DRAM_NERR, 0x03, 0x03);
}

static int e7xxx_process_error_info(struct mem_ctl_info *mci,
		struct e7xxx_error_info *info, int handle_errors)
{
	int error_found;

	error_found = 0;

	/* decode and report errors */
	if (info->dram_ferr & 1) {	/* check first error correctable */
		error_found = 1;

		if (handle_errors)
			process_ce(mci, info);
	}

	if (info->dram_ferr & 2) {	/* check first error uncorrectable */
		error_found = 1;

		if (handle_errors)
			process_ue(mci, info);
	}

	if (info->dram_nerr & 1) {	/* check next error correctable */
		error_found = 1;

		if (handle_errors) {
			if (info->dram_ferr & 1)
				process_ce_no_info(mci);
			else
				process_ce(mci, info);
		}
	}

	if (info->dram_nerr & 2) {	/* check next error uncorrectable */
		error_found = 1;

		if (handle_errors) {
			if (info->dram_ferr & 2)
				process_ue_no_info(mci);
			else
				process_ue(mci, info);
		}
	}

	return error_found;
}

static void e7xxx_check(struct mem_ctl_info *mci)
{
	struct e7xxx_error_info info;

	debugf3("%s()\n", __func__);
	e7xxx_get_error_info(mci, &info);
	e7xxx_process_error_info(mci, &info, 1);
}

/* Return 1 if dual channel mode is active.  Else return 0.
 * Only the e7501 can run single channel; the other chipsets are
 * always dual channel.
 */
static inline int dual_channel_active(u32 drc, int dev_idx)
{
	return (dev_idx == E7501) ? ((drc >> 22) & 0x1) : 1;
}


/* Return DRB granularity (0=32MB, 1=64MB). */
static inline int drb_granularity(u32 drc, int dev_idx)
{
	/* only the e7501 reports granularity; the others use 64MB */
	return (dev_idx == E7501) ? ((drc >> 18) & 0x3) : 1;
}


static void e7xxx_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
		int dev_idx, u32 drc)
{
	unsigned long last_cumul_size;
	int index;
	u8 value;
	u32 dra, cumul_size;
	int drc_chan, drc_drbg, drc_ddim, mem_dev;
	struct csrow_info *csrow;

	pci_read_config_dword(pdev, E7XXX_DRA, &dra);
	drc_chan = dual_channel_active(drc, dev_idx);
	drc_drbg = drb_granularity(drc, dev_idx);
	drc_ddim = (drc >> 20) & 0x3;
	last_cumul_size = 0;

	/* The dram row boundary (DRB) reg values are the boundary addresses
	 * for each DRAM row, with a granularity of 32 or 64MB (single/dual
	 * channel operation).  DRB regs are cumulative; therefore DRB7 will
	 * contain the total memory contained in all eight rows.
	 */
	for (index = 0; index < mci->nr_csrows; index++) {
		/* mem_dev 0=x8, 1=x4 */
		mem_dev = (dra >> (index * 4 + 3)) & 0x1;
		csrow = &mci->csrows[index];

		pci_read_config_byte(pdev, E7XXX_DRB + index, &value);
		/* convert a 32 or 64 MiB DRB value to a cumulative page count */
		cumul_size = value << (25 + drc_drbg - PAGE_SHIFT);
		debugf3("%s(): (%d) cumul_size 0x%x\n", __func__, index,
			cumul_size);
		if (cumul_size == last_cumul_size)
			continue;	/* not populated */

		csrow->first_page = last_cumul_size;
		csrow->last_page = cumul_size - 1;
		csrow->nr_pages = cumul_size - last_cumul_size;
		last_cumul_size = cumul_size;
		csrow->grain = 1 << 12;	/* 4KiB - resolution of CELOG */
		csrow->mtype = MEM_RDDR;	/* only one type supported */
		csrow->dtype = mem_dev ? DEV_X4 : DEV_X8;

		/*
		 * if single channel or x8 devices then SECDED
		 * if dual channel and x4 then S4ECD4ED
		 */
		if (drc_ddim) {
			if (drc_chan && mem_dev) {
				csrow->edac_mode = EDAC_S4ECD4ED;
				mci->edac_cap |= EDAC_FLAG_S4ECD4ED;
			} else {
				csrow->edac_mode = EDAC_SECDED;
				mci->edac_cap |= EDAC_FLAG_SECDED;
			}
		} else
			csrow->edac_mode = EDAC_NONE;
	}
}

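/*
 * Probe one controller: read the DRC to determine channel mode, allocate
 * the mem_ctl_info, look up the companion error-reporting device (device
 * 0 function 1), initialize the csrows from the DRB/DRA registers, read
 * the TOLM/remap window, clear any stale error state and register with
 * the EDAC core.
 */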
static int e7xxx_probe1(struct pci_dev *pdev, int dev_idx)
{
	u16 pci_data;
	struct mem_ctl_info *mci = NULL;
	struct e7xxx_pvt *pvt = NULL;
	u32 drc;
	int drc_chan;
	struct e7xxx_error_info discard;

	debugf0("%s(): mci\n", __func__);
	pci_read_config_dword(pdev, E7XXX_DRC, &drc);

	drc_chan = dual_channel_active(drc, dev_idx);
	mci = edac_mc_alloc(sizeof(*pvt), E7XXX_NR_CSROWS, drc_chan + 1);

	if (mci == NULL)
		return -ENOMEM;

	debugf3("%s(): init mci\n", __func__);
	mci->mtype_cap = MEM_FLAG_RDDR;
	mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED |
			EDAC_FLAG_S4ECD4ED;
	mci->mod_name = EDAC_MOD_STR;
	mci->mod_ver = E7XXX_REVISION;
	mci->dev = &pdev->dev;
	debugf3("%s(): init pvt\n", __func__);
	pvt = (struct e7xxx_pvt *) mci->pvt_info;
	pvt->dev_info = &e7xxx_devs[dev_idx];
	pvt->bridge_ck = pci_get_device(PCI_VENDOR_ID_INTEL,
					pvt->dev_info->err_dev,
					pvt->bridge_ck);

	if (!pvt->bridge_ck) {
		e7xxx_printk(KERN_ERR, "error reporting device not found: "
			"vendor 0x%x device 0x%x (broken BIOS?)\n",
			PCI_VENDOR_ID_INTEL, e7xxx_devs[dev_idx].err_dev);
		goto fail0;
	}

	debugf3("%s(): more mci init\n", __func__);
	mci->ctl_name = pvt->dev_info->ctl_name;
	mci->edac_check = e7xxx_check;
	mci->ctl_page_to_phys = ctl_page_to_phys;
	e7xxx_init_csrows(mci, pdev, dev_idx, drc);
	mci->edac_cap |= EDAC_FLAG_NONE;
	debugf3("%s(): tolm, remapbase, remaplimit\n", __func__);
	/* load the top of low memory, remap base, and remap limit vars */
	pci_read_config_word(pdev, E7XXX_TOLM, &pci_data);
	pvt->tolm = ((u32) pci_data) << 4;
	pci_read_config_word(pdev, E7XXX_REMAPBASE, &pci_data);
	pvt->remapbase = ((u32) pci_data) << 14;
	pci_read_config_word(pdev, E7XXX_REMAPLIMIT, &pci_data);
	pvt->remaplimit = ((u32) pci_data) << 14;
	e7xxx_printk(KERN_INFO,
		"tolm = %x, remapbase = %x, remaplimit = %x\n", pvt->tolm,
		pvt->remapbase, pvt->remaplimit);

	/* clear any pending errors, or initial state bits */
	e7xxx_get_error_info(mci, &discard);

	/* Here we assume that we will never see multiple instances of this
	 * type of memory controller.  The ID is therefore hardcoded to 0.
	 */
	if (edac_mc_add_mc(mci, 0)) {
		debugf3("%s(): failed edac_mc_add_mc()\n", __func__);
		goto fail1;
	}

	/* get this far and it's successful */
	debugf3("%s(): success\n", __func__);
	return 0;

fail1:
	pci_dev_put(pvt->bridge_ck);

fail0:
	edac_mc_free(mci);

	return -ENODEV;
}

/* returns 0 if successful, or negative on error */
static int __devinit e7xxx_init_one(struct pci_dev *pdev,
		const struct pci_device_id *ent)
{
	debugf0("%s()\n", __func__);

	/* wake up and enable device */
	return pci_enable_device(pdev) ?
		-EIO : e7xxx_probe1(pdev, ent->driver_data);
}

static void __devexit e7xxx_remove_one(struct pci_dev *pdev)
{
	struct mem_ctl_info *mci;
	struct e7xxx_pvt *pvt;

	debugf0("%s()\n", __func__);

	if ((mci = edac_mc_del_mc(&pdev->dev)) == NULL)
		return;

	pvt = (struct e7xxx_pvt *) mci->pvt_info;
	pci_dev_put(pvt->bridge_ck);
	edac_mc_free(mci);
}

static const struct pci_device_id e7xxx_pci_tbl[] __devinitdata = {
	{
		PCI_VEND_DEV(INTEL, 7205_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
		E7205
	},
	{
		PCI_VEND_DEV(INTEL, 7500_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
		E7500
	},
	{
		PCI_VEND_DEV(INTEL, 7501_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
		E7501
	},
	{
		PCI_VEND_DEV(INTEL, 7505_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
		E7505
	},
	{
		0,
	}	/* 0 terminated list. */
};

MODULE_DEVICE_TABLE(pci, e7xxx_pci_tbl);

static struct pci_driver e7xxx_driver = {
	.name = EDAC_MOD_STR,
	.probe = e7xxx_init_one,
	.remove = __devexit_p(e7xxx_remove_one),
	.id_table = e7xxx_pci_tbl,
};

static int __init e7xxx_init(void)
{
	return pci_register_driver(&e7xxx_driver);
}

static void __exit e7xxx_exit(void)
{
	pci_unregister_driver(&e7xxx_driver);
}

module_init(e7xxx_init);
module_exit(e7xxx_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh et al\n"
	"Based on work by Dan Hollis et al");
MODULE_DESCRIPTION("MC support for Intel e7xxx memory controllers");