// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * APM X-Gene SoC EDAC (error detection and correction)
 *
 * Copyright (c) 2015, Applied Micro Circuits Corporation
 * Author: Feng Kan <fkan@apm.com>
 *         Loc Ho <lho@apm.com>
 */

#include <linux/ctype.h>
#include <linux/edac.h>
#include <linux/interrupt.h>
#include <linux/mfd/syscon.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/regmap.h>

#include "edac_module.h"

#define EDAC_MOD_STR			"xgene_edac"

/* Global error configuration status registers (CSR) */
#define PCPHPERRINTSTS			0x0000
#define PCPHPERRINTMSK			0x0004
#define  MCU_CTL_ERR_MASK		BIT(12)
#define  IOB_PA_ERR_MASK		BIT(11)
#define  IOB_BA_ERR_MASK		BIT(10)
#define  IOB_XGIC_ERR_MASK		BIT(9)
#define  IOB_RB_ERR_MASK		BIT(8)
#define  L3C_UNCORR_ERR_MASK		BIT(5)
#define  MCU_UNCORR_ERR_MASK		BIT(4)
#define  PMD3_MERR_MASK			BIT(3)
#define  PMD2_MERR_MASK			BIT(2)
#define  PMD1_MERR_MASK			BIT(1)
#define  PMD0_MERR_MASK			BIT(0)
#define PCPLPERRINTSTS			0x0008
#define PCPLPERRINTMSK			0x000C
#define  CSW_SWITCH_TRACE_ERR_MASK	BIT(2)
#define  L3C_CORR_ERR_MASK		BIT(1)
#define  MCU_CORR_ERR_MASK		BIT(0)
#define MEMERRINTSTS			0x0010
#define MEMERRINTMSK			0x0014

struct xgene_edac {
	struct device		*dev;
	struct regmap		*csw_map;
	struct regmap		*mcba_map;
	struct regmap		*mcbb_map;
	struct regmap		*efuse_map;
	struct regmap		*rb_map;
	void __iomem		*pcp_csr;
	spinlock_t		lock;
	struct dentry           *dfs;

	struct list_head	mcus;
	struct list_head	pmds;
	struct list_head	l3s;
	struct list_head	socs;

	struct mutex		mc_lock;
	int			mc_active_mask;
	int			mc_registered_mask;
};

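/*
 * Accessors for the PCP (processor complex) CSR block, which holds the
 * top-level error status and mask registers shared by all error sources.
 * Read-modify-write of the mask registers is serialized with edac->lock
 * since the high- and low-priority interrupt paths share them.
 */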
static void xgene_edac_pcp_rd(struct xgene_edac *edac, u32 reg, u32 *val)
{
	*val = readl(edac->pcp_csr + reg);
}

static void xgene_edac_pcp_clrbits(struct xgene_edac *edac, u32 reg,
				   u32 bits_mask)
{
	u32 val;

	spin_lock(&edac->lock);
	val = readl(edac->pcp_csr + reg);
	val &= ~bits_mask;
	writel(val, edac->pcp_csr + reg);
	spin_unlock(&edac->lock);
}

static void xgene_edac_pcp_setbits(struct xgene_edac *edac, u32 reg,
				   u32 bits_mask)
{
	u32 val;

	spin_lock(&edac->lock);
	val = readl(edac->pcp_csr + reg);
	val |= bits_mask;
	writel(val, edac->pcp_csr + reg);
	spin_unlock(&edac->lock);
}

/* Memory controller error CSR */
#define MCU_MAX_RANK			8
#define MCU_RANK_STRIDE			0x40

#define MCUGECR				0x0110
#define  MCU_GECR_DEMANDUCINTREN_MASK	BIT(0)
#define  MCU_GECR_BACKUCINTREN_MASK	BIT(1)
#define  MCU_GECR_CINTREN_MASK		BIT(2)
#define  MCU_GECR_MCUADDRERREN_MASK	BIT(9)
#define MCUGESR				0x0114
#define  MCU_GESR_ADDRNOMATCH_ERR_MASK	BIT(7)
#define  MCU_GESR_ADDRMULTIMATCH_ERR_MASK	BIT(6)
#define  MCU_GESR_PHYP_ERR_MASK		BIT(3)
#define MCUESRR0			0x0314
#define  MCU_ESRR_MULTUCERR_MASK	BIT(3)
#define  MCU_ESRR_BACKUCERR_MASK	BIT(2)
#define  MCU_ESRR_DEMANDUCERR_MASK	BIT(1)
#define  MCU_ESRR_CERR_MASK		BIT(0)
#define MCUESRRA0			0x0318
#define MCUEBLRR0			0x031c
#define  MCU_EBLRR_ERRBANK_RD(src)	(((src) & 0x00000007) >> 0)
#define MCUERCRR0			0x0320
#define  MCU_ERCRR_ERRROW_RD(src)	(((src) & 0xFFFF0000) >> 16)
#define  MCU_ERCRR_ERRCOL_RD(src)	((src) & 0x00000FFF)
#define MCUSBECNT0			0x0324
#define MCU_SBECNT_COUNT(src)		((src) & 0xFFFF)

#define CSW_CSWCR			0x0000
#define  CSW_CSWCR_DUALMCB_MASK		BIT(0)

#define MCBADDRMR			0x0000
#define  MCBADDRMR_MCU_INTLV_MODE_MASK	BIT(3)
#define  MCBADDRMR_DUALMCU_MODE_MASK	BIT(2)
#define  MCBADDRMR_MCB_INTLV_MODE_MASK	BIT(1)
#define  MCBADDRMR_ADDRESS_MODE_MASK	BIT(0)

struct xgene_edac_mc_ctx {
	struct list_head	next;
	char			*name;
	struct mem_ctl_info	*mci;
	struct xgene_edac	*edac;
	void __iomem		*mcu_csr;
	u32			mcu_id;
};

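/*
 * debugfs write handler for error injection: any write to the node arms
 * every error-detect enable bit (MCUESRRA) on all eight ranks; the data
 * written is ignored.
 */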
static ssize_t xgene_edac_mc_err_inject_write(struct file *file,
					      const char __user *data,
					      size_t count, loff_t *ppos)
{
	struct mem_ctl_info *mci = file->private_data;
	struct xgene_edac_mc_ctx *ctx = mci->pvt_info;
	int i;

	for (i = 0; i < MCU_MAX_RANK; i++) {
		writel(MCU_ESRR_MULTUCERR_MASK | MCU_ESRR_BACKUCERR_MASK |
		       MCU_ESRR_DEMANDUCERR_MASK | MCU_ESRR_CERR_MASK,
		       ctx->mcu_csr + MCUESRRA0 + i * MCU_RANK_STRIDE);
	}
	return count;
}

static const struct file_operations xgene_edac_mc_debug_inject_fops = {
	.open = simple_open,
	.write = xgene_edac_mc_err_inject_write,
	.llseek = generic_file_llseek,
};

static void xgene_edac_mc_create_debugfs_node(struct mem_ctl_info *mci)
{
	if (!IS_ENABLED(CONFIG_EDAC_DEBUG))
		return;

	if (!mci->debugfs)
		return;

	edac_debugfs_create_file("inject_ctrl", S_IWUSR, mci->debugfs, mci,
				 &xgene_edac_mc_debug_inject_fops);
}

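/*
 * Polled check routine: consult the global PCP status first so the
 * per-rank MCU registers are only scanned when an MCU error is actually
 * pending, then report and clear any rank-level and controller-level
 * error state.
 */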
static void xgene_edac_mc_check(struct mem_ctl_info *mci)
{
	struct xgene_edac_mc_ctx *ctx = mci->pvt_info;
	unsigned int pcp_hp_stat;
	unsigned int pcp_lp_stat;
	u32 reg;
	u32 rank;
	u32 bank;
	u32 count;
	u32 col_row;

	xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat);
	xgene_edac_pcp_rd(ctx->edac, PCPLPERRINTSTS, &pcp_lp_stat);
	if (!((MCU_UNCORR_ERR_MASK & pcp_hp_stat) ||
	      (MCU_CTL_ERR_MASK & pcp_hp_stat) ||
	      (MCU_CORR_ERR_MASK & pcp_lp_stat)))
		return;

	for (rank = 0; rank < MCU_MAX_RANK; rank++) {
		reg = readl(ctx->mcu_csr + MCUESRR0 + rank * MCU_RANK_STRIDE);

		/* Detect uncorrectable memory error */
		if (reg & (MCU_ESRR_DEMANDUCERR_MASK |
			   MCU_ESRR_BACKUCERR_MASK)) {
			/* Detected uncorrectable memory error */
			edac_mc_chipset_printk(mci, KERN_ERR, "X-Gene",
				"MCU uncorrectable error at rank %d\n", rank);

			edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
				1, 0, 0, 0, 0, 0, -1, mci->ctl_name, "");
		}

		/* Detect correctable memory error */
		if (reg & MCU_ESRR_CERR_MASK) {
			bank = readl(ctx->mcu_csr + MCUEBLRR0 +
				     rank * MCU_RANK_STRIDE);
			col_row = readl(ctx->mcu_csr + MCUERCRR0 +
					rank * MCU_RANK_STRIDE);
			count = readl(ctx->mcu_csr + MCUSBECNT0 +
				      rank * MCU_RANK_STRIDE);
			edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene",
				"MCU correctable error at rank %d bank %d column %d row %d count %d\n",
				rank, MCU_EBLRR_ERRBANK_RD(bank),
				MCU_ERCRR_ERRCOL_RD(col_row),
				MCU_ERCRR_ERRROW_RD(col_row),
				MCU_SBECNT_COUNT(count));

			edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
				1, 0, 0, 0, 0, 0, -1, mci->ctl_name, "");
		}

		/* Clear all error registers */
		writel(0x0, ctx->mcu_csr + MCUEBLRR0 + rank * MCU_RANK_STRIDE);
		writel(0x0, ctx->mcu_csr + MCUERCRR0 + rank * MCU_RANK_STRIDE);
		writel(0x0, ctx->mcu_csr + MCUSBECNT0 +
		       rank * MCU_RANK_STRIDE);
		writel(reg, ctx->mcu_csr + MCUESRR0 + rank * MCU_RANK_STRIDE);
	}

	/* Detect memory controller error */
	reg = readl(ctx->mcu_csr + MCUGESR);
	if (reg) {
		if (reg & MCU_GESR_ADDRNOMATCH_ERR_MASK)
			edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene",
				"MCU address mismatch error\n");
		if (reg & MCU_GESR_ADDRMULTIMATCH_ERR_MASK)
			edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene",
				"MCU address multi-match error\n");

		writel(reg, ctx->mcu_csr + MCUGESR);
	}
}

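/*
 * Enable or disable MCU error interrupts, both at the MCU itself (MCUGECR)
 * and at the top-level PCP mask registers. Only used when the driver runs
 * in interrupt mode.
 */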
static void xgene_edac_mc_irq_ctl(struct mem_ctl_info *mci, bool enable)
{
	struct xgene_edac_mc_ctx *ctx = mci->pvt_info;
	unsigned int val;

	if (edac_op_state != EDAC_OPSTATE_INT)
		return;

	mutex_lock(&ctx->edac->mc_lock);

	/*
	 * Since there is only a single bit to enable errors and the
	 * interrupt mask, the top-level interrupt must only be enabled
	 * after all MCUs have registered. Otherwise, if an error occurs
	 * on an MCU that has not yet registered, the interrupt will never
	 * get cleared. To determine when all active MCUs have registered,
	 * keep track of active MCUs and registered MCUs.
	 */
	if (enable) {
		/* Set registered MCU bit */
		ctx->edac->mc_registered_mask |= 1 << ctx->mcu_id;

		/* Enable interrupt after all active MCUs have registered */
		if (ctx->edac->mc_registered_mask ==
		    ctx->edac->mc_active_mask) {
			/* Enable memory controller top level interrupt */
			xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
					       MCU_UNCORR_ERR_MASK |
					       MCU_CTL_ERR_MASK);
			xgene_edac_pcp_clrbits(ctx->edac, PCPLPERRINTMSK,
					       MCU_CORR_ERR_MASK);
		}

		/* Enable MCU interrupt and error reporting */
		val = readl(ctx->mcu_csr + MCUGECR);
		val |= MCU_GECR_DEMANDUCINTREN_MASK |
		       MCU_GECR_BACKUCINTREN_MASK |
		       MCU_GECR_CINTREN_MASK |
		       MCU_GECR_MCUADDRERREN_MASK;
		writel(val, ctx->mcu_csr + MCUGECR);
	} else {
		/* Disable MCU interrupt */
		val = readl(ctx->mcu_csr + MCUGECR);
		val &= ~(MCU_GECR_DEMANDUCINTREN_MASK |
			 MCU_GECR_BACKUCINTREN_MASK |
			 MCU_GECR_CINTREN_MASK |
			 MCU_GECR_MCUADDRERREN_MASK);
		writel(val, ctx->mcu_csr + MCUGECR);

		/* Disable memory controller top level interrupt */
		xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
				       MCU_UNCORR_ERR_MASK | MCU_CTL_ERR_MASK);
		xgene_edac_pcp_setbits(ctx->edac, PCPLPERRINTMSK,
				       MCU_CORR_ERR_MASK);

		/* Clear registered MCU bit */
		ctx->edac->mc_registered_mask &= ~(1 << ctx->mcu_id);
	}

	mutex_unlock(&ctx->edac->mc_lock);
}

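/*
 * Work out which MCUs are wired up from the CSW and MCB configuration:
 * dual-MCB systems run either all four MCUs or MCU0/MCU2, single-MCB
 * systems run either MCU0/MCU1 or MCU0 alone. The resulting mask is
 * cached in mc_active_mask the first time it is computed.
 */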
static int xgene_edac_mc_is_active(struct xgene_edac_mc_ctx *ctx, int mc_idx)
{
	unsigned int reg;
	u32 mcu_mask;

	if (regmap_read(ctx->edac->csw_map, CSW_CSWCR, &reg))
		return 0;

	if (reg & CSW_CSWCR_DUALMCB_MASK) {
		/*
		 * Dual MCB active - Determine if all 4 active or just MCU0
		 * and MCU2 active
		 */
		if (regmap_read(ctx->edac->mcbb_map, MCBADDRMR, &reg))
			return 0;
		mcu_mask = (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0xF : 0x5;
	} else {
		/*
		 * Single MCB active - Determine if MCU0/MCU1 or just MCU0
		 * active
		 */
		if (regmap_read(ctx->edac->mcba_map, MCBADDRMR, &reg))
			return 0;
		mcu_mask = (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0x3 : 0x1;
	}

	/* Save the active MC mask if it hasn't been set already */
	if (!ctx->edac->mc_active_mask)
		ctx->edac->mc_active_mask = mcu_mask;

	return (mcu_mask & (1 << mc_idx)) ? 1 : 0;
}

static int xgene_edac_mc_add(struct xgene_edac *edac, struct device_node *np)
{
	struct mem_ctl_info *mci;
	struct edac_mc_layer layers[2];
	struct xgene_edac_mc_ctx tmp_ctx;
	struct xgene_edac_mc_ctx *ctx;
	struct resource res;
	int rc;

	memset(&tmp_ctx, 0, sizeof(tmp_ctx));
	tmp_ctx.edac = edac;

	if (!devres_open_group(edac->dev, xgene_edac_mc_add, GFP_KERNEL))
		return -ENOMEM;

	rc = of_address_to_resource(np, 0, &res);
	if (rc < 0) {
		dev_err(edac->dev, "no MCU resource address\n");
		goto err_group;
	}
	tmp_ctx.mcu_csr = devm_ioremap_resource(edac->dev, &res);
	if (IS_ERR(tmp_ctx.mcu_csr)) {
		dev_err(edac->dev, "unable to map MCU resource\n");
		rc = PTR_ERR(tmp_ctx.mcu_csr);
		goto err_group;
	}

	/* Ignore non-active MCU */
	if (of_property_read_u32(np, "memory-controller", &tmp_ctx.mcu_id)) {
		dev_err(edac->dev, "no memory-controller property\n");
		rc = -ENODEV;
		goto err_group;
	}
	if (!xgene_edac_mc_is_active(&tmp_ctx, tmp_ctx.mcu_id)) {
		rc = -ENODEV;
		goto err_group;
	}

	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
	layers[0].size = 4;
	layers[0].is_virt_csrow = true;
	layers[1].type = EDAC_MC_LAYER_CHANNEL;
	layers[1].size = 2;
	layers[1].is_virt_csrow = false;
	mci = edac_mc_alloc(tmp_ctx.mcu_id, ARRAY_SIZE(layers), layers,
			    sizeof(*ctx));
	if (!mci) {
		rc = -ENOMEM;
		goto err_group;
	}

	ctx = mci->pvt_info;
	*ctx = tmp_ctx;		/* Copy over resource value */
	ctx->name = "xgene_edac_mc_err";
	ctx->mci = mci;
	mci->pdev = &mci->dev;
	mci->ctl_name = ctx->name;
	mci->dev_name = ctx->name;

	mci->mtype_cap = MEM_FLAG_RDDR | MEM_FLAG_RDDR2 | MEM_FLAG_RDDR3 |
			 MEM_FLAG_DDR | MEM_FLAG_DDR2 | MEM_FLAG_DDR3;
	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
	mci->edac_cap = EDAC_FLAG_SECDED;
	mci->mod_name = EDAC_MOD_STR;
	mci->ctl_page_to_phys = NULL;
	mci->scrub_cap = SCRUB_FLAG_HW_SRC;
	mci->scrub_mode = SCRUB_HW_SRC;

	if (edac_op_state == EDAC_OPSTATE_POLL)
		mci->edac_check = xgene_edac_mc_check;

	if (edac_mc_add_mc(mci)) {
		dev_err(edac->dev, "edac_mc_add_mc failed\n");
		rc = -EINVAL;
		goto err_free;
	}

	xgene_edac_mc_create_debugfs_node(mci);

	list_add(&ctx->next, &edac->mcus);

	xgene_edac_mc_irq_ctl(mci, true);

	devres_remove_group(edac->dev, xgene_edac_mc_add);

	dev_info(edac->dev, "X-Gene EDAC MC registered\n");
	return 0;

err_free:
	edac_mc_free(mci);
err_group:
	devres_release_group(edac->dev, xgene_edac_mc_add);
	return rc;
}

static int xgene_edac_mc_remove(struct xgene_edac_mc_ctx *mcu)
{
	xgene_edac_mc_irq_ctl(mcu->mci, false);
	edac_mc_del_mc(&mcu->mci->dev);
	edac_mc_free(mcu->mci);
	return 0;
}

/* CPU L1/L2 error CSR */
#define MAX_CPU_PER_PMD				2
#define CPU_CSR_STRIDE				0x00100000
#define CPU_L2C_PAGE				0x000D0000
#define CPU_MEMERR_L2C_PAGE			0x000E0000
#define CPU_MEMERR_CPU_PAGE			0x000F0000

#define MEMERR_CPU_ICFECR_PAGE_OFFSET		0x0000
#define MEMERR_CPU_ICFESR_PAGE_OFFSET		0x0004
#define  MEMERR_CPU_ICFESR_ERRWAY_RD(src)	(((src) & 0xFF000000) >> 24)
#define  MEMERR_CPU_ICFESR_ERRINDEX_RD(src)	(((src) & 0x003F0000) >> 16)
#define  MEMERR_CPU_ICFESR_ERRINFO_RD(src)	(((src) & 0x0000FF00) >> 8)
#define  MEMERR_CPU_ICFESR_ERRTYPE_RD(src)	(((src) & 0x00000070) >> 4)
#define  MEMERR_CPU_ICFESR_MULTCERR_MASK	BIT(2)
#define  MEMERR_CPU_ICFESR_CERR_MASK		BIT(0)
#define MEMERR_CPU_LSUESR_PAGE_OFFSET		0x000c
#define  MEMERR_CPU_LSUESR_ERRWAY_RD(src)	(((src) & 0xFF000000) >> 24)
#define  MEMERR_CPU_LSUESR_ERRINDEX_RD(src)	(((src) & 0x003F0000) >> 16)
#define  MEMERR_CPU_LSUESR_ERRINFO_RD(src)	(((src) & 0x0000FF00) >> 8)
#define  MEMERR_CPU_LSUESR_ERRTYPE_RD(src)	(((src) & 0x00000070) >> 4)
#define  MEMERR_CPU_LSUESR_MULTCERR_MASK	BIT(2)
#define  MEMERR_CPU_LSUESR_CERR_MASK		BIT(0)
#define MEMERR_CPU_LSUECR_PAGE_OFFSET		0x0008
#define MEMERR_CPU_MMUECR_PAGE_OFFSET		0x0010
#define MEMERR_CPU_MMUESR_PAGE_OFFSET		0x0014
#define  MEMERR_CPU_MMUESR_ERRWAY_RD(src)	(((src) & 0xFF000000) >> 24)
#define  MEMERR_CPU_MMUESR_ERRINDEX_RD(src)	(((src) & 0x007F0000) >> 16)
#define  MEMERR_CPU_MMUESR_ERRINFO_RD(src)	(((src) & 0x0000FF00) >> 8)
#define  MEMERR_CPU_MMUESR_ERRREQSTR_LSU_MASK	BIT(7)
#define  MEMERR_CPU_MMUESR_ERRTYPE_RD(src)	(((src) & 0x00000070) >> 4)
#define  MEMERR_CPU_MMUESR_MULTCERR_MASK	BIT(2)
#define  MEMERR_CPU_MMUESR_CERR_MASK		BIT(0)
#define MEMERR_CPU_ICFESRA_PAGE_OFFSET		0x0804
#define MEMERR_CPU_LSUESRA_PAGE_OFFSET		0x080c
#define MEMERR_CPU_MMUESRA_PAGE_OFFSET		0x0814

#define MEMERR_L2C_L2ECR_PAGE_OFFSET		0x0000
#define MEMERR_L2C_L2ESR_PAGE_OFFSET		0x0004
#define  MEMERR_L2C_L2ESR_ERRSYN_RD(src)	(((src) & 0xFF000000) >> 24)
#define  MEMERR_L2C_L2ESR_ERRWAY_RD(src)	(((src) & 0x00FC0000) >> 18)
#define  MEMERR_L2C_L2ESR_ERRCPU_RD(src)	(((src) & 0x00020000) >> 17)
#define  MEMERR_L2C_L2ESR_ERRGROUP_RD(src)	(((src) & 0x0000E000) >> 13)
#define  MEMERR_L2C_L2ESR_ERRACTION_RD(src)	(((src) & 0x00001C00) >> 10)
#define  MEMERR_L2C_L2ESR_ERRTYPE_RD(src)	(((src) & 0x00000300) >> 8)
#define  MEMERR_L2C_L2ESR_MULTUCERR_MASK	BIT(3)
#define  MEMERR_L2C_L2ESR_MULTICERR_MASK	BIT(2)
#define  MEMERR_L2C_L2ESR_UCERR_MASK		BIT(1)
#define  MEMERR_L2C_L2ESR_ERR_MASK		BIT(0)
#define MEMERR_L2C_L2EALR_PAGE_OFFSET		0x0008
#define CPUX_L2C_L2RTOCR_PAGE_OFFSET		0x0010
#define MEMERR_L2C_L2EAHR_PAGE_OFFSET		0x000c
#define CPUX_L2C_L2RTOSR_PAGE_OFFSET		0x0014
#define  MEMERR_L2C_L2RTOSR_MULTERR_MASK	BIT(1)
#define  MEMERR_L2C_L2RTOSR_ERR_MASK		BIT(0)
#define CPUX_L2C_L2RTOALR_PAGE_OFFSET		0x0018
#define CPUX_L2C_L2RTOAHR_PAGE_OFFSET		0x001c
#define MEMERR_L2C_L2ESRA_PAGE_OFFSET		0x0804

/*
 * Processor Module Domain (PMD) context - Context for a pair of processors.
 * Each PMD consists of 2 CPUs and a shared L2 cache. Each CPU has its own
 * L1 cache.
 */
struct xgene_edac_pmd_ctx {
	struct list_head	next;
	struct device		ddev;
	char			*name;
	struct xgene_edac	*edac;
	struct edac_device_ctl_info *edac_dev;
	void __iomem		*pmd_csr;
	u32			pmd;
	int			version;
};

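/*
 * Check one CPU's L1 error syndrome registers in turn - instruction cache
 * fetch (ICF), load/store unit (LSU) and MMU - decoding and logging any
 * pending error before clearing it and counting it as a correctable event.
 */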
static void xgene_edac_pmd_l1_check(struct edac_device_ctl_info *edac_dev,
				    int cpu_idx)
{
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	void __iomem *pg_f;
	u32 val;

	pg_f = ctx->pmd_csr + cpu_idx * CPU_CSR_STRIDE + CPU_MEMERR_CPU_PAGE;

	val = readl(pg_f + MEMERR_CPU_ICFESR_PAGE_OFFSET);
	if (!val)
		goto chk_lsu;
	dev_err(edac_dev->dev,
		"CPU%d L1 memory error ICF 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X\n",
		ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val,
		MEMERR_CPU_ICFESR_ERRWAY_RD(val),
		MEMERR_CPU_ICFESR_ERRINDEX_RD(val),
		MEMERR_CPU_ICFESR_ERRINFO_RD(val));
	if (val & MEMERR_CPU_ICFESR_CERR_MASK)
		dev_err(edac_dev->dev, "One or more correctable errors\n");
	if (val & MEMERR_CPU_ICFESR_MULTCERR_MASK)
		dev_err(edac_dev->dev, "Multiple correctable errors\n");
	switch (MEMERR_CPU_ICFESR_ERRTYPE_RD(val)) {
	case 1:
		dev_err(edac_dev->dev, "L1 TLB multiple hit\n");
		break;
	case 2:
		dev_err(edac_dev->dev, "Way select multiple hit\n");
		break;
	case 3:
		dev_err(edac_dev->dev, "Physical tag parity error\n");
		break;
	case 4:
	case 5:
		dev_err(edac_dev->dev, "L1 data parity error\n");
		break;
	case 6:
		dev_err(edac_dev->dev, "L1 pre-decode parity error\n");
		break;
	}

	/* Clear any HW errors */
	writel(val, pg_f + MEMERR_CPU_ICFESR_PAGE_OFFSET);

	if (val & (MEMERR_CPU_ICFESR_CERR_MASK |
		   MEMERR_CPU_ICFESR_MULTCERR_MASK))
		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);

chk_lsu:
	val = readl(pg_f + MEMERR_CPU_LSUESR_PAGE_OFFSET);
	if (!val)
		goto chk_mmu;
	dev_err(edac_dev->dev,
		"CPU%d memory error LSU 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X\n",
		ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val,
		MEMERR_CPU_LSUESR_ERRWAY_RD(val),
		MEMERR_CPU_LSUESR_ERRINDEX_RD(val),
		MEMERR_CPU_LSUESR_ERRINFO_RD(val));
	if (val & MEMERR_CPU_LSUESR_CERR_MASK)
		dev_err(edac_dev->dev, "One or more correctable errors\n");
	if (val & MEMERR_CPU_LSUESR_MULTCERR_MASK)
		dev_err(edac_dev->dev, "Multiple correctable errors\n");
	switch (MEMERR_CPU_LSUESR_ERRTYPE_RD(val)) {
	case 0:
		dev_err(edac_dev->dev, "Load tag error\n");
		break;
	case 1:
		dev_err(edac_dev->dev, "Load data error\n");
		break;
	case 2:
		dev_err(edac_dev->dev, "WSL multihit error\n");
		break;
	case 3:
		dev_err(edac_dev->dev, "Store tag error\n");
		break;
	case 4:
		dev_err(edac_dev->dev,
			"DTB multihit from load pipeline error\n");
		break;
	case 5:
		dev_err(edac_dev->dev,
			"DTB multihit from store pipeline error\n");
		break;
	}

	/* Clear any HW errors */
	writel(val, pg_f + MEMERR_CPU_LSUESR_PAGE_OFFSET);

	if (val & (MEMERR_CPU_LSUESR_CERR_MASK |
		   MEMERR_CPU_LSUESR_MULTCERR_MASK))
		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);

chk_mmu:
	val = readl(pg_f + MEMERR_CPU_MMUESR_PAGE_OFFSET);
	if (!val)
		return;
	dev_err(edac_dev->dev,
		"CPU%d memory error MMU 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X %s\n",
		ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val,
		MEMERR_CPU_MMUESR_ERRWAY_RD(val),
		MEMERR_CPU_MMUESR_ERRINDEX_RD(val),
		MEMERR_CPU_MMUESR_ERRINFO_RD(val),
		val & MEMERR_CPU_MMUESR_ERRREQSTR_LSU_MASK ? "LSU" : "ICF");
	if (val & MEMERR_CPU_MMUESR_CERR_MASK)
		dev_err(edac_dev->dev, "One or more correctable errors\n");
	if (val & MEMERR_CPU_MMUESR_MULTCERR_MASK)
		dev_err(edac_dev->dev, "Multiple correctable errors\n");
	switch (MEMERR_CPU_MMUESR_ERRTYPE_RD(val)) {
	case 0:
		dev_err(edac_dev->dev, "Stage 1 UTB hit error\n");
		break;
	case 1:
		dev_err(edac_dev->dev, "Stage 1 UTB miss error\n");
		break;
	case 2:
		dev_err(edac_dev->dev, "Stage 1 UTB allocate error\n");
		break;
	case 3:
		dev_err(edac_dev->dev, "TMO operation single bank error\n");
		break;
	case 4:
		dev_err(edac_dev->dev, "Stage 2 UTB error\n");
		break;
	case 5:
		dev_err(edac_dev->dev, "Stage 2 UTB miss error\n");
		break;
	case 6:
		dev_err(edac_dev->dev, "Stage 2 UTB allocate error\n");
		break;
	case 7:
		dev_err(edac_dev->dev, "TMO operation multiple bank error\n");
		break;
	}

	/* Clear any HW errors */
	writel(val, pg_f + MEMERR_CPU_MMUESR_PAGE_OFFSET);

	edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
}

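/*
 * Check the PMD's shared L2 cache: decode and clear any pending error in
 * the L2 error syndrome/address registers, then report any memory request
 * that timed out on the L2 cache.
 */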
static void xgene_edac_pmd_l2_check(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	void __iomem *pg_d;
	void __iomem *pg_e;
	u32 val_hi;
	u32 val_lo;
	u32 val;

	/* Check L2 */
	pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE;
	val = readl(pg_e + MEMERR_L2C_L2ESR_PAGE_OFFSET);
	if (!val)
		goto chk_l2c;
	val_lo = readl(pg_e + MEMERR_L2C_L2EALR_PAGE_OFFSET);
	val_hi = readl(pg_e + MEMERR_L2C_L2EAHR_PAGE_OFFSET);
	dev_err(edac_dev->dev,
		"PMD%d memory error L2C L2ESR 0x%08X @ 0x%08X.%08X\n",
		ctx->pmd, val, val_hi, val_lo);
	dev_err(edac_dev->dev,
		"ErrSyndrome 0x%02X ErrWay 0x%02X ErrCpu %d ErrGroup 0x%02X ErrAction 0x%02X\n",
		MEMERR_L2C_L2ESR_ERRSYN_RD(val),
		MEMERR_L2C_L2ESR_ERRWAY_RD(val),
		MEMERR_L2C_L2ESR_ERRCPU_RD(val),
		MEMERR_L2C_L2ESR_ERRGROUP_RD(val),
		MEMERR_L2C_L2ESR_ERRACTION_RD(val));

	if (val & MEMERR_L2C_L2ESR_ERR_MASK)
		dev_err(edac_dev->dev, "One or more correctable errors\n");
	if (val & MEMERR_L2C_L2ESR_MULTICERR_MASK)
		dev_err(edac_dev->dev, "Multiple correctable errors\n");
	if (val & MEMERR_L2C_L2ESR_UCERR_MASK)
		dev_err(edac_dev->dev, "One or more uncorrectable errors\n");
	if (val & MEMERR_L2C_L2ESR_MULTUCERR_MASK)
		dev_err(edac_dev->dev, "Multiple uncorrectable errors\n");

	switch (MEMERR_L2C_L2ESR_ERRTYPE_RD(val)) {
	case 0:
		dev_err(edac_dev->dev, "Outbound SDB parity error\n");
		break;
	case 1:
		dev_err(edac_dev->dev, "Inbound SDB parity error\n");
		break;
	case 2:
		dev_err(edac_dev->dev, "Tag ECC error\n");
		break;
	case 3:
		dev_err(edac_dev->dev, "Data ECC error\n");
		break;
	}

	/* Clear any HW errors */
	writel(val, pg_e + MEMERR_L2C_L2ESR_PAGE_OFFSET);

	if (val & (MEMERR_L2C_L2ESR_ERR_MASK |
		   MEMERR_L2C_L2ESR_MULTICERR_MASK))
		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
	if (val & (MEMERR_L2C_L2ESR_UCERR_MASK |
		   MEMERR_L2C_L2ESR_MULTUCERR_MASK))
		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);

chk_l2c:
	/* Check if any memory request timed out on L2 cache */
	pg_d = ctx->pmd_csr + CPU_L2C_PAGE;
	val = readl(pg_d + CPUX_L2C_L2RTOSR_PAGE_OFFSET);
	if (val) {
		val_lo = readl(pg_d + CPUX_L2C_L2RTOALR_PAGE_OFFSET);
		val_hi = readl(pg_d + CPUX_L2C_L2RTOAHR_PAGE_OFFSET);
		dev_err(edac_dev->dev,
			"PMD%d L2C error L2C RTOSR 0x%08X @ 0x%08X.%08X\n",
			ctx->pmd, val, val_hi, val_lo);
		writel(val, pg_d + CPUX_L2C_L2RTOSR_PAGE_OFFSET);
	}
}

static void xgene_edac_pmd_check(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	unsigned int pcp_hp_stat;
	int i;

	xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat);
	if (!((PMD0_MERR_MASK << ctx->pmd) & pcp_hp_stat))
		return;

	/* Check CPU L1 error */
	for (i = 0; i < MAX_CPU_PER_PMD; i++)
		xgene_edac_pmd_l1_check(edac_dev, i);

	/* Check CPU L2 error */
	xgene_edac_pmd_l2_check(edac_dev);
}

static void xgene_edac_pmd_cpu_hw_cfg(struct edac_device_ctl_info *edac_dev,
				      int cpu)
{
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	void __iomem *pg_f = ctx->pmd_csr + cpu * CPU_CSR_STRIDE +
			     CPU_MEMERR_CPU_PAGE;

	/*
	 * Enable CPU memory error:
	 *  MEMERR_CPU_ICFESRA, MEMERR_CPU_LSUESRA, and MEMERR_CPU_MMUESRA
	 */
	writel(0x00000301, pg_f + MEMERR_CPU_ICFECR_PAGE_OFFSET);
	writel(0x00000301, pg_f + MEMERR_CPU_LSUECR_PAGE_OFFSET);
	writel(0x00000101, pg_f + MEMERR_CPU_MMUECR_PAGE_OFFSET);
}

static void xgene_edac_pmd_hw_cfg(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	void __iomem *pg_d = ctx->pmd_csr + CPU_L2C_PAGE;
	void __iomem *pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE;

	/* Enable PMD memory error - MEMERR_L2C_L2ECR and L2C_L2RTOCR */
	writel(0x00000703, pg_e + MEMERR_L2C_L2ECR_PAGE_OFFSET);
	/* Configure L2C HW request time out feature if supported */
	if (ctx->version > 1)
		writel(0x00000119, pg_d + CPUX_L2C_L2RTOCR_PAGE_OFFSET);
}

static void xgene_edac_pmd_hw_ctl(struct edac_device_ctl_info *edac_dev,
				  bool enable)
{
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	int i;

	/* Enable PMD error interrupt */
	if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
		if (enable)
			xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
					       PMD0_MERR_MASK << ctx->pmd);
		else
			xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
					       PMD0_MERR_MASK << ctx->pmd);
	}

	if (enable) {
		xgene_edac_pmd_hw_cfg(edac_dev);

		/* Two CPUs per PMD */
		for (i = 0; i < MAX_CPU_PER_PMD; i++)
			xgene_edac_pmd_cpu_hw_cfg(edac_dev, i);
	}
}

static ssize_t xgene_edac_pmd_l1_inject_ctrl_write(struct file *file,
						   const char __user *data,
						   size_t count, loff_t *ppos)
{
	struct edac_device_ctl_info *edac_dev = file->private_data;
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	void __iomem *cpux_pg_f;
	int i;

	for (i = 0; i < MAX_CPU_PER_PMD; i++) {
		cpux_pg_f = ctx->pmd_csr + i * CPU_CSR_STRIDE +
			    CPU_MEMERR_CPU_PAGE;

		writel(MEMERR_CPU_ICFESR_MULTCERR_MASK |
		       MEMERR_CPU_ICFESR_CERR_MASK,
		       cpux_pg_f + MEMERR_CPU_ICFESRA_PAGE_OFFSET);
		writel(MEMERR_CPU_LSUESR_MULTCERR_MASK |
		       MEMERR_CPU_LSUESR_CERR_MASK,
		       cpux_pg_f + MEMERR_CPU_LSUESRA_PAGE_OFFSET);
		writel(MEMERR_CPU_MMUESR_MULTCERR_MASK |
		       MEMERR_CPU_MMUESR_CERR_MASK,
		       cpux_pg_f + MEMERR_CPU_MMUESRA_PAGE_OFFSET);
	}
	return count;
}

static ssize_t xgene_edac_pmd_l2_inject_ctrl_write(struct file *file,
						   const char __user *data,
						   size_t count, loff_t *ppos)
{
	struct edac_device_ctl_info *edac_dev = file->private_data;
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	void __iomem *pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE;

	writel(MEMERR_L2C_L2ESR_MULTUCERR_MASK |
	       MEMERR_L2C_L2ESR_MULTICERR_MASK |
	       MEMERR_L2C_L2ESR_UCERR_MASK |
	       MEMERR_L2C_L2ESR_ERR_MASK,
	       pg_e + MEMERR_L2C_L2ESRA_PAGE_OFFSET);
	return count;
}

static const struct file_operations xgene_edac_pmd_debug_inject_fops[] = {
	{
	.open = simple_open,
	.write = xgene_edac_pmd_l1_inject_ctrl_write,
	.llseek = generic_file_llseek, },
	{
	.open = simple_open,
	.write = xgene_edac_pmd_l2_inject_ctrl_write,
	.llseek = generic_file_llseek, },
	{ }
};

static void
xgene_edac_pmd_create_debugfs_nodes(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	struct dentry *dbgfs_dir;
	char name[10];

	if (!IS_ENABLED(CONFIG_EDAC_DEBUG) || !ctx->edac->dfs)
		return;

	snprintf(name, sizeof(name), "PMD%d", ctx->pmd);
	dbgfs_dir = edac_debugfs_create_dir_at(name, ctx->edac->dfs);
	if (!dbgfs_dir)
		return;

	edac_debugfs_create_file("l1_inject_ctrl", S_IWUSR, dbgfs_dir, edac_dev,
				 &xgene_edac_pmd_debug_inject_fops[0]);
	edac_debugfs_create_file("l2_inject_ctrl", S_IWUSR, dbgfs_dir, edac_dev,
				 &xgene_edac_pmd_debug_inject_fops[1]);
}

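/*
 * A set efuse bit means the corresponding PMD is fused off; return 1 only
 * when the PMD is available.
 */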
static int xgene_edac_pmd_available(u32 efuse, int pmd)
{
	return (efuse & (1 << pmd)) ? 0 : 1;
}

static int xgene_edac_pmd_add(struct xgene_edac *edac, struct device_node *np,
			      int version)
{
	struct edac_device_ctl_info *edac_dev;
	struct xgene_edac_pmd_ctx *ctx;
	struct resource res;
	char edac_name[10];
	u32 pmd;
	int rc;
	u32 val;

	if (!devres_open_group(edac->dev, xgene_edac_pmd_add, GFP_KERNEL))
		return -ENOMEM;

	/* Determine if this PMD is disabled */
	if (of_property_read_u32(np, "pmd-controller", &pmd)) {
		dev_err(edac->dev, "no pmd-controller property\n");
		rc = -ENODEV;
		goto err_group;
	}
	rc = regmap_read(edac->efuse_map, 0, &val);
	if (rc)
		goto err_group;
	if (!xgene_edac_pmd_available(val, pmd)) {
		rc = -ENODEV;
		goto err_group;
	}

	snprintf(edac_name, sizeof(edac_name), "l2c%d", pmd);
	edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx),
					      edac_name, 1, "l2c", 1, 2, NULL,
					      0, edac_device_alloc_index());
	if (!edac_dev) {
		rc = -ENOMEM;
		goto err_group;
	}

	ctx = edac_dev->pvt_info;
	ctx->name = "xgene_pmd_err";
	ctx->pmd = pmd;
	ctx->edac = edac;
	ctx->edac_dev = edac_dev;
	ctx->ddev = *edac->dev;
	ctx->version = version;
	edac_dev->dev = &ctx->ddev;
	edac_dev->ctl_name = ctx->name;
	edac_dev->dev_name = ctx->name;
	edac_dev->mod_name = EDAC_MOD_STR;

	rc = of_address_to_resource(np, 0, &res);
	if (rc < 0) {
		dev_err(edac->dev, "no PMD resource address\n");
		goto err_free;
	}
	ctx->pmd_csr = devm_ioremap_resource(edac->dev, &res);
	if (IS_ERR(ctx->pmd_csr)) {
		dev_err(edac->dev,
			"devm_ioremap_resource failed for PMD resource address\n");
		rc = PTR_ERR(ctx->pmd_csr);
		goto err_free;
	}

	if (edac_op_state == EDAC_OPSTATE_POLL)
		edac_dev->edac_check = xgene_edac_pmd_check;

	xgene_edac_pmd_create_debugfs_nodes(edac_dev);

	rc = edac_device_add_device(edac_dev);
	if (rc > 0) {
		dev_err(edac->dev, "edac_device_add_device failed\n");
		rc = -ENOMEM;
		goto err_free;
	}

	if (edac_op_state == EDAC_OPSTATE_INT)
		edac_dev->op_state = OP_RUNNING_INTERRUPT;

	list_add(&ctx->next, &edac->pmds);

	xgene_edac_pmd_hw_ctl(edac_dev, 1);

	devres_remove_group(edac->dev, xgene_edac_pmd_add);

	dev_info(edac->dev, "X-Gene EDAC PMD%d registered\n", ctx->pmd);
	return 0;

err_free:
	edac_device_free_ctl_info(edac_dev);
err_group:
	devres_release_group(edac->dev, xgene_edac_pmd_add);
	return rc;
}

static int xgene_edac_pmd_remove(struct xgene_edac_pmd_ctx *pmd)
{
	struct edac_device_ctl_info *edac_dev = pmd->edac_dev;

	xgene_edac_pmd_hw_ctl(edac_dev, 0);
	edac_device_del_device(edac_dev->dev);
	edac_device_free_ctl_info(edac_dev);
	return 0;
}

/* L3 Error device */
#define L3C_ESR				(0x0A * 4)
#define  L3C_ESR_DATATAG_MASK		BIT(9)
#define  L3C_ESR_MULTIHIT_MASK		BIT(8)
#define  L3C_ESR_UCEVICT_MASK		BIT(6)
#define  L3C_ESR_MULTIUCERR_MASK	BIT(5)
#define  L3C_ESR_MULTICERR_MASK		BIT(4)
#define  L3C_ESR_UCERR_MASK		BIT(3)
#define  L3C_ESR_CERR_MASK		BIT(2)
#define  L3C_ESR_UCERRINTR_MASK		BIT(1)
#define  L3C_ESR_CERRINTR_MASK		BIT(0)
#define L3C_ECR				(0x0B * 4)
#define  L3C_ECR_UCINTREN		BIT(3)
#define  L3C_ECR_CINTREN		BIT(2)
#define  L3C_UCERREN			BIT(1)
#define  L3C_CERREN			BIT(0)
#define L3C_ELR				(0x0C * 4)
#define  L3C_ELR_ERRSYN(src)		((src & 0xFF800000) >> 23)
#define  L3C_ELR_ERRWAY(src)		((src & 0x007E0000) >> 17)
#define  L3C_ELR_AGENTID(src)		((src & 0x0001E000) >> 13)
#define  L3C_ELR_ERRGRP(src)		((src & 0x00000F00) >> 8)
#define  L3C_ELR_OPTYPE(src)		((src & 0x000000F0) >> 4)
#define  L3C_ELR_PADDRHIGH(src)		(src & 0x0000000F)
#define L3C_AELR			(0x0D * 4)
#define L3C_BELR			(0x0E * 4)
#define  L3C_BELR_BANK(src)		(src & 0x0000000F)

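/*
 * Shared context for the L3 and SoC error devices; dev_csr points at the
 * device's own error CSR page and version selects silicon-specific
 * behaviour.
 */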
struct xgene_edac_dev_ctx {
	struct list_head	next;
	struct device		ddev;
	char			*name;
	struct xgene_edac	*edac;
	struct edac_device_ctl_info *edac_dev;
	int			edac_idx;
	void __iomem		*dev_csr;
	int			version;
};

/*
 * Version 1 of the L3 controller has broken single-bit correctable-error
 * logic for certain error syndromes. Log those as uncorrectable instead.
 */
static bool xgene_edac_l3_promote_to_uc_err(u32 l3cesr, u32 l3celr)
{
	if (l3cesr & L3C_ESR_DATATAG_MASK) {
		switch (L3C_ELR_ERRSYN(l3celr)) {
		case 0x13C:
		case 0x0B4:
		case 0x007:
		case 0x00D:
		case 0x00E:
		case 0x019:
		case 0x01A:
		case 0x01C:
		case 0x04E:
		case 0x041:
			return true;
		}
	} else if (L3C_ELR_ERRWAY(l3celr) == 9)
		return true;

	return false;
}

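/*
 * Decode and report L3 cache errors: log the syndrome, way, agent and
 * reconstructed physical address, clear the status register, then count
 * the event - promoting known-bad correctable syndromes to uncorrectable
 * on v1 silicon.
 */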
static void xgene_edac_l3_check(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
	u32 l3cesr;
	u32 l3celr;
	u32 l3caelr;
	u32 l3cbelr;

	l3cesr = readl(ctx->dev_csr + L3C_ESR);
	if (!(l3cesr & (L3C_ESR_UCERR_MASK | L3C_ESR_CERR_MASK)))
		return;

	if (l3cesr & L3C_ESR_UCERR_MASK)
		dev_err(edac_dev->dev, "L3C uncorrectable error\n");
	if (l3cesr & L3C_ESR_CERR_MASK)
		dev_warn(edac_dev->dev, "L3C correctable error\n");

	l3celr = readl(ctx->dev_csr + L3C_ELR);
	l3caelr = readl(ctx->dev_csr + L3C_AELR);
	l3cbelr = readl(ctx->dev_csr + L3C_BELR);
	if (l3cesr & L3C_ESR_MULTIHIT_MASK)
		dev_err(edac_dev->dev, "L3C multiple hit error\n");
	if (l3cesr & L3C_ESR_UCEVICT_MASK)
		dev_err(edac_dev->dev,
			"L3C dropped eviction of line with error\n");
	if (l3cesr & L3C_ESR_MULTIUCERR_MASK)
		dev_err(edac_dev->dev, "L3C multiple uncorrectable error\n");
	if (l3cesr & L3C_ESR_DATATAG_MASK)
		dev_err(edac_dev->dev,
			"L3C data error syndrome 0x%X group 0x%X\n",
			L3C_ELR_ERRSYN(l3celr), L3C_ELR_ERRGRP(l3celr));
	else
		dev_err(edac_dev->dev,
			"L3C tag error syndrome 0x%X Way of Tag 0x%X Agent ID 0x%X Operation type 0x%X\n",
			L3C_ELR_ERRSYN(l3celr), L3C_ELR_ERRWAY(l3celr),
			L3C_ELR_AGENTID(l3celr), L3C_ELR_OPTYPE(l3celr));
	/*
	 * NOTE: Address [41:38] in L3C_ELR_PADDRHIGH(l3celr).
	 *       Address [37:6] in l3caelr. Lower 6 bits are zero.
	 */
	dev_err(edac_dev->dev, "L3C error address 0x%08X.%08X bank %d\n",
		L3C_ELR_PADDRHIGH(l3celr) << 6 | (l3caelr >> 26),
		(l3caelr & 0x3FFFFFFF) << 6, L3C_BELR_BANK(l3cbelr));
	dev_err(edac_dev->dev,
		"L3C error status register value 0x%X\n", l3cesr);

	/* Clear L3C error interrupt */
	writel(0, ctx->dev_csr + L3C_ESR);

	if (ctx->version <= 1 &&
	    xgene_edac_l3_promote_to_uc_err(l3cesr, l3celr)) {
		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
		return;
	}
	if (l3cesr & L3C_ESR_CERR_MASK)
		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
	if (l3cesr & L3C_ESR_UCERR_MASK)
		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
}

static void xgene_edac_l3_hw_init(struct edac_device_ctl_info *edac_dev,
				  bool enable)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
	u32 val;

	val = readl(ctx->dev_csr + L3C_ECR);
	val |= L3C_UCERREN | L3C_CERREN;
	/* On disable, just mask the interrupt but keep error detection enabled */
	if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
		if (enable)
			val |= L3C_ECR_UCINTREN | L3C_ECR_CINTREN;
		else
			val &= ~(L3C_ECR_UCINTREN | L3C_ECR_CINTREN);
	}
	writel(val, ctx->dev_csr + L3C_ECR);

	if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
		/* Enable/disable L3 error top level interrupt */
		if (enable) {
			xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
					       L3C_UNCORR_ERR_MASK);
			xgene_edac_pcp_clrbits(ctx->edac, PCPLPERRINTMSK,
					       L3C_CORR_ERR_MASK);
		} else {
			xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
					       L3C_UNCORR_ERR_MASK);
			xgene_edac_pcp_setbits(ctx->edac, PCPLPERRINTMSK,
					       L3C_CORR_ERR_MASK);
		}
	}
}

static ssize_t xgene_edac_l3_inject_ctrl_write(struct file *file,
					       const char __user *data,
					       size_t count, loff_t *ppos)
{
	struct edac_device_ctl_info *edac_dev = file->private_data;
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;

	/* Generate all errors */
	writel(0xFFFFFFFF, ctx->dev_csr + L3C_ESR);
	return count;
}

static const struct file_operations xgene_edac_l3_debug_inject_fops = {
	.open = simple_open,
	.write = xgene_edac_l3_inject_ctrl_write,
	.llseek = generic_file_llseek
};

static void
xgene_edac_l3_create_debugfs_nodes(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
	struct dentry *dbgfs_dir;
	char name[10];

	if (!IS_ENABLED(CONFIG_EDAC_DEBUG) || !ctx->edac->dfs)
		return;

	snprintf(name, sizeof(name), "l3c%d", ctx->edac_idx);
	dbgfs_dir = edac_debugfs_create_dir_at(name, ctx->edac->dfs);
	if (!dbgfs_dir)
		return;

	debugfs_create_file("l3_inject_ctrl", S_IWUSR, dbgfs_dir, edac_dev,
			    &xgene_edac_l3_debug_inject_fops);
}

static int xgene_edac_l3_add(struct xgene_edac *edac, struct device_node *np,
			     int version)
{
	struct edac_device_ctl_info *edac_dev;
	struct xgene_edac_dev_ctx *ctx;
	struct resource res;
	void __iomem *dev_csr;
	int edac_idx;
	int rc = 0;

	if (!devres_open_group(edac->dev, xgene_edac_l3_add, GFP_KERNEL))
		return -ENOMEM;

	rc = of_address_to_resource(np, 0, &res);
	if (rc < 0) {
		dev_err(edac->dev, "no L3 resource address\n");
		goto err_release_group;
	}
	dev_csr = devm_ioremap_resource(edac->dev, &res);
	if (IS_ERR(dev_csr)) {
		dev_err(edac->dev,
			"devm_ioremap_resource failed for L3 resource address\n");
		rc = PTR_ERR(dev_csr);
		goto err_release_group;
	}

	edac_idx = edac_device_alloc_index();
	edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx),
					      "l3c", 1, "l3c", 1, 0, NULL, 0,
					      edac_idx);
	if (!edac_dev) {
		rc = -ENOMEM;
		goto err_release_group;
	}

	ctx = edac_dev->pvt_info;
	ctx->dev_csr = dev_csr;
	ctx->name = "xgene_l3_err";
	ctx->edac_idx = edac_idx;
	ctx->edac = edac;
	ctx->edac_dev = edac_dev;
	ctx->ddev = *edac->dev;
	ctx->version = version;
	edac_dev->dev = &ctx->ddev;
	edac_dev->ctl_name = ctx->name;
	edac_dev->dev_name = ctx->name;
	edac_dev->mod_name = EDAC_MOD_STR;

	if (edac_op_state == EDAC_OPSTATE_POLL)
		edac_dev->edac_check = xgene_edac_l3_check;

	xgene_edac_l3_create_debugfs_nodes(edac_dev);

	rc = edac_device_add_device(edac_dev);
	if (rc > 0) {
		dev_err(edac->dev, "failed edac_device_add_device()\n");
		rc = -ENOMEM;
		goto err_ctl_free;
	}

	if (edac_op_state == EDAC_OPSTATE_INT)
		edac_dev->op_state = OP_RUNNING_INTERRUPT;

	list_add(&ctx->next, &edac->l3s);

	xgene_edac_l3_hw_init(edac_dev, 1);

	devres_remove_group(edac->dev, xgene_edac_l3_add);

	dev_info(edac->dev, "X-Gene EDAC L3 registered\n");
	return 0;

err_ctl_free:
	edac_device_free_ctl_info(edac_dev);
err_release_group:
	devres_release_group(edac->dev, xgene_edac_l3_add);
	return rc;
}

static int xgene_edac_l3_remove(struct xgene_edac_dev_ctx *l3)
{
	struct edac_device_ctl_info *edac_dev = l3->edac_dev;

	xgene_edac_l3_hw_init(edac_dev, 0);
	edac_device_del_device(l3->edac->dev);
	edac_device_free_ctl_info(edac_dev);
	return 0;
}

/* SoC error device */
#define IOBAXIS0TRANSERRINTSTS		0x0000
#define  IOBAXIS0_M_ILLEGAL_ACCESS_MASK	BIT(1)
#define  IOBAXIS0_ILLEGAL_ACCESS_MASK	BIT(0)
#define IOBAXIS0TRANSERRINTMSK		0x0004
#define IOBAXIS0TRANSERRREQINFOL	0x0008
#define IOBAXIS0TRANSERRREQINFOH	0x000c
#define  REQTYPE_RD(src)		(((src) & BIT(0)))
#define  ERRADDRH_RD(src)		(((src) & 0xffc00000) >> 22)
#define IOBAXIS1TRANSERRINTSTS		0x0010
#define IOBAXIS1TRANSERRINTMSK		0x0014
#define IOBAXIS1TRANSERRREQINFOL	0x0018
#define IOBAXIS1TRANSERRREQINFOH	0x001c
#define IOBPATRANSERRINTSTS		0x0020
#define  IOBPA_M_REQIDRAM_CORRUPT_MASK	BIT(7)
#define  IOBPA_REQIDRAM_CORRUPT_MASK	BIT(6)
#define  IOBPA_M_TRANS_CORRUPT_MASK	BIT(5)
#define  IOBPA_TRANS_CORRUPT_MASK	BIT(4)
#define  IOBPA_M_WDATA_CORRUPT_MASK	BIT(3)
#define  IOBPA_WDATA_CORRUPT_MASK	BIT(2)
#define  IOBPA_M_RDATA_CORRUPT_MASK	BIT(1)
#define  IOBPA_RDATA_CORRUPT_MASK	BIT(0)
#define IOBBATRANSERRINTSTS		0x0030
#define  M_ILLEGAL_ACCESS_MASK		BIT(15)
#define  ILLEGAL_ACCESS_MASK		BIT(14)
#define  M_WIDRAM_CORRUPT_MASK		BIT(13)
#define  WIDRAM_CORRUPT_MASK		BIT(12)
#define  M_RIDRAM_CORRUPT_MASK		BIT(11)
#define  RIDRAM_CORRUPT_MASK		BIT(10)
#define  M_TRANS_CORRUPT_MASK		BIT(9)
#define  TRANS_CORRUPT_MASK		BIT(8)
#define  M_WDATA_CORRUPT_MASK		BIT(7)
#define  WDATA_CORRUPT_MASK		BIT(6)
#define  M_RBM_POISONED_REQ_MASK	BIT(5)
#define  RBM_POISONED_REQ_MASK		BIT(4)
#define  M_XGIC_POISONED_REQ_MASK	BIT(3)
#define  XGIC_POISONED_REQ_MASK		BIT(2)
#define  M_WRERR_RESP_MASK		BIT(1)
#define  WRERR_RESP_MASK		BIT(0)
#define IOBBATRANSERRREQINFOL		0x0038
#define IOBBATRANSERRREQINFOH		0x003c
#define  REQTYPE_F2_RD(src)		((src) & BIT(0))
#define  ERRADDRH_F2_RD(src)		(((src) & 0xffc00000) >> 22)
#define IOBBATRANSERRCSWREQID		0x0040
#define XGICTRANSERRINTSTS		0x0050
#define  M_WR_ACCESS_ERR_MASK		BIT(3)
#define  WR_ACCESS_ERR_MASK		BIT(2)
#define  M_RD_ACCESS_ERR_MASK		BIT(1)
#define  RD_ACCESS_ERR_MASK		BIT(0)
#define XGICTRANSERRINTMSK		0x0054
#define XGICTRANSERRREQINFO		0x0058
#define  REQTYPE_MASK			BIT(26)
#define  ERRADDR_RD(src)		((src) & 0x03ffffff)
#define GLBL_ERR_STS			0x0800
#define  MDED_ERR_MASK			BIT(3)
#define  DED_ERR_MASK			BIT(2)
#define  MSEC_ERR_MASK			BIT(1)
#define  SEC_ERR_MASK			BIT(0)
#define GLBL_SEC_ERRL			0x0810
#define GLBL_SEC_ERRH			0x0818
#define GLBL_MSEC_ERRL			0x0820
#define GLBL_MSEC_ERRH			0x0828
#define GLBL_DED_ERRL			0x0830
#define GLBL_DED_ERRLMASK		0x0834
#define GLBL_DED_ERRH			0x0838
#define GLBL_DED_ERRHMASK		0x083c
#define GLBL_MDED_ERRL			0x0840
#define GLBL_MDED_ERRLMASK		0x0844
#define GLBL_MDED_ERRH			0x0848
#define GLBL_MDED_ERRHMASK		0x084c

/* IO Bus Registers */
#define RBCSR				0x0000
#define STICKYERR_MASK			BIT(0)
#define RBEIR				0x0008
#define AGENT_OFFLINE_ERR_MASK		BIT(30)
#define UNIMPL_RBPAGE_ERR_MASK		BIT(29)
#define WORD_ALIGNED_ERR_MASK		BIT(28)
#define PAGE_ACCESS_ERR_MASK		BIT(27)
#define WRITE_ACCESS_MASK		BIT(26)

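/*
 * Map of MEMERRINTSTS bit positions to the IP block whose internal memory
 * reported a parity error, for version 1 silicon.
 */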
static const char * const soc_mem_err_v1[] = {
	"10GbE0",
	"10GbE1",
	"Security",
	"SATA45",
	"SATA23/ETH23",
	"SATA01/ETH01",
	"USB1",
	"USB0",
	"QML",
	"QM0",
	"QM1 (XGbE01)",
	"PCIE4",
	"PCIE3",
	"PCIE2",
	"PCIE1",
	"PCIE0",
	"CTX Manager",
	"OCM",
	"1GbE",
	"CLE",
	"AHBC",
	"PktDMA",
	"GFC",
	"MSLIM",
	"10GbE2",
	"10GbE3",
	"QM2 (XGbE23)",
	"IOB",
	"unknown",
	"unknown",
	"unknown",
	"unknown",
};

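/*
 * Report XGIC transaction errors and IOB ECC errors: single-bit (SEC) and
 * double-bit (DED) failures, plus their "multiple occurrence" variants,
 * clearing each status/address register after logging it.
 */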
static void xgene_edac_iob_gic_report(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
	u32 err_addr_lo;
	u32 err_addr_hi;
	u32 reg;
	u32 info;

	/* GIC transaction error interrupt */
	reg = readl(ctx->dev_csr + XGICTRANSERRINTSTS);
	if (!reg)
		goto chk_iob_err;
	dev_err(edac_dev->dev, "XGIC transaction error\n");
	if (reg & RD_ACCESS_ERR_MASK)
		dev_err(edac_dev->dev, "XGIC read size error\n");
	if (reg & M_RD_ACCESS_ERR_MASK)
		dev_err(edac_dev->dev, "Multiple XGIC read size error\n");
	if (reg & WR_ACCESS_ERR_MASK)
		dev_err(edac_dev->dev, "XGIC write size error\n");
	if (reg & M_WR_ACCESS_ERR_MASK)
		dev_err(edac_dev->dev, "Multiple XGIC write size error\n");
	info = readl(ctx->dev_csr + XGICTRANSERRREQINFO);
	dev_err(edac_dev->dev, "XGIC %s access @ 0x%08X (0x%08X)\n",
		info & REQTYPE_MASK ? "read" : "write", ERRADDR_RD(info),
		info);
	writel(reg, ctx->dev_csr + XGICTRANSERRINTSTS);

chk_iob_err:
	/* IOB memory error */
	reg = readl(ctx->dev_csr + GLBL_ERR_STS);
	if (!reg)
		return;
	if (reg & SEC_ERR_MASK) {
		err_addr_lo = readl(ctx->dev_csr + GLBL_SEC_ERRL);
		err_addr_hi = readl(ctx->dev_csr + GLBL_SEC_ERRH);
		dev_err(edac_dev->dev,
			"IOB single-bit correctable memory error at 0x%08X.%08X\n",
			err_addr_lo, err_addr_hi);
		writel(err_addr_lo, ctx->dev_csr + GLBL_SEC_ERRL);
		writel(err_addr_hi, ctx->dev_csr + GLBL_SEC_ERRH);
	}
	if (reg & MSEC_ERR_MASK) {
		err_addr_lo = readl(ctx->dev_csr + GLBL_MSEC_ERRL);
		err_addr_hi = readl(ctx->dev_csr + GLBL_MSEC_ERRH);
		dev_err(edac_dev->dev,
			"IOB multiple single-bit correctable memory error at 0x%08X.%08X\n",
			err_addr_lo, err_addr_hi);
		writel(err_addr_lo, ctx->dev_csr + GLBL_MSEC_ERRL);
		writel(err_addr_hi, ctx->dev_csr + GLBL_MSEC_ERRH);
	}
	if (reg & (SEC_ERR_MASK | MSEC_ERR_MASK))
		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);

	if (reg & DED_ERR_MASK) {
		err_addr_lo = readl(ctx->dev_csr + GLBL_DED_ERRL);
		err_addr_hi = readl(ctx->dev_csr + GLBL_DED_ERRH);
		dev_err(edac_dev->dev,
			"IOB double-bit uncorrectable memory error at 0x%08X.%08X\n",
			err_addr_lo, err_addr_hi);
		writel(err_addr_lo, ctx->dev_csr + GLBL_DED_ERRL);
		writel(err_addr_hi, ctx->dev_csr + GLBL_DED_ERRH);
	}
	if (reg & MDED_ERR_MASK) {
		err_addr_lo = readl(ctx->dev_csr + GLBL_MDED_ERRL);
		err_addr_hi = readl(ctx->dev_csr + GLBL_MDED_ERRH);
		dev_err(edac_dev->dev,
			"Multiple IOB double-bit uncorrectable memory error at 0x%08X.%08X\n",
			err_addr_lo, err_addr_hi);
		writel(err_addr_lo, ctx->dev_csr + GLBL_MDED_ERRL);
		writel(err_addr_hi, ctx->dev_csr + GLBL_MDED_ERRH);
	}
	if (reg & (DED_ERR_MASK | MDED_ERR_MASK))
		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
}

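/*
 * Report register-bus access errors latched in the RB regmap (when it is
 * available) and IOB bridge agent (BA) transaction errors, clearing each
 * sticky status register after decoding it.
 */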
static void xgene_edac_rb_report(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
	u32 err_addr_lo;
	u32 err_addr_hi;
	u32 reg;

	/* If the register bus resource isn't available, just skip it */
	if (!ctx->edac->rb_map)
		goto rb_skip;

	/*
	 * Check RB access errors
	 * 1. Out of range
	 * 2. Unimplemented page
	 * 3. Unaligned access
	 * 4. Offline slave IP
	 */
	if (regmap_read(ctx->edac->rb_map, RBCSR, &reg))
		return;
	if (reg & STICKYERR_MASK) {
		bool write;

		dev_err(edac_dev->dev, "IOB bus access error(s)\n");
		if (regmap_read(ctx->edac->rb_map, RBEIR, &reg))
			return;
		write = reg & WRITE_ACCESS_MASK ? 1 : 0;
		if (reg & AGENT_OFFLINE_ERR_MASK)
			dev_err(edac_dev->dev,
				"IOB bus %s access to offline agent error\n",
				write ? "write" : "read");
		if (reg & UNIMPL_RBPAGE_ERR_MASK)
			dev_err(edac_dev->dev,
				"IOB bus %s access to unimplemented page error\n",
				write ? "write" : "read");
		if (reg & WORD_ALIGNED_ERR_MASK)
			dev_err(edac_dev->dev,
				"IOB bus %s word aligned access error\n",
				write ? "write" : "read");
		if (reg & PAGE_ACCESS_ERR_MASK)
			dev_err(edac_dev->dev,
				"IOB bus %s to page out of range access error\n",
				write ? "write" : "read");
		if (regmap_write(ctx->edac->rb_map, RBEIR, 0))
			return;
		if (regmap_write(ctx->edac->rb_map, RBCSR, 0))
			return;
	}
rb_skip:

	/* IOB Bridge agent transaction error interrupt */
	reg = readl(ctx->dev_csr + IOBBATRANSERRINTSTS);
	if (!reg)
		return;

	dev_err(edac_dev->dev, "IOB bridge agent (BA) transaction error\n");
	if (reg & WRERR_RESP_MASK)
		dev_err(edac_dev->dev, "IOB BA write response error\n");
	if (reg & M_WRERR_RESP_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB BA write response error\n");
	if (reg & XGIC_POISONED_REQ_MASK)
		dev_err(edac_dev->dev, "IOB BA XGIC poisoned write error\n");
	if (reg & M_XGIC_POISONED_REQ_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB BA XGIC poisoned write error\n");
	if (reg & RBM_POISONED_REQ_MASK)
		dev_err(edac_dev->dev, "IOB BA RBM poisoned write error\n");
	if (reg & M_RBM_POISONED_REQ_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB BA RBM poisoned write error\n");
	if (reg & WDATA_CORRUPT_MASK)
		dev_err(edac_dev->dev, "IOB BA write error\n");
	if (reg & M_WDATA_CORRUPT_MASK)
		dev_err(edac_dev->dev, "Multiple IOB BA write error\n");
	if (reg & TRANS_CORRUPT_MASK)
		dev_err(edac_dev->dev, "IOB BA transaction error\n");
	if (reg & M_TRANS_CORRUPT_MASK)
		dev_err(edac_dev->dev, "Multiple IOB BA transaction error\n");
	if (reg & RIDRAM_CORRUPT_MASK)
		dev_err(edac_dev->dev,
			"IOB BA RDIDRAM read transaction ID error\n");
	if (reg & M_RIDRAM_CORRUPT_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB BA RDIDRAM read transaction ID error\n");
	if (reg & WIDRAM_CORRUPT_MASK)
		dev_err(edac_dev->dev,
			"IOB BA WRIDRAM write transaction ID error\n");
	if (reg & M_WIDRAM_CORRUPT_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB BA WRIDRAM write transaction ID error\n");
	if (reg & ILLEGAL_ACCESS_MASK)
		dev_err(edac_dev->dev,
			"IOB BA XGIC/RB illegal access error\n");
	if (reg & M_ILLEGAL_ACCESS_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB BA XGIC/RB illegal access error\n");

	err_addr_lo = readl(ctx->dev_csr + IOBBATRANSERRREQINFOL);
	err_addr_hi = readl(ctx->dev_csr + IOBBATRANSERRREQINFOH);
	dev_err(edac_dev->dev, "IOB BA %s access at 0x%02X.%08X (0x%08X)\n",
		REQTYPE_F2_RD(err_addr_hi) ? "read" : "write",
		ERRADDRH_F2_RD(err_addr_hi), err_addr_lo, err_addr_hi);
	if (reg & WRERR_RESP_MASK)
		dev_err(edac_dev->dev, "IOB BA requestor ID 0x%08X\n",
			readl(ctx->dev_csr + IOBBATRANSERRCSWREQID));
	writel(reg, ctx->dev_csr + IOBBATRANSERRINTSTS);
}

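/*
 * Report IOB processing agent (PA) RAM errors and illegal accesses seen
 * on the two AXI slave interfaces, logging the faulting address captured
 * in the request-info registers before clearing the status.
 */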
static void xgene_edac_pa_report(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
	u32 err_addr_lo;
	u32 err_addr_hi;
	u32 reg;

	/* IOB Processing agent transaction error interrupt */
	reg = readl(ctx->dev_csr + IOBPATRANSERRINTSTS);
	if (!reg)
		goto chk_iob_axi0;
	dev_err(edac_dev->dev, "IOB processing agent (PA) transaction error\n");
	if (reg & IOBPA_RDATA_CORRUPT_MASK)
		dev_err(edac_dev->dev, "IOB PA read data RAM error\n");
	if (reg & IOBPA_M_RDATA_CORRUPT_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB PA read data RAM error\n");
	if (reg & IOBPA_WDATA_CORRUPT_MASK)
		dev_err(edac_dev->dev, "IOB PA write data RAM error\n");
	if (reg & IOBPA_M_WDATA_CORRUPT_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB PA write data RAM error\n");
	if (reg & IOBPA_TRANS_CORRUPT_MASK)
		dev_err(edac_dev->dev, "IOB PA transaction error\n");
	if (reg & IOBPA_M_TRANS_CORRUPT_MASK)
		dev_err(edac_dev->dev, "Multiple IOB PA transaction error\n");
	if (reg & IOBPA_REQIDRAM_CORRUPT_MASK)
		dev_err(edac_dev->dev, "IOB PA transaction ID RAM error\n");
	if (reg & IOBPA_M_REQIDRAM_CORRUPT_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB PA transaction ID RAM error\n");
	writel(reg, ctx->dev_csr + IOBPATRANSERRINTSTS);

chk_iob_axi0:
	/* IOB AXI0 Error */
	reg = readl(ctx->dev_csr + IOBAXIS0TRANSERRINTSTS);
	if (!reg)
		goto chk_iob_axi1;
	err_addr_lo = readl(ctx->dev_csr + IOBAXIS0TRANSERRREQINFOL);
	err_addr_hi = readl(ctx->dev_csr + IOBAXIS0TRANSERRREQINFOH);
	dev_err(edac_dev->dev,
		"%sAXI slave 0 illegal %s access @ 0x%02X.%08X (0x%08X)\n",
		reg & IOBAXIS0_M_ILLEGAL_ACCESS_MASK ? "Multiple " : "",
		REQTYPE_RD(err_addr_hi) ? "read" : "write",
		ERRADDRH_RD(err_addr_hi), err_addr_lo, err_addr_hi);
	writel(reg, ctx->dev_csr + IOBAXIS0TRANSERRINTSTS);

chk_iob_axi1:
	/* IOB AXI1 Error */
	reg = readl(ctx->dev_csr + IOBAXIS1TRANSERRINTSTS);
	if (!reg)
		return;
	err_addr_lo = readl(ctx->dev_csr + IOBAXIS1TRANSERRREQINFOL);
	err_addr_hi = readl(ctx->dev_csr + IOBAXIS1TRANSERRREQINFOH);
	dev_err(edac_dev->dev,
		"%sAXI slave 1 illegal %s access @ 0x%02X.%08X (0x%08X)\n",
		reg & IOBAXIS0_M_ILLEGAL_ACCESS_MASK ? "Multiple " : "",
		REQTYPE_RD(err_addr_hi) ? "read" : "write",
		ERRADDRH_RD(err_addr_hi), err_addr_lo, err_addr_hi);
	writel(reg, ctx->dev_csr + IOBAXIS1TRANSERRINTSTS);
}

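/*
 * Top-level SoC check: read the global PCP status registers once, then
 * dispatch to the XGIC/IOB, register-bus and processing-agent reporters
 * as indicated, and finally decode per-IP memory parity errors from
 * MEMERRINTSTS using the version-specific name table.
 */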
1634static void xgene_edac_soc_check(struct edac_device_ctl_info *edac_dev)
1635{
1636	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1637	const char * const *soc_mem_err = NULL;
1638	u32 pcp_hp_stat;
1639	u32 pcp_lp_stat;
1640	u32 reg;
1641	int i;
1642
1643	xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat);
1644	xgene_edac_pcp_rd(ctx->edac, PCPLPERRINTSTS, &pcp_lp_stat);
1645	xgene_edac_pcp_rd(ctx->edac, MEMERRINTSTS, &reg);
1646	if (!((pcp_hp_stat & (IOB_PA_ERR_MASK | IOB_BA_ERR_MASK |
1647			      IOB_XGIC_ERR_MASK | IOB_RB_ERR_MASK)) ||
1648	      (pcp_lp_stat & CSW_SWITCH_TRACE_ERR_MASK) || reg))
1649		return;
1650
1651	if (pcp_hp_stat & IOB_XGIC_ERR_MASK)
1652		xgene_edac_iob_gic_report(edac_dev);
1653
1654	if (pcp_hp_stat & (IOB_RB_ERR_MASK | IOB_BA_ERR_MASK))
1655		xgene_edac_rb_report(edac_dev);
1656
1657	if (pcp_hp_stat & IOB_PA_ERR_MASK)
1658		xgene_edac_pa_report(edac_dev);
1659
1660	if (pcp_lp_stat & CSW_SWITCH_TRACE_ERR_MASK) {
1661		dev_info(edac_dev->dev,
1662			 "CSW switch trace correctable memory parity error\n");
1663		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
1664	}
1665
1666	if (!reg)
1667		return;
1668	if (ctx->version == 1)
1669		soc_mem_err = soc_mem_err_v1;
1670	if (!soc_mem_err) {
1671		dev_err(edac_dev->dev, "SoC memory parity error 0x%08X\n",
1672			reg);
1673		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
1674		return;
1675	}
1676	for (i = 0; i < 31; i++) {
1677		if (reg & (1 << i)) {
1678			dev_err(edac_dev->dev, "%s memory parity error\n",
1679				soc_mem_err[i]);
1680			edac_device_handle_ue(edac_dev, 0, 0,
1681					      edac_dev->ctl_name);
1682		}
1683	}
1684}
1685
static void xgene_edac_soc_hw_init(struct edac_device_ctl_info *edac_dev,
				   bool enable)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;

	/* Enable SoC IP error interrupt */
	if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
		if (enable) {
			xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
					       IOB_PA_ERR_MASK |
					       IOB_BA_ERR_MASK |
					       IOB_XGIC_ERR_MASK |
					       IOB_RB_ERR_MASK);
			xgene_edac_pcp_clrbits(ctx->edac, PCPLPERRINTMSK,
					       CSW_SWITCH_TRACE_ERR_MASK);
		} else {
			xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
					       IOB_PA_ERR_MASK |
					       IOB_BA_ERR_MASK |
					       IOB_XGIC_ERR_MASK |
					       IOB_RB_ERR_MASK);
			xgene_edac_pcp_setbits(ctx->edac, PCPLPERRINTMSK,
					       CSW_SWITCH_TRACE_ERR_MASK);
		}

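		/*
		 * For these per-device mask registers, writing 0 unmasks
		 * every error source and writing all ones masks them all.
		 */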
		writel(enable ? 0x0 : 0xFFFFFFFF,
		       ctx->dev_csr + IOBAXIS0TRANSERRINTMSK);
		writel(enable ? 0x0 : 0xFFFFFFFF,
		       ctx->dev_csr + IOBAXIS1TRANSERRINTMSK);
		writel(enable ? 0x0 : 0xFFFFFFFF,
		       ctx->dev_csr + XGICTRANSERRINTMSK);

		xgene_edac_pcp_setbits(ctx->edac, MEMERRINTMSK,
				       enable ? 0x0 : 0xFFFFFFFF);
	}
}

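/*
 * Register an EDAC device for the SoC error block described by @np. All
 * allocations go through a devres group so that a partially set up device
 * can be unwound on failure. Note that edac_device_add_device() reports
 * failure with a positive return value, hence the rc > 0 check below.
 */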
static int xgene_edac_soc_add(struct xgene_edac *edac, struct device_node *np,
			      int version)
{
	struct edac_device_ctl_info *edac_dev;
	struct xgene_edac_dev_ctx *ctx;
	void __iomem *dev_csr;
	struct resource res;
	int edac_idx;
	int rc;

	if (!devres_open_group(edac->dev, xgene_edac_soc_add, GFP_KERNEL))
		return -ENOMEM;

	rc = of_address_to_resource(np, 0, &res);
	if (rc < 0) {
		dev_err(edac->dev, "no SoC resource address\n");
		goto err_release_group;
	}
	dev_csr = devm_ioremap_resource(edac->dev, &res);
	if (IS_ERR(dev_csr)) {
		dev_err(edac->dev,
			"devm_ioremap_resource failed for SoC resource address\n");
		rc = PTR_ERR(dev_csr);
		goto err_release_group;
	}

	edac_idx = edac_device_alloc_index();
	edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx),
					      "SOC", 1, "SOC", 1, 2, NULL, 0,
					      edac_idx);
	if (!edac_dev) {
		rc = -ENOMEM;
		goto err_release_group;
	}

	ctx = edac_dev->pvt_info;
	ctx->dev_csr = dev_csr;
	ctx->name = "xgene_soc_err";
	ctx->edac_idx = edac_idx;
	ctx->edac = edac;
	ctx->edac_dev = edac_dev;
	ctx->ddev = *edac->dev;
	ctx->version = version;
	edac_dev->dev = &ctx->ddev;
	edac_dev->ctl_name = ctx->name;
	edac_dev->dev_name = ctx->name;
	edac_dev->mod_name = EDAC_MOD_STR;

	if (edac_op_state == EDAC_OPSTATE_POLL)
		edac_dev->edac_check = xgene_edac_soc_check;

	rc = edac_device_add_device(edac_dev);
	if (rc > 0) {
		dev_err(edac->dev, "failed edac_device_add_device()\n");
		rc = -ENOMEM;
		goto err_ctl_free;
	}

	if (edac_op_state == EDAC_OPSTATE_INT)
		edac_dev->op_state = OP_RUNNING_INTERRUPT;

	list_add(&ctx->next, &edac->socs);

	xgene_edac_soc_hw_init(edac_dev, true);

	devres_remove_group(edac->dev, xgene_edac_soc_add);

	dev_info(edac->dev, "X-Gene EDAC SoC registered\n");

	return 0;

err_ctl_free:
	edac_device_free_ctl_info(edac_dev);
err_release_group:
	devres_release_group(edac->dev, xgene_edac_soc_add);
	return rc;
}

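/*
 * Tear down one SoC EDAC device: mask its error interrupts, then
 * unregister and free it.
 */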
static int xgene_edac_soc_remove(struct xgene_edac_dev_ctx *soc)
{
	struct edac_device_ctl_info *edac_dev = soc->edac_dev;

	xgene_edac_soc_hw_init(edac_dev, false);
	edac_device_del_device(soc->edac->dev);
	edac_device_free_ctl_info(edac_dev);
	return 0;
}

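/*
 * The error interrupt is shared by all blocks. Consult the PCP summary
 * status to decide which memory controllers and PMDs to check (each PMD
 * has its own summary bit, PMD0_MERR_MASK shifted by its index); the L3
 * and SoC devices are always checked.
 */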
static irqreturn_t xgene_edac_isr(int irq, void *dev_id)
{
	struct xgene_edac *ctx = dev_id;
	struct xgene_edac_pmd_ctx *pmd;
	struct xgene_edac_dev_ctx *node;
	unsigned int pcp_hp_stat;
	unsigned int pcp_lp_stat;

	xgene_edac_pcp_rd(ctx, PCPHPERRINTSTS, &pcp_hp_stat);
	xgene_edac_pcp_rd(ctx, PCPLPERRINTSTS, &pcp_lp_stat);
	if ((MCU_UNCORR_ERR_MASK & pcp_hp_stat) ||
	    (MCU_CTL_ERR_MASK & pcp_hp_stat) ||
	    (MCU_CORR_ERR_MASK & pcp_lp_stat)) {
		struct xgene_edac_mc_ctx *mcu;

		list_for_each_entry(mcu, &ctx->mcus, next)
			xgene_edac_mc_check(mcu->mci);
	}

	list_for_each_entry(pmd, &ctx->pmds, next) {
		if ((PMD0_MERR_MASK << pmd->pmd) & pcp_hp_stat)
			xgene_edac_pmd_check(pmd->edac_dev);
	}

	list_for_each_entry(node, &ctx->l3s, next)
		xgene_edac_l3_check(node->edac_dev);

	list_for_each_entry(node, &ctx->socs, next)
		xgene_edac_soc_check(node->edac_dev);

	return IRQ_HANDLED;
}

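/*
 * Probe the top-level X-Gene EDAC node: look up the shared syscon regmaps,
 * map the PCP CSR region, request the error interrupts when running in
 * interrupt mode, and register one sub-device per enabled child node.
 *
 * A matching device-tree fragment might look like the sketch below (the
 * exact phandles and addresses come from the platform dts and are only
 * illustrative here):
 *
 *	edac@78800000 {
 *		compatible = "apm,xgene-edac";
 *		regmap-csw = <&csw>;
 *		regmap-mcba = <&mcba>;
 *		regmap-mcbb = <&mcbb>;
 *		regmap-efuse = <&efuse>;
 *		reg = <0x0 0x78800000 0x0 0x100>;
 *
 *		edacmc@7e800000 {
 *			compatible = "apm,xgene-edac-mc";
 *			reg = <0x0 0x7e800000 0x0 0x1000>;
 *		};
 *	};
 */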
static int xgene_edac_probe(struct platform_device *pdev)
{
	struct xgene_edac *edac;
	struct device_node *child;
	struct resource *res;
	int rc;

	edac = devm_kzalloc(&pdev->dev, sizeof(*edac), GFP_KERNEL);
	if (!edac)
		return -ENOMEM;

	edac->dev = &pdev->dev;
	platform_set_drvdata(pdev, edac);
	INIT_LIST_HEAD(&edac->mcus);
	INIT_LIST_HEAD(&edac->pmds);
	INIT_LIST_HEAD(&edac->l3s);
	INIT_LIST_HEAD(&edac->socs);
	spin_lock_init(&edac->lock);
	mutex_init(&edac->mc_lock);

	edac->csw_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
							"regmap-csw");
	if (IS_ERR(edac->csw_map)) {
		dev_err(edac->dev, "unable to get syscon regmap csw\n");
		rc = PTR_ERR(edac->csw_map);
		goto out_err;
	}

	edac->mcba_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
							 "regmap-mcba");
	if (IS_ERR(edac->mcba_map)) {
		dev_err(edac->dev, "unable to get syscon regmap mcba\n");
		rc = PTR_ERR(edac->mcba_map);
		goto out_err;
	}

	edac->mcbb_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
							 "regmap-mcbb");
	if (IS_ERR(edac->mcbb_map)) {
		dev_err(edac->dev, "unable to get syscon regmap mcbb\n");
		rc = PTR_ERR(edac->mcbb_map);
		goto out_err;
	}

	edac->efuse_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
							  "regmap-efuse");
	if (IS_ERR(edac->efuse_map)) {
		dev_err(edac->dev, "unable to get syscon regmap efuse\n");
		rc = PTR_ERR(edac->efuse_map);
		goto out_err;
	}

	/*
	 * NOTE: The register bus resource is optional, for backward
	 * compatibility with older device trees.
	 */
	edac->rb_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
						       "regmap-rb");
	if (IS_ERR(edac->rb_map)) {
		dev_warn(edac->dev, "missing syscon regmap rb\n");
		edac->rb_map = NULL;
	}

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	edac->pcp_csr = devm_ioremap_resource(&pdev->dev, res);
	if (IS_ERR(edac->pcp_csr)) {
		dev_err(&pdev->dev, "no PCP resource address\n");
		rc = PTR_ERR(edac->pcp_csr);
		goto out_err;
	}

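	/*
	 * In interrupt mode, three interrupt lines are expected; they
	 * presumably correspond to the high-priority, low-priority and
	 * memory-error PCP status registers. All of them are routed to
	 * the same shared handler.
	 */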
	if (edac_op_state == EDAC_OPSTATE_INT) {
		int irq;
		int i;

		for (i = 0; i < 3; i++) {
			irq = platform_get_irq_optional(pdev, i);
			if (irq < 0) {
				dev_err(&pdev->dev, "No IRQ resource\n");
				rc = irq;
				goto out_err;
			}
			rc = devm_request_irq(&pdev->dev, irq,
					      xgene_edac_isr, IRQF_SHARED,
					      dev_name(&pdev->dev), edac);
			if (rc) {
				dev_err(&pdev->dev,
					"Could not request IRQ %d\n", irq);
				goto out_err;
			}
		}
	}

	edac->dfs = edac_debugfs_create_dir(pdev->dev.kobj.name);

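	/*
	 * Each available child node describes one error-reporting block;
	 * its compatible string selects the block type and the hardware
	 * version.
	 */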
	for_each_child_of_node(pdev->dev.of_node, child) {
		if (!of_device_is_available(child))
			continue;
		if (of_device_is_compatible(child, "apm,xgene-edac-mc"))
			xgene_edac_mc_add(edac, child);
		if (of_device_is_compatible(child, "apm,xgene-edac-pmd"))
			xgene_edac_pmd_add(edac, child, 1);
		if (of_device_is_compatible(child, "apm,xgene-edac-pmd-v2"))
			xgene_edac_pmd_add(edac, child, 2);
		if (of_device_is_compatible(child, "apm,xgene-edac-l3"))
			xgene_edac_l3_add(edac, child, 1);
		if (of_device_is_compatible(child, "apm,xgene-edac-l3-v2"))
			xgene_edac_l3_add(edac, child, 2);
		if (of_device_is_compatible(child, "apm,xgene-edac-soc"))
			xgene_edac_soc_add(edac, child, 0);
		if (of_device_is_compatible(child, "apm,xgene-edac-soc-v1"))
			xgene_edac_soc_add(edac, child, 1);
	}

	return 0;

out_err:
	return rc;
}

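/* Unregister and free every sub-device that probe added to the lists. */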
static void xgene_edac_remove(struct platform_device *pdev)
{
	struct xgene_edac *edac = dev_get_drvdata(&pdev->dev);
	struct xgene_edac_mc_ctx *mcu;
	struct xgene_edac_mc_ctx *temp_mcu;
	struct xgene_edac_pmd_ctx *pmd;
	struct xgene_edac_pmd_ctx *temp_pmd;
	struct xgene_edac_dev_ctx *node;
	struct xgene_edac_dev_ctx *temp_node;

	list_for_each_entry_safe(mcu, temp_mcu, &edac->mcus, next)
		xgene_edac_mc_remove(mcu);

	list_for_each_entry_safe(pmd, temp_pmd, &edac->pmds, next)
		xgene_edac_pmd_remove(pmd);

	list_for_each_entry_safe(node, temp_node, &edac->l3s, next)
		xgene_edac_l3_remove(node);

	list_for_each_entry_safe(node, temp_node, &edac->socs, next)
		xgene_edac_soc_remove(node);
}

static const struct of_device_id xgene_edac_of_match[] = {
	{ .compatible = "apm,xgene-edac" },
	{},
};
MODULE_DEVICE_TABLE(of, xgene_edac_of_match);

static struct platform_driver xgene_edac_driver = {
	.probe = xgene_edac_probe,
	.remove_new = xgene_edac_remove,
	.driver = {
		.name = "xgene-edac",
		.of_match_table = xgene_edac_of_match,
	},
};

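/*
 * If firmware-first (GHES) error handling is in use, step aside to avoid
 * reporting the same errors twice. Otherwise sanitize edac_op_state,
 * defaulting to interrupt mode, and register the platform driver.
 */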
static int __init xgene_edac_init(void)
{
	int rc;

	if (ghes_get_devices())
		return -EBUSY;

	/* Make sure the error reporting method is sane */
	switch (edac_op_state) {
	case EDAC_OPSTATE_POLL:
	case EDAC_OPSTATE_INT:
		break;
	default:
		edac_op_state = EDAC_OPSTATE_INT;
		break;
	}

	rc = platform_driver_register(&xgene_edac_driver);
	if (rc) {
		edac_printk(KERN_ERR, EDAC_MOD_STR,
			    "EDAC failed to register\n");
		goto reg_failed;
	}

	return 0;

reg_failed:
	return rc;
}
module_init(xgene_edac_init);

static void __exit xgene_edac_exit(void)
{
	platform_driver_unregister(&xgene_edac_driver);
}
module_exit(xgene_edac_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Feng Kan <fkan@apm.com>");
MODULE_DESCRIPTION("APM X-Gene EDAC driver");
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state,
		 "EDAC error reporting state: 0=Poll, 2=Interrupt");