1/*	$NetBSD: amdgpu_ras.c,v 1.5 2021/12/19 12:31:45 riastradh Exp $	*/
2
3/*
4 * Copyright 2018 Advanced Micro Devices, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 *
25 */
26#include <sys/cdefs.h>
27__KERNEL_RCSID(0, "$NetBSD: amdgpu_ras.c,v 1.5 2021/12/19 12:31:45 riastradh Exp $");
28
29#include <linux/debugfs.h>
30#include <linux/list.h>
31#include <linux/module.h>
32#include <linux/uaccess.h>
33#include <linux/reboot.h>
34#include <linux/syscalls.h>
35
36#include "amdgpu.h"
37#include "amdgpu_ras.h"
38#include "amdgpu_atomfirmware.h"
39#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
40
41#include <linux/nbsd-namespace.h>
42
43const char *ras_error_string[] = {
44	"none",
45	"parity",
46	"single_correctable",
47	"multi_uncorrectable",
48	"poison",
49};
50
51const char *ras_block_string[] = {
52	"umc",
53	"sdma",
54	"gfx",
55	"mmhub",
56	"athub",
57	"pcie_bif",
58	"hdp",
59	"xgmi_wafl",
60	"df",
61	"smn",
62	"sem",
63	"mp0",
64	"mp1",
65	"fuse",
66};
67
68#define ras_err_str(i) (ras_error_string[ffs(i)])
69#define ras_block_str(i) (ras_block_string[i])
70
71#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS		1
72#define AMDGPU_RAS_FLAG_INIT_NEED_RESET		2
73#define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
74
75/* inject address is 52 bits */
76#define	RAS_UMC_INJECT_ADDR_LIMIT	(0x1ULL << 52)
77
78enum amdgpu_ras_retire_page_reservation {
79	AMDGPU_RAS_RETIRE_PAGE_RESERVED,
80	AMDGPU_RAS_RETIRE_PAGE_PENDING,
81	AMDGPU_RAS_RETIRE_PAGE_FAULT,
82};
83
84atomic_t amdgpu_ras_in_intr = ATOMIC_INIT(0);
85
86static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
87				uint64_t addr) __unused;
88
89#ifndef __NetBSD__		/* XXX debugfs */
90
91static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf,
92					size_t size, loff_t *pos)
93{
94	struct ras_manager *obj = (struct ras_manager *)file_inode(f)->i_private;
95	struct ras_query_if info = {
96		.head = obj->head,
97	};
98	ssize_t s;
99	char val[128];
100
101	if (amdgpu_ras_error_query(obj->adev, &info))
102		return -EINVAL;
103
104	s = snprintf(val, sizeof(val), "%s: %lu\n%s: %lu\n",
105			"ue", info.ue_count,
106			"ce", info.ce_count);
107	if (*pos >= s)
108		return 0;
109
110	s -= *pos;
111	s = min_t(u64, s, size);
112
113
114	if (copy_to_user(buf, &val[*pos], s))
115		return -EINVAL;
116
117	*pos += s;
118
119	return s;
120}
121
122static const struct file_operations amdgpu_ras_debugfs_ops = {
123	.owner = THIS_MODULE,
124	.read = amdgpu_ras_debugfs_read,
125	.write = NULL,
126	.llseek = default_llseek
127};
128
129static int amdgpu_ras_find_block_id_by_name(const char *name, int *block_id)
130{
131	int i;
132
133	for (i = 0; i < ARRAY_SIZE(ras_block_string); i++) {
134		*block_id = i;
135		if (strcmp(name, ras_block_str(i)) == 0)
136			return 0;
137	}
138	return -EINVAL;
139}
140
141static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
142		const char __user *buf, size_t size,
143		loff_t *pos, struct ras_debug_if *data)
144{
145	ssize_t s = min_t(u64, 64, size);
146	char str[65];
147	char block_name[33];
148	char err[9] = "ue";
149	int op = -1;
150	int block_id;
151	uint32_t sub_block;
152	u64 address, value;
153
154	if (*pos)
155		return -EINVAL;
156	*pos = size;
157
158	memset(str, 0, sizeof(str));
159	memset(data, 0, sizeof(*data));
160
161	if (copy_from_user(str, buf, s))
162		return -EINVAL;
163
164	if (sscanf(str, "disable %32s", block_name) == 1)
165		op = 0;
166	else if (sscanf(str, "enable %32s %8s", block_name, err) == 2)
167		op = 1;
168	else if (sscanf(str, "inject %32s %8s", block_name, err) == 2)
169		op = 2;
170	else if (str[0] && str[1] && str[2] && str[3])
171		/* ascii string, but commands are not matched. */
172		return -EINVAL;
173
174	if (op != -1) {
175		if (amdgpu_ras_find_block_id_by_name(block_name, &block_id))
176			return -EINVAL;
177
178		data->head.block = block_id;
179		/* only ue and ce errors are supported */
180		if (!memcmp("ue", err, 2))
181			data->head.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
182		else if (!memcmp("ce", err, 2))
183			data->head.type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE;
184		else
185			return -EINVAL;
186
187		data->op = op;
188
189		if (op == 2) {
190			if (sscanf(str, "%*s %*s %*s %u %llu %llu",
191						&sub_block, &address, &value) != 3)
192				if (sscanf(str, "%*s %*s %*s 0x%x 0x%"PRIx64" 0x%"PRIx64"",
193							&sub_block, &address, &value) != 3)
194					return -EINVAL;
195			data->head.sub_block_index = sub_block;
196			data->inject.address = address;
197			data->inject.value = value;
198		}
199	} else {
200		if (size < sizeof(*data))
201			return -EINVAL;
202
203		if (copy_from_user(data, buf, sizeof(*data)))
204			return -EINVAL;
205	}
206
207	return 0;
208}
209
210/**
211 * DOC: AMDGPU RAS debugfs control interface
212 *
 * It accepts struct ras_debug_if, which has two members.
214 *
215 * First member: ras_debug_if::head or ras_debug_if::inject.
216 *
217 * head is used to indicate which IP block will be under control.
218 *
 * head has four members: block, type, sub_block_index, and name.
 * block: which IP will be under control.
 * type: what kind of error will be enabled/disabled/injected.
 * sub_block_index: some IPs have subcomponents, e.g. GFX, SDMA.
 * name: the name of the IP.
224 *
 * inject has two more members than head: address and value.
 * As their names indicate, the inject operation will write the
 * value to the address.
228 *
229 * The second member: struct ras_debug_if::op.
230 * It has three kinds of operations.
231 *
232 * - 0: disable RAS on the block. Take ::head as its data.
233 * - 1: enable RAS on the block. Take ::head as its data.
234 * - 2: inject errors on the block. Take ::inject as its data.
235 *
236 * How to use the interface?
237 *
238 * Programs
239 *
 * Copy the struct ras_debug_if into your code and initialize it.
241 * Write the struct to the control node.
242 *
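 * As a minimal, hypothetical userspace sketch (assuming the control node is
 * at /sys/kernel/debug/dri/0/ras/ras_ctrl and that struct ras_debug_if is
 * copied verbatim from this driver; includes and full error handling are
 * omitted), that could look like:
 *
 * .. code-block:: c
 *
 *	int fd = open("/sys/kernel/debug/dri/0/ras/ras_ctrl", O_WRONLY);
 *	struct ras_debug_if data = {
 *		.head = {
 *			.block = AMDGPU_RAS_BLOCK__UMC,
 *			.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
 *			.sub_block_index = 0,
 *		},
 *		.op = 1,
 *	};
 *
 *	if (fd >= 0) {
 *		if (write(fd, &data, sizeof(data)) < 0)
 *			perror("ras_ctrl");
 *		close(fd);
 *	}
 *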
243 * Shells
244 *
245 * .. code-block:: bash
246 *
247 *	echo op block [error [sub_block address value]] > .../ras/ras_ctrl
248 *
249 * Parameters:
250 *
251 * op: disable, enable, inject
252 *	disable: only block is needed
253 *	enable: block and error are needed
 *	inject: block, error, sub_block, address and value are needed
255 * block: umc, sdma, gfx, .........
256 *	see ras_block_string[] for details
257 * error: ue, ce
258 *	ue: multi_uncorrectable
259 *	ce: single_correctable
260 * sub_block:
261 *	sub block index, pass 0 if there is no sub block
262 *
263 * here are some examples for bash commands:
264 *
265 * .. code-block:: bash
266 *
267 *	echo inject umc ue 0x0 0x0 0x0 > /sys/kernel/debug/dri/0/ras/ras_ctrl
268 *	echo inject umc ce 0 0 0 > /sys/kernel/debug/dri/0/ras/ras_ctrl
269 *	echo disable umc > /sys/kernel/debug/dri/0/ras/ras_ctrl
270 *
271 * How to check the result?
272 *
273 * For disable/enable, please check ras features at
274 * /sys/class/drm/card[0/1/2...]/device/ras/features
275 *
276 * For inject, please check corresponding err count at
277 * /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count
278 *
279 * .. note::
280 *	Operations are only allowed on blocks which are supported.
281 *	Please check ras mask at /sys/module/amdgpu/parameters/ras_mask
282 *	to see which blocks support RAS on a particular asic.
283 *
284 */
285static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *buf,
286		size_t size, loff_t *pos)
287{
288	struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
289	struct ras_debug_if data;
290	int ret = 0;
291
292	ret = amdgpu_ras_debugfs_ctrl_parse_data(f, buf, size, pos, &data);
293	if (ret)
294		return -EINVAL;
295
296	if (!amdgpu_ras_is_supported(adev, data.head.block))
297		return -EINVAL;
298
299	switch (data.op) {
300	case 0:
301		ret = amdgpu_ras_feature_enable(adev, &data.head, 0);
302		break;
303	case 1:
304		ret = amdgpu_ras_feature_enable(adev, &data.head, 1);
305		break;
306	case 2:
307		if ((data.inject.address >= adev->gmc.mc_vram_size) ||
308		    (data.inject.address >= RAS_UMC_INJECT_ADDR_LIMIT)) {
309			ret = -EINVAL;
310			break;
311		}
312
313		/* umc ce/ue error injection for a bad page is not allowed */
314		if ((data.head.block == AMDGPU_RAS_BLOCK__UMC) &&
315		    amdgpu_ras_check_bad_page(adev, data.inject.address)) {
316			DRM_WARN("RAS WARN: 0x%"PRIx64" has been marked as bad before error injection!\n",
317					data.inject.address);
318			break;
319		}
320
321		/* data.inject.address is offset instead of absolute gpu address */
322		ret = amdgpu_ras_error_inject(adev, &data.inject);
323		break;
324	default:
325		ret = -EINVAL;
326		break;
327	}
328
329	if (ret)
330		return -EINVAL;
331
332	return size;
333}
334
335/**
336 * DOC: AMDGPU RAS debugfs EEPROM table reset interface
337 *
338 * Some boards contain an EEPROM which is used to persistently store a list of
 * bad pages which experienced ECC errors in vram.  This interface provides
340 * a way to reset the EEPROM, e.g., after testing error injection.
341 *
342 * Usage:
343 *
344 * .. code-block:: bash
345 *
346 *	echo 1 > ../ras/ras_eeprom_reset
347 *
348 * will reset EEPROM table to 0 entries.
349 *
350 */
351static ssize_t amdgpu_ras_debugfs_eeprom_write(struct file *f, const char __user *buf,
352		size_t size, loff_t *pos)
353{
354	struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
355	int ret;
356
357	ret = amdgpu_ras_eeprom_reset_table(&adev->psp.ras.ras->eeprom_control);
358
359	return ret == 1 ? size : -EIO;
360}
361
362static const struct file_operations amdgpu_ras_debugfs_ctrl_ops = {
363	.owner = THIS_MODULE,
364	.read = NULL,
365	.write = amdgpu_ras_debugfs_ctrl_write,
366	.llseek = default_llseek
367};
368
369static const struct file_operations amdgpu_ras_debugfs_eeprom_ops = {
370	.owner = THIS_MODULE,
371	.read = NULL,
372	.write = amdgpu_ras_debugfs_eeprom_write,
373	.llseek = default_llseek
374};
375
376/**
377 * DOC: AMDGPU RAS sysfs Error Count Interface
378 *
379 * It allows the user to read the error count for each IP block on the gpu through
380 * /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count
381 *
 * It outputs multiple lines which report the uncorrected (ue) and corrected
383 * (ce) error counts.
384 *
385 * The format of one line is below,
386 *
387 * [ce|ue]: count
388 *
389 * Example:
390 *
391 * .. code-block:: bash
392 *
393 *	ue: 0
394 *	ce: 1
395 *
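 * For example, the UMC error counters (if the umc block registered its
 * node, as described above) might be read with:
 *
 * .. code-block:: bash
 *
 *	cat /sys/class/drm/card0/device/ras/umc_err_count
 *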
396 */
397static ssize_t amdgpu_ras_sysfs_read(struct device *dev,
398		struct device_attribute *attr, char *buf)
399{
400	struct ras_manager *obj = container_of(attr, struct ras_manager, sysfs_attr);
401	struct ras_query_if info = {
402		.head = obj->head,
403	};
404
405	if (amdgpu_ras_error_query(obj->adev, &info))
406		return -EINVAL;
407
408	return snprintf(buf, PAGE_SIZE, "%s: %lu\n%s: %lu\n",
409			"ue", info.ue_count,
410			"ce", info.ce_count);
411}
412
413#endif	/* __NetBSD__ */
414
415/* obj begin */
416
417#define get_obj(obj) do { (obj)->use++; } while (0)
418#define alive_obj(obj) ((obj)->use)
419
420static inline void put_obj(struct ras_manager *obj)
421{
422	if (obj && --obj->use == 0)
423		list_del(&obj->node);
424	if (obj && obj->use < 0) {
		DRM_ERROR("RAS ERROR: Unbalanced obj(%s) use\n", obj->head.name);
426	}
427}
428
429/* make one obj and return it. */
430static struct ras_manager *amdgpu_ras_create_obj(struct amdgpu_device *adev,
431		struct ras_common_if *head)
432{
433	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
434	struct ras_manager *obj;
435
436	if (!con)
437		return NULL;
438
439	if (head->block >= AMDGPU_RAS_BLOCK_COUNT)
440		return NULL;
441
442	obj = &con->objs[head->block];
	/* already exists. return obj? */
444	if (alive_obj(obj))
445		return NULL;
446
447	obj->head = *head;
448	obj->adev = adev;
449	list_add(&obj->node, &con->head);
450	get_obj(obj);
451
452	return obj;
453}
454
455/* return an obj equal to head, or the first when head is NULL */
456struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
457		struct ras_common_if *head)
458{
459	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
460	struct ras_manager *obj;
461	int i;
462
463	if (!con)
464		return NULL;
465
466	if (head) {
467		if (head->block >= AMDGPU_RAS_BLOCK_COUNT)
468			return NULL;
469
470		obj = &con->objs[head->block];
471
472		if (alive_obj(obj)) {
473			WARN_ON(head->block != obj->head.block);
474			return obj;
475		}
476	} else {
477		for (i = 0; i < AMDGPU_RAS_BLOCK_COUNT; i++) {
478			obj = &con->objs[i];
479			if (alive_obj(obj)) {
480				WARN_ON(i != obj->head.block);
481				return obj;
482			}
483		}
484	}
485
486	return NULL;
487}
488/* obj end */
489
490/* feature ctl begin */
491static int amdgpu_ras_is_feature_allowed(struct amdgpu_device *adev,
492		struct ras_common_if *head)
493{
494	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
495
496	return con->hw_supported & BIT(head->block);
497}
498
499static int amdgpu_ras_is_feature_enabled(struct amdgpu_device *adev,
500		struct ras_common_if *head)
501{
502	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
503
504	return con->features & BIT(head->block);
505}
506
507/*
508 * if obj is not created, then create one.
509 * set feature enable flag.
510 */
511static int __amdgpu_ras_feature_enable(struct amdgpu_device *adev,
512		struct ras_common_if *head, int enable)
513{
514	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
515	struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
516
	/* If hardware does not support ras, then do not create obj.
	 * But if hardware supports ras, we can create the obj.
	 * The ras framework checks con->hw_supported to see if it needs to do
	 * the corresponding initialization.
	 * The IP checks con->supported to see if it needs to disable ras.
522	 */
523	if (!amdgpu_ras_is_feature_allowed(adev, head))
524		return 0;
525	if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head)))
526		return 0;
527
528	if (enable) {
529		if (!obj) {
530			obj = amdgpu_ras_create_obj(adev, head);
531			if (!obj)
532				return -EINVAL;
533		} else {
534			/* In case we create obj somewhere else */
535			get_obj(obj);
536		}
537		con->features |= BIT(head->block);
538	} else {
539		if (obj && amdgpu_ras_is_feature_enabled(adev, head)) {
540			con->features &= ~BIT(head->block);
541			put_obj(obj);
542		}
543	}
544
545	return 0;
546}
547
548/* wrapper of psp_ras_enable_features */
549int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
550		struct ras_common_if *head, bool enable)
551{
552	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
553	union ta_ras_cmd_input info;
554	int ret;
555
556	if (!con)
557		return -EINVAL;
558
559	if (!enable) {
560		info.disable_features = (struct ta_ras_disable_features_input) {
561			.block_id =  amdgpu_ras_block_to_ta(head->block),
562			.error_type = amdgpu_ras_error_to_ta(head->type),
563		};
564	} else {
565		info.enable_features = (struct ta_ras_enable_features_input) {
566			.block_id =  amdgpu_ras_block_to_ta(head->block),
567			.error_type = amdgpu_ras_error_to_ta(head->type),
568		};
569	}
570
571	/* Do not enable if it is not allowed. */
572	WARN_ON(enable && !amdgpu_ras_is_feature_allowed(adev, head));
	/* Are we already in the state we are going to set? */
574	if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head)))
575		return 0;
576
577	if (!amdgpu_ras_intr_triggered()) {
578		ret = psp_ras_enable_features(&adev->psp, &info, enable);
579		if (ret) {
580			DRM_ERROR("RAS ERROR: %s %s feature failed ret %d\n",
581					enable ? "enable":"disable",
582					ras_block_str(head->block),
583					ret);
584			if (ret == TA_RAS_STATUS__RESET_NEEDED)
585				return -EAGAIN;
586			return -EINVAL;
587		}
588	}
589
590	/* setup the obj */
591	__amdgpu_ras_feature_enable(adev, head, enable);
592
593	return 0;
594}
595
596/* Only used in device probe stage and called only once. */
597int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev,
598		struct ras_common_if *head, bool enable)
599{
600	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
601	int ret;
602
603	if (!con)
604		return -EINVAL;
605
606	if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) {
607		if (enable) {
			/* There is no harm in issuing a ras TA cmd regardless of
			 * the current ras state.
			 * If current state == target state, it will do nothing.
			 * But sometimes it requests the driver to reset and repost
			 * with error code -EAGAIN.
613			 */
614			ret = amdgpu_ras_feature_enable(adev, head, 1);
			/* With an old ras TA, we might fail to enable ras.
			 * Log it and just set up the object.
			 * TODO: remove this WA in the future.
618			 */
619			if (ret == -EINVAL) {
620				ret = __amdgpu_ras_feature_enable(adev, head, 1);
621				if (!ret)
622					DRM_INFO("RAS INFO: %s setup object\n",
623						ras_block_str(head->block));
624			}
625		} else {
			/* set up the object, then issue a ras TA disable cmd. */
627			ret = __amdgpu_ras_feature_enable(adev, head, 1);
628			if (ret)
629				return ret;
630
631			ret = amdgpu_ras_feature_enable(adev, head, 0);
632		}
633	} else
634		ret = amdgpu_ras_feature_enable(adev, head, enable);
635
636	return ret;
637}
638
639static int amdgpu_ras_disable_all_features(struct amdgpu_device *adev,
640		bool bypass)
641{
642	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
643	struct ras_manager *obj, *tmp;
644
645	list_for_each_entry_safe(obj, tmp, &con->head, node) {
646		/* bypass psp.
		 * i.e. just release the obj and the corresponding flags
648		 */
649		if (bypass) {
650			if (__amdgpu_ras_feature_enable(adev, &obj->head, 0))
651				break;
652		} else {
653			if (amdgpu_ras_feature_enable(adev, &obj->head, 0))
654				break;
655		}
656	}
657
658	return con->features;
659}
660
661static int amdgpu_ras_enable_all_features(struct amdgpu_device *adev,
662		bool bypass)
663{
664	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
665	int ras_block_count = AMDGPU_RAS_BLOCK_COUNT;
666	int i;
667	const enum amdgpu_ras_error_type default_ras_type =
668		AMDGPU_RAS_ERROR__NONE;
669
670	for (i = 0; i < ras_block_count; i++) {
671		struct ras_common_if head = {
672			.block = i,
673			.type = default_ras_type,
674			.sub_block_index = 0,
675		};
676		strcpy(head.name, ras_block_str(i));
677		if (bypass) {
678			/*
			 * bypass psp. vbios enables ras for us,
			 * so just create the obj.
681			 */
682			if (__amdgpu_ras_feature_enable(adev, &head, 1))
683				break;
684		} else {
685			if (amdgpu_ras_feature_enable(adev, &head, 1))
686				break;
687		}
688	}
689
690	return con->features;
691}
692/* feature ctl end */
693
694/* query/inject/cure begin */
695int amdgpu_ras_error_query(struct amdgpu_device *adev,
696		struct ras_query_if *info)
697{
698	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
699	struct ras_err_data err_data = {0, 0, 0, NULL};
700	int i;
701
702	if (!obj)
703		return -EINVAL;
704
705	switch (info->head.block) {
706	case AMDGPU_RAS_BLOCK__UMC:
707		if (adev->umc.funcs->query_ras_error_count)
708			adev->umc.funcs->query_ras_error_count(adev, &err_data);
709		/* umc query_ras_error_address is also responsible for clearing
710		 * error status
711		 */
712		if (adev->umc.funcs->query_ras_error_address)
713			adev->umc.funcs->query_ras_error_address(adev, &err_data);
714		break;
715	case AMDGPU_RAS_BLOCK__SDMA:
716		if (adev->sdma.funcs->query_ras_error_count) {
717			for (i = 0; i < adev->sdma.num_instances; i++)
718				adev->sdma.funcs->query_ras_error_count(adev, i,
719									&err_data);
720		}
721		break;
722	case AMDGPU_RAS_BLOCK__GFX:
723		if (adev->gfx.funcs->query_ras_error_count)
724			adev->gfx.funcs->query_ras_error_count(adev, &err_data);
725		break;
726	case AMDGPU_RAS_BLOCK__MMHUB:
727		if (adev->mmhub.funcs->query_ras_error_count)
728			adev->mmhub.funcs->query_ras_error_count(adev, &err_data);
729		break;
730	case AMDGPU_RAS_BLOCK__PCIE_BIF:
731		if (adev->nbio.funcs->query_ras_error_count)
732			adev->nbio.funcs->query_ras_error_count(adev, &err_data);
733		break;
734	default:
735		break;
736	}
737
738	obj->err_data.ue_count += err_data.ue_count;
739	obj->err_data.ce_count += err_data.ce_count;
740
741	info->ue_count = obj->err_data.ue_count;
742	info->ce_count = obj->err_data.ce_count;
743
744	if (err_data.ce_count) {
745		dev_info(adev->dev, "%ld correctable errors detected in %s block\n",
746			 obj->err_data.ce_count, ras_block_str(info->head.block));
747	}
748	if (err_data.ue_count) {
749		dev_info(adev->dev, "%ld uncorrectable errors detected in %s block\n",
750			 obj->err_data.ue_count, ras_block_str(info->head.block));
751	}
752
753	return 0;
754}
755
756uint64_t get_xgmi_relative_phy_addr(struct amdgpu_device *adev, uint64_t addr)
757{
758	uint32_t df_inst_id;
759
760	if ((!adev->df.funcs)                 ||
761	    (!adev->df.funcs->get_df_inst_id) ||
762	    (!adev->df.funcs->get_dram_base_addr))
763		return addr;
764
765	df_inst_id = adev->df.funcs->get_df_inst_id(adev);
766
767	return addr + adev->df.funcs->get_dram_base_addr(adev, df_inst_id);
768}
769
770/* wrapper of psp_ras_trigger_error */
771int amdgpu_ras_error_inject(struct amdgpu_device *adev,
772		struct ras_inject_if *info)
773{
774	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
775	struct ta_ras_trigger_error_input block_info = {
776		.block_id =  amdgpu_ras_block_to_ta(info->head.block),
777		.inject_error_type = amdgpu_ras_error_to_ta(info->head.type),
778		.sub_block_index = info->head.sub_block_index,
779		.address = info->address,
780		.value = info->value,
781	};
782	int ret = 0;
783
784	if (!obj)
785		return -EINVAL;
786
787	/* Calculate XGMI relative offset */
788	if (adev->gmc.xgmi.num_physical_nodes > 1) {
789		block_info.address = get_xgmi_relative_phy_addr(adev,
790								block_info.address);
791	}
792
793	switch (info->head.block) {
794	case AMDGPU_RAS_BLOCK__GFX:
795		if (adev->gfx.funcs->ras_error_inject)
796			ret = adev->gfx.funcs->ras_error_inject(adev, info);
797		else
798			ret = -EINVAL;
799		break;
800	case AMDGPU_RAS_BLOCK__UMC:
801	case AMDGPU_RAS_BLOCK__MMHUB:
802	case AMDGPU_RAS_BLOCK__XGMI_WAFL:
803	case AMDGPU_RAS_BLOCK__PCIE_BIF:
804		ret = psp_ras_trigger_error(&adev->psp, &block_info);
805		break;
806	default:
807		DRM_INFO("%s error injection is not supported yet\n",
808			 ras_block_str(info->head.block));
809		ret = -EINVAL;
810	}
811
812	if (ret)
813		DRM_ERROR("RAS ERROR: inject %s error failed ret %d\n",
814				ras_block_str(info->head.block),
815				ret);
816
817	return ret;
818}
819
820int amdgpu_ras_error_cure(struct amdgpu_device *adev,
821		struct ras_cure_if *info)
822{
823	/* psp fw has no cure interface for now. */
824	return 0;
825}
826
827/* get the total error counts on all IPs */
828unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev,
829		bool is_ce)
830{
831	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
832	struct ras_manager *obj;
833	struct ras_err_data data = {0, 0};
834
835	if (!con)
836		return 0;
837
838	list_for_each_entry(obj, &con->head, node) {
839		struct ras_query_if info = {
840			.head = obj->head,
841		};
842
843		if (amdgpu_ras_error_query(adev, &info))
844			return 0;
845
846		data.ce_count += info.ce_count;
847		data.ue_count += info.ue_count;
848	}
849
850	return is_ce ? data.ce_count : data.ue_count;
851}
852/* query/inject/cure end */
853
854
855/* sysfs begin */
856
857static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
858		struct ras_badpage **bps, unsigned int *count) __unused;
859
860#ifndef __NetBSD__		/* XXX amdgpu sysfs */
861
862static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
863{
864	switch (flags) {
865	case AMDGPU_RAS_RETIRE_PAGE_RESERVED:
866		return "R";
867	case AMDGPU_RAS_RETIRE_PAGE_PENDING:
868		return "P";
869	case AMDGPU_RAS_RETIRE_PAGE_FAULT:
870	default:
871		return "F";
872	};
873}
874
875/**
876 * DOC: AMDGPU RAS sysfs gpu_vram_bad_pages Interface
877 *
 * It allows the user to read the bad pages of vram on the gpu through
879 * /sys/class/drm/card[0/1/2...]/device/ras/gpu_vram_bad_pages
880 *
881 * It outputs multiple lines, and each line stands for one gpu page.
882 *
883 * The format of one line is below,
884 * gpu pfn : gpu page size : flags
885 *
886 * gpu pfn and gpu page size are printed in hex format.
 * flags can be one of the characters below:
 *
 * R: reserved, this gpu page is reserved and not able to be used.
 *
 * P: pending for reserve, this gpu page is marked as bad and will be reserved
 * in the next window of page_reserve.
 *
 * F: unable to reserve. this gpu page can't be reserved for some reason.
895 *
896 * Examples:
897 *
898 * .. code-block:: bash
899 *
900 *	0x00000001 : 0x00001000 : R
901 *	0x00000002 : 0x00001000 : P
902 *
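 * The full list can be dumped with, for example:
 *
 * .. code-block:: bash
 *
 *	cat /sys/class/drm/card0/device/ras/gpu_vram_bad_pages
 *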
903 */
904
905static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f,
906		struct kobject *kobj, struct bin_attribute *attr,
907		char *buf, loff_t ppos, size_t count)
908{
909	struct amdgpu_ras *con =
910		container_of(attr, struct amdgpu_ras, badpages_attr);
911	struct amdgpu_device *adev = con->adev;
912	const unsigned int element_size =
913		sizeof("0xabcdabcd : 0x12345678 : R\n") - 1;
914	unsigned int start = div64_ul(ppos + element_size - 1, element_size);
915	unsigned int end = div64_ul(ppos + count - 1, element_size);
916	ssize_t s = 0;
917	struct ras_badpage *bps = NULL;
918	unsigned int bps_count = 0;
919
920	memset(buf, 0, count);
921
922	if (amdgpu_ras_badpages_read(adev, &bps, &bps_count))
923		return 0;
924
925	for (; start < end && start < bps_count; start++)
926		s += scnprintf(&buf[s], element_size + 1,
927				"0x%08x : 0x%08x : %1s\n",
928				bps[start].bp,
929				bps[start].size,
930				amdgpu_ras_badpage_flags_str(bps[start].flags));
931
932	kfree(bps);
933
934	return s;
935}
936
937static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,
938		struct device_attribute *attr, char *buf)
939{
940	struct amdgpu_ras *con =
941		container_of(attr, struct amdgpu_ras, features_attr);
942
943	return scnprintf(buf, PAGE_SIZE, "feature mask: 0x%x\n", con->features);
944}
945
946static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev)
947{
948	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
949	struct attribute *attrs[] = {
950		&con->features_attr.attr,
951		NULL
952	};
953	struct bin_attribute *bin_attrs[] = {
954		&con->badpages_attr,
955		NULL
956	};
957	struct attribute_group group = {
958		.name = "ras",
959		.attrs = attrs,
960		.bin_attrs = bin_attrs,
961	};
962
963	con->features_attr = (struct device_attribute) {
964		.attr = {
965			.name = "features",
966			.mode = S_IRUGO,
967		},
968			.show = amdgpu_ras_sysfs_features_read,
969	};
970
971	con->badpages_attr = (struct bin_attribute) {
972		.attr = {
973			.name = "gpu_vram_bad_pages",
974			.mode = S_IRUGO,
975		},
976		.size = 0,
977		.private = NULL,
978		.read = amdgpu_ras_sysfs_badpages_read,
979	};
980
981	sysfs_attr_init(attrs[0]);
982	sysfs_bin_attr_init(bin_attrs[0]);
983
984	return sysfs_create_group(&adev->dev->kobj, &group);
985}
986
987static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev)
988{
989	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
990	struct attribute *attrs[] = {
991		&con->features_attr.attr,
992		NULL
993	};
994	struct bin_attribute *bin_attrs[] = {
995		&con->badpages_attr,
996		NULL
997	};
998	struct attribute_group group = {
999		.name = "ras",
1000		.attrs = attrs,
1001		.bin_attrs = bin_attrs,
1002	};
1003
1004	sysfs_remove_group(&adev->dev->kobj, &group);
1005
1006	return 0;
1007}
1008
1009#endif	/* __NetBSD__ */
1010
1011int amdgpu_ras_sysfs_create(struct amdgpu_device *adev,
1012		struct ras_fs_if *head)
1013{
1014#ifndef __NetBSD__
1015	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head->head);
1016
1017	if (!obj || obj->attr_inuse)
1018		return -EINVAL;
1019
1020	get_obj(obj);
1021
1022	memcpy(obj->fs_data.sysfs_name,
1023			head->sysfs_name,
1024			sizeof(obj->fs_data.sysfs_name));
1025
1026	obj->sysfs_attr = (struct device_attribute){
1027		.attr = {
1028			.name = obj->fs_data.sysfs_name,
1029			.mode = S_IRUGO,
1030		},
1031			.show = amdgpu_ras_sysfs_read,
1032	};
1033	sysfs_attr_init(&obj->sysfs_attr.attr);
1034
1035	if (sysfs_add_file_to_group(&adev->dev->kobj,
1036				&obj->sysfs_attr.attr,
1037				"ras")) {
1038		put_obj(obj);
1039		return -EINVAL;
1040	}
1041
1042	obj->attr_inuse = 1;
1043#endif
1044
1045	return 0;
1046}
1047
1048int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev,
1049		struct ras_common_if *head)
1050{
1051#ifndef __NetBSD__		/* XXX amdgpu sysfs */
1052	struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
1053
1054	if (!obj || !obj->attr_inuse)
1055		return -EINVAL;
1056
1057	sysfs_remove_file_from_group(&adev->dev->kobj,
1058				&obj->sysfs_attr.attr,
1059				"ras");
1060	obj->attr_inuse = 0;
1061	put_obj(obj);
1062#endif	/* __NetBSD__ */
1063
1064	return 0;
1065}
1066
1067static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev)
1068{
1069#ifndef __NetBSD__
1070	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1071	struct ras_manager *obj, *tmp;
1072
1073	list_for_each_entry_safe(obj, tmp, &con->head, node) {
1074		amdgpu_ras_sysfs_remove(adev, &obj->head);
1075	}
1076
1077	amdgpu_ras_sysfs_remove_feature_node(adev);
1078#endif
1079
1080	return 0;
1081}
1082/* sysfs end */
1083
1084/**
1085 * DOC: AMDGPU RAS Reboot Behavior for Unrecoverable Errors
1086 *
1087 * Normally when there is an uncorrectable error, the driver will reset
 * the GPU to recover.  However, in the event of an unrecoverable error,
 * the driver provides an interface to reboot the system automatically
 * instead.
1091 *
1092 * The following file in debugfs provides that interface:
1093 * /sys/kernel/debug/dri/[0/1/2...]/ras/auto_reboot
1094 *
1095 * Usage:
1096 *
1097 * .. code-block:: bash
1098 *
1099 *	echo true > .../ras/auto_reboot
1100 *
1101 */
1102/* debugfs begin */
1103#ifndef __NetBSD__		/* XXX amdgpu debugfs */
1104static void amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
1105{
1106	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1107	struct drm_minor *minor = adev->ddev->primary;
1108
1109	con->dir = debugfs_create_dir("ras", minor->debugfs_root);
1110	debugfs_create_file("ras_ctrl", S_IWUGO | S_IRUGO, con->dir,
1111				adev, &amdgpu_ras_debugfs_ctrl_ops);
1112	debugfs_create_file("ras_eeprom_reset", S_IWUGO | S_IRUGO, con->dir,
1113				adev, &amdgpu_ras_debugfs_eeprom_ops);
1114
1115	/*
	 * After an uncorrectable error happens, GPU recovery will usually
	 * be scheduled. But because GPU recovery is known to fail to bring
	 * the GPU back in some cases, the interface below gives the user a
	 * direct way to reboot the system automatically when an
	 * ERREVENT_ATHUB_INTERRUPT is generated. The normal GPU recovery
	 * routine will never be called in that case.
1122	 */
1123	debugfs_create_bool("auto_reboot", S_IWUGO | S_IRUGO, con->dir,
1124				&con->reboot);
1125}
1126
1127#endif	/* __NetBSD__ */
1128
1129void amdgpu_ras_debugfs_create(struct amdgpu_device *adev,
1130		struct ras_fs_if *head)
1131{
1132#ifndef __NetBSD__		/* XXX amdgpu debugfs */
1133	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1134	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head->head);
1135
1136	if (!obj || obj->ent)
1137		return;
1138
1139	get_obj(obj);
1140
1141	memcpy(obj->fs_data.debugfs_name,
1142			head->debugfs_name,
1143			sizeof(obj->fs_data.debugfs_name));
1144
1145	obj->ent = debugfs_create_file(obj->fs_data.debugfs_name,
1146				       S_IWUGO | S_IRUGO, con->dir, obj,
1147				       &amdgpu_ras_debugfs_ops);
1148#endif
1149}
1150
1151void amdgpu_ras_debugfs_remove(struct amdgpu_device *adev,
1152		struct ras_common_if *head)
1153{
1154#ifndef __NetBSD__		/* XXX amdgpu debugfs */
1155	struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
1156
1157	if (!obj || !obj->ent)
1158		return;
1159
1160	debugfs_remove(obj->ent);
1161	obj->ent = NULL;
1162	put_obj(obj);
1163#endif	/* __NetBSD__ */
1164}
1165
1166static void amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev)
1167{
1168#ifndef __NetBSD__
1169	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1170	struct ras_manager *obj, *tmp;
1171
1172	list_for_each_entry_safe(obj, tmp, &con->head, node) {
1173		amdgpu_ras_debugfs_remove(adev, &obj->head);
1174	}
1175
1176	debugfs_remove_recursive(con->dir);
1177	con->dir = NULL;
1178#endif
1179}
1180/* debugfs end */
1181
1182/* ras fs */
1183
1184static int amdgpu_ras_fs_init(struct amdgpu_device *adev)
1185{
1186#ifndef __NetBSD__		/* XXX amdgpu debugfs sysfs */
1187	amdgpu_ras_sysfs_create_feature_node(adev);
1188	amdgpu_ras_debugfs_create_ctrl_node(adev);
1189#endif
1190
1191	return 0;
1192}
1193
1194static int amdgpu_ras_fs_fini(struct amdgpu_device *adev)
1195{
1196	amdgpu_ras_debugfs_remove_all(adev);
1197	amdgpu_ras_sysfs_remove_all(adev);
1198	return 0;
1199}
1200/* ras fs end */
1201
1202/* ih begin */
1203static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
1204{
1205	struct ras_ih_data *data = &obj->ih_data;
1206	struct amdgpu_iv_entry entry;
1207	int ret;
1208	struct ras_err_data err_data = {0, 0, 0, NULL};
1209
1210	while (data->rptr != data->wptr) {
1211		rmb();
1212		memcpy(&entry, &data->ring[data->rptr],
1213				data->element_size);
1214
1215		wmb();
1216		data->rptr = (data->aligned_element_size +
1217				data->rptr) % data->ring_size;
1218
		/* Let the IP handle its data; maybe we need to get the output
		 * from the callback to update the error type/count, etc.
1221		 */
1222		if (data->cb) {
1223			ret = data->cb(obj->adev, &err_data, &entry);
			/* A ue will trigger an interrupt, and in that case
			 * we need to do a reset to recover the whole system.
			 * But leave the IP to do that recovery; here we just
			 * dispatch the error.
1228			 */
1229			if (ret == AMDGPU_RAS_SUCCESS) {
1230				/* these counts could be left as 0 if
1231				 * some blocks do not count error number
1232				 */
1233				obj->err_data.ue_count += err_data.ue_count;
1234				obj->err_data.ce_count += err_data.ce_count;
1235			}
1236		}
1237	}
1238}
1239
1240static void amdgpu_ras_interrupt_process_handler(struct work_struct *work)
1241{
1242	struct ras_ih_data *data =
1243		container_of(work, struct ras_ih_data, ih_work);
1244	struct ras_manager *obj =
1245		container_of(data, struct ras_manager, ih_data);
1246
1247	amdgpu_ras_interrupt_handler(obj);
1248}
1249
1250int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev,
1251		struct ras_dispatch_if *info)
1252{
1253	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
	struct ras_ih_data *data;

	if (!obj)
		return -EINVAL;

	data = &obj->ih_data;
1258
1259	if (data->inuse == 0)
1260		return 0;
1261
1262	/* Might be overflow... */
1263	memcpy(&data->ring[data->wptr], info->entry,
1264			data->element_size);
1265
1266	wmb();
1267	data->wptr = (data->aligned_element_size +
1268			data->wptr) % data->ring_size;
1269
1270	schedule_work(&data->ih_work);
1271
1272	return 0;
1273}
1274
1275int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev,
1276		struct ras_ih_if *info)
1277{
1278	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
1279	struct ras_ih_data *data;
1280
1281	if (!obj)
1282		return -EINVAL;
1283
1284	data = &obj->ih_data;
1285	if (data->inuse == 0)
1286		return 0;
1287
1288	cancel_work_sync(&data->ih_work);
1289
1290	kfree(data->ring);
1291	memset(data, 0, sizeof(*data));
1292	put_obj(obj);
1293
1294	return 0;
1295}
1296
1297int amdgpu_ras_interrupt_add_handler(struct amdgpu_device *adev,
1298		struct ras_ih_if *info)
1299{
1300	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
1301	struct ras_ih_data *data;
1302
1303	if (!obj) {
		/* in case we register the IH before enabling the ras feature */
1305		obj = amdgpu_ras_create_obj(adev, &info->head);
1306		if (!obj)
1307			return -EINVAL;
1308	} else
1309		get_obj(obj);
1310
1311	data = &obj->ih_data;
	/* add the callback, etc. */
1313	*data = (struct ras_ih_data) {
1314		.inuse = 0,
1315		.cb = info->cb,
1316		.element_size = sizeof(struct amdgpu_iv_entry),
1317		.rptr = 0,
1318		.wptr = 0,
1319	};
1320
1321	INIT_WORK(&data->ih_work, amdgpu_ras_interrupt_process_handler);
1322
1323	data->aligned_element_size = ALIGN(data->element_size, 8);
1324	/* the ring can store 64 iv entries. */
1325	data->ring_size = 64 * data->aligned_element_size;
1326	data->ring = kmalloc(data->ring_size, GFP_KERNEL);
1327	if (!data->ring) {
1328		put_obj(obj);
1329		return -ENOMEM;
1330	}
1331
1332	/* IH is ready */
1333	data->inuse = 1;
1334
1335	return 0;
1336}
1337
1338static int amdgpu_ras_interrupt_remove_all(struct amdgpu_device *adev)
1339{
1340	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1341	struct ras_manager *obj, *tmp;
1342
1343	list_for_each_entry_safe(obj, tmp, &con->head, node) {
1344		struct ras_ih_if info = {
1345			.head = obj->head,
1346		};
1347		amdgpu_ras_interrupt_remove_handler(adev, &info);
1348	}
1349
1350	return 0;
1351}
1352/* ih end */
1353
1354/* recovery begin */
1355
1356/* return 0 on success.
 * The caller must free bps.
1358 */
1359static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
1360		struct ras_badpage **bps, unsigned int *count)
1361{
1362	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1363	struct ras_err_handler_data *data;
1364	int i = 0;
1365	int ret = 0;
1366
1367	if (!con || !con->eh_data || !bps || !count)
1368		return -EINVAL;
1369
1370	mutex_lock(&con->recovery_lock);
1371	data = con->eh_data;
1372	if (!data || data->count == 0) {
1373		*bps = NULL;
1374		ret = -EINVAL;
1375		goto out;
1376	}
1377
1378	*bps = kmalloc(sizeof(struct ras_badpage) * data->count, GFP_KERNEL);
1379	if (!*bps) {
1380		ret = -ENOMEM;
1381		goto out;
1382	}
1383
1384	for (; i < data->count; i++) {
1385		(*bps)[i] = (struct ras_badpage){
1386			.bp = data->bps[i].retired_page,
1387			.size = AMDGPU_GPU_PAGE_SIZE,
1388			.flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED,
1389		};
1390
1391		if (data->last_reserved <= i)
1392			(*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING;
1393		else if (data->bps_bo[i] == NULL)
1394			(*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_FAULT;
1395	}
1396
1397	*count = data->count;
1398out:
1399	mutex_unlock(&con->recovery_lock);
1400	return ret;
1401}
1402
1403static void amdgpu_ras_do_recovery(struct work_struct *work)
1404{
1405	struct amdgpu_ras *ras =
1406		container_of(work, struct amdgpu_ras, recovery_work);
1407
1408	if (amdgpu_device_should_recover_gpu(ras->adev))
1409		amdgpu_device_gpu_recover(ras->adev, 0);
1410	atomic_set(&ras->in_recovery, 0);
1411}
1412
1413/* alloc/realloc bps array */
1414static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev,
1415		struct ras_err_handler_data *data, int pages)
1416{
1417	unsigned int old_space = data->count + data->space_left;
1418	unsigned int new_space = old_space + pages;
1419	unsigned int align_space = ALIGN(new_space, 512);
1420	void *bps = kmalloc(align_space * sizeof(*data->bps), GFP_KERNEL);
1421	struct amdgpu_bo **bps_bo =
1422			kmalloc(align_space * sizeof(*data->bps_bo), GFP_KERNEL);
1423
1424	if (!bps || !bps_bo) {
1425		kfree(bps);
1426		kfree(bps_bo);
1427		return -ENOMEM;
1428	}
1429
1430	if (data->bps) {
1431		memcpy(bps, data->bps,
1432				data->count * sizeof(*data->bps));
1433		kfree(data->bps);
1434	}
1435	if (data->bps_bo) {
1436		memcpy(bps_bo, data->bps_bo,
1437				data->count * sizeof(*data->bps_bo));
1438		kfree(data->bps_bo);
1439	}
1440
1441	data->bps = bps;
1442	data->bps_bo = bps_bo;
1443	data->space_left += align_space - old_space;
1444	return 0;
1445}
1446
/* It deals with vram only. */
1448int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
1449		struct eeprom_table_record *bps, int pages)
1450{
1451	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1452	struct ras_err_handler_data *data;
1453	int ret = 0;
1454
1455	if (!con || !con->eh_data || !bps || pages <= 0)
1456		return 0;
1457
1458	mutex_lock(&con->recovery_lock);
1459	data = con->eh_data;
1460	if (!data)
1461		goto out;
1462
1463	if (data->space_left <= pages)
1464		if (amdgpu_ras_realloc_eh_data_space(adev, data, pages)) {
1465			ret = -ENOMEM;
1466			goto out;
1467		}
1468
1469	memcpy(&data->bps[data->count], bps, pages * sizeof(*data->bps));
1470	data->count += pages;
1471	data->space_left -= pages;
1472
1473out:
1474	mutex_unlock(&con->recovery_lock);
1475
1476	return ret;
1477}
1478
1479/*
1480 * write error record array to eeprom, the function should be
1481 * protected by recovery_lock
1482 */
1483static int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev)
1484{
1485	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1486	struct ras_err_handler_data *data;
1487	struct amdgpu_ras_eeprom_control *control;
1488	int save_count;
1489
1490	if (!con || !con->eh_data)
1491		return 0;
1492
1493	control = &con->eeprom_control;
1494	data = con->eh_data;
1495	save_count = data->count - control->num_recs;
1496	/* only new entries are saved */
1497	if (save_count > 0)
1498		if (amdgpu_ras_eeprom_process_recods(control,
1499							&data->bps[control->num_recs],
1500							true,
1501							save_count)) {
1502			DRM_ERROR("Failed to save EEPROM table data!");
1503			return -EIO;
1504		}
1505
1506	return 0;
1507}
1508
1509/*
1510 * read error record array in eeprom and reserve enough space for
1511 * storing new bad pages
1512 */
1513static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev)
1514{
1515	struct amdgpu_ras_eeprom_control *control =
1516					&adev->psp.ras.ras->eeprom_control;
1517	struct eeprom_table_record *bps = NULL;
1518	int ret = 0;
1519
1520	/* no bad page record, skip eeprom access */
1521	if (!control->num_recs)
1522		return ret;
1523
1524	bps = kcalloc(control->num_recs, sizeof(*bps), GFP_KERNEL);
1525	if (!bps)
1526		return -ENOMEM;
1527
1528	if (amdgpu_ras_eeprom_process_recods(control, bps, false,
1529		control->num_recs)) {
1530		DRM_ERROR("Failed to load EEPROM table records!");
1531		ret = -EIO;
1532		goto out;
1533	}
1534
1535	ret = amdgpu_ras_add_bad_pages(adev, bps, control->num_recs);
1536
1537out:
1538	kfree(bps);
1539	return ret;
1540}
1541
1542/*
 * check whether an address belongs to a bad page
1544 *
1545 * Note: this check is only for umc block
1546 */
1547static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
1548				uint64_t addr)
1549{
1550	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1551	struct ras_err_handler_data *data;
1552	int i;
1553	bool ret = false;
1554
1555	if (!con || !con->eh_data)
1556		return ret;
1557
1558	mutex_lock(&con->recovery_lock);
1559	data = con->eh_data;
1560	if (!data)
1561		goto out;
1562
1563	addr >>= AMDGPU_GPU_PAGE_SHIFT;
1564	for (i = 0; i < data->count; i++)
1565		if (addr == data->bps[i].retired_page) {
1566			ret = true;
1567			goto out;
1568		}
1569
1570out:
1571	mutex_unlock(&con->recovery_lock);
1572	return ret;
1573}
1574
1575/* called in gpu recovery/init */
1576int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev)
1577{
1578	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1579	struct ras_err_handler_data *data;
1580	uint64_t bp;
1581	struct amdgpu_bo *bo = NULL;
1582	int i, ret = 0;
1583
1584	if (!con || !con->eh_data)
1585		return 0;
1586
1587	mutex_lock(&con->recovery_lock);
1588	data = con->eh_data;
1589	if (!data)
1590		goto out;
1591	/* reserve vram at driver post stage. */
1592	for (i = data->last_reserved; i < data->count; i++) {
1593		bp = data->bps[i].retired_page;
1594
		/* There are two cases where a reserve error should be ignored:
1596		 * 1) a ras bad page has been allocated (used by someone);
1597		 * 2) a ras bad page has been reserved (duplicate error injection
1598		 *    for one page);
1599		 */
1600		if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
1601					       AMDGPU_GPU_PAGE_SIZE,
1602					       AMDGPU_GEM_DOMAIN_VRAM,
1603					       &bo, NULL))
1604			DRM_WARN("RAS WARN: reserve vram for retired page %"PRIx64" fail\n", bp);
1605
1606		data->bps_bo[i] = bo;
1607		data->last_reserved = i + 1;
1608		bo = NULL;
1609	}
1610
	/* continue to save bad pages to eeprom even if reserving vram fails */
1612	ret = amdgpu_ras_save_bad_pages(adev);
1613out:
1614	mutex_unlock(&con->recovery_lock);
1615	return ret;
1616}
1617
/* called when the driver unloads */
1619static int amdgpu_ras_release_bad_pages(struct amdgpu_device *adev)
1620{
1621	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1622	struct ras_err_handler_data *data;
1623	struct amdgpu_bo *bo;
1624	int i;
1625
1626	if (!con || !con->eh_data)
1627		return 0;
1628
1629	mutex_lock(&con->recovery_lock);
1630	data = con->eh_data;
1631	if (!data)
1632		goto out;
1633
1634	for (i = data->last_reserved - 1; i >= 0; i--) {
1635		bo = data->bps_bo[i];
1636
1637		amdgpu_bo_free_kernel(&bo, NULL, NULL);
1638
1639		data->bps_bo[i] = bo;
1640		data->last_reserved = i;
1641	}
1642out:
1643	mutex_unlock(&con->recovery_lock);
1644	return 0;
1645}
1646
1647int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
1648{
1649	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1650	struct ras_err_handler_data **data;
1651	int ret;
1652
1653	if (con)
1654		data = &con->eh_data;
1655	else
1656		return 0;
1657
1658	*data = kmalloc(sizeof(**data), GFP_KERNEL | __GFP_ZERO);
1659	if (!*data) {
1660		ret = -ENOMEM;
1661		goto out;
1662	}
1663
1664	mutex_init(&con->recovery_lock);
1665	INIT_WORK(&con->recovery_work, amdgpu_ras_do_recovery);
1666	atomic_set(&con->in_recovery, 0);
1667	con->adev = adev;
1668
1669	ret = amdgpu_ras_eeprom_init(&con->eeprom_control);
1670	if (ret)
1671		goto free;
1672
1673	if (con->eeprom_control.num_recs) {
1674		ret = amdgpu_ras_load_bad_pages(adev);
1675		if (ret)
1676			goto free;
1677		ret = amdgpu_ras_reserve_bad_pages(adev);
1678		if (ret)
1679			goto release;
1680	}
1681
1682	return 0;
1683
1684release:
1685	amdgpu_ras_release_bad_pages(adev);
1686free:
1687	kfree((*data)->bps);
1688	kfree((*data)->bps_bo);
1689	kfree(*data);
1690	con->eh_data = NULL;
1691	mutex_destroy(&con->recovery_lock);
1692out:
1693	DRM_WARN("Failed to initialize ras recovery!\n");
1694
1695	return ret;
1696}
1697
1698static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
1699{
1700	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1701	struct ras_err_handler_data *data = con->eh_data;
1702
1703	/* recovery_init failed to init it, fini is useless */
1704	if (!data)
1705		return 0;
1706
1707	cancel_work_sync(&con->recovery_work);
1708	amdgpu_ras_release_bad_pages(adev);
1709
1710	mutex_destroy(&con->recovery_lock);
1711	con->eh_data = NULL;
1712	kfree(data->bps);
1713	kfree(data->bps_bo);
1714	kfree(data);
1715
1716	return 0;
1717}
1718/* recovery end */
1719
/* return 0 if ras will reset the gpu and repost. */
1721int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
1722		unsigned int block)
1723{
1724	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
1725
1726	if (!ras)
1727		return -EINVAL;
1728
1729	ras->flags |= AMDGPU_RAS_FLAG_INIT_NEED_RESET;
1730	return 0;
1731}
1732
1733/*
 * Check the hardware's ras ability, which will be saved in hw_supported.
 * If the hardware does not support ras, we can skip some ras initialization
 * and forbid some ras operations from the IPs.
 * If software itself (say, a boot parameter) limits the ras ability, we still
 * need to allow the IPs to do some limited operations, like disable. In that
 * case we have to initialize ras as normal, but we need to check in each
 * function whether the operation is allowed or not.
1741 */
1742static void amdgpu_ras_check_supported(struct amdgpu_device *adev,
1743		uint32_t *hw_supported, uint32_t *supported)
1744{
1745	*hw_supported = 0;
1746	*supported = 0;
1747
1748	if (amdgpu_sriov_vf(adev) ||
1749	    (adev->asic_type != CHIP_VEGA20 &&
1750	     adev->asic_type != CHIP_ARCTURUS))
1751		return;
1752
1753	if (adev->is_atom_fw &&
1754			(amdgpu_atomfirmware_mem_ecc_supported(adev) ||
1755			 amdgpu_atomfirmware_sram_ecc_supported(adev)))
1756		*hw_supported = AMDGPU_RAS_BLOCK_MASK;
1757
1758	*supported = amdgpu_ras_enable == 0 ?
1759				0 : *hw_supported & amdgpu_ras_mask;
1760}
1761
1762int amdgpu_ras_init(struct amdgpu_device *adev)
1763{
1764	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1765	int r;
1766
1767	if (con)
1768		return 0;
1769
1770	con = kmalloc(sizeof(struct amdgpu_ras) +
1771			sizeof(struct ras_manager) * AMDGPU_RAS_BLOCK_COUNT,
1772			GFP_KERNEL|__GFP_ZERO);
1773	if (!con)
1774		return -ENOMEM;
1775
1776	con->objs = (struct ras_manager *)(con + 1);
1777
1778	amdgpu_ras_set_context(adev, con);
1779
1780	amdgpu_ras_check_supported(adev, &con->hw_supported,
1781			&con->supported);
1782	if (!con->hw_supported) {
1783		amdgpu_ras_set_context(adev, NULL);
1784		kfree(con);
1785		return 0;
1786	}
1787
1788	con->features = 0;
1789	INIT_LIST_HEAD(&con->head);
	/* Might need to get this flag from vbios. */
1791	con->flags = RAS_DEFAULT_FLAGS;
1792
1793	if (adev->nbio.funcs->init_ras_controller_interrupt) {
1794		r = adev->nbio.funcs->init_ras_controller_interrupt(adev);
1795		if (r)
1796			return r;
1797	}
1798
1799	if (adev->nbio.funcs->init_ras_err_event_athub_interrupt) {
1800		r = adev->nbio.funcs->init_ras_err_event_athub_interrupt(adev);
1801		if (r)
1802			return r;
1803	}
1804
1805	amdgpu_ras_mask &= AMDGPU_RAS_BLOCK_MASK;
1806
1807	if (amdgpu_ras_fs_init(adev))
1808		goto fs_out;
1809
1810	DRM_INFO("RAS INFO: ras initialized successfully, "
1811			"hardware ability[%x] ras_mask[%x]\n",
1812			con->hw_supported, con->supported);
1813	return 0;
1814fs_out:
1815	amdgpu_ras_set_context(adev, NULL);
1816	kfree(con);
1817
1818	return -EINVAL;
1819}
1820
1821/* helper function to handle common stuff in ip late init phase */
1822int amdgpu_ras_late_init(struct amdgpu_device *adev,
1823			 struct ras_common_if *ras_block,
1824			 struct ras_fs_if *fs_info,
1825			 struct ras_ih_if *ih_info)
1826{
1827	int r;
1828
1829	/* disable RAS feature per IP block if it is not supported */
1830	if (!amdgpu_ras_is_supported(adev, ras_block->block)) {
1831		amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
1832		return 0;
1833	}
1834
1835	r = amdgpu_ras_feature_enable_on_boot(adev, ras_block, 1);
1836	if (r) {
1837		if (r == -EAGAIN) {
1838			/* request gpu reset. will run again */
1839			amdgpu_ras_request_reset_on_boot(adev,
1840					ras_block->block);
1841			return 0;
1842		} else if (adev->in_suspend || adev->in_gpu_reset) {
			/* in resume phase, if we fail to enable ras,
1844			 * clean up all ras fs nodes, and disable ras */
1845			goto cleanup;
1846		} else
1847			return r;
1848	}
1849
1850	/* in resume phase, no need to create ras fs node */
1851	if (adev->in_suspend || adev->in_gpu_reset)
1852		return 0;
1853
1854	if (ih_info->cb) {
1855		r = amdgpu_ras_interrupt_add_handler(adev, ih_info);
1856		if (r)
1857			goto interrupt;
1858	}
1859
1860	amdgpu_ras_debugfs_create(adev, fs_info);
1861
1862	r = amdgpu_ras_sysfs_create(adev, fs_info);
1863	if (r)
1864		goto sysfs;
1865
1866	return 0;
1867cleanup:
1868	amdgpu_ras_sysfs_remove(adev, ras_block);
1869sysfs:
1870	amdgpu_ras_debugfs_remove(adev, ras_block);
1871	if (ih_info->cb)
1872		amdgpu_ras_interrupt_remove_handler(adev, ih_info);
1873interrupt:
1874	amdgpu_ras_feature_enable(adev, ras_block, 0);
1875	return r;
1876}
1877
1878/* helper function to remove ras fs node and interrupt handler */
1879void amdgpu_ras_late_fini(struct amdgpu_device *adev,
1880			  struct ras_common_if *ras_block,
1881			  struct ras_ih_if *ih_info)
1882{
1883	if (!ras_block || !ih_info)
1884		return;
1885
1886	amdgpu_ras_sysfs_remove(adev, ras_block);
1887	amdgpu_ras_debugfs_remove(adev, ras_block);
1888	if (ih_info->cb)
		amdgpu_ras_interrupt_remove_handler(adev, ih_info);
1890	amdgpu_ras_feature_enable(adev, ras_block, 0);
1891}
1892
/* Do some init work after IP late init, as a dependency.
 * It runs in the resume / gpu reset / boot-up cases.
1895 */
1896void amdgpu_ras_resume(struct amdgpu_device *adev)
1897{
1898	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1899	struct ras_manager *obj, *tmp;
1900
1901	if (!con)
1902		return;
1903
1904	if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) {
		/* Set up all other IPs which are not implemented. There is a
		 * tricky point: the IP's actual ras error type should be
		 * MULTI_UNCORRECTABLE, but since the driver does not handle it,
		 * ERROR_NONE makes sense anyway.
1909		 */
1910		amdgpu_ras_enable_all_features(adev, 1);
1911
		/* We enable ras on all hw_supported blocks, but a boot
		 * parameter might disable some of them, and one or more IPs
		 * might not be implemented yet. So we disable those on their behalf.
1915		 */
1916		list_for_each_entry_safe(obj, tmp, &con->head, node) {
1917			if (!amdgpu_ras_is_supported(adev, obj->head.block)) {
1918				amdgpu_ras_feature_enable(adev, &obj->head, 0);
				/* there should not be any reference. */
1920				WARN_ON(alive_obj(obj));
1921			}
1922		}
1923	}
1924
1925	if (con->flags & AMDGPU_RAS_FLAG_INIT_NEED_RESET) {
1926		con->flags &= ~AMDGPU_RAS_FLAG_INIT_NEED_RESET;
		/* Set up the ras obj state as disabled.
		 * This is for the init_by_vbios case.
		 * If we want to enable ras, just enable it in the normal way.
		 * If we want to disable it, we need to set up the ras obj as
		 * enabled, then issue another TA disable cmd.
		 * See feature_enable_on_boot.
1933		 */
1934		amdgpu_ras_disable_all_features(adev, 1);
1935		amdgpu_ras_reset_gpu(adev);
1936	}
1937}
1938
1939void amdgpu_ras_suspend(struct amdgpu_device *adev)
1940{
1941	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1942
1943	if (!con)
1944		return;
1945
1946	amdgpu_ras_disable_all_features(adev, 0);
1947	/* Make sure all ras objects are disabled. */
1948	if (con->features)
1949		amdgpu_ras_disable_all_features(adev, 1);
1950}
1951
1952/* do some fini work before IP fini as dependence */
1953int amdgpu_ras_pre_fini(struct amdgpu_device *adev)
1954{
1955	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1956
1957	if (!con)
1958		return 0;
1959
1960	/* Need disable ras on all IPs here before ip [hw/sw]fini */
1961	amdgpu_ras_disable_all_features(adev, 0);
1962	amdgpu_ras_recovery_fini(adev);
1963	return 0;
1964}
1965
1966int amdgpu_ras_fini(struct amdgpu_device *adev)
1967{
1968	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1969
1970	if (!con)
1971		return 0;
1972
1973	amdgpu_ras_fs_fini(adev);
1974	amdgpu_ras_interrupt_remove_all(adev);
1975
1976	WARN(con->features, "Feature mask is not cleared");
1977
1978	if (con->features)
1979		amdgpu_ras_disable_all_features(adev, 1);
1980
1981	amdgpu_ras_set_context(adev, NULL);
1982	kfree(con);
1983
1984	return 0;
1985}
1986
1987void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
1988{
1989	uint32_t hw_supported, supported;
1990
1991	amdgpu_ras_check_supported(adev, &hw_supported, &supported);
1992	if (!hw_supported)
1993		return;
1994
1995	if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) {
1996		DRM_WARN("RAS event of type ERREVENT_ATHUB_INTERRUPT detected!\n");
1997
1998		amdgpu_ras_reset_gpu(adev);
1999	}
2000}
2001