1/*	$NetBSD: amdgpu_ras.h,v 1.3 2021/12/19 10:59:01 riastradh Exp $	*/
2
3/*
4 * Copyright 2018 Advanced Micro Devices, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 *
25 */
26#ifndef _AMDGPU_RAS_H
27#define _AMDGPU_RAS_H
28
29#include <linux/debugfs.h>
30#include <linux/list.h>
31#include "amdgpu.h"
32#include "amdgpu_psp.h"
33#include "ta_ras_if.h"
34#include "amdgpu_ras_eeprom.h"
35
/*
 * Identifiers of the hardware blocks known to the RAS code.
 *
 * Values are dense and start at 0, so a block id can be used as a bit
 * position: AMDGPU_RAS_BLOCK_MASK and amdgpu_ras_is_supported() both shift
 * by it.  amdgpu_ras_block_to_ta() maps every real entry to the
 * TA_RAS_BLOCK__* constant with the same suffix.
 */
enum amdgpu_ras_block {
	AMDGPU_RAS_BLOCK__UMC = 0,
	AMDGPU_RAS_BLOCK__SDMA,
	AMDGPU_RAS_BLOCK__GFX,
	AMDGPU_RAS_BLOCK__MMHUB,
	AMDGPU_RAS_BLOCK__ATHUB,
	AMDGPU_RAS_BLOCK__PCIE_BIF,
	AMDGPU_RAS_BLOCK__HDP,
	AMDGPU_RAS_BLOCK__XGMI_WAFL,
	AMDGPU_RAS_BLOCK__DF,
	AMDGPU_RAS_BLOCK__SMN,
	AMDGPU_RAS_BLOCK__SEM,
	AMDGPU_RAS_BLOCK__MP0,
	AMDGPU_RAS_BLOCK__MP1,
	AMDGPU_RAS_BLOCK__FUSE,

	/* Not a block: one past the last valid id, i.e. the number of blocks. */
	AMDGPU_RAS_BLOCK__LAST
};
54
/* Number of RAS blocks (the AMDGPU_RAS_BLOCK__LAST sentinel). */
#define AMDGPU_RAS_BLOCK_COUNT	AMDGPU_RAS_BLOCK__LAST
/* Mask with one bit set for every block id in [0, AMDGPU_RAS_BLOCK_COUNT). */
#define AMDGPU_RAS_BLOCK_MASK	((1ULL << AMDGPU_RAS_BLOCK_COUNT) - 1)
57
/*
 * Sub-block indices inside the GFX block.
 *
 * Entries are grouped per hardware unit.  A group (or sub-range) is
 * bracketed by *_INDEX_START and *_INDEX_END names; these are aliases for
 * the first and last member of the group (the first member is assigned the
 * START value, END is assigned the last member's value), so they do not
 * consume numbers of their own.  SPI and TCI contribute a single entry each
 * and have no range markers.  AMDGPU_RAS_BLOCK__GFX_MAX is one past the last
 * sub-block.
 */
enum amdgpu_ras_gfx_subblock {
	/* CPC */
	AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
	AMDGPU_RAS_BLOCK__GFX_CPC_SCRATCH =
		AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_CPC_UCODE,
	AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME1,
	AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
	AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME1,
	AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME2,
	AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
	AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_END =
		AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	/* CPF */
	AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME2 =
		AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME1,
	AMDGPU_RAS_BLOCK__GFX_CPF_TAG,
	AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPF_TAG,
	/* CPG */
	AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_CPG_DMA_ROQ =
		AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_CPG_DMA_TAG,
	AMDGPU_RAS_BLOCK__GFX_CPG_TAG,
	AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPG_TAG,
	/* GDS */
	AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_GDS_MEM = AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
	AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
	AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
	AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_END =
		AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	/* SPI */
	AMDGPU_RAS_BLOCK__GFX_SPI_SR_MEM,
	/* SQ */
	AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_SQ_SGPR = AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_SQ_LDS_D,
	AMDGPU_RAS_BLOCK__GFX_SQ_LDS_I,
	AMDGPU_RAS_BLOCK__GFX_SQ_VGPR,
	AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_END = AMDGPU_RAS_BLOCK__GFX_SQ_VGPR,
	/* SQC (3 ranges) */
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START,
	/* SQC range 0 */
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START =
		AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
		AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_END =
		AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	/* SQC range 1 */
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
		AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_END =
		AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	/* SQC range 2 */
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
		AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END =
		AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_END =
		AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END,
	/* TA */
	AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TA_FS_DFIFO =
		AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TA_FS_AFIFO,
	AMDGPU_RAS_BLOCK__GFX_TA_FL_LFIFO,
	AMDGPU_RAS_BLOCK__GFX_TA_FX_LFIFO,
	AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO,
	AMDGPU_RAS_BLOCK__GFX_TA_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO,
	/* TCA */
	AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TCA_HOLE_FIFO =
		AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_END =
		AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	/* TCC (5 sub-ranges) */
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START,
	/* TCC range 0 */
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START =
		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA =
		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
	AMDGPU_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
	AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_END =
		AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	/* TCC range 1 */
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_DEC =
		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_END =
		AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	/* TCC range 2 */
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_DATA =
		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
	AMDGPU_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
	AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
	AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
	AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO,
	AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_END =
		AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	/* TCC range 3 */
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO =
		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_END =
		AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	/* TCC range 4 */
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END =
		AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_END =
		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END,
	/* TCI */
	AMDGPU_RAS_BLOCK__GFX_TCI_WRITE_RAM,
	/* TCP */
	AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TCP_CACHE_RAM =
		AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
	AMDGPU_RAS_BLOCK__GFX_TCP_CMD_FIFO,
	AMDGPU_RAS_BLOCK__GFX_TCP_VM_FIFO,
	AMDGPU_RAS_BLOCK__GFX_TCP_DB_RAM,
	AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
	AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_END =
		AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	/* TD */
	AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_LO =
		AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
	AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO,
	AMDGPU_RAS_BLOCK__GFX_TD_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO,
	/* EA (3 sub-ranges) */
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START,
	/* EA range 0 */
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START =
		AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM =
		AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START,
	AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_END =
		AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	/* EA range 1 */
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START,
	AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM =
		AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START,
	AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_END =
		AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	/* EA range 2 */
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START,
	AMDGPU_RAS_BLOCK__GFX_EA_MAM_D0MEM =
		AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START,
	AMDGPU_RAS_BLOCK__GFX_EA_MAM_D1MEM,
	AMDGPU_RAS_BLOCK__GFX_EA_MAM_D2MEM,
	AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END =
		AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX_END =
		AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END,
	/* UTC VM L2 bank */
	AMDGPU_RAS_BLOCK__UTC_VML2_BANK_CACHE,
	/* UTC VM walker */
	AMDGPU_RAS_BLOCK__UTC_VML2_WALKER,
	/* UTC ATC L2 2MB cache */
	AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
	/* UTC ATC L2 4KB cache */
	AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
	/* One past the last GFX sub-block index. */
	AMDGPU_RAS_BLOCK__GFX_MAX
};
287
/*
 * RAS error classes.  NONE is 0 and every other value is a distinct single
 * bit (1, 2, 4, 8).  amdgpu_ras_error_to_ta() maps each entry to the
 * TA_RAS_ERROR__* constant with the same suffix.
 */
enum amdgpu_ras_error_type {
	AMDGPU_RAS_ERROR__NONE							= 0,
	AMDGPU_RAS_ERROR__PARITY						= 1,
	AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE					= 2,
	AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE					= 4,
	AMDGPU_RAS_ERROR__POISON						= 8,
};
295
/*
 * RAS result codes; AMDGPU_RAS_SUCCESS is 0 and the rest count up from 1.
 * NOTE(review): UE/CE presumably mean uncorrectable/correctable error (cf.
 * the ue_count/ce_count fields in this header); the meaning of PT is not
 * visible here -- confirm against the users of this enum.
 */
enum amdgpu_ras_ret {
	AMDGPU_RAS_SUCCESS = 0,
	AMDGPU_RAS_FAIL,
	AMDGPU_RAS_UE,
	AMDGPU_RAS_CE,
	AMDGPU_RAS_PT,
};
303
/*
 * Common header identifying a RAS target.  It is embedded as the first
 * member ("head") of every ras_*_if request structure in this file and of
 * struct ras_manager.
 */
struct ras_common_if {
	/* which hardware block */
	enum amdgpu_ras_block block;
	/* which error class */
	enum amdgpu_ras_error_type type;
	/*
	 * index inside the block -- presumably an enum amdgpu_ras_gfx_subblock
	 * value when block is GFX (TODO confirm)
	 */
	uint32_t sub_block_index;
	/* block name */
	char name[32];
};
311
/*
 * Per-device RAS context.  It is reached through amdgpu_ras_get_context()
 * and installed with amdgpu_ras_set_context().
 */
struct amdgpu_ras {
	/* ras infrastructure */
	/* for ras itself. */
	uint32_t hw_supported;
	/*
	 * for IP to check its ras ability: amdgpu_ras_is_supported() tests
	 * bit (1 << block) of this mask.
	 */
	uint32_t supported;
	/* NOTE(review): presumably a per-block mask of enabled features -- confirm */
	uint32_t features;
	/* list head -- presumably links ras_manager.node entries (confirm) */
	struct list_head head;
	/* debugfs */
	struct dentry *dir;
	/* sysfs */
#ifdef CONFIG_SYSFS
	struct device_attribute features_attr;
	struct bin_attribute badpages_attr;
#endif
	/* block array */
	struct ras_manager *objs;

	/* gpu recovery */
	/* work item scheduled by amdgpu_ras_reset_gpu() */
	struct work_struct recovery_work;
	/* flipped 0 -> 1 by amdgpu_ras_reset_gpu() before scheduling the work */
	atomic_t in_recovery;
	struct amdgpu_device *adev;
	/* error handler data */
	struct ras_err_handler_data *eh_data;
	struct mutex recovery_lock;

	uint32_t flags;
	bool reboot;
	struct amdgpu_ras_eeprom_control eeprom_control;
};
342
/* File-system node names kept per ras_manager (see ras_manager.fs_data). */
struct ras_fs_data {
	/* sysfs node name, at most 31 characters plus NUL */
	char sysfs_name[32];
	/* debugfs node name, at most 31 characters plus NUL */
	char debugfs_name[32];
};
347
/*
 * Error counters plus an array of error address records.
 * NOTE(review): ue/ce presumably stand for uncorrectable/correctable
 * errors -- confirm.
 */
struct ras_err_data {
	unsigned long ue_count;
	unsigned long ce_count;
	/* presumably the number of records in err_addr (confirm) */
	unsigned long err_addr_cnt;
	struct eeprom_table_record *err_addr;
};
354
/* Bad-page bookkeeping, referenced from amdgpu_ras.eh_data. */
struct ras_err_handler_data {
	/* point to bad page records array */
	struct eeprom_table_record *bps;
	/* point to reserved bo array */
	struct amdgpu_bo **bps_bo;
	/* the count of entries */
	int count;
	/* the space can place new entries */
	int space_left;
	/* last reserved entry's index + 1 */
	int last_reserved;
};
367
/*
 * Interrupt callback type supplied by an IP block (stored in ras_ih_if.cb
 * and ras_ih_data.cb).  It receives the device, an opaque err_data pointer
 * and the interrupt vector entry, and returns an int whose meaning is
 * defined by the caller (not visible in this header).
 */
typedef int (*ras_ih_cb)(struct amdgpu_device *adev,
		void *err_data,
		struct amdgpu_iv_entry *entry);
371
/* Per-block interrupt handling state, embedded in struct ras_manager. */
struct ras_ih_data {
	/* interrupt bottom half */
	struct work_struct ih_work;
	int inuse;
	/* IP callback */
	ras_ih_cb cb;
	/* full of entries */
	unsigned char *ring;
	unsigned int ring_size;
	unsigned int element_size;
	unsigned int aligned_element_size;
	/* presumably the read and write positions in ring (confirm) */
	unsigned int rptr;
	unsigned int wptr;
};
386
/*
 * Per-block RAS object.  amdgpu_ras.objs points to an array of these and
 * amdgpu_ras_find_obj() returns a pointer to one.
 */
struct ras_manager {
	/* identifies the block / error type this object describes */
	struct ras_common_if head;
	/* reference count */
	int use;
	/* ras block link */
	struct list_head node;
	/* the device */
	struct amdgpu_device *adev;
	/* debugfs */
	struct dentry *ent;
	/* sysfs */
#ifdef CONFIG_SYSFS
	struct device_attribute sysfs_attr;
	int attr_inuse;
#endif

	/* fs node name */
	struct ras_fs_data fs_data;

	/* IH data */
	struct ras_ih_data ih_data;

	/* error counters and address records for this block */
	struct ras_err_data err_data;
};
411
/*
 * One bad-page record made of three unsigned ints.
 * NOTE(review): units of bp/size and the meaning of the flags bits are not
 * visible in this header -- confirm against the code that fills it.
 */
struct ras_badpage {
	unsigned int bp;
	unsigned int size;
	unsigned int flags;
};
417
/* interfaces for IP */
/*
 * Request passed to amdgpu_ras_sysfs_create(), amdgpu_ras_debugfs_create()
 * and amdgpu_ras_late_init(): the target block plus its node names.
 */
struct ras_fs_if {
	struct ras_common_if head;
	char sysfs_name[32];
	char debugfs_name[32];
};
424
/*
 * Request passed to amdgpu_ras_error_query(): the target block plus two
 * error counters (presumably filled in by the callee -- confirm).
 */
struct ras_query_if {
	struct ras_common_if head;
	unsigned long ue_count;
	unsigned long ce_count;
};
430
/*
 * Request passed to amdgpu_ras_error_inject(): the target block plus a
 * 64-bit address and a 64-bit value.
 */
struct ras_inject_if {
	struct ras_common_if head;
	uint64_t address;
	uint64_t value;
};
436
/*
 * Target block plus a 64-bit address.  No function declared in this header
 * takes this structure.
 */
struct ras_cure_if {
	struct ras_common_if head;
	uint64_t address;
};
441
/*
 * Request passed to amdgpu_ras_interrupt_add_handler() and
 * amdgpu_ras_interrupt_remove_handler(): the target block plus the IP
 * callback.
 */
struct ras_ih_if {
	struct ras_common_if head;
	ras_ih_cb cb;
};
446
/*
 * Request passed to amdgpu_ras_interrupt_dispatch(): the target block plus
 * the interrupt vector entry.
 */
struct ras_dispatch_if {
	struct ras_common_if head;
	struct amdgpu_iv_entry *entry;
};
451
/*
 * Debug request.  The anonymous union overlays a bare common header with a
 * full inject request; since struct ras_inject_if begins with its own
 * "head", both views share the same leading bytes.
 */
struct ras_debug_if {
	union {
		struct ras_common_if head;
		struct ras_inject_if inject;
	};
	/* operation selector; the valid values are not defined in this header */
	int op;
};
459/* work flow
460 * vbios
461 * 1: ras feature enable (enabled by default)
462 * psp
463 * 2: ras framework init (in ip_init)
464 * IP
465 * 3: IH add
466 * 4: debugfs/sysfs create
467 * 5: query/inject
468 * 6: debugfs/sysfs remove
469 * 7: IH remove
470 * 8: feature disable
471 */
472
/* Read the per-device RAS context pointer stored at adev->psp.ras.ras. */
#define amdgpu_ras_get_context(adev)		((adev)->psp.ras.ras)
/* Store ras_con as the per-device RAS context; expands to the assignment. */
#define amdgpu_ras_set_context(adev, ras_con)	((adev)->psp.ras.ras = (ras_con))
475
476/* check if ras is supported on block, say, sdma, gfx */
477static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev,
478		unsigned int block)
479{
480	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
481
482	if (block >= AMDGPU_RAS_BLOCK_COUNT)
483		return 0;
484	return ras && (ras->supported & (1 << block));
485}
486
/*
 * Recovery, suspend/resume and error-count entry points.  Only the
 * prototypes are visible in this header; see the implementation for the
 * exact semantics and return values.
 */
int amdgpu_ras_recovery_init(struct amdgpu_device *adev);
/* block: presumably an enum amdgpu_ras_block value (confirm) */
int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
		unsigned int block);

void amdgpu_ras_resume(struct amdgpu_device *adev);
void amdgpu_ras_suspend(struct amdgpu_device *adev);

/* is_ce: presumably selects correctable vs. uncorrectable counts (confirm) */
unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev,
		bool is_ce);

/* error handling functions */
/* bps/pages: presumably an array of records and its length (confirm) */
int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
		struct eeprom_table_record *bps, int pages);

/* called by amdgpu_ras_reset_gpu() when running in task context */
int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev);
502
503static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
504{
505	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
506
507	/* save bad page to eeprom before gpu reset,
508	 * i2c may be unstable in gpu reset
509	 */
510	if (in_task())
511		amdgpu_ras_reserve_bad_pages(adev);
512
513	if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
514		schedule_work(&ras->recovery_work);
515	return 0;
516}
517
518static inline enum ta_ras_block
519amdgpu_ras_block_to_ta(enum amdgpu_ras_block block) {
520	switch (block) {
521	case AMDGPU_RAS_BLOCK__UMC:
522		return TA_RAS_BLOCK__UMC;
523	case AMDGPU_RAS_BLOCK__SDMA:
524		return TA_RAS_BLOCK__SDMA;
525	case AMDGPU_RAS_BLOCK__GFX:
526		return TA_RAS_BLOCK__GFX;
527	case AMDGPU_RAS_BLOCK__MMHUB:
528		return TA_RAS_BLOCK__MMHUB;
529	case AMDGPU_RAS_BLOCK__ATHUB:
530		return TA_RAS_BLOCK__ATHUB;
531	case AMDGPU_RAS_BLOCK__PCIE_BIF:
532		return TA_RAS_BLOCK__PCIE_BIF;
533	case AMDGPU_RAS_BLOCK__HDP:
534		return TA_RAS_BLOCK__HDP;
535	case AMDGPU_RAS_BLOCK__XGMI_WAFL:
536		return TA_RAS_BLOCK__XGMI_WAFL;
537	case AMDGPU_RAS_BLOCK__DF:
538		return TA_RAS_BLOCK__DF;
539	case AMDGPU_RAS_BLOCK__SMN:
540		return TA_RAS_BLOCK__SMN;
541	case AMDGPU_RAS_BLOCK__SEM:
542		return TA_RAS_BLOCK__SEM;
543	case AMDGPU_RAS_BLOCK__MP0:
544		return TA_RAS_BLOCK__MP0;
545	case AMDGPU_RAS_BLOCK__MP1:
546		return TA_RAS_BLOCK__MP1;
547	case AMDGPU_RAS_BLOCK__FUSE:
548		return TA_RAS_BLOCK__FUSE;
549	default:
550		WARN_ONCE(1, "RAS ERROR: unexpected block id %d\n", block);
551		return TA_RAS_BLOCK__UMC;
552	}
553}
554
555static inline enum ta_ras_error_type
556amdgpu_ras_error_to_ta(enum amdgpu_ras_error_type error) {
557	switch (error) {
558	case AMDGPU_RAS_ERROR__NONE:
559		return TA_RAS_ERROR__NONE;
560	case AMDGPU_RAS_ERROR__PARITY:
561		return TA_RAS_ERROR__PARITY;
562	case AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE:
563		return TA_RAS_ERROR__SINGLE_CORRECTABLE;
564	case AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE:
565		return TA_RAS_ERROR__MULTI_UNCORRECTABLE;
566	case AMDGPU_RAS_ERROR__POISON:
567		return TA_RAS_ERROR__POISON;
568	default:
569		WARN_ONCE(1, "RAS ERROR: unexpected error type %d\n", error);
570		return TA_RAS_ERROR__NONE;
571	}
572}
573
/*
 * RAS entry points implemented outside this header.  Each takes the device
 * plus, where applicable, one of the ras_*_if request structures declared
 * in this file.  NOTE(review): the int-returning functions presumably
 * return 0 on success and a negative errno on failure -- not visible here,
 * confirm in the implementation.
 */

/* called in ip_init and ip_fini */
int amdgpu_ras_init(struct amdgpu_device *adev);
int amdgpu_ras_fini(struct amdgpu_device *adev);
int amdgpu_ras_pre_fini(struct amdgpu_device *adev);
int amdgpu_ras_late_init(struct amdgpu_device *adev,
			 struct ras_common_if *ras_block,
			 struct ras_fs_if *fs_info,
			 struct ras_ih_if *ih_info);
void amdgpu_ras_late_fini(struct amdgpu_device *adev,
			  struct ras_common_if *ras_block,
			  struct ras_ih_if *ih_info);

/* feature enable/disable for the block described by head */
int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
		struct ras_common_if *head, bool enable);

int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev,
		struct ras_common_if *head, bool enable);

/* sysfs and debugfs node management */
int amdgpu_ras_sysfs_create(struct amdgpu_device *adev,
		struct ras_fs_if *head);

int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev,
		struct ras_common_if *head);

void amdgpu_ras_debugfs_create(struct amdgpu_device *adev,
		struct ras_fs_if *head);

void amdgpu_ras_debugfs_remove(struct amdgpu_device *adev,
		struct ras_common_if *head);

/* error query and injection */
int amdgpu_ras_error_query(struct amdgpu_device *adev,
		struct ras_query_if *info);

int amdgpu_ras_error_inject(struct amdgpu_device *adev,
		struct ras_inject_if *info);

/* interrupt handler registration and dispatch */
int amdgpu_ras_interrupt_add_handler(struct amdgpu_device *adev,
		struct ras_ih_if *info);

int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev,
		struct ras_ih_if *info);

int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev,
		struct ras_dispatch_if *info);

/* look up the ras_manager for head; presumably NULL when none exists (confirm) */
struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
		struct ras_common_if *head);
621
622extern atomic_t amdgpu_ras_in_intr;
623
624static inline bool amdgpu_ras_intr_triggered(void)
625{
626	return !!atomic_read(&amdgpu_ras_in_intr);
627}
628
/*
 * Reset the global amdgpu_ras_in_intr flag to 0, after which
 * amdgpu_ras_intr_triggered() returns false until the flag is set again.
 */
static inline void amdgpu_ras_intr_cleared(void)
{
	atomic_set(&amdgpu_ras_in_intr, 0);
}
633
/*
 * NOTE(review): presumably the handler for the global RAS interrupt, and
 * likely the place where amdgpu_ras_in_intr gets set -- the implementation
 * is not visible in this header, confirm there.
 */
void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev);
635
636#endif
637