1/*	$NetBSD: amdgpu_gfx_v9_0.c,v 1.3 2021/12/19 12:02:39 riastradh Exp $	*/
2
3/*
4 * Copyright 2016 Advanced Micro Devices, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 */
25
26#include <sys/cdefs.h>
27__KERNEL_RCSID(0, "$NetBSD: amdgpu_gfx_v9_0.c,v 1.3 2021/12/19 12:02:39 riastradh Exp $");
28
29#include <linux/delay.h>
30#include <linux/kernel.h>
31#include <linux/firmware.h>
32#include <linux/module.h>
33#include <linux/pci.h>
34
35#include "amdgpu.h"
36#include "amdgpu_gfx.h"
37#include "soc15.h"
38#include "soc15d.h"
39#include "amdgpu_atomfirmware.h"
40#include "amdgpu_pm.h"
41
42#include "gc/gc_9_0_offset.h"
43#include "gc/gc_9_0_sh_mask.h"
44
45#include "vega10_enum.h"
46#include "hdp/hdp_4_0_offset.h"
47
48#include "soc15_common.h"
49#include "clearstate_gfx9.h"
50#include "v9_structs.h"
51
52#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
53
54#include "amdgpu_ras.h"
55
56#include "gfx_v9_4.h"
57
58#include <linux/nbsd-namespace.h>
59
60#define GFX9_NUM_GFX_RINGS     1
61#define GFX9_MEC_HPD_SIZE 4096
62#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
63#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
64
65#define mmPWR_MISC_CNTL_STATUS					0x0183
66#define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
67#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
68#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
69#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
70#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L
71
72#define mmGCEA_PROBE_MAP                        0x070c
73#define mmGCEA_PROBE_MAP_BASE_IDX               0
74
75MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
76MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
77MODULE_FIRMWARE("amdgpu/vega10_me.bin");
78MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
79MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
80MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
81
82MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
83MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
84MODULE_FIRMWARE("amdgpu/vega12_me.bin");
85MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
86MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
87MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
88
89MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
90MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
91MODULE_FIRMWARE("amdgpu/vega20_me.bin");
92MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
93MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
94MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
95
96MODULE_FIRMWARE("amdgpu/raven_ce.bin");
97MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
98MODULE_FIRMWARE("amdgpu/raven_me.bin");
99MODULE_FIRMWARE("amdgpu/raven_mec.bin");
100MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
101MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
102
103MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
104MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
105MODULE_FIRMWARE("amdgpu/picasso_me.bin");
106MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
107MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
108MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
109MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
110
111MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
112MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
113MODULE_FIRMWARE("amdgpu/raven2_me.bin");
114MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
115MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
116MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
117MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
118
119MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
120MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
121MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
122
123MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
124MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
125MODULE_FIRMWARE("amdgpu/renoir_me.bin");
126MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
127MODULE_FIRMWARE("amdgpu/renoir_mec2.bin");
128MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
129
130#define mmTCP_CHAN_STEER_0_ARCT								0x0b03
131#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX							0
132#define mmTCP_CHAN_STEER_1_ARCT								0x0b04
133#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX							0
134#define mmTCP_CHAN_STEER_2_ARCT								0x0b09
135#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX							0
136#define mmTCP_CHAN_STEER_3_ARCT								0x0b0a
137#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX							0
138#define mmTCP_CHAN_STEER_4_ARCT								0x0b0b
139#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX							0
140#define mmTCP_CHAN_STEER_5_ARCT								0x0b0c
141#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX							0
142
143enum ta_ras_gfx_subblock {
144	/*CPC*/
145	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
146	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
147	TA_RAS_BLOCK__GFX_CPC_UCODE,
148	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
149	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
150	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
151	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
152	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
153	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
154	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
155	/* CPF*/
156	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
157	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
158	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
159	TA_RAS_BLOCK__GFX_CPF_TAG,
160	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
161	/* CPG*/
162	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
163	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
164	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
165	TA_RAS_BLOCK__GFX_CPG_TAG,
166	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
167	/* GDS*/
168	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
169	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
170	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
171	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
172	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
173	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
174	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
175	/* SPI*/
176	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
177	/* SQ*/
178	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
179	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
180	TA_RAS_BLOCK__GFX_SQ_LDS_D,
181	TA_RAS_BLOCK__GFX_SQ_LDS_I,
182	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
183	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
184	/* SQC (3 ranges)*/
185	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
186	/* SQC range 0*/
187	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
188	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
189		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
190	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
191	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
192	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
193	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
194	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
195	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
196	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
197		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
198	/* SQC range 1*/
199	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
200	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
201		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
202	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
203	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
204	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
205	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
206	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
207	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
208	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
209	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
210	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
211		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
212	/* SQC range 2*/
213	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
214	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
215		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
216	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
217	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
218	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
219	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
220	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
221	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
222	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
223	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
224	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
225		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
226	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
227	/* TA*/
228	TA_RAS_BLOCK__GFX_TA_INDEX_START,
229	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
230	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
231	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
232	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
233	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
234	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
235	/* TCA*/
236	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
237	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
238	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
239	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
240	/* TCC (5 sub-ranges)*/
241	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
242	/* TCC range 0*/
243	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
244	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
245	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
246	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
247	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
248	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
249	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
250	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
251	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
252	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
253	/* TCC range 1*/
254	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
255	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
256	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
257	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
258		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
259	/* TCC range 2*/
260	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
261	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
262	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
263	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
264	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
265	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
266	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
267	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
268	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
269	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
270		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
271	/* TCC range 3*/
272	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
273	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
274	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
275	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
276		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
277	/* TCC range 4*/
278	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
279	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
280		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
281	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
282	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
283		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
284	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
285	/* TCI*/
286	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
287	/* TCP*/
288	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
289	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
290	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
291	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
292	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
293	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
294	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
295	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
296	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
297	/* TD*/
298	TA_RAS_BLOCK__GFX_TD_INDEX_START,
299	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
300	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
301	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
302	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
303	/* EA (3 sub-ranges)*/
304	TA_RAS_BLOCK__GFX_EA_INDEX_START,
305	/* EA range 0*/
306	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
307	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
308	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
309	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
310	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
311	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
312	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
313	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
314	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
315	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
316	/* EA range 1*/
317	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
318	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
319	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
320	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
321	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
322	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
323	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
324	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
325	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
326	/* EA range 2*/
327	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
328	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
329	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
330	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
331	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
332	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
333	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
334	/* UTC VM L2 bank*/
335	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
336	/* UTC VM walker*/
337	TA_RAS_BLOCK__UTC_VML2_WALKER,
338	/* UTC ATC L2 2MB cache*/
339	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
340	/* UTC ATC L2 4KB cache*/
341	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
342	TA_RAS_BLOCK__GFX_MAX
343};
344
345struct ras_gfx_subblock {
346	const unsigned char *name;
347	int ta_subblock;
348	int hw_supported_error_type;
349	int sw_supported_error_type;
350};
351
352#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
353	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
354		#subblock,                                                     \
355		TA_RAS_BLOCK__##subblock,                                      \
356		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
357		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
358	}
359
360static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
361	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
362	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
363	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
364	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
365	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
366	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
367	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
368	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
369	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
370	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
371	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
372	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
373	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
374	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
375	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
376	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
377	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
378			     0),
379	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
380			     0),
381	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
382	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
383	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
384	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
385	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
386	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
387	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
388	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
389			     0, 0),
390	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
391			     0),
392	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
393			     0, 0),
394	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
395			     0),
396	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
397			     0, 0),
398	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
399			     0),
400	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
401			     1),
402	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
403			     0, 0, 0),
404	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
405			     0),
406	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
407			     0),
408	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
409			     0),
410	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
411			     0),
412	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
413			     0),
414	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
415			     0, 0),
416	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
417			     0),
418	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
419			     0),
420	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
421			     0, 0, 0),
422	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
423			     0),
424	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
425			     0),
426	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
427			     0),
428	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
429			     0),
430	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
431			     0),
432	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
433			     0, 0),
434	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
435			     0),
436	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
437	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
438	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
439	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
440	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
441	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
442	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
443	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
444	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
445			     1),
446	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
447			     1),
448	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
449			     1),
450	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
451			     0),
452	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
453			     0),
454	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
455	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
456	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
457	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
458	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
459	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
460	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
461	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
462	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
463	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
464	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
465	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
466			     0),
467	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
468	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
469			     0),
470	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
471			     0, 0),
472	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
473			     0),
474	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
475	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
476	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
477	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
478	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
479	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
480	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
481	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
482	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
483	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
484	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
485	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
486	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
487	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
488	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
489	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
490	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
491	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
492	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
493	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
494	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
495	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
496	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
497	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
498	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
499	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
500	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
501	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
502	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
503	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
504	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
505	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
506	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
507	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
508};
509
510static const struct soc15_reg_golden golden_settings_gc_9_0[] =
511{
512	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
513	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
514	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
515	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
516	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
517	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
518	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
519	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
520	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
521	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
522	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
523	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
524	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
525	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
526	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
527	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
528	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
529	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
530	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
531	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
532};
533
534static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
535{
536	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
537	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
538	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
539	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
540	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
541	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
542	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
543	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
544	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
545	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
546	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
547	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
548	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
549	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
550	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
551	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
552	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
553	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
554};
555
556static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
557{
558	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
559	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
560	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
561	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
562	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
563	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
564	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
565	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
566	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
567	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
568	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
569};
570
571static const struct soc15_reg_golden golden_settings_gc_9_1[] =
572{
573	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
574	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
575	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
576	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
577	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
578	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
579	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
580	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
581	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
582	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
583	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
584	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
585	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
586	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
587	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
588	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
589	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
590	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
591	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
592	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
593	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
594	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
595	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
596	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
597};
598
599static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
600{
601	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
602	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
603	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
604	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
605	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
606	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
607	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
608};
609
610static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
611{
612	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
613	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
614	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
615	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
616	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
617	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
618	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
619	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
620	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
621	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
622	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
623	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
624	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
625	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
626	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
627	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
628	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
629	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
630	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
631};
632
633static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
634{
635	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
636	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
637	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
638	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
639	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
640	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
641	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
642	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
643	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
644	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
645	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
646	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
647};
648
649static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
650{
651	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
652	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
653	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
654};
655
656static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
657{
658	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
659	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
660	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
661	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
662	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
663	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
664	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
665	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
666	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
667	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
668	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
669	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
670	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
671	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
672	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
673	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
674};
675
676static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
677{
678	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
679	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
680	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
681	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
682	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
683	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
684	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
685	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
686	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
687	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
688	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
689	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
690	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
691};
692
693static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
694{
695	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
696	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
697	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
698	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
699	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
700	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
701	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
702	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
703	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
704	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
705};
706
707static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
708{
709	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
710	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
711	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
712	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
713	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
714	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
715	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
716	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
717};
718
719static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
720{
721	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
722	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
723	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
724	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
725	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
726	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
727	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
728	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
729};
730
731#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
732#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
733#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
734#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
735
736static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
737static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
738static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
739static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
740static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
741                                 struct amdgpu_cu_info *cu_info);
742static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
743static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
744static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
745static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
746static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
747					  void *ras_error_status);
748static void gfx_v9_0_clear_ras_edc_counter(struct amdgpu_device *adev);
749static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
750				     void *inject_if);
751
752static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
753				uint64_t queue_mask)
754{
755	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
756	amdgpu_ring_write(kiq_ring,
757		PACKET3_SET_RESOURCES_VMID_MASK(0) |
758		/* vmid_mask:0* queue_type:0 (KIQ) */
759		PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
760	amdgpu_ring_write(kiq_ring,
761			lower_32_bits(queue_mask));	/* queue mask lo */
762	amdgpu_ring_write(kiq_ring,
763			upper_32_bits(queue_mask));	/* queue mask hi */
764	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
765	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
766	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
767	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
768}
769
770static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
771				 struct amdgpu_ring *ring)
772{
773	struct amdgpu_device *adev = kiq_ring->adev;
774	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
775	uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
776	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
777
778	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
779	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
780	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
781			 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
782			 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
783			 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
784			 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
785			 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
786			 /*queue_type: normal compute queue */
787			 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
788			 /* alloc format: all_on_one_pipe */
789			 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
790			 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
791			 /* num_queues: must be 1 */
792			 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
793	amdgpu_ring_write(kiq_ring,
794			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
795	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
796	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
797	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
798	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
799}
800
801static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
802				   struct amdgpu_ring *ring,
803				   enum amdgpu_unmap_queues_action action,
804				   u64 gpu_addr, u64 seq)
805{
806	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
807
808	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
809	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
810			  PACKET3_UNMAP_QUEUES_ACTION(action) |
811			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
812			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
813			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
814	amdgpu_ring_write(kiq_ring,
815			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
816
817	if (action == PREEMPT_QUEUES_NO_UNMAP) {
818		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
819		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
820		amdgpu_ring_write(kiq_ring, seq);
821	} else {
822		amdgpu_ring_write(kiq_ring, 0);
823		amdgpu_ring_write(kiq_ring, 0);
824		amdgpu_ring_write(kiq_ring, 0);
825	}
826}
827
828static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
829				   struct amdgpu_ring *ring,
830				   u64 addr,
831				   u64 seq)
832{
833	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
834
835	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
836	amdgpu_ring_write(kiq_ring,
837			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
838			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
839			  PACKET3_QUERY_STATUS_COMMAND(2));
840	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
841	amdgpu_ring_write(kiq_ring,
842			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
843			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
844	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
845	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
846	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
847	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
848}
849
850static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
851				uint16_t pasid, uint32_t flush_type,
852				bool all_hub)
853{
854	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
855	amdgpu_ring_write(kiq_ring,
856			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
857			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
858			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
859			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
860}
861
862static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
863	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
864	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
865	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
866	.kiq_query_status = gfx_v9_0_kiq_query_status,
867	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
868	.set_resources_size = 8,
869	.map_queues_size = 7,
870	.unmap_queues_size = 6,
871	.query_status_size = 7,
872	.invalidate_tlbs_size = 2,
873};
874
875static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
876{
877	adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
878}
879
880static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
881{
882	switch (adev->asic_type) {
883	case CHIP_VEGA10:
884		soc15_program_register_sequence(adev,
885						golden_settings_gc_9_0,
886						ARRAY_SIZE(golden_settings_gc_9_0));
887		soc15_program_register_sequence(adev,
888						golden_settings_gc_9_0_vg10,
889						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
890		break;
891	case CHIP_VEGA12:
892		soc15_program_register_sequence(adev,
893						golden_settings_gc_9_2_1,
894						ARRAY_SIZE(golden_settings_gc_9_2_1));
895		soc15_program_register_sequence(adev,
896						golden_settings_gc_9_2_1_vg12,
897						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
898		break;
899	case CHIP_VEGA20:
900		soc15_program_register_sequence(adev,
901						golden_settings_gc_9_0,
902						ARRAY_SIZE(golden_settings_gc_9_0));
903		soc15_program_register_sequence(adev,
904						golden_settings_gc_9_0_vg20,
905						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
906		break;
907	case CHIP_ARCTURUS:
908		soc15_program_register_sequence(adev,
909						golden_settings_gc_9_4_1_arct,
910						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
911		break;
912	case CHIP_RAVEN:
913		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
914						ARRAY_SIZE(golden_settings_gc_9_1));
915		if (adev->rev_id >= 8)
916			soc15_program_register_sequence(adev,
917							golden_settings_gc_9_1_rv2,
918							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
919		else
920			soc15_program_register_sequence(adev,
921							golden_settings_gc_9_1_rv1,
922							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
923		break;
924	 case CHIP_RENOIR:
925		soc15_program_register_sequence(adev,
926						golden_settings_gc_9_1_rn,
927						ARRAY_SIZE(golden_settings_gc_9_1_rn));
928		return; /* for renoir, don't need common goldensetting */
929	default:
930		break;
931	}
932
933	if (adev->asic_type != CHIP_ARCTURUS)
934		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
935						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
936}
937
938static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
939{
940	adev->gfx.scratch.num_reg = 8;
941	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
942	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
943}
944
945static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
946				       bool wc, uint32_t reg, uint32_t val)
947{
948	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
949	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
950				WRITE_DATA_DST_SEL(0) |
951				(wc ? WR_CONFIRM : 0));
952	amdgpu_ring_write(ring, reg);
953	amdgpu_ring_write(ring, 0);
954	amdgpu_ring_write(ring, val);
955}
956
957static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
958				  int mem_space, int opt, uint32_t addr0,
959				  uint32_t addr1, uint32_t ref, uint32_t mask,
960				  uint32_t inv)
961{
962	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
963	amdgpu_ring_write(ring,
964				 /* memory (1) or register (0) */
965				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
966				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
967				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
968				 WAIT_REG_MEM_ENGINE(eng_sel)));
969
970	if (mem_space)
971		BUG_ON(addr0 & 0x3); /* Dword align */
972	amdgpu_ring_write(ring, addr0);
973	amdgpu_ring_write(ring, addr1);
974	amdgpu_ring_write(ring, ref);
975	amdgpu_ring_write(ring, mask);
976	amdgpu_ring_write(ring, inv); /* poll interval */
977}
978
979static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
980{
981	struct amdgpu_device *adev = ring->adev;
982	uint32_t scratch;
983	uint32_t tmp = 0;
984	unsigned i;
985	int r;
986
987	r = amdgpu_gfx_scratch_get(adev, &scratch);
988	if (r)
989		return r;
990
991	WREG32(scratch, 0xCAFEDEAD);
992	r = amdgpu_ring_alloc(ring, 3);
993	if (r)
994		goto error_free_scratch;
995
996	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
997	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
998	amdgpu_ring_write(ring, 0xDEADBEEF);
999	amdgpu_ring_commit(ring);
1000
1001	for (i = 0; i < adev->usec_timeout; i++) {
1002		tmp = RREG32(scratch);
1003		if (tmp == 0xDEADBEEF)
1004			break;
1005		udelay(1);
1006	}
1007
1008	if (i >= adev->usec_timeout)
1009		r = -ETIMEDOUT;
1010
1011error_free_scratch:
1012	amdgpu_gfx_scratch_free(adev, scratch);
1013	return r;
1014}
1015
1016static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1017{
1018	struct amdgpu_device *adev = ring->adev;
1019	struct amdgpu_ib ib;
1020	struct dma_fence *f = NULL;
1021
1022	unsigned index;
1023	uint64_t gpu_addr;
1024	uint32_t tmp;
1025	long r;
1026
1027	r = amdgpu_device_wb_get(adev, &index);
1028	if (r)
1029		return r;
1030
1031	gpu_addr = adev->wb.gpu_addr + (index * 4);
1032	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1033	memset(&ib, 0, sizeof(ib));
1034	r = amdgpu_ib_get(adev, NULL, 16, &ib);
1035	if (r)
1036		goto err1;
1037
1038	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1039	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1040	ib.ptr[2] = lower_32_bits(gpu_addr);
1041	ib.ptr[3] = upper_32_bits(gpu_addr);
1042	ib.ptr[4] = 0xDEADBEEF;
1043	ib.length_dw = 5;
1044
1045	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1046	if (r)
1047		goto err2;
1048
1049	r = dma_fence_wait_timeout(f, false, timeout);
1050	if (r == 0) {
1051		r = -ETIMEDOUT;
1052		goto err2;
1053	} else if (r < 0) {
1054		goto err2;
1055	}
1056
1057	tmp = adev->wb.wb[index];
1058	if (tmp == 0xDEADBEEF)
1059		r = 0;
1060	else
1061		r = -EINVAL;
1062
1063err2:
1064	amdgpu_ib_free(adev, &ib, NULL);
1065	dma_fence_put(f);
1066err1:
1067	amdgpu_device_wb_free(adev, index);
1068	return r;
1069}
1070
1071
1072static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1073{
1074	release_firmware(adev->gfx.pfp_fw);
1075	adev->gfx.pfp_fw = NULL;
1076	release_firmware(adev->gfx.me_fw);
1077	adev->gfx.me_fw = NULL;
1078	release_firmware(adev->gfx.ce_fw);
1079	adev->gfx.ce_fw = NULL;
1080	release_firmware(adev->gfx.rlc_fw);
1081	adev->gfx.rlc_fw = NULL;
1082	release_firmware(adev->gfx.mec_fw);
1083	adev->gfx.mec_fw = NULL;
1084	release_firmware(adev->gfx.mec2_fw);
1085	adev->gfx.mec2_fw = NULL;
1086
1087	kfree(adev->gfx.rlc.register_list_format);
1088}
1089
1090static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
1091{
1092	const struct rlc_firmware_header_v2_1 *rlc_hdr;
1093
1094	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
1095	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
1096	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
1097	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
1098	adev->gfx.rlc.save_restore_list_cntl = (const u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
1099	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
1100	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
1101	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
1102	adev->gfx.rlc.save_restore_list_gpm = (const u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
1103	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
1104	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
1105	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
1106	adev->gfx.rlc.save_restore_list_srm = (const u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
1107	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
1108			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
1109}
1110
1111static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1112{
1113	adev->gfx.me_fw_write_wait = false;
1114	adev->gfx.mec_fw_write_wait = false;
1115
1116	if ((adev->gfx.mec_fw_version < 0x000001a5) ||
1117	    (adev->gfx.mec_feature_version < 46) ||
1118	    (adev->gfx.pfp_fw_version < 0x000000b7) ||
1119	    (adev->gfx.pfp_feature_version < 46))
1120		DRM_WARN_ONCE("CP firmware version too old, please update!");
1121
1122	switch (adev->asic_type) {
1123	case CHIP_VEGA10:
1124		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1125		    (adev->gfx.me_feature_version >= 42) &&
1126		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1127		    (adev->gfx.pfp_feature_version >= 42))
1128			adev->gfx.me_fw_write_wait = true;
1129
1130		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1131		    (adev->gfx.mec_feature_version >= 42))
1132			adev->gfx.mec_fw_write_wait = true;
1133		break;
1134	case CHIP_VEGA12:
1135		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1136		    (adev->gfx.me_feature_version >= 44) &&
1137		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1138		    (adev->gfx.pfp_feature_version >= 44))
1139			adev->gfx.me_fw_write_wait = true;
1140
1141		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1142		    (adev->gfx.mec_feature_version >= 44))
1143			adev->gfx.mec_fw_write_wait = true;
1144		break;
1145	case CHIP_VEGA20:
1146		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1147		    (adev->gfx.me_feature_version >= 44) &&
1148		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1149		    (adev->gfx.pfp_feature_version >= 44))
1150			adev->gfx.me_fw_write_wait = true;
1151
1152		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1153		    (adev->gfx.mec_feature_version >= 44))
1154			adev->gfx.mec_fw_write_wait = true;
1155		break;
1156	case CHIP_RAVEN:
1157		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1158		    (adev->gfx.me_feature_version >= 42) &&
1159		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1160		    (adev->gfx.pfp_feature_version >= 42))
1161			adev->gfx.me_fw_write_wait = true;
1162
1163		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1164		    (adev->gfx.mec_feature_version >= 42))
1165			adev->gfx.mec_fw_write_wait = true;
1166		break;
1167	default:
1168		break;
1169	}
1170}
1171
1172struct amdgpu_gfxoff_quirk {
1173	u16 chip_vendor;
1174	u16 chip_device;
1175	u16 subsys_vendor;
1176	u16 subsys_device;
1177	u8 revision;
1178};
1179
1180static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1181	/* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1182	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1183	{ 0, 0, 0, 0, 0 },
1184};
1185
1186static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1187{
1188	const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1189
1190	while (p && p->chip_device != 0) {
1191		if (pdev->vendor == p->chip_vendor &&
1192		    pdev->device == p->chip_device &&
1193		    pdev->subsystem_vendor == p->subsys_vendor &&
1194		    pdev->subsystem_device == p->subsys_device &&
1195		    pdev->revision == p->revision) {
1196			return true;
1197		}
1198		++p;
1199	}
1200	return false;
1201}
1202
1203static bool is_raven_kicker(struct amdgpu_device *adev)
1204{
1205	if (adev->pm.fw_version >= 0x41e2b)
1206		return true;
1207	else
1208		return false;
1209}
1210
1211static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1212{
1213	if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1214		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1215
1216	switch (adev->asic_type) {
1217	case CHIP_VEGA10:
1218	case CHIP_VEGA12:
1219	case CHIP_VEGA20:
1220		break;
1221	case CHIP_RAVEN:
1222		if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) &&
1223		    ((!is_raven_kicker(adev) &&
1224		      adev->gfx.rlc_fw_version < 531) ||
1225		     (adev->gfx.rlc_feature_version < 1) ||
1226		     !adev->gfx.rlc.is_rlc_v2_1))
1227			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1228
1229		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1230			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1231				AMD_PG_SUPPORT_CP |
1232				AMD_PG_SUPPORT_RLC_SMU_HS;
1233		break;
1234	case CHIP_RENOIR:
1235		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1236			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1237				AMD_PG_SUPPORT_CP |
1238				AMD_PG_SUPPORT_RLC_SMU_HS;
1239		break;
1240	default:
1241		break;
1242	}
1243}
1244
1245static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1246					  const char *chip_name)
1247{
1248	char fw_name[30];
1249	int err;
1250	struct amdgpu_firmware_info *info = NULL;
1251	const struct common_firmware_header *header = NULL;
1252	const struct gfx_firmware_header_v1_0 *cp_hdr;
1253
1254	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1255	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1256	if (err)
1257		goto out;
1258	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1259	if (err)
1260		goto out;
1261	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1262	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1263	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1264
1265	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1266	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1267	if (err)
1268		goto out;
1269	err = amdgpu_ucode_validate(adev->gfx.me_fw);
1270	if (err)
1271		goto out;
1272	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1273	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1274	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1275
1276	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1277	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1278	if (err)
1279		goto out;
1280	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1281	if (err)
1282		goto out;
1283	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1284	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1285	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1286
1287	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1288		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1289		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1290		info->fw = adev->gfx.pfp_fw;
1291		header = (const struct common_firmware_header *)info->fw->data;
1292		adev->firmware.fw_size +=
1293			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1294
1295		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1296		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1297		info->fw = adev->gfx.me_fw;
1298		header = (const struct common_firmware_header *)info->fw->data;
1299		adev->firmware.fw_size +=
1300			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1301
1302		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1303		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1304		info->fw = adev->gfx.ce_fw;
1305		header = (const struct common_firmware_header *)info->fw->data;
1306		adev->firmware.fw_size +=
1307			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1308	}
1309
1310out:
1311	if (err) {
1312		dev_err(adev->dev,
1313			"gfx9: Failed to load firmware \"%s\"\n",
1314			fw_name);
1315		release_firmware(adev->gfx.pfp_fw);
1316		adev->gfx.pfp_fw = NULL;
1317		release_firmware(adev->gfx.me_fw);
1318		adev->gfx.me_fw = NULL;
1319		release_firmware(adev->gfx.ce_fw);
1320		adev->gfx.ce_fw = NULL;
1321	}
1322	return err;
1323}
1324
1325static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1326					  const char *chip_name)
1327{
1328	char fw_name[30];
1329	int err;
1330	struct amdgpu_firmware_info *info = NULL;
1331	const struct common_firmware_header *header = NULL;
1332	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1333	unsigned int *tmp = NULL;
1334	unsigned int i = 0;
1335	uint16_t version_major;
1336	uint16_t version_minor;
1337	uint32_t smu_version;
1338
1339	/*
1340	 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
1341	 * instead of picasso_rlc.bin.
1342	 * Judgment method:
1343	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1344	 *          or revision >= 0xD8 && revision <= 0xDF
1345	 * otherwise is PCO FP5
1346	 */
1347	if (!strcmp(chip_name, "picasso") &&
1348		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1349		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1350		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1351	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1352		(smu_version >= 0x41e2b))
1353		/**
1354		*SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
1355		*/
1356		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1357	else
1358		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1359	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1360	if (err)
1361		goto out;
1362	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1363	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1364
1365	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1366	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1367	if (version_major == 2 && version_minor == 1)
1368		adev->gfx.rlc.is_rlc_v2_1 = true;
1369
1370	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1371	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1372	adev->gfx.rlc.save_and_restore_offset =
1373			le32_to_cpu(rlc_hdr->save_and_restore_offset);
1374	adev->gfx.rlc.clear_state_descriptor_offset =
1375			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1376	adev->gfx.rlc.avail_scratch_ram_locations =
1377			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1378	adev->gfx.rlc.reg_restore_list_size =
1379			le32_to_cpu(rlc_hdr->reg_restore_list_size);
1380	adev->gfx.rlc.reg_list_format_start =
1381			le32_to_cpu(rlc_hdr->reg_list_format_start);
1382	adev->gfx.rlc.reg_list_format_separate_start =
1383			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1384	adev->gfx.rlc.starting_offsets_start =
1385			le32_to_cpu(rlc_hdr->starting_offsets_start);
1386	adev->gfx.rlc.reg_list_format_size_bytes =
1387			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1388	adev->gfx.rlc.reg_list_size_bytes =
1389			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1390	adev->gfx.rlc.register_list_format =
1391			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1392				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1393	if (!adev->gfx.rlc.register_list_format) {
1394		err = -ENOMEM;
1395		goto out;
1396	}
1397
1398	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1399			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1400	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1401		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
1402
1403	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1404
1405	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1406			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1407	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1408		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1409
1410	if (adev->gfx.rlc.is_rlc_v2_1)
1411		gfx_v9_0_init_rlc_ext_microcode(adev);
1412
1413	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1414		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1415		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1416		info->fw = adev->gfx.rlc_fw;
1417		header = (const struct common_firmware_header *)info->fw->data;
1418		adev->firmware.fw_size +=
1419			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1420
1421		if (adev->gfx.rlc.is_rlc_v2_1 &&
1422		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1423		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1424		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1425			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1426			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1427			info->fw = adev->gfx.rlc_fw;
1428			adev->firmware.fw_size +=
1429				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1430
1431			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1432			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1433			info->fw = adev->gfx.rlc_fw;
1434			adev->firmware.fw_size +=
1435				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1436
1437			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1438			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1439			info->fw = adev->gfx.rlc_fw;
1440			adev->firmware.fw_size +=
1441				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1442		}
1443	}
1444
1445out:
1446	if (err) {
1447		dev_err(adev->dev,
1448			"gfx9: Failed to load firmware \"%s\"\n",
1449			fw_name);
1450		release_firmware(adev->gfx.rlc_fw);
1451		adev->gfx.rlc_fw = NULL;
1452	}
1453	return err;
1454}
1455
1456static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1457					  const char *chip_name)
1458{
1459	char fw_name[30];
1460	int err;
1461	struct amdgpu_firmware_info *info = NULL;
1462	const struct common_firmware_header *header = NULL;
1463	const struct gfx_firmware_header_v1_0 *cp_hdr;
1464
1465	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1466	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1467	if (err)
1468		goto out;
1469	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1470	if (err)
1471		goto out;
1472	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1473	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1474	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1475
1476
1477	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1478	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1479	if (!err) {
1480		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1481		if (err)
1482			goto out;
1483		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1484		adev->gfx.mec2_fw->data;
1485		adev->gfx.mec2_fw_version =
1486		le32_to_cpu(cp_hdr->header.ucode_version);
1487		adev->gfx.mec2_feature_version =
1488		le32_to_cpu(cp_hdr->ucode_feature_version);
1489	} else {
1490		err = 0;
1491		adev->gfx.mec2_fw = NULL;
1492	}
1493
1494	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1495		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1496		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1497		info->fw = adev->gfx.mec_fw;
1498		header = (const struct common_firmware_header *)info->fw->data;
1499		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1500		adev->firmware.fw_size +=
1501			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1502
1503		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1504		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1505		info->fw = adev->gfx.mec_fw;
1506		adev->firmware.fw_size +=
1507			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1508
1509		if (adev->gfx.mec2_fw) {
1510			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1511			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1512			info->fw = adev->gfx.mec2_fw;
1513			header = (const struct common_firmware_header *)info->fw->data;
1514			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1515			adev->firmware.fw_size +=
1516				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1517
1518			/* TODO: Determine if MEC2 JT FW loading can be removed
1519				 for all GFX V9 asic and above */
1520			if (adev->asic_type != CHIP_ARCTURUS &&
1521			    adev->asic_type != CHIP_RENOIR) {
1522				info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1523				info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1524				info->fw = adev->gfx.mec2_fw;
1525				adev->firmware.fw_size +=
1526					ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1527					PAGE_SIZE);
1528			}
1529		}
1530	}
1531
1532out:
1533	gfx_v9_0_check_if_need_gfxoff(adev);
1534	gfx_v9_0_check_fw_write_wait(adev);
1535	if (err) {
1536		dev_err(adev->dev,
1537			"gfx9: Failed to load firmware \"%s\"\n",
1538			fw_name);
1539		release_firmware(adev->gfx.mec_fw);
1540		adev->gfx.mec_fw = NULL;
1541		release_firmware(adev->gfx.mec2_fw);
1542		adev->gfx.mec2_fw = NULL;
1543	}
1544	return err;
1545}
1546
1547static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1548{
1549	const char *chip_name;
1550	int r;
1551
1552	DRM_DEBUG("\n");
1553
1554	switch (adev->asic_type) {
1555	case CHIP_VEGA10:
1556		chip_name = "vega10";
1557		break;
1558	case CHIP_VEGA12:
1559		chip_name = "vega12";
1560		break;
1561	case CHIP_VEGA20:
1562		chip_name = "vega20";
1563		break;
1564	case CHIP_RAVEN:
1565		if (adev->rev_id >= 8)
1566			chip_name = "raven2";
1567		else if (adev->pdev->device == 0x15d8)
1568			chip_name = "picasso";
1569		else
1570			chip_name = "raven";
1571		break;
1572	case CHIP_ARCTURUS:
1573		chip_name = "arcturus";
1574		break;
1575	case CHIP_RENOIR:
1576		chip_name = "renoir";
1577		break;
1578	default:
1579		BUG();
1580	}
1581
1582	/* No CPG in Arcturus */
1583	if (adev->asic_type != CHIP_ARCTURUS) {
1584		r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1585		if (r)
1586			return r;
1587	}
1588
1589	r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1590	if (r)
1591		return r;
1592
1593	r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1594	if (r)
1595		return r;
1596
1597	return r;
1598}
1599
1600static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1601{
1602	u32 count = 0;
1603	const struct cs_section_def *sect = NULL;
1604	const struct cs_extent_def *ext = NULL;
1605
1606	/* begin clear state */
1607	count += 2;
1608	/* context control state */
1609	count += 3;
1610
1611	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1612		for (ext = sect->section; ext->extent != NULL; ++ext) {
1613			if (sect->id == SECT_CONTEXT)
1614				count += 2 + ext->reg_count;
1615			else
1616				return 0;
1617		}
1618	}
1619
1620	/* end clear state */
1621	count += 2;
1622	/* clear state */
1623	count += 2;
1624
1625	return count;
1626}
1627
1628static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1629				    volatile u32 *buffer)
1630{
1631	u32 count = 0, i;
1632	const struct cs_section_def *sect = NULL;
1633	const struct cs_extent_def *ext = NULL;
1634
1635	if (adev->gfx.rlc.cs_data == NULL)
1636		return;
1637	if (buffer == NULL)
1638		return;
1639
1640	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1641	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1642
1643	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1644	buffer[count++] = cpu_to_le32(0x80000000);
1645	buffer[count++] = cpu_to_le32(0x80000000);
1646
1647	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1648		for (ext = sect->section; ext->extent != NULL; ++ext) {
1649			if (sect->id == SECT_CONTEXT) {
1650				buffer[count++] =
1651					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1652				buffer[count++] = cpu_to_le32(ext->reg_index -
1653						PACKET3_SET_CONTEXT_REG_START);
1654				for (i = 0; i < ext->reg_count; i++)
1655					buffer[count++] = cpu_to_le32(ext->extent[i]);
1656			} else {
1657				return;
1658			}
1659		}
1660	}
1661
1662	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1663	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1664
1665	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1666	buffer[count++] = cpu_to_le32(0);
1667}
1668
1669static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1670{
1671	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1672	uint32_t pg_always_on_cu_num = 2;
1673	uint32_t always_on_cu_num;
1674	uint32_t i, j, k;
1675	uint32_t mask, cu_bitmap, counter;
1676
1677	if (adev->flags & AMD_IS_APU)
1678		always_on_cu_num = 4;
1679	else if (adev->asic_type == CHIP_VEGA12)
1680		always_on_cu_num = 8;
1681	else
1682		always_on_cu_num = 12;
1683
1684	mutex_lock(&adev->grbm_idx_mutex);
1685	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1686		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1687			mask = 1;
1688			cu_bitmap = 0;
1689			counter = 0;
1690			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1691
1692			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
1693				if (cu_info->bitmap[i][j] & mask) {
1694					if (counter == pg_always_on_cu_num)
1695						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1696					if (counter < always_on_cu_num)
1697						cu_bitmap |= mask;
1698					else
1699						break;
1700					counter++;
1701				}
1702				mask <<= 1;
1703			}
1704
1705			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1706			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1707		}
1708	}
1709	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1710	mutex_unlock(&adev->grbm_idx_mutex);
1711}
1712
1713static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1714{
1715	uint32_t data;
1716
1717	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1718	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1719	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1720	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1721	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1722
1723	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1724	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1725
1726	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1727	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1728
1729	mutex_lock(&adev->grbm_idx_mutex);
1730	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1731	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1732	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1733
1734	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1735	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1736	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1737	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1738	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1739
1740	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1741	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1742	data &= 0x0000FFFF;
1743	data |= 0x00C00000;
1744	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1745
1746	/*
1747	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1748	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1749	 */
1750
1751	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
1752	 * but used for RLC_LB_CNTL configuration */
1753	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1754	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1755	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1756	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1757	mutex_unlock(&adev->grbm_idx_mutex);
1758
1759	gfx_v9_0_init_always_on_cu_mask(adev);
1760}
1761
1762static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1763{
1764	uint32_t data;
1765
1766	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1767	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1768	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1769	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1770	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1771
1772	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1773	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1774
1775	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1776	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1777
1778	mutex_lock(&adev->grbm_idx_mutex);
1779	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1780	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1781	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1782
1783	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1784	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1785	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1786	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1787	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1788
1789	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1790	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1791	data &= 0x0000FFFF;
1792	data |= 0x00C00000;
1793	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1794
1795	/*
1796	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1797	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1798	 */
1799
1800	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
1801	 * but used for RLC_LB_CNTL configuration */
1802	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1803	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1804	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1805	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1806	mutex_unlock(&adev->grbm_idx_mutex);
1807
1808	gfx_v9_0_init_always_on_cu_mask(adev);
1809}
1810
1811static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1812{
1813	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1814}
1815
1816static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1817{
1818	return 5;
1819}
1820
1821static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1822{
1823	const struct cs_section_def *cs_data;
1824	int r;
1825
1826	adev->gfx.rlc.cs_data = gfx9_cs_data;
1827
1828	cs_data = adev->gfx.rlc.cs_data;
1829
1830	if (cs_data) {
1831		/* init clear state block */
1832		r = amdgpu_gfx_rlc_init_csb(adev);
1833		if (r)
1834			return r;
1835	}
1836
1837	if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
1838		/* TODO: double check the cp_table_size for RV */
1839		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1840		r = amdgpu_gfx_rlc_init_cpt(adev);
1841		if (r)
1842			return r;
1843	}
1844
1845	switch (adev->asic_type) {
1846	case CHIP_RAVEN:
1847		gfx_v9_0_init_lbpw(adev);
1848		break;
1849	case CHIP_VEGA20:
1850		gfx_v9_4_init_lbpw(adev);
1851		break;
1852	default:
1853		break;
1854	}
1855
1856	return 0;
1857}
1858
1859static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1860{
1861	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1862	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1863}
1864
1865static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1866{
1867	int r;
1868	u32 *hpd;
1869	const __le32 *fw_data;
1870	unsigned fw_size;
1871	u32 *fw;
1872	size_t mec_hpd_size;
1873
1874	const struct gfx_firmware_header_v1_0 *mec_hdr;
1875
1876	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1877
1878	/* take ownership of the relevant compute queues */
1879	amdgpu_gfx_compute_queue_acquire(adev);
1880	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1881
1882	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1883				      AMDGPU_GEM_DOMAIN_VRAM,
1884				      &adev->gfx.mec.hpd_eop_obj,
1885				      &adev->gfx.mec.hpd_eop_gpu_addr,
1886				      (void **)&hpd);
1887	if (r) {
1888		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1889		gfx_v9_0_mec_fini(adev);
1890		return r;
1891	}
1892
1893	memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1894
1895	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1896	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1897
1898	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1899
1900	fw_data = (const __le32 *)
1901		(adev->gfx.mec_fw->data +
1902		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1903	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1904
1905	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1906				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1907				      &adev->gfx.mec.mec_fw_obj,
1908				      &adev->gfx.mec.mec_fw_gpu_addr,
1909				      (void **)&fw);
1910	if (r) {
1911		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1912		gfx_v9_0_mec_fini(adev);
1913		return r;
1914	}
1915
1916	memcpy(fw, fw_data, fw_size);
1917
1918	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1919	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1920
1921	return 0;
1922}
1923
1924static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1925{
1926	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1927		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1928		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1929		(address << SQ_IND_INDEX__INDEX__SHIFT) |
1930		(SQ_IND_INDEX__FORCE_READ_MASK));
1931	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1932}
1933
1934static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1935			   uint32_t wave, uint32_t thread,
1936			   uint32_t regno, uint32_t num, uint32_t *out)
1937{
1938	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1939		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1940		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1941		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
1942		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1943		(SQ_IND_INDEX__FORCE_READ_MASK) |
1944		(SQ_IND_INDEX__AUTO_INCR_MASK));
1945	while (num--)
1946		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1947}
1948
1949static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1950{
1951	/* type 1 wave data */
1952	dst[(*no_fields)++] = 1;
1953	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1954	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1955	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1956	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1957	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1958	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1959	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1960	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1961	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1962	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1963	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1964	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1965	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1966	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1967}
1968
1969static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1970				     uint32_t wave, uint32_t start,
1971				     uint32_t size, uint32_t *dst)
1972{
1973	wave_read_regs(
1974		adev, simd, wave, 0,
1975		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1976}
1977
1978static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1979				     uint32_t wave, uint32_t thread,
1980				     uint32_t start, uint32_t size,
1981				     uint32_t *dst)
1982{
1983	wave_read_regs(
1984		adev, simd, wave, thread,
1985		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1986}
1987
1988static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1989				  u32 me, u32 pipe, u32 q, u32 vm)
1990{
1991	soc15_grbm_select(adev, me, pipe, q, vm);
1992}
1993
1994static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1995	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1996	.select_se_sh = &gfx_v9_0_select_se_sh,
1997	.read_wave_data = &gfx_v9_0_read_wave_data,
1998	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1999	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
2000	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
2001	.ras_error_inject = &gfx_v9_0_ras_error_inject,
2002	.query_ras_error_count = &gfx_v9_0_query_ras_error_count
2003};
2004
2005static const struct amdgpu_gfx_funcs gfx_v9_4_gfx_funcs = {
2006	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
2007	.select_se_sh = &gfx_v9_0_select_se_sh,
2008	.read_wave_data = &gfx_v9_0_read_wave_data,
2009	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
2010	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
2011	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
2012	.ras_error_inject = &gfx_v9_4_ras_error_inject,
2013	.query_ras_error_count = &gfx_v9_4_query_ras_error_count
2014};
2015
2016static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
2017{
2018	u32 gb_addr_config;
2019	int err;
2020
2021	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
2022
2023	switch (adev->asic_type) {
2024	case CHIP_VEGA10:
2025		adev->gfx.config.max_hw_contexts = 8;
2026		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2027		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2028		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2029		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2030		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
2031		break;
2032	case CHIP_VEGA12:
2033		adev->gfx.config.max_hw_contexts = 8;
2034		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2035		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2036		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2037		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2038		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
2039		DRM_INFO("fix gfx.config for vega12\n");
2040		break;
2041	case CHIP_VEGA20:
2042		adev->gfx.config.max_hw_contexts = 8;
2043		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2044		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2045		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2046		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2047		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2048		gb_addr_config &= ~0xf3e777ff;
2049		gb_addr_config |= 0x22014042;
2050		/* check vbios table if gpu info is not available */
2051		err = amdgpu_atomfirmware_get_gfx_info(adev);
2052		if (err)
2053			return err;
2054		break;
2055	case CHIP_RAVEN:
2056		adev->gfx.config.max_hw_contexts = 8;
2057		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2058		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2059		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2060		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2061		if (adev->rev_id >= 8)
2062			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2063		else
2064			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2065		break;
2066	case CHIP_ARCTURUS:
2067		adev->gfx.funcs = &gfx_v9_4_gfx_funcs;
2068		adev->gfx.config.max_hw_contexts = 8;
2069		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2070		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2071		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2072		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2073		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2074		gb_addr_config &= ~0xf3e777ff;
2075		gb_addr_config |= 0x22014042;
2076		break;
2077	case CHIP_RENOIR:
2078		adev->gfx.config.max_hw_contexts = 8;
2079		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2080		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2081		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2082		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2083		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2084		gb_addr_config &= ~0xf3e777ff;
2085		gb_addr_config |= 0x22010042;
2086		break;
2087	default:
2088		BUG();
2089		break;
2090	}
2091
2092	adev->gfx.config.gb_addr_config = gb_addr_config;
2093
2094	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2095			REG_GET_FIELD(
2096					adev->gfx.config.gb_addr_config,
2097					GB_ADDR_CONFIG,
2098					NUM_PIPES);
2099
2100	adev->gfx.config.max_tile_pipes =
2101		adev->gfx.config.gb_addr_config_fields.num_pipes;
2102
2103	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2104			REG_GET_FIELD(
2105					adev->gfx.config.gb_addr_config,
2106					GB_ADDR_CONFIG,
2107					NUM_BANKS);
2108	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2109			REG_GET_FIELD(
2110					adev->gfx.config.gb_addr_config,
2111					GB_ADDR_CONFIG,
2112					MAX_COMPRESSED_FRAGS);
2113	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2114			REG_GET_FIELD(
2115					adev->gfx.config.gb_addr_config,
2116					GB_ADDR_CONFIG,
2117					NUM_RB_PER_SE);
2118	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2119			REG_GET_FIELD(
2120					adev->gfx.config.gb_addr_config,
2121					GB_ADDR_CONFIG,
2122					NUM_SHADER_ENGINES);
2123	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2124			REG_GET_FIELD(
2125					adev->gfx.config.gb_addr_config,
2126					GB_ADDR_CONFIG,
2127					PIPE_INTERLEAVE_SIZE));
2128
2129	return 0;
2130}
2131
2132static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2133				      int mec, int pipe, int queue)
2134{
2135	int r;
2136	unsigned irq_type;
2137	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2138
2139	ring = &adev->gfx.compute_ring[ring_id];
2140
2141	/* mec0 is me1 */
2142	ring->me = mec + 1;
2143	ring->pipe = pipe;
2144	ring->queue = queue;
2145
2146	ring->ring_obj = NULL;
2147	ring->use_doorbell = true;
2148	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2149	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2150				+ (ring_id * GFX9_MEC_HPD_SIZE);
2151	snprintf(ring->name, sizeof(ring->name), "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2152
2153	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2154		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2155		+ ring->pipe;
2156
2157	/* type-2 packets are deprecated on MEC, use type-3 instead */
2158	r = amdgpu_ring_init(adev, ring, 1024,
2159			     &adev->gfx.eop_irq, irq_type);
2160	if (r)
2161		return r;
2162
2163
2164	return 0;
2165}
2166
2167static int gfx_v9_0_sw_init(void *handle)
2168{
2169	int i, j, k, r, ring_id;
2170	struct amdgpu_ring *ring;
2171	struct amdgpu_kiq *kiq;
2172	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2173
2174	switch (adev->asic_type) {
2175	case CHIP_VEGA10:
2176	case CHIP_VEGA12:
2177	case CHIP_VEGA20:
2178	case CHIP_RAVEN:
2179	case CHIP_ARCTURUS:
2180	case CHIP_RENOIR:
2181		adev->gfx.mec.num_mec = 2;
2182		break;
2183	default:
2184		adev->gfx.mec.num_mec = 1;
2185		break;
2186	}
2187
2188	adev->gfx.mec.num_pipe_per_mec = 4;
2189	adev->gfx.mec.num_queue_per_pipe = 8;
2190
2191	/* EOP Event */
2192	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2193	if (r)
2194		return r;
2195
2196	/* Privileged reg */
2197	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2198			      &adev->gfx.priv_reg_irq);
2199	if (r)
2200		return r;
2201
2202	/* Privileged inst */
2203	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2204			      &adev->gfx.priv_inst_irq);
2205	if (r)
2206		return r;
2207
2208	/* ECC error */
2209	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2210			      &adev->gfx.cp_ecc_error_irq);
2211	if (r)
2212		return r;
2213
2214	/* FUE error */
2215	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2216			      &adev->gfx.cp_ecc_error_irq);
2217	if (r)
2218		return r;
2219
2220	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2221
2222	gfx_v9_0_scratch_init(adev);
2223
2224	r = gfx_v9_0_init_microcode(adev);
2225	if (r) {
2226		DRM_ERROR("Failed to load gfx firmware!\n");
2227		return r;
2228	}
2229
2230	r = adev->gfx.rlc.funcs->init(adev);
2231	if (r) {
2232		DRM_ERROR("Failed to init rlc BOs!\n");
2233		return r;
2234	}
2235
2236	r = gfx_v9_0_mec_init(adev);
2237	if (r) {
2238		DRM_ERROR("Failed to init MEC BOs!\n");
2239		return r;
2240	}
2241
2242	/* set up the gfx ring */
2243	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2244		ring = &adev->gfx.gfx_ring[i];
2245		ring->ring_obj = NULL;
2246		if (!i)
2247			snprintf(ring->name, sizeof(ring->name), "gfx");
2248		else
2249			snprintf(ring->name, sizeof(ring->name), "gfx_%d", i);
2250		ring->use_doorbell = true;
2251		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2252		r = amdgpu_ring_init(adev, ring, 1024,
2253				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2254		if (r)
2255			return r;
2256	}
2257
2258	/* set up the compute queues - allocate horizontally across pipes */
2259	ring_id = 0;
2260	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2261		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2262			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2263				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2264					continue;
2265
2266				r = gfx_v9_0_compute_ring_init(adev,
2267							       ring_id,
2268							       i, k, j);
2269				if (r)
2270					return r;
2271
2272				ring_id++;
2273			}
2274		}
2275	}
2276
2277	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2278	if (r) {
2279		DRM_ERROR("Failed to init KIQ BOs!\n");
2280		return r;
2281	}
2282
2283	kiq = &adev->gfx.kiq;
2284	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2285	if (r)
2286		return r;
2287
2288	/* create MQD for all compute queues as wel as KIQ for SRIOV case */
2289	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2290	if (r)
2291		return r;
2292
2293	adev->gfx.ce_ram_size = 0x8000;
2294
2295	r = gfx_v9_0_gpu_early_init(adev);
2296	if (r)
2297		return r;
2298
2299	return 0;
2300}
2301
2302
2303static int gfx_v9_0_sw_fini(void *handle)
2304{
2305	int i;
2306	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2307
2308	amdgpu_gfx_ras_fini(adev);
2309
2310	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2311		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2312	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2313		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2314
2315	amdgpu_gfx_mqd_sw_fini(adev);
2316	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2317	amdgpu_gfx_kiq_fini(adev);
2318
2319	gfx_v9_0_mec_fini(adev);
2320	amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2321	if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
2322		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2323				&adev->gfx.rlc.cp_table_gpu_addr,
2324				(void **)__UNVOLATILE(&adev->gfx.rlc.cp_table_ptr));
2325	}
2326	gfx_v9_0_free_microcode(adev);
2327
2328	return 0;
2329}
2330
2331
2332static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2333{
2334	/* TODO */
2335}
2336
2337static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
2338{
2339	u32 data;
2340
2341	if (instance == 0xffffffff)
2342		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2343	else
2344		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2345
2346	if (se_num == 0xffffffff)
2347		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2348	else
2349		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2350
2351	if (sh_num == 0xffffffff)
2352		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2353	else
2354		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2355
2356	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2357}
2358
2359static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2360{
2361	u32 data, mask;
2362
2363	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2364	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2365
2366	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2367	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2368
2369	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2370					 adev->gfx.config.max_sh_per_se);
2371
2372	return (~data) & mask;
2373}
2374
2375static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2376{
2377	int i, j;
2378	u32 data;
2379	u32 active_rbs = 0;
2380	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2381					adev->gfx.config.max_sh_per_se;
2382
2383	mutex_lock(&adev->grbm_idx_mutex);
2384	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2385		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2386			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2387			data = gfx_v9_0_get_rb_active_bitmap(adev);
2388			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2389					       rb_bitmap_width_per_sh);
2390		}
2391	}
2392	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2393	mutex_unlock(&adev->grbm_idx_mutex);
2394
2395	adev->gfx.config.backend_enable_mask = active_rbs;
2396	adev->gfx.config.num_rbs = hweight32(active_rbs);
2397}
2398
2399#define DEFAULT_SH_MEM_BASES	(0x6000)
2400#define FIRST_COMPUTE_VMID	(8)
2401#define LAST_COMPUTE_VMID	(16)
2402static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2403{
2404	int i;
2405	uint32_t sh_mem_config;
2406	uint32_t sh_mem_bases;
2407
2408	/*
2409	 * Configure apertures:
2410	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2411	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2412	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2413	 */
2414	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2415
2416	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2417			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2418			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2419
2420	mutex_lock(&adev->srbm_mutex);
2421	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2422		soc15_grbm_select(adev, 0, 0, 0, i);
2423		/* CP and shaders */
2424		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2425		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2426	}
2427	soc15_grbm_select(adev, 0, 0, 0, 0);
2428	mutex_unlock(&adev->srbm_mutex);
2429
2430	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
2431	   acccess. These should be enabled by FW for target VMIDs. */
2432	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2433		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2434		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2435		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2436		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2437	}
2438}
2439
2440static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2441{
2442	int vmid;
2443
2444	/*
2445	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2446	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2447	 * the driver can enable them for graphics. VMID0 should maintain
2448	 * access so that HWS firmware can save/restore entries.
2449	 */
2450	for (vmid = 1; vmid < 16; vmid++) {
2451		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2452		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2453		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2454		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2455	}
2456}
2457
2458static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2459{
2460	uint32_t tmp;
2461
2462	switch (adev->asic_type) {
2463	case CHIP_ARCTURUS:
2464		tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2465		tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
2466					DISABLE_BARRIER_WAITCNT, 1);
2467		WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2468		break;
2469	default:
2470		break;
2471	};
2472}
2473
2474static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2475{
2476	u32 tmp;
2477	int i;
2478
2479	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2480
2481	gfx_v9_0_tiling_mode_table_init(adev);
2482
2483	gfx_v9_0_setup_rb(adev);
2484	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2485	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2486
2487	/* XXX SH_MEM regs */
2488	/* where to put LDS, scratch, GPUVM in FSA64 space */
2489	mutex_lock(&adev->srbm_mutex);
2490	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2491		soc15_grbm_select(adev, 0, 0, 0, i);
2492		/* CP and shaders */
2493		if (i == 0) {
2494			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2495					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2496			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2497					    !!amdgpu_noretry);
2498			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2499			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2500		} else {
2501			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2502					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2503			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2504					    !!amdgpu_noretry);
2505			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2506			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2507				(adev->gmc.private_aperture_start >> 48));
2508			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2509				(adev->gmc.shared_aperture_start >> 48));
2510			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2511		}
2512	}
2513	soc15_grbm_select(adev, 0, 0, 0, 0);
2514
2515	mutex_unlock(&adev->srbm_mutex);
2516
2517	gfx_v9_0_init_compute_vmid(adev);
2518	gfx_v9_0_init_gds_vmid(adev);
2519	gfx_v9_0_init_sq_config(adev);
2520}
2521
2522static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2523{
2524	u32 i, j, k;
2525	u32 mask;
2526
2527	mutex_lock(&adev->grbm_idx_mutex);
2528	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2529		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2530			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2531			for (k = 0; k < adev->usec_timeout; k++) {
2532				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2533					break;
2534				udelay(1);
2535			}
2536			if (k == adev->usec_timeout) {
2537				gfx_v9_0_select_se_sh(adev, 0xffffffff,
2538						      0xffffffff, 0xffffffff);
2539				mutex_unlock(&adev->grbm_idx_mutex);
2540				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2541					 i, j);
2542				return;
2543			}
2544		}
2545	}
2546	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2547	mutex_unlock(&adev->grbm_idx_mutex);
2548
2549	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2550		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2551		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2552		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2553	for (k = 0; k < adev->usec_timeout; k++) {
2554		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2555			break;
2556		udelay(1);
2557	}
2558}
2559
2560static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2561					       bool enable)
2562{
2563	u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2564
2565	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2566	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2567	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2568	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2569
2570	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2571}
2572
2573static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2574{
2575	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2576	/* csib */
2577	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2578			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2579	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2580			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2581	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2582			adev->gfx.rlc.clear_state_size);
2583}
2584
2585static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2586				int indirect_offset,
2587				int list_size,
2588				int *unique_indirect_regs,
2589				int unique_indirect_reg_count,
2590				int *indirect_start_offsets,
2591				int *indirect_start_offsets_count,
2592				int max_start_offsets_count)
2593{
2594	int idx;
2595
2596	for (; indirect_offset < list_size; indirect_offset++) {
2597		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2598		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2599		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2600
2601		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2602			indirect_offset += 2;
2603
2604			/* look for the matching indice */
2605			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2606				if (unique_indirect_regs[idx] ==
2607					register_list_format[indirect_offset] ||
2608					!unique_indirect_regs[idx])
2609					break;
2610			}
2611
2612			BUG_ON(idx >= unique_indirect_reg_count);
2613
2614			if (!unique_indirect_regs[idx])
2615				unique_indirect_regs[idx] = register_list_format[indirect_offset];
2616
2617			indirect_offset++;
2618		}
2619	}
2620}
2621
2622static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2623{
2624	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2625	int unique_indirect_reg_count = 0;
2626
2627	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2628	int indirect_start_offsets_count = 0;
2629
2630	int list_size = 0;
2631	int i = 0, j = 0;
2632	u32 tmp = 0;
2633
2634	u32 *register_list_format =
2635		kmemdup(adev->gfx.rlc.register_list_format,
2636			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2637	if (!register_list_format)
2638		return -ENOMEM;
2639
2640	/* setup unique_indirect_regs array and indirect_start_offsets array */
2641	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2642	gfx_v9_1_parse_ind_reg_list(register_list_format,
2643				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2644				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2645				    unique_indirect_regs,
2646				    unique_indirect_reg_count,
2647				    indirect_start_offsets,
2648				    &indirect_start_offsets_count,
2649				    ARRAY_SIZE(indirect_start_offsets));
2650
2651	/* enable auto inc in case it is disabled */
2652	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2653	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2654	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2655
2656	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2657	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2658		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2659	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2660		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2661			adev->gfx.rlc.register_restore[i]);
2662
2663	/* load indirect register */
2664	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2665		adev->gfx.rlc.reg_list_format_start);
2666
2667	/* direct register portion */
2668	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2669		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2670			register_list_format[i]);
2671
2672	/* indirect register portion */
2673	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2674		if (register_list_format[i] == 0xFFFFFFFF) {
2675			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2676			continue;
2677		}
2678
2679		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2680		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2681
2682		for (j = 0; j < unique_indirect_reg_count; j++) {
2683			if (register_list_format[i] == unique_indirect_regs[j]) {
2684				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2685				break;
2686			}
2687		}
2688
2689		BUG_ON(j >= unique_indirect_reg_count);
2690
2691		i++;
2692	}
2693
2694	/* set save/restore list size */
2695	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2696	list_size = list_size >> 1;
2697	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2698		adev->gfx.rlc.reg_restore_list_size);
2699	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2700
2701	/* write the starting offsets to RLC scratch ram */
2702	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2703		adev->gfx.rlc.starting_offsets_start);
2704	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2705		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2706		       indirect_start_offsets[i]);
2707
2708	/* load unique indirect regs*/
2709	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2710		if (unique_indirect_regs[i] != 0) {
2711			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2712			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2713			       unique_indirect_regs[i] & 0x3FFFF);
2714
2715			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2716			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2717			       unique_indirect_regs[i] >> 20);
2718		}
2719	}
2720
2721	kfree(register_list_format);
2722	return 0;
2723}
2724
2725static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2726{
2727	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2728}
2729
2730static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2731					     bool enable)
2732{
2733	uint32_t data = 0;
2734	uint32_t default_data = 0;
2735
2736	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2737	if (enable == true) {
2738		/* enable GFXIP control over CGPG */
2739		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2740		if(default_data != data)
2741			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2742
2743		/* update status */
2744		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2745		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2746		if(default_data != data)
2747			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2748	} else {
2749		/* restore GFXIP control over GCPG */
2750		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2751		if(default_data != data)
2752			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2753	}
2754}
2755
2756static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2757{
2758	uint32_t data = 0;
2759
2760	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2761			      AMD_PG_SUPPORT_GFX_SMG |
2762			      AMD_PG_SUPPORT_GFX_DMG)) {
2763		/* init IDLE_POLL_COUNT = 60 */
2764		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2765		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2766		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2767		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2768
2769		/* init RLC PG Delay */
2770		data = 0;
2771		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2772		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2773		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2774		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2775		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2776
2777		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2778		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2779		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2780		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2781
2782		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2783		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2784		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2785		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2786
2787		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2788		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2789
2790		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2791		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2792		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2793
2794		pwr_10_0_gfxip_control_over_cgpg(adev, true);
2795	}
2796}
2797
2798static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2799						bool enable)
2800{
2801	uint32_t data = 0;
2802	uint32_t default_data = 0;
2803
2804	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2805	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2806			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2807			     enable ? 1 : 0);
2808	if (default_data != data)
2809		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2810}
2811
2812static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2813						bool enable)
2814{
2815	uint32_t data = 0;
2816	uint32_t default_data = 0;
2817
2818	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2819	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2820			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2821			     enable ? 1 : 0);
2822	if(default_data != data)
2823		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2824}
2825
2826static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2827					bool enable)
2828{
2829	uint32_t data = 0;
2830	uint32_t default_data = 0;
2831
2832	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2833	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2834			     CP_PG_DISABLE,
2835			     enable ? 0 : 1);
2836	if(default_data != data)
2837		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2838}
2839
2840static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2841						bool enable)
2842{
2843	uint32_t data, default_data;
2844
2845	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2846	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2847			     GFX_POWER_GATING_ENABLE,
2848			     enable ? 1 : 0);
2849	if(default_data != data)
2850		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2851}
2852
2853static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2854						bool enable)
2855{
2856	uint32_t data, default_data;
2857
2858	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2859	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2860			     GFX_PIPELINE_PG_ENABLE,
2861			     enable ? 1 : 0);
2862	if(default_data != data)
2863		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2864
2865	if (!enable)
2866		/* read any GFX register to wake up GFX */
2867		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2868}
2869
2870static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2871						       bool enable)
2872{
2873	uint32_t data, default_data;
2874
2875	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2876	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2877			     STATIC_PER_CU_PG_ENABLE,
2878			     enable ? 1 : 0);
2879	if(default_data != data)
2880		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2881}
2882
2883static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2884						bool enable)
2885{
2886	uint32_t data, default_data;
2887
2888	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2889	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2890			     DYN_PER_CU_PG_ENABLE,
2891			     enable ? 1 : 0);
2892	if(default_data != data)
2893		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2894}
2895
2896static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2897{
2898	gfx_v9_0_init_csb(adev);
2899
2900	/*
2901	 * Rlc save restore list is workable since v2_1.
2902	 * And it's needed by gfxoff feature.
2903	 */
2904	if (adev->gfx.rlc.is_rlc_v2_1) {
2905		if (adev->asic_type == CHIP_VEGA12 ||
2906		    (adev->asic_type == CHIP_RAVEN &&
2907		     adev->rev_id >= 8))
2908			gfx_v9_1_init_rlc_save_restore_list(adev);
2909		gfx_v9_0_enable_save_restore_machine(adev);
2910	}
2911
2912	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2913			      AMD_PG_SUPPORT_GFX_SMG |
2914			      AMD_PG_SUPPORT_GFX_DMG |
2915			      AMD_PG_SUPPORT_CP |
2916			      AMD_PG_SUPPORT_GDS |
2917			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
2918		WREG32(mmRLC_JUMP_TABLE_RESTORE,
2919		       adev->gfx.rlc.cp_table_gpu_addr >> 8);
2920		gfx_v9_0_init_gfx_power_gating(adev);
2921	}
2922}
2923
2924void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2925{
2926	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2927	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2928	gfx_v9_0_wait_for_rlc_serdes(adev);
2929}
2930
2931static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2932{
2933	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2934	udelay(50);
2935	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2936	udelay(50);
2937}
2938
2939static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2940{
2941#ifdef AMDGPU_RLC_DEBUG_RETRY
2942	u32 rlc_ucode_ver;
2943#endif
2944
2945	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2946	udelay(50);
2947
2948	/* carrizo do enable cp interrupt after cp inited */
2949	if (!(adev->flags & AMD_IS_APU)) {
2950		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2951		udelay(50);
2952	}
2953
2954#ifdef AMDGPU_RLC_DEBUG_RETRY
2955	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
2956	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2957	if(rlc_ucode_ver == 0x108) {
2958		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
2959				rlc_ucode_ver, adev->gfx.rlc_fw_version);
2960		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2961		 * default is 0x9C4 to create a 100us interval */
2962		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2963		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2964		 * to disable the page fault retry interrupts, default is
2965		 * 0x100 (256) */
2966		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2967	}
2968#endif
2969}
2970
2971static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2972{
2973	const struct rlc_firmware_header_v2_0 *hdr;
2974	const __le32 *fw_data;
2975	unsigned i, fw_size;
2976
2977	if (!adev->gfx.rlc_fw)
2978		return -EINVAL;
2979
2980	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2981	amdgpu_ucode_print_rlc_hdr(&hdr->header);
2982
2983	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2984			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2985	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2986
2987	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2988			RLCG_UCODE_LOADING_START_ADDRESS);
2989	for (i = 0; i < fw_size; i++)
2990		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2991	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2992
2993	return 0;
2994}
2995
2996static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2997{
2998	int r;
2999
3000	if (amdgpu_sriov_vf(adev)) {
3001		gfx_v9_0_init_csb(adev);
3002		return 0;
3003	}
3004
3005	adev->gfx.rlc.funcs->stop(adev);
3006
3007	/* disable CG */
3008	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3009
3010	gfx_v9_0_init_pg(adev);
3011
3012	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3013		/* legacy rlc firmware loading */
3014		r = gfx_v9_0_rlc_load_microcode(adev);
3015		if (r)
3016			return r;
3017	}
3018
3019	switch (adev->asic_type) {
3020	case CHIP_RAVEN:
3021		if (amdgpu_lbpw == 0)
3022			gfx_v9_0_enable_lbpw(adev, false);
3023		else
3024			gfx_v9_0_enable_lbpw(adev, true);
3025		break;
3026	case CHIP_VEGA20:
3027		if (amdgpu_lbpw > 0)
3028			gfx_v9_0_enable_lbpw(adev, true);
3029		else
3030			gfx_v9_0_enable_lbpw(adev, false);
3031		break;
3032	default:
3033		break;
3034	}
3035
3036	adev->gfx.rlc.funcs->start(adev);
3037
3038	return 0;
3039}
3040
3041static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3042{
3043	int i;
3044	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3045
3046	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3047	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3048	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3049	if (!enable) {
3050		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3051			adev->gfx.gfx_ring[i].sched.ready = false;
3052	}
3053	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3054	udelay(50);
3055}
3056
3057static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3058{
3059	const struct gfx_firmware_header_v1_0 *pfp_hdr;
3060	const struct gfx_firmware_header_v1_0 *ce_hdr;
3061	const struct gfx_firmware_header_v1_0 *me_hdr;
3062	const __le32 *fw_data;
3063	unsigned i, fw_size;
3064
3065	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3066		return -EINVAL;
3067
3068	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3069		adev->gfx.pfp_fw->data;
3070	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3071		adev->gfx.ce_fw->data;
3072	me_hdr = (const struct gfx_firmware_header_v1_0 *)
3073		adev->gfx.me_fw->data;
3074
3075	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3076	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3077	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3078
3079	gfx_v9_0_cp_gfx_enable(adev, false);
3080
3081	/* PFP */
3082	fw_data = (const __le32 *)
3083		(adev->gfx.pfp_fw->data +
3084		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3085	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3086	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3087	for (i = 0; i < fw_size; i++)
3088		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3089	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3090
3091	/* CE */
3092	fw_data = (const __le32 *)
3093		(adev->gfx.ce_fw->data +
3094		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3095	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3096	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3097	for (i = 0; i < fw_size; i++)
3098		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3099	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3100
3101	/* ME */
3102	fw_data = (const __le32 *)
3103		(adev->gfx.me_fw->data +
3104		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3105	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3106	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3107	for (i = 0; i < fw_size; i++)
3108		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3109	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3110
3111	return 0;
3112}
3113
3114static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3115{
3116	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3117	const struct cs_section_def *sect = NULL;
3118	const struct cs_extent_def *ext = NULL;
3119	int r, i, tmp;
3120
3121	/* init the CP */
3122	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3123	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3124
3125	gfx_v9_0_cp_gfx_enable(adev, true);
3126
3127	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3128	if (r) {
3129		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3130		return r;
3131	}
3132
3133	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3134	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3135
3136	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3137	amdgpu_ring_write(ring, 0x80000000);
3138	amdgpu_ring_write(ring, 0x80000000);
3139
3140	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3141		for (ext = sect->section; ext->extent != NULL; ++ext) {
3142			if (sect->id == SECT_CONTEXT) {
3143				amdgpu_ring_write(ring,
3144				       PACKET3(PACKET3_SET_CONTEXT_REG,
3145					       ext->reg_count));
3146				amdgpu_ring_write(ring,
3147				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3148				for (i = 0; i < ext->reg_count; i++)
3149					amdgpu_ring_write(ring, ext->extent[i]);
3150			}
3151		}
3152	}
3153
3154	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3155	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3156
3157	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3158	amdgpu_ring_write(ring, 0);
3159
3160	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3161	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3162	amdgpu_ring_write(ring, 0x8000);
3163	amdgpu_ring_write(ring, 0x8000);
3164
3165	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1));
3166	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3167		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3168	amdgpu_ring_write(ring, tmp);
3169	amdgpu_ring_write(ring, 0);
3170
3171	amdgpu_ring_commit(ring);
3172
3173	return 0;
3174}
3175
3176static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3177{
3178	struct amdgpu_ring *ring;
3179	u32 tmp;
3180	u32 rb_bufsz;
3181	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3182
3183	/* Set the write pointer delay */
3184	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3185
3186	/* set the RB to use vmid 0 */
3187	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3188
3189	/* Set ring buffer size */
3190	ring = &adev->gfx.gfx_ring[0];
3191	rb_bufsz = order_base_2(ring->ring_size / 8);
3192	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3193	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3194#ifdef __BIG_ENDIAN
3195	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3196#endif
3197	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3198
3199	/* Initialize the ring buffer's write pointers */
3200	ring->wptr = 0;
3201	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3202	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3203
3204	/* set the wb address wether it's enabled or not */
3205	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3206	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3207	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3208
3209	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3210	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3211	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3212
3213	mdelay(1);
3214	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3215
3216	rb_addr = ring->gpu_addr >> 8;
3217	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3218	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3219
3220	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3221	if (ring->use_doorbell) {
3222		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3223				    DOORBELL_OFFSET, ring->doorbell_index);
3224		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3225				    DOORBELL_EN, 1);
3226	} else {
3227		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3228	}
3229	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3230
3231	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3232			DOORBELL_RANGE_LOWER, ring->doorbell_index);
3233	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3234
3235	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3236		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3237
3238
3239	/* start the ring */
3240	gfx_v9_0_cp_gfx_start(adev);
3241	ring->sched.ready = true;
3242
3243	return 0;
3244}
3245
3246static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3247{
3248	int i;
3249
3250	if (enable) {
3251		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3252	} else {
3253		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3254			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3255		for (i = 0; i < adev->gfx.num_compute_rings; i++)
3256			adev->gfx.compute_ring[i].sched.ready = false;
3257		adev->gfx.kiq.ring.sched.ready = false;
3258	}
3259	udelay(50);
3260}
3261
3262static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3263{
3264	const struct gfx_firmware_header_v1_0 *mec_hdr;
3265	const __le32 *fw_data;
3266	unsigned i;
3267	u32 tmp;
3268
3269	if (!adev->gfx.mec_fw)
3270		return -EINVAL;
3271
3272	gfx_v9_0_cp_compute_enable(adev, false);
3273
3274	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3275	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3276
3277	fw_data = (const __le32 *)
3278		(adev->gfx.mec_fw->data +
3279		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3280	tmp = 0;
3281	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3282	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3283	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3284
3285	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3286		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3287	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3288		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3289
3290	/* MEC1 */
3291	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3292			 mec_hdr->jt_offset);
3293	for (i = 0; i < mec_hdr->jt_size; i++)
3294		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3295			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3296
3297	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3298			adev->gfx.mec_fw_version);
3299	/* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3300
3301	return 0;
3302}
3303
3304/* KIQ functions */
3305static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3306{
3307	uint32_t tmp;
3308	struct amdgpu_device *adev = ring->adev;
3309
3310	/* tell RLC which is KIQ queue */
3311	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3312	tmp &= 0xffffff00;
3313	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3314	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3315	tmp |= 0x80;
3316	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3317}
3318
3319static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3320{
3321	struct amdgpu_device *adev = ring->adev;
3322	struct v9_mqd *mqd = ring->mqd_ptr;
3323	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3324	uint32_t tmp;
3325
3326	mqd->header = 0xC0310800;
3327	mqd->compute_pipelinestat_enable = 0x00000001;
3328	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3329	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3330	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3331	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3332	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3333	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3334	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3335	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3336	mqd->compute_misc_reserved = 0x00000003;
3337
3338	mqd->dynamic_cu_mask_addr_lo =
3339		lower_32_bits(ring->mqd_gpu_addr
3340			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3341	mqd->dynamic_cu_mask_addr_hi =
3342		upper_32_bits(ring->mqd_gpu_addr
3343			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3344
3345	eop_base_addr = ring->eop_gpu_addr >> 8;
3346	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3347	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3348
3349	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3350	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3351	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3352			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3353
3354	mqd->cp_hqd_eop_control = tmp;
3355
3356	/* enable doorbell? */
3357	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3358
3359	if (ring->use_doorbell) {
3360		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3361				    DOORBELL_OFFSET, ring->doorbell_index);
3362		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3363				    DOORBELL_EN, 1);
3364		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3365				    DOORBELL_SOURCE, 0);
3366		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3367				    DOORBELL_HIT, 0);
3368	} else {
3369		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3370					 DOORBELL_EN, 0);
3371	}
3372
3373	mqd->cp_hqd_pq_doorbell_control = tmp;
3374
3375	/* disable the queue if it's active */
3376	ring->wptr = 0;
3377	mqd->cp_hqd_dequeue_request = 0;
3378	mqd->cp_hqd_pq_rptr = 0;
3379	mqd->cp_hqd_pq_wptr_lo = 0;
3380	mqd->cp_hqd_pq_wptr_hi = 0;
3381
3382	/* set the pointer to the MQD */
3383	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3384	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3385
3386	/* set MQD vmid to 0 */
3387	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3388	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3389	mqd->cp_mqd_control = tmp;
3390
3391	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3392	hqd_gpu_addr = ring->gpu_addr >> 8;
3393	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3394	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3395
3396	/* set up the HQD, this is similar to CP_RB0_CNTL */
3397	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3398	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3399			    (order_base_2(ring->ring_size / 4) - 1));
3400	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3401			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3402#ifdef __BIG_ENDIAN
3403	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3404#endif
3405	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3406	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3407	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3408	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3409	mqd->cp_hqd_pq_control = tmp;
3410
3411	/* set the wb address whether it's enabled or not */
3412	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3413	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3414	mqd->cp_hqd_pq_rptr_report_addr_hi =
3415		upper_32_bits(wb_gpu_addr) & 0xffff;
3416
3417	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3418	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3419	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3420	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3421
3422	tmp = 0;
3423	/* enable the doorbell if requested */
3424	if (ring->use_doorbell) {
3425		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3426		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3427				DOORBELL_OFFSET, ring->doorbell_index);
3428
3429		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3430					 DOORBELL_EN, 1);
3431		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3432					 DOORBELL_SOURCE, 0);
3433		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3434					 DOORBELL_HIT, 0);
3435	}
3436
3437	mqd->cp_hqd_pq_doorbell_control = tmp;
3438
3439	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3440	ring->wptr = 0;
3441	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3442
3443	/* set the vmid for the queue */
3444	mqd->cp_hqd_vmid = 0;
3445
3446	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3447	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3448	mqd->cp_hqd_persistent_state = tmp;
3449
3450	/* set MIN_IB_AVAIL_SIZE */
3451	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3452	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3453	mqd->cp_hqd_ib_control = tmp;
3454
3455	/* map_queues packet doesn't need activate the queue,
3456	 * so only kiq need set this field.
3457	 */
3458	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3459		mqd->cp_hqd_active = 1;
3460
3461	return 0;
3462}
3463
3464static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3465{
3466	struct amdgpu_device *adev = ring->adev;
3467	struct v9_mqd *mqd = ring->mqd_ptr;
3468	int j;
3469
3470	/* disable wptr polling */
3471	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3472
3473	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3474	       mqd->cp_hqd_eop_base_addr_lo);
3475	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3476	       mqd->cp_hqd_eop_base_addr_hi);
3477
3478	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3479	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3480	       mqd->cp_hqd_eop_control);
3481
3482	/* enable doorbell? */
3483	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3484	       mqd->cp_hqd_pq_doorbell_control);
3485
3486	/* disable the queue if it's active */
3487	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3488		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3489		for (j = 0; j < adev->usec_timeout; j++) {
3490			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3491				break;
3492			udelay(1);
3493		}
3494		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3495		       mqd->cp_hqd_dequeue_request);
3496		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3497		       mqd->cp_hqd_pq_rptr);
3498		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3499		       mqd->cp_hqd_pq_wptr_lo);
3500		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3501		       mqd->cp_hqd_pq_wptr_hi);
3502	}
3503
3504	/* set the pointer to the MQD */
3505	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3506	       mqd->cp_mqd_base_addr_lo);
3507	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3508	       mqd->cp_mqd_base_addr_hi);
3509
3510	/* set MQD vmid to 0 */
3511	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3512	       mqd->cp_mqd_control);
3513
3514	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3515	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3516	       mqd->cp_hqd_pq_base_lo);
3517	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3518	       mqd->cp_hqd_pq_base_hi);
3519
3520	/* set up the HQD, this is similar to CP_RB0_CNTL */
3521	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3522	       mqd->cp_hqd_pq_control);
3523
3524	/* set the wb address whether it's enabled or not */
3525	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3526				mqd->cp_hqd_pq_rptr_report_addr_lo);
3527	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3528				mqd->cp_hqd_pq_rptr_report_addr_hi);
3529
3530	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3531	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3532	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3533	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3534	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3535
3536	/* enable the doorbell if requested */
3537	if (ring->use_doorbell) {
3538		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3539					(adev->doorbell_index.kiq * 2) << 2);
3540		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3541					(adev->doorbell_index.userqueue_end * 2) << 2);
3542	}
3543
3544	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3545	       mqd->cp_hqd_pq_doorbell_control);
3546
3547	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3548	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3549	       mqd->cp_hqd_pq_wptr_lo);
3550	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3551	       mqd->cp_hqd_pq_wptr_hi);
3552
3553	/* set the vmid for the queue */
3554	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3555
3556	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3557	       mqd->cp_hqd_persistent_state);
3558
3559	/* activate the queue */
3560	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3561	       mqd->cp_hqd_active);
3562
3563	if (ring->use_doorbell)
3564		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3565
3566	return 0;
3567}
3568
3569static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3570{
3571	struct amdgpu_device *adev = ring->adev;
3572	int j;
3573
3574	/* disable the queue if it's active */
3575	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3576
3577		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3578
3579		for (j = 0; j < adev->usec_timeout; j++) {
3580			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3581				break;
3582			udelay(1);
3583		}
3584
3585		if (j == AMDGPU_MAX_USEC_TIMEOUT) {
3586			DRM_DEBUG("KIQ dequeue request failed.\n");
3587
3588			/* Manual disable if dequeue request times out */
3589			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3590		}
3591
3592		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3593		      0);
3594	}
3595
3596	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3597	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3598	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3599	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3600	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3601	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3602	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3603	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3604
3605	return 0;
3606}
3607
3608static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3609{
3610	struct amdgpu_device *adev = ring->adev;
3611	struct v9_mqd *mqd = ring->mqd_ptr;
3612	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3613
3614	gfx_v9_0_kiq_setting(ring);
3615
3616	if (adev->in_gpu_reset) { /* for GPU_RESET case */
3617		/* reset MQD to a clean status */
3618		if (adev->gfx.mec.mqd_backup[mqd_idx])
3619			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3620
3621		/* reset ring buffer */
3622		ring->wptr = 0;
3623		amdgpu_ring_clear_ring(ring);
3624
3625		mutex_lock(&adev->srbm_mutex);
3626		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3627		gfx_v9_0_kiq_init_register(ring);
3628		soc15_grbm_select(adev, 0, 0, 0, 0);
3629		mutex_unlock(&adev->srbm_mutex);
3630	} else {
3631		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3632		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3633		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3634		mutex_lock(&adev->srbm_mutex);
3635		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3636		gfx_v9_0_mqd_init(ring);
3637		gfx_v9_0_kiq_init_register(ring);
3638		soc15_grbm_select(adev, 0, 0, 0, 0);
3639		mutex_unlock(&adev->srbm_mutex);
3640
3641		if (adev->gfx.mec.mqd_backup[mqd_idx])
3642			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3643	}
3644
3645	return 0;
3646}
3647
3648static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3649{
3650	struct amdgpu_device *adev = ring->adev;
3651	struct v9_mqd *mqd = ring->mqd_ptr;
3652	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3653
3654	if (!adev->in_gpu_reset && !adev->in_suspend) {
3655		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3656		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3657		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3658		mutex_lock(&adev->srbm_mutex);
3659		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3660		gfx_v9_0_mqd_init(ring);
3661		soc15_grbm_select(adev, 0, 0, 0, 0);
3662		mutex_unlock(&adev->srbm_mutex);
3663
3664		if (adev->gfx.mec.mqd_backup[mqd_idx])
3665			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3666	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3667		/* reset MQD to a clean status */
3668		if (adev->gfx.mec.mqd_backup[mqd_idx])
3669			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3670
3671		/* reset ring buffer */
3672		ring->wptr = 0;
3673		amdgpu_ring_clear_ring(ring);
3674	} else {
3675		amdgpu_ring_clear_ring(ring);
3676	}
3677
3678	return 0;
3679}
3680
3681static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3682{
3683	struct amdgpu_ring *ring;
3684	int r;
3685
3686	ring = &adev->gfx.kiq.ring;
3687
3688	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3689	if (unlikely(r != 0))
3690		return r;
3691
3692	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3693	if (unlikely(r != 0))
3694		return r;
3695
3696	gfx_v9_0_kiq_init_queue(ring);
3697	amdgpu_bo_kunmap(ring->mqd_obj);
3698	ring->mqd_ptr = NULL;
3699	amdgpu_bo_unreserve(ring->mqd_obj);
3700	ring->sched.ready = true;
3701	return 0;
3702}
3703
3704static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3705{
3706	struct amdgpu_ring *ring = NULL;
3707	int r = 0, i;
3708
3709	gfx_v9_0_cp_compute_enable(adev, true);
3710
3711	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3712		ring = &adev->gfx.compute_ring[i];
3713
3714		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3715		if (unlikely(r != 0))
3716			goto done;
3717		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3718		if (!r) {
3719			r = gfx_v9_0_kcq_init_queue(ring);
3720			amdgpu_bo_kunmap(ring->mqd_obj);
3721			ring->mqd_ptr = NULL;
3722		}
3723		amdgpu_bo_unreserve(ring->mqd_obj);
3724		if (r)
3725			goto done;
3726	}
3727
3728	r = amdgpu_gfx_enable_kcq(adev);
3729done:
3730	return r;
3731}
3732
3733static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3734{
3735	int r, i;
3736	struct amdgpu_ring *ring;
3737
3738	if (!(adev->flags & AMD_IS_APU))
3739		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3740
3741	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3742		if (adev->asic_type != CHIP_ARCTURUS) {
3743			/* legacy firmware loading */
3744			r = gfx_v9_0_cp_gfx_load_microcode(adev);
3745			if (r)
3746				return r;
3747		}
3748
3749		r = gfx_v9_0_cp_compute_load_microcode(adev);
3750		if (r)
3751			return r;
3752	}
3753
3754	r = gfx_v9_0_kiq_resume(adev);
3755	if (r)
3756		return r;
3757
3758	if (adev->asic_type != CHIP_ARCTURUS) {
3759		r = gfx_v9_0_cp_gfx_resume(adev);
3760		if (r)
3761			return r;
3762	}
3763
3764	r = gfx_v9_0_kcq_resume(adev);
3765	if (r)
3766		return r;
3767
3768	if (adev->asic_type != CHIP_ARCTURUS) {
3769		ring = &adev->gfx.gfx_ring[0];
3770		r = amdgpu_ring_test_helper(ring);
3771		if (r)
3772			return r;
3773	}
3774
3775	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3776		ring = &adev->gfx.compute_ring[i];
3777		amdgpu_ring_test_helper(ring);
3778	}
3779
3780	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3781
3782	return 0;
3783}
3784
3785static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3786{
3787	u32 tmp;
3788
3789	if (adev->asic_type != CHIP_ARCTURUS)
3790		return;
3791
3792	tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3793	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3794				adev->df.hash_status.hash_64k);
3795	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3796				adev->df.hash_status.hash_2m);
3797	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3798				adev->df.hash_status.hash_1g);
3799	WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3800}
3801
3802static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3803{
3804	if (adev->asic_type != CHIP_ARCTURUS)
3805		gfx_v9_0_cp_gfx_enable(adev, enable);
3806	gfx_v9_0_cp_compute_enable(adev, enable);
3807}
3808
3809static int gfx_v9_0_hw_init(void *handle)
3810{
3811	int r;
3812	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3813
3814	if (!amdgpu_sriov_vf(adev))
3815		gfx_v9_0_init_golden_registers(adev);
3816
3817	gfx_v9_0_constants_init(adev);
3818
3819	gfx_v9_0_init_tcp_config(adev);
3820
3821	r = adev->gfx.rlc.funcs->resume(adev);
3822	if (r)
3823		return r;
3824
3825	r = gfx_v9_0_cp_resume(adev);
3826	if (r)
3827		return r;
3828
3829	return r;
3830}
3831
3832static int gfx_v9_0_hw_fini(void *handle)
3833{
3834	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3835
3836	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3837	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3838	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3839
3840	/* DF freeze and kcq disable will fail */
3841	if (!amdgpu_ras_intr_triggered())
3842		/* disable KCQ to avoid CPC touch memory not valid anymore */
3843		amdgpu_gfx_disable_kcq(adev);
3844
3845	if (amdgpu_sriov_vf(adev)) {
3846		gfx_v9_0_cp_gfx_enable(adev, false);
3847		/* must disable polling for SRIOV when hw finished, otherwise
3848		 * CPC engine may still keep fetching WB address which is already
3849		 * invalid after sw finished and trigger DMAR reading error in
3850		 * hypervisor side.
3851		 */
3852		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3853		return 0;
3854	}
3855
3856	/* Use deinitialize sequence from CAIL when unbinding device from driver,
3857	 * otherwise KIQ is hanging when binding back
3858	 */
3859	if (!adev->in_gpu_reset && !adev->in_suspend) {
3860		mutex_lock(&adev->srbm_mutex);
3861		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3862				adev->gfx.kiq.ring.pipe,
3863				adev->gfx.kiq.ring.queue, 0);
3864		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3865		soc15_grbm_select(adev, 0, 0, 0, 0);
3866		mutex_unlock(&adev->srbm_mutex);
3867	}
3868
3869	gfx_v9_0_cp_enable(adev, false);
3870	adev->gfx.rlc.funcs->stop(adev);
3871
3872	return 0;
3873}
3874
3875static int gfx_v9_0_suspend(void *handle)
3876{
3877	return gfx_v9_0_hw_fini(handle);
3878}
3879
3880static int gfx_v9_0_resume(void *handle)
3881{
3882	return gfx_v9_0_hw_init(handle);
3883}
3884
3885static bool gfx_v9_0_is_idle(void *handle)
3886{
3887	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3888
3889	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3890				GRBM_STATUS, GUI_ACTIVE))
3891		return false;
3892	else
3893		return true;
3894}
3895
3896static int gfx_v9_0_wait_for_idle(void *handle)
3897{
3898	unsigned i;
3899	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3900
3901	for (i = 0; i < adev->usec_timeout; i++) {
3902		if (gfx_v9_0_is_idle(handle))
3903			return 0;
3904		udelay(1);
3905	}
3906	return -ETIMEDOUT;
3907}
3908
3909static int gfx_v9_0_soft_reset(void *handle)
3910{
3911	u32 grbm_soft_reset = 0;
3912	u32 tmp;
3913	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3914
3915	/* GRBM_STATUS */
3916	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3917	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3918		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3919		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3920		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3921		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3922		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3923		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3924						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3925		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3926						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3927	}
3928
3929	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3930		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3931						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3932	}
3933
3934	/* GRBM_STATUS2 */
3935	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3936	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3937		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3938						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3939
3940
3941	if (grbm_soft_reset) {
3942		/* stop the rlc */
3943		adev->gfx.rlc.funcs->stop(adev);
3944
3945		if (adev->asic_type != CHIP_ARCTURUS)
3946			/* Disable GFX parsing/prefetching */
3947			gfx_v9_0_cp_gfx_enable(adev, false);
3948
3949		/* Disable MEC parsing/prefetching */
3950		gfx_v9_0_cp_compute_enable(adev, false);
3951
3952		if (grbm_soft_reset) {
3953			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3954			tmp |= grbm_soft_reset;
3955			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3956			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3957			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3958
3959			udelay(50);
3960
3961			tmp &= ~grbm_soft_reset;
3962			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3963			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3964		}
3965
3966		/* Wait a little for things to settle down */
3967		udelay(50);
3968	}
3969	return 0;
3970}
3971
3972static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3973{
3974	uint64_t clock;
3975
3976	amdgpu_gfx_off_ctrl(adev, false);
3977	mutex_lock(&adev->gfx.gpu_clock_mutex);
3978	if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) {
3979		uint32_t tmp, lsb, msb, i = 0;
3980		do {
3981			if (i != 0)
3982				udelay(1);
3983			tmp = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
3984			lsb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_LSB);
3985			msb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
3986			i++;
3987		} while (unlikely(tmp != msb) && (i < adev->usec_timeout));
3988		clock = (uint64_t)lsb | ((uint64_t)msb << 32ULL);
3989	} else {
3990		WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
3991		clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
3992			((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
3993	}
3994	mutex_unlock(&adev->gfx.gpu_clock_mutex);
3995	amdgpu_gfx_off_ctrl(adev, true);
3996	return clock;
3997}
3998
3999static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4000					  uint32_t vmid,
4001					  uint32_t gds_base, uint32_t gds_size,
4002					  uint32_t gws_base, uint32_t gws_size,
4003					  uint32_t oa_base, uint32_t oa_size)
4004{
4005	struct amdgpu_device *adev = ring->adev;
4006
4007	/* GDS Base */
4008	gfx_v9_0_write_data_to_reg(ring, 0, false,
4009				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4010				   gds_base);
4011
4012	/* GDS Size */
4013	gfx_v9_0_write_data_to_reg(ring, 0, false,
4014				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4015				   gds_size);
4016
4017	/* GWS */
4018	gfx_v9_0_write_data_to_reg(ring, 0, false,
4019				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4020				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4021
4022	/* OA */
4023	gfx_v9_0_write_data_to_reg(ring, 0, false,
4024				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4025				   (1 << (oa_size + oa_base)) - (1 << oa_base));
4026}
4027
4028static const u32 vgpr_init_compute_shader[] =
4029{
4030	0xb07c0000, 0xbe8000ff,
4031	0x000000f8, 0xbf110800,
4032	0x7e000280, 0x7e020280,
4033	0x7e040280, 0x7e060280,
4034	0x7e080280, 0x7e0a0280,
4035	0x7e0c0280, 0x7e0e0280,
4036	0x80808800, 0xbe803200,
4037	0xbf84fff5, 0xbf9c0000,
4038	0xd28c0001, 0x0001007f,
4039	0xd28d0001, 0x0002027e,
4040	0x10020288, 0xb8810904,
4041	0xb7814000, 0xd1196a01,
4042	0x00000301, 0xbe800087,
4043	0xbefc00c1, 0xd89c4000,
4044	0x00020201, 0xd89cc080,
4045	0x00040401, 0x320202ff,
4046	0x00000800, 0x80808100,
4047	0xbf84fff8, 0x7e020280,
4048	0xbf810000, 0x00000000,
4049};
4050
4051static const u32 sgpr_init_compute_shader[] =
4052{
4053	0xb07c0000, 0xbe8000ff,
4054	0x0000005f, 0xbee50080,
4055	0xbe812c65, 0xbe822c65,
4056	0xbe832c65, 0xbe842c65,
4057	0xbe852c65, 0xb77c0005,
4058	0x80808500, 0xbf84fff8,
4059	0xbe800080, 0xbf810000,
4060};
4061
4062/* When below register arrays changed, please update gpr_reg_size,
4063  and sec_ded_counter_reg_size in function gfx_v9_0_do_edc_gpr_workarounds,
4064  to cover all gfx9 ASICs */
4065static const struct soc15_reg_entry vgpr_init_regs[] = {
4066   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4067   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4068   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4069   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4070   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4071   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4072   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4073   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4074   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4075   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4076   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4077   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4078   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4079   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4080};
4081
4082static const struct soc15_reg_entry sgpr1_init_regs[] = {
4083   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4084   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4085   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4086   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4087   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4088   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4089   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4090   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4091   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4092   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4093   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4094   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4095   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4096   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4097};
4098
4099static const struct soc15_reg_entry sgpr2_init_regs[] = {
4100   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4101   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4102   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4103   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4104   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4105   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4106   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4107   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4108   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4109   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4110   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4111   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4112   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4113   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4114};
4115
4116static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4117   { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4118   { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4119   { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4120   { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4121   { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4122   { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4123   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4124   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4125   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4126   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4127   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4128   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4129   { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4130   { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4131   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4132   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4133   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4134   { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4135   { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4136   { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4137   { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4138   { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4139   { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4140   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4141   { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4142   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4143   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4144   { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4145   { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4146   { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4147   { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4148   { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4149   { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4150   { SOC15_REG_ENTRY(HDP, 0, mmHDP_EDC_CNT), 0, 1, 1},
4151};
4152
4153static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4154{
4155	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4156	int i, r;
4157
4158	/* only support when RAS is enabled */
4159	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4160		return 0;
4161
4162	r = amdgpu_ring_alloc(ring, 7);
4163	if (r) {
4164		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4165			ring->name, r);
4166		return r;
4167	}
4168
4169	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4170	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4171
4172	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4173	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4174				PACKET3_DMA_DATA_DST_SEL(1) |
4175				PACKET3_DMA_DATA_SRC_SEL(2) |
4176				PACKET3_DMA_DATA_ENGINE(0)));
4177	amdgpu_ring_write(ring, 0);
4178	amdgpu_ring_write(ring, 0);
4179	amdgpu_ring_write(ring, 0);
4180	amdgpu_ring_write(ring, 0);
4181	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4182				adev->gds.gds_size);
4183
4184	amdgpu_ring_commit(ring);
4185
4186	for (i = 0; i < adev->usec_timeout; i++) {
4187		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4188			break;
4189		udelay(1);
4190	}
4191
4192	if (i >= adev->usec_timeout)
4193		r = -ETIMEDOUT;
4194
4195	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4196
4197	return r;
4198}
4199
4200static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4201{
4202	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4203	struct amdgpu_ib ib;
4204	struct dma_fence *f = NULL;
4205	int r, i;
4206	unsigned total_size, vgpr_offset, sgpr_offset;
4207	u64 gpu_addr;
4208
4209	int compute_dim_x = adev->gfx.config.max_shader_engines *
4210						adev->gfx.config.max_cu_per_sh *
4211						adev->gfx.config.max_sh_per_se;
4212	int sgpr_work_group_size = 5;
4213	int gpr_reg_size = compute_dim_x / 16 + 6;
4214
4215	/* only support when RAS is enabled */
4216	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4217		return 0;
4218
4219	/* bail if the compute ring is not ready */
4220	if (!ring->sched.ready)
4221		return 0;
4222
4223	total_size =
4224		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4225	total_size +=
4226		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4227	total_size +=
4228		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4229	total_size = ALIGN(total_size, 256);
4230	vgpr_offset = total_size;
4231	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
4232	sgpr_offset = total_size;
4233	total_size += sizeof(sgpr_init_compute_shader);
4234
4235	/* allocate an indirect buffer to put the commands in */
4236	memset(&ib, 0, sizeof(ib));
4237	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
4238	if (r) {
4239		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4240		return r;
4241	}
4242
4243	/* load the compute shaders */
4244	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
4245		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
4246
4247	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4248		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4249
4250	/* init the ib length to 0 */
4251	ib.length_dw = 0;
4252
4253	/* VGPR */
4254	/* write the register state for the compute dispatch */
4255	for (i = 0; i < gpr_reg_size; i++) {
4256		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4257		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
4258								- PACKET3_SET_SH_REG_START;
4259		ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
4260	}
4261	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4262	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4263	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4264	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4265							- PACKET3_SET_SH_REG_START;
4266	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4267	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4268
4269	/* write dispatch packet */
4270	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4271	ib.ptr[ib.length_dw++] = compute_dim_x; /* x */
4272	ib.ptr[ib.length_dw++] = 1; /* y */
4273	ib.ptr[ib.length_dw++] = 1; /* z */
4274	ib.ptr[ib.length_dw++] =
4275		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4276
4277	/* write CS partial flush packet */
4278	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4279	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4280
4281	/* SGPR1 */
4282	/* write the register state for the compute dispatch */
4283	for (i = 0; i < gpr_reg_size; i++) {
4284		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4285		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4286								- PACKET3_SET_SH_REG_START;
4287		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4288	}
4289	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4290	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4291	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4292	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4293							- PACKET3_SET_SH_REG_START;
4294	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4295	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4296
4297	/* write dispatch packet */
4298	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4299	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4300	ib.ptr[ib.length_dw++] = 1; /* y */
4301	ib.ptr[ib.length_dw++] = 1; /* z */
4302	ib.ptr[ib.length_dw++] =
4303		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4304
4305	/* write CS partial flush packet */
4306	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4307	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4308
4309	/* SGPR2 */
4310	/* write the register state for the compute dispatch */
4311	for (i = 0; i < gpr_reg_size; i++) {
4312		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4313		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4314								- PACKET3_SET_SH_REG_START;
4315		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4316	}
4317	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4318	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4319	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4320	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4321							- PACKET3_SET_SH_REG_START;
4322	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4323	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4324
4325	/* write dispatch packet */
4326	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4327	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4328	ib.ptr[ib.length_dw++] = 1; /* y */
4329	ib.ptr[ib.length_dw++] = 1; /* z */
4330	ib.ptr[ib.length_dw++] =
4331		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4332
4333	/* write CS partial flush packet */
4334	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4335	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4336
4337	/* shedule the ib on the ring */
4338	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4339	if (r) {
4340		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4341		goto fail;
4342	}
4343
4344	/* wait for the GPU to finish processing the IB */
4345	r = dma_fence_wait(f, false);
4346	if (r) {
4347		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4348		goto fail;
4349	}
4350
4351	switch (adev->asic_type)
4352	{
4353	case CHIP_VEGA20:
4354		gfx_v9_0_clear_ras_edc_counter(adev);
4355		break;
4356	case CHIP_ARCTURUS:
4357		gfx_v9_4_clear_ras_edc_counter(adev);
4358		break;
4359	default:
4360		break;
4361	}
4362
4363fail:
4364	amdgpu_ib_free(adev, &ib, NULL);
4365	dma_fence_put(f);
4366
4367	return r;
4368}
4369
4370static int gfx_v9_0_early_init(void *handle)
4371{
4372	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4373
4374	if (adev->asic_type == CHIP_ARCTURUS)
4375		adev->gfx.num_gfx_rings = 0;
4376	else
4377		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4378	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
4379	gfx_v9_0_set_kiq_pm4_funcs(adev);
4380	gfx_v9_0_set_ring_funcs(adev);
4381	gfx_v9_0_set_irq_funcs(adev);
4382	gfx_v9_0_set_gds_init(adev);
4383	gfx_v9_0_set_rlc_funcs(adev);
4384
4385	return 0;
4386}
4387
4388static int gfx_v9_0_ecc_late_init(void *handle)
4389{
4390	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4391	int r;
4392
4393	/*
4394	 * Temp workaround to fix the issue that CP firmware fails to
4395	 * update read pointer when CPDMA is writing clearing operation
4396	 * to GDS in suspend/resume sequence on several cards. So just
4397	 * limit this operation in cold boot sequence.
4398	 */
4399	if (!adev->in_suspend) {
4400		r = gfx_v9_0_do_edc_gds_workarounds(adev);
4401		if (r)
4402			return r;
4403	}
4404
4405	/* requires IBs so do in late init after IB pool is initialized */
4406	r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4407	if (r)
4408		return r;
4409
4410	r = amdgpu_gfx_ras_late_init(adev);
4411	if (r)
4412		return r;
4413
4414	return 0;
4415}
4416
4417static int gfx_v9_0_late_init(void *handle)
4418{
4419	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4420	int r;
4421
4422	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4423	if (r)
4424		return r;
4425
4426	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4427	if (r)
4428		return r;
4429
4430	r = gfx_v9_0_ecc_late_init(handle);
4431	if (r)
4432		return r;
4433
4434	return 0;
4435}
4436
4437static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4438{
4439	uint32_t rlc_setting;
4440
4441	/* if RLC is not enabled, do nothing */
4442	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4443	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4444		return false;
4445
4446	return true;
4447}
4448
4449static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4450{
4451	uint32_t data;
4452	unsigned i;
4453
4454	data = RLC_SAFE_MODE__CMD_MASK;
4455	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4456	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4457
4458	/* wait for RLC_SAFE_MODE */
4459	for (i = 0; i < adev->usec_timeout; i++) {
4460		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4461			break;
4462		udelay(1);
4463	}
4464}
4465
4466static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4467{
4468	uint32_t data;
4469
4470	data = RLC_SAFE_MODE__CMD_MASK;
4471	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4472}
4473
4474static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4475						bool enable)
4476{
4477	amdgpu_gfx_rlc_enter_safe_mode(adev);
4478
4479	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4480		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4481		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4482			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4483	} else {
4484		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4485		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4486			gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4487	}
4488
4489	amdgpu_gfx_rlc_exit_safe_mode(adev);
4490}
4491
4492static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4493						bool enable)
4494{
4495	/* TODO: double check if we need to perform under safe mode */
4496	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4497
4498	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4499		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4500	else
4501		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4502
4503	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4504		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4505	else
4506		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4507
4508	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4509}
4510
4511static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4512						      bool enable)
4513{
4514	uint32_t data, def;
4515
4516	amdgpu_gfx_rlc_enter_safe_mode(adev);
4517
4518	/* It is disabled by HW by default */
4519	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4520		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4521		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4522
4523		if (adev->asic_type != CHIP_VEGA12)
4524			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4525
4526		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4527			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4528			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4529
4530		/* only for Vega10 & Raven1 */
4531		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4532
4533		if (def != data)
4534			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4535
4536		/* MGLS is a global flag to control all MGLS in GFX */
4537		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4538			/* 2 - RLC memory Light sleep */
4539			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4540				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4541				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4542				if (def != data)
4543					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4544			}
4545			/* 3 - CP memory Light sleep */
4546			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4547				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4548				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4549				if (def != data)
4550					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4551			}
4552		}
4553	} else {
4554		/* 1 - MGCG_OVERRIDE */
4555		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4556
4557		if (adev->asic_type != CHIP_VEGA12)
4558			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4559
4560		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4561			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4562			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4563			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4564
4565		if (def != data)
4566			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4567
4568		/* 2 - disable MGLS in RLC */
4569		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4570		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4571			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4572			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4573		}
4574
4575		/* 3 - disable MGLS in CP */
4576		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4577		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4578			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4579			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4580		}
4581	}
4582
4583	amdgpu_gfx_rlc_exit_safe_mode(adev);
4584}
4585
4586static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4587					   bool enable)
4588{
4589	uint32_t data, def;
4590
4591	if (adev->asic_type == CHIP_ARCTURUS)
4592		return;
4593
4594	amdgpu_gfx_rlc_enter_safe_mode(adev);
4595
4596	/* Enable 3D CGCG/CGLS */
4597	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4598		/* write cmd to clear cgcg/cgls ov */
4599		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4600		/* unset CGCG override */
4601		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4602		/* update CGCG and CGLS override bits */
4603		if (def != data)
4604			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4605
4606		/* enable 3Dcgcg FSM(0x0000363f) */
4607		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4608
4609		data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4610			RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4611		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4612			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4613				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4614		if (def != data)
4615			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4616
4617		/* set IDLE_POLL_COUNT(0x00900100) */
4618		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4619		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4620			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4621		if (def != data)
4622			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4623	} else {
4624		/* Disable CGCG/CGLS */
4625		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4626		/* disable cgcg, cgls should be disabled */
4627		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4628			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4629		/* disable cgcg and cgls in FSM */
4630		if (def != data)
4631			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4632	}
4633
4634	amdgpu_gfx_rlc_exit_safe_mode(adev);
4635}
4636
4637static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4638						      bool enable)
4639{
4640	uint32_t def, data;
4641
4642	amdgpu_gfx_rlc_enter_safe_mode(adev);
4643
4644	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4645		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4646		/* unset CGCG override */
4647		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4648		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4649			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4650		else
4651			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4652		/* update CGCG and CGLS override bits */
4653		if (def != data)
4654			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4655
4656		/* enable cgcg FSM(0x0000363F) */
4657		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4658
4659		if (adev->asic_type == CHIP_ARCTURUS)
4660			data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4661				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4662		else
4663			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4664				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4665		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4666			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4667				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4668		if (def != data)
4669			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4670
4671		/* set IDLE_POLL_COUNT(0x00900100) */
4672		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4673		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4674			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4675		if (def != data)
4676			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4677	} else {
4678		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4679		/* reset CGCG/CGLS bits */
4680		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4681		/* disable cgcg and cgls in FSM */
4682		if (def != data)
4683			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4684	}
4685
4686	amdgpu_gfx_rlc_exit_safe_mode(adev);
4687}
4688
4689static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4690					    bool enable)
4691{
4692	if (enable) {
4693		/* CGCG/CGLS should be enabled after MGCG/MGLS
4694		 * ===  MGCG + MGLS ===
4695		 */
4696		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4697		/* ===  CGCG /CGLS for GFX 3D Only === */
4698		gfx_v9_0_update_3d_clock_gating(adev, enable);
4699		/* ===  CGCG + CGLS === */
4700		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4701	} else {
4702		/* CGCG/CGLS should be disabled before MGCG/MGLS
4703		 * ===  CGCG + CGLS ===
4704		 */
4705		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4706		/* ===  CGCG /CGLS for GFX 3D Only === */
4707		gfx_v9_0_update_3d_clock_gating(adev, enable);
4708		/* ===  MGCG + MGLS === */
4709		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4710	}
4711	return 0;
4712}
4713
4714static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4715	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4716	.set_safe_mode = gfx_v9_0_set_safe_mode,
4717	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
4718	.init = gfx_v9_0_rlc_init,
4719	.get_csb_size = gfx_v9_0_get_csb_size,
4720	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
4721	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4722	.resume = gfx_v9_0_rlc_resume,
4723	.stop = gfx_v9_0_rlc_stop,
4724	.reset = gfx_v9_0_rlc_reset,
4725	.start = gfx_v9_0_rlc_start
4726};
4727
4728static int gfx_v9_0_set_powergating_state(void *handle,
4729					  enum amd_powergating_state state)
4730{
4731	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4732	bool enable = (state == AMD_PG_STATE_GATE);
4733
4734	switch (adev->asic_type) {
4735	case CHIP_RAVEN:
4736	case CHIP_RENOIR:
4737		if (!enable) {
4738			amdgpu_gfx_off_ctrl(adev, false);
4739			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4740		}
4741		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4742			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4743			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4744		} else {
4745			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4746			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4747		}
4748
4749		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4750			gfx_v9_0_enable_cp_power_gating(adev, true);
4751		else
4752			gfx_v9_0_enable_cp_power_gating(adev, false);
4753
4754		/* update gfx cgpg state */
4755		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4756
4757		/* update mgcg state */
4758		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4759
4760		if (enable)
4761			amdgpu_gfx_off_ctrl(adev, true);
4762		break;
4763	case CHIP_VEGA12:
4764		if (!enable) {
4765			amdgpu_gfx_off_ctrl(adev, false);
4766			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4767		} else {
4768			amdgpu_gfx_off_ctrl(adev, true);
4769		}
4770		break;
4771	default:
4772		break;
4773	}
4774
4775	return 0;
4776}
4777
4778static int gfx_v9_0_set_clockgating_state(void *handle,
4779					  enum amd_clockgating_state state)
4780{
4781	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4782
4783	if (amdgpu_sriov_vf(adev))
4784		return 0;
4785
4786	switch (adev->asic_type) {
4787	case CHIP_VEGA10:
4788	case CHIP_VEGA12:
4789	case CHIP_VEGA20:
4790	case CHIP_RAVEN:
4791	case CHIP_ARCTURUS:
4792	case CHIP_RENOIR:
4793		gfx_v9_0_update_gfx_clock_gating(adev,
4794						 state == AMD_CG_STATE_GATE);
4795		break;
4796	default:
4797		break;
4798	}
4799	return 0;
4800}
4801
4802static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4803{
4804	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4805	int data;
4806
4807	if (amdgpu_sriov_vf(adev))
4808		*flags = 0;
4809
4810	/* AMD_CG_SUPPORT_GFX_MGCG */
4811	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
4812	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4813		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
4814
4815	/* AMD_CG_SUPPORT_GFX_CGCG */
4816	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
4817	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4818		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
4819
4820	/* AMD_CG_SUPPORT_GFX_CGLS */
4821	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4822		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
4823
4824	/* AMD_CG_SUPPORT_GFX_RLC_LS */
4825	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
4826	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4827		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4828
4829	/* AMD_CG_SUPPORT_GFX_CP_LS */
4830	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
4831	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4832		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4833
4834	if (adev->asic_type != CHIP_ARCTURUS) {
4835		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
4836		data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
4837		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4838			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4839
4840		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
4841		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4842			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4843	}
4844}
4845
4846static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4847{
4848	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/
4849}
4850
4851static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4852{
4853	struct amdgpu_device *adev = ring->adev;
4854	u64 wptr;
4855
4856	/* XXX check if swapping is necessary on BE */
4857	if (ring->use_doorbell) {
4858		wptr = atomic_load_relaxed(&adev->wb.wb[ring->wptr_offs]);
4859	} else {
4860		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4861		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4862	}
4863
4864	return wptr;
4865}
4866
4867static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4868{
4869	struct amdgpu_device *adev = ring->adev;
4870
4871	if (ring->use_doorbell) {
4872		/* XXX check if swapping is necessary on BE */
4873		atomic_store_relaxed(&adev->wb.wb[ring->wptr_offs], ring->wptr);
4874		WDOORBELL64(ring->doorbell_index, ring->wptr);
4875	} else {
4876		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4877		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4878	}
4879}
4880
4881static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4882{
4883	struct amdgpu_device *adev = ring->adev;
4884	u32 ref_and_mask, reg_mem_engine;
4885	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
4886
4887	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4888		switch (ring->me) {
4889		case 1:
4890			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4891			break;
4892		case 2:
4893			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4894			break;
4895		default:
4896			return;
4897		}
4898		reg_mem_engine = 0;
4899	} else {
4900		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4901		reg_mem_engine = 1; /* pfp */
4902	}
4903
4904	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4905			      adev->nbio.funcs->get_hdp_flush_req_offset(adev),
4906			      adev->nbio.funcs->get_hdp_flush_done_offset(adev),
4907			      ref_and_mask, ref_and_mask, 0x20);
4908}
4909
4910static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4911					struct amdgpu_job *job,
4912					struct amdgpu_ib *ib,
4913					uint32_t flags)
4914{
4915	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4916	u32 header, control = 0;
4917
4918	if (ib->flags & AMDGPU_IB_FLAG_CE)
4919		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4920	else
4921		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4922
4923	control |= ib->length_dw | (vmid << 24);
4924
4925	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4926		control |= INDIRECT_BUFFER_PRE_ENB(1);
4927
4928		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4929			gfx_v9_0_ring_emit_de_meta(ring);
4930	}
4931
4932	amdgpu_ring_write(ring, header);
4933	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4934	amdgpu_ring_write(ring,
4935#ifdef __BIG_ENDIAN
4936		(2 << 0) |
4937#endif
4938		lower_32_bits(ib->gpu_addr));
4939	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4940	amdgpu_ring_write(ring, control);
4941}
4942
4943static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4944					  struct amdgpu_job *job,
4945					  struct amdgpu_ib *ib,
4946					  uint32_t flags)
4947{
4948	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4949	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4950
4951	/* Currently, there is a high possibility to get wave ID mismatch
4952	 * between ME and GDS, leading to a hw deadlock, because ME generates
4953	 * different wave IDs than the GDS expects. This situation happens
4954	 * randomly when at least 5 compute pipes use GDS ordered append.
4955	 * The wave IDs generated by ME are also wrong after suspend/resume.
4956	 * Those are probably bugs somewhere else in the kernel driver.
4957	 *
4958	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
4959	 * GDS to 0 for this ring (me/pipe).
4960	 */
4961	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
4962		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
4963		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
4964		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
4965	}
4966
4967	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
4968	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4969	amdgpu_ring_write(ring,
4970#ifdef __BIG_ENDIAN
4971				(2 << 0) |
4972#endif
4973				lower_32_bits(ib->gpu_addr));
4974	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4975	amdgpu_ring_write(ring, control);
4976}
4977
4978static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
4979				     u64 seq, unsigned flags)
4980{
4981	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4982	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4983	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
4984
4985	/* RELEASE_MEM - flush caches, send int */
4986	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
4987	amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
4988					       EOP_TC_NC_ACTION_EN) :
4989					      (EOP_TCL1_ACTION_EN |
4990					       EOP_TC_ACTION_EN |
4991					       EOP_TC_WB_ACTION_EN |
4992					       EOP_TC_MD_ACTION_EN)) |
4993				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4994				 EVENT_INDEX(5)));
4995	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4996
4997	/*
4998	 * the address should be Qword aligned if 64bit write, Dword
4999	 * aligned if only send 32bit data low (discard data high)
5000	 */
5001	if (write64bit)
5002		BUG_ON(addr & 0x7);
5003	else
5004		BUG_ON(addr & 0x3);
5005	amdgpu_ring_write(ring, lower_32_bits(addr));
5006	amdgpu_ring_write(ring, upper_32_bits(addr));
5007	amdgpu_ring_write(ring, lower_32_bits(seq));
5008	amdgpu_ring_write(ring, upper_32_bits(seq));
5009	amdgpu_ring_write(ring, 0);
5010}
5011
5012static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5013{
5014	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5015	uint32_t seq = ring->fence_drv.sync_seq;
5016	uint64_t addr = ring->fence_drv.gpu_addr;
5017
5018	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5019			      lower_32_bits(addr), upper_32_bits(addr),
5020			      seq, 0xffffffff, 4);
5021}
5022
5023static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5024					unsigned vmid, uint64_t pd_addr)
5025{
5026	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5027
5028	/* compute doesn't have PFP */
5029	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5030		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5031		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5032		amdgpu_ring_write(ring, 0x0);
5033	}
5034}
5035
5036static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5037{
5038	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
5039}
5040
5041static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5042{
5043	u64 wptr;
5044
5045	/* XXX check if swapping is necessary on BE */
5046	if (ring->use_doorbell)
5047		wptr = atomic_load_relaxed(&ring->adev->wb.wb[ring->wptr_offs]);
5048	else
5049		BUG();
5050	return wptr;
5051}
5052
5053static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
5054					   bool acquire)
5055{
5056	struct amdgpu_device *adev = ring->adev;
5057	int pipe_num, tmp, reg;
5058	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
5059
5060	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
5061
5062	/* first me only has 2 entries, GFX and HP3D */
5063	if (ring->me > 0)
5064		pipe_num -= 2;
5065
5066	reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
5067	tmp = RREG32(reg);
5068	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
5069	WREG32(reg, tmp);
5070}
5071
5072static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
5073					    struct amdgpu_ring *ring,
5074					    bool acquire)
5075{
5076	int i, pipe;
5077	bool reserve;
5078	struct amdgpu_ring *iring;
5079
5080	mutex_lock(&adev->gfx.pipe_reserve_mutex);
5081	pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
5082	if (acquire)
5083		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5084	else
5085		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5086
5087	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
5088		/* Clear all reservations - everyone reacquires all resources */
5089		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
5090			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
5091						       true);
5092
5093		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
5094			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
5095						       true);
5096	} else {
5097		/* Lower all pipes without a current reservation */
5098		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
5099			iring = &adev->gfx.gfx_ring[i];
5100			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5101							   iring->me,
5102							   iring->pipe,
5103							   0);
5104			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5105			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5106		}
5107
5108		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
5109			iring = &adev->gfx.compute_ring[i];
5110			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5111							   iring->me,
5112							   iring->pipe,
5113							   0);
5114			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5115			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5116		}
5117	}
5118
5119	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
5120}
5121
5122static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
5123				      struct amdgpu_ring *ring,
5124				      bool acquire)
5125{
5126	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
5127	uint32_t queue_priority = acquire ? 0xf : 0x0;
5128
5129	mutex_lock(&adev->srbm_mutex);
5130	soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5131
5132	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
5133	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
5134
5135	soc15_grbm_select(adev, 0, 0, 0, 0);
5136	mutex_unlock(&adev->srbm_mutex);
5137}
5138
5139static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
5140					       enum drm_sched_priority priority)
5141{
5142	struct amdgpu_device *adev = ring->adev;
5143	bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
5144
5145	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
5146		return;
5147
5148	gfx_v9_0_hqd_set_priority(adev, ring, acquire);
5149	gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
5150}
5151
5152static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5153{
5154	struct amdgpu_device *adev = ring->adev;
5155
5156	/* XXX check if swapping is necessary on BE */
5157	if (ring->use_doorbell) {
5158		atomic_store_relaxed(&adev->wb.wb[ring->wptr_offs], ring->wptr);
5159		WDOORBELL64(ring->doorbell_index, ring->wptr);
5160	} else{
5161		BUG(); /* only DOORBELL method supported on gfx9 now */
5162	}
5163}
5164
5165static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5166					 u64 seq, unsigned int flags)
5167{
5168	struct amdgpu_device *adev = ring->adev;
5169
5170	/* we only allocate 32bit for each seq wb address */
5171	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5172
5173	/* write fence seq to the "addr" */
5174	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5175	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5176				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5177	amdgpu_ring_write(ring, lower_32_bits(addr));
5178	amdgpu_ring_write(ring, upper_32_bits(addr));
5179	amdgpu_ring_write(ring, lower_32_bits(seq));
5180
5181	if (flags & AMDGPU_FENCE_FLAG_INT) {
5182		/* set register to trigger INT */
5183		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5184		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5185					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5186		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5187		amdgpu_ring_write(ring, 0);
5188		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5189	}
5190}
5191
5192static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5193{
5194	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5195	amdgpu_ring_write(ring, 0);
5196}
5197
5198static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5199{
5200	struct v9_ce_ib_state ce_payload = {0};
5201	uint64_t csa_addr;
5202	int cnt;
5203
5204	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5205	csa_addr = amdgpu_csa_vaddr(ring->adev);
5206
5207	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5208	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5209				 WRITE_DATA_DST_SEL(8) |
5210				 WR_CONFIRM) |
5211				 WRITE_DATA_CACHE_POLICY(0));
5212	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5213	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5214	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5215}
5216
5217static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5218{
5219	struct v9_de_ib_state de_payload = {0};
5220	uint64_t csa_addr, gds_addr;
5221	int cnt;
5222
5223	csa_addr = amdgpu_csa_vaddr(ring->adev);
5224	gds_addr = csa_addr + 4096;
5225	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5226	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5227
5228	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5229	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5230	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5231				 WRITE_DATA_DST_SEL(8) |
5232				 WR_CONFIRM) |
5233				 WRITE_DATA_CACHE_POLICY(0));
5234	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5235	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5236	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5237}
5238
5239static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
5240{
5241	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5242	amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
5243}
5244
5245static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5246{
5247	uint32_t dw2 = 0;
5248
5249	if (amdgpu_sriov_vf(ring->adev))
5250		gfx_v9_0_ring_emit_ce_meta(ring);
5251
5252	gfx_v9_0_ring_emit_tmz(ring, true);
5253
5254	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
5255	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5256		/* set load_global_config & load_global_uconfig */
5257		dw2 |= 0x8001;
5258		/* set load_cs_sh_regs */
5259		dw2 |= 0x01000000;
5260		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5261		dw2 |= 0x10002;
5262
5263		/* set load_ce_ram if preamble presented */
5264		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5265			dw2 |= 0x10000000;
5266	} else {
5267		/* still load_ce_ram if this is the first time preamble presented
5268		 * although there is no context switch happens.
5269		 */
5270		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5271			dw2 |= 0x10000000;
5272	}
5273
5274	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5275	amdgpu_ring_write(ring, dw2);
5276	amdgpu_ring_write(ring, 0);
5277}
5278
5279static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5280{
5281	unsigned ret;
5282	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5283	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5284	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5285	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5286	ret = ring->wptr & ring->buf_mask;
5287	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5288	return ret;
5289}
5290
5291static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5292{
5293	unsigned cur;
5294	BUG_ON(offset > ring->buf_mask);
5295	BUG_ON(ring->ring[offset] != 0x55aa55aa);
5296
5297	cur = (ring->wptr & ring->buf_mask) - 1;
5298	if (likely(cur > offset))
5299		ring->ring[offset] = cur - offset;
5300	else
5301		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5302}
5303
5304static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
5305{
5306	struct amdgpu_device *adev = ring->adev;
5307	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
5308
5309	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5310	amdgpu_ring_write(ring, 0 |	/* src: register*/
5311				(5 << 8) |	/* dst: memory */
5312				(1 << 20));	/* write confirm */
5313	amdgpu_ring_write(ring, reg);
5314	amdgpu_ring_write(ring, 0);
5315	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5316				kiq->reg_val_offs * 4));
5317	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5318				kiq->reg_val_offs * 4));
5319}
5320
5321static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5322				    uint32_t val)
5323{
5324	uint32_t cmd = 0;
5325
5326	switch (ring->funcs->type) {
5327	case AMDGPU_RING_TYPE_GFX:
5328		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5329		break;
5330	case AMDGPU_RING_TYPE_KIQ:
5331		cmd = (1 << 16); /* no inc addr */
5332		break;
5333	default:
5334		cmd = WR_CONFIRM;
5335		break;
5336	}
5337	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5338	amdgpu_ring_write(ring, cmd);
5339	amdgpu_ring_write(ring, reg);
5340	amdgpu_ring_write(ring, 0);
5341	amdgpu_ring_write(ring, val);
5342}
5343
5344static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5345					uint32_t val, uint32_t mask)
5346{
5347	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5348}
5349
5350static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5351						  uint32_t reg0, uint32_t reg1,
5352						  uint32_t ref, uint32_t mask)
5353{
5354	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5355	struct amdgpu_device *adev = ring->adev;
5356	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5357		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5358
5359	if (fw_version_ok)
5360		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5361				      ref, mask, 0x20);
5362	else
5363		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5364							   ref, mask);
5365}
5366
5367static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5368{
5369	struct amdgpu_device *adev = ring->adev;
5370	uint32_t value = 0;
5371
5372	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5373	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5374	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5375	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5376	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5377}
5378
5379static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5380						 enum amdgpu_interrupt_state state)
5381{
5382	switch (state) {
5383	case AMDGPU_IRQ_STATE_DISABLE:
5384	case AMDGPU_IRQ_STATE_ENABLE:
5385		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5386			       TIME_STAMP_INT_ENABLE,
5387			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5388		break;
5389	default:
5390		break;
5391	}
5392}
5393
5394static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5395						     int me, int pipe,
5396						     enum amdgpu_interrupt_state state)
5397{
5398	u32 mec_int_cntl, mec_int_cntl_reg;
5399
5400	/*
5401	 * amdgpu controls only the first MEC. That's why this function only
5402	 * handles the setting of interrupts for this specific MEC. All other
5403	 * pipes' interrupts are set by amdkfd.
5404	 */
5405
5406	if (me == 1) {
5407		switch (pipe) {
5408		case 0:
5409			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5410			break;
5411		case 1:
5412			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5413			break;
5414		case 2:
5415			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5416			break;
5417		case 3:
5418			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5419			break;
5420		default:
5421			DRM_DEBUG("invalid pipe %d\n", pipe);
5422			return;
5423		}
5424	} else {
5425		DRM_DEBUG("invalid me %d\n", me);
5426		return;
5427	}
5428
5429	switch (state) {
5430	case AMDGPU_IRQ_STATE_DISABLE:
5431		mec_int_cntl = RREG32(mec_int_cntl_reg);
5432		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5433					     TIME_STAMP_INT_ENABLE, 0);
5434		WREG32(mec_int_cntl_reg, mec_int_cntl);
5435		break;
5436	case AMDGPU_IRQ_STATE_ENABLE:
5437		mec_int_cntl = RREG32(mec_int_cntl_reg);
5438		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5439					     TIME_STAMP_INT_ENABLE, 1);
5440		WREG32(mec_int_cntl_reg, mec_int_cntl);
5441		break;
5442	default:
5443		break;
5444	}
5445}
5446
5447static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5448					     struct amdgpu_irq_src *source,
5449					     unsigned type,
5450					     enum amdgpu_interrupt_state state)
5451{
5452	switch (state) {
5453	case AMDGPU_IRQ_STATE_DISABLE:
5454	case AMDGPU_IRQ_STATE_ENABLE:
5455		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5456			       PRIV_REG_INT_ENABLE,
5457			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5458		break;
5459	default:
5460		break;
5461	}
5462
5463	return 0;
5464}
5465
5466static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5467					      struct amdgpu_irq_src *source,
5468					      unsigned type,
5469					      enum amdgpu_interrupt_state state)
5470{
5471	switch (state) {
5472	case AMDGPU_IRQ_STATE_DISABLE:
5473	case AMDGPU_IRQ_STATE_ENABLE:
5474		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5475			       PRIV_INSTR_INT_ENABLE,
5476			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5477	default:
5478		break;
5479	}
5480
5481	return 0;
5482}
5483
5484#define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
5485	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5486			CP_ECC_ERROR_INT_ENABLE, 1)
5487
5488#define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
5489	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5490			CP_ECC_ERROR_INT_ENABLE, 0)
5491
5492static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5493					      struct amdgpu_irq_src *source,
5494					      unsigned type,
5495					      enum amdgpu_interrupt_state state)
5496{
5497	switch (state) {
5498	case AMDGPU_IRQ_STATE_DISABLE:
5499		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5500				CP_ECC_ERROR_INT_ENABLE, 0);
5501		DISABLE_ECC_ON_ME_PIPE(1, 0);
5502		DISABLE_ECC_ON_ME_PIPE(1, 1);
5503		DISABLE_ECC_ON_ME_PIPE(1, 2);
5504		DISABLE_ECC_ON_ME_PIPE(1, 3);
5505		break;
5506
5507	case AMDGPU_IRQ_STATE_ENABLE:
5508		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5509				CP_ECC_ERROR_INT_ENABLE, 1);
5510		ENABLE_ECC_ON_ME_PIPE(1, 0);
5511		ENABLE_ECC_ON_ME_PIPE(1, 1);
5512		ENABLE_ECC_ON_ME_PIPE(1, 2);
5513		ENABLE_ECC_ON_ME_PIPE(1, 3);
5514		break;
5515	default:
5516		break;
5517	}
5518
5519	return 0;
5520}
5521
5522
5523static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5524					    struct amdgpu_irq_src *src,
5525					    unsigned type,
5526					    enum amdgpu_interrupt_state state)
5527{
5528	switch (type) {
5529	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5530		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5531		break;
5532	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5533		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5534		break;
5535	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5536		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5537		break;
5538	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5539		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5540		break;
5541	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5542		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5543		break;
5544	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5545		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5546		break;
5547	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5548		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5549		break;
5550	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5551		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5552		break;
5553	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5554		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5555		break;
5556	default:
5557		break;
5558	}
5559	return 0;
5560}
5561
5562static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5563			    struct amdgpu_irq_src *source,
5564			    struct amdgpu_iv_entry *entry)
5565{
5566	int i;
5567	u8 me_id, pipe_id, queue_id;
5568	struct amdgpu_ring *ring;
5569
5570	DRM_DEBUG("IH: CP EOP\n");
5571	me_id = (entry->ring_id & 0x0c) >> 2;
5572	pipe_id = (entry->ring_id & 0x03) >> 0;
5573	queue_id = (entry->ring_id & 0x70) >> 4;
5574
5575	switch (me_id) {
5576	case 0:
5577		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5578		break;
5579	case 1:
5580	case 2:
5581		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5582			ring = &adev->gfx.compute_ring[i];
5583			/* Per-queue interrupt is supported for MEC starting from VI.
5584			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
5585			  */
5586			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5587				amdgpu_fence_process(ring);
5588		}
5589		break;
5590	}
5591	return 0;
5592}
5593
5594static void gfx_v9_0_fault(struct amdgpu_device *adev,
5595			   struct amdgpu_iv_entry *entry)
5596{
5597	u8 me_id, pipe_id, queue_id;
5598	struct amdgpu_ring *ring;
5599	int i;
5600
5601	me_id = (entry->ring_id & 0x0c) >> 2;
5602	pipe_id = (entry->ring_id & 0x03) >> 0;
5603	queue_id = (entry->ring_id & 0x70) >> 4;
5604
5605	switch (me_id) {
5606	case 0:
5607		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5608		break;
5609	case 1:
5610	case 2:
5611		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5612			ring = &adev->gfx.compute_ring[i];
5613			if (ring->me == me_id && ring->pipe == pipe_id &&
5614			    ring->queue == queue_id)
5615				drm_sched_fault(&ring->sched);
5616		}
5617		break;
5618	}
5619}
5620
5621static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5622				 struct amdgpu_irq_src *source,
5623				 struct amdgpu_iv_entry *entry)
5624{
5625	DRM_ERROR("Illegal register access in command stream\n");
5626	gfx_v9_0_fault(adev, entry);
5627	return 0;
5628}
5629
5630static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5631				  struct amdgpu_irq_src *source,
5632				  struct amdgpu_iv_entry *entry)
5633{
5634	DRM_ERROR("Illegal instruction in command stream\n");
5635	gfx_v9_0_fault(adev, entry);
5636	return 0;
5637}
5638
5639
5640static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
5641	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5642	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5643	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5644	},
5645	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
5646	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
5647	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
5648	},
5649	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5650	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
5651	  0, 0
5652	},
5653	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5654	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
5655	  0, 0
5656	},
5657	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
5658	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
5659	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
5660	},
5661	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5662	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
5663	  0, 0
5664	},
5665	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5666	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5667	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
5668	},
5669	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
5670	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
5671	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
5672	},
5673	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
5674	  SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
5675	  0, 0
5676	},
5677	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
5678	  SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
5679	  0, 0
5680	},
5681	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
5682	  SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
5683	  0, 0
5684	},
5685	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5686	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
5687	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
5688	},
5689	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5690	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
5691	  0, 0
5692	},
5693	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5694	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
5695	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
5696	},
5697	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
5698	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5699	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
5700	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
5701	},
5702	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
5703	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5704	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
5705	  0, 0
5706	},
5707	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
5708	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5709	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
5710	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
5711	},
5712	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
5713	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5714	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
5715	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
5716	},
5717	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
5718	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5719	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
5720	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
5721	},
5722	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
5723	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5724	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
5725	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
5726	},
5727	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
5728	  SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
5729	  0, 0
5730	},
5731	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5732	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
5733	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
5734	},
5735	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5736	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
5737	  0, 0
5738	},
5739	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5740	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
5741	  0, 0
5742	},
5743	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5744	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
5745	  0, 0
5746	},
5747	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5748	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
5749	  0, 0
5750	},
5751	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5752	  SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
5753	  0, 0
5754	},
5755	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5756	  SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
5757	  0, 0
5758	},
5759	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5760	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
5761	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
5762	},
5763	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5764	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
5765	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
5766	},
5767	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5768	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
5769	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
5770	},
5771	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5772	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
5773	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
5774	},
5775	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5776	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
5777	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
5778	},
5779	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5780	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
5781	  0, 0
5782	},
5783	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5784	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
5785	  0, 0
5786	},
5787	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5788	  SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
5789	  0, 0
5790	},
5791	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5792	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
5793	  0, 0
5794	},
5795	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5796	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
5797	  0, 0
5798	},
5799	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5800	  SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
5801	  0, 0
5802	},
5803	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5804	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
5805	  0, 0
5806	},
5807	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5808	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
5809	  0, 0
5810	},
5811	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5812	  SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
5813	  0, 0
5814	},
5815	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5816	  SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
5817	  0, 0
5818	},
5819	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5820	  SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
5821	  0, 0
5822	},
5823	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5824	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
5825	  0, 0
5826	},
5827	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5828	  SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
5829	  0, 0
5830	},
5831	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
5832	  SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
5833	  0, 0
5834	},
5835	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5836	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
5837	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
5838	},
5839	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5840	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
5841	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
5842	},
5843	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5844	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
5845	  0, 0
5846	},
5847	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5848	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
5849	  0, 0
5850	},
5851	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5852	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
5853	  0, 0
5854	},
5855	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5856	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
5857	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
5858	},
5859	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5860	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
5861	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
5862	},
5863	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5864	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
5865	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
5866	},
5867	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5868	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
5869	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
5870	},
5871	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5872	  SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
5873	  0, 0
5874	},
5875	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5876	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
5877	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
5878	},
5879	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5880	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
5881	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
5882	},
5883	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5884	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
5885	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
5886	},
5887	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5888	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
5889	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
5890	},
5891	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5892	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
5893	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
5894	},
5895	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5896	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
5897	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
5898	},
5899	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5900	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
5901	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
5902	},
5903	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5904	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
5905	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
5906	},
5907	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5908	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
5909	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
5910	},
5911	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5912	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
5913	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
5914	},
5915	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5916	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
5917	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
5918	},
5919	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5920	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
5921	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
5922	},
5923	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5924	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
5925	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
5926	},
5927	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5928	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
5929	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
5930	},
5931	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5932	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
5933	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
5934	},
5935	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5936	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
5937	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
5938	},
5939	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5940	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
5941	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
5942	},
5943	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5944	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
5945	  0, 0
5946	},
5947	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5948	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
5949	  0, 0
5950	},
5951	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5952	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
5953	  0, 0
5954	},
5955	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5956	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
5957	  0, 0
5958	},
5959	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5960	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
5961	  0, 0
5962	},
5963	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5964	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
5965	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
5966	},
5967	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5968	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
5969	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
5970	},
5971	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5972	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
5973	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
5974	},
5975	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5976	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
5977	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
5978	},
5979	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5980	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
5981	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
5982	},
5983	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5984	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
5985	  0, 0
5986	},
5987	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5988	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
5989	  0, 0
5990	},
5991	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5992	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
5993	  0, 0
5994	},
5995	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5996	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
5997	  0, 0
5998	},
5999	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6000	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6001	  0, 0
6002	},
6003	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6004	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6005	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6006	},
6007	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6008	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6009	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6010	},
6011	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6012	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6013	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6014	},
6015	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6016	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6017	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6018	},
6019	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6020	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6021	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6022	},
6023	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6024	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6025	  0, 0
6026	},
6027	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6028	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6029	  0, 0
6030	},
6031	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6032	  SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6033	  0, 0
6034	},
6035	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6036	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6037	  0, 0
6038	},
6039	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6040	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6041	  0, 0
6042	},
6043	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6044	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6045	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6046	},
6047	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6048	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6049	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6050	},
6051	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6052	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6053	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6054	},
6055	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6056	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6057	  0, 0
6058	},
6059	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6060	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6061	  0, 0
6062	},
6063	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6064	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6065	  0, 0
6066	},
6067	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6068	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6069	  0, 0
6070	},
6071	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6072	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6073	  0, 0
6074	},
6075	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6076	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6077	  0, 0
6078	}
6079};
6080
6081static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6082				     void *inject_if)
6083{
6084	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6085	int ret;
6086	struct ta_ras_trigger_error_input block_info = { 0 };
6087
6088	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6089		return -EINVAL;
6090
6091	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6092		return -EINVAL;
6093
6094	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6095		return -EPERM;
6096
6097	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6098	      info->head.type)) {
6099		DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n",
6100			ras_gfx_subblocks[info->head.sub_block_index].name,
6101			info->head.type);
6102		return -EPERM;
6103	}
6104
6105	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6106	      info->head.type)) {
6107		DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n",
6108			ras_gfx_subblocks[info->head.sub_block_index].name,
6109			info->head.type);
6110		return -EPERM;
6111	}
6112
6113	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6114	block_info.sub_block_index =
6115		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6116	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6117	block_info.address = info->address;
6118	block_info.value = info->value;
6119
6120	mutex_lock(&adev->grbm_idx_mutex);
6121	ret = psp_ras_trigger_error(&adev->psp, &block_info);
6122	mutex_unlock(&adev->grbm_idx_mutex);
6123
6124	return ret;
6125}
6126
6127static const char *vml2_mems[] = {
6128	"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6129	"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6130	"UTC_VML2_BANK_CACHE_0_4K_MEM0",
6131	"UTC_VML2_BANK_CACHE_0_4K_MEM1",
6132	"UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6133	"UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6134	"UTC_VML2_BANK_CACHE_1_4K_MEM0",
6135	"UTC_VML2_BANK_CACHE_1_4K_MEM1",
6136	"UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6137	"UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6138	"UTC_VML2_BANK_CACHE_2_4K_MEM0",
6139	"UTC_VML2_BANK_CACHE_2_4K_MEM1",
6140	"UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6141	"UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6142	"UTC_VML2_BANK_CACHE_3_4K_MEM0",
6143	"UTC_VML2_BANK_CACHE_3_4K_MEM1",
6144};
6145
6146static const char *vml2_walker_mems[] = {
6147	"UTC_VML2_CACHE_PDE0_MEM0",
6148	"UTC_VML2_CACHE_PDE0_MEM1",
6149	"UTC_VML2_CACHE_PDE1_MEM0",
6150	"UTC_VML2_CACHE_PDE1_MEM1",
6151	"UTC_VML2_CACHE_PDE2_MEM0",
6152	"UTC_VML2_CACHE_PDE2_MEM1",
6153	"UTC_VML2_RDIF_LOG_FIFO",
6154};
6155
6156static const char *atc_l2_cache_2m_mems[] = {
6157	"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6158	"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6159	"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6160	"UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6161};
6162
6163static const char *atc_l2_cache_4k_mems[] = {
6164	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6165	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6166	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6167	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6168	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6169	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6170	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6171	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6172	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6173	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6174	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6175	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6176	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6177	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6178	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6179	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6180	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6181	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6182	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6183	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6184	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6185	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6186	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6187	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6188	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6189	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6190	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6191	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6192	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6193	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6194	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6195	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6196};
6197
6198static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6199					 struct ras_err_data *err_data)
6200{
6201	uint32_t i, data;
6202	uint32_t sec_count, ded_count;
6203
6204	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6205	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6206	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6207	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6208	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6209	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6210	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6211	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6212
6213	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6214		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6215		data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6216
6217		sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6218		if (sec_count) {
6219			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6220				 vml2_mems[i], sec_count);
6221			err_data->ce_count += sec_count;
6222		}
6223
6224		ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6225		if (ded_count) {
6226			DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6227				 vml2_mems[i], ded_count);
6228			err_data->ue_count += ded_count;
6229		}
6230	}
6231
6232	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6233		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6234		data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6235
6236		sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6237						SEC_COUNT);
6238		if (sec_count) {
6239			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6240				 vml2_walker_mems[i], sec_count);
6241			err_data->ce_count += sec_count;
6242		}
6243
6244		ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6245						DED_COUNT);
6246		if (ded_count) {
6247			DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6248				 vml2_walker_mems[i], ded_count);
6249			err_data->ue_count += ded_count;
6250		}
6251	}
6252
6253	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6254		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6255		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6256
6257		sec_count = (data & 0x00006000L) >> 0xd;
6258		if (sec_count) {
6259			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6260				 atc_l2_cache_2m_mems[i], sec_count);
6261			err_data->ce_count += sec_count;
6262		}
6263	}
6264
6265	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6266		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6267		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6268
6269		sec_count = (data & 0x00006000L) >> 0xd;
6270		if (sec_count) {
6271			DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6272				 atc_l2_cache_4k_mems[i], sec_count);
6273			err_data->ce_count += sec_count;
6274		}
6275
6276		ded_count = (data & 0x00018000L) >> 0xf;
6277		if (ded_count) {
6278			DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6279				 atc_l2_cache_4k_mems[i], ded_count);
6280			err_data->ue_count += ded_count;
6281		}
6282	}
6283
6284	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6285	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6286	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6287	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6288
6289	return 0;
6290}
6291
6292static int gfx_v9_0_ras_error_count(const struct soc15_reg_entry *reg,
6293	uint32_t se_id, uint32_t inst_id, uint32_t value,
6294	uint32_t *sec_count, uint32_t *ded_count)
6295{
6296	uint32_t i;
6297	uint32_t sec_cnt, ded_cnt;
6298
6299	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
6300		if(gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6301			gfx_v9_0_ras_fields[i].seg != reg->seg ||
6302			gfx_v9_0_ras_fields[i].inst != reg->inst)
6303			continue;
6304
6305		sec_cnt = (value &
6306				gfx_v9_0_ras_fields[i].sec_count_mask) >>
6307				gfx_v9_0_ras_fields[i].sec_count_shift;
6308		if (sec_cnt) {
6309			DRM_INFO("GFX SubBlock %s, Instance[%d][%d], SEC %d\n",
6310				gfx_v9_0_ras_fields[i].name,
6311				se_id, inst_id,
6312				sec_cnt);
6313			*sec_count += sec_cnt;
6314		}
6315
6316		ded_cnt = (value &
6317				gfx_v9_0_ras_fields[i].ded_count_mask) >>
6318				gfx_v9_0_ras_fields[i].ded_count_shift;
6319		if (ded_cnt) {
6320			DRM_INFO("GFX SubBlock %s, Instance[%d][%d], DED %d\n",
6321				gfx_v9_0_ras_fields[i].name,
6322				se_id, inst_id,
6323				ded_cnt);
6324			*ded_count += ded_cnt;
6325		}
6326	}
6327
6328	return 0;
6329}
6330
6331static void gfx_v9_0_clear_ras_edc_counter(struct amdgpu_device *adev)
6332{
6333	int i, j, k;
6334
6335	/* read back registers to clear the counters */
6336	mutex_lock(&adev->grbm_idx_mutex);
6337	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6338		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6339			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6340				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
6341				RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6342			}
6343		}
6344	}
6345	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
6346	mutex_unlock(&adev->grbm_idx_mutex);
6347
6348	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6349	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6350	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6351	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6352	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6353	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6354	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6355	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6356
6357	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6358		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6359		RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6360	}
6361
6362	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6363		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6364		RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6365	}
6366
6367	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6368		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6369		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6370	}
6371
6372	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6373		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6374		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6375	}
6376
6377	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6378	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6379	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6380	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6381}
6382
6383static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6384					  void *ras_error_status)
6385{
6386	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6387	uint32_t sec_count = 0, ded_count = 0;
6388	uint32_t i, j, k;
6389	uint32_t reg_value;
6390
6391	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6392		return -EINVAL;
6393
6394	err_data->ue_count = 0;
6395	err_data->ce_count = 0;
6396
6397	mutex_lock(&adev->grbm_idx_mutex);
6398
6399	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6400		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6401			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6402				gfx_v9_0_select_se_sh(adev, j, 0, k);
6403				reg_value =
6404					RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6405				if (reg_value)
6406					gfx_v9_0_ras_error_count(&gfx_v9_0_edc_counter_regs[i],
6407							j, k, reg_value,
6408							&sec_count, &ded_count);
6409			}
6410		}
6411	}
6412
6413	err_data->ce_count += sec_count;
6414	err_data->ue_count += ded_count;
6415
6416	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6417	mutex_unlock(&adev->grbm_idx_mutex);
6418
6419	gfx_v9_0_query_utc_edc_status(adev, err_data);
6420
6421	return 0;
6422}
6423
6424static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6425	.name = "gfx_v9_0",
6426	.early_init = gfx_v9_0_early_init,
6427	.late_init = gfx_v9_0_late_init,
6428	.sw_init = gfx_v9_0_sw_init,
6429	.sw_fini = gfx_v9_0_sw_fini,
6430	.hw_init = gfx_v9_0_hw_init,
6431	.hw_fini = gfx_v9_0_hw_fini,
6432	.suspend = gfx_v9_0_suspend,
6433	.resume = gfx_v9_0_resume,
6434	.is_idle = gfx_v9_0_is_idle,
6435	.wait_for_idle = gfx_v9_0_wait_for_idle,
6436	.soft_reset = gfx_v9_0_soft_reset,
6437	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
6438	.set_powergating_state = gfx_v9_0_set_powergating_state,
6439	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
6440};
6441
6442static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6443	.type = AMDGPU_RING_TYPE_GFX,
6444	.align_mask = 0xff,
6445	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6446	.support_64bit_ptrs = true,
6447	.vmhub = AMDGPU_GFXHUB_0,
6448	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6449	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6450	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6451	.emit_frame_size = /* totally 242 maximum if 16 IBs */
6452		5 +  /* COND_EXEC */
6453		7 +  /* PIPELINE_SYNC */
6454		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6455		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6456		2 + /* VM_FLUSH */
6457		8 +  /* FENCE for VM_FLUSH */
6458		20 + /* GDS switch */
6459		4 + /* double SWITCH_BUFFER,
6460		       the first COND_EXEC jump to the place just
6461			   prior to this double SWITCH_BUFFER  */
6462		5 + /* COND_EXEC */
6463		7 +	 /*	HDP_flush */
6464		4 +	 /*	VGT_flush */
6465		14 + /*	CE_META */
6466		31 + /*	DE_META */
6467		3 + /* CNTX_CTRL */
6468		5 + /* HDP_INVL */
6469		8 + 8 + /* FENCE x2 */
6470		2, /* SWITCH_BUFFER */
6471	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
6472	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6473	.emit_fence = gfx_v9_0_ring_emit_fence,
6474	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6475	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6476	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6477	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6478	.test_ring = gfx_v9_0_ring_test_ring,
6479	.test_ib = gfx_v9_0_ring_test_ib,
6480	.insert_nop = amdgpu_ring_insert_nop,
6481	.pad_ib = amdgpu_ring_generic_pad_ib,
6482	.emit_switch_buffer = gfx_v9_ring_emit_sb,
6483	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6484	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6485	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6486	.emit_tmz = gfx_v9_0_ring_emit_tmz,
6487	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6488	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6489	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6490	.soft_recovery = gfx_v9_0_ring_soft_recovery,
6491};
6492
6493static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6494	.type = AMDGPU_RING_TYPE_COMPUTE,
6495	.align_mask = 0xff,
6496	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6497	.support_64bit_ptrs = true,
6498	.vmhub = AMDGPU_GFXHUB_0,
6499	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6500	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6501	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6502	.emit_frame_size =
6503		20 + /* gfx_v9_0_ring_emit_gds_switch */
6504		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6505		5 + /* hdp invalidate */
6506		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6507		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6508		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6509		2 + /* gfx_v9_0_ring_emit_vm_flush */
6510		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6511	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6512	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
6513	.emit_fence = gfx_v9_0_ring_emit_fence,
6514	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6515	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6516	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6517	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6518	.test_ring = gfx_v9_0_ring_test_ring,
6519	.test_ib = gfx_v9_0_ring_test_ib,
6520	.insert_nop = amdgpu_ring_insert_nop,
6521	.pad_ib = amdgpu_ring_generic_pad_ib,
6522	.set_priority = gfx_v9_0_ring_set_priority_compute,
6523	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6524	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6525	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6526};
6527
6528static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6529	.type = AMDGPU_RING_TYPE_KIQ,
6530	.align_mask = 0xff,
6531	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6532	.support_64bit_ptrs = true,
6533	.vmhub = AMDGPU_GFXHUB_0,
6534	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6535	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6536	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6537	.emit_frame_size =
6538		20 + /* gfx_v9_0_ring_emit_gds_switch */
6539		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6540		5 + /* hdp invalidate */
6541		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6542		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6543		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6544		2 + /* gfx_v9_0_ring_emit_vm_flush */
6545		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6546	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6547	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6548	.test_ring = gfx_v9_0_ring_test_ring,
6549	.insert_nop = amdgpu_ring_insert_nop,
6550	.pad_ib = amdgpu_ring_generic_pad_ib,
6551	.emit_rreg = gfx_v9_0_ring_emit_rreg,
6552	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6553	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6554	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6555};
6556
6557static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6558{
6559	int i;
6560
6561	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6562
6563	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6564		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6565
6566	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6567		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6568}
6569
6570static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
6571	.set = gfx_v9_0_set_eop_interrupt_state,
6572	.process = gfx_v9_0_eop_irq,
6573};
6574
6575static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
6576	.set = gfx_v9_0_set_priv_reg_fault_state,
6577	.process = gfx_v9_0_priv_reg_irq,
6578};
6579
6580static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
6581	.set = gfx_v9_0_set_priv_inst_fault_state,
6582	.process = gfx_v9_0_priv_inst_irq,
6583};
6584
6585static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
6586	.set = gfx_v9_0_set_cp_ecc_error_state,
6587	.process = amdgpu_gfx_cp_ecc_error_irq,
6588};
6589
6590
6591static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
6592{
6593	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6594	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
6595
6596	adev->gfx.priv_reg_irq.num_types = 1;
6597	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
6598
6599	adev->gfx.priv_inst_irq.num_types = 1;
6600	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
6601
6602	adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/
6603	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
6604}
6605
6606static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
6607{
6608	switch (adev->asic_type) {
6609	case CHIP_VEGA10:
6610	case CHIP_VEGA12:
6611	case CHIP_VEGA20:
6612	case CHIP_RAVEN:
6613	case CHIP_ARCTURUS:
6614	case CHIP_RENOIR:
6615		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
6616		break;
6617	default:
6618		break;
6619	}
6620}
6621
6622static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
6623{
6624	/* init asci gds info */
6625	switch (adev->asic_type) {
6626	case CHIP_VEGA10:
6627	case CHIP_VEGA12:
6628	case CHIP_VEGA20:
6629		adev->gds.gds_size = 0x10000;
6630		break;
6631	case CHIP_RAVEN:
6632	case CHIP_ARCTURUS:
6633		adev->gds.gds_size = 0x1000;
6634		break;
6635	default:
6636		adev->gds.gds_size = 0x10000;
6637		break;
6638	}
6639
6640	switch (adev->asic_type) {
6641	case CHIP_VEGA10:
6642	case CHIP_VEGA20:
6643		adev->gds.gds_compute_max_wave_id = 0x7ff;
6644		break;
6645	case CHIP_VEGA12:
6646		adev->gds.gds_compute_max_wave_id = 0x27f;
6647		break;
6648	case CHIP_RAVEN:
6649		if (adev->rev_id >= 0x8)
6650			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
6651		else
6652			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
6653		break;
6654	case CHIP_ARCTURUS:
6655		adev->gds.gds_compute_max_wave_id = 0xfff;
6656		break;
6657	default:
6658		/* this really depends on the chip */
6659		adev->gds.gds_compute_max_wave_id = 0x7ff;
6660		break;
6661	}
6662
6663	adev->gds.gws_size = 64;
6664	adev->gds.oa_size = 16;
6665}
6666
6667static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6668						 u32 bitmap)
6669{
6670	u32 data;
6671
6672	if (!bitmap)
6673		return;
6674
6675	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6676	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6677
6678	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
6679}
6680
6681static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6682{
6683	u32 data, mask;
6684
6685	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
6686	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
6687
6688	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6689	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6690
6691	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6692
6693	return (~data) & mask;
6694}
6695
6696static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
6697				 struct amdgpu_cu_info *cu_info)
6698{
6699	int i, j, k, counter, active_cu_number = 0;
6700	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6701	unsigned disable_masks[4 * 4];
6702
6703	if (!adev || !cu_info)
6704		return -EINVAL;
6705
6706	/*
6707	 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
6708	 */
6709	if (adev->gfx.config.max_shader_engines *
6710		adev->gfx.config.max_sh_per_se > 16)
6711		return -EINVAL;
6712
6713	amdgpu_gfx_parse_disable_cu(disable_masks,
6714				    adev->gfx.config.max_shader_engines,
6715				    adev->gfx.config.max_sh_per_se);
6716
6717	mutex_lock(&adev->grbm_idx_mutex);
6718	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6719		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6720			mask = 1;
6721			ao_bitmap = 0;
6722			counter = 0;
6723			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
6724			gfx_v9_0_set_user_cu_inactive_bitmap(
6725				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
6726			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
6727
6728			/*
6729			 * The bitmap(and ao_cu_bitmap) in cu_info structure is
6730			 * 4x4 size array, and it's usually suitable for Vega
6731			 * ASICs which has 4*2 SE/SH layout.
6732			 * But for Arcturus, SE/SH layout is changed to 8*1.
6733			 * To mostly reduce the impact, we make it compatible
6734			 * with current bitmap array as below:
6735			 *    SE4,SH0 --> bitmap[0][1]
6736			 *    SE5,SH0 --> bitmap[1][1]
6737			 *    SE6,SH0 --> bitmap[2][1]
6738			 *    SE7,SH0 --> bitmap[3][1]
6739			 */
6740			cu_info->bitmap[i % 4][j + i / 4] = bitmap;
6741
6742			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
6743				if (bitmap & mask) {
6744					if (counter < adev->gfx.config.max_cu_per_sh)
6745						ao_bitmap |= mask;
6746					counter ++;
6747				}
6748				mask <<= 1;
6749			}
6750			active_cu_number += counter;
6751			if (i < 2 && j < 2)
6752				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6753			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
6754		}
6755	}
6756	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6757	mutex_unlock(&adev->grbm_idx_mutex);
6758
6759	cu_info->number = active_cu_number;
6760	cu_info->ao_cu_mask = ao_cu_mask;
6761	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6762
6763	return 0;
6764}
6765
6766const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
6767{
6768	.type = AMD_IP_BLOCK_TYPE_GFX,
6769	.major = 9,
6770	.minor = 0,
6771	.rev = 0,
6772	.funcs = &gfx_v9_0_ip_funcs,
6773};
6774