/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"

#include "vega10_enum.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#include "amdgpu_ring_mux.h"
#include "gfx_v9_4.h"
#include "gfx_v9_0.h"
#include "gfx_v9_4_2.h"

#include "asic_reg/pwr/pwr_10_0_offset.h"
#include "asic_reg/pwr/pwr_10_0_sh_mask.h"
#include "asic_reg/gc/gc_9_0_default.h"

#define GFX9_NUM_GFX_RINGS     1
#define GFX9_NUM_SW_GFX_RINGS  2
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmGCEA_PROBE_MAP                        0x070c
#define mmGCEA_PROBE_MAP_BASE_IDX               0

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");

MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");

MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
MODULE_FIRMWARE("amdgpu/renoir_me.bin");
MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");

MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");

MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin");

#define mmTCP_CHAN_STEER_0_ARCT								0x0b03
#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX							0
#define mmTCP_CHAN_STEER_1_ARCT								0x0b04
#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX							0
#define mmTCP_CHAN_STEER_2_ARCT								0x0b09
#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX							0
#define mmTCP_CHAN_STEER_3_ARCT								0x0b0a
#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX							0
#define mmTCP_CHAN_STEER_4_ARCT								0x0b0b
#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX							0
#define mmTCP_CHAN_STEER_5_ARCT								0x0b0c
#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX							0

#define mmGOLDEN_TSC_COUNT_UPPER_Renoir                0x0025
#define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX       1
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir                0x0026
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX       1

enum ta_ras_gfx_subblock {
	/*CPC*/
	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
	TA_RAS_BLOCK__GFX_CPC_UCODE,
	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	/* CPF*/
	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
	TA_RAS_BLOCK__GFX_CPF_TAG,
	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
	/* CPG*/
	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
	TA_RAS_BLOCK__GFX_CPG_TAG,
	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
	/* GDS*/
	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	/* SPI*/
	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
	/* SQ*/
	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
	TA_RAS_BLOCK__GFX_SQ_LDS_D,
	TA_RAS_BLOCK__GFX_SQ_LDS_I,
	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
	/* SQC (3 ranges)*/
	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
	/* SQC range 0*/
	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	/* SQC range 1*/
	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	/* SQC range 2*/
	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
	/* TA*/
	TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	/* TCA*/
	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	/* TCC (5 sub-ranges)*/
	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	/* TCC range 0*/
	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	/* TCC range 1*/
	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	/* TCC range 2*/
	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	/* TCC range 3*/
	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	/* TCC range 4*/
	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
	/* TCI*/
	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
	/* TCP*/
	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	/* TD*/
	TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	/* EA (3 sub-ranges)*/
	TA_RAS_BLOCK__GFX_EA_INDEX_START,
	/* EA range 0*/
	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	/* EA range 1*/
	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	/* EA range 2*/
	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
	/* UTC VM L2 bank*/
	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
	/* UTC VM walker*/
	TA_RAS_BLOCK__UTC_VML2_WALKER,
	/* UTC ATC L2 2MB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
	/* UTC ATC L2 4KB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
	TA_RAS_BLOCK__GFX_MAX
};

struct ras_gfx_subblock {
	unsigned char *name;
	int ta_subblock;
	int hw_supported_error_type;
	int sw_supported_error_type;
};

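/*
 * Helper for building ras_gfx_subblocks[] below: stores the subblock name
 * and its ta_ras_gfx_subblock index, packs flags a-d into
 * hw_supported_error_type and flags e-h into sw_supported_error_type.
 */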
#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
		#subblock,                                                     \
		TA_RAS_BLOCK__##subblock,                                      \
		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
	}

static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
			     0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
			     0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
};

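/*
 * Golden register tables: each SOC15_REG_GOLDEN_VALUE entry names a GC
 * register together with an AND mask and OR value that
 * soc15_program_register_sequence() applies in
 * gfx_v9_0_init_golden_registers() below.
 */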
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
};

static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
	{SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
	{SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
};

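/*
 * Register offsets of the RLC_SRM_INDEX_CNTL_ADDR/_DATA instances relative
 * to instance 0 (the first entry is therefore 0).
 */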
static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
					  void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
				     void *inject_if, uint32_t instance_mask);
static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
					      unsigned int vmid);

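/*
 * PM4 packet helpers used by the KIQ (kernel interface queue) to set
 * compute resources and to map, unmap and query user queues; the dword
 * size of each packet is recorded in gfx_v9_0_kiq_pm4_funcs below.
 */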
static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
				uint64_t queue_mask)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring,
		PACKET3_SET_RESOURCES_VMID_MASK(0) |
		/* vmid_mask:0, queue_type:0 (KIQ) */
		PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
	amdgpu_ring_write(kiq_ring,
			lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring,
			upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
}

static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
				 struct amdgpu_ring *ring)
{
	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	uint64_t wptr_addr = ring->wptr_gpu_addr;
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
			 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
			 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
			 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
			 /*queue_type: normal compute queue */
			 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
			 /* alloc format: all_on_one_pipe */
			 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
			 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
			 /* num_queues: must be 1 */
			 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}

static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   enum amdgpu_unmap_queues_action action,
				   u64 gpu_addr, u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_UNMAP_QUEUES_ACTION(action) |
			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

	if (action == PREEMPT_QUEUES_NO_UNMAP) {
		amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask));
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);

	} else {
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
	}
}

static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   u64 addr,
				   u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
			  PACKET3_QUERY_STATUS_COMMAND(2));
	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
	amdgpu_ring_write(kiq_ring,
			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}

static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
				uint16_t pasid, uint32_t flush_type,
				bool all_hub)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
	amdgpu_ring_write(kiq_ring,
			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
}

static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
	.kiq_query_status = gfx_v9_0_kiq_query_status,
	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
	.set_resources_size = 8,
	.map_queues_size = 7,
	.unmap_queues_size = 6,
	.query_status_size = 7,
	.invalidate_tlbs_size = 2,
};

static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
	adev->gfx.kiq[0].pmf = &gfx_v9_0_kiq_pm4_funcs;
}

static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(9, 0, 1):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg10,
						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		break;
	case IP_VERSION(9, 2, 1):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1,
						ARRAY_SIZE(golden_settings_gc_9_2_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1_vg12,
						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
		break;
	case IP_VERSION(9, 4, 0):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg20,
						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
		break;
	case IP_VERSION(9, 4, 1):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_4_1_arct,
						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
		break;
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
						ARRAY_SIZE(golden_settings_gc_9_1));
		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv2,
							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
		else
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv1,
							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
		break;
	case IP_VERSION(9, 3, 0):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_1_rn,
						ARRAY_SIZE(golden_settings_gc_9_1_rn));
		return; /* for renoir, don't need common golden settings */
	case IP_VERSION(9, 4, 2):
		gfx_v9_4_2_init_golden_registers(adev,
						 adev->smuio.funcs->get_die_id(adev));
		break;
	default:
		break;
	}

	if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
	    (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)))
		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

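/*
 * Emit a WRITE_DATA packet that writes @val to the register at @reg,
 * optionally requesting write confirmation (@wc).
 */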
static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
				 /* memory (1) or register (0) */
				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
				 WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

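/*
 * Basic ring test: seed SCRATCH_REG0 with 0xCAFEDEAD, submit a
 * SET_UCONFIG_REG packet that writes 0xDEADBEEF to it, then poll the
 * register until the value shows up or the timeout expires.
 */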
static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	uint32_t tmp = 0;
	unsigned i;
	int r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		return r;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START);
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;
	return r;
}

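/*
 * IB test: seed a writeback slot with 0xCAFEDEAD, submit an indirect
 * buffer whose WRITE_DATA packet stores 0xDEADBEEF to it, then wait on
 * the fence and verify the value landed.
 */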
static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));

	r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}


static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
	amdgpu_ucode_release(&adev->gfx.pfp_fw);
	amdgpu_ucode_release(&adev->gfx.me_fw);
	amdgpu_ucode_release(&adev->gfx.ce_fw);
	amdgpu_ucode_release(&adev->gfx.rlc_fw);
	amdgpu_ucode_release(&adev->gfx.mec_fw);
	amdgpu_ucode_release(&adev->gfx.mec2_fw);

	kfree(adev->gfx.rlc.register_list_format);
}

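/*
 * Set me_fw_write_wait/mec_fw_write_wait when the loaded CP firmware is
 * new enough (per GC IP version thresholds below); warn once if the
 * firmware predates the required versions.
 */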
static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
{
	adev->gfx.me_fw_write_wait = false;
	adev->gfx.mec_fw_write_wait = false;

	if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
	    ((adev->gfx.mec_fw_version < 0x000001a5) ||
	     (adev->gfx.mec_feature_version < 46) ||
	     (adev->gfx.pfp_fw_version < 0x000000b7) ||
	     (adev->gfx.pfp_feature_version < 46)))
		DRM_WARN_ONCE("CP firmware version too old, please update!");

	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(9, 0, 1):
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case IP_VERSION(9, 2, 1):
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case IP_VERSION(9, 4, 0):
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case IP_VERSION(9, 1, 0):
	case IP_VERSION(9, 2, 2):
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	default:
		adev->gfx.me_fw_write_wait = true;
		adev->gfx.mec_fw_write_wait = true;
		break;
	}
}

struct amdgpu_gfxoff_quirk {
	u16 chip_vendor;
	u16 chip_device;
	u16 subsys_vendor;
	u16 subsys_device;
	u8 revision;
};

static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
	/* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
	/* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
	{ 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
	/* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
	/* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
	{ 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
	{ 0, 0, 0, 0, 0 },
};

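/*
 * Return true when the device's PCI IDs and revision match an entry in
 * amdgpu_gfxoff_quirk_list above (boards where GFXOFF is known to be
 * problematic), in which case GFXOFF gets disabled.
 */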
1178static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1179{
1180	const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1181
1182	while (p && p->chip_device != 0) {
1183		if (pdev->vendor == p->chip_vendor &&
1184		    pdev->device == p->chip_device &&
1185		    pdev->subsystem_vendor == p->subsys_vendor &&
1186		    pdev->subsystem_device == p->subsys_device &&
1187		    pdev->revision == p->revision) {
1188			return true;
1189		}
1190		++p;
1191	}
1192	return false;
1193}
1194
1195static bool is_raven_kicker(struct amdgpu_device *adev)
1196{
1197	if (adev->pm.fw_version >= 0x41e2b)
1198		return true;
1199	else
1200		return false;
1201}
1202
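/*
 * GC 9.3.0 with sufficiently new ME firmware (version >= 0xa5, feature
 * version >= 52) expects an enlarged doorbell range, as the helper's
 * name suggests; callers check this before programming the range.
 */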
1203static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
1204{
1205	if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0)) &&
1206	    (adev->gfx.me_fw_version >= 0x000000a5) &&
1207	    (adev->gfx.me_feature_version >= 52))
1208		return true;
1209	else
1210		return false;
1211}
1212
1213static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1214{
1215	if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1216		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1217
1218	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1219	case IP_VERSION(9, 0, 1):
1220	case IP_VERSION(9, 2, 1):
1221	case IP_VERSION(9, 4, 0):
1222		break;
1223	case IP_VERSION(9, 2, 2):
1224	case IP_VERSION(9, 1, 0):
1225		if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1226		      (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1227		    ((!is_raven_kicker(adev) &&
1228		      adev->gfx.rlc_fw_version < 531) ||
1229		     (adev->gfx.rlc_feature_version < 1) ||
1230		     !adev->gfx.rlc.is_rlc_v2_1))
1231			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1232
1233		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1234			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1235				AMD_PG_SUPPORT_CP |
1236				AMD_PG_SUPPORT_RLC_SMU_HS;
1237		break;
1238	case IP_VERSION(9, 3, 0):
1239		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1240			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1241				AMD_PG_SUPPORT_CP |
1242				AMD_PG_SUPPORT_RLC_SMU_HS;
1243		break;
1244	default:
1245		break;
1246	}
1247}
1248
1249static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1250					  char *chip_name)
1251{
1252	char fw_name[30];
1253	int err;
1254
1255	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1256	err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name);
1257	if (err)
1258		goto out;
1259	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
1260
1261	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1262	err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name);
1263	if (err)
1264		goto out;
1265	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
1266
1267	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1268	err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, fw_name);
1269	if (err)
1270		goto out;
1271	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
1272
1273out:
1274	if (err) {
1275		amdgpu_ucode_release(&adev->gfx.pfp_fw);
1276		amdgpu_ucode_release(&adev->gfx.me_fw);
1277		amdgpu_ucode_release(&adev->gfx.ce_fw);
1278	}
1279	return err;
1280}
1281
1282static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1283				       char *chip_name)
1284{
1285	char fw_name[30];
1286	int err;
1287	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1288	uint16_t version_major;
1289	uint16_t version_minor;
1290	uint32_t smu_version;
1291
1292	/*
1293	 * For Picasso boards with an AM4 socket, use picasso_rlc_am4.bin
1294	 * instead of picasso_rlc.bin.
1295	 * Detection:
1296	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1297	 *          or revision >= 0xD8 && revision <= 0xDF
1298	 * otherwise it is PCO FP5
1299	 */
1300	if (!strcmp(chip_name, "picasso") &&
1301		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1302		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1303		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1304	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1305		(smu_version >= 0x41e2b))
1306		/*
1307		 * The SMC is loaded by the SBIOS on APUs, so the SMU version can be read directly.
1308		 */
1309		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1310	else
1311		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1312	err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
1313	if (err)
1314		goto out;
1315	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1316
1317	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1318	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1319	err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
1320out:
1321	if (err)
1322		amdgpu_ucode_release(&adev->gfx.rlc_fw);
1323
1324	return err;
1325}
1326
1327static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
1328{
1329	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
1330	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
1331	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0))
1332		return false;
1333
1334	return true;
1335}
1336
1337static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1338					      char *chip_name)
1339{
1340	char fw_name[30];
1341	int err;
1342
1343	if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1344		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec.bin", chip_name);
1345	else
1346		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1347
1348	err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
1349	if (err)
1350		goto out;
1351	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
1352	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
1353
1354	if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1355		if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1356			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec2.bin", chip_name);
1357		else
1358			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1359
1360		/* ignore failures to load */
1361		err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, fw_name);
1362		if (!err) {
1363			amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
1364			amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
1365		} else {
1366			err = 0;
1367			amdgpu_ucode_release(&adev->gfx.mec2_fw);
1368		}
1369	} else {
1370		adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
1371		adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
1372	}
1373
1374	gfx_v9_0_check_if_need_gfxoff(adev);
1375	gfx_v9_0_check_fw_write_wait(adev);
1376
1377out:
1378	if (err)
1379		amdgpu_ucode_release(&adev->gfx.mec_fw);
1380	return err;
1381}
1382
1383static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1384{
1385	char ucode_prefix[30];
1386	int r;
1387
1388	DRM_DEBUG("\n");
1389	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
1390
1391	/* No CPG in Arcturus */
1392	if (adev->gfx.num_gfx_rings) {
1393		r = gfx_v9_0_init_cp_gfx_microcode(adev, ucode_prefix);
1394		if (r)
1395			return r;
1396	}
1397
1398	r = gfx_v9_0_init_rlc_microcode(adev, ucode_prefix);
1399	if (r)
1400		return r;
1401
1402	r = gfx_v9_0_init_cp_compute_microcode(adev, ucode_prefix);
1403	if (r)
1404		return r;
1405
1406	return r;
1407}
1408
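/*
 * Size of the clear-state buffer in dwords: 2 for the preamble-begin
 * packet, 3 for context control, 2 + reg_count for each
 * SET_CONTEXT_REG block, and 2 each for preamble-end and clear-state.
 */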
1409static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1410{
1411	u32 count = 0;
1412	const struct cs_section_def *sect = NULL;
1413	const struct cs_extent_def *ext = NULL;
1414
1415	/* begin clear state */
1416	count += 2;
1417	/* context control state */
1418	count += 3;
1419
1420	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1421		for (ext = sect->section; ext->extent != NULL; ++ext) {
1422			if (sect->id == SECT_CONTEXT)
1423				count += 2 + ext->reg_count;
1424			else
1425				return 0;
1426		}
1427	}
1428
1429	/* end clear state */
1430	count += 2;
1431	/* clear state */
1432	count += 2;
1433
1434	return count;
1435}
1436
1437static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1438				    volatile u32 *buffer)
1439{
1440	u32 count = 0, i;
1441	const struct cs_section_def *sect = NULL;
1442	const struct cs_extent_def *ext = NULL;
1443
1444	if (adev->gfx.rlc.cs_data == NULL)
1445		return;
1446	if (buffer == NULL)
1447		return;
1448
1449	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1450	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1451
1452	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1453	buffer[count++] = cpu_to_le32(0x80000000);
1454	buffer[count++] = cpu_to_le32(0x80000000);
1455
1456	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1457		for (ext = sect->section; ext->extent != NULL; ++ext) {
1458			if (sect->id == SECT_CONTEXT) {
1459				buffer[count++] =
1460					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1461				buffer[count++] = cpu_to_le32(ext->reg_index -
1462						PACKET3_SET_CONTEXT_REG_START);
1463				for (i = 0; i < ext->reg_count; i++)
1464					buffer[count++] = cpu_to_le32(ext->extent[i]);
1465			} else {
1466				return;
1467			}
1468		}
1469	}
1470
1471	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1472	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1473
1474	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1475	buffer[count++] = cpu_to_le32(0);
1476}
1477
1478static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1479{
1480	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1481	uint32_t pg_always_on_cu_num = 2;
1482	uint32_t always_on_cu_num;
1483	uint32_t i, j, k;
1484	uint32_t mask, cu_bitmap, counter;
1485
1486	if (adev->flags & AMD_IS_APU)
1487		always_on_cu_num = 4;
1488	else if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 2, 1))
1489		always_on_cu_num = 8;
1490	else
1491		always_on_cu_num = 12;
1492
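	/*
	 * Walk the CU bitmap of every SE/SH: the first pg_always_on_cu_num
	 * active CUs are written to RLC_PG_ALWAYS_ON_CU_MASK, and up to
	 * always_on_cu_num CUs are kept in RLC_LB_ALWAYS_ACTIVE_CU_MASK.
	 */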
1493	mutex_lock(&adev->grbm_idx_mutex);
1494	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1495		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1496			mask = 1;
1497			cu_bitmap = 0;
1498			counter = 0;
1499			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
1500
1501			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
1502				if (cu_info->bitmap[0][i][j] & mask) {
1503					if (counter == pg_always_on_cu_num)
1504						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1505					if (counter < always_on_cu_num)
1506						cu_bitmap |= mask;
1507					else
1508						break;
1509					counter++;
1510				}
1511				mask <<= 1;
1512			}
1513
1514			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1515			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1516		}
1517	}
1518	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1519	mutex_unlock(&adev->grbm_idx_mutex);
1520}
1521
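/*
 * Program the RLC load-balancing (LBPW) thresholds, counters and CU
 * masks for this part; gfx_v9_4_init_lbpw() below does the same with
 * different tuning values.
 */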
1522static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1523{
1524	uint32_t data;
1525
1526	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1527	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1528	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1529	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1530	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1531
1532	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1533	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1534
1535	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1536	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1537
1538	mutex_lock(&adev->grbm_idx_mutex);
1539	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1540	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1541	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1542
1543	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1544	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1545	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1546	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1547	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1548
1549	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1550	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1551	data &= 0x0000FFFF;
1552	data |= 0x00C00000;
1553	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1554
1555	/*
1556	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1557	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1558	 */
1559
1560	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved,
1561	 * but is used here as part of the RLC_LB_CNTL configuration */
1562	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1563	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1564	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1565	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1566	mutex_unlock(&adev->grbm_idx_mutex);
1567
1568	gfx_v9_0_init_always_on_cu_mask(adev);
1569}
1570
1571static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1572{
1573	uint32_t data;
1574
1575	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1576	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1577	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1578	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1579	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1580
1581	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1582	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1583
1584	/* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
1585	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1586
1587	mutex_lock(&adev->grbm_idx_mutex);
1588	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1589	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1590	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1591
1592	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1593	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1594	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1595	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1596	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1597
1598	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1599	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1600	data &= 0x0000FFFF;
1601	data |= 0x00C00000;
1602	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1603
1604	/*
1605	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1606	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1607	 */
1608
1609	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved,
1610	 * but is used here as part of the RLC_LB_CNTL configuration */
1611	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1612	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1613	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1614	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1615	mutex_unlock(&adev->grbm_idx_mutex);
1616
1617	gfx_v9_0_init_always_on_cu_mask(adev);
1618}
1619
1620static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1621{
1622	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1623}
1624
1625static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1626{
1627	if (gfx_v9_0_load_mec2_fw_bin_support(adev))
1628		return 5;
1629	else
1630		return 4;
1631}
1632
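/*
 * Record the scratch, GRBM control/index and spare-interrupt register
 * offsets used by the RLC-gated (RLCG) indirect register access path.
 */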
1633static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
1634{
1635	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
1636
1637	reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
1638	reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1639	reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
1640	reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
1641	reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
1642	reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
1643	reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
1644	reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
1645	adev->gfx.rlc.rlcg_reg_access_supported = true;
1646}
1647
1648static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1649{
1650	const struct cs_section_def *cs_data;
1651	int r;
1652
1653	adev->gfx.rlc.cs_data = gfx9_cs_data;
1654
1655	cs_data = adev->gfx.rlc.cs_data;
1656
1657	if (cs_data) {
1658		/* init clear state block */
1659		r = amdgpu_gfx_rlc_init_csb(adev);
1660		if (r)
1661			return r;
1662	}
1663
1664	if (adev->flags & AMD_IS_APU) {
1665		/* TODO: double check the cp_table_size for RV */
1666		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1667		r = amdgpu_gfx_rlc_init_cpt(adev);
1668		if (r)
1669			return r;
1670	}
1671
1672	return 0;
1673}
1674
1675static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1676{
1677	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1678	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1679}
1680
1681static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1682{
1683	int r;
1684	u32 *hpd;
1685	const __le32 *fw_data;
1686	unsigned fw_size;
1687	u32 *fw;
1688	size_t mec_hpd_size;
1689
1690	const struct gfx_firmware_header_v1_0 *mec_hdr;
1691
1692	bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1693
1694	/* take ownership of the relevant compute queues */
1695	amdgpu_gfx_compute_queue_acquire(adev);
1696	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1697	if (mec_hpd_size) {
1698		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1699					      AMDGPU_GEM_DOMAIN_VRAM |
1700					      AMDGPU_GEM_DOMAIN_GTT,
1701					      &adev->gfx.mec.hpd_eop_obj,
1702					      &adev->gfx.mec.hpd_eop_gpu_addr,
1703					      (void **)&hpd);
1704		if (r) {
1705			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1706			gfx_v9_0_mec_fini(adev);
1707			return r;
1708		}
1709
1710		memset(hpd, 0, mec_hpd_size);
1711
1712		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1713		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1714	}
1715
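	/* copy the MEC microcode into its own GTT buffer object */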
1716	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1717
1718	fw_data = (const __le32 *)
1719		(adev->gfx.mec_fw->data +
1720		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1721	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
1722
1723	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1724				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1725				      &adev->gfx.mec.mec_fw_obj,
1726				      &adev->gfx.mec.mec_fw_gpu_addr,
1727				      (void **)&fw);
1728	if (r) {
1729		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1730		gfx_v9_0_mec_fini(adev);
1731		return r;
1732	}
1733
1734	memcpy(fw, fw_data, fw_size);
1735
1736	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1737	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1738
1739	return 0;
1740}
1741
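/*
 * Wave state is read through the SQ indirect register interface:
 * SQ_IND_INDEX selects the SIMD/wave (and optionally thread) plus the
 * register offset, and the value is then read back from SQ_IND_DATA.
 */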
1742static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1743{
1744	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1745		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1746		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1747		(address << SQ_IND_INDEX__INDEX__SHIFT) |
1748		(SQ_IND_INDEX__FORCE_READ_MASK));
1749	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1750}
1751
1752static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1753			   uint32_t wave, uint32_t thread,
1754			   uint32_t regno, uint32_t num, uint32_t *out)
1755{
1756	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1757		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1758		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1759		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
1760		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1761		(SQ_IND_INDEX__FORCE_READ_MASK) |
1762		(SQ_IND_INDEX__AUTO_INCR_MASK));
1763	while (num--)
1764		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1765}
1766
1767static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1768{
1769	/* type 1 wave data */
1770	dst[(*no_fields)++] = 1;
1771	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1772	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1773	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1774	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1775	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1776	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1777	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1778	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1779	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1780	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1781	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1782	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1783	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1784	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1785	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
1786}
1787
1788static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1789				     uint32_t wave, uint32_t start,
1790				     uint32_t size, uint32_t *dst)
1791{
1792	wave_read_regs(
1793		adev, simd, wave, 0,
1794		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1795}
1796
1797static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1798				     uint32_t wave, uint32_t thread,
1799				     uint32_t start, uint32_t size,
1800				     uint32_t *dst)
1801{
1802	wave_read_regs(
1803		adev, simd, wave, thread,
1804		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1805}
1806
1807static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1808				  u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
1809{
1810	soc15_grbm_select(adev, me, pipe, q, vm, 0);
1811}
1812
1813static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1814		.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1815		.select_se_sh = &gfx_v9_0_select_se_sh,
1816		.read_wave_data = &gfx_v9_0_read_wave_data,
1817		.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1818		.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1819		.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1820};
1821
1822const struct amdgpu_ras_block_hw_ops  gfx_v9_0_ras_ops = {
1823		.ras_error_inject = &gfx_v9_0_ras_error_inject,
1824		.query_ras_error_count = &gfx_v9_0_query_ras_error_count,
1825		.reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
1826};
1827
1828static struct amdgpu_gfx_ras gfx_v9_0_ras = {
1829	.ras_block = {
1830		.hw_ops = &gfx_v9_0_ras_ops,
1831	},
1832};
1833
1834static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1835{
1836	u32 gb_addr_config;
1837	int err;
1838
1839	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1840	case IP_VERSION(9, 0, 1):
1841		adev->gfx.config.max_hw_contexts = 8;
1842		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1843		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1844		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1845		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1846		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1847		break;
1848	case IP_VERSION(9, 2, 1):
1849		adev->gfx.config.max_hw_contexts = 8;
1850		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1851		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1852		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1853		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1854		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1855		DRM_INFO("fix gfx.config for vega12\n");
1856		break;
1857	case IP_VERSION(9, 4, 0):
1858		adev->gfx.ras = &gfx_v9_0_ras;
1859		adev->gfx.config.max_hw_contexts = 8;
1860		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1861		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1862		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1863		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1864		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1865		gb_addr_config &= ~0xf3e777ff;
1866		gb_addr_config |= 0x22014042;
1867		/* check vbios table if gpu info is not available */
1868		err = amdgpu_atomfirmware_get_gfx_info(adev);
1869		if (err)
1870			return err;
1871		break;
1872	case IP_VERSION(9, 2, 2):
1873	case IP_VERSION(9, 1, 0):
1874		adev->gfx.config.max_hw_contexts = 8;
1875		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1876		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1877		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1878		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1879		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1880			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1881		else
1882			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1883		break;
1884	case IP_VERSION(9, 4, 1):
1885		adev->gfx.ras = &gfx_v9_4_ras;
1886		adev->gfx.config.max_hw_contexts = 8;
1887		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1888		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1889		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1890		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1891		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1892		gb_addr_config &= ~0xf3e777ff;
1893		gb_addr_config |= 0x22014042;
1894		break;
1895	case IP_VERSION(9, 3, 0):
1896		adev->gfx.config.max_hw_contexts = 8;
1897		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1898		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1899		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
1900		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1901		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1902		gb_addr_config &= ~0xf3e777ff;
1903		gb_addr_config |= 0x22010042;
1904		break;
1905	case IP_VERSION(9, 4, 2):
1906		adev->gfx.ras = &gfx_v9_4_2_ras;
1907		adev->gfx.config.max_hw_contexts = 8;
1908		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1909		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1910		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1911		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1912		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1913		gb_addr_config &= ~0xf3e777ff;
1914		gb_addr_config |= 0x22014042;
1915		/* check vbios table if gpu info is not available */
1916		err = amdgpu_atomfirmware_get_gfx_info(adev);
1917		if (err)
1918			return err;
1919		break;
1920	default:
1921		BUG();
1922		break;
1923	}
1924
1925	adev->gfx.config.gb_addr_config = gb_addr_config;
1926
1927	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1928			REG_GET_FIELD(
1929					adev->gfx.config.gb_addr_config,
1930					GB_ADDR_CONFIG,
1931					NUM_PIPES);
1932
1933	adev->gfx.config.max_tile_pipes =
1934		adev->gfx.config.gb_addr_config_fields.num_pipes;
1935
1936	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1937			REG_GET_FIELD(
1938					adev->gfx.config.gb_addr_config,
1939					GB_ADDR_CONFIG,
1940					NUM_BANKS);
1941	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1942			REG_GET_FIELD(
1943					adev->gfx.config.gb_addr_config,
1944					GB_ADDR_CONFIG,
1945					MAX_COMPRESSED_FRAGS);
1946	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1947			REG_GET_FIELD(
1948					adev->gfx.config.gb_addr_config,
1949					GB_ADDR_CONFIG,
1950					NUM_RB_PER_SE);
1951	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1952			REG_GET_FIELD(
1953					adev->gfx.config.gb_addr_config,
1954					GB_ADDR_CONFIG,
1955					NUM_SHADER_ENGINES);
1956	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1957			REG_GET_FIELD(
1958					adev->gfx.config.gb_addr_config,
1959					GB_ADDR_CONFIG,
1960					PIPE_INTERLEAVE_SIZE));
1961
1962	return 0;
1963}
1964
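/*
 * Set up one compute ring: MEC0 is exposed as ME1, the ring's EOP
 * buffer lives inside the shared HPD BO, and the doorbell index is
 * doubled because each ring uses a 64-bit doorbell.
 */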
1965static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1966				      int mec, int pipe, int queue)
1967{
1968	unsigned irq_type;
1969	struct amdgpu_ring *ring;
1970	unsigned int hw_prio;
1971
1972	ring = &adev->gfx.compute_ring[ring_id];
1973
1974	/* mec0 is me1 */
1975	ring->me = mec + 1;
1976	ring->pipe = pipe;
1977	ring->queue = queue;
1978
1979	ring->ring_obj = NULL;
1980	ring->use_doorbell = true;
1981	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
1982	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1983				+ (ring_id * GFX9_MEC_HPD_SIZE);
1984	ring->vm_hub = AMDGPU_GFXHUB(0);
1985	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1986
1987	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1988		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1989		+ ring->pipe;
1990	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
1991			AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
1992	/* type-2 packets are deprecated on MEC, use type-3 instead */
1993	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
1994				hw_prio, NULL);
1995}
1996
1997static int gfx_v9_0_sw_init(void *handle)
1998{
1999	int i, j, k, r, ring_id;
2000	int xcc_id = 0;
2001	struct amdgpu_ring *ring;
2002	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2003	unsigned int hw_prio;
2004
2005	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2006	case IP_VERSION(9, 0, 1):
2007	case IP_VERSION(9, 2, 1):
2008	case IP_VERSION(9, 4, 0):
2009	case IP_VERSION(9, 2, 2):
2010	case IP_VERSION(9, 1, 0):
2011	case IP_VERSION(9, 4, 1):
2012	case IP_VERSION(9, 3, 0):
2013	case IP_VERSION(9, 4, 2):
2014		adev->gfx.mec.num_mec = 2;
2015		break;
2016	default:
2017		adev->gfx.mec.num_mec = 1;
2018		break;
2019	}
2020
2021	adev->gfx.mec.num_pipe_per_mec = 4;
2022	adev->gfx.mec.num_queue_per_pipe = 8;
2023
2024	/* EOP Event */
2025	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2026	if (r)
2027		return r;
2028
2029	/* Privileged reg */
2030	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2031			      &adev->gfx.priv_reg_irq);
2032	if (r)
2033		return r;
2034
2035	/* Privileged inst */
2036	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2037			      &adev->gfx.priv_inst_irq);
2038	if (r)
2039		return r;
2040
2041	/* ECC error */
2042	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2043			      &adev->gfx.cp_ecc_error_irq);
2044	if (r)
2045		return r;
2046
2047	/* FUE error */
2048	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2049			      &adev->gfx.cp_ecc_error_irq);
2050	if (r)
2051		return r;
2052
2053	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2054
2055	if (adev->gfx.rlc.funcs) {
2056		if (adev->gfx.rlc.funcs->init) {
2057			r = adev->gfx.rlc.funcs->init(adev);
2058			if (r) {
2059				dev_err(adev->dev, "Failed to init rlc BOs!\n");
2060				return r;
2061			}
2062		}
2063	}
2064
2065	r = gfx_v9_0_mec_init(adev);
2066	if (r) {
2067		DRM_ERROR("Failed to init MEC BOs!\n");
2068		return r;
2069	}
2070
2071	/* set up the gfx ring */
2072	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2073		ring = &adev->gfx.gfx_ring[i];
2074		ring->ring_obj = NULL;
2075		if (!i)
2076			sprintf(ring->name, "gfx");
2077		else
2078			sprintf(ring->name, "gfx_%d", i);
2079		ring->use_doorbell = true;
2080		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2081
2082		/* disable scheduler on the real ring */
2083		ring->no_scheduler = adev->gfx.mcbp;
2084		ring->vm_hub = AMDGPU_GFXHUB(0);
2085		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2086				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2087				     AMDGPU_RING_PRIO_DEFAULT, NULL);
2088		if (r)
2089			return r;
2090	}
2091
2092	/* set up the software rings */
2093	if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
2094		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2095			ring = &adev->gfx.sw_gfx_ring[i];
2096			ring->ring_obj = NULL;
2097			sprintf(ring->name, amdgpu_sw_ring_name(i));
2098			ring->use_doorbell = true;
2099			ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2100			ring->is_sw_ring = true;
2101			hw_prio = amdgpu_sw_ring_priority(i);
2102			ring->vm_hub = AMDGPU_GFXHUB(0);
2103			r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2104					     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio,
2105					     NULL);
2106			if (r)
2107				return r;
2108			ring->wptr = 0;
2109		}
2110
2111		/* init the muxer and add software rings */
2112		r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0],
2113					 GFX9_NUM_SW_GFX_RINGS);
2114		if (r) {
2115			DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r);
2116			return r;
2117		}
2118		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2119			r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer,
2120							&adev->gfx.sw_gfx_ring[i]);
2121			if (r) {
2122				DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r);
2123				return r;
2124			}
2125		}
2126	}
2127
2128	/* set up the compute queues - allocate horizontally across pipes */
2129	ring_id = 0;
2130	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2131		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2132			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2133				if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
2134								     k, j))
2135					continue;
2136
2137				r = gfx_v9_0_compute_ring_init(adev,
2138							       ring_id,
2139							       i, k, j);
2140				if (r)
2141					return r;
2142
2143				ring_id++;
2144			}
2145		}
2146	}
2147
2148	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0);
2149	if (r) {
2150		DRM_ERROR("Failed to init KIQ BOs!\n");
2151		return r;
2152	}
2153
2154	r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
2155	if (r)
2156		return r;
2157
2158	/* create MQD for all compute queues as well as KIQ for the SR-IOV case */
2159	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation), 0);
2160	if (r)
2161		return r;
2162
2163	adev->gfx.ce_ram_size = 0x8000;
2164
2165	r = gfx_v9_0_gpu_early_init(adev);
2166	if (r)
2167		return r;
2168
2169	if (amdgpu_gfx_ras_sw_init(adev)) {
2170		dev_err(adev->dev, "Failed to initialize gfx ras block!\n");
2171		return -EINVAL;
2172	}
2173
2174	return 0;
2175}
2176
2177
2178static int gfx_v9_0_sw_fini(void *handle)
2179{
2180	int i;
2181	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2182
2183	if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
2184		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
2185			amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]);
2186		amdgpu_ring_mux_fini(&adev->gfx.muxer);
2187	}
2188
2189	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2190		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2191	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2192		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2193
2194	amdgpu_gfx_mqd_sw_fini(adev, 0);
2195	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
2196	amdgpu_gfx_kiq_fini(adev, 0);
2197
2198	gfx_v9_0_mec_fini(adev);
2199	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2200				&adev->gfx.rlc.clear_state_gpu_addr,
2201				(void **)&adev->gfx.rlc.cs_ptr);
2202	if (adev->flags & AMD_IS_APU) {
2203		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2204				&adev->gfx.rlc.cp_table_gpu_addr,
2205				(void **)&adev->gfx.rlc.cp_table_ptr);
2206	}
2207	gfx_v9_0_free_microcode(adev);
2208
2209	return 0;
2210}
2211
2212
2213static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2214{
2215	/* TODO */
2216}
2217
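/*
 * Program GRBM_GFX_INDEX to select a specific shader engine, shader
 * array and instance, or broadcast to all of them when the
 * corresponding argument is 0xffffffff.
 */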
2218void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2219			   u32 instance, int xcc_id)
2220{
2221	u32 data;
2222
2223	if (instance == 0xffffffff)
2224		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2225	else
2226		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2227
2228	if (se_num == 0xffffffff)
2229		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2230	else
2231		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2232
2233	if (sh_num == 0xffffffff)
2234		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2235	else
2236		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2237
2238	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2239}
2240
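/*
 * Return a bitmap of the render backends that are active for the
 * currently selected SE/SH, derived from the CC and GC_USER
 * backend-disable registers.
 */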
2241static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2242{
2243	u32 data, mask;
2244
2245	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2246	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2247
2248	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2249	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2250
2251	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2252					 adev->gfx.config.max_sh_per_se);
2253
2254	return (~data) & mask;
2255}
2256
2257static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2258{
2259	int i, j;
2260	u32 data;
2261	u32 active_rbs = 0;
2262	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2263					adev->gfx.config.max_sh_per_se;
2264
2265	mutex_lock(&adev->grbm_idx_mutex);
2266	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2267		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2268			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
2269			data = gfx_v9_0_get_rb_active_bitmap(adev);
2270			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2271					       rb_bitmap_width_per_sh);
2272		}
2273	}
2274	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
2275	mutex_unlock(&adev->grbm_idx_mutex);
2276
2277	adev->gfx.config.backend_enable_mask = active_rbs;
2278	adev->gfx.config.num_rbs = hweight32(active_rbs);
2279}
2280
2281static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device *adev,
2282				uint32_t first_vmid,
2283				uint32_t last_vmid)
2284{
2285	uint32_t data;
2286	uint32_t trap_config_vmid_mask = 0;
2287	int i;
2288
2289	/* Calculate trap config vmid mask */
2290	for (i = first_vmid; i < last_vmid; i++)
2291		trap_config_vmid_mask |= (1 << i);
2292
2293	data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
2294			VMID_SEL, trap_config_vmid_mask);
2295	data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
2296			TRAP_EN, 1);
2297	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
2298	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
2299
2300	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
2301	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
2302}
2303
2304#define DEFAULT_SH_MEM_BASES	(0x6000)
2305static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2306{
2307	int i;
2308	uint32_t sh_mem_config;
2309	uint32_t sh_mem_bases;
2310
2311	/*
2312	 * Configure apertures:
2313	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2314	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2315	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2316	 */
2317	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2318
2319	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2320			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2321			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2322
2323	mutex_lock(&adev->srbm_mutex);
2324	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2325		soc15_grbm_select(adev, 0, 0, 0, i, 0);
2326		/* CP and shaders */
2327		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2328		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2329	}
2330	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
2331	mutex_unlock(&adev->srbm_mutex);
2332
2333	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
2334	   access. These should be enabled by FW for target VMIDs. */
2335	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2336		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2337		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2338		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2339		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2340	}
2341}
2342
2343static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2344{
2345	int vmid;
2346
2347	/*
2348	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2349	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2350	 * the driver can enable them for graphics. VMID0 should maintain
2351	 * access so that HWS firmware can save/restore entries.
2352	 */
2353	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
2354		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2355		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2356		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2357		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2358	}
2359}
2360
2361static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2362{
2363	uint32_t tmp;
2364
2365	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2366	case IP_VERSION(9, 4, 1):
2367		tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2368		tmp = REG_SET_FIELD(tmp, SQ_CONFIG, DISABLE_BARRIER_WAITCNT,
2369				!READ_ONCE(adev->barrier_has_auto_waitcnt));
2370		WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2371		break;
2372	default:
2373		break;
2374	}
2375}
2376
2377static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2378{
2379	u32 tmp;
2380	int i;
2381
2382	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2383
2384	gfx_v9_0_tiling_mode_table_init(adev);
2385
2386	if (adev->gfx.num_gfx_rings)
2387		gfx_v9_0_setup_rb(adev);
2388	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2389	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2390
2391	/* XXX SH_MEM regs */
2392	/* where to put LDS, scratch, GPUVM in FSA64 space */
2393	mutex_lock(&adev->srbm_mutex);
2394	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
2395		soc15_grbm_select(adev, 0, 0, 0, i, 0);
2396		/* CP and shaders */
2397		if (i == 0) {
2398			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2399					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2400			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2401					    !!adev->gmc.noretry);
2402			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2403			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2404		} else {
2405			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2406					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2407			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2408					    !!adev->gmc.noretry);
2409			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2410			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2411				(adev->gmc.private_aperture_start >> 48));
2412			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2413				(adev->gmc.shared_aperture_start >> 48));
2414			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2415		}
2416	}
2417	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
2418
2419	mutex_unlock(&adev->srbm_mutex);
2420
2421	gfx_v9_0_init_compute_vmid(adev);
2422	gfx_v9_0_init_gds_vmid(adev);
2423	gfx_v9_0_init_sq_config(adev);
2424}
2425
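/*
 * Poll until the RLC serdes (CU master and non-CU master) report idle
 * on every SE/SH, or until the usec timeout expires.
 */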
2426static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2427{
2428	u32 i, j, k;
2429	u32 mask;
2430
2431	mutex_lock(&adev->grbm_idx_mutex);
2432	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2433		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2434			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
2435			for (k = 0; k < adev->usec_timeout; k++) {
2436				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2437					break;
2438				udelay(1);
2439			}
2440			if (k == adev->usec_timeout) {
2441				amdgpu_gfx_select_se_sh(adev, 0xffffffff,
2442						      0xffffffff, 0xffffffff, 0);
2443				mutex_unlock(&adev->grbm_idx_mutex);
2444				DRM_INFO("Timed out waiting for RLC serdes %u,%u\n",
2445					 i, j);
2446				return;
2447			}
2448		}
2449	}
2450	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
2451	mutex_unlock(&adev->grbm_idx_mutex);
2452
2453	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2454		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2455		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2456		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2457	for (k = 0; k < adev->usec_timeout; k++) {
2458		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2459			break;
2460		udelay(1);
2461	}
2462}
2463
2464static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2465					       bool enable)
2466{
2467	u32 tmp;
2468
2469	/* These interrupts should be enabled to drive DS clock */
2470
2471	tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2472
2473	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2474	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2475	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2476	if(adev->gfx.num_gfx_rings)
2477		tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2478
2479	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2480}
2481
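/*
 * Build the clear-state buffer and program its GPU address and length
 * into the RLC CSIB registers.
 */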
2482static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2483{
2484	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2485	/* csib */
2486	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2487			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2488	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2489			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2490	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2491			adev->gfx.rlc.clear_state_size);
2492}
2493
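/*
 * Walk the indirect portion of the RLC register list format: record
 * the start offset of each indirect block and collect the unique
 * indirect register offsets referenced by the entries (each block is
 * terminated by 0xFFFFFFFF).
 */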
2494static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2495				int indirect_offset,
2496				int list_size,
2497				int *unique_indirect_regs,
2498				int unique_indirect_reg_count,
2499				int *indirect_start_offsets,
2500				int *indirect_start_offsets_count,
2501				int max_start_offsets_count)
2502{
2503	int idx;
2504
2505	for (; indirect_offset < list_size; indirect_offset++) {
2506		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2507		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2508		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2509
2510		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2511			indirect_offset += 2;
2512
2513			/* look for the matching index */
2514			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2515				if (unique_indirect_regs[idx] ==
2516					register_list_format[indirect_offset] ||
2517					!unique_indirect_regs[idx])
2518					break;
2519			}
2520
2521			BUG_ON(idx >= unique_indirect_reg_count);
2522
2523			if (!unique_indirect_regs[idx])
2524				unique_indirect_regs[idx] = register_list_format[indirect_offset];
2525
2526			indirect_offset++;
2527		}
2528	}
2529}
2530
2531static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2532{
2533	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2534	int unique_indirect_reg_count = 0;
2535
2536	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2537	int indirect_start_offsets_count = 0;
2538
2539	int list_size = 0;
2540	int i = 0, j = 0;
2541	u32 tmp = 0;
2542
2543	u32 *register_list_format =
2544		kmemdup(adev->gfx.rlc.register_list_format,
2545			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2546	if (!register_list_format)
2547		return -ENOMEM;
2548
2549	/* setup unique_indirect_regs array and indirect_start_offsets array */
2550	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2551	gfx_v9_1_parse_ind_reg_list(register_list_format,
2552				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2553				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2554				    unique_indirect_regs,
2555				    unique_indirect_reg_count,
2556				    indirect_start_offsets,
2557				    &indirect_start_offsets_count,
2558				    ARRAY_SIZE(indirect_start_offsets));
2559
2560	/* enable auto inc in case it is disabled */
2561	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2562	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2563	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2564
2565	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2566	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2567		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2568	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2569		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2570			adev->gfx.rlc.register_restore[i]);
2571
2572	/* load indirect register */
2573	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2574		adev->gfx.rlc.reg_list_format_start);
2575
2576	/* direct register portion */
2577	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2578		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2579			register_list_format[i]);
2580
2581	/* indirect register portion */
2582	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2583		if (register_list_format[i] == 0xFFFFFFFF) {
2584			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2585			continue;
2586		}
2587
2588		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2589		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2590
2591		for (j = 0; j < unique_indirect_reg_count; j++) {
2592			if (register_list_format[i] == unique_indirect_regs[j]) {
2593				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2594				break;
2595			}
2596		}
2597
2598		BUG_ON(j >= unique_indirect_reg_count);
2599
2600		i++;
2601	}
2602
2603	/* set save/restore list size */
2604	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2605	list_size = list_size >> 1;
2606	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2607		adev->gfx.rlc.reg_restore_list_size);
2608	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2609
2610	/* write the starting offsets to RLC scratch ram */
2611	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2612		adev->gfx.rlc.starting_offsets_start);
2613	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2614		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2615		       indirect_start_offsets[i]);
2616
2617	/* load unique indirect regs */
2618	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2619		if (unique_indirect_regs[i] != 0) {
2620			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2621			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2622			       unique_indirect_regs[i] & 0x3FFFF);
2623
2624			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2625			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2626			       unique_indirect_regs[i] >> 20);
2627		}
2628	}
2629
2630	kfree(register_list_format);
2631	return 0;
2632}
2633
2634static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2635{
2636	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2637}
2638
2639static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2640					     bool enable)
2641{
2642	uint32_t data = 0;
2643	uint32_t default_data = 0;
2644
2645	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2646	if (enable) {
2647		/* enable GFXIP control over CGPG */
2648		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2649		if(default_data != data)
2650			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2651
2652		/* update status */
2653		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2654		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2655		if(default_data != data)
2656			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2657	} else {
2658		/* restore GFXIP control over CGPG */
2659		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2660		if(default_data != data)
2661			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2662	}
2663}
2664
2665static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2666{
2667	uint32_t data = 0;
2668
2669	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2670			      AMD_PG_SUPPORT_GFX_SMG |
2671			      AMD_PG_SUPPORT_GFX_DMG)) {
2672		/* init IDLE_POLL_COUNT = 0x60 */
2673		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2674		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2675		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2676		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2677
2678		/* init RLC PG Delay */
2679		data = 0;
2680		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2681		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2682		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2683		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2684		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2685
2686		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2687		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2688		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2689		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2690
2691		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2692		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2693		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2694		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2695
2696		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2697		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2698
2699		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2700		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2701		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2702		if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 3, 0))
2703			pwr_10_0_gfxip_control_over_cgpg(adev, true);
2704	}
2705}
2706
2707static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2708						bool enable)
2709{
2710	uint32_t data = 0;
2711	uint32_t default_data = 0;
2712
2713	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2714	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2715			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2716			     enable ? 1 : 0);
2717	if (default_data != data)
2718		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2719}
2720
2721static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2722						bool enable)
2723{
2724	uint32_t data = 0;
2725	uint32_t default_data = 0;
2726
2727	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2728	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2729			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2730			     enable ? 1 : 0);
2731	if(default_data != data)
2732		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2733}
2734
2735static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2736					bool enable)
2737{
2738	uint32_t data = 0;
2739	uint32_t default_data = 0;
2740
2741	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2742	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2743			     CP_PG_DISABLE,
2744			     enable ? 0 : 1);
2745	if(default_data != data)
2746		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2747}
2748
2749static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2750						bool enable)
2751{
2752	uint32_t data, default_data;
2753
2754	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2755	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2756			     GFX_POWER_GATING_ENABLE,
2757			     enable ? 1 : 0);
2758	if(default_data != data)
2759		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2760}
2761
2762static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2763						bool enable)
2764{
2765	uint32_t data, default_data;
2766
2767	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2768	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2769			     GFX_PIPELINE_PG_ENABLE,
2770			     enable ? 1 : 0);
2771	if(default_data != data)
2772		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2773
2774	if (!enable)
2775		/* read any GFX register to wake up GFX */
2776		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2777}
2778
2779static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2780						       bool enable)
2781{
2782	uint32_t data, default_data;
2783
2784	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2785	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2786			     STATIC_PER_CU_PG_ENABLE,
2787			     enable ? 1 : 0);
2788	if(default_data != data)
2789		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2790}
2791
2792static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2793						bool enable)
2794{
2795	uint32_t data, default_data;
2796
2797	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2798	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2799			     DYN_PER_CU_PG_ENABLE,
2800			     enable ? 1 : 0);
2801	if(default_data != data)
2802		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2803}
2804
2805static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2806{
2807	gfx_v9_0_init_csb(adev);
2808
2809	/*
2810	 * The RLC save/restore list is only supported from RLC v2.1
2811	 * onwards, and it is required by the GFXOFF feature.
2812	 */
2813	if (adev->gfx.rlc.is_rlc_v2_1) {
2814		if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
2815			    IP_VERSION(9, 2, 1) ||
2816		    (adev->apu_flags & AMD_APU_IS_RAVEN2))
2817			gfx_v9_1_init_rlc_save_restore_list(adev);
2818		gfx_v9_0_enable_save_restore_machine(adev);
2819	}
2820
2821	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2822			      AMD_PG_SUPPORT_GFX_SMG |
2823			      AMD_PG_SUPPORT_GFX_DMG |
2824			      AMD_PG_SUPPORT_CP |
2825			      AMD_PG_SUPPORT_GDS |
2826			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
2827		WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
2828			     adev->gfx.rlc.cp_table_gpu_addr >> 8);
2829		gfx_v9_0_init_gfx_power_gating(adev);
2830	}
2831}
2832
2833static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2834{
2835	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2836	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2837	gfx_v9_0_wait_for_rlc_serdes(adev);
2838}
2839
2840static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2841{
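	/* pulse SOFT_RESET_RLC: assert, wait 50 us, de-assert, then wait another 50 us */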
2842	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2843	udelay(50);
2844	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2845	udelay(50);
2846}
2847
2848static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2849{
2850#ifdef AMDGPU_RLC_DEBUG_RETRY
2851	u32 rlc_ucode_ver;
2852#endif
2853
2854	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2855	udelay(50);
2856
2857	/* APUs (e.g. carrizo) enable the CP interrupt only after the CP has been initialized */
2858	if (!(adev->flags & AMD_IS_APU)) {
2859		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2860		udelay(50);
2861	}
2862
2863#ifdef AMDGPU_RLC_DEBUG_RETRY
2864	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
2865	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2866	if (rlc_ucode_ver == 0x108) {
2867		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
2868				rlc_ucode_ver, adev->gfx.rlc_fw_version);
2869		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2870		 * default is 0x9C4 to create a 100us interval */
2871		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2872		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2873		 * to disable the page fault retry interrupts, default is
2874		 * 0x100 (256) */
2875		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2876	}
2877#endif
2878}
2879
2880static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2881{
2882	const struct rlc_firmware_header_v2_0 *hdr;
2883	const __le32 *fw_data;
2884	unsigned i, fw_size;
2885
2886	if (!adev->gfx.rlc_fw)
2887		return -EINVAL;
2888
2889	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2890	amdgpu_ucode_print_rlc_hdr(&hdr->header);
2891
2892	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2893			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2894	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2895
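	/* stream the RLC ucode dwords through the GPM UCODE ADDR/DATA register
	 * pair, starting at the RLCG load address
	 */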
2896	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2897			RLCG_UCODE_LOADING_START_ADDRESS);
2898	for (i = 0; i < fw_size; i++)
2899		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2900	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2901
2902	return 0;
2903}
2904
2905static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2906{
2907	int r;
2908
2909	if (amdgpu_sriov_vf(adev)) {
2910		gfx_v9_0_init_csb(adev);
2911		return 0;
2912	}
2913
2914	adev->gfx.rlc.funcs->stop(adev);
2915
2916	/* disable CG */
2917	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2918
2919	gfx_v9_0_init_pg(adev);
2920
2921	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2922		/* legacy rlc firmware loading */
2923		r = gfx_v9_0_rlc_load_microcode(adev);
2924		if (r)
2925			return r;
2926	}
2927
2928	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2929	case IP_VERSION(9, 2, 2):
2930	case IP_VERSION(9, 1, 0):
2931		gfx_v9_0_init_lbpw(adev);
2932		if (amdgpu_lbpw == 0)
2933			gfx_v9_0_enable_lbpw(adev, false);
2934		else
2935			gfx_v9_0_enable_lbpw(adev, true);
2936		break;
2937	case IP_VERSION(9, 4, 0):
2938		gfx_v9_4_init_lbpw(adev);
2939		if (amdgpu_lbpw > 0)
2940			gfx_v9_0_enable_lbpw(adev, true);
2941		else
2942			gfx_v9_0_enable_lbpw(adev, false);
2943		break;
2944	default:
2945		break;
2946	}
2947
2948	gfx_v9_0_update_spm_vmid_internal(adev, 0xf);
2949
2950	adev->gfx.rlc.funcs->start(adev);
2951
2952	return 0;
2953}
2954
2955static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2956{
2957	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2958
2959	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2960	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2961	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2962	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
2963	udelay(50);
2964}
2965
2966static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2967{
2968	const struct gfx_firmware_header_v1_0 *pfp_hdr;
2969	const struct gfx_firmware_header_v1_0 *ce_hdr;
2970	const struct gfx_firmware_header_v1_0 *me_hdr;
2971	const __le32 *fw_data;
2972	unsigned i, fw_size;
2973
2974	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2975		return -EINVAL;
2976
2977	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2978		adev->gfx.pfp_fw->data;
2979	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2980		adev->gfx.ce_fw->data;
2981	me_hdr = (const struct gfx_firmware_header_v1_0 *)
2982		adev->gfx.me_fw->data;
2983
2984	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2985	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2986	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2987
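	/* halt the gfx CP (PFP/CE/ME) while the microcode is streamed in */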
2988	gfx_v9_0_cp_gfx_enable(adev, false);
2989
2990	/* PFP */
2991	fw_data = (const __le32 *)
2992		(adev->gfx.pfp_fw->data +
2993		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2994	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2995	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
2996	for (i = 0; i < fw_size; i++)
2997		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2998	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2999
3000	/* CE */
3001	fw_data = (const __le32 *)
3002		(adev->gfx.ce_fw->data +
3003		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3004	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3005	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3006	for (i = 0; i < fw_size; i++)
3007		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3008	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3009
3010	/* ME */
3011	fw_data = (const __le32 *)
3012		(adev->gfx.me_fw->data +
3013		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3014	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3015	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3016	for (i = 0; i < fw_size; i++)
3017		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3018	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3019
3020	return 0;
3021}
3022
3023static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3024{
3025	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3026	const struct cs_section_def *sect = NULL;
3027	const struct cs_extent_def *ext = NULL;
3028	int r, i, tmp;
3029
3030	/* init the CP */
3031	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3032	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3033
3034	gfx_v9_0_cp_gfx_enable(adev, true);
3035
3036	/* Limit this quirk to the gfx9 APU series; the gfx10/gfx11 APUs have
3037	 * already been confirmed not to need it.
3038	 */
3039	if (adev->flags & AMD_IS_APU &&
3040			adev->in_s3 && !adev->suspend_complete) {
3041		DRM_INFO("Will skip the CSB packet resubmit\n");
3042		return 0;
3043	}
3044	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3045	if (r) {
3046		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3047		return r;
3048	}
3049
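	/* Emit the clear-state preamble and the context register defaults from
	 * gfx9_cs_data, then clear state and program the CE partition bases.
	 */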
3050	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3051	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3052
3053	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3054	amdgpu_ring_write(ring, 0x80000000);
3055	amdgpu_ring_write(ring, 0x80000000);
3056
3057	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3058		for (ext = sect->section; ext->extent != NULL; ++ext) {
3059			if (sect->id == SECT_CONTEXT) {
3060				amdgpu_ring_write(ring,
3061				       PACKET3(PACKET3_SET_CONTEXT_REG,
3062					       ext->reg_count));
3063				amdgpu_ring_write(ring,
3064				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3065				for (i = 0; i < ext->reg_count; i++)
3066					amdgpu_ring_write(ring, ext->extent[i]);
3067			}
3068		}
3069	}
3070
3071	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3072	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3073
3074	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3075	amdgpu_ring_write(ring, 0);
3076
3077	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3078	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3079	amdgpu_ring_write(ring, 0x8000);
3080	amdgpu_ring_write(ring, 0x8000);
3081
3082	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3083	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3084		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3085	amdgpu_ring_write(ring, tmp);
3086	amdgpu_ring_write(ring, 0);
3087
3088	amdgpu_ring_commit(ring);
3089
3090	return 0;
3091}
3092
3093static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3094{
3095	struct amdgpu_ring *ring;
3096	u32 tmp;
3097	u32 rb_bufsz;
3098	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3099
3100	/* Set the write pointer delay */
3101	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3102
3103	/* set the RB to use vmid 0 */
3104	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3105
3106	/* Set ring buffer size */
3107	ring = &adev->gfx.gfx_ring[0];
3108	rb_bufsz = order_base_2(ring->ring_size / 8);
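	/* both CP_RB0_CNTL size fields are log2-encoded */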
3109	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3110	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3111#ifdef __BIG_ENDIAN
3112	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3113#endif
3114	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3115
3116	/* Initialize the ring buffer's write pointers */
3117	ring->wptr = 0;
3118	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3119	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3120
3121	/* set the wb address whether it's enabled or not */
3122	rptr_addr = ring->rptr_gpu_addr;
3123	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3124	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3125
3126	wptr_gpu_addr = ring->wptr_gpu_addr;
3127	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3128	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3129
3130	mdelay(1);
3131	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3132
3133	rb_addr = ring->gpu_addr >> 8;
3134	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3135	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3136
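	/* configure the gfx ring doorbell and its allowed doorbell range */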
3137	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3138	if (ring->use_doorbell) {
3139		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3140				    DOORBELL_OFFSET, ring->doorbell_index);
3141		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3142				    DOORBELL_EN, 1);
3143	} else {
3144		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3145	}
3146	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3147
3148	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3149			DOORBELL_RANGE_LOWER, ring->doorbell_index);
3150	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3151
3152	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3153		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3154
3155
3156	/* start the ring */
3157	gfx_v9_0_cp_gfx_start(adev);
3158
3159	return 0;
3160}
3161
3162static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3163{
3164	if (enable) {
3165		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3166	} else {
3167		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3168			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3169		adev->gfx.kiq[0].ring.sched.ready = false;
3170	}
3171	udelay(50);
3172}
3173
3174static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3175{
3176	const struct gfx_firmware_header_v1_0 *mec_hdr;
3177	const __le32 *fw_data;
3178	unsigned i;
3179	u32 tmp;
3180
3181	if (!adev->gfx.mec_fw)
3182		return -EINVAL;
3183
3184	gfx_v9_0_cp_compute_enable(adev, false);
3185
3186	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3187	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3188
3189	fw_data = (const __le32 *)
3190		(adev->gfx.mec_fw->data +
3191		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3192	tmp = 0;
3193	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3194	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3195	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3196
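	/* point the CPC instruction cache at the MEC firmware in the GPU address space */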
3197	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3198		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3199	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3200		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3201
3202	/* MEC1 */
3203	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3204			 mec_hdr->jt_offset);
3205	for (i = 0; i < mec_hdr->jt_size; i++)
3206		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3207			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3208
3209	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3210			adev->gfx.mec_fw_version);
3211	/* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3212
3213	return 0;
3214}
3215
3216/* KIQ functions */
3217static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3218{
3219	uint32_t tmp;
3220	struct amdgpu_device *adev = ring->adev;
3221
3222	/* tell RLC which is KIQ queue */
3223	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3224	tmp &= 0xffffff00;
3225	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3226	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
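	/* the queue selection is latched first, then bit 0x80 is set with a second write */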
3227	tmp |= 0x80;
3228	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3229}
3230
3231static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3232{
3233	struct amdgpu_device *adev = ring->adev;
3234
3235	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3236		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
3237			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3238			mqd->cp_hqd_queue_priority =
3239				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3240		}
3241	}
3242}
3243
3244static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3245{
3246	struct amdgpu_device *adev = ring->adev;
3247	struct v9_mqd *mqd = ring->mqd_ptr;
3248	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3249	uint32_t tmp;
3250
3251	mqd->header = 0xC0310800;
3252	mqd->compute_pipelinestat_enable = 0x00000001;
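	/* enable all CUs on every shader engine for this queue */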
3253	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3254	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3255	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3256	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3257	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3258	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3259	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3260	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3261	mqd->compute_misc_reserved = 0x00000003;
3262
3263	mqd->dynamic_cu_mask_addr_lo =
3264		lower_32_bits(ring->mqd_gpu_addr
3265			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3266	mqd->dynamic_cu_mask_addr_hi =
3267		upper_32_bits(ring->mqd_gpu_addr
3268			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3269
3270	eop_base_addr = ring->eop_gpu_addr >> 8;
3271	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3272	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3273
3274	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3275	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3276	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3277			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3278
3279	mqd->cp_hqd_eop_control = tmp;
3280
3281	/* enable doorbell? */
3282	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3283
3284	if (ring->use_doorbell) {
3285		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3286				    DOORBELL_OFFSET, ring->doorbell_index);
3287		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3288				    DOORBELL_EN, 1);
3289		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3290				    DOORBELL_SOURCE, 0);
3291		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3292				    DOORBELL_HIT, 0);
3293	} else {
3294		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3295					 DOORBELL_EN, 0);
3296	}
3297
3298	mqd->cp_hqd_pq_doorbell_control = tmp;
3299
3300	/* disable the queue if it's active */
3301	ring->wptr = 0;
3302	mqd->cp_hqd_dequeue_request = 0;
3303	mqd->cp_hqd_pq_rptr = 0;
3304	mqd->cp_hqd_pq_wptr_lo = 0;
3305	mqd->cp_hqd_pq_wptr_hi = 0;
3306
3307	/* set the pointer to the MQD */
3308	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3309	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3310
3311	/* set MQD vmid to 0 */
3312	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3313	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3314	mqd->cp_mqd_control = tmp;
3315
3316	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3317	hqd_gpu_addr = ring->gpu_addr >> 8;
3318	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3319	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3320
3321	/* set up the HQD, this is similar to CP_RB0_CNTL */
3322	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3323	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3324			    (order_base_2(ring->ring_size / 4) - 1));
3325	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3326			(order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
3327#ifdef __BIG_ENDIAN
3328	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3329#endif
3330	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3331	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3332	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3333	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3334	mqd->cp_hqd_pq_control = tmp;
3335
3336	/* set the wb address whether it's enabled or not */
3337	wb_gpu_addr = ring->rptr_gpu_addr;
3338	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3339	mqd->cp_hqd_pq_rptr_report_addr_hi =
3340		upper_32_bits(wb_gpu_addr) & 0xffff;
3341
3342	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3343	wb_gpu_addr = ring->wptr_gpu_addr;
3344	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3345	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3346
3347	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3348	ring->wptr = 0;
3349	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3350
3351	/* set the vmid for the queue */
3352	mqd->cp_hqd_vmid = 0;
3353
3354	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3355	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3356	mqd->cp_hqd_persistent_state = tmp;
3357
3358	/* set MIN_IB_AVAIL_SIZE */
3359	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3360	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3361	mqd->cp_hqd_ib_control = tmp;
3362
3363	/* set static priority for a queue/ring */
3364	gfx_v9_0_mqd_set_priority(ring, mqd);
3365	mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
3366
3367	/* Queues brought up via the map_queues packet don't need cp_hqd_active
3368	 * set here; only the KIQ, programmed by direct register writes, does.
3369	 */
3370	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3371		mqd->cp_hqd_active = 1;
3372
3373	return 0;
3374}
3375
3376static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3377{
3378	struct amdgpu_device *adev = ring->adev;
3379	struct v9_mqd *mqd = ring->mqd_ptr;
3380	int j;
3381
3382	/* disable wptr polling */
3383	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3384
3385	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3386	       mqd->cp_hqd_eop_base_addr_lo);
3387	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3388	       mqd->cp_hqd_eop_base_addr_hi);
3389
3390	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3391	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3392	       mqd->cp_hqd_eop_control);
3393
3394	/* enable doorbell? */
3395	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3396	       mqd->cp_hqd_pq_doorbell_control);
3397
3398	/* disable the queue if it's active */
3399	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3400		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3401		for (j = 0; j < adev->usec_timeout; j++) {
3402			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3403				break;
3404			udelay(1);
3405		}
3406		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3407		       mqd->cp_hqd_dequeue_request);
3408		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3409		       mqd->cp_hqd_pq_rptr);
3410		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3411		       mqd->cp_hqd_pq_wptr_lo);
3412		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3413		       mqd->cp_hqd_pq_wptr_hi);
3414	}
3415
3416	/* set the pointer to the MQD */
3417	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3418	       mqd->cp_mqd_base_addr_lo);
3419	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3420	       mqd->cp_mqd_base_addr_hi);
3421
3422	/* set MQD vmid to 0 */
3423	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3424	       mqd->cp_mqd_control);
3425
3426	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3427	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3428	       mqd->cp_hqd_pq_base_lo);
3429	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3430	       mqd->cp_hqd_pq_base_hi);
3431
3432	/* set up the HQD, this is similar to CP_RB0_CNTL */
3433	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3434	       mqd->cp_hqd_pq_control);
3435
3436	/* set the wb address whether it's enabled or not */
3437	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3438				mqd->cp_hqd_pq_rptr_report_addr_lo);
3439	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3440				mqd->cp_hqd_pq_rptr_report_addr_hi);
3441
3442	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3443	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3444	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3445	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3446	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3447
3448	/* enable the doorbell if requested */
3449	if (ring->use_doorbell) {
3450		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3451					(adev->doorbell_index.kiq * 2) << 2);
3452		/* If GC has entered CGPG, ringing a doorbell beyond the first page
3453		 * does not wake it up. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to work
3454		 * around this; the change has to stay aligned with the corresponding
3455		 * firmware update.
3456		 */
3457		if (check_if_enlarge_doorbell_range(adev))
3458			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3459					(adev->doorbell.size - 4));
3460		else
3461			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3462					(adev->doorbell_index.userqueue_end * 2) << 2);
3463	}
3464
3465	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3466	       mqd->cp_hqd_pq_doorbell_control);
3467
3468	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3469	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3470	       mqd->cp_hqd_pq_wptr_lo);
3471	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3472	       mqd->cp_hqd_pq_wptr_hi);
3473
3474	/* set the vmid for the queue */
3475	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3476
3477	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3478	       mqd->cp_hqd_persistent_state);
3479
3480	/* activate the queue */
3481	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3482	       mqd->cp_hqd_active);
3483
3484	if (ring->use_doorbell)
3485		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3486
3487	return 0;
3488}
3489
3490static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3491{
3492	struct amdgpu_device *adev = ring->adev;
3493	int j;
3494
3495	/* disable the queue if it's active */
3496	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3497
3498		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3499
3500		for (j = 0; j < adev->usec_timeout; j++) {
3501			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3502				break;
3503			udelay(1);
3504		}
3505
3506		if (j == adev->usec_timeout) {
3507			DRM_DEBUG("KIQ dequeue request failed.\n");
3508
3509			/* Manual disable if dequeue request times out */
3510			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3511		}
3512
3513		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3514		      0);
3515	}
3516
3517	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3518	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3519	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3520	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3521	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3522	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3523	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3524	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3525
3526	return 0;
3527}
3528
3529static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3530{
3531	struct amdgpu_device *adev = ring->adev;
3532	struct v9_mqd *mqd = ring->mqd_ptr;
3533	struct v9_mqd *tmp_mqd;
3534
3535	gfx_v9_0_kiq_setting(ring);
3536
3537	/* The GPU can be in a bad state during probe, and the driver may trigger
3538	 * a reset right after loading the SMU; in that case the MQD has not been
3539	 * initialized yet and must be (re)initialized by the driver.
3540	 * Check mqd->cp_hqd_pq_control, since that value should never be 0.
3541	 */
3542	tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[0].mqd_backup;
3543	if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control) {
3544		/* for the GPU_RESET case, restore the MQD to a clean state */
3545		if (adev->gfx.kiq[0].mqd_backup)
3546			memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct v9_mqd_allocation));
3547
3548		/* reset ring buffer */
3549		ring->wptr = 0;
3550		amdgpu_ring_clear_ring(ring);
3551
3552		mutex_lock(&adev->srbm_mutex);
3553		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3554		gfx_v9_0_kiq_init_register(ring);
3555		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3556		mutex_unlock(&adev->srbm_mutex);
3557	} else {
3558		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3559		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3560		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3561		if (amdgpu_sriov_vf(adev) && adev->in_suspend)
3562			amdgpu_ring_clear_ring(ring);
3563		mutex_lock(&adev->srbm_mutex);
3564		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3565		gfx_v9_0_mqd_init(ring);
3566		gfx_v9_0_kiq_init_register(ring);
3567		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3568		mutex_unlock(&adev->srbm_mutex);
3569
3570		if (adev->gfx.kiq[0].mqd_backup)
3571			memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct v9_mqd_allocation));
3572	}
3573
3574	return 0;
3575}
3576
3577static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3578{
3579	struct amdgpu_device *adev = ring->adev;
3580	struct v9_mqd *mqd = ring->mqd_ptr;
3581	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3582	struct v9_mqd *tmp_mqd;
3583
3584	/* As with the KIQ init above, the driver needs to (re)initialize the MQD
3585	 * if mqd->cp_hqd_pq_control shows it has never been initialized.
3586	 */
3587	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3588
3589	if (!tmp_mqd->cp_hqd_pq_control ||
3590	    (!amdgpu_in_reset(adev) && !adev->in_suspend)) {
3591		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3592		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3593		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3594		mutex_lock(&adev->srbm_mutex);
3595		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3596		gfx_v9_0_mqd_init(ring);
3597		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3598		mutex_unlock(&adev->srbm_mutex);
3599
3600		if (adev->gfx.mec.mqd_backup[mqd_idx])
3601			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3602	} else {
3603		/* restore MQD to a clean status */
3604		if (adev->gfx.mec.mqd_backup[mqd_idx])
3605			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3606		/* reset ring buffer */
3607		ring->wptr = 0;
3608		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
3609		amdgpu_ring_clear_ring(ring);
3610	}
3611
3612	return 0;
3613}
3614
3615static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3616{
3617	struct amdgpu_ring *ring;
3618	int r;
3619
3620	ring = &adev->gfx.kiq[0].ring;
3621
3622	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3623	if (unlikely(r != 0))
3624		return r;
3625
3626	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3627	if (unlikely(r != 0)) {
3628		amdgpu_bo_unreserve(ring->mqd_obj);
3629		return r;
3630	}
3631
3632	gfx_v9_0_kiq_init_queue(ring);
3633	amdgpu_bo_kunmap(ring->mqd_obj);
3634	ring->mqd_ptr = NULL;
3635	amdgpu_bo_unreserve(ring->mqd_obj);
3636	return 0;
3637}
3638
3639static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3640{
3641	struct amdgpu_ring *ring = NULL;
3642	int r = 0, i;
3643
3644	gfx_v9_0_cp_compute_enable(adev, true);
3645
3646	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3647		ring = &adev->gfx.compute_ring[i];
3648
3649		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3650		if (unlikely(r != 0))
3651			goto done;
3652		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3653		if (!r) {
3654			r = gfx_v9_0_kcq_init_queue(ring);
3655			amdgpu_bo_kunmap(ring->mqd_obj);
3656			ring->mqd_ptr = NULL;
3657		}
3658		amdgpu_bo_unreserve(ring->mqd_obj);
3659		if (r)
3660			goto done;
3661	}
3662
3663	r = amdgpu_gfx_enable_kcq(adev, 0);
3664done:
3665	return r;
3666}
3667
3668static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3669{
3670	int r, i;
3671	struct amdgpu_ring *ring;
3672
3673	if (!(adev->flags & AMD_IS_APU))
3674		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3675
3676	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3677		if (adev->gfx.num_gfx_rings) {
3678			/* legacy firmware loading */
3679			r = gfx_v9_0_cp_gfx_load_microcode(adev);
3680			if (r)
3681				return r;
3682		}
3683
3684		r = gfx_v9_0_cp_compute_load_microcode(adev);
3685		if (r)
3686			return r;
3687	}
3688
3689	r = gfx_v9_0_kiq_resume(adev);
3690	if (r)
3691		return r;
3692
3693	if (adev->gfx.num_gfx_rings) {
3694		r = gfx_v9_0_cp_gfx_resume(adev);
3695		if (r)
3696			return r;
3697	}
3698
3699	r = gfx_v9_0_kcq_resume(adev);
3700	if (r)
3701		return r;
3702
3703	if (adev->gfx.num_gfx_rings) {
3704		ring = &adev->gfx.gfx_ring[0];
3705		r = amdgpu_ring_test_helper(ring);
3706		if (r)
3707			return r;
3708	}
3709
3710	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3711		ring = &adev->gfx.compute_ring[i];
3712		amdgpu_ring_test_helper(ring);
3713	}
3714
3715	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3716
3717	return 0;
3718}
3719
3720static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3721{
3722	u32 tmp;
3723
3724	if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1) &&
3725	    amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2))
3726		return;
3727
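	/* mirror the DF 64K/2M/1G hash settings into the TCP address config */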
3728	tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3729	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3730				adev->df.hash_status.hash_64k);
3731	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3732				adev->df.hash_status.hash_2m);
3733	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3734				adev->df.hash_status.hash_1g);
3735	WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3736}
3737
3738static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3739{
3740	if (adev->gfx.num_gfx_rings)
3741		gfx_v9_0_cp_gfx_enable(adev, enable);
3742	gfx_v9_0_cp_compute_enable(adev, enable);
3743}
3744
3745static int gfx_v9_0_hw_init(void *handle)
3746{
3747	int r;
3748	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3749
3750	if (!amdgpu_sriov_vf(adev))
3751		gfx_v9_0_init_golden_registers(adev);
3752
3753	gfx_v9_0_constants_init(adev);
3754
3755	gfx_v9_0_init_tcp_config(adev);
3756
3757	r = adev->gfx.rlc.funcs->resume(adev);
3758	if (r)
3759		return r;
3760
3761	r = gfx_v9_0_cp_resume(adev);
3762	if (r)
3763		return r;
3764
3765	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
3766		gfx_v9_4_2_set_power_brake_sequence(adev);
3767
3768	return r;
3769}
3770
3771static int gfx_v9_0_hw_fini(void *handle)
3772{
3773	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3774
3775	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
3776		amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3777	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3778	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3779
3780	/* DF freeze and KCQ disable would fail after a RAS interrupt */
3781	if (!amdgpu_ras_intr_triggered())
3782		/* disable the KCQs so the CPC stops touching memory that is about to become invalid */
3783		amdgpu_gfx_disable_kcq(adev, 0);
3784
3785	if (amdgpu_sriov_vf(adev)) {
3786		gfx_v9_0_cp_gfx_enable(adev, false);
3787		/* Polling must be disabled for SR-IOV once the hardware is finished;
3788		 * otherwise the CPC engine may keep fetching a write-back address
3789		 * that is no longer valid after software teardown and trigger DMAR
3790		 * read errors on the hypervisor side.
3791		 */
3792		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3793		return 0;
3794	}
3795
3796	/* Use the CAIL deinitialization sequence when unbinding the device from
3797	 * the driver; otherwise the KIQ hangs when the device is bound again.
3798	 */
3799	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
3800		mutex_lock(&adev->srbm_mutex);
3801		soc15_grbm_select(adev, adev->gfx.kiq[0].ring.me,
3802				adev->gfx.kiq[0].ring.pipe,
3803				adev->gfx.kiq[0].ring.queue, 0, 0);
3804		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq[0].ring);
3805		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3806		mutex_unlock(&adev->srbm_mutex);
3807	}
3808
3809	gfx_v9_0_cp_enable(adev, false);
3810
3811	/* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
3812	if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
3813	    (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2))) {
3814		dev_dbg(adev->dev, "Skipping RLC halt\n");
3815		return 0;
3816	}
3817
3818	adev->gfx.rlc.funcs->stop(adev);
3819	return 0;
3820}
3821
3822static int gfx_v9_0_suspend(void *handle)
3823{
3824	return gfx_v9_0_hw_fini(handle);
3825}
3826
3827static int gfx_v9_0_resume(void *handle)
3828{
3829	return gfx_v9_0_hw_init(handle);
3830}
3831
3832static bool gfx_v9_0_is_idle(void *handle)
3833{
3834	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3835
3836	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3837				GRBM_STATUS, GUI_ACTIVE))
3838		return false;
3839	else
3840		return true;
3841}
3842
3843static int gfx_v9_0_wait_for_idle(void *handle)
3844{
3845	unsigned i;
3846	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3847
3848	for (i = 0; i < adev->usec_timeout; i++) {
3849		if (gfx_v9_0_is_idle(handle))
3850			return 0;
3851		udelay(1);
3852	}
3853	return -ETIMEDOUT;
3854}
3855
3856static int gfx_v9_0_soft_reset(void *handle)
3857{
3858	u32 grbm_soft_reset = 0;
3859	u32 tmp;
3860	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3861
3862	/* GRBM_STATUS */
3863	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3864	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3865		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3866		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3867		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3868		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3869		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3870		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3871						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3872		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3873						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3874	}
3875
3876	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3877		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3878						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3879	}
3880
3881	/* GRBM_STATUS2 */
3882	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3883	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3884		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3885						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3886
3887
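	/* if anything is busy: stop the RLC and CP, pulse the soft-reset bits, then let things settle */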
3888	if (grbm_soft_reset) {
3889		/* stop the rlc */
3890		adev->gfx.rlc.funcs->stop(adev);
3891
3892		if (adev->gfx.num_gfx_rings)
3893			/* Disable GFX parsing/prefetching */
3894			gfx_v9_0_cp_gfx_enable(adev, false);
3895
3896		/* Disable MEC parsing/prefetching */
3897		gfx_v9_0_cp_compute_enable(adev, false);
3898
3899		if (grbm_soft_reset) {
3900			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3901			tmp |= grbm_soft_reset;
3902			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3903			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3904			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3905
3906			udelay(50);
3907
3908			tmp &= ~grbm_soft_reset;
3909			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3910			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3911		}
3912
3913		/* Wait a little for things to settle down */
3914		udelay(50);
3915	}
3916	return 0;
3917}
3918
3919static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
3920{
3921	signed long r, cnt = 0;
3922	unsigned long flags;
3923	uint32_t seq, reg_val_offs = 0;
3924	uint64_t value = 0;
3925	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
3926	struct amdgpu_ring *ring = &kiq->ring;
3927
3928	BUG_ON(!ring->funcs->emit_rreg);
3929
3930	spin_lock_irqsave(&kiq->ring_lock, flags);
3931	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
3932		pr_err("critical bug! too many kiq readers\n");
3933		goto failed_unlock;
3934	}
3935	amdgpu_ring_alloc(ring, 32);
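	/* COPY_DATA packet: copy the 64-bit GPU clock value into the write-back
	 * slot at reg_val_offs, with write confirmation
	 */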
3936	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
3937	amdgpu_ring_write(ring, 9 |	/* src: register*/
3938				(5 << 8) |	/* dst: memory */
3939				(1 << 16) |	/* count sel */
3940				(1 << 20));	/* write confirm */
3941	amdgpu_ring_write(ring, 0);
3942	amdgpu_ring_write(ring, 0);
3943	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
3944				reg_val_offs * 4));
3945	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
3946				reg_val_offs * 4));
3947	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
3948	if (r)
3949		goto failed_undo;
3950
3951	amdgpu_ring_commit(ring);
3952	spin_unlock_irqrestore(&kiq->ring_lock, flags);
3953
3954	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
3955
3956	/* Don't keep waiting in the GPU-reset case, because that can block the
3957	 * gpu_recover() routine forever: e.g. this KIQ register read may be
3958	 * triggered from TTM, and ttm_bo_lock_delayed_workqueue() will never
3959	 * return while we keep waiting here, hanging gpu_recover().
3960	 *
3961	 * Likewise, don't keep waiting when called from IRQ context.
3962	 */
3964	if (r < 1 && (amdgpu_in_reset(adev)))
3965		goto failed_kiq_read;
3966
3967	might_sleep();
3968	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
3969		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
3970		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
3971	}
3972
3973	if (cnt > MAX_KIQ_REG_TRY)
3974		goto failed_kiq_read;
3975
3976	mb();
3977	value = (uint64_t)adev->wb.wb[reg_val_offs] |
3978		(uint64_t)adev->wb.wb[reg_val_offs + 1] << 32ULL;
3979	amdgpu_device_wb_free(adev, reg_val_offs);
3980	return value;
3981
3982failed_undo:
3983	amdgpu_ring_undo(ring);
3984failed_unlock:
3985	spin_unlock_irqrestore(&kiq->ring_lock, flags);
3986failed_kiq_read:
3987	if (reg_val_offs)
3988		amdgpu_device_wb_free(adev, reg_val_offs);
3989	pr_err("failed to read gpu clock\n");
3990	return ~0;
3991}
3992
3993static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3994{
3995	uint64_t clock, clock_lo, clock_hi, hi_check;
3996
3997	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
3998	case IP_VERSION(9, 3, 0):
3999		preempt_disable();
4000		clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4001		clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4002		hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4003		/* The SMUIO TSC runs at 100 MHz, so the 32-bit low word carries over
4004		 * roughly every 42 seconds.
4005		 */
4006		if (hi_check != clock_hi) {
4007			clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4008			clock_hi = hi_check;
4009		}
4010		preempt_enable();
4011		clock = clock_lo | (clock_hi << 32ULL);
4012		break;
4013	default:
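		/* keep the GFX block out of gfxoff while the RLC clock counter registers are read */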
4014		amdgpu_gfx_off_ctrl(adev, false);
4015		mutex_lock(&adev->gfx.gpu_clock_mutex);
4016		if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
4017			    IP_VERSION(9, 0, 1) &&
4018		    amdgpu_sriov_runtime(adev)) {
4019			clock = gfx_v9_0_kiq_read_clock(adev);
4020		} else {
4021			WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4022			clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4023				((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4024		}
4025		mutex_unlock(&adev->gfx.gpu_clock_mutex);
4026		amdgpu_gfx_off_ctrl(adev, true);
4027		break;
4028	}
4029	return clock;
4030}
4031
4032static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4033					  uint32_t vmid,
4034					  uint32_t gds_base, uint32_t gds_size,
4035					  uint32_t gws_base, uint32_t gws_size,
4036					  uint32_t oa_base, uint32_t oa_size)
4037{
4038	struct amdgpu_device *adev = ring->adev;
4039
4040	/* GDS Base */
4041	gfx_v9_0_write_data_to_reg(ring, 0, false,
4042				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4043				   gds_base);
4044
4045	/* GDS Size */
4046	gfx_v9_0_write_data_to_reg(ring, 0, false,
4047				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4048				   gds_size);
4049
4050	/* GWS */
4051	gfx_v9_0_write_data_to_reg(ring, 0, false,
4052				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4053				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4054
4055	/* OA */
4056	gfx_v9_0_write_data_to_reg(ring, 0, false,
4057				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4058				   (1 << (oa_size + oa_base)) - (1 << oa_base));
4059}
4060
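/* Hand-assembled gfx9 compute shaders used by the EDC/RAS GPR workarounds
 * below to initialize every VGPR and SGPR.
 */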
4061static const u32 vgpr_init_compute_shader[] =
4062{
4063	0xb07c0000, 0xbe8000ff,
4064	0x000000f8, 0xbf110800,
4065	0x7e000280, 0x7e020280,
4066	0x7e040280, 0x7e060280,
4067	0x7e080280, 0x7e0a0280,
4068	0x7e0c0280, 0x7e0e0280,
4069	0x80808800, 0xbe803200,
4070	0xbf84fff5, 0xbf9c0000,
4071	0xd28c0001, 0x0001007f,
4072	0xd28d0001, 0x0002027e,
4073	0x10020288, 0xb8810904,
4074	0xb7814000, 0xd1196a01,
4075	0x00000301, 0xbe800087,
4076	0xbefc00c1, 0xd89c4000,
4077	0x00020201, 0xd89cc080,
4078	0x00040401, 0x320202ff,
4079	0x00000800, 0x80808100,
4080	0xbf84fff8, 0x7e020280,
4081	0xbf810000, 0x00000000,
4082};
4083
4084static const u32 sgpr_init_compute_shader[] =
4085{
4086	0xb07c0000, 0xbe8000ff,
4087	0x0000005f, 0xbee50080,
4088	0xbe812c65, 0xbe822c65,
4089	0xbe832c65, 0xbe842c65,
4090	0xbe852c65, 0xb77c0005,
4091	0x80808500, 0xbf84fff8,
4092	0xbe800080, 0xbf810000,
4093};
4094
4095static const u32 vgpr_init_compute_shader_arcturus[] = {
4096	0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4097	0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4098	0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4099	0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4100	0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4101	0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4102	0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4103	0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4104	0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4105	0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4106	0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4107	0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4108	0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4109	0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4110	0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4111	0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4112	0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4113	0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4114	0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4115	0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4116	0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4117	0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4118	0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4119	0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4120	0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4121	0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4122	0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4123	0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4124	0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4125	0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4126	0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4127	0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4128	0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4129	0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4130	0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4131	0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4132	0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4133	0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4134	0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4135	0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4136	0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4137	0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4138	0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4139	0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4140	0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4141	0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4142	0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4143	0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4144	0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4145	0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4146	0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4147	0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4148	0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4149	0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4150	0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4151	0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4152	0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4153	0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4154	0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4155	0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4156	0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4157	0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4158	0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4159	0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4160	0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4161	0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4162	0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4163	0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4164	0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4165	0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4166	0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4167	0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4168	0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4169	0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4170	0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4171	0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4172	0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4173	0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4174	0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4175	0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4176	0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4177	0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4178	0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4179	0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4180	0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4181	0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4182	0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4183	0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4184	0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4185	0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4186	0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4187	0xbf84fff8, 0xbf810000,
4188};
4189
4190/* When the register arrays below change, also update gpr_reg_size and
4191 * sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds so that
4192 * all gfx9 ASICs remain covered. */
4193static const struct soc15_reg_entry vgpr_init_regs[] = {
4194   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4195   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4196   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4197   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4198   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4199   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4200   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4201   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4202   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4203   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4204   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4205   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4206   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4207   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4208};
4209
4210static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4211   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4212   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4213   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4214   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4215   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4216   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4217   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4218   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4219   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4220   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4221   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4222   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4223   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4224   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4225};
4226
4227static const struct soc15_reg_entry sgpr1_init_regs[] = {
4228   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4229   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4230   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4231   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4232   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4233   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4234   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4235   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4236   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4237   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4238   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4239   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4240   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4241   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4242};
4243
4244static const struct soc15_reg_entry sgpr2_init_regs[] = {
4245   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4246   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4247   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4248   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4249   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4250   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4251   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4252   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4253   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4254   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4255   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4256   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4257   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4258   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4259};
4260
4261static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4262   { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4263   { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4264   { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4265   { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4266   { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4267   { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4268   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4269   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4270   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4271   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4272   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4273   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4274   { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4275   { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4276   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4277   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4278   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4279   { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4280   { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4281   { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4282   { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4283   { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4284   { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4285   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4286   { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4287   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4288   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4289   { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4290   { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4291   { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4292   { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4293   { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4294   { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4295};
4296
4297static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4298{
4299	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4300	int i, r;
4301
4302	/* only support when RAS is enabled */
4303	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4304		return 0;
4305
4306	r = amdgpu_ring_alloc(ring, 7);
4307	if (r) {
4308		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4309			ring->name, r);
4310		return r;
4311	}
4312
4313	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4314	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4315
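	/* the DMA_DATA packet below zero-fills the entire GDS aperture
	 * (dst_sel 1 = GDS, src_sel 2 = immediate data)
	 */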
4316	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4317	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4318				PACKET3_DMA_DATA_DST_SEL(1) |
4319				PACKET3_DMA_DATA_SRC_SEL(2) |
4320				PACKET3_DMA_DATA_ENGINE(0)));
4321	amdgpu_ring_write(ring, 0);
4322	amdgpu_ring_write(ring, 0);
4323	amdgpu_ring_write(ring, 0);
4324	amdgpu_ring_write(ring, 0);
4325	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4326				adev->gds.gds_size);
4327
4328	amdgpu_ring_commit(ring);
4329
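	/* wait for the CP to consume the packet: rptr catching up with wptr */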
4330	for (i = 0; i < adev->usec_timeout; i++) {
4331		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4332			break;
4333		udelay(1);
4334	}
4335
4336	if (i >= adev->usec_timeout)
4337		r = -ETIMEDOUT;
4338
4339	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4340
4341	return r;
4342}
4343
4344static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4345{
4346	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4347	struct amdgpu_ib ib;
4348	struct dma_fence *f = NULL;
4349	int r, i;
4350	unsigned total_size, vgpr_offset, sgpr_offset;
4351	u64 gpu_addr;
4352
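	/* total CU count, used to size the dispatches so every CU runs the init shaders */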
4353	int compute_dim_x = adev->gfx.config.max_shader_engines *
4354						adev->gfx.config.max_cu_per_sh *
4355						adev->gfx.config.max_sh_per_se;
4356	int sgpr_work_group_size = 5;
4357	int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4358	int vgpr_init_shader_size;
4359	const u32 *vgpr_init_shader_ptr;
4360	const struct soc15_reg_entry *vgpr_init_regs_ptr;
4361
4362	/* only support when RAS is enabled */
4363	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4364		return 0;
4365
4366	/* bail if the compute ring is not ready */
4367	if (!ring->sched.ready)
4368		return 0;
4369
4370	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) {
4371		vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4372		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4373		vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4374	} else {
4375		vgpr_init_shader_ptr = vgpr_init_compute_shader;
4376		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4377		vgpr_init_regs_ptr = vgpr_init_regs;
4378	}
4379
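	/* IB layout: PM4 packets for the three dispatches first, then the
	 * VGPR init shader, then the SGPR init shader
	 */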
4380	total_size =
4381		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4382	total_size +=
4383		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4384	total_size +=
4385		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4386	total_size = ALIGN(total_size, 256);
4387	vgpr_offset = total_size;
4388	total_size += ALIGN(vgpr_init_shader_size, 256);
4389	sgpr_offset = total_size;
4390	total_size += sizeof(sgpr_init_compute_shader);
4391
4392	/* allocate an indirect buffer to put the commands in */
4393	memset(&ib, 0, sizeof(ib));
4394	r = amdgpu_ib_get(adev, NULL, total_size,
4395					AMDGPU_IB_POOL_DIRECT, &ib);
4396	if (r) {
4397		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4398		return r;
4399	}
4400
4401	/* load the compute shaders */
4402	for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4403		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4404
4405	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4406		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4407
4408	/* init the ib length to 0 */
4409	ib.length_dw = 0;
4410
4411	/* VGPR */
4412	/* write the register state for the compute dispatch */
4413	for (i = 0; i < gpr_reg_size; i++) {
4414		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4415		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4416								- PACKET3_SET_SH_REG_START;
4417		ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4418	}
4419	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
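	/* COMPUTE_PGM_LO/HI take the shader address in 256-byte units, hence the >> 8 */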
4420	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4421	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4422	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4423							- PACKET3_SET_SH_REG_START;
4424	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4425	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4426
4427	/* write dispatch packet */
4428	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4429	ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4430	ib.ptr[ib.length_dw++] = 1; /* y */
4431	ib.ptr[ib.length_dw++] = 1; /* z */
4432	ib.ptr[ib.length_dw++] =
4433		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4434
4435	/* write CS partial flush packet */
4436	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4437	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4438
4439	/* SGPR1 */
4440	/* write the register state for the compute dispatch */
4441	for (i = 0; i < gpr_reg_size; i++) {
4442		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4443		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4444								- PACKET3_SET_SH_REG_START;
4445		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4446	}
4447	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4448	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4449	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4450	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4451							- PACKET3_SET_SH_REG_START;
4452	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4453	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4454
4455	/* write dispatch packet */
4456	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4457	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4458	ib.ptr[ib.length_dw++] = 1; /* y */
4459	ib.ptr[ib.length_dw++] = 1; /* z */
4460	ib.ptr[ib.length_dw++] =
4461		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4462
4463	/* write CS partial flush packet */
4464	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4465	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4466
4467	/* SGPR2 */
4468	/* write the register state for the compute dispatch */
4469	for (i = 0; i < gpr_reg_size; i++) {
4470		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4471		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4472								- PACKET3_SET_SH_REG_START;
4473		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4474	}
4475	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4476	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4477	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4478	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4479							- PACKET3_SET_SH_REG_START;
4480	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4481	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4482
4483	/* write dispatch packet */
4484	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4485	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4486	ib.ptr[ib.length_dw++] = 1; /* y */
4487	ib.ptr[ib.length_dw++] = 1; /* z */
4488	ib.ptr[ib.length_dw++] =
4489		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4490
4491	/* write CS partial flush packet */
4492	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4493	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4494
4495	/* schedule the ib on the ring */
4496	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4497	if (r) {
4498		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4499		goto fail;
4500	}
4501
4502	/* wait for the GPU to finish processing the IB */
4503	r = dma_fence_wait(f, false);
4504	if (r) {
4505		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4506		goto fail;
4507	}
4508
4509fail:
4510	amdgpu_ib_free(adev, &ib, NULL);
4511	dma_fence_put(f);
4512
4513	return r;
4514}
4515
4516static int gfx_v9_0_early_init(void *handle)
4517{
4518	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4519
4520	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
4521
4522	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
4523	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4524		adev->gfx.num_gfx_rings = 0;
4525	else
4526		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4527	adev->gfx.xcc_mask = 1;
4528	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4529					  AMDGPU_MAX_COMPUTE_RINGS);
4530	gfx_v9_0_set_kiq_pm4_funcs(adev);
4531	gfx_v9_0_set_ring_funcs(adev);
4532	gfx_v9_0_set_irq_funcs(adev);
4533	gfx_v9_0_set_gds_init(adev);
4534	gfx_v9_0_set_rlc_funcs(adev);
4535
4536	/* init rlcg reg access ctrl */
4537	gfx_v9_0_init_rlcg_reg_access_ctrl(adev);
4538
4539	return gfx_v9_0_init_microcode(adev);
4540}
4541
4542static int gfx_v9_0_ecc_late_init(void *handle)
4543{
4544	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4545	int r;
4546
4547	/*
4548	 * Temporary workaround: on several cards the CP firmware fails to
4549	 * update the read pointer while CPDMA is writing the clearing
4550	 * operation to GDS during the suspend/resume sequence, so limit
4551	 * this operation to the cold boot sequence.
4552	 */
4553	if ((!adev->in_suspend) &&
4554	    (adev->gds.gds_size)) {
4555		r = gfx_v9_0_do_edc_gds_workarounds(adev);
4556		if (r)
4557			return r;
4558	}
4559
4560	/* requires IBs so do in late init after IB pool is initialized */
4561	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4562		r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
4563	else
4564		r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4565
4566	if (r)
4567		return r;
4568
4569	if (adev->gfx.ras &&
4570	    adev->gfx.ras->enable_watchdog_timer)
4571		adev->gfx.ras->enable_watchdog_timer(adev);
4572
4573	return 0;
4574}
4575
4576static int gfx_v9_0_late_init(void *handle)
4577{
4578	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4579	int r;
4580
4581	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4582	if (r)
4583		return r;
4584
4585	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4586	if (r)
4587		return r;
4588
4589	r = gfx_v9_0_ecc_late_init(handle);
4590	if (r)
4591		return r;
4592
4593	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4594		gfx_v9_4_2_debug_trap_config_init(adev,
4595			adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
4596	else
4597		gfx_v9_0_debug_trap_config_init(adev,
4598			adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
4599
4600	return 0;
4601}
4602
4603static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4604{
4605	uint32_t rlc_setting;
4606
4607	/* if RLC is not enabled, do nothing */
4608	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4609	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4610		return false;
4611
4612	return true;
4613}
4614
4615static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
4616{
4617	uint32_t data;
4618	unsigned i;
4619
4620	data = RLC_SAFE_MODE__CMD_MASK;
4621	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4622	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4623
4624	/* wait for RLC_SAFE_MODE */
4625	for (i = 0; i < adev->usec_timeout; i++) {
4626		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4627			break;
4628		udelay(1);
4629	}
4630}
4631
4632static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
4633{
4634	uint32_t data;
4635
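	/* CMD with the MESSAGE field left at 0 asks the RLC to leave safe mode */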
4636	data = RLC_SAFE_MODE__CMD_MASK;
4637	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4638}
4639
4640static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4641						bool enable)
4642{
4643	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4644
4645	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4646		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4647		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4648			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4649	} else {
4650		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4651		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4652			gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4653	}
4654
4655	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4656}
4657
4658static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4659						bool enable)
4660{
4661	/* TODO: double check if we need to perform under safe mode */
4662	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4663
4664	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4665		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4666	else
4667		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4668
4669	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4670		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4671	else
4672		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4673
4674	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4675}
4676
4677static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4678						      bool enable)
4679{
4680	uint32_t data, def;
4681
4682	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4683
4684	/* MGCG is disabled by the HW by default */
4685	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4686		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4687		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4688
4689		if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1))
4690			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4691
4692		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4693			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4694			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4695
4696		/* only for Vega10 & Raven1 */
4697		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4698
4699		if (def != data)
4700			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4701
4702		/* MGLS is a global flag to control all MGLS in GFX */
4703		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4704			/* 2 - RLC memory Light sleep */
4705			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4706				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4707				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4708				if (def != data)
4709					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4710			}
4711			/* 3 - CP memory Light sleep */
4712			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4713				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4714				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4715				if (def != data)
4716					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4717			}
4718		}
4719	} else {
4720		/* 1 - MGCG_OVERRIDE */
4721		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4722
4723		if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1))
4724			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4725
4726		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4727			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4728			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4729			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4730
4731		if (def != data)
4732			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4733
4734		/* 2 - disable MGLS in RLC */
4735		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4736		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4737			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4738			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4739		}
4740
4741		/* 3 - disable MGLS in CP */
4742		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4743		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4744			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4745			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4746		}
4747	}
4748
4749	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4750}
4751
4752static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4753					   bool enable)
4754{
4755	uint32_t data, def;
4756
4757	if (!adev->gfx.num_gfx_rings)
4758		return;
4759
4760	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4761
4762	/* Enable 3D CGCG/CGLS */
4763	if (enable) {
4764		/* write cmd to clear cgcg/cgls ov */
4765		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4766		/* unset CGCG override */
4767		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4768		/* update CGCG and CGLS override bits */
4769		if (def != data)
4770			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4771
4772		/* enable 3Dcgcg FSM(0x0000363f) */
4773		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4774
4775		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
4776			data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4777				RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4778		else
4779			data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
4780
4781		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4782			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4783				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4784		if (def != data)
4785			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4786
4787		/* set IDLE_POLL_COUNT(0x00900100) */
4788		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4789		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4790			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4791		if (def != data)
4792			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4793	} else {
4794		/* Disable CGCG/CGLS */
4795		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4796		/* disable cgcg, cgls should be disabled */
4797		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4798			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4799		/* disable cgcg and cgls in FSM */
4800		if (def != data)
4801			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4802	}
4803
4804	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4805}
4806
4807static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4808						      bool enable)
4809{
4810	uint32_t def, data;
4811
4812	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4813
4814	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4815		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4816		/* unset CGCG override */
4817		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4818		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4819			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4820		else
4821			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4822		/* update CGCG and CGLS override bits */
4823		if (def != data)
4824			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4825
4826		/* enable cgcg FSM(0x0000363F) */
4827		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4828
4829		if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1))
4830			data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4831				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4832		else
4833			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4834				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4835		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4836			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4837				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4838		if (def != data)
4839			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4840
4841		/* set IDLE_POLL_COUNT(0x00900100) */
4842		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4843		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4844			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4845		if (def != data)
4846			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4847	} else {
4848		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4849		/* reset CGCG/CGLS bits */
4850		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4851		/* disable cgcg and cgls in FSM */
4852		if (def != data)
4853			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4854	}
4855
4856	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4857}
4858
4859static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4860					    bool enable)
4861{
4862	if (enable) {
4863		/* CGCG/CGLS should be enabled after MGCG/MGLS
4864		 * ===  MGCG + MGLS ===
4865		 */
4866		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4867		/* ===  CGCG /CGLS for GFX 3D Only === */
4868		gfx_v9_0_update_3d_clock_gating(adev, enable);
4869		/* ===  CGCG + CGLS === */
4870		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4871	} else {
4872		/* CGCG/CGLS should be disabled before MGCG/MGLS
4873		 * ===  CGCG + CGLS ===
4874		 */
4875		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4876		/* ===  CGCG /CGLS for GFX 3D Only === */
4877		gfx_v9_0_update_3d_clock_gating(adev, enable);
4878		/* ===  MGCG + MGLS === */
4879		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4880	}
4881	return 0;
4882}
4883
4884static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
4885					      unsigned int vmid)
4886{
4887	u32 reg, data;
4888
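	/* program the VMID the RLC streaming performance monitor (SPM) uses for its memory accesses */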
4889	reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
4890	if (amdgpu_sriov_is_pp_one_vf(adev))
4891		data = RREG32_NO_KIQ(reg);
4892	else
4893		data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
4894
4895	data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
4896	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
4897
4898	if (amdgpu_sriov_is_pp_one_vf(adev))
4899		WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
4900	else
4901		WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
4902}
4903
4904static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid)
4905{
4906	amdgpu_gfx_off_ctrl(adev, false);
4907
4908	gfx_v9_0_update_spm_vmid_internal(adev, vmid);
4909
4910	amdgpu_gfx_off_ctrl(adev, true);
4911}
4912
4913static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
4914					uint32_t offset,
4915					struct soc15_reg_rlcg *entries, int arr_size)
4916{
4917	int i;
4918	uint32_t reg;
4919
4920	if (!entries)
4921		return false;
4922
4923	for (i = 0; i < arr_size; i++) {
4924		const struct soc15_reg_rlcg *entry;
4925
4926		entry = &entries[i];
4927		reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
4928		if (offset == reg)
4929			return true;
4930	}
4931
4932	return false;
4933}
4934
4935static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
4936{
4937	return gfx_v9_0_check_rlcg_range(adev, offset,
4938					(void *)rlcg_access_gc_9_0,
4939					ARRAY_SIZE(rlcg_access_gc_9_0));
4940}
4941
4942static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4943	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4944	.set_safe_mode = gfx_v9_0_set_safe_mode,
4945	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
4946	.init = gfx_v9_0_rlc_init,
4947	.get_csb_size = gfx_v9_0_get_csb_size,
4948	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
4949	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4950	.resume = gfx_v9_0_rlc_resume,
4951	.stop = gfx_v9_0_rlc_stop,
4952	.reset = gfx_v9_0_rlc_reset,
4953	.start = gfx_v9_0_rlc_start,
4954	.update_spm_vmid = gfx_v9_0_update_spm_vmid,
4955	.is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
4956};
4957
4958static int gfx_v9_0_set_powergating_state(void *handle,
4959					  enum amd_powergating_state state)
4960{
4961	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4962	bool enable = (state == AMD_PG_STATE_GATE);
4963
4964	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
4965	case IP_VERSION(9, 2, 2):
4966	case IP_VERSION(9, 1, 0):
4967	case IP_VERSION(9, 3, 0):
4968		if (!enable)
4969			amdgpu_gfx_off_ctrl(adev, false);
4970
4971		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4972			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4973			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4974		} else {
4975			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4976			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4977		}
4978
4979		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4980			gfx_v9_0_enable_cp_power_gating(adev, true);
4981		else
4982			gfx_v9_0_enable_cp_power_gating(adev, false);
4983
4984		/* update gfx cgpg state */
4985		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4986
4987		/* update mgcg state */
4988		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4989
4990		if (enable)
4991			amdgpu_gfx_off_ctrl(adev, true);
4992		break;
4993	case IP_VERSION(9, 2, 1):
4994		amdgpu_gfx_off_ctrl(adev, enable);
4995		break;
4996	default:
4997		break;
4998	}
4999
5000	return 0;
5001}
5002
5003static int gfx_v9_0_set_clockgating_state(void *handle,
5004					  enum amd_clockgating_state state)
5005{
5006	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5007
5008	if (amdgpu_sriov_vf(adev))
5009		return 0;
5010
5011	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5012	case IP_VERSION(9, 0, 1):
5013	case IP_VERSION(9, 2, 1):
5014	case IP_VERSION(9, 4, 0):
5015	case IP_VERSION(9, 2, 2):
5016	case IP_VERSION(9, 1, 0):
5017	case IP_VERSION(9, 4, 1):
5018	case IP_VERSION(9, 3, 0):
5019	case IP_VERSION(9, 4, 2):
5020		gfx_v9_0_update_gfx_clock_gating(adev,
5021						 state == AMD_CG_STATE_GATE);
5022		break;
5023	default:
5024		break;
5025	}
5026	return 0;
5027}
5028
5029static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags)
5030{
5031	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5032	int data;
5033
5034	if (amdgpu_sriov_vf(adev))
5035		*flags = 0;
5036
5037	/* AMD_CG_SUPPORT_GFX_MGCG */
5038	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5039	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5040		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5041
5042	/* AMD_CG_SUPPORT_GFX_CGCG */
5043	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5044	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5045		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5046
5047	/* AMD_CG_SUPPORT_GFX_CGLS */
5048	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5049		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5050
5051	/* AMD_CG_SUPPORT_GFX_RLC_LS */
5052	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5053	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5054		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5055
5056	/* AMD_CG_SUPPORT_GFX_CP_LS */
5057	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5058	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5059		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5060
5061	if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) {
5062		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
5063		data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5064		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5065			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5066
5067		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
5068		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5069			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5070	}
5071}
5072
5073static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5074{
5075	return *ring->rptr_cpu_addr; /* gfx9 is 32bit rptr */
5076}
5077
5078static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5079{
5080	struct amdgpu_device *adev = ring->adev;
5081	u64 wptr;
5082
5083	/* XXX check if swapping is necessary on BE */
5084	if (ring->use_doorbell) {
5085		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5086	} else {
5087		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5088		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5089	}
5090
5091	return wptr;
5092}
5093
5094static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5095{
5096	struct amdgpu_device *adev = ring->adev;
5097
5098	if (ring->use_doorbell) {
5099		/* XXX check if swapping is necessary on BE */
5100		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5101		WDOORBELL64(ring->doorbell_index, ring->wptr);
5102	} else {
5103		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5104		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5105	}
5106}
5107
5108static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5109{
5110	struct amdgpu_device *adev = ring->adev;
5111	u32 ref_and_mask, reg_mem_engine;
5112	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5113
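	/* pick the per-engine HDP flush request/done bit for this queue */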
5114	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5115		switch (ring->me) {
5116		case 1:
5117			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5118			break;
5119		case 2:
5120			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5121			break;
5122		default:
5123			return;
5124		}
5125		reg_mem_engine = 0;
5126	} else {
5127		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5128		reg_mem_engine = 1; /* pfp */
5129	}
5130
5131	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5132			      adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5133			      adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5134			      ref_and_mask, ref_and_mask, 0x20);
5135}
5136
5137static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5138					struct amdgpu_job *job,
5139					struct amdgpu_ib *ib,
5140					uint32_t flags)
5141{
5142	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5143	u32 header, control = 0;
5144
5145	if (ib->flags & AMDGPU_IB_FLAG_CE)
5146		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5147	else
5148		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5149
5150	control |= ib->length_dw | (vmid << 24);
5151
5152	if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
5153		control |= INDIRECT_BUFFER_PRE_ENB(1);
5154
5155		if (flags & AMDGPU_IB_PREEMPTED)
5156			control |= INDIRECT_BUFFER_PRE_RESUME(1);
5157
5158		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5159			gfx_v9_0_ring_emit_de_meta(ring,
5160						   (!amdgpu_sriov_vf(ring->adev) &&
5161						   flags & AMDGPU_IB_PREEMPTED) ?
5162						   true : false,
5163						   job->gds_size > 0 && job->gds_base != 0);
5164	}
5165
5166	amdgpu_ring_write(ring, header);
5167	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5168	amdgpu_ring_write(ring,
5169#ifdef __BIG_ENDIAN
5170		(2 << 0) |
5171#endif
5172		lower_32_bits(ib->gpu_addr));
5173	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5174	amdgpu_ring_ib_on_emit_cntl(ring);
5175	amdgpu_ring_write(ring, control);
5176}
5177
5178static void gfx_v9_0_ring_patch_cntl(struct amdgpu_ring *ring,
5179				     unsigned offset)
5180{
5181	u32 control = ring->ring[offset];
5182
5183	control |= INDIRECT_BUFFER_PRE_RESUME(1);
5184	ring->ring[offset] = control;
5185}
5186
5187static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring,
5188					unsigned offset)
5189{
5190	struct amdgpu_device *adev = ring->adev;
5191	void *ce_payload_cpu_addr;
5192	uint64_t payload_offset, payload_size;
5193
5194	payload_size = sizeof(struct v9_ce_ib_state);
5195
5196	if (ring->is_mes_queue) {
5197		payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5198					  gfx[0].gfx_meta_data) +
5199			offsetof(struct v9_gfx_meta_data, ce_payload);
5200		ce_payload_cpu_addr =
5201			amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
5202	} else {
5203		payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload);
5204		ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
5205	}
5206
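	/* copy the saved CE payload back into the ring, splitting it if it wraps past the end of the ring buffer */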
5207	if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
5208		memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size);
5209	} else {
5210		memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr,
5211		       (ring->buf_mask + 1 - offset) << 2);
5212		payload_size -= (ring->buf_mask + 1 - offset) << 2;
5213		memcpy((void *)&ring->ring[0],
5214		       ce_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
5215		       payload_size);
5216	}
5217}
5218
5219static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring,
5220					unsigned offset)
5221{
5222	struct amdgpu_device *adev = ring->adev;
5223	void *de_payload_cpu_addr;
5224	uint64_t payload_offset, payload_size;
5225
5226	payload_size = sizeof(struct v9_de_ib_state);
5227
5228	if (ring->is_mes_queue) {
5229		payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5230					  gfx[0].gfx_meta_data) +
5231			offsetof(struct v9_gfx_meta_data, de_payload);
5232		de_payload_cpu_addr =
5233			amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
5234	} else {
5235		payload_offset = offsetof(struct v9_gfx_meta_data, de_payload);
5236		de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
5237	}
5238
5239	((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status =
5240		IB_COMPLETION_STATUS_PREEMPTED;
5241
5242	if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
5243		memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, payload_size);
5244	} else {
5245		memcpy((void *)&ring->ring[offset], de_payload_cpu_addr,
5246		       (ring->buf_mask + 1 - offset) << 2);
5247		payload_size -= (ring->buf_mask + 1 - offset) << 2;
5248		memcpy((void *)&ring->ring[0],
5249		       de_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
5250		       payload_size);
5251	}
5252}
5253
5254static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5255					  struct amdgpu_job *job,
5256					  struct amdgpu_ib *ib,
5257					  uint32_t flags)
5258{
5259	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5260	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5261
5262	/* Currently there is a high likelihood of a wave ID mismatch
5263	 * between ME and GDS, leading to a hw deadlock, because ME generates
5264	 * different wave IDs than the GDS expects. This situation happens
5265	 * randomly when at least 5 compute pipes use GDS ordered append.
5266	 * The wave IDs generated by ME are also wrong after suspend/resume.
5267	 * Those are probably bugs somewhere else in the kernel driver.
5268	 *
5269	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5270	 * GDS to 0 for this ring (me/pipe).
5271	 */
5272	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5273		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5274		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5275		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5276	}
5277
5278	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5279	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5280	amdgpu_ring_write(ring,
5281#ifdef __BIG_ENDIAN
5282				(2 << 0) |
5283#endif
5284				lower_32_bits(ib->gpu_addr));
5285	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5286	amdgpu_ring_write(ring, control);
5287}
5288
5289static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5290				     u64 seq, unsigned flags)
5291{
5292	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5293	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5294	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5295	bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
5296	uint32_t dw2 = 0;
5297
5298	/* RELEASE_MEM - flush caches, send int */
5299	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5300
5301	if (writeback) {
5302		dw2 = EOP_TC_NC_ACTION_EN;
5303	} else {
5304		dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN |
5305				EOP_TC_MD_ACTION_EN;
5306	}
5307	dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5308				EVENT_INDEX(5);
5309	if (exec)
5310		dw2 |= EOP_EXEC;
5311
5312	amdgpu_ring_write(ring, dw2);
5313	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5314
5315	/*
5316	 * the address must be Qword aligned for a 64bit write, and Dword
5317	 * aligned when only sending the low 32bit data (data high is discarded)
5318	 */
5319	if (write64bit)
5320		BUG_ON(addr & 0x7);
5321	else
5322		BUG_ON(addr & 0x3);
5323	amdgpu_ring_write(ring, lower_32_bits(addr));
5324	amdgpu_ring_write(ring, upper_32_bits(addr));
5325	amdgpu_ring_write(ring, lower_32_bits(seq));
5326	amdgpu_ring_write(ring, upper_32_bits(seq));
5327	amdgpu_ring_write(ring, 0);
5328}
5329
5330static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5331{
5332	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5333	uint32_t seq = ring->fence_drv.sync_seq;
5334	uint64_t addr = ring->fence_drv.gpu_addr;
5335
5336	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5337			      lower_32_bits(addr), upper_32_bits(addr),
5338			      seq, 0xffffffff, 4);
5339}
5340
5341static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5342					unsigned vmid, uint64_t pd_addr)
5343{
5344	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5345
5346	/* compute doesn't have PFP */
5347	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5348		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5349		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5350		amdgpu_ring_write(ring, 0x0);
5351	}
5352}
5353
5354static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5355{
5356	return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
5357}
5358
5359static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5360{
5361	u64 wptr;
5362
5363	/* XXX check if swapping is necessary on BE */
5364	if (ring->use_doorbell)
5365		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5366	else
5367		BUG();
5368	return wptr;
5369}
5370
5371static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5372{
5373	struct amdgpu_device *adev = ring->adev;
5374
5375	/* XXX check if swapping is necessary on BE */
5376	if (ring->use_doorbell) {
5377		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5378		WDOORBELL64(ring->doorbell_index, ring->wptr);
5379	} else {
5380		BUG(); /* only DOORBELL method supported on gfx9 now */
5381	}
5382}
5383
5384static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5385					 u64 seq, unsigned int flags)
5386{
5387	struct amdgpu_device *adev = ring->adev;
5388
5389	/* we only allocate 32 bits of writeback space for each fence seq */
5390	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5391
5392	/* write fence seq to the "addr" */
5393	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5394	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5395				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5396	amdgpu_ring_write(ring, lower_32_bits(addr));
5397	amdgpu_ring_write(ring, upper_32_bits(addr));
5398	amdgpu_ring_write(ring, lower_32_bits(seq));
5399
5400	if (flags & AMDGPU_FENCE_FLAG_INT) {
5401		/* set register to trigger INT */
5402		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5403		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5404					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5405		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5406		amdgpu_ring_write(ring, 0);
5407		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5408	}
5409}
5410
5411static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5412{
5413	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5414	amdgpu_ring_write(ring, 0);
5415}
5416
5417static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
5418{
5419	struct amdgpu_device *adev = ring->adev;
5420	struct v9_ce_ib_state ce_payload = {0};
5421	uint64_t offset, ce_payload_gpu_addr;
5422	void *ce_payload_cpu_addr;
5423	int cnt;
5424
5425	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5426
5427	if (ring->is_mes_queue) {
5428		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5429				  gfx[0].gfx_meta_data) +
5430			offsetof(struct v9_gfx_meta_data, ce_payload);
5431		ce_payload_gpu_addr =
5432			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5433		ce_payload_cpu_addr =
5434			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5435	} else {
5436		offset = offsetof(struct v9_gfx_meta_data, ce_payload);
5437		ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5438		ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5439	}
5440
5441	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5442	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5443				 WRITE_DATA_DST_SEL(8) |
5444				 WR_CONFIRM) |
5445				 WRITE_DATA_CACHE_POLICY(0));
5446	amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
5447	amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
5448
5449	amdgpu_ring_ib_on_emit_ce(ring);
5450
5451	if (resume)
5452		amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
5453					   sizeof(ce_payload) >> 2);
5454	else
5455		amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
5456					   sizeof(ce_payload) >> 2);
5457}
5458
5459static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring)
5460{
5461	int i, r = 0;
5462	struct amdgpu_device *adev = ring->adev;
5463	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
5464	struct amdgpu_ring *kiq_ring = &kiq->ring;
5465	unsigned long flags;
5466
5467	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
5468		return -EINVAL;
5469
5470	spin_lock_irqsave(&kiq->ring_lock, flags);
5471
5472	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
5473		spin_unlock_irqrestore(&kiq->ring_lock, flags);
5474		return -ENOMEM;
5475	}
5476
5477	/* assert preemption condition */
5478	amdgpu_ring_set_preempt_cond_exec(ring, false);
5479
5480	ring->trail_seq += 1;
5481	amdgpu_ring_alloc(ring, 13);
5482	gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
5483				 ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT);
5484
5485	/* assert IB preemption, emit the trailing fence */
5486	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
5487				   ring->trail_fence_gpu_addr,
5488				   ring->trail_seq);
5489
5490	amdgpu_ring_commit(kiq_ring);
5491	spin_unlock_irqrestore(&kiq->ring_lock, flags);
5492
5493	/* poll the trailing fence */
5494	for (i = 0; i < adev->usec_timeout; i++) {
5495		if (ring->trail_seq ==
5496			le32_to_cpu(*ring->trail_fence_cpu_addr))
5497			break;
5498		udelay(1);
5499	}
5500
5501	if (i >= adev->usec_timeout) {
5502		r = -EINVAL;
5503		DRM_WARN("ring %d timed out preempting ib\n", ring->idx);
5504	}
5505
5506	/* reset the CP_VMID_PREEMPT after trailing fence */
5507	amdgpu_ring_emit_wreg(ring,
5508			      SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT),
5509			      0x0);
5510	amdgpu_ring_commit(ring);
5511
5512	/* deassert preemption condition */
5513	amdgpu_ring_set_preempt_cond_exec(ring, true);
5514	return r;
5515}
5516
5517static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds)
5518{
5519	struct amdgpu_device *adev = ring->adev;
5520	struct v9_de_ib_state de_payload = {0};
5521	uint64_t offset, gds_addr, de_payload_gpu_addr;
5522	void *de_payload_cpu_addr;
5523	int cnt;
5524
5525	if (ring->is_mes_queue) {
5526		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5527				  gfx[0].gfx_meta_data) +
5528			offsetof(struct v9_gfx_meta_data, de_payload);
5529		de_payload_gpu_addr =
5530			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5531		de_payload_cpu_addr =
5532			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5533
5534		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5535				  gfx[0].gds_backup) +
5536			offsetof(struct v9_gfx_meta_data, de_payload);
5537		gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5538	} else {
5539		offset = offsetof(struct v9_gfx_meta_data, de_payload);
5540		de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5541		de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5542
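		/* the GDS backup lives at the page-aligned tail of the CSA */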
5543		gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
5544				 AMDGPU_CSA_SIZE - adev->gds.gds_size,
5545				 PAGE_SIZE);
5546	}
5547
5548	if (usegds) {
5549		de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5550		de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5551	}
5552
5553	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5554	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5555	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5556				 WRITE_DATA_DST_SEL(8) |
5557				 WR_CONFIRM) |
5558				 WRITE_DATA_CACHE_POLICY(0));
5559	amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
5560	amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
5561
5562	amdgpu_ring_ib_on_emit_de(ring);
5563	if (resume)
5564		amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
5565					   sizeof(de_payload) >> 2);
5566	else
5567		amdgpu_ring_write_multiple(ring, (void *)&de_payload,
5568					   sizeof(de_payload) >> 2);
5569}
5570
5571static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5572				   bool secure)
5573{
5574	uint32_t v = secure ? FRAME_TMZ : 0;
5575
5576	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5577	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5578}
5579
5580static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5581{
5582	uint32_t dw2 = 0;
5583
5584	gfx_v9_0_ring_emit_ce_meta(ring,
5585				   (!amdgpu_sriov_vf(ring->adev) &&
5586				   flags & AMDGPU_IB_PREEMPTED) ? true : false);
5587
5588	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
5589	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5590		/* set load_global_config & load_global_uconfig */
5591		dw2 |= 0x8001;
5592		/* set load_cs_sh_regs */
5593		dw2 |= 0x01000000;
5594		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5595		dw2 |= 0x10002;
5596
5597		/* set load_ce_ram if a preamble is present */
5598		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5599			dw2 |= 0x10000000;
5600	} else {
5601		/* still load_ce_ram the first time a preamble is presented,
5602		 * even though no context switch happens.
5603		 */
5604		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5605			dw2 |= 0x10000000;
5606	}
5607
5608	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5609	amdgpu_ring_write(ring, dw2);
5610	amdgpu_ring_write(ring, 0);
5611}
5612
5613static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
5614						  uint64_t addr)
5615{
5616	unsigned ret;
5617	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5618	amdgpu_ring_write(ring, lower_32_bits(addr));
5619	amdgpu_ring_write(ring, upper_32_bits(addr));
5620	/* discard following DWs if *cond_exec_gpu_addr==0 */
5621	amdgpu_ring_write(ring, 0);
5622	ret = ring->wptr & ring->buf_mask;
5623	/* patch dummy value later */
5624	amdgpu_ring_write(ring, 0);
5625	return ret;
5626}
5627
5628static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5629				    uint32_t reg_val_offs)
5630{
5631	struct amdgpu_device *adev = ring->adev;
5632
5633	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5634	amdgpu_ring_write(ring, 0 |	/* src: register */
5635				(5 << 8) |	/* dst: memory */
5636				(1 << 20));	/* write confirm */
5637	amdgpu_ring_write(ring, reg);
5638	amdgpu_ring_write(ring, 0);
5639	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5640				reg_val_offs * 4));
5641	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5642				reg_val_offs * 4));
5643}
5644
5645static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5646				    uint32_t val)
5647{
5648	uint32_t cmd = 0;
5649
5650	switch (ring->funcs->type) {
5651	case AMDGPU_RING_TYPE_GFX:
5652		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5653		break;
5654	case AMDGPU_RING_TYPE_KIQ:
5655		cmd = (1 << 16); /* no inc addr */
5656		break;
5657	default:
5658		cmd = WR_CONFIRM;
5659		break;
5660	}
5661	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5662	amdgpu_ring_write(ring, cmd);
5663	amdgpu_ring_write(ring, reg);
5664	amdgpu_ring_write(ring, 0);
5665	amdgpu_ring_write(ring, val);
5666}
5667
5668static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5669					uint32_t val, uint32_t mask)
5670{
5671	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5672}
5673
5674static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5675						  uint32_t reg0, uint32_t reg1,
5676						  uint32_t ref, uint32_t mask)
5677{
5678	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5679	struct amdgpu_device *adev = ring->adev;
5680	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5681		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5682
5683	if (fw_version_ok)
5684		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5685				      ref, mask, 0x20);
5686	else
5687		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5688							   ref, mask);
5689}
5690
5691static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5692{
5693	struct amdgpu_device *adev = ring->adev;
5694	uint32_t value = 0;
5695
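	/* issue an SQ KILL command (CMD 0x03) restricted to waves of the hung VMID */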
5696	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5697	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5698	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5699	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5700	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5701}
5702
5703static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5704						 enum amdgpu_interrupt_state state)
5705{
5706	switch (state) {
5707	case AMDGPU_IRQ_STATE_DISABLE:
5708	case AMDGPU_IRQ_STATE_ENABLE:
5709		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5710			       TIME_STAMP_INT_ENABLE,
5711			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5712		break;
5713	default:
5714		break;
5715	}
5716}
5717
5718static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5719						     int me, int pipe,
5720						     enum amdgpu_interrupt_state state)
5721{
5722	u32 mec_int_cntl, mec_int_cntl_reg;
5723
5724	/*
5725	 * amdgpu controls only the first MEC. That's why this function only
5726	 * handles the setting of interrupts for this specific MEC. All other
5727	 * pipes' interrupts are set by amdkfd.
5728	 */
5729
5730	if (me == 1) {
5731		switch (pipe) {
5732		case 0:
5733			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5734			break;
5735		case 1:
5736			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5737			break;
5738		case 2:
5739			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5740			break;
5741		case 3:
5742			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5743			break;
5744		default:
5745			DRM_DEBUG("invalid pipe %d\n", pipe);
5746			return;
5747		}
5748	} else {
5749		DRM_DEBUG("invalid me %d\n", me);
5750		return;
5751	}
5752
5753	switch (state) {
5754	case AMDGPU_IRQ_STATE_DISABLE:
5755		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5756		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5757					     TIME_STAMP_INT_ENABLE, 0);
5758		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5759		break;
5760	case AMDGPU_IRQ_STATE_ENABLE:
5761		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5762		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5763					     TIME_STAMP_INT_ENABLE, 1);
5764		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5765		break;
5766	default:
5767		break;
5768	}
5769}
5770
5771static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5772					     struct amdgpu_irq_src *source,
5773					     unsigned type,
5774					     enum amdgpu_interrupt_state state)
5775{
5776	switch (state) {
5777	case AMDGPU_IRQ_STATE_DISABLE:
5778	case AMDGPU_IRQ_STATE_ENABLE:
5779		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5780			       PRIV_REG_INT_ENABLE,
5781			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5782		break;
5783	default:
5784		break;
5785	}
5786
5787	return 0;
5788}
5789
5790static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5791					      struct amdgpu_irq_src *source,
5792					      unsigned type,
5793					      enum amdgpu_interrupt_state state)
5794{
5795	switch (state) {
5796	case AMDGPU_IRQ_STATE_DISABLE:
5797	case AMDGPU_IRQ_STATE_ENABLE:
5798		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5799			       PRIV_INSTR_INT_ENABLE,
5800			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5801		break;
5802	default:
5803		break;
5804	}
5805
5806	return 0;
5807}
5808
5809#define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
5810	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5811			CP_ECC_ERROR_INT_ENABLE, 1)
5812
5813#define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
5814	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5815			CP_ECC_ERROR_INT_ENABLE, 0)
5816
5817static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5818					      struct amdgpu_irq_src *source,
5819					      unsigned type,
5820					      enum amdgpu_interrupt_state state)
5821{
5822	switch (state) {
5823	case AMDGPU_IRQ_STATE_DISABLE:
5824		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5825				CP_ECC_ERROR_INT_ENABLE, 0);
5826		DISABLE_ECC_ON_ME_PIPE(1, 0);
5827		DISABLE_ECC_ON_ME_PIPE(1, 1);
5828		DISABLE_ECC_ON_ME_PIPE(1, 2);
5829		DISABLE_ECC_ON_ME_PIPE(1, 3);
5830		break;
5831
5832	case AMDGPU_IRQ_STATE_ENABLE:
5833		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5834				CP_ECC_ERROR_INT_ENABLE, 1);
5835		ENABLE_ECC_ON_ME_PIPE(1, 0);
5836		ENABLE_ECC_ON_ME_PIPE(1, 1);
5837		ENABLE_ECC_ON_ME_PIPE(1, 2);
5838		ENABLE_ECC_ON_ME_PIPE(1, 3);
5839		break;
5840	default:
5841		break;
5842	}
5843
5844	return 0;
5845}
5846
5847
5848static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5849					    struct amdgpu_irq_src *src,
5850					    unsigned type,
5851					    enum amdgpu_interrupt_state state)
5852{
5853	switch (type) {
5854	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5855		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5856		break;
5857	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5858		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5859		break;
5860	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5861		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5862		break;
5863	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5864		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5865		break;
5866	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5867		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5868		break;
5869	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5870		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5871		break;
5872	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5873		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5874		break;
5875	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5876		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5877		break;
5878	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5879		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5880		break;
5881	default:
5882		break;
5883	}
5884	return 0;
5885}
5886
5887static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5888			    struct amdgpu_irq_src *source,
5889			    struct amdgpu_iv_entry *entry)
5890{
5891	int i;
5892	u8 me_id, pipe_id, queue_id;
5893	struct amdgpu_ring *ring;
5894
5895	DRM_DEBUG("IH: CP EOP\n");
5896	me_id = (entry->ring_id & 0x0c) >> 2;
5897	pipe_id = (entry->ring_id & 0x03) >> 0;
5898	queue_id = (entry->ring_id & 0x70) >> 4;
5899
5900	switch (me_id) {
5901	case 0:
5902		if (adev->gfx.num_gfx_rings) {
5903			if (!adev->gfx.mcbp) {
5904				amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5905			} else if (!amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) {
5906				/* Fence signals are handled on the software rings */
5907				for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
5908					amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]);
5909			}
5910		}
5911		break;
5912	case 1:
5913	case 2:
5914		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5915			ring = &adev->gfx.compute_ring[i];
5916			/* Per-queue interrupt is supported for MEC starting from VI.
5917			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
5918			 */
5919			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5920				amdgpu_fence_process(ring);
5921		}
5922		break;
5923	}
5924	return 0;
5925}
5926
5927static void gfx_v9_0_fault(struct amdgpu_device *adev,
5928			   struct amdgpu_iv_entry *entry)
5929{
5930	u8 me_id, pipe_id, queue_id;
5931	struct amdgpu_ring *ring;
5932	int i;
5933
5934	me_id = (entry->ring_id & 0x0c) >> 2;
5935	pipe_id = (entry->ring_id & 0x03) >> 0;
5936	queue_id = (entry->ring_id & 0x70) >> 4;
5937
5938	switch (me_id) {
5939	case 0:
5940		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5941		break;
5942	case 1:
5943	case 2:
5944		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5945			ring = &adev->gfx.compute_ring[i];
5946			if (ring->me == me_id && ring->pipe == pipe_id &&
5947			    ring->queue == queue_id)
5948				drm_sched_fault(&ring->sched);
5949		}
5950		break;
5951	}
5952}
5953
5954static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5955				 struct amdgpu_irq_src *source,
5956				 struct amdgpu_iv_entry *entry)
5957{
5958	DRM_ERROR("Illegal register access in command stream\n");
5959	gfx_v9_0_fault(adev, entry);
5960	return 0;
5961}
5962
5963static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5964				  struct amdgpu_irq_src *source,
5965				  struct amdgpu_iv_entry *entry)
5966{
5967	DRM_ERROR("Illegal instruction in command stream\n");
5968	gfx_v9_0_fault(adev, entry);
5969	return 0;
}

5973static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
5974	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5975	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5976	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5977	},
5978	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
5979	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
5980	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
5981	},
5982	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5983	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
5984	  0, 0
5985	},
5986	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5987	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
5988	  0, 0
5989	},
5990	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
5991	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
5992	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
5993	},
5994	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5995	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
5996	  0, 0
5997	},
5998	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5999	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
6000	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
6001	},
6002	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
6003	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
6004	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
6005	},
6006	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
6007	  SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
6008	  0, 0
6009	},
6010	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
6011	  SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
6012	  0, 0
6013	},
6014	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
6015	  SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
6016	  0, 0
6017	},
6018	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6019	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
6020	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
6021	},
6022	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6023	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
6024	  0, 0
6025	},
6026	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6027	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
6028	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
6029	},
6030	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
6031	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6032	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
6033	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
6034	},
6035	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
6036	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6037	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
6038	  0, 0
6039	},
6040	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
6041	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6042	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
6043	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
6044	},
6045	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
6046	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6047	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
6048	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
6049	},
6050	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
6051	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6052	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
6053	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
6054	},
6055	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
6056	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6057	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
6058	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
6059	},
6060	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
6061	  SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
6062	  0, 0
6063	},
6064	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6065	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
6066	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
6067	},
6068	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6069	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
6070	  0, 0
6071	},
6072	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6073	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
6074	  0, 0
6075	},
6076	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6077	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
6078	  0, 0
6079	},
6080	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6081	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
6082	  0, 0
6083	},
6084	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6085	  SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
6086	  0, 0
6087	},
6088	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6089	  SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
6090	  0, 0
6091	},
6092	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6093	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
6094	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
6095	},
6096	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6097	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
6098	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
6099	},
6100	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6101	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
6102	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
6103	},
6104	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6105	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
6106	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
6107	},
6108	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6109	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
6110	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
6111	},
6112	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6113	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
6114	  0, 0
6115	},
6116	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6117	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
6118	  0, 0
6119	},
6120	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6121	  SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
6122	  0, 0
6123	},
6124	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6125	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
6126	  0, 0
6127	},
6128	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6129	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
6130	  0, 0
6131	},
6132	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6133	  SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
6134	  0, 0
6135	},
6136	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6137	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
6138	  0, 0
6139	},
6140	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6141	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
6142	  0, 0
6143	},
6144	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6145	  SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
6146	  0, 0
6147	},
6148	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6149	  SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6150	  0, 0
6151	},
6152	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6153	  SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6154	  0, 0
6155	},
6156	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6157	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6158	  0, 0
6159	},
6160	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6161	  SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6162	  0, 0
6163	},
6164	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6165	  SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6166	  0, 0
6167	},
6168	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6169	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6170	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6171	},
6172	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6173	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6174	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6175	},
6176	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6177	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6178	  0, 0
6179	},
6180	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6181	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6182	  0, 0
6183	},
6184	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6185	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6186	  0, 0
6187	},
6188	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6189	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6190	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6191	},
6192	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6193	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6194	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6195	},
6196	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6197	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6198	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6199	},
6200	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6201	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6202	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6203	},
6204	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6205	  SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6206	  0, 0
6207	},
6208	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6209	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6210	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6211	},
6212	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6213	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6214	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6215	},
6216	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6217	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6218	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6219	},
6220	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6221	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6222	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6223	},
6224	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6225	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6226	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6227	},
6228	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6229	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6230	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6231	},
6232	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6233	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6234	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6235	},
6236	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6237	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6238	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6239	},
6240	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6241	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6242	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6243	},
6244	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6245	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6246	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6247	},
6248	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6249	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6250	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6251	},
6252	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6253	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6254	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6255	},
6256	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6257	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6258	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6259	},
6260	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6261	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6262	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6263	},
6264	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6265	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6266	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6267	},
6268	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6269	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6270	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6271	},
6272	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6273	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6274	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6275	},
6276	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6277	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6278	  0, 0
6279	},
6280	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6281	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6282	  0, 0
6283	},
6284	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6285	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6286	  0, 0
6287	},
6288	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6289	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6290	  0, 0
6291	},
6292	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6293	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6294	  0, 0
6295	},
6296	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6297	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6298	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6299	},
6300	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6301	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6302	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6303	},
6304	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6305	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6306	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6307	},
6308	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6309	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6310	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6311	},
6312	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6313	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6314	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6315	},
6316	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6317	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6318	  0, 0
6319	},
6320	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6321	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6322	  0, 0
6323	},
6324	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6325	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6326	  0, 0
6327	},
6328	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6329	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6330	  0, 0
6331	},
6332	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6333	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6334	  0, 0
6335	},
6336	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6337	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6338	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6339	},
6340	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6341	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6342	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6343	},
6344	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6345	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6346	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6347	},
6348	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6349	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6350	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6351	},
6352	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6353	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6354	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6355	},
6356	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6357	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6358	  0, 0
6359	},
6360	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6361	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6362	  0, 0
6363	},
6364	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6365	  SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6366	  0, 0
6367	},
6368	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6369	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6370	  0, 0
6371	},
6372	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6373	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6374	  0, 0
6375	},
6376	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6377	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6378	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6379	},
6380	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6381	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6382	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6383	},
6384	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6385	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6386	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6387	},
6388	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6389	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6390	  0, 0
6391	},
6392	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6393	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6394	  0, 0
6395	},
6396	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6397	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6398	  0, 0
6399	},
6400	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6401	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6402	  0, 0
6403	},
6404	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6405	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6406	  0, 0
6407	},
6408	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6409	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6410	  0, 0
6411	}
6412};
6413
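/*
 * Validate a RAS error-injection request against the ras_gfx_subblocks
 * table and, if the requested error type is supported by both hardware
 * and driver, forward it to the PSP RAS TA.
 */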
6414static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6415				     void *inject_if, uint32_t instance_mask)
6416{
6417	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6418	int ret;
6419	struct ta_ras_trigger_error_input block_info = { 0 };
6420
6421	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6422		return -EINVAL;
6423
6424	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6425		return -EINVAL;
6426
6427	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6428		return -EPERM;
6429
6430	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6431	      info->head.type)) {
		DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6433			ras_gfx_subblocks[info->head.sub_block_index].name,
6434			info->head.type);
6435		return -EPERM;
6436	}
6437
6438	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6439	      info->head.type)) {
		DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6441			ras_gfx_subblocks[info->head.sub_block_index].name,
6442			info->head.type);
6443		return -EPERM;
6444	}
6445
6446	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6447	block_info.sub_block_index =
6448		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6449	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6450	block_info.address = info->address;
6451	block_info.value = info->value;
6452
6453	mutex_lock(&adev->grbm_idx_mutex);
6454	ret = psp_ras_trigger_error(&adev->psp, &block_info, instance_mask);
6455	mutex_unlock(&adev->grbm_idx_mutex);
6456
6457	return ret;
6458}
6459
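/*
 * Names of the UTC VML2, VML2 walker and ATC L2 memory instances below,
 * indexed by the corresponding *_ECC_INDEX / *_EDC_INDEX registers.
 */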
6460static const char * const vml2_mems[] = {
6461	"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6462	"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6463	"UTC_VML2_BANK_CACHE_0_4K_MEM0",
6464	"UTC_VML2_BANK_CACHE_0_4K_MEM1",
6465	"UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6466	"UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6467	"UTC_VML2_BANK_CACHE_1_4K_MEM0",
6468	"UTC_VML2_BANK_CACHE_1_4K_MEM1",
6469	"UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6470	"UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6471	"UTC_VML2_BANK_CACHE_2_4K_MEM0",
6472	"UTC_VML2_BANK_CACHE_2_4K_MEM1",
6473	"UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6474	"UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6475	"UTC_VML2_BANK_CACHE_3_4K_MEM0",
6476	"UTC_VML2_BANK_CACHE_3_4K_MEM1",
6477};
6478
6479static const char * const vml2_walker_mems[] = {
6480	"UTC_VML2_CACHE_PDE0_MEM0",
6481	"UTC_VML2_CACHE_PDE0_MEM1",
6482	"UTC_VML2_CACHE_PDE1_MEM0",
6483	"UTC_VML2_CACHE_PDE1_MEM1",
6484	"UTC_VML2_CACHE_PDE2_MEM0",
6485	"UTC_VML2_CACHE_PDE2_MEM1",
6486	"UTC_VML2_RDIF_LOG_FIFO",
6487};
6488
6489static const char * const atc_l2_cache_2m_mems[] = {
6490	"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6491	"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6492	"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6493	"UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6494};
6495
static const char * const atc_l2_cache_4k_mems[] = {
6497	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6498	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6499	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6500	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6501	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6502	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6503	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6504	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6505	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6506	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6507	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6508	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6509	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6510	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6511	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6512	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6513	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6514	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6515	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6516	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6517	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6518	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6519	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6520	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6521	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6522	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6523	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6524	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6525	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6526	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6527	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6528	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6529};
6530
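/*
 * Read the per-instance SEC (correctable) and DED (uncorrectable) error
 * counts of the UTC VML2, VML2 walker and ATC L2 memories, selected via
 * the corresponding *_INDEX registers, and accumulate them into err_data.
 */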
6531static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6532					 struct ras_err_data *err_data)
6533{
6534	uint32_t i, data;
6535	uint32_t sec_count, ded_count;
6536
6537	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6538	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6539	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6540	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6541	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6542	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6543	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6544	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6545
6546	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6547		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6548		data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6549
6550		sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6551		if (sec_count) {
6552			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6553				"SEC %d\n", i, vml2_mems[i], sec_count);
6554			err_data->ce_count += sec_count;
6555		}
6556
6557		ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6558		if (ded_count) {
6559			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6560				"DED %d\n", i, vml2_mems[i], ded_count);
6561			err_data->ue_count += ded_count;
6562		}
6563	}
6564
6565	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6566		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6567		data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6568
6569		sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6570						SEC_COUNT);
6571		if (sec_count) {
6572			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6573				"SEC %d\n", i, vml2_walker_mems[i], sec_count);
6574			err_data->ce_count += sec_count;
6575		}
6576
6577		ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6578						DED_COUNT);
6579		if (ded_count) {
6580			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6581				"DED %d\n", i, vml2_walker_mems[i], ded_count);
6582			err_data->ue_count += ded_count;
6583		}
6584	}
6585
6586	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6587		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6588		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6589
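		/* SEC count lives in bits [14:13] of ATC_L2_CACHE_2M_EDC_CNT */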
6590		sec_count = (data & 0x00006000L) >> 0xd;
6591		if (sec_count) {
6592			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6593				"SEC %d\n", i, atc_l2_cache_2m_mems[i],
6594				sec_count);
6595			err_data->ce_count += sec_count;
6596		}
6597	}
6598
6599	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6600		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6601		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6602
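		/* SEC count is in bits [14:13] and DED count in bits [16:15]
		 * of ATC_L2_CACHE_4K_EDC_CNT.
		 */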
6603		sec_count = (data & 0x00006000L) >> 0xd;
6604		if (sec_count) {
6605			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6606				"SEC %d\n", i, atc_l2_cache_4k_mems[i],
6607				sec_count);
6608			err_data->ce_count += sec_count;
6609		}
6610
6611		ded_count = (data & 0x00018000L) >> 0xf;
6612		if (ded_count) {
6613			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6614				"DED %d\n", i, atc_l2_cache_4k_mems[i],
6615				ded_count);
6616			err_data->ue_count += ded_count;
6617		}
6618	}
6619
6620	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6621	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6622	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6623	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6624
6625	return 0;
6626}
6627
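/*
 * Decode one EDC counter register value: match the register against the
 * gfx_v9_0_ras_fields table and add any SEC/DED counts found in its
 * fields to *sec_count and *ded_count.
 */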
6628static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
6629	const struct soc15_reg_entry *reg,
6630	uint32_t se_id, uint32_t inst_id, uint32_t value,
6631	uint32_t *sec_count, uint32_t *ded_count)
6632{
6633	uint32_t i;
6634	uint32_t sec_cnt, ded_cnt;
6635
6636	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
		if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6638			gfx_v9_0_ras_fields[i].seg != reg->seg ||
6639			gfx_v9_0_ras_fields[i].inst != reg->inst)
6640			continue;
6641
6642		sec_cnt = (value &
6643				gfx_v9_0_ras_fields[i].sec_count_mask) >>
6644				gfx_v9_0_ras_fields[i].sec_count_shift;
6645		if (sec_cnt) {
6646			dev_info(adev->dev, "GFX SubBlock %s, "
6647				"Instance[%d][%d], SEC %d\n",
6648				gfx_v9_0_ras_fields[i].name,
6649				se_id, inst_id,
6650				sec_cnt);
6651			*sec_count += sec_cnt;
6652		}
6653
6654		ded_cnt = (value &
6655				gfx_v9_0_ras_fields[i].ded_count_mask) >>
6656				gfx_v9_0_ras_fields[i].ded_count_shift;
6657		if (ded_cnt) {
6658			dev_info(adev->dev, "GFX SubBlock %s, "
6659				"Instance[%d][%d], DED %d\n",
6660				gfx_v9_0_ras_fields[i].name,
6661				se_id, inst_id,
6662				ded_cnt);
6663			*ded_count += ded_cnt;
6664		}
6665	}
6666
6667	return 0;
6668}
6669
6670static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
6671{
6672	int i, j, k;
6673
6674	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6675		return;
6676
6677	/* read back registers to clear the counters */
6678	mutex_lock(&adev->grbm_idx_mutex);
6679	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6680		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6681			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6682				amdgpu_gfx_select_se_sh(adev, j, 0x0, k, 0);
6683				RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6684			}
6685		}
6686	}
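	/* restore SE/SH/instance broadcast mode in GRBM_GFX_INDEX */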
6687	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
6688	mutex_unlock(&adev->grbm_idx_mutex);
6689
6690	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6691	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6692	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6693	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6694	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6695	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6696	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6697	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6698
6699	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6700		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6701		RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6702	}
6703
6704	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6705		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6706		RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6707	}
6708
6709	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6710		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6711		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6712	}
6713
6714	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6715		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6716		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6717	}
6718
6719	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6720	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6721	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6722	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6723}
6724
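/*
 * Walk every EDC counter register in gfx_v9_0_edc_counter_regs for each
 * SE/instance, decode non-zero values via gfx_v9_0_ras_error_count() and
 * report the totals through ras_error_status.
 */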
6725static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6726					  void *ras_error_status)
6727{
6728	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6729	uint32_t sec_count = 0, ded_count = 0;
6730	uint32_t i, j, k;
6731	uint32_t reg_value;
6732
6733	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6734		return;
6735
6736	err_data->ue_count = 0;
6737	err_data->ce_count = 0;
6738
6739	mutex_lock(&adev->grbm_idx_mutex);
6740
6741	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6742		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6743			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6744				amdgpu_gfx_select_se_sh(adev, j, 0, k, 0);
6745				reg_value =
6746					RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6747				if (reg_value)
6748					gfx_v9_0_ras_error_count(adev,
6749						&gfx_v9_0_edc_counter_regs[i],
6750						j, k, reg_value,
6751						&sec_count, &ded_count);
6752			}
6753		}
6754	}
6755
6756	err_data->ce_count += sec_count;
6757	err_data->ue_count += ded_count;
6758
6759	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
6760	mutex_unlock(&adev->grbm_idx_mutex);
6761
6762	gfx_v9_0_query_utc_edc_status(adev, err_data);
6763}
6764
6765static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
6766{
6767	const unsigned int cp_coher_cntl =
6768			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
6769			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
6770			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
6771			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
6772			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
6773
	/* ACQUIRE_MEM - make one or more surfaces valid for use by subsequent operations */
6775	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6776	amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
6777	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6778	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6779	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6780	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6781	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6782}
6783
6784static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6785					uint32_t pipe, bool enable)
6786{
6787	struct amdgpu_device *adev = ring->adev;
6788	uint32_t val;
6789	uint32_t wcl_cs_reg;
6790
	/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are the same */
6792	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;
6793
6794	switch (pipe) {
6795	case 0:
6796		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
6797		break;
6798	case 1:
6799		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
6800		break;
6801	case 2:
6802		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
6803		break;
6804	case 3:
6805		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
6806		break;
6807	default:
6808		DRM_DEBUG("invalid pipe %d\n", pipe);
6809		return;
6810	}
6811
	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
}

static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6816{
6817	struct amdgpu_device *adev = ring->adev;
6818	uint32_t val;
	int i;

	/* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register used to
	 * limit the number of gfx waves. Setting it to 0x1f (5 bits set)
	 * ensures gfx gets only around 25% of the GPU resources.
	 */
6826	val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6827	amdgpu_ring_emit_wreg(ring,
6828			      SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
6829			      val);
6830
	/* Restrict waves for normal/low-priority compute queues as well,
	 * to get the best QoS for high-priority compute jobs.
	 *
	 * amdgpu controls only the first ME (CS pipes 0-3).
	 */
	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
		if (i != ring->pipe)
			gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
	}
6841}
6842
6843static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6844	.name = "gfx_v9_0",
6845	.early_init = gfx_v9_0_early_init,
6846	.late_init = gfx_v9_0_late_init,
6847	.sw_init = gfx_v9_0_sw_init,
6848	.sw_fini = gfx_v9_0_sw_fini,
6849	.hw_init = gfx_v9_0_hw_init,
6850	.hw_fini = gfx_v9_0_hw_fini,
6851	.suspend = gfx_v9_0_suspend,
6852	.resume = gfx_v9_0_resume,
6853	.is_idle = gfx_v9_0_is_idle,
6854	.wait_for_idle = gfx_v9_0_wait_for_idle,
6855	.soft_reset = gfx_v9_0_soft_reset,
6856	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
6857	.set_powergating_state = gfx_v9_0_set_powergating_state,
6858	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
6859};
6860
6861static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6862	.type = AMDGPU_RING_TYPE_GFX,
6863	.align_mask = 0xff,
6864	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6865	.support_64bit_ptrs = true,
6866	.secure_submission_supported = true,
6867	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6868	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6869	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6870	.emit_frame_size = /* totally 242 maximum if 16 IBs */
6871		5 +  /* COND_EXEC */
6872		7 +  /* PIPELINE_SYNC */
6873		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6874		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6875		2 + /* VM_FLUSH */
6876		8 +  /* FENCE for VM_FLUSH */
6877		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		     * the first COND_EXEC jumps to the place just
		     * prior to this double SWITCH_BUFFER
		     */
6881		5 + /* COND_EXEC */
6882		7 +	 /*	HDP_flush */
6883		4 +	 /*	VGT_flush */
6884		14 + /*	CE_META */
6885		31 + /*	DE_META */
6886		3 + /* CNTX_CTRL */
6887		5 + /* HDP_INVL */
6888		8 + 8 + /* FENCE x2 */
6889		2 + /* SWITCH_BUFFER */
6890		7, /* gfx_v9_0_emit_mem_sync */
6891	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
6892	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6893	.emit_fence = gfx_v9_0_ring_emit_fence,
6894	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6895	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6896	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6897	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6898	.test_ring = gfx_v9_0_ring_test_ring,
6899	.insert_nop = amdgpu_ring_insert_nop,
6900	.pad_ib = amdgpu_ring_generic_pad_ib,
6901	.emit_switch_buffer = gfx_v9_ring_emit_sb,
6902	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6903	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6904	.preempt_ib = gfx_v9_0_ring_preempt_ib,
6905	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
6906	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6907	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6908	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6909	.soft_recovery = gfx_v9_0_ring_soft_recovery,
6910	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
6911};
6912
6913static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
6914	.type = AMDGPU_RING_TYPE_GFX,
6915	.align_mask = 0xff,
6916	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6917	.support_64bit_ptrs = true,
6918	.secure_submission_supported = true,
6919	.get_rptr = amdgpu_sw_ring_get_rptr_gfx,
6920	.get_wptr = amdgpu_sw_ring_get_wptr_gfx,
6921	.set_wptr = amdgpu_sw_ring_set_wptr_gfx,
6922	.emit_frame_size = /* totally 242 maximum if 16 IBs */
6923		5 +  /* COND_EXEC */
6924		7 +  /* PIPELINE_SYNC */
6925		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6926		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6927		2 + /* VM_FLUSH */
6928		8 +  /* FENCE for VM_FLUSH */
6929		20 + /* GDS switch */
6930		4 + /* double SWITCH_BUFFER,
		     * the first COND_EXEC jumps to the place just
6932		     * prior to this double SWITCH_BUFFER
6933		     */
6934		5 + /* COND_EXEC */
6935		7 +	 /*	HDP_flush */
6936		4 +	 /*	VGT_flush */
6937		14 + /*	CE_META */
6938		31 + /*	DE_META */
6939		3 + /* CNTX_CTRL */
6940		5 + /* HDP_INVL */
6941		8 + 8 + /* FENCE x2 */
6942		2 + /* SWITCH_BUFFER */
6943		7, /* gfx_v9_0_emit_mem_sync */
6944	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
6945	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6946	.emit_fence = gfx_v9_0_ring_emit_fence,
6947	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6948	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6949	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6950	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6951	.test_ring = gfx_v9_0_ring_test_ring,
6952	.test_ib = gfx_v9_0_ring_test_ib,
6953	.insert_nop = amdgpu_sw_ring_insert_nop,
6954	.pad_ib = amdgpu_ring_generic_pad_ib,
6955	.emit_switch_buffer = gfx_v9_ring_emit_sb,
6956	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6957	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6958	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
6959	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6960	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6961	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6962	.soft_recovery = gfx_v9_0_ring_soft_recovery,
6963	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
6964	.patch_cntl = gfx_v9_0_ring_patch_cntl,
6965	.patch_de = gfx_v9_0_ring_patch_de_meta,
6966	.patch_ce = gfx_v9_0_ring_patch_ce_meta,
6967};
6968
6969static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6970	.type = AMDGPU_RING_TYPE_COMPUTE,
6971	.align_mask = 0xff,
6972	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6973	.support_64bit_ptrs = true,
6974	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6975	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6976	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6977	.emit_frame_size =
6978		20 + /* gfx_v9_0_ring_emit_gds_switch */
6979		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6980		5 + /* hdp invalidate */
6981		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6982		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6983		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6984		8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6985		7 + /* gfx_v9_0_emit_mem_sync */
6986		5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
6987		15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
6988	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6989	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
6990	.emit_fence = gfx_v9_0_ring_emit_fence,
6991	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6992	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6993	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6994	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6995	.test_ring = gfx_v9_0_ring_test_ring,
6996	.test_ib = gfx_v9_0_ring_test_ib,
6997	.insert_nop = amdgpu_ring_insert_nop,
6998	.pad_ib = amdgpu_ring_generic_pad_ib,
6999	.emit_wreg = gfx_v9_0_ring_emit_wreg,
7000	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7001	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7002	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
7003	.emit_wave_limit = gfx_v9_0_emit_wave_limit,
7004};
7005
7006static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
7007	.type = AMDGPU_RING_TYPE_KIQ,
7008	.align_mask = 0xff,
7009	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7010	.support_64bit_ptrs = true,
7011	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
7012	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
7013	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
7014	.emit_frame_size =
7015		20 + /* gfx_v9_0_ring_emit_gds_switch */
7016		7 + /* gfx_v9_0_ring_emit_hdp_flush */
7017		5 + /* hdp invalidate */
7018		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
7019		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7020		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7021		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7022	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
7023	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
7024	.test_ring = gfx_v9_0_ring_test_ring,
7025	.insert_nop = amdgpu_ring_insert_nop,
7026	.pad_ib = amdgpu_ring_generic_pad_ib,
7027	.emit_rreg = gfx_v9_0_ring_emit_rreg,
7028	.emit_wreg = gfx_v9_0_ring_emit_wreg,
7029	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7030	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7031};
7032
7033static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
7034{
7035	int i;
7036
7037	adev->gfx.kiq[0].ring.funcs = &gfx_v9_0_ring_funcs_kiq;
7038
7039	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7040		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
7041
7042	if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
7043		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
7044			adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx;
7045	}
7046
7047	for (i = 0; i < adev->gfx.num_compute_rings; i++)
7048		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
7049}
7050
7051static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
7052	.set = gfx_v9_0_set_eop_interrupt_state,
7053	.process = gfx_v9_0_eop_irq,
7054};
7055
7056static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
7057	.set = gfx_v9_0_set_priv_reg_fault_state,
7058	.process = gfx_v9_0_priv_reg_irq,
7059};
7060
7061static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
7062	.set = gfx_v9_0_set_priv_inst_fault_state,
7063	.process = gfx_v9_0_priv_inst_irq,
7064};
7065
7066static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
7067	.set = gfx_v9_0_set_cp_ecc_error_state,
7068	.process = amdgpu_gfx_cp_ecc_error_irq,
};

7072static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
7073{
7074	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7075	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
7076
7077	adev->gfx.priv_reg_irq.num_types = 1;
7078	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
7079
7080	adev->gfx.priv_inst_irq.num_types = 1;
7081	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
7082
	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
7084	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
7085}
7086
7087static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
7088{
7089	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7090	case IP_VERSION(9, 0, 1):
7091	case IP_VERSION(9, 2, 1):
7092	case IP_VERSION(9, 4, 0):
7093	case IP_VERSION(9, 2, 2):
7094	case IP_VERSION(9, 1, 0):
7095	case IP_VERSION(9, 4, 1):
7096	case IP_VERSION(9, 3, 0):
7097	case IP_VERSION(9, 4, 2):
7098		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
7099		break;
7100	default:
7101		break;
7102	}
7103}
7104
7105static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
7106{
	/* init asic gds info */
7108	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7109	case IP_VERSION(9, 0, 1):
7110	case IP_VERSION(9, 2, 1):
7111	case IP_VERSION(9, 4, 0):
7112		adev->gds.gds_size = 0x10000;
7113		break;
7114	case IP_VERSION(9, 2, 2):
7115	case IP_VERSION(9, 1, 0):
7116	case IP_VERSION(9, 4, 1):
7117		adev->gds.gds_size = 0x1000;
7118		break;
7119	case IP_VERSION(9, 4, 2):
		/* Aldebaran removed all of the GDS internal memory;
		 * only GWS opcodes (e.g. barrier, semaphore) are
		 * supported in the kernel.
		 */
7123		adev->gds.gds_size = 0;
7124		break;
7125	default:
7126		adev->gds.gds_size = 0x10000;
7127		break;
7128	}
7129
7130	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7131	case IP_VERSION(9, 0, 1):
7132	case IP_VERSION(9, 4, 0):
7133		adev->gds.gds_compute_max_wave_id = 0x7ff;
7134		break;
7135	case IP_VERSION(9, 2, 1):
7136		adev->gds.gds_compute_max_wave_id = 0x27f;
7137		break;
7138	case IP_VERSION(9, 2, 2):
7139	case IP_VERSION(9, 1, 0):
7140		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
7141			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
7142		else
7143			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
7144		break;
7145	case IP_VERSION(9, 4, 1):
7146		adev->gds.gds_compute_max_wave_id = 0xfff;
7147		break;
7148	case IP_VERSION(9, 4, 2):
7149		/* deprecated for Aldebaran, no usage at all */
7150		adev->gds.gds_compute_max_wave_id = 0;
7151		break;
7152	default:
7153		/* this really depends on the chip */
7154		adev->gds.gds_compute_max_wave_id = 0x7ff;
7155		break;
7156	}
7157
7158	adev->gds.gws_size = 64;
7159	adev->gds.oa_size = 16;
7160}
7161
7162static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7163						 u32 bitmap)
7164{
7165	u32 data;
7166
7167	if (!bitmap)
7168		return;
7169
7170	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7171	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7172
7173	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
7174}
7175
7176static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7177{
7178	u32 data, mask;
7179
7180	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
7181	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
7182
7183	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7184	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7185
7186	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7187
7188	return (~data) & mask;
7189}
7190
7191static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
7192				 struct amdgpu_cu_info *cu_info)
7193{
7194	int i, j, k, counter, active_cu_number = 0;
7195	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7196	unsigned disable_masks[4 * 4];
7197
7198	if (!adev || !cu_info)
7199		return -EINVAL;
7200
	/*
	 * 16 comes from the 4 * 4 bitmap array size, which covers all gfx9 ASICs.
	 */
7204	if (adev->gfx.config.max_shader_engines *
7205		adev->gfx.config.max_sh_per_se > 16)
7206		return -EINVAL;
7207
7208	amdgpu_gfx_parse_disable_cu(disable_masks,
7209				    adev->gfx.config.max_shader_engines,
7210				    adev->gfx.config.max_sh_per_se);
7211
7212	mutex_lock(&adev->grbm_idx_mutex);
7213	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7214		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7215			mask = 1;
7216			ao_bitmap = 0;
7217			counter = 0;
7218			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
7219			gfx_v9_0_set_user_cu_inactive_bitmap(
7220				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
7221			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
7222
			/*
			 * The bitmap (and ao_cu_bitmap) in the cu_info
			 * structure is a 4x4 array, which suits Vega ASICs
			 * with their 4*2 SE/SH layout.
			 * For Arcturus the SE/SH layout changed to 8*1, so to
			 * minimize the impact we fold it into the existing
			 * bitmap array as below:
			 *    SE4,SH0 --> bitmap[0][1]
			 *    SE5,SH0 --> bitmap[1][1]
			 *    SE6,SH0 --> bitmap[2][1]
			 *    SE7,SH0 --> bitmap[3][1]
			 */
7235			cu_info->bitmap[0][i % 4][j + i / 4] = bitmap;
7236
			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
7238				if (bitmap & mask) {
7239					if (counter < adev->gfx.config.max_cu_per_sh)
7240						ao_bitmap |= mask;
					counter++;
7242				}
7243				mask <<= 1;
7244			}
7245			active_cu_number += counter;
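			/* Only the first 2 SEs and 2 SHs (8 CU bits each)
			 * fit into the 32-bit ao_cu_mask.
			 */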
7246			if (i < 2 && j < 2)
7247				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7248			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
7249		}
7250	}
7251	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
7252	mutex_unlock(&adev->grbm_idx_mutex);
7253
7254	cu_info->number = active_cu_number;
7255	cu_info->ao_cu_mask = ao_cu_mask;
7256	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7257
7258	return 0;
7259}
7260
7261const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
7262{
7263	.type = AMD_IP_BLOCK_TYPE_GFX,
7264	.major = 9,
7265	.minor = 0,
7266	.rev = 0,
7267	.funcs = &gfx_v9_0_ip_funcs,
7268};
7269