1/*	$NetBSD: amdgpu_sdma_v4_0.c,v 1.2 2021/12/18 23:44:58 riastradh Exp $	*/
2
3/*
4 * Copyright 2016 Advanced Micro Devices, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 */
25
26#include <sys/cdefs.h>
27__KERNEL_RCSID(0, "$NetBSD: amdgpu_sdma_v4_0.c,v 1.2 2021/12/18 23:44:58 riastradh Exp $");
28
29#include <linux/delay.h>
30#include <linux/firmware.h>
31#include <linux/module.h>
32#include <linux/pci.h>
33
34#include "amdgpu.h"
35#include "amdgpu_ucode.h"
36#include "amdgpu_trace.h"
37
38#include "sdma0/sdma0_4_2_offset.h"
39#include "sdma0/sdma0_4_2_sh_mask.h"
40#include "sdma1/sdma1_4_2_offset.h"
41#include "sdma1/sdma1_4_2_sh_mask.h"
42#include "sdma2/sdma2_4_2_2_offset.h"
43#include "sdma2/sdma2_4_2_2_sh_mask.h"
44#include "sdma3/sdma3_4_2_2_offset.h"
45#include "sdma3/sdma3_4_2_2_sh_mask.h"
46#include "sdma4/sdma4_4_2_2_offset.h"
47#include "sdma4/sdma4_4_2_2_sh_mask.h"
48#include "sdma5/sdma5_4_2_2_offset.h"
49#include "sdma5/sdma5_4_2_2_sh_mask.h"
50#include "sdma6/sdma6_4_2_2_offset.h"
51#include "sdma6/sdma6_4_2_2_sh_mask.h"
52#include "sdma7/sdma7_4_2_2_offset.h"
53#include "sdma7/sdma7_4_2_2_sh_mask.h"
54#include "hdp/hdp_4_0_offset.h"
55#include "sdma0/sdma0_4_1_default.h"
56
57#include "soc15_common.h"
58#include "soc15.h"
59#include "vega10_sdma_pkt_open.h"
60
61#include "ivsrcid/sdma0/irqsrcs_sdma0_4_0.h"
62#include "ivsrcid/sdma1/irqsrcs_sdma1_4_0.h"
63
64#include "amdgpu_ras.h"
65
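/*
 * Firmware images that may be requested at init time, one per supported
 * ASIC (the discrete Vega parts also ship a separate image for their
 * second SDMA instance).
 */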
66MODULE_FIRMWARE("amdgpu/vega10_sdma.bin");
67MODULE_FIRMWARE("amdgpu/vega10_sdma1.bin");
68MODULE_FIRMWARE("amdgpu/vega12_sdma.bin");
69MODULE_FIRMWARE("amdgpu/vega12_sdma1.bin");
70MODULE_FIRMWARE("amdgpu/vega20_sdma.bin");
71MODULE_FIRMWARE("amdgpu/vega20_sdma1.bin");
72MODULE_FIRMWARE("amdgpu/raven_sdma.bin");
73MODULE_FIRMWARE("amdgpu/picasso_sdma.bin");
74MODULE_FIRMWARE("amdgpu/raven2_sdma.bin");
75MODULE_FIRMWARE("amdgpu/arcturus_sdma.bin");
76MODULE_FIRMWARE("amdgpu/renoir_sdma.bin");
77
78#define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK  0x000000F8L
79#define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L
80
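/*
 * Instance-aware register accessors: translate an SDMA0-relative register
 * offset into the absolute offset of the given SDMA instance before the
 * read/write (see sdma_v4_0_get_reg_offset below).
 */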
81#define WREG32_SDMA(instance, offset, value) \
82	WREG32(sdma_v4_0_get_reg_offset(adev, (instance), (offset)), value)
83#define RREG32_SDMA(instance, offset) \
84	RREG32(sdma_v4_0_get_reg_offset(adev, (instance), (offset)))
85
86static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev);
87static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev);
88static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev);
89static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev);
90static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev);
91
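/*
 * "Golden" register settings: (mask, value) pairs applied during hw init
 * to override the power-on defaults for each ASIC variant.
 */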
92static const struct soc15_reg_golden golden_settings_sdma_4[] = {
93	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
94	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xff000ff0, 0x3f000100),
95	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_IB_CNTL, 0x800f0100, 0x00000100),
96	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
97	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_IB_CNTL, 0x800f0100, 0x00000100),
98	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
99	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_POWER_CNTL, 0x003ff006, 0x0003c000),
100	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_IB_CNTL, 0x800f0100, 0x00000100),
101	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
102	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL, 0x800f0100, 0x00000100),
103	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
104	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
105	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x00000000),
106	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CLK_CTRL, 0xffffffff, 0x3f000100),
107	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_IB_CNTL, 0x800f0100, 0x00000100),
108	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
109	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_IB_CNTL, 0x800f0100, 0x00000100),
110	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
111	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_POWER_CNTL, 0x003ff000, 0x0003c000),
112	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_IB_CNTL, 0x800f0100, 0x00000100),
113	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
114	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_IB_CNTL, 0x800f0100, 0x00000100),
115	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
116	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0),
117	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_WATERMK, 0xfc000000, 0x00000000)
118};
119
120static const struct soc15_reg_golden golden_settings_sdma_vg10[] = {
121	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00104002),
122	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002),
123	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
124	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0018773f, 0x00104002),
125	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002)
126};
127
128static const struct soc15_reg_golden golden_settings_sdma_vg12[] = {
129	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00104001),
130	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104001),
131	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
132	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0018773f, 0x00104001),
133	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104001)
134};
135
136static const struct soc15_reg_golden golden_settings_sdma_4_1[] = {
137	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
138	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100),
139	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_IB_CNTL, 0x800f0111, 0x00000100),
140	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
141	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_POWER_CNTL, 0xfc3fffff, 0x40000051),
142	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_IB_CNTL, 0x800f0111, 0x00000100),
143	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
144	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100),
145	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
146	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
147	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x00000000)
148};
149
150static const struct soc15_reg_golden golden_settings_sdma0_4_2_init[] = {
151	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff0, 0x00403000),
152};
153
154static const struct soc15_reg_golden golden_settings_sdma0_4_2[] =
155{
156	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
157	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100),
158	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
159	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
160	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
161	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
162	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
163	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
164	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RD_BURST_CNTL, 0x0000000f, 0x00000003),
165	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
166	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff0, 0x00403000),
167	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
168	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
169	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC2_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
170	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
171	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC3_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
172	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
173	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC4_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
174	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
175	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC5_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
176	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
177	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC6_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
178	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
179	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC7_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
180	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
181	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
182};
183
184static const struct soc15_reg_golden golden_settings_sdma1_4_2[] = {
185	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
186	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CLK_CTRL, 0xffffffff, 0x3f000100),
187	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
188	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
189	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
190	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
191	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
192	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
193	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RD_BURST_CNTL, 0x0000000f, 0x00000003),
194	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
195	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff0, 0x00403000),
196	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
197	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
198	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC2_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
199	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
200	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC3_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
201	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
202	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC4_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
203	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
204	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC5_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
205	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
206	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC6_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
207	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
208	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC7_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
209	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
210	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0),
211};
212
213static const struct soc15_reg_golden golden_settings_sdma_rv1[] =
214{
215	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00000002),
216	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00000002)
217};
218
219static const struct soc15_reg_golden golden_settings_sdma_rv2[] =
220{
221	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00003001),
222	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00003001)
223};
224
225static const struct soc15_reg_golden golden_settings_sdma_arct[] =
226{
227	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
228	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
229	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
230	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
231	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
232	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
233	SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
234	SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
235	SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
236	SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
237	SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
238	SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
239	SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
240	SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
241	SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
242	SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
243	SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
244	SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
245	SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
246	SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
247	SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
248	SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
249	SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
250	SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002)
251};
252
253static const struct soc15_reg_golden golden_settings_sdma_4_3[] = {
254	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
255	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100),
256	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00000002),
257	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00000002),
258	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
259	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_POWER_CNTL, 0x003fff07, 0x40000051),
260	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
261	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
262	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
263	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x03fbe1fe)
264};
265
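/*
 * RAS error counters: one entry per single-error-detect (SED) field in the
 * SDMA EDC counter register.
 */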
266static const struct soc15_ras_field_entry sdma_v4_0_ras_fields[] = {
267	{ "SDMA_UCODE_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
268	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UCODE_BUF_SED),
269	0, 0,
270	},
271	{ "SDMA_RB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
272	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_RB_CMD_BUF_SED),
273	0, 0,
274	},
275	{ "SDMA_IB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
276	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_IB_CMD_BUF_SED),
277	0, 0,
278	},
279	{ "SDMA_UTCL1_RD_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
280	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UTCL1_RD_FIFO_SED),
281	0, 0,
282	},
283	{ "SDMA_UTCL1_RDBST_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
284	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UTCL1_RDBST_FIFO_SED),
285	0, 0,
286	},
287	{ "SDMA_DATA_LUT_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
288	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_DATA_LUT_FIFO_SED),
289	0, 0,
290	},
291	{ "SDMA_MBANK_DATA_BUF0_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
292	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF0_SED),
293	0, 0,
294	},
295	{ "SDMA_MBANK_DATA_BUF1_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
296	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF1_SED),
297	0, 0,
298	},
299	{ "SDMA_MBANK_DATA_BUF2_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
300	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF2_SED),
301	0, 0,
302	},
303	{ "SDMA_MBANK_DATA_BUF3_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
304	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF3_SED),
305	0, 0,
306	},
307	{ "SDMA_MBANK_DATA_BUF4_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
308	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF4_SED),
309	0, 0,
310	},
311	{ "SDMA_MBANK_DATA_BUF5_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
312	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF5_SED),
313	0, 0,
314	},
315	{ "SDMA_MBANK_DATA_BUF6_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
316	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF6_SED),
317	0, 0,
318	},
319	{ "SDMA_MBANK_DATA_BUF7_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
320	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF7_SED),
321	0, 0,
322	},
323	{ "SDMA_MBANK_DATA_BUF8_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
324	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF8_SED),
325	0, 0,
326	},
327	{ "SDMA_MBANK_DATA_BUF9_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
328	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF9_SED),
329	0, 0,
330	},
331	{ "SDMA_MBANK_DATA_BUF10_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
332	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF10_SED),
333	0, 0,
334	},
335	{ "SDMA_MBANK_DATA_BUF11_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
336	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF11_SED),
337	0, 0,
338	},
339	{ "SDMA_MBANK_DATA_BUF12_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
340	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF12_SED),
341	0, 0,
342	},
343	{ "SDMA_MBANK_DATA_BUF13_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
344	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF13_SED),
345	0, 0,
346	},
347	{ "SDMA_MBANK_DATA_BUF14_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
348	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF14_SED),
349	0, 0,
350	},
351	{ "SDMA_MBANK_DATA_BUF15_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
352	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF15_SED),
353	0, 0,
354	},
355	{ "SDMA_SPLIT_DAT_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
356	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_SPLIT_DAT_BUF_SED),
357	0, 0,
358	},
359	{ "SDMA_MC_WR_ADDR_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
360	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MC_WR_ADDR_FIFO_SED),
361	0, 0,
362	},
363};
364
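/*
 * Map an SDMA instance index plus an SDMA0-relative register offset to the
 * absolute register offset of that instance.
 */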
365static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev,
366		u32 instance, u32 offset)
367{
368	switch (instance) {
369	case 0:
370		return (adev->reg_offset[SDMA0_HWIP][0][0] + offset);
371	case 1:
372		return (adev->reg_offset[SDMA1_HWIP][0][0] + offset);
373	case 2:
374		return (adev->reg_offset[SDMA2_HWIP][0][1] + offset);
375	case 3:
376		return (adev->reg_offset[SDMA3_HWIP][0][1] + offset);
377	case 4:
378		return (adev->reg_offset[SDMA4_HWIP][0][1] + offset);
379	case 5:
380		return (adev->reg_offset[SDMA5_HWIP][0][1] + offset);
381	case 6:
382		return (adev->reg_offset[SDMA6_HWIP][0][1] + offset);
383	case 7:
384		return (adev->reg_offset[SDMA7_HWIP][0][1] + offset);
385	default:
386		break;
387	}
388	return 0;
389}
390
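/* Translate an SDMA instance index to its IH client id, and back below. */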
391static unsigned sdma_v4_0_seq_to_irq_id(int seq_num)
392{
393	switch (seq_num) {
394	case 0:
395		return SOC15_IH_CLIENTID_SDMA0;
396	case 1:
397		return SOC15_IH_CLIENTID_SDMA1;
398	case 2:
399		return SOC15_IH_CLIENTID_SDMA2;
400	case 3:
401		return SOC15_IH_CLIENTID_SDMA3;
402	case 4:
403		return SOC15_IH_CLIENTID_SDMA4;
404	case 5:
405		return SOC15_IH_CLIENTID_SDMA5;
406	case 6:
407		return SOC15_IH_CLIENTID_SDMA6;
408	case 7:
409		return SOC15_IH_CLIENTID_SDMA7;
410	default:
411		break;
412	}
413	return -EINVAL;
414}
415
416static int sdma_v4_0_irq_id_to_seq(unsigned client_id)
417{
418	switch (client_id) {
419	case SOC15_IH_CLIENTID_SDMA0:
420		return 0;
421	case SOC15_IH_CLIENTID_SDMA1:
422		return 1;
423	case SOC15_IH_CLIENTID_SDMA2:
424		return 2;
425	case SOC15_IH_CLIENTID_SDMA3:
426		return 3;
427	case SOC15_IH_CLIENTID_SDMA4:
428		return 4;
429	case SOC15_IH_CLIENTID_SDMA5:
430		return 5;
431	case SOC15_IH_CLIENTID_SDMA6:
432		return 6;
433	case SOC15_IH_CLIENTID_SDMA7:
434		return 7;
435	default:
436		break;
437	}
438	return -EINVAL;
439}
440
441static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
442{
443	switch (adev->asic_type) {
444	case CHIP_VEGA10:
445		soc15_program_register_sequence(adev,
446						golden_settings_sdma_4,
447						ARRAY_SIZE(golden_settings_sdma_4));
448		soc15_program_register_sequence(adev,
449						golden_settings_sdma_vg10,
450						ARRAY_SIZE(golden_settings_sdma_vg10));
451		break;
452	case CHIP_VEGA12:
453		soc15_program_register_sequence(adev,
454						golden_settings_sdma_4,
455						ARRAY_SIZE(golden_settings_sdma_4));
456		soc15_program_register_sequence(adev,
457						golden_settings_sdma_vg12,
458						ARRAY_SIZE(golden_settings_sdma_vg12));
459		break;
460	case CHIP_VEGA20:
461		soc15_program_register_sequence(adev,
462						golden_settings_sdma0_4_2_init,
463						ARRAY_SIZE(golden_settings_sdma0_4_2_init));
464		soc15_program_register_sequence(adev,
465						golden_settings_sdma0_4_2,
466						ARRAY_SIZE(golden_settings_sdma0_4_2));
467		soc15_program_register_sequence(adev,
468						golden_settings_sdma1_4_2,
469						ARRAY_SIZE(golden_settings_sdma1_4_2));
470		break;
471	case CHIP_ARCTURUS:
472		soc15_program_register_sequence(adev,
473						golden_settings_sdma_arct,
474						ARRAY_SIZE(golden_settings_sdma_arct));
475		break;
476	case CHIP_RAVEN:
477		soc15_program_register_sequence(adev,
478						golden_settings_sdma_4_1,
479						ARRAY_SIZE(golden_settings_sdma_4_1));
480		if (adev->rev_id >= 8)
481			soc15_program_register_sequence(adev,
482							golden_settings_sdma_rv2,
483							ARRAY_SIZE(golden_settings_sdma_rv2));
484		else
485			soc15_program_register_sequence(adev,
486							golden_settings_sdma_rv1,
487							ARRAY_SIZE(golden_settings_sdma_rv1));
488		break;
489	case CHIP_RENOIR:
490		soc15_program_register_sequence(adev,
491						golden_settings_sdma_4_3,
492						ARRAY_SIZE(golden_settings_sdma_4_3));
493		break;
494	default:
495		break;
496	}
497}
498
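/*
 * Validate one instance's firmware image and cache its ucode/feature
 * versions; feature version 20 and up supports the burst NOP packet.
 */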
499static int sdma_v4_0_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst)
500{
501	int err = 0;
502	const struct sdma_firmware_header_v1_0 *hdr;
503
504	err = amdgpu_ucode_validate(sdma_inst->fw);
505	if (err)
506		return err;
507
508	hdr = (const struct sdma_firmware_header_v1_0 *)sdma_inst->fw->data;
509	sdma_inst->fw_version = le32_to_cpu(hdr->header.ucode_version);
510	sdma_inst->feature_version = le32_to_cpu(hdr->ucode_feature_version);
511
512	if (sdma_inst->feature_version >= 20)
513		sdma_inst->burst_nop = true;
514
515	return 0;
516}
517
518static void sdma_v4_0_destroy_inst_ctx(struct amdgpu_device *adev)
519{
520	int i;
521
522	for (i = 0; i < adev->sdma.num_instances; i++) {
523		if (adev->sdma.instance[i].fw != NULL)
524			release_firmware(adev->sdma.instance[i].fw);
525
526		/* arcturus shares the same FW memory across
527		   all SDMA instances */
528		if (adev->asic_type == CHIP_ARCTURUS)
529			break;
530	}
531
532	memset((void*)adev->sdma.instance, 0,
533		sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES);
534}
535
536/**
537 * sdma_v4_0_init_microcode - load ucode images from disk
538 *
539 * @adev: amdgpu_device pointer
540 *
541 * Use the firmware interface to load the ucode images into
542 * the driver (not loaded into hw).
543 * Returns 0 on success, error on failure.
544 */
545
546// emulation only, won't work on a real chip:
547// real vega10 chips need to use the PSP to load firmware
548static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
549{
550	const char *chip_name;
551	char fw_name[30];
552	int err = 0, i;
553	struct amdgpu_firmware_info *info = NULL;
554	const struct common_firmware_header *header = NULL;
555
556	DRM_DEBUG("\n");
557
558	switch (adev->asic_type) {
559	case CHIP_VEGA10:
560		chip_name = "vega10";
561		break;
562	case CHIP_VEGA12:
563		chip_name = "vega12";
564		break;
565	case CHIP_VEGA20:
566		chip_name = "vega20";
567		break;
568	case CHIP_RAVEN:
569		if (adev->rev_id >= 8)
570			chip_name = "raven2";
571		else if (adev->pdev->device == 0x15d8)
572			chip_name = "picasso";
573		else
574			chip_name = "raven";
575		break;
576	case CHIP_ARCTURUS:
577		chip_name = "arcturus";
578		break;
579	case CHIP_RENOIR:
580		chip_name = "renoir";
581		break;
582	default:
583		BUG();
584	}
585
586	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
587
588	err = request_firmware(&adev->sdma.instance[0].fw, fw_name, adev->dev);
589	if (err)
590		goto out;
591
592	err = sdma_v4_0_init_inst_ctx(&adev->sdma.instance[0]);
593	if (err)
594		goto out;
595
596	for (i = 1; i < adev->sdma.num_instances; i++) {
597		if (adev->asic_type == CHIP_ARCTURUS) {
598			/* Arcturus will leverage the same FW memory
599			   for every SDMA instance */
600			memcpy((void*)&adev->sdma.instance[i],
601			       (void*)&adev->sdma.instance[0],
602			       sizeof(struct amdgpu_sdma_instance));
603		}
604		else {
605			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma%d.bin", chip_name, i);
606
607			err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
608			if (err)
609				goto out;
610
611			err = sdma_v4_0_init_inst_ctx(&adev->sdma.instance[i]);
612			if (err)
613				goto out;
614		}
615	}
616
617	DRM_DEBUG("psp_load == '%s'\n",
618		adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false");
619
620	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
621		for (i = 0; i < adev->sdma.num_instances; i++) {
622			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
623			info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
624			info->fw = adev->sdma.instance[i].fw;
625			header = (const struct common_firmware_header *)info->fw->data;
626			adev->firmware.fw_size +=
627				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
628		}
629	}
630
631out:
632	if (err) {
633		DRM_ERROR("sdma_v4_0: Failed to load firmware \"%s\"\n", fw_name);
634		sdma_v4_0_destroy_inst_ctx(adev);
635	}
636	return err;
637}
638
639/**
640 * sdma_v4_0_ring_get_rptr - get the current read pointer
641 *
642 * @ring: amdgpu ring pointer
643 *
644 * Get the current rptr from the hardware (VEGA10+).
645 */
646static uint64_t sdma_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
647{
648	u64 *rptr;
649
650	/* XXX check if swapping is necessary on BE */
651	rptr = ((u64 *)&ring->adev->wb.wb[ring->rptr_offs]);
652
653	DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
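	/* the hardware keeps the pointer in bytes; the ring works in dwords */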
654	return ((*rptr) >> 2);
655}
656
657/**
658 * sdma_v4_0_ring_get_wptr - get the current write pointer
659 *
660 * @ring: amdgpu ring pointer
661 *
662 * Get the current wptr from the hardware (VEGA10+).
663 */
664static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
665{
666	struct amdgpu_device *adev = ring->adev;
667	u64 wptr;
668
669	if (ring->use_doorbell) {
670		/* XXX check if swapping is necessary on BE */
671		wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
672		DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
673	} else {
674		wptr = RREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR_HI);
675		wptr = wptr << 32;
676		wptr |= RREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR);
677		DRM_DEBUG("wptr before shift [%i] wptr == 0x%016llx\n",
678				ring->me, wptr);
679	}
680
681	return wptr >> 2;
682}
683
684/**
685 * sdma_v4_0_ring_set_wptr - commit the write pointer
686 *
687 * @ring: amdgpu ring pointer
688 *
689 * Write the wptr back to the hardware (VEGA10+).
690 */
691static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
692{
693	struct amdgpu_device *adev = ring->adev;
694
695	DRM_DEBUG("Setting write pointer\n");
696	if (ring->use_doorbell) {
697		u64 *wb = (u64 *)&adev->wb.wb[ring->wptr_offs];
698
699		DRM_DEBUG("Using doorbell -- "
700				"wptr_offs == 0x%08x "
701				"lower_32_bits(ring->wptr) << 2 == 0x%08x "
702				"upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
703				ring->wptr_offs,
704				lower_32_bits(ring->wptr << 2),
705				upper_32_bits(ring->wptr << 2));
706		/* XXX check if swapping is necessary on BE */
707		WRITE_ONCE(*wb, (ring->wptr << 2));
708		DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
709				ring->doorbell_index, ring->wptr << 2);
710		WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
711	} else {
712		DRM_DEBUG("Not using doorbell -- "
713				"mmSDMA%i_GFX_RB_WPTR == 0x%08x "
714				"mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
715				ring->me,
716				lower_32_bits(ring->wptr << 2),
717				ring->me,
718				upper_32_bits(ring->wptr << 2));
719		WREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR,
720			    lower_32_bits(ring->wptr << 2));
721		WREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR_HI,
722			    upper_32_bits(ring->wptr << 2));
723	}
724}
725
726/**
727 * sdma_v4_0_page_ring_get_wptr - get the current write pointer
728 *
729 * @ring: amdgpu ring pointer
730 *
731 * Get the current wptr from the hardware (VEGA10+).
732 */
733static uint64_t sdma_v4_0_page_ring_get_wptr(struct amdgpu_ring *ring)
734{
735	struct amdgpu_device *adev = ring->adev;
736	u64 wptr;
737
738	if (ring->use_doorbell) {
739		/* XXX check if swapping is necessary on BE */
740		wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
741	} else {
742		wptr = RREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR_HI);
743		wptr = wptr << 32;
744		wptr |= RREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR);
745	}
746
747	return wptr >> 2;
748}
749
750/**
751 * sdma_v4_0_page_ring_set_wptr - commit the page ring write pointer
752 *
753 * @ring: amdgpu ring pointer
754 *
755 * Write the wptr back to the hardware (VEGA10+).
756 */
757static void sdma_v4_0_page_ring_set_wptr(struct amdgpu_ring *ring)
758{
759	struct amdgpu_device *adev = ring->adev;
760
761	if (ring->use_doorbell) {
762		u64 *wb = (u64 *)&adev->wb.wb[ring->wptr_offs];
763
764		/* XXX check if swapping is necessary on BE */
765		WRITE_ONCE(*wb, (ring->wptr << 2));
766		WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
767	} else {
768		uint64_t wptr = ring->wptr << 2;
769
770		WREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR,
771			    lower_32_bits(wptr));
772		WREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR_HI,
773			    upper_32_bits(wptr));
774	}
775}
776
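/*
 * Pad the ring with NOPs; when the firmware supports burst NOP, the first
 * dword is a burst NOP header that folds the remaining padding into a
 * single packet.
 */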
777static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
778{
779	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
780	int i;
781
782	for (i = 0; i < count; i++)
783		if (sdma && sdma->burst_nop && (i == 0))
784			amdgpu_ring_write(ring, ring->funcs->nop |
785				SDMA_PKT_NOP_HEADER_COUNT(count - 1));
786		else
787			amdgpu_ring_write(ring, ring->funcs->nop);
788}
789
790/**
791 * sdma_v4_0_ring_emit_ib - Schedule an IB on the DMA engine
792 *
793 * @ring: amdgpu ring pointer
794 * @ib: IB object to schedule
795 *
796 * Schedule an IB in the DMA ring (VEGA10).
797 */
798static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
799				   struct amdgpu_job *job,
800				   struct amdgpu_ib *ib,
801				   uint32_t flags)
802{
803	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
804
805	/* an IB packet must end on an 8 DW boundary; pad with NOPs if needed */
806	sdma_v4_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
807
808	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
809			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
810	/* base must be 32 byte aligned */
811	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
812	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
813	amdgpu_ring_write(ring, ib->length_dw);
814	amdgpu_ring_write(ring, 0);
815	amdgpu_ring_write(ring, 0);
816
817}
818
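/*
 * Emit a POLL_REGMEM packet: poll a register or memory location until
 * (value & mask) == ref, with the given retry interval.
 */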
819static void sdma_v4_0_wait_reg_mem(struct amdgpu_ring *ring,
820				   int mem_space, int hdp,
821				   uint32_t addr0, uint32_t addr1,
822				   uint32_t ref, uint32_t mask,
823				   uint32_t inv)
824{
825	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
826			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(hdp) |
827			  SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(mem_space) |
828			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
829	if (mem_space) {
830		/* memory */
831		amdgpu_ring_write(ring, addr0);
832		amdgpu_ring_write(ring, addr1);
833	} else {
834		/* registers */
835		amdgpu_ring_write(ring, addr0 << 2);
836		amdgpu_ring_write(ring, addr1 << 2);
837	}
838	amdgpu_ring_write(ring, ref); /* reference */
839	amdgpu_ring_write(ring, mask); /* mask */
840	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
841			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(inv)); /* retry count, poll interval */
842}
843
844/**
845 * sdma_v4_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
846 *
847 * @ring: amdgpu ring pointer
848 *
849 * Emit an hdp flush packet on the requested DMA ring.
850 */
851static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
852{
853	struct amdgpu_device *adev = ring->adev;
854	u32 ref_and_mask = 0;
855	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
856
857	ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
858
859	sdma_v4_0_wait_reg_mem(ring, 0, 1,
860			       adev->nbio.funcs->get_hdp_flush_done_offset(adev),
861			       adev->nbio.funcs->get_hdp_flush_req_offset(adev),
862			       ref_and_mask, ref_and_mask, 10);
863}
864
865/**
866 * sdma_v4_0_ring_emit_fence - emit a fence on the DMA ring
867 *
868 * @ring: amdgpu ring pointer
869 * @fence: amdgpu fence object
870 *
871 * Add a DMA fence packet to the ring to write
872 * the fence seq number and DMA trap packet to generate
873 * an interrupt if needed (VEGA10).
874 */
875static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
876				      unsigned flags)
877{
878	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
879	/* write the fence */
880	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
881	/* zero in first two bits */
882	BUG_ON(addr & 0x3);
883	amdgpu_ring_write(ring, lower_32_bits(addr));
884	amdgpu_ring_write(ring, upper_32_bits(addr));
885	amdgpu_ring_write(ring, lower_32_bits(seq));
886
887	/* optionally write high bits as well */
888	if (write64bit) {
889		addr += 4;
890		amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
891		/* zero in first two bits */
892		BUG_ON(addr & 0x3);
893		amdgpu_ring_write(ring, lower_32_bits(addr));
894		amdgpu_ring_write(ring, upper_32_bits(addr));
895		amdgpu_ring_write(ring, upper_32_bits(seq));
896	}
897
898	/* generate an interrupt */
899	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
900	amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
901}
902
903
904/**
905 * sdma_v4_0_gfx_stop - stop the gfx async dma engines
906 *
907 * @adev: amdgpu_device pointer
908 *
909 * Stop the gfx async dma ring buffers (VEGA10).
910 */
911static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev)
912{
913	struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
914	u32 rb_cntl, ib_cntl;
915	int i, unset = 0;
916
917	for (i = 0; i < adev->sdma.num_instances; i++) {
918		sdma[i] = &adev->sdma.instance[i].ring;
919
920		if ((adev->mman.buffer_funcs_ring == sdma[i]) && unset != 1) {
921			amdgpu_ttm_set_buffer_funcs_status(adev, false);
922			unset = 1;
923		}
924
925		rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL);
926		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
927		WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
928		ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL);
929		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
930		WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl);
931
932		sdma[i]->sched.ready = false;
933	}
934}
935
936/**
937 * sdma_v4_0_rlc_stop - stop the compute async dma engines
938 *
939 * @adev: amdgpu_device pointer
940 *
941 * Stop the compute async dma queues (VEGA10).
942 */
943static void sdma_v4_0_rlc_stop(struct amdgpu_device *adev)
944{
945	/* XXX todo */
946}
947
948/**
949 * sdma_v4_0_page_stop - stop the page async dma engines
950 *
951 * @adev: amdgpu_device pointer
952 *
953 * Stop the page async dma ring buffers (VEGA10).
954 */
955static void sdma_v4_0_page_stop(struct amdgpu_device *adev)
956{
957	struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
958	u32 rb_cntl, ib_cntl;
959	int i;
960	bool unset = false;
961
962	for (i = 0; i < adev->sdma.num_instances; i++) {
963		sdma[i] = &adev->sdma.instance[i].page;
964
965		if ((adev->mman.buffer_funcs_ring == sdma[i]) &&
966			(unset == false)) {
967			amdgpu_ttm_set_buffer_funcs_status(adev, false);
968			unset = true;
969		}
970
971		rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL);
972		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL,
973					RB_ENABLE, 0);
974		WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl);
975		ib_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL);
976		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL,
977					IB_ENABLE, 0);
978		WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl);
979
980		sdma[i]->sched.ready = false;
981	}
982}
983
984/**
985 * sdma_v4_0_ctx_switch_enable - enable/disable the async dma engines context switch
986 *
987 * @adev: amdgpu_device pointer
988 * @enable: enable/disable the DMA MEs context switch.
989 *
990 * Halt or unhalt the async dma engines context switch (VEGA10).
991 */
992static void sdma_v4_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
993{
994	u32 f32_cntl, phase_quantum = 0;
995	int i;
996
997	if (amdgpu_sdma_phase_quantum) {
998		unsigned value = amdgpu_sdma_phase_quantum;
999		unsigned unit = 0;
1000
1001		while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
1002				SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) {
1003			value = (value + 1) >> 1;
1004			unit++;
1005		}
1006		if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
1007			    SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) {
1008			value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
1009				 SDMA0_PHASE0_QUANTUM__VALUE__SHIFT);
1010			unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
1011				SDMA0_PHASE0_QUANTUM__UNIT__SHIFT);
1012			WARN_ONCE(1,
1013			"clamping sdma_phase_quantum to %uK clock cycles\n",
1014				  value << unit);
1015		}
1016		phase_quantum =
1017			value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT |
1018			unit  << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT;
1019	}
1020
1021	for (i = 0; i < adev->sdma.num_instances; i++) {
1022		f32_cntl = RREG32_SDMA(i, mmSDMA0_CNTL);
1023		f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
1024				AUTO_CTXSW_ENABLE, enable ? 1 : 0);
1025		if (enable && amdgpu_sdma_phase_quantum) {
1026			WREG32_SDMA(i, mmSDMA0_PHASE0_QUANTUM, phase_quantum);
1027			WREG32_SDMA(i, mmSDMA0_PHASE1_QUANTUM, phase_quantum);
1028			WREG32_SDMA(i, mmSDMA0_PHASE2_QUANTUM, phase_quantum);
1029		}
1030		WREG32_SDMA(i, mmSDMA0_CNTL, f32_cntl);
1031	}
1032
1033}
1034
1035/**
1036 * sdma_v4_0_enable - halt or unhalt the async dma engines
1037 *
1038 * @adev: amdgpu_device pointer
1039 * @enable: enable/disable the DMA MEs.
1040 *
1041 * Halt or unhalt the async dma engines (VEGA10).
1042 */
1043static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable)
1044{
1045	u32 f32_cntl;
1046	int i;
1047
1048	if (enable == false) {
1049		sdma_v4_0_gfx_stop(adev);
1050		sdma_v4_0_rlc_stop(adev);
1051		if (adev->sdma.has_page_queue)
1052			sdma_v4_0_page_stop(adev);
1053	}
1054
1055	for (i = 0; i < adev->sdma.num_instances; i++) {
1056		f32_cntl = RREG32_SDMA(i, mmSDMA0_F32_CNTL);
1057		f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1);
1058		WREG32_SDMA(i, mmSDMA0_F32_CNTL, f32_cntl);
1059	}
1060}
1061
1062/**
1063 * sdma_v4_0_rb_cntl - get parameters for rb_cntl
1064 */
1065static uint32_t sdma_v4_0_rb_cntl(struct amdgpu_ring *ring, uint32_t rb_cntl)
1066{
1067	/* Set ring buffer size in dwords */
1068	uint32_t rb_bufsz = order_base_2(ring->ring_size / 4);
1069
1070	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
1071#ifdef __BIG_ENDIAN
1072	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
1073	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
1074				RPTR_WRITEBACK_SWAP_ENABLE, 1);
1075#endif
1076	return rb_cntl;
1077}
1078
1079/**
1080 * sdma_v4_0_gfx_resume - setup and start the async dma engines
1081 *
1082 * @adev: amdgpu_device pointer
1083 * @i: instance to resume
1084 *
1085 * Set up the gfx DMA ring buffers and enable them (VEGA10).
1087 */
1088static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
1089{
1090	struct amdgpu_ring *ring = &adev->sdma.instance[i].ring;
1091	u32 rb_cntl, ib_cntl, wptr_poll_cntl;
1092	u32 wb_offset;
1093	u32 doorbell;
1094	u32 doorbell_offset;
1095	u64 wptr_gpu_addr;
1096
1097	wb_offset = (ring->rptr_offs * 4);
1098
1099	rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL);
1100	rb_cntl = sdma_v4_0_rb_cntl(ring, rb_cntl);
1101	WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
1102
1103	/* Initialize the ring buffer's read and write pointers */
1104	WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR, 0);
1105	WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_HI, 0);
1106	WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR, 0);
1107	WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_HI, 0);
1108
1109	/* set the wb address whether it's enabled or not */
1110	WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_ADDR_HI,
1111	       upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
1112	WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_ADDR_LO,
1113	       lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
1114
1115	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
1116				RPTR_WRITEBACK_ENABLE, 1);
1117
1118	WREG32_SDMA(i, mmSDMA0_GFX_RB_BASE, ring->gpu_addr >> 8);
1119	WREG32_SDMA(i, mmSDMA0_GFX_RB_BASE_HI, ring->gpu_addr >> 40);
1120
1121	ring->wptr = 0;
1122
1123	/* before programming wptr to a smaller value, minor_ptr_update must be set first */
1124	WREG32_SDMA(i, mmSDMA0_GFX_MINOR_PTR_UPDATE, 1);
1125
1126	doorbell = RREG32_SDMA(i, mmSDMA0_GFX_DOORBELL);
1127	doorbell_offset = RREG32_SDMA(i, mmSDMA0_GFX_DOORBELL_OFFSET);
1128
1129	doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE,
1130				 ring->use_doorbell);
1131	doorbell_offset = REG_SET_FIELD(doorbell_offset,
1132					SDMA0_GFX_DOORBELL_OFFSET,
1133					OFFSET, ring->doorbell_index);
1134	WREG32_SDMA(i, mmSDMA0_GFX_DOORBELL, doorbell);
1135	WREG32_SDMA(i, mmSDMA0_GFX_DOORBELL_OFFSET, doorbell_offset);
1136
1137	sdma_v4_0_ring_set_wptr(ring);
1138
1139	/* set minor_ptr_update to 0 after wptr is programmed */
1140	WREG32_SDMA(i, mmSDMA0_GFX_MINOR_PTR_UPDATE, 0);
1141
1142	/* setup the wptr shadow polling */
1143	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
1144	WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO,
1145		    lower_32_bits(wptr_gpu_addr));
1146	WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI,
1147		    upper_32_bits(wptr_gpu_addr));
1148	wptr_poll_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL);
1149	wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
1150				       SDMA0_GFX_RB_WPTR_POLL_CNTL,
1151				       F32_POLL_ENABLE, amdgpu_sriov_vf(adev)? 1 : 0);
1152	WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, wptr_poll_cntl);
1153
1154	/* enable DMA RB */
1155	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
1156	WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
1157
1158	ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL);
1159	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
1160#ifdef __BIG_ENDIAN
1161	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
1162#endif
1163	/* enable DMA IBs */
1164	WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl);
1165
1166	ring->sched.ready = true;
1167}
1168
1169/**
1170 * sdma_v4_0_page_resume - setup and start the async dma engines
1171 *
1172 * @adev: amdgpu_device pointer
1173 * @i: instance to resume
1174 *
1175 * Set up the page DMA ring buffers and enable them (VEGA10).
1177 */
1178static void sdma_v4_0_page_resume(struct amdgpu_device *adev, unsigned int i)
1179{
1180	struct amdgpu_ring *ring = &adev->sdma.instance[i].page;
1181	u32 rb_cntl, ib_cntl, wptr_poll_cntl;
1182	u32 wb_offset;
1183	u32 doorbell;
1184	u32 doorbell_offset;
1185	u64 wptr_gpu_addr;
1186
1187	wb_offset = (ring->rptr_offs * 4);
1188
1189	rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL);
1190	rb_cntl = sdma_v4_0_rb_cntl(ring, rb_cntl);
1191	WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl);
1192
1193	/* Initialize the ring buffer's read and write pointers */
1194	WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR, 0);
1195	WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_HI, 0);
1196	WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR, 0);
1197	WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_HI, 0);
1198
1199	/* set the wb address whether it's enabled or not */
1200	WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_ADDR_HI,
1201	       upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
1202	WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_ADDR_LO,
1203	       lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
1204
1205	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL,
1206				RPTR_WRITEBACK_ENABLE, 1);
1207
1208	WREG32_SDMA(i, mmSDMA0_PAGE_RB_BASE, ring->gpu_addr >> 8);
1209	WREG32_SDMA(i, mmSDMA0_PAGE_RB_BASE_HI, ring->gpu_addr >> 40);
1210
1211	ring->wptr = 0;
1212
1213	/* before programming wptr to a smaller value, minor_ptr_update must be set first */
1214	WREG32_SDMA(i, mmSDMA0_PAGE_MINOR_PTR_UPDATE, 1);
1215
1216	doorbell = RREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL);
1217	doorbell_offset = RREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL_OFFSET);
1218
1219	doorbell = REG_SET_FIELD(doorbell, SDMA0_PAGE_DOORBELL, ENABLE,
1220				 ring->use_doorbell);
1221	doorbell_offset = REG_SET_FIELD(doorbell_offset,
1222					SDMA0_PAGE_DOORBELL_OFFSET,
1223					OFFSET, ring->doorbell_index);
1224	WREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL, doorbell);
1225	WREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL_OFFSET, doorbell_offset);
1226
1227	/* the paging queue doorbell range is set up in sdma_v4_0_gfx_resume */
1228	sdma_v4_0_page_ring_set_wptr(ring);
1229
1230	/* set minor_ptr_update to 0 after wptr is programmed */
1231	WREG32_SDMA(i, mmSDMA0_PAGE_MINOR_PTR_UPDATE, 0);
1232
1233	/* setup the wptr shadow polling */
1234	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
1235	WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_ADDR_LO,
1236		    lower_32_bits(wptr_gpu_addr));
1237	WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_ADDR_HI,
1238		    upper_32_bits(wptr_gpu_addr));
1239	wptr_poll_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL);
1240	wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
1241				       SDMA0_PAGE_RB_WPTR_POLL_CNTL,
1242				       F32_POLL_ENABLE, amdgpu_sriov_vf(adev)? 1 : 0);
1243	WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, wptr_poll_cntl);
1244
1245	/* enable DMA RB */
1246	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL, RB_ENABLE, 1);
1247	WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl);
1248
1249	ib_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL);
1250	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL, IB_ENABLE, 1);
1251#ifdef __BIG_ENDIAN
1252	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL, IB_SWAP_ENABLE, 1);
1253#endif
1254	/* enable DMA IBs */
1255	WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl);
1256
1257	ring->sched.ready = true;
1258}
1259
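/*
 * SDMA 4.1 (Raven/Renoir) power gating helpers: enable or disable the
 * context-empty idle interrupt used when SDMA power gating is supported.
 */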
1260static void
1261sdma_v4_1_update_power_gating(struct amdgpu_device *adev, bool enable)
1262{
1263	uint32_t def, data;
1264
1265	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_SDMA)) {
1266		/* enable idle interrupt */
1267		def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL));
1268		data |= SDMA0_CNTL__CTXEMPTY_INT_ENABLE_MASK;
1269
1270		if (data != def)
1271			WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL), data);
1272	} else {
1273		/* disable idle interrupt */
1274		def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL));
1275		data &= ~SDMA0_CNTL__CTXEMPTY_INT_ENABLE_MASK;
1276		if (data != def)
1277			WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL), data);
1278	}
1279}
1280
1281static void sdma_v4_1_init_power_gating(struct amdgpu_device *adev)
1282{
1283	uint32_t def, data;
1284
1285	/* Enable HW based PG. */
1286	def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL));
1287	data |= SDMA0_POWER_CNTL__PG_CNTL_ENABLE_MASK;
1288	if (data != def)
1289		WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data);
1290
1291	/* enable interrupt */
1292	def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL));
1293	data |= SDMA0_CNTL__CTXEMPTY_INT_ENABLE_MASK;
1294	if (data != def)
1295		WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL), data);
1296
1297	/* Configure hold time to filter out invalid power on/off requests. Use the default for now. */
1298	def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL));
1299	data &= ~SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK;
1300	data |= (mmSDMA0_POWER_CNTL_DEFAULT & SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK);
1301	/* Configure switch time for hysteresis purposes. Use the default for now. */
1302	data &= ~SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK;
1303	data |= (mmSDMA0_POWER_CNTL_DEFAULT & SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK);
1304	if (data != def)
1305		WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data);
1306}
1307
1308static void sdma_v4_0_init_pg(struct amdgpu_device *adev)
1309{
1310	if (!(adev->pg_flags & AMD_PG_SUPPORT_SDMA))
1311		return;
1312
1313	switch (adev->asic_type) {
1314	case CHIP_RAVEN:
1315	case CHIP_RENOIR:
1316		sdma_v4_1_init_power_gating(adev);
1317		sdma_v4_1_update_power_gating(adev, true);
1318		break;
1319	default:
1320		break;
1321	}
1322}
1323
1324/**
1325 * sdma_v4_0_rlc_resume - setup and start the async dma engines
1326 *
1327 * @adev: amdgpu_device pointer
1328 *
1329 * Set up the compute DMA queues and enable them (VEGA10).
1330 * Returns 0 for success, error for failure.
1331 */
1332static int sdma_v4_0_rlc_resume(struct amdgpu_device *adev)
1333{
1334	sdma_v4_0_init_pg(adev);
1335
1336	return 0;
1337}
1338
1339/**
1340 * sdma_v4_0_load_microcode - load the sDMA ME ucode
1341 *
1342 * @adev: amdgpu_device pointer
1343 *
1344 * Loads the sDMA0/1 ucode.
1345 * Returns 0 for success, -EINVAL if the ucode is not available.
1346 */
1347static int sdma_v4_0_load_microcode(struct amdgpu_device *adev)
1348{
1349	const struct sdma_firmware_header_v1_0 *hdr;
1350	const __le32 *fw_data;
1351	u32 fw_size;
1352	int i, j;
1353
1354	/* halt the MEs */
1355	sdma_v4_0_enable(adev, false);
1356
1357	for (i = 0; i < adev->sdma.num_instances; i++) {
1358		if (!adev->sdma.instance[i].fw)
1359			return -EINVAL;
1360
1361		hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
1362		amdgpu_ucode_print_sdma_hdr(&hdr->header);
1363		fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1364
1365		fw_data = (const __le32 *)
1366			(adev->sdma.instance[i].fw->data +
1367				le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1368
1369		WREG32_SDMA(i, mmSDMA0_UCODE_ADDR, 0);
1370
1371		for (j = 0; j < fw_size; j++)
1372			WREG32_SDMA(i, mmSDMA0_UCODE_DATA,
1373				    le32_to_cpup(fw_data++));
1374
1375		WREG32_SDMA(i, mmSDMA0_UCODE_ADDR,
1376			    adev->sdma.instance[i].fw_version);
1377	}
1378
1379	return 0;
1380}
1381
1382/**
1383 * sdma_v4_0_start - setup and start the async dma engines
1384 *
1385 * @adev: amdgpu_device pointer
1386 *
1387 * Set up the DMA engines and enable them (VEGA10).
1388 * Returns 0 for success, error for failure.
1389 */
1390static int sdma_v4_0_start(struct amdgpu_device *adev)
1391{
1392	struct amdgpu_ring *ring;
1393	int i, r = 0;
1394
1395	if (amdgpu_sriov_vf(adev)) {
1396		sdma_v4_0_ctx_switch_enable(adev, false);
1397		sdma_v4_0_enable(adev, false);
1398	} else {
1399
1400		if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
1401			r = sdma_v4_0_load_microcode(adev);
1402			if (r)
1403				return r;
1404		}
1405
1406		/* unhalt the MEs */
1407		sdma_v4_0_enable(adev, true);
1408		/* enable sdma ring preemption */
1409		sdma_v4_0_ctx_switch_enable(adev, true);
1410	}
1411
1412	/* start the gfx rings and rlc compute queues */
1413	for (i = 0; i < adev->sdma.num_instances; i++) {
1414		uint32_t temp;
1415
1416		WREG32_SDMA(i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL, 0);
1417		sdma_v4_0_gfx_resume(adev, i);
1418		if (adev->sdma.has_page_queue)
1419			sdma_v4_0_page_resume(adev, i);
1420
1421		/* always set the UTC L1 enable flag to 1 */
1422		temp = RREG32_SDMA(i, mmSDMA0_CNTL);
1423		temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
1424		WREG32_SDMA(i, mmSDMA0_CNTL, temp);
1425
1426		if (!amdgpu_sriov_vf(adev)) {
1427			/* unhalt engine */
1428			temp = RREG32_SDMA(i, mmSDMA0_F32_CNTL);
1429			temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
1430			WREG32_SDMA(i, mmSDMA0_F32_CNTL, temp);
1431		}
1432	}
1433
1434	if (amdgpu_sriov_vf(adev)) {
1435		sdma_v4_0_ctx_switch_enable(adev, true);
1436		sdma_v4_0_enable(adev, true);
1437	} else {
1438		r = sdma_v4_0_rlc_resume(adev);
1439		if (r)
1440			return r;
1441	}
1442
1443	for (i = 0; i < adev->sdma.num_instances; i++) {
1444		ring = &adev->sdma.instance[i].ring;
1445
1446		r = amdgpu_ring_test_helper(ring);
1447		if (r)
1448			return r;
1449
1450		if (adev->sdma.has_page_queue) {
1451			struct amdgpu_ring *page = &adev->sdma.instance[i].page;
1452
1453			r = amdgpu_ring_test_helper(page);
1454			if (r)
1455				return r;
1456
1457			if (adev->mman.buffer_funcs_ring == page)
1458				amdgpu_ttm_set_buffer_funcs_status(adev, true);
1459		}
1460
1461		if (adev->mman.buffer_funcs_ring == ring)
1462			amdgpu_ttm_set_buffer_funcs_status(adev, true);
1463	}
1464
1465	return r;
1466}
1467
1468/**
1469 * sdma_v4_0_ring_test_ring - simple async dma engine test
1470 *
1471 * @ring: amdgpu_ring structure holding ring information
1472 *
1473 * Test the DMA engine by using it to write a value to
1474 * memory (VEGA10).
1475 * Returns 0 for success, error for failure.
1476 */
1477static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring)
1478{
1479	struct amdgpu_device *adev = ring->adev;
1480	unsigned i;
1481	unsigned index;
1482	int r;
1483	u32 tmp;
1484	u64 gpu_addr;
1485
1486	r = amdgpu_device_wb_get(adev, &index);
1487	if (r)
1488		return r;
1489
1490	gpu_addr = adev->wb.gpu_addr + (index * 4);
1491	tmp = 0xCAFEDEAD;
1492	adev->wb.wb[index] = cpu_to_le32(tmp);
1493
1494	r = amdgpu_ring_alloc(ring, 5);
1495	if (r)
1496		goto error_free_wb;
1497
1498	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
1499			  SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
1500	amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
1501	amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
1502	amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0));
1503	amdgpu_ring_write(ring, 0xDEADBEEF);
1504	amdgpu_ring_commit(ring);
1505
1506	for (i = 0; i < adev->usec_timeout; i++) {
1507		tmp = le32_to_cpu(adev->wb.wb[index]);
1508		if (tmp == 0xDEADBEEF)
1509			break;
1510		udelay(1);
1511	}
1512
1513	if (i >= adev->usec_timeout)
1514		r = -ETIMEDOUT;
1515
1516error_free_wb:
1517	amdgpu_device_wb_free(adev, index);
1518	return r;
1519}
1520
1521/**
1522 * sdma_v4_0_ring_test_ib - test an IB on the DMA engine
1523 *
1524 * @ring: amdgpu_ring structure holding ring information
 * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
1525 *
1526 * Test a simple IB in the DMA ring (VEGA10).
1527 * Returns 0 on success, error on failure.
1528 */
1529static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1530{
1531	struct amdgpu_device *adev = ring->adev;
1532	struct amdgpu_ib ib;
1533	struct dma_fence *f = NULL;
1534	unsigned index;
1535	long r;
1536	u32 tmp = 0;
1537	u64 gpu_addr;
1538
1539	r = amdgpu_device_wb_get(adev, &index);
1540	if (r)
1541		return r;
1542
1543	gpu_addr = adev->wb.gpu_addr + (index * 4);
1544	tmp = 0xCAFEDEAD;
1545	adev->wb.wb[index] = cpu_to_le32(tmp);
1546	memset(&ib, 0, sizeof(ib));
1547	r = amdgpu_ib_get(adev, NULL, 256, &ib);
1548	if (r)
1549		goto err0;
1550
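	/* Build an 8-dword test IB: a WRITE_LINEAR packet that stores
	 * 0xDEADBEEF in the writeback slot, followed by three NOPs for
	 * alignment.
	 */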
1551	ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
1552		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
1553	ib.ptr[1] = lower_32_bits(gpu_addr);
1554	ib.ptr[2] = upper_32_bits(gpu_addr);
1555	ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0);
1556	ib.ptr[4] = 0xDEADBEEF;
1557	ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
1558	ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
1559	ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
1560	ib.length_dw = 8;
1561
1562	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1563	if (r)
1564		goto err1;
1565
1566	r = dma_fence_wait_timeout(f, false, timeout);
1567	if (r == 0) {
1568		r = -ETIMEDOUT;
1569		goto err1;
1570	} else if (r < 0) {
1571		goto err1;
1572	}
1573	tmp = le32_to_cpu(adev->wb.wb[index]);
1574	if (tmp == 0xDEADBEEF)
1575		r = 0;
1576	else
1577		r = -EINVAL;
1578
1579err1:
1580	amdgpu_ib_free(adev, &ib, NULL);
1581	dma_fence_put(f);
1582err0:
1583	amdgpu_device_wb_free(adev, index);
1584	return r;
1585}
1586
1587
1588/**
1589 * sdma_v4_0_vm_copy_pte - update PTEs by copying them from the GART
1590 *
1591 * @ib: indirect buffer to fill with commands
1592 * @pe: addr of the page entry
1593 * @src: src addr to copy from
1594 * @count: number of page entries to update
1595 *
1596 * Update PTEs by copying them from the GART using sDMA (VEGA10).
1597 */
1598static void sdma_v4_0_vm_copy_pte(struct amdgpu_ib *ib,
1599				  uint64_t pe, uint64_t src,
1600				  unsigned count)
1601{
1602	unsigned bytes = count * 8;
1603
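	/* COPY_LINEAR packet: copy count * 8 bytes of page table entries
	 * from the GART source address to the destination page table.
	 */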
1604	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
1605		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
1606	ib->ptr[ib->length_dw++] = bytes - 1;
1607	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
1608	ib->ptr[ib->length_dw++] = lower_32_bits(src);
1609	ib->ptr[ib->length_dw++] = upper_32_bits(src);
1610	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
1611	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
1613}
1614
1615/**
1616 * sdma_v4_0_vm_write_pte - update PTEs by writing them manually
1617 *
1618 * @ib: indirect buffer to fill with commands
1619 * @pe: addr of the page entry
1620 * @value: content to write into pe
1621 * @count: number of page entries to update
1622 * @incr: increase next addr by incr bytes
1624 *
1625 * Update PTEs by writing them manually using sDMA (VEGA10).
1626 */
1627static void sdma_v4_0_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
1628				   uint64_t value, unsigned count,
1629				   uint32_t incr)
1630{
1631	unsigned ndw = count * 2;
1632
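	/* WRITE_LINEAR packet: destination address followed by ndw data
	 * dwords; each page entry is written as a 64-bit value, i.e. two
	 * dwords per entry.
	 */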
1633	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
1634		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
1635	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
1636	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
1637	ib->ptr[ib->length_dw++] = ndw - 1;
1638	for (; ndw > 0; ndw -= 2) {
1639		ib->ptr[ib->length_dw++] = lower_32_bits(value);
1640		ib->ptr[ib->length_dw++] = upper_32_bits(value);
1641		value += incr;
1642	}
1643}
1644
1645/**
1646 * sdma_v4_0_vm_set_pte_pde - update the page tables using sDMA
1647 *
1648 * @ib: indirect buffer to fill with commands
1649 * @pe: addr of the page entry
1650 * @addr: dst addr to write into pe
1651 * @count: number of page entries to update
1652 * @incr: increase next addr by incr bytes
1653 * @flags: access flags
1654 *
1655 * Update the page tables using sDMA (VEGA10).
1656 */
1657static void sdma_v4_0_vm_set_pte_pde(struct amdgpu_ib *ib,
1658				     uint64_t pe,
1659				     uint64_t addr, unsigned count,
1660				     uint32_t incr, uint64_t flags)
1661{
1662	/* for physically contiguous pages (vram) */
1663	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_PTEPDE);
1664	ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
1665	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
1666	ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
1667	ib->ptr[ib->length_dw++] = upper_32_bits(flags);
1668	ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
1669	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
1670	ib->ptr[ib->length_dw++] = incr; /* increment size */
1671	ib->ptr[ib->length_dw++] = 0;
1672	ib->ptr[ib->length_dw++] = count - 1; /* number of entries */
1673}
1674
1675/**
1676 * sdma_v4_0_ring_pad_ib - pad the IB to the required number of dw
1677 *
1678 * @ring: amdgpu_ring pointer
1679 * @ib: indirect buffer to fill with padding
1680 *
 * Pad the IB with NOPs to a multiple of 8 dwords (VEGA10).
 */
1681static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
1682{
1683	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
1684	u32 pad_count;
1685	int i;
1686
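	/* Pad up to the next multiple of 8 dwords; when the engine supports
	 * burst NOPs a single NOP header carries the remaining count,
	 * otherwise plain NOPs are emitted.
	 */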
1687	pad_count = (-ib->length_dw) & 7;
1688	for (i = 0; i < pad_count; i++)
1689		if (sdma && sdma->burst_nop && (i == 0))
1690			ib->ptr[ib->length_dw++] =
1691				SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
1692				SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
1693		else
1694			ib->ptr[ib->length_dw++] =
1695				SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
1696}
1697
1698
1699/**
1700 * sdma_v4_0_ring_emit_pipeline_sync - sync the pipeline
1701 *
1702 * @ring: amdgpu_ring pointer
1703 *
1704 * Make sure all previous operations are completed (VEGA10).
1705 */
1706static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
1707{
1708	uint32_t seq = ring->fence_drv.sync_seq;
1709	uint64_t addr = ring->fence_drv.gpu_addr;
1710
1711	/* wait for idle */
1712	sdma_v4_0_wait_reg_mem(ring, 1, 0,
1713			       addr & 0xfffffffc,
1714			       upper_32_bits(addr) & 0xffffffff,
1715			       seq, 0xffffffff, 4);
1716}
1717
1718
1719/**
1720 * sdma_v4_0_ring_emit_vm_flush - vm flush using sDMA
1721 *
1722 * @ring: amdgpu_ring pointer
1723 * @vmid: VM ID to flush
 * @pd_addr: page directory base address
1724 *
1725 * Update the page table base and flush the VM TLB
1726 * using sDMA (VEGA10).
1727 */
1728static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
1729					 unsigned vmid, uint64_t pd_addr)
1730{
1731	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
1732}
1733
1734static void sdma_v4_0_ring_emit_wreg(struct amdgpu_ring *ring,
1735				     uint32_t reg, uint32_t val)
1736{
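	/* SRBM_WRITE packet: header with byte-enable 0xf (write all four
	 * bytes), followed by the register dword offset and the value.
	 */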
1737	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
1738			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
1739	amdgpu_ring_write(ring, reg);
1740	amdgpu_ring_write(ring, val);
1741}
1742
1743static void sdma_v4_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
1744					 uint32_t val, uint32_t mask)
1745{
1746	sdma_v4_0_wait_reg_mem(ring, 0, 0, reg, 0, val, mask, 10);
1747}
1748
1749static bool sdma_v4_0_fw_support_paging_queue(struct amdgpu_device *adev)
1750{
1751	uint fw_version = adev->sdma.instance[0].fw_version;
1752
1753	switch (adev->asic_type) {
1754	case CHIP_VEGA10:
1755		return fw_version >= 430;
1756	case CHIP_VEGA12:
1757		/*return fw_version >= 31;*/
1758		return false;
1759	case CHIP_VEGA20:
1760		return fw_version >= 123;
1761	default:
1762		return false;
1763	}
1764}
1765
1766static int sdma_v4_0_early_init(void *handle)
1767{
1768	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1769	int r;
1770
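	/* Raven and Renoir carry a single SDMA engine, Arcturus has eight,
	 * the other Vega parts have two.
	 */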
1771	if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR)
1772		adev->sdma.num_instances = 1;
1773	else if (adev->asic_type == CHIP_ARCTURUS)
1774		adev->sdma.num_instances = 8;
1775	else
1776		adev->sdma.num_instances = 2;
1777
1778	r = sdma_v4_0_init_microcode(adev);
1779	if (r) {
1780		DRM_ERROR("Failed to load sdma firmware!\n");
1781		return r;
1782	}
1783
1784	/* TODO: Page queue breaks driver reload under SRIOV */
1785	if ((adev->asic_type == CHIP_VEGA10) && amdgpu_sriov_vf(adev))
1786		adev->sdma.has_page_queue = false;
1787	else if (sdma_v4_0_fw_support_paging_queue(adev))
1788		adev->sdma.has_page_queue = true;
1789
1790	sdma_v4_0_set_ring_funcs(adev);
1791	sdma_v4_0_set_buffer_funcs(adev);
1792	sdma_v4_0_set_vm_pte_funcs(adev);
1793	sdma_v4_0_set_irq_funcs(adev);
1794	sdma_v4_0_set_ras_funcs(adev);
1795
1796	return 0;
1797}
1798
1799static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev,
1800		void *err_data,
1801		struct amdgpu_iv_entry *entry);
1802
1803static int sdma_v4_0_late_init(void *handle)
1804{
1805	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1806	struct ras_ih_if ih_info = {
1807		.cb = sdma_v4_0_process_ras_data_cb,
1808	};
1809	int i;
1810
1811	/* read back edc counter registers to clear the counters */
1812	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
1813		for (i = 0; i < adev->sdma.num_instances; i++)
1814			RREG32_SDMA(i, mmSDMA0_EDC_COUNTER);
1815	}
1816
1817	if (adev->sdma.funcs && adev->sdma.funcs->ras_late_init)
1818		return adev->sdma.funcs->ras_late_init(adev, &ih_info);
1819	else
1820		return 0;
1821}
1822
1823static int sdma_v4_0_sw_init(void *handle)
1824{
1825	struct amdgpu_ring *ring;
1826	int r, i;
1827	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1828
1829	/* SDMA trap event */
1830	for (i = 0; i < adev->sdma.num_instances; i++) {
1831		r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i),
1832				      SDMA0_4_0__SRCID__SDMA_TRAP,
1833				      &adev->sdma.trap_irq);
1834		if (r)
1835			return r;
1836	}
1837
1838	/* SDMA SRAM ECC event */
1839	for (i = 0; i < adev->sdma.num_instances; i++) {
1840		r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i),
1841				      SDMA0_4_0__SRCID__SDMA_SRAM_ECC,
1842				      &adev->sdma.ecc_irq);
1843		if (r)
1844			return r;
1845	}
1846
1847	for (i = 0; i < adev->sdma.num_instances; i++) {
1848		ring = &adev->sdma.instance[i].ring;
1849		ring->ring_obj = NULL;
1850		ring->use_doorbell = true;
1851
1852		DRM_INFO("use_doorbell being set to: [%s]\n",
1853				ring->use_doorbell ? "true" : "false");
1854
1855		/* doorbell size is 2 dwords, get DWORD offset */
1856		ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1;
1857
1858		sprintf(ring->name, "sdma%d", i);
1859		r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq,
1860				     AMDGPU_SDMA_IRQ_INSTANCE0 + i);
1861		if (r)
1862			return r;
1863
1864		if (adev->sdma.has_page_queue) {
1865			ring = &adev->sdma.instance[i].page;
1866			ring->ring_obj = NULL;
1867			ring->use_doorbell = true;
1868
1869			/* paging queue use same doorbell index/routing as gfx queue
1870			 * with 0x400 (4096 dwords) offset on second doorbell page
1871			 */
1872			ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1;
1873			ring->doorbell_index += 0x400;
1874
1875			sprintf(ring->name, "page%d", i);
1876			r = amdgpu_ring_init(adev, ring, 1024,
1877					     &adev->sdma.trap_irq,
1878					     AMDGPU_SDMA_IRQ_INSTANCE0 + i);
1879			if (r)
1880				return r;
1881		}
1882	}
1883
1884	return r;
1885}
1886
1887static int sdma_v4_0_sw_fini(void *handle)
1888{
1889	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1890	int i;
1891
1892	if (adev->sdma.funcs && adev->sdma.funcs->ras_fini)
1893		adev->sdma.funcs->ras_fini(adev);
1894
1895	for (i = 0; i < adev->sdma.num_instances; i++) {
1896		amdgpu_ring_fini(&adev->sdma.instance[i].ring);
1897		if (adev->sdma.has_page_queue)
1898			amdgpu_ring_fini(&adev->sdma.instance[i].page);
1899	}
1900
1901	sdma_v4_0_destroy_inst_ctx(adev);
1902
1903	return 0;
1904}
1905
1906static int sdma_v4_0_hw_init(void *handle)
1907{
1908	int r;
1909	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1910
1911	if ((adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs &&
1912			adev->powerplay.pp_funcs->set_powergating_by_smu) ||
1913			(adev->asic_type == CHIP_RENOIR && !adev->in_gpu_reset))
1914		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false);
1915
1916	if (!amdgpu_sriov_vf(adev))
1917		sdma_v4_0_init_golden_registers(adev);
1918
1919	r = sdma_v4_0_start(adev);
1920
1921	return r;
1922}
1923
1924static int sdma_v4_0_hw_fini(void *handle)
1925{
1926	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1927	int i;
1928
1929	if (amdgpu_sriov_vf(adev))
1930		return 0;
1931
1932	for (i = 0; i < adev->sdma.num_instances; i++) {
1933		amdgpu_irq_put(adev, &adev->sdma.ecc_irq,
1934			       AMDGPU_SDMA_IRQ_INSTANCE0 + i);
1935	}
1936
1937	sdma_v4_0_ctx_switch_enable(adev, false);
1938	sdma_v4_0_enable(adev, false);
1939
1940	if ((adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs
1941			&& adev->powerplay.pp_funcs->set_powergating_by_smu) ||
1942			adev->asic_type == CHIP_RENOIR)
1943		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true);
1944
1945	return 0;
1946}
1947
1948static int sdma_v4_0_suspend(void *handle)
1949{
1950	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1951
1952	return sdma_v4_0_hw_fini(adev);
1953}
1954
1955static int sdma_v4_0_resume(void *handle)
1956{
1957	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1958
1959	return sdma_v4_0_hw_init(adev);
1960}
1961
1962static bool sdma_v4_0_is_idle(void *handle)
1963{
1964	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1965	u32 i;
1966
1967	for (i = 0; i < adev->sdma.num_instances; i++) {
1968		u32 tmp = RREG32_SDMA(i, mmSDMA0_STATUS_REG);
1969
1970		if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
1971			return false;
1972	}
1973
1974	return true;
1975}
1976
1977static int sdma_v4_0_wait_for_idle(void *handle)
1978{
1979	unsigned i, j;
1980	u32 sdma[AMDGPU_MAX_SDMA_INSTANCES];
1981	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1982
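	/* Poll the STATUS_REG of every SDMA instance; return success only
	 * once all of them report idle within the usec timeout.
	 */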
1983	for (i = 0; i < adev->usec_timeout; i++) {
1984		for (j = 0; j < adev->sdma.num_instances; j++) {
1985			sdma[j] = RREG32_SDMA(j, mmSDMA0_STATUS_REG);
1986			if (!(sdma[j] & SDMA0_STATUS_REG__IDLE_MASK))
1987				break;
1988		}
1989		if (j == adev->sdma.num_instances)
1990			return 0;
1991		udelay(1);
1992	}
1993	return -ETIMEDOUT;
1994}
1995
1996static int sdma_v4_0_soft_reset(void *handle)
1997{
1998	/* todo */
1999
2000	return 0;
2001}
2002
2003static int sdma_v4_0_set_trap_irq_state(struct amdgpu_device *adev,
2004					struct amdgpu_irq_src *source,
2005					unsigned type,
2006					enum amdgpu_interrupt_state state)
2007{
2008	u32 sdma_cntl;
2009
2010	sdma_cntl = RREG32_SDMA(type, mmSDMA0_CNTL);
2011	sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
2012		       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
2013	WREG32_SDMA(type, mmSDMA0_CNTL, sdma_cntl);
2014
2015	return 0;
2016}
2017
2018static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev,
2019				      struct amdgpu_irq_src *source,
2020				      struct amdgpu_iv_entry *entry)
2021{
2022	uint32_t instance;
2023
2024	DRM_DEBUG("IH: SDMA trap\n");
2025	instance = sdma_v4_0_irq_id_to_seq(entry->client_id);
2026	switch (entry->ring_id) {
2027	case 0:
2028		amdgpu_fence_process(&adev->sdma.instance[instance].ring);
2029		break;
2030	case 1:
2031		if (adev->asic_type == CHIP_VEGA20)
2032			amdgpu_fence_process(&adev->sdma.instance[instance].page);
2033		break;
2034	case 2:
2035		/* XXX compute */
2036		break;
2037	case 3:
2038		if (adev->asic_type != CHIP_VEGA20)
2039			amdgpu_fence_process(&adev->sdma.instance[instance].page);
2040		break;
2041	}
2042	return 0;
2043}
2044
2045static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev,
2046		void *err_data,
2047		struct amdgpu_iv_entry *entry)
2048{
2049	int instance;
2050
2051	/* When "Full RAS" is enabled, the per-IP interrupt sources should
2052	 * be disabled and the driver should only look for the aggregated
2053	 * interrupt via sync flood.
2054	 */
2055	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
2056		goto out;
2057
2058	instance = sdma_v4_0_irq_id_to_seq(entry->client_id);
2059	if (instance < 0)
2060		goto out;
2061
2062	amdgpu_sdma_process_ras_data_cb(adev, err_data, entry);
2063
2064out:
2065	return AMDGPU_RAS_SUCCESS;
2066}
2067
2068static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device *adev,
2069					      struct amdgpu_irq_src *source,
2070					      struct amdgpu_iv_entry *entry)
2071{
2072	int instance;
2073
2074	DRM_ERROR("Illegal instruction in SDMA command stream\n");
2075
2076	instance = sdma_v4_0_irq_id_to_seq(entry->client_id);
2077	if (instance < 0)
2078		return 0;
2079
2080	switch (entry->ring_id) {
2081	case 0:
2082		drm_sched_fault(&adev->sdma.instance[instance].ring.sched);
2083		break;
2084	}
2085	return 0;
2086}
2087
2088static int sdma_v4_0_set_ecc_irq_state(struct amdgpu_device *adev,
2089					struct amdgpu_irq_src *source,
2090					unsigned type,
2091					enum amdgpu_interrupt_state state)
2092{
2093	u32 sdma_edc_config;
2094
2095	sdma_edc_config = RREG32_SDMA(type, mmSDMA0_EDC_CONFIG);
2096	sdma_edc_config = REG_SET_FIELD(sdma_edc_config, SDMA0_EDC_CONFIG, ECC_INT_ENABLE,
2097		       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
2098	WREG32_SDMA(type, mmSDMA0_EDC_CONFIG, sdma_edc_config);
2099
2100	return 0;
2101}
2102
2103static void sdma_v4_0_update_medium_grain_clock_gating(
2104		struct amdgpu_device *adev,
2105		bool enable)
2106{
2107	uint32_t data, def;
2108	int i;
2109
2110	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
2111		for (i = 0; i < adev->sdma.num_instances; i++) {
2112			def = data = RREG32_SDMA(i, mmSDMA0_CLK_CTRL);
2113			data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
2114				  SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
2115				  SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
2116				  SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
2117				  SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
2118				  SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
2119				  SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
2120				  SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);
2121			if (def != data)
2122				WREG32_SDMA(i, mmSDMA0_CLK_CTRL, data);
2123		}
2124	} else {
2125		for (i = 0; i < adev->sdma.num_instances; i++) {
2126			def = data = RREG32_SDMA(i, mmSDMA0_CLK_CTRL);
2127			data |= (SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
2128				 SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
2129				 SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
2130				 SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
2131				 SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
2132				 SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
2133				 SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
2134				 SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);
2135			if (def != data)
2136				WREG32_SDMA(i, mmSDMA0_CLK_CTRL, data);
2137		}
2138	}
2139}
2140
2141
2142static void sdma_v4_0_update_medium_grain_light_sleep(
2143		struct amdgpu_device *adev,
2144		bool enable)
2145{
2146	uint32_t data, def;
2147	int i;
2148
2149	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) {
2150		for (i = 0; i < adev->sdma.num_instances; i++) {
2151			/* 1 - not override: enable sdma mem light sleep */
2152			def = data = RREG32_SDMA(i, mmSDMA0_POWER_CNTL);
2153			data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
2154			if (def != data)
2155				WREG32_SDMA(i, mmSDMA0_POWER_CNTL, data);
2156		}
2157	} else {
2158		for (i = 0; i < adev->sdma.num_instances; i++) {
2159			/* 0 - override: disable sdma mem light sleep */
2160			def = data = RREG32_SDMA(i, mmSDMA0_POWER_CNTL);
2161			data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
2162			if (def != data)
2163				WREG32_SDMA(i, mmSDMA0_POWER_CNTL, data);
2164		}
2165	}
2166}
2167
2168static int sdma_v4_0_set_clockgating_state(void *handle,
2169					  enum amd_clockgating_state state)
2170{
2171	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2172
2173	if (amdgpu_sriov_vf(adev))
2174		return 0;
2175
2176	switch (adev->asic_type) {
2177	case CHIP_VEGA10:
2178	case CHIP_VEGA12:
2179	case CHIP_VEGA20:
2180	case CHIP_RAVEN:
2181	case CHIP_ARCTURUS:
2182	case CHIP_RENOIR:
2183		sdma_v4_0_update_medium_grain_clock_gating(adev,
2184				state == AMD_CG_STATE_GATE);
2185		sdma_v4_0_update_medium_grain_light_sleep(adev,
2186				state == AMD_CG_STATE_GATE);
2187		break;
2188	default:
2189		break;
2190	}
2191	return 0;
2192}
2193
2194static int sdma_v4_0_set_powergating_state(void *handle,
2195					  enum amd_powergating_state state)
2196{
2197	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2198
2199	switch (adev->asic_type) {
2200	case CHIP_RAVEN:
2201		sdma_v4_1_update_power_gating(adev,
2202				state == AMD_PG_STATE_GATE);
2203		break;
2204	default:
2205		break;
2206	}
2207
2208	return 0;
2209}
2210
2211static void sdma_v4_0_get_clockgating_state(void *handle, u32 *flags)
2212{
2213	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2214	int data;
2215
2216	if (amdgpu_sriov_vf(adev))
2217		*flags = 0;
2218
2219	/* AMD_CG_SUPPORT_SDMA_MGCG */
2220	data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL));
2221	if (!(data & SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK))
2222		*flags |= AMD_CG_SUPPORT_SDMA_MGCG;
2223
2224	/* AMD_CG_SUPPORT_SDMA_LS */
2225	data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL));
2226	if (data & SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK)
2227		*flags |= AMD_CG_SUPPORT_SDMA_LS;
2228}
2229
2230const struct amd_ip_funcs sdma_v4_0_ip_funcs = {
2231	.name = "sdma_v4_0",
2232	.early_init = sdma_v4_0_early_init,
2233	.late_init = sdma_v4_0_late_init,
2234	.sw_init = sdma_v4_0_sw_init,
2235	.sw_fini = sdma_v4_0_sw_fini,
2236	.hw_init = sdma_v4_0_hw_init,
2237	.hw_fini = sdma_v4_0_hw_fini,
2238	.suspend = sdma_v4_0_suspend,
2239	.resume = sdma_v4_0_resume,
2240	.is_idle = sdma_v4_0_is_idle,
2241	.wait_for_idle = sdma_v4_0_wait_for_idle,
2242	.soft_reset = sdma_v4_0_soft_reset,
2243	.set_clockgating_state = sdma_v4_0_set_clockgating_state,
2244	.set_powergating_state = sdma_v4_0_set_powergating_state,
2245	.get_clockgating_state = sdma_v4_0_get_clockgating_state,
2246};
2247
2248static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
2249	.type = AMDGPU_RING_TYPE_SDMA,
2250	.align_mask = 0xf,
2251	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
2252	.support_64bit_ptrs = true,
2253	.vmhub = AMDGPU_MMHUB_0,
2254	.get_rptr = sdma_v4_0_ring_get_rptr,
2255	.get_wptr = sdma_v4_0_ring_get_wptr,
2256	.set_wptr = sdma_v4_0_ring_set_wptr,
2257	.emit_frame_size =
2258		6 + /* sdma_v4_0_ring_emit_hdp_flush */
2259		3 + /* hdp invalidate */
2260		6 + /* sdma_v4_0_ring_emit_pipeline_sync */
2261		/* sdma_v4_0_ring_emit_vm_flush */
2262		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
2263		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
2264		10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
2265	.emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
2266	.emit_ib = sdma_v4_0_ring_emit_ib,
2267	.emit_fence = sdma_v4_0_ring_emit_fence,
2268	.emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync,
2269	.emit_vm_flush = sdma_v4_0_ring_emit_vm_flush,
2270	.emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush,
2271	.test_ring = sdma_v4_0_ring_test_ring,
2272	.test_ib = sdma_v4_0_ring_test_ib,
2273	.insert_nop = sdma_v4_0_ring_insert_nop,
2274	.pad_ib = sdma_v4_0_ring_pad_ib,
2275	.emit_wreg = sdma_v4_0_ring_emit_wreg,
2276	.emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
2277	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
2278};
2279
2280/*
2281 * On Arcturus, SDMA instances 5~7 use a different vmhub type (AMDGPU_MMHUB_1),
2282 * so create an individual constant ring_funcs for those instances.
2283 */
2284static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs_2nd_mmhub = {
2285	.type = AMDGPU_RING_TYPE_SDMA,
2286	.align_mask = 0xf,
2287	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
2288	.support_64bit_ptrs = true,
2289	.vmhub = AMDGPU_MMHUB_1,
2290	.get_rptr = sdma_v4_0_ring_get_rptr,
2291	.get_wptr = sdma_v4_0_ring_get_wptr,
2292	.set_wptr = sdma_v4_0_ring_set_wptr,
2293	.emit_frame_size =
2294		6 + /* sdma_v4_0_ring_emit_hdp_flush */
2295		3 + /* hdp invalidate */
2296		6 + /* sdma_v4_0_ring_emit_pipeline_sync */
2297		/* sdma_v4_0_ring_emit_vm_flush */
2298		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
2299		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
2300		10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
2301	.emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
2302	.emit_ib = sdma_v4_0_ring_emit_ib,
2303	.emit_fence = sdma_v4_0_ring_emit_fence,
2304	.emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync,
2305	.emit_vm_flush = sdma_v4_0_ring_emit_vm_flush,
2306	.emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush,
2307	.test_ring = sdma_v4_0_ring_test_ring,
2308	.test_ib = sdma_v4_0_ring_test_ib,
2309	.insert_nop = sdma_v4_0_ring_insert_nop,
2310	.pad_ib = sdma_v4_0_ring_pad_ib,
2311	.emit_wreg = sdma_v4_0_ring_emit_wreg,
2312	.emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
2313	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
2314};
2315
2316static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs = {
2317	.type = AMDGPU_RING_TYPE_SDMA,
2318	.align_mask = 0xf,
2319	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
2320	.support_64bit_ptrs = true,
2321	.vmhub = AMDGPU_MMHUB_0,
2322	.get_rptr = sdma_v4_0_ring_get_rptr,
2323	.get_wptr = sdma_v4_0_page_ring_get_wptr,
2324	.set_wptr = sdma_v4_0_page_ring_set_wptr,
2325	.emit_frame_size =
2326		6 + /* sdma_v4_0_ring_emit_hdp_flush */
2327		3 + /* hdp invalidate */
2328		6 + /* sdma_v4_0_ring_emit_pipeline_sync */
2329		/* sdma_v4_0_ring_emit_vm_flush */
2330		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
2331		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
2332		10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
2333	.emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
2334	.emit_ib = sdma_v4_0_ring_emit_ib,
2335	.emit_fence = sdma_v4_0_ring_emit_fence,
2336	.emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync,
2337	.emit_vm_flush = sdma_v4_0_ring_emit_vm_flush,
2338	.emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush,
2339	.test_ring = sdma_v4_0_ring_test_ring,
2340	.test_ib = sdma_v4_0_ring_test_ib,
2341	.insert_nop = sdma_v4_0_ring_insert_nop,
2342	.pad_ib = sdma_v4_0_ring_pad_ib,
2343	.emit_wreg = sdma_v4_0_ring_emit_wreg,
2344	.emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
2345	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
2346};
2347
2348static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs_2nd_mmhub = {
2349	.type = AMDGPU_RING_TYPE_SDMA,
2350	.align_mask = 0xf,
2351	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
2352	.support_64bit_ptrs = true,
2353	.vmhub = AMDGPU_MMHUB_1,
2354	.get_rptr = sdma_v4_0_ring_get_rptr,
2355	.get_wptr = sdma_v4_0_page_ring_get_wptr,
2356	.set_wptr = sdma_v4_0_page_ring_set_wptr,
2357	.emit_frame_size =
2358		6 + /* sdma_v4_0_ring_emit_hdp_flush */
2359		3 + /* hdp invalidate */
2360		6 + /* sdma_v4_0_ring_emit_pipeline_sync */
2361		/* sdma_v4_0_ring_emit_vm_flush */
2362		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
2363		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
2364		10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
2365	.emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
2366	.emit_ib = sdma_v4_0_ring_emit_ib,
2367	.emit_fence = sdma_v4_0_ring_emit_fence,
2368	.emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync,
2369	.emit_vm_flush = sdma_v4_0_ring_emit_vm_flush,
2370	.emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush,
2371	.test_ring = sdma_v4_0_ring_test_ring,
2372	.test_ib = sdma_v4_0_ring_test_ib,
2373	.insert_nop = sdma_v4_0_ring_insert_nop,
2374	.pad_ib = sdma_v4_0_ring_pad_ib,
2375	.emit_wreg = sdma_v4_0_ring_emit_wreg,
2376	.emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
2377	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
2378};
2379
2380static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev)
2381{
2382	int i;
2383
2384	for (i = 0; i < adev->sdma.num_instances; i++) {
2385		if (adev->asic_type == CHIP_ARCTURUS && i >= 5)
2386			adev->sdma.instance[i].ring.funcs =
2387					&sdma_v4_0_ring_funcs_2nd_mmhub;
2388		else
2389			adev->sdma.instance[i].ring.funcs =
2390					&sdma_v4_0_ring_funcs;
2391		adev->sdma.instance[i].ring.me = i;
2392		if (adev->sdma.has_page_queue) {
2393			if (adev->asic_type == CHIP_ARCTURUS && i >= 5)
2394				adev->sdma.instance[i].page.funcs =
2395					&sdma_v4_0_page_ring_funcs_2nd_mmhub;
2396			else
2397				adev->sdma.instance[i].page.funcs =
2398					&sdma_v4_0_page_ring_funcs;
2399			adev->sdma.instance[i].page.me = i;
2400		}
2401	}
2402}
2403
2404static const struct amdgpu_irq_src_funcs sdma_v4_0_trap_irq_funcs = {
2405	.set = sdma_v4_0_set_trap_irq_state,
2406	.process = sdma_v4_0_process_trap_irq,
2407};
2408
2409static const struct amdgpu_irq_src_funcs sdma_v4_0_illegal_inst_irq_funcs = {
2410	.process = sdma_v4_0_process_illegal_inst_irq,
2411};
2412
2413static const struct amdgpu_irq_src_funcs sdma_v4_0_ecc_irq_funcs = {
2414	.set = sdma_v4_0_set_ecc_irq_state,
2415	.process = amdgpu_sdma_process_ecc_irq,
2416};
2417
2418
2419
2420static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev)
2421{
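	/* num_types mirrors the SDMA instance count so one trap/ECC
	 * interrupt source is registered per engine.
	 */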
2422	switch (adev->sdma.num_instances) {
2423	case 1:
2424		adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1;
2425		adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1;
2426		break;
2427	case 8:
2428		adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
2429		adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
2430		break;
2431	case 2:
2432	default:
2433		adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2;
2434		adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2;
2435		break;
2436	}
2437	adev->sdma.trap_irq.funcs = &sdma_v4_0_trap_irq_funcs;
2438	adev->sdma.illegal_inst_irq.funcs = &sdma_v4_0_illegal_inst_irq_funcs;
2439	adev->sdma.ecc_irq.funcs = &sdma_v4_0_ecc_irq_funcs;
2440}
2441
2442/**
2443 * sdma_v4_0_emit_copy_buffer - copy buffer using the sDMA engine
2444 *
2445 * @ib: indirect buffer to fill with commands
2446 * @src_offset: src GPU address
2447 * @dst_offset: dst GPU address
2448 * @byte_count: number of bytes to xfer
2449 *
2450 * Copy GPU buffers using the DMA engine (VEGA10/12).
2451 * Used by the amdgpu ttm implementation to move pages if
2452 * registered as the asic copy callback.
2453 */
2454static void sdma_v4_0_emit_copy_buffer(struct amdgpu_ib *ib,
2455				       uint64_t src_offset,
2456				       uint64_t dst_offset,
2457				       uint32_t byte_count)
2458{
2459	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
2460		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
2461	ib->ptr[ib->length_dw++] = byte_count - 1;
2462	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
2463	ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
2464	ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
2465	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
2466	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
2467}
2468
2469/**
2470 * sdma_v4_0_emit_fill_buffer - fill buffer using the sDMA engine
2471 *
2472 * @ib: indirect buffer to fill with commands
2473 * @src_data: value to write to buffer
2474 * @dst_offset: dst GPU address
2475 * @byte_count: number of bytes to xfer
2476 *
2477 * Fill GPU buffers using the DMA engine (VEGA10/12).
2478 */
2479static void sdma_v4_0_emit_fill_buffer(struct amdgpu_ib *ib,
2480				       uint32_t src_data,
2481				       uint64_t dst_offset,
2482				       uint32_t byte_count)
2483{
2484	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL);
2485	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
2486	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
2487	ib->ptr[ib->length_dw++] = src_data;
2488	ib->ptr[ib->length_dw++] = byte_count - 1;
2489}
2490
2491static const struct amdgpu_buffer_funcs sdma_v4_0_buffer_funcs = {
2492	.copy_max_bytes = 0x400000,
2493	.copy_num_dw = 7,
2494	.emit_copy_buffer = sdma_v4_0_emit_copy_buffer,
2495
2496	.fill_max_bytes = 0x400000,
2497	.fill_num_dw = 5,
2498	.emit_fill_buffer = sdma_v4_0_emit_fill_buffer,
2499};
2500
2501static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev)
2502{
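	/* Route TTM buffer moves through the paging queue when the firmware
	 * supports it, otherwise fall back to the first gfx ring.
	 */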
2503	adev->mman.buffer_funcs = &sdma_v4_0_buffer_funcs;
2504	if (adev->sdma.has_page_queue)
2505		adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].page;
2506	else
2507		adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
2508}
2509
2510static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = {
2511	.copy_pte_num_dw = 7,
2512	.copy_pte = sdma_v4_0_vm_copy_pte,
2513
2514	.write_pte = sdma_v4_0_vm_write_pte,
2515	.set_pte_pde = sdma_v4_0_vm_set_pte_pde,
2516};
2517
2518static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev)
2519{
2520	struct drm_gpu_scheduler *sched;
2521	unsigned i;
2522
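	/* PTE updates are scheduled on the paging queue when it exists,
	 * otherwise on the gfx ring of each SDMA instance.
	 */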
2523	adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs;
2524	for (i = 0; i < adev->sdma.num_instances; i++) {
2525		if (adev->sdma.has_page_queue)
2526			sched = &adev->sdma.instance[i].page.sched;
2527		else
2528			sched = &adev->sdma.instance[i].ring.sched;
2529		adev->vm_manager.vm_pte_scheds[i] = sched;
2530	}
2531	adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
2532}
2533
2534static void sdma_v4_0_get_ras_error_count(uint32_t value,
2535					uint32_t instance,
2536					uint32_t *sec_count)
2537{
2538	uint32_t i;
2539	uint32_t sec_cnt;
2540
2541	/* double bit (multi-bit) error detection is not supported */
2542	for (i = 0; i < ARRAY_SIZE(sdma_v4_0_ras_fields); i++) {
2543		/* the SDMA_EDC_COUNTER register in each sdma instance
2544		 * shares the same sed shift_mask
2545		 */
2546		sec_cnt = (value &
2547			sdma_v4_0_ras_fields[i].sec_count_mask) >>
2548			sdma_v4_0_ras_fields[i].sec_count_shift;
2549		if (sec_cnt) {
2550			DRM_INFO("Detected %s in SDMA%d, SED %d\n",
2551				sdma_v4_0_ras_fields[i].name,
2552				instance, sec_cnt);
2553			*sec_count += sec_cnt;
2554		}
2555	}
2556}
2557
2558static int sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev,
2559			uint32_t instance, void *ras_error_status)
2560{
2561	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
2562	uint32_t sec_count = 0;
2563	uint32_t reg_value = 0;
2564
2565	reg_value = RREG32_SDMA(instance, mmSDMA0_EDC_COUNTER);
2566	/* double bit error is not supported */
2567	if (reg_value)
2568		sdma_v4_0_get_ras_error_count(reg_value,
2569				instance, &sec_count);
2570	/* err_data->ce_count should be initialized to 0
2571	 * before calling into this function */
2572	err_data->ce_count += sec_count;
2573	/* double bit error is not supported
2574	 * set ue count to 0 */
2575	err_data->ue_count = 0;
2576
2577	return 0;
2578};
2579
2580static const struct amdgpu_sdma_ras_funcs sdma_v4_0_ras_funcs = {
2581	.ras_late_init = amdgpu_sdma_ras_late_init,
2582	.ras_fini = amdgpu_sdma_ras_fini,
2583	.query_ras_error_count = sdma_v4_0_query_ras_error_count,
2584};
2585
2586static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev)
2587{
2588	switch (adev->asic_type) {
2589	case CHIP_VEGA20:
2590	case CHIP_ARCTURUS:
2591		adev->sdma.funcs = &sdma_v4_0_ras_funcs;
2592		break;
2593	default:
2594		break;
2595	}
2596}
2597
2598const struct amdgpu_ip_block_version sdma_v4_0_ip_block = {
2599	.type = AMD_IP_BLOCK_TYPE_SDMA,
2600	.major = 4,
2601	.minor = 0,
2602	.rev = 0,
2603	.funcs = &sdma_v4_0_ip_funcs,
2604};
2605