1/*	$NetBSD: amdgpu_sdma_v4_0.c,v 1.3 2021/12/19 12:21:29 riastradh Exp $	*/
2
3/*
4 * Copyright 2016 Advanced Micro Devices, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 */
25
26#include <sys/cdefs.h>
27__KERNEL_RCSID(0, "$NetBSD: amdgpu_sdma_v4_0.c,v 1.3 2021/12/19 12:21:29 riastradh Exp $");
28
29#include <linux/delay.h>
30#include <linux/firmware.h>
31#include <linux/module.h>
32#include <linux/pci.h>
33
34#include "amdgpu.h"
35#include "amdgpu_ucode.h"
36#include "amdgpu_trace.h"
37
38#include "sdma0/sdma0_4_2_offset.h"
39#include "sdma0/sdma0_4_2_sh_mask.h"
40#include "sdma1/sdma1_4_2_offset.h"
41#include "sdma1/sdma1_4_2_sh_mask.h"
42#include "sdma2/sdma2_4_2_2_offset.h"
43#include "sdma2/sdma2_4_2_2_sh_mask.h"
44#include "sdma3/sdma3_4_2_2_offset.h"
45#include "sdma3/sdma3_4_2_2_sh_mask.h"
46#include "sdma4/sdma4_4_2_2_offset.h"
47#include "sdma4/sdma4_4_2_2_sh_mask.h"
48#include "sdma5/sdma5_4_2_2_offset.h"
49#include "sdma5/sdma5_4_2_2_sh_mask.h"
50#include "sdma6/sdma6_4_2_2_offset.h"
51#include "sdma6/sdma6_4_2_2_sh_mask.h"
52#include "sdma7/sdma7_4_2_2_offset.h"
53#include "sdma7/sdma7_4_2_2_sh_mask.h"
54#include "hdp/hdp_4_0_offset.h"
55#include "sdma0/sdma0_4_1_default.h"
56
57#include "soc15_common.h"
58#include "soc15.h"
59#include "vega10_sdma_pkt_open.h"
60
61#include "ivsrcid/sdma0/irqsrcs_sdma0_4_0.h"
62#include "ivsrcid/sdma1/irqsrcs_sdma1_4_0.h"
63
64#include "amdgpu_ras.h"
65
66#include <linux/nbsd-namespace.h>
67
68MODULE_FIRMWARE("amdgpu/vega10_sdma.bin");
69MODULE_FIRMWARE("amdgpu/vega10_sdma1.bin");
70MODULE_FIRMWARE("amdgpu/vega12_sdma.bin");
71MODULE_FIRMWARE("amdgpu/vega12_sdma1.bin");
72MODULE_FIRMWARE("amdgpu/vega20_sdma.bin");
73MODULE_FIRMWARE("amdgpu/vega20_sdma1.bin");
74MODULE_FIRMWARE("amdgpu/raven_sdma.bin");
75MODULE_FIRMWARE("amdgpu/picasso_sdma.bin");
76MODULE_FIRMWARE("amdgpu/raven2_sdma.bin");
77MODULE_FIRMWARE("amdgpu/arcturus_sdma.bin");
78MODULE_FIRMWARE("amdgpu/renoir_sdma.bin");
79
80#define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK  0x000000F8L
81#define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L
82
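/*
 * These helpers expect a local 'struct amdgpu_device *adev' to be in scope
 * at the call site; the per-instance register offset is resolved through
 * sdma_v4_0_get_reg_offset() below.
 */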
83#define WREG32_SDMA(instance, offset, value) \
84	WREG32(sdma_v4_0_get_reg_offset(adev, (instance), (offset)), value)
85#define RREG32_SDMA(instance, offset) \
86	RREG32(sdma_v4_0_get_reg_offset(adev, (instance), (offset)))
87
88static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev);
89static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev);
90static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev);
91static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev);
92static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev);
93
94static const struct soc15_reg_golden golden_settings_sdma_4[] = {
95	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
96	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xff000ff0, 0x3f000100),
97	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_IB_CNTL, 0x800f0100, 0x00000100),
98	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
99	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_IB_CNTL, 0x800f0100, 0x00000100),
100	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
101	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_POWER_CNTL, 0x003ff006, 0x0003c000),
102	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_IB_CNTL, 0x800f0100, 0x00000100),
103	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
104	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL, 0x800f0100, 0x00000100),
105	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
106	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
107	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x00000000),
108	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CLK_CTRL, 0xffffffff, 0x3f000100),
109	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_IB_CNTL, 0x800f0100, 0x00000100),
110	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
111	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_IB_CNTL, 0x800f0100, 0x00000100),
112	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
113	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_POWER_CNTL, 0x003ff000, 0x0003c000),
114	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_IB_CNTL, 0x800f0100, 0x00000100),
115	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
116	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_IB_CNTL, 0x800f0100, 0x00000100),
117	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
118	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0),
119	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_WATERMK, 0xfc000000, 0x00000000)
120};
121
122static const struct soc15_reg_golden golden_settings_sdma_vg10[] = {
123	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00104002),
124	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002),
125	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
126	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0018773f, 0x00104002),
127	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002)
128};
129
130static const struct soc15_reg_golden golden_settings_sdma_vg12[] = {
131	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00104001),
132	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104001),
133	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
134	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0018773f, 0x00104001),
135	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104001)
136};
137
138static const struct soc15_reg_golden golden_settings_sdma_4_1[] = {
139	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
140	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100),
141	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_IB_CNTL, 0x800f0111, 0x00000100),
142	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
143	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_POWER_CNTL, 0xfc3fffff, 0x40000051),
144	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_IB_CNTL, 0x800f0111, 0x00000100),
145	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
146	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100),
147	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
148	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
149	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x00000000)
150};
151
152static const struct soc15_reg_golden golden_settings_sdma0_4_2_init[] = {
153	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff0, 0x00403000),
154};
155
156static const struct soc15_reg_golden golden_settings_sdma0_4_2[] =
157{
158	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
159	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100),
160	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
161	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
162	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
163	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
164	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
165	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
166	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RD_BURST_CNTL, 0x0000000f, 0x00000003),
167	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
168	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff0, 0x00403000),
169	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
170	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
171	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC2_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
172	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
173	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC3_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
174	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
175	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC4_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
176	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
177	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC5_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
178	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
179	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC6_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
180	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
181	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC7_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
182	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
183	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
184};
185
186static const struct soc15_reg_golden golden_settings_sdma1_4_2[] = {
187	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
188	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CLK_CTRL, 0xffffffff, 0x3f000100),
189	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
190	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
191	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
192	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
193	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
194	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
195	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RD_BURST_CNTL, 0x0000000f, 0x00000003),
196	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
197	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff0, 0x00403000),
198	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
199	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
200	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC2_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
201	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
202	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC3_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
203	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
204	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC4_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
205	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
206	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC5_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
207	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
208	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC6_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
209	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
210	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC7_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
211	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
212	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0),
213};
214
215static const struct soc15_reg_golden golden_settings_sdma_rv1[] =
216{
217	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00000002),
218	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00000002)
219};
220
221static const struct soc15_reg_golden golden_settings_sdma_rv2[] =
222{
223	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00003001),
224	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00003001)
225};
226
227static const struct soc15_reg_golden golden_settings_sdma_arct[] =
228{
229	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
230	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
231	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
232	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
233	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
234	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
235	SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
236	SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
237	SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
238	SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
239	SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
240	SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
241	SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
242	SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
243	SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
244	SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
245	SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
246	SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
247	SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
248	SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
249	SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
250	SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
251	SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
252	SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002)
253};
254
255static const struct soc15_reg_golden golden_settings_sdma_4_3[] = {
256	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
257	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100),
258	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00000002),
259	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00000002),
260	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
261	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_POWER_CNTL, 0x003fff07, 0x40000051),
262	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
263	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
264	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
265	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x03fbe1fe)
266};
267
268static const struct soc15_ras_field_entry sdma_v4_0_ras_fields[] = {
269	{ "SDMA_UCODE_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
270	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UCODE_BUF_SED),
271	0, 0,
272	},
273	{ "SDMA_RB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
274	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_RB_CMD_BUF_SED),
275	0, 0,
276	},
277	{ "SDMA_IB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
278	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_IB_CMD_BUF_SED),
279	0, 0,
280	},
281	{ "SDMA_UTCL1_RD_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
282	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UTCL1_RD_FIFO_SED),
283	0, 0,
284	},
285	{ "SDMA_UTCL1_RDBST_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
286	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UTCL1_RDBST_FIFO_SED),
287	0, 0,
288	},
289	{ "SDMA_DATA_LUT_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
290	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_DATA_LUT_FIFO_SED),
291	0, 0,
292	},
293	{ "SDMA_MBANK_DATA_BUF0_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
294	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF0_SED),
295	0, 0,
296	},
297	{ "SDMA_MBANK_DATA_BUF1_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
298	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF1_SED),
299	0, 0,
300	},
301	{ "SDMA_MBANK_DATA_BUF2_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
302	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF2_SED),
303	0, 0,
304	},
305	{ "SDMA_MBANK_DATA_BUF3_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
306	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF3_SED),
307	0, 0,
308	},
309	{ "SDMA_MBANK_DATA_BUF4_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
310	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF4_SED),
311	0, 0,
312	},
313	{ "SDMA_MBANK_DATA_BUF5_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
314	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF5_SED),
315	0, 0,
316	},
317	{ "SDMA_MBANK_DATA_BUF6_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
318	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF6_SED),
319	0, 0,
320	},
321	{ "SDMA_MBANK_DATA_BUF7_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
322	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF7_SED),
323	0, 0,
324	},
325	{ "SDMA_MBANK_DATA_BUF8_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
326	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF8_SED),
327	0, 0,
328	},
329	{ "SDMA_MBANK_DATA_BUF9_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
330	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF9_SED),
331	0, 0,
332	},
333	{ "SDMA_MBANK_DATA_BUF10_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
334	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF10_SED),
335	0, 0,
336	},
337	{ "SDMA_MBANK_DATA_BUF11_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
338	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF11_SED),
339	0, 0,
340	},
341	{ "SDMA_MBANK_DATA_BUF12_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
342	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF12_SED),
343	0, 0,
344	},
345	{ "SDMA_MBANK_DATA_BUF13_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
346	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF13_SED),
347	0, 0,
348	},
349	{ "SDMA_MBANK_DATA_BUF14_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
350	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF14_SED),
351	0, 0,
352	},
353	{ "SDMA_MBANK_DATA_BUF15_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
354	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF15_SED),
355	0, 0,
356	},
357	{ "SDMA_SPLIT_DAT_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
358	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_SPLIT_DAT_BUF_SED),
359	0, 0,
360	},
361	{ "SDMA_MC_WR_ADDR_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
362	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MC_WR_ADDR_FIFO_SED),
363	0, 0,
364	},
365};
366
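/*
 * Translate an (instance, relative offset) pair into an absolute register
 * offset via the per-IP reg_offset tables; instances 0-1 use segment 0 of
 * the SDMA0/SDMA1 blocks, while instances 2-7 (present on Arcturus) use
 * segment 1 of the SDMA2-SDMA7 blocks.
 */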
367static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev,
368		u32 instance, u32 offset)
369{
370	switch (instance) {
371	case 0:
372		return (adev->reg_offset[SDMA0_HWIP][0][0] + offset);
373	case 1:
374		return (adev->reg_offset[SDMA1_HWIP][0][0] + offset);
375	case 2:
376		return (adev->reg_offset[SDMA2_HWIP][0][1] + offset);
377	case 3:
378		return (adev->reg_offset[SDMA3_HWIP][0][1] + offset);
379	case 4:
380		return (adev->reg_offset[SDMA4_HWIP][0][1] + offset);
381	case 5:
382		return (adev->reg_offset[SDMA5_HWIP][0][1] + offset);
383	case 6:
384		return (adev->reg_offset[SDMA6_HWIP][0][1] + offset);
385	case 7:
386		return (adev->reg_offset[SDMA7_HWIP][0][1] + offset);
387	default:
388		break;
389	}
390	return 0;
391}
392
393static unsigned sdma_v4_0_seq_to_irq_id(int seq_num)
394{
395	switch (seq_num) {
396	case 0:
397		return SOC15_IH_CLIENTID_SDMA0;
398	case 1:
399		return SOC15_IH_CLIENTID_SDMA1;
400	case 2:
401		return SOC15_IH_CLIENTID_SDMA2;
402	case 3:
403		return SOC15_IH_CLIENTID_SDMA3;
404	case 4:
405		return SOC15_IH_CLIENTID_SDMA4;
406	case 5:
407		return SOC15_IH_CLIENTID_SDMA5;
408	case 6:
409		return SOC15_IH_CLIENTID_SDMA6;
410	case 7:
411		return SOC15_IH_CLIENTID_SDMA7;
412	default:
413		break;
414	}
415	return -EINVAL;
416}
417
418static int sdma_v4_0_irq_id_to_seq(unsigned client_id)
419{
420	switch (client_id) {
421	case SOC15_IH_CLIENTID_SDMA0:
422		return 0;
423	case SOC15_IH_CLIENTID_SDMA1:
424		return 1;
425	case SOC15_IH_CLIENTID_SDMA2:
426		return 2;
427	case SOC15_IH_CLIENTID_SDMA3:
428		return 3;
429	case SOC15_IH_CLIENTID_SDMA4:
430		return 4;
431	case SOC15_IH_CLIENTID_SDMA5:
432		return 5;
433	case SOC15_IH_CLIENTID_SDMA6:
434		return 6;
435	case SOC15_IH_CLIENTID_SDMA7:
436		return 7;
437	default:
438		break;
439	}
440	return -EINVAL;
441}
442
443static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
444{
445	switch (adev->asic_type) {
446	case CHIP_VEGA10:
447		soc15_program_register_sequence(adev,
448						golden_settings_sdma_4,
449						ARRAY_SIZE(golden_settings_sdma_4));
450		soc15_program_register_sequence(adev,
451						golden_settings_sdma_vg10,
452						ARRAY_SIZE(golden_settings_sdma_vg10));
453		break;
454	case CHIP_VEGA12:
455		soc15_program_register_sequence(adev,
456						golden_settings_sdma_4,
457						ARRAY_SIZE(golden_settings_sdma_4));
458		soc15_program_register_sequence(adev,
459						golden_settings_sdma_vg12,
460						ARRAY_SIZE(golden_settings_sdma_vg12));
461		break;
462	case CHIP_VEGA20:
463		soc15_program_register_sequence(adev,
464						golden_settings_sdma0_4_2_init,
465						ARRAY_SIZE(golden_settings_sdma0_4_2_init));
466		soc15_program_register_sequence(adev,
467						golden_settings_sdma0_4_2,
468						ARRAY_SIZE(golden_settings_sdma0_4_2));
469		soc15_program_register_sequence(adev,
470						golden_settings_sdma1_4_2,
471						ARRAY_SIZE(golden_settings_sdma1_4_2));
472		break;
473	case CHIP_ARCTURUS:
474		soc15_program_register_sequence(adev,
475						golden_settings_sdma_arct,
476						ARRAY_SIZE(golden_settings_sdma_arct));
477		break;
478	case CHIP_RAVEN:
479		soc15_program_register_sequence(adev,
480						golden_settings_sdma_4_1,
481						ARRAY_SIZE(golden_settings_sdma_4_1));
482		if (adev->rev_id >= 8)
483			soc15_program_register_sequence(adev,
484							golden_settings_sdma_rv2,
485							ARRAY_SIZE(golden_settings_sdma_rv2));
486		else
487			soc15_program_register_sequence(adev,
488							golden_settings_sdma_rv1,
489							ARRAY_SIZE(golden_settings_sdma_rv1));
490		break;
491	case CHIP_RENOIR:
492		soc15_program_register_sequence(adev,
493						golden_settings_sdma_4_3,
494						ARRAY_SIZE(golden_settings_sdma_4_3));
495		break;
496	default:
497		break;
498	}
499}
500
501static int sdma_v4_0_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst)
502{
503	int err = 0;
504	const struct sdma_firmware_header_v1_0 *hdr;
505
506	err = amdgpu_ucode_validate(sdma_inst->fw);
507	if (err)
508		return err;
509
510	hdr = (const struct sdma_firmware_header_v1_0 *)sdma_inst->fw->data;
511	sdma_inst->fw_version = le32_to_cpu(hdr->header.ucode_version);
512	sdma_inst->feature_version = le32_to_cpu(hdr->ucode_feature_version);
513
514	if (sdma_inst->feature_version >= 20)
515		sdma_inst->burst_nop = true;
516
517	return 0;
518}
519
520static void sdma_v4_0_destroy_inst_ctx(struct amdgpu_device *adev)
521{
522	int i;
523
524	for (i = 0; i < adev->sdma.num_instances; i++) {
525		if (adev->sdma.instance[i].fw != NULL)
526			release_firmware(adev->sdma.instance[i].fw);
527
528		/* arcturus shares the same FW memory across
529		   all SDMA instances */
530		if (adev->asic_type == CHIP_ARCTURUS)
531			break;
532	}
533
534	memset((void*)adev->sdma.instance, 0,
535		sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES);
536}
537
538/**
539 * sdma_v4_0_init_microcode - load ucode images from disk
540 *
541 * @adev: amdgpu_device pointer
542 *
543 * Use the firmware interface to load the ucode images into
544 * the driver (not loaded into hw).
545 * Returns 0 on success, error on failure.
546 */
547
548// emulation only, won't work on a real chip
549// a real vega10 chip needs to use PSP to load firmware
550static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
551{
552	const char *chip_name;
553	char fw_name[30];
554	int err = 0, i;
555	struct amdgpu_firmware_info *info = NULL;
556	const struct common_firmware_header *header = NULL;
557
558	DRM_DEBUG("\n");
559
560	switch (adev->asic_type) {
561	case CHIP_VEGA10:
562		chip_name = "vega10";
563		break;
564	case CHIP_VEGA12:
565		chip_name = "vega12";
566		break;
567	case CHIP_VEGA20:
568		chip_name = "vega20";
569		break;
570	case CHIP_RAVEN:
571		if (adev->rev_id >= 8)
572			chip_name = "raven2";
573		else if (adev->pdev->device == 0x15d8)
574			chip_name = "picasso";
575		else
576			chip_name = "raven";
577		break;
578	case CHIP_ARCTURUS:
579		chip_name = "arcturus";
580		break;
581	case CHIP_RENOIR:
582		chip_name = "renoir";
583		break;
584	default:
585		BUG();
586	}
587
588	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
589
590	err = request_firmware(&adev->sdma.instance[0].fw, fw_name, adev->dev);
591	if (err)
592		goto out;
593
594	err = sdma_v4_0_init_inst_ctx(&adev->sdma.instance[0]);
595	if (err)
596		goto out;
597
598	for (i = 1; i < adev->sdma.num_instances; i++) {
599		if (adev->asic_type == CHIP_ARCTURUS) {
600			/* Arcturus will leverage the same FW memory
601			   for every SDMA instance */
602			memcpy((void*)&adev->sdma.instance[i],
603			       (void*)&adev->sdma.instance[0],
604			       sizeof(struct amdgpu_sdma_instance));
605		} else {
607			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma%d.bin", chip_name, i);
608
609			err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
610			if (err)
611				goto out;
612
613			err = sdma_v4_0_init_inst_ctx(&adev->sdma.instance[i]);
614			if (err)
615				goto out;
616		}
617	}
618
619	DRM_DEBUG("psp_load == '%s'\n",
620		adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false");
621
622	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
623		for (i = 0; i < adev->sdma.num_instances; i++) {
624			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
625			info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
626			info->fw = adev->sdma.instance[i].fw;
627			header = (const struct common_firmware_header *)info->fw->data;
628			adev->firmware.fw_size +=
629				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
630		}
631	}
632
633out:
634	if (err) {
635		DRM_ERROR("sdma_v4_0: Failed to load firmware \"%s\"\n", fw_name);
636		sdma_v4_0_destroy_inst_ctx(adev);
637	}
638	return err;
639}
640
641/**
642 * sdma_v4_0_ring_get_rptr - get the current read pointer
643 *
644 * @ring: amdgpu ring pointer
645 *
646 * Get the current rptr from the hardware (VEGA10+).
647 */
648static uint64_t sdma_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
649{
650	volatile u64 *rptr;
651
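	/*
	 * The rptr writeback slot and the RB_RPTR/RB_WPTR registers hold byte
	 * offsets, while amdgpu ring pointers are tracked in dwords, hence the
	 * >> 2 here (and the << 2 when wptr is written back).
	 */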
652	/* XXX check if swapping is necessary on BE */
653	rptr = ((volatile u64 *)&ring->adev->wb.wb[ring->rptr_offs]);
654
655	DRM_DEBUG("rptr before shift == 0x%016"PRIx64"\n", *rptr);
656	return ((*rptr) >> 2);
657}
658
659/**
660 * sdma_v4_0_ring_get_wptr - get the current write pointer
661 *
662 * @ring: amdgpu ring pointer
663 *
664 * Get the current wptr from the hardware (VEGA10+).
665 */
666static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
667{
668	struct amdgpu_device *adev = ring->adev;
669	u64 wptr;
670
671	if (ring->use_doorbell) {
672		/* XXX check if swapping is necessary on BE */
673		wptr = READ_ONCE(*((volatile u64 *)&adev->wb.wb[ring->wptr_offs]));
674		DRM_DEBUG("wptr/doorbell before shift == 0x%016"PRIx64"\n", wptr);
675	} else {
676		wptr = RREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR_HI);
677		wptr = wptr << 32;
678		wptr |= RREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR);
679		DRM_DEBUG("wptr before shift [%i] wptr == 0x%016"PRIx64"\n",
680				ring->me, wptr);
681	}
682
683	return wptr >> 2;
684}
685
686/**
687 * sdma_v4_0_ring_set_wptr - commit the write pointer
688 *
689 * @ring: amdgpu ring pointer
690 *
691 * Write the wptr back to the hardware (VEGA10+).
692 */
693static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
694{
695	struct amdgpu_device *adev = ring->adev;
696
697	DRM_DEBUG("Setting write pointer\n");
698	if (ring->use_doorbell) {
699		volatile u64 *wb = (volatile u64 *)&adev->wb.wb[ring->wptr_offs];
700
701		DRM_DEBUG("Using doorbell -- "
702				"wptr_offs == 0x%08x "
703				"lower_32_bits(ring->wptr) << 2 == 0x%08x "
704				"upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
705				ring->wptr_offs,
706				lower_32_bits(ring->wptr << 2),
707				upper_32_bits(ring->wptr << 2));
708		/* XXX check if swapping is necessary on BE */
709		WRITE_ONCE(*wb, (ring->wptr << 2));
710		DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016"PRIx64")\n",
711				ring->doorbell_index, ring->wptr << 2);
712		WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
713	} else {
714		DRM_DEBUG("Not using doorbell -- "
715				"mmSDMA%i_GFX_RB_WPTR == 0x%08x "
716				"mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
717				ring->me,
718				lower_32_bits(ring->wptr << 2),
719				ring->me,
720				upper_32_bits(ring->wptr << 2));
721		WREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR,
722			    lower_32_bits(ring->wptr << 2));
723		WREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR_HI,
724			    upper_32_bits(ring->wptr << 2));
725	}
726}
727
728/**
729 * sdma_v4_0_page_ring_get_wptr - get the current write pointer
730 *
731 * @ring: amdgpu ring pointer
732 *
733 * Get the current wptr from the hardware (VEGA10+).
734 */
735static uint64_t sdma_v4_0_page_ring_get_wptr(struct amdgpu_ring *ring)
736{
737	struct amdgpu_device *adev = ring->adev;
738	u64 wptr;
739
740	if (ring->use_doorbell) {
741		/* XXX check if swapping is necessary on BE */
742		wptr = READ_ONCE(*((volatile u64 *)&adev->wb.wb[ring->wptr_offs]));
743	} else {
744		wptr = RREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR_HI);
745		wptr = wptr << 32;
746		wptr |= RREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR);
747	}
748
749	return wptr >> 2;
750}
751
752/**
753 * sdma_v4_0_page_ring_set_wptr - commit the write pointer
754 *
755 * @ring: amdgpu ring pointer
756 *
757 * Write the wptr back to the hardware (VEGA10+).
758 */
759static void sdma_v4_0_page_ring_set_wptr(struct amdgpu_ring *ring)
760{
761	struct amdgpu_device *adev = ring->adev;
762
763	if (ring->use_doorbell) {
764		volatile u64 *wb = (volatile u64 *)&adev->wb.wb[ring->wptr_offs];
765
766		/* XXX check if swapping is necessary on BE */
767		WRITE_ONCE(*wb, (ring->wptr << 2));
768		WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
769	} else {
770		uint64_t wptr = ring->wptr << 2;
771
772		WREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR,
773			    lower_32_bits(wptr));
774		WREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR_HI,
775			    upper_32_bits(wptr));
776	}
777}
778
779static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
780{
781	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
782	int i;
783
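	/*
	 * With burst NOP support, the first packet's COUNT field marks the
	 * following (count - 1) dwords as padding; the loop still emits them
	 * as plain NOPs so the ring advances by exactly 'count' dwords.
	 */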
784	for (i = 0; i < count; i++)
785		if (sdma && sdma->burst_nop && (i == 0))
786			amdgpu_ring_write(ring, ring->funcs->nop |
787				SDMA_PKT_NOP_HEADER_COUNT(count - 1));
788		else
789			amdgpu_ring_write(ring, ring->funcs->nop);
790}
791
792/**
793 * sdma_v4_0_ring_emit_ib - Schedule an IB on the DMA engine
794 *
795 * @ring: amdgpu ring pointer
796 * @job: job to retrieve vmid from
 * @ib: IB object to schedule
 * @flags: unused
797 *
798 * Schedule an IB in the DMA ring (VEGA10).
799 */
800static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
801				   struct amdgpu_job *job,
802				   struct amdgpu_ib *ib,
803				   uint32_t flags)
804{
805	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
806
807	/* IB packet must end on an 8 DW boundary */
808	sdma_v4_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
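	/*
	 * The INDIRECT packet below is 6 dwords, so the padding above aligns
	 * wptr to (wptr % 8) == 2, making the packet end on an 8 DW boundary.
	 */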
809
810	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
811			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
812	/* base must be 32 byte aligned */
813	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
814	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
815	amdgpu_ring_write(ring, ib->length_dw);
816	amdgpu_ring_write(ring, 0);
817	amdgpu_ring_write(ring, 0);
818
819}
820
821static void sdma_v4_0_wait_reg_mem(struct amdgpu_ring *ring,
822				   int mem_space, int hdp,
823				   uint32_t addr0, uint32_t addr1,
824				   uint32_t ref, uint32_t mask,
825				   uint32_t inv)
826{
827	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
828			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(hdp) |
829			  SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(mem_space) |
830			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
831	if (mem_space) {
832		/* memory */
833		amdgpu_ring_write(ring, addr0);
834		amdgpu_ring_write(ring, addr1);
835	} else {
836		/* registers */
837		amdgpu_ring_write(ring, addr0 << 2);
838		amdgpu_ring_write(ring, addr1 << 2);
839	}
840	amdgpu_ring_write(ring, ref); /* reference */
841	amdgpu_ring_write(ring, mask); /* mask */
842	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
843			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(inv)); /* retry count, poll interval */
844}
845
846/**
847 * sdma_v4_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
848 *
849 * @ring: amdgpu ring pointer
850 *
851 * Emit an hdp flush packet on the requested DMA ring.
852 */
853static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
854{
855	struct amdgpu_device *adev = ring->adev;
856	u32 ref_and_mask = 0;
857	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
858
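	/*
	 * The code assumes the per-instance SDMA bits in the HDP flush
	 * request/done registers are consecutive, so shifting the SDMA0 mask
	 * by ring->me selects this instance's bit.
	 */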
859	ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
860
861	sdma_v4_0_wait_reg_mem(ring, 0, 1,
862			       adev->nbio.funcs->get_hdp_flush_done_offset(adev),
863			       adev->nbio.funcs->get_hdp_flush_req_offset(adev),
864			       ref_and_mask, ref_and_mask, 10);
865}
866
867/**
868 * sdma_v4_0_ring_emit_fence - emit a fence on the DMA ring
869 *
870 * @ring: amdgpu ring pointer
871 * @addr: address where the fence sequence number is written
 * @seq: fence sequence number
 * @flags: fence flags (AMDGPU_FENCE_FLAG_64BIT selects a 64-bit write)
872 *
873 * Add a DMA fence packet to the ring to write
874 * the fence seq number and DMA trap packet to generate
875 * an interrupt if needed (VEGA10).
876 */
877static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
878				      unsigned flags)
879{
880	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
881	/* write the fence */
882	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
883	/* zero in first two bits */
884	BUG_ON(addr & 0x3);
885	amdgpu_ring_write(ring, lower_32_bits(addr));
886	amdgpu_ring_write(ring, upper_32_bits(addr));
887	amdgpu_ring_write(ring, lower_32_bits(seq));
888
889	/* optionally write high bits as well */
890	if (write64bit) {
891		addr += 4;
892		amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
893		/* zero in first two bits */
894		BUG_ON(addr & 0x3);
895		amdgpu_ring_write(ring, lower_32_bits(addr));
896		amdgpu_ring_write(ring, upper_32_bits(addr));
897		amdgpu_ring_write(ring, upper_32_bits(seq));
898	}
899
900	/* generate an interrupt */
901	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
902	amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
903}
904
905
906/**
907 * sdma_v4_0_gfx_stop - stop the gfx async dma engines
908 *
909 * @adev: amdgpu_device pointer
910 *
911 * Stop the gfx async dma ring buffers (VEGA10).
912 */
913static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev)
914{
915	struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
916	u32 rb_cntl, ib_cntl;
917	int i, unset = 0;
918
919	for (i = 0; i < adev->sdma.num_instances; i++) {
920		sdma[i] = &adev->sdma.instance[i].ring;
921
922		if ((adev->mman.buffer_funcs_ring == sdma[i]) && unset != 1) {
923			amdgpu_ttm_set_buffer_funcs_status(adev, false);
924			unset = 1;
925		}
926
927		rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL);
928		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
929		WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
930		ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL);
931		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
932		WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl);
933
934		sdma[i]->sched.ready = false;
935	}
936}
937
938/**
939 * sdma_v4_0_rlc_stop - stop the compute async dma engines
940 *
941 * @adev: amdgpu_device pointer
942 *
943 * Stop the compute async dma queues (VEGA10).
944 */
945static void sdma_v4_0_rlc_stop(struct amdgpu_device *adev)
946{
947	/* XXX todo */
948}
949
950/**
951 * sdma_v4_0_page_stop - stop the page async dma engines
952 *
953 * @adev: amdgpu_device pointer
954 *
955 * Stop the page async dma ring buffers (VEGA10).
956 */
957static void sdma_v4_0_page_stop(struct amdgpu_device *adev)
958{
959	struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
960	u32 rb_cntl, ib_cntl;
961	int i;
962	bool unset = false;
963
964	for (i = 0; i < adev->sdma.num_instances; i++) {
965		sdma[i] = &adev->sdma.instance[i].page;
966
967		if ((adev->mman.buffer_funcs_ring == sdma[i]) &&
968			(unset == false)) {
969			amdgpu_ttm_set_buffer_funcs_status(adev, false);
970			unset = true;
971		}
972
973		rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL);
974		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL,
975					RB_ENABLE, 0);
976		WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl);
977		ib_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL);
978		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL,
979					IB_ENABLE, 0);
980		WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl);
981
982		sdma[i]->sched.ready = false;
983	}
984}
985
986/**
987 * sdma_v4_0_ctx_switch_enable - enable/disable the async dma engines context switch
988 *
989 * @adev: amdgpu_device pointer
990 * @enable: enable/disable the DMA MEs context switch.
991 *
992 * Halt or unhalt the async dma engines context switch (VEGA10).
993 */
994static void sdma_v4_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
995{
996	u32 f32_cntl, phase_quantum = 0;
997	int i;
998
999	if (amdgpu_sdma_phase_quantum) {
1000		unsigned value = amdgpu_sdma_phase_quantum;
1001		unsigned unit = 0;
1002
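		/*
		 * Encode the requested quantum (in clock cycles) as
		 * value << unit so it fits the VALUE/UNIT fields of
		 * SDMA0_PHASEx_QUANTUM, clamping at the maximum
		 * representable setting.
		 */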
1003		while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
1004				SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) {
1005			value = (value + 1) >> 1;
1006			unit++;
1007		}
1008		if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
1009			    SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) {
1010			value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
1011				 SDMA0_PHASE0_QUANTUM__VALUE__SHIFT);
1012			unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
1013				SDMA0_PHASE0_QUANTUM__UNIT__SHIFT);
1014			WARN_ONCE(1,
1015			"clamping sdma_phase_quantum to %uK clock cycles\n",
1016				  value << unit);
1017		}
1018		phase_quantum =
1019			value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT |
1020			unit  << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT;
1021	}
1022
1023	for (i = 0; i < adev->sdma.num_instances; i++) {
1024		f32_cntl = RREG32_SDMA(i, mmSDMA0_CNTL);
1025		f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
1026				AUTO_CTXSW_ENABLE, enable ? 1 : 0);
1027		if (enable && amdgpu_sdma_phase_quantum) {
1028			WREG32_SDMA(i, mmSDMA0_PHASE0_QUANTUM, phase_quantum);
1029			WREG32_SDMA(i, mmSDMA0_PHASE1_QUANTUM, phase_quantum);
1030			WREG32_SDMA(i, mmSDMA0_PHASE2_QUANTUM, phase_quantum);
1031		}
1032		WREG32_SDMA(i, mmSDMA0_CNTL, f32_cntl);
1033	}
1034
1035}
1036
1037/**
1038 * sdma_v4_0_enable - enable/disable the async dma engines
1039 *
1040 * @adev: amdgpu_device pointer
1041 * @enable: enable/disable the DMA MEs.
1042 *
1043 * Halt or unhalt the async dma engines (VEGA10).
1044 */
1045static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable)
1046{
1047	u32 f32_cntl;
1048	int i;
1049
1050	if (!enable) {
1051		sdma_v4_0_gfx_stop(adev);
1052		sdma_v4_0_rlc_stop(adev);
1053		if (adev->sdma.has_page_queue)
1054			sdma_v4_0_page_stop(adev);
1055	}
1056
1057	for (i = 0; i < adev->sdma.num_instances; i++) {
1058		f32_cntl = RREG32_SDMA(i, mmSDMA0_F32_CNTL);
1059		f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1);
1060		WREG32_SDMA(i, mmSDMA0_F32_CNTL, f32_cntl);
1061	}
1062}
1063
1064/**
1065 * sdma_v4_0_rb_cntl - get parameters for rb_cntl
1066 */
1067static uint32_t sdma_v4_0_rb_cntl(struct amdgpu_ring *ring, uint32_t rb_cntl)
1068{
1069	/* Set ring buffer size in dwords */
1070	uint32_t rb_bufsz = order_base_2(ring->ring_size / 4);
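	/* order_base_2() yields log2 of the dword count, which is the encoding RB_SIZE expects */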
1071
1072	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
1073#ifdef __BIG_ENDIAN
1074	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
1075	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
1076				RPTR_WRITEBACK_SWAP_ENABLE, 1);
1077#endif
1078	return rb_cntl;
1079}
1080
1081/**
1082 * sdma_v4_0_gfx_resume - setup and start the async dma engines
1083 *
1084 * @adev: amdgpu_device pointer
1085 * @i: instance to resume
1086 *
1087 * Set up the gfx DMA ring buffers and enable them (VEGA10).
1089 */
1090static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
1091{
1092	struct amdgpu_ring *ring = &adev->sdma.instance[i].ring;
1093	u32 rb_cntl, ib_cntl, wptr_poll_cntl;
1094	u32 wb_offset;
1095	u32 doorbell;
1096	u32 doorbell_offset;
1097	u64 wptr_gpu_addr;
1098
1099	wb_offset = (ring->rptr_offs * 4);
1100
1101	rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL);
1102	rb_cntl = sdma_v4_0_rb_cntl(ring, rb_cntl);
1103	WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
1104
1105	/* Initialize the ring buffer's read and write pointers */
1106	WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR, 0);
1107	WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_HI, 0);
1108	WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR, 0);
1109	WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_HI, 0);
1110
1111	/* set the wb address whether it's enabled or not */
1112	WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_ADDR_HI,
1113	       upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
1114	WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_ADDR_LO,
1115	       lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
1116
1117	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
1118				RPTR_WRITEBACK_ENABLE, 1);
1119
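	/* RB_BASE/RB_BASE_HI take the ring buffer GPU address in 256-byte units */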
1120	WREG32_SDMA(i, mmSDMA0_GFX_RB_BASE, ring->gpu_addr >> 8);
1121	WREG32_SDMA(i, mmSDMA0_GFX_RB_BASE_HI, ring->gpu_addr >> 40);
1122
1123	ring->wptr = 0;
1124
1125	/* before programming wptr to a smaller value, minor_ptr_update must be set first */
1126	WREG32_SDMA(i, mmSDMA0_GFX_MINOR_PTR_UPDATE, 1);
1127
1128	doorbell = RREG32_SDMA(i, mmSDMA0_GFX_DOORBELL);
1129	doorbell_offset = RREG32_SDMA(i, mmSDMA0_GFX_DOORBELL_OFFSET);
1130
1131	doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE,
1132				 ring->use_doorbell);
1133	doorbell_offset = REG_SET_FIELD(doorbell_offset,
1134					SDMA0_GFX_DOORBELL_OFFSET,
1135					OFFSET, ring->doorbell_index);
1136	WREG32_SDMA(i, mmSDMA0_GFX_DOORBELL, doorbell);
1137	WREG32_SDMA(i, mmSDMA0_GFX_DOORBELL_OFFSET, doorbell_offset);
1138
1139	sdma_v4_0_ring_set_wptr(ring);
1140
1141	/* set minor_ptr_update to 0 after wptr is programmed */
1142	WREG32_SDMA(i, mmSDMA0_GFX_MINOR_PTR_UPDATE, 0);
1143
1144	/* setup the wptr shadow polling */
1145	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
1146	WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO,
1147		    lower_32_bits(wptr_gpu_addr));
1148	WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI,
1149		    upper_32_bits(wptr_gpu_addr));
1150	wptr_poll_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL);
1151	wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
1152				       SDMA0_GFX_RB_WPTR_POLL_CNTL,
1153				       F32_POLL_ENABLE, amdgpu_sriov_vf(adev) ? 1 : 0);
1154	WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, wptr_poll_cntl);
1155
1156	/* enable DMA RB */
1157	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
1158	WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
1159
1160	ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL);
1161	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
1162#ifdef __BIG_ENDIAN
1163	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
1164#endif
1165	/* enable DMA IBs */
1166	WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl);
1167
1168	ring->sched.ready = true;
1169}
1170
1171/**
1172 * sdma_v4_0_page_resume - setup and start the async dma engines
1173 *
1174 * @adev: amdgpu_device pointer
1175 * @i: instance to resume
1176 *
1177 * Set up the page DMA ring buffers and enable them (VEGA10).
1179 */
1180static void sdma_v4_0_page_resume(struct amdgpu_device *adev, unsigned int i)
1181{
1182	struct amdgpu_ring *ring = &adev->sdma.instance[i].page;
1183	u32 rb_cntl, ib_cntl, wptr_poll_cntl;
1184	u32 wb_offset;
1185	u32 doorbell;
1186	u32 doorbell_offset;
1187	u64 wptr_gpu_addr;
1188
1189	wb_offset = (ring->rptr_offs * 4);
1190
1191	rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL);
1192	rb_cntl = sdma_v4_0_rb_cntl(ring, rb_cntl);
1193	WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl);
1194
1195	/* Initialize the ring buffer's read and write pointers */
1196	WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR, 0);
1197	WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_HI, 0);
1198	WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR, 0);
1199	WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_HI, 0);
1200
1201	/* set the wb address whether it's enabled or not */
1202	WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_ADDR_HI,
1203	       upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
1204	WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_ADDR_LO,
1205	       lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
1206
1207	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL,
1208				RPTR_WRITEBACK_ENABLE, 1);
1209
1210	WREG32_SDMA(i, mmSDMA0_PAGE_RB_BASE, ring->gpu_addr >> 8);
1211	WREG32_SDMA(i, mmSDMA0_PAGE_RB_BASE_HI, ring->gpu_addr >> 40);
1212
1213	ring->wptr = 0;
1214
1215	/* before programming wptr to a smaller value, minor_ptr_update must be set first */
1216	WREG32_SDMA(i, mmSDMA0_PAGE_MINOR_PTR_UPDATE, 1);
1217
1218	doorbell = RREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL);
1219	doorbell_offset = RREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL_OFFSET);
1220
1221	doorbell = REG_SET_FIELD(doorbell, SDMA0_PAGE_DOORBELL, ENABLE,
1222				 ring->use_doorbell);
1223	doorbell_offset = REG_SET_FIELD(doorbell_offset,
1224					SDMA0_PAGE_DOORBELL_OFFSET,
1225					OFFSET, ring->doorbell_index);
1226	WREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL, doorbell);
1227	WREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL_OFFSET, doorbell_offset);
1228
1229	/* paging queue doorbell range is setup at sdma_v4_0_gfx_resume */
1230	sdma_v4_0_page_ring_set_wptr(ring);
1231
1232	/* set minor_ptr_update to 0 after wptr is programmed */
1233	WREG32_SDMA(i, mmSDMA0_PAGE_MINOR_PTR_UPDATE, 0);
1234
1235	/* setup the wptr shadow polling */
1236	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
1237	WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_ADDR_LO,
1238		    lower_32_bits(wptr_gpu_addr));
1239	WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_ADDR_HI,
1240		    upper_32_bits(wptr_gpu_addr));
1241	wptr_poll_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL);
1242	wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
1243				       SDMA0_PAGE_RB_WPTR_POLL_CNTL,
1244				       F32_POLL_ENABLE, amdgpu_sriov_vf(adev) ? 1 : 0);
1245	WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, wptr_poll_cntl);
1246
1247	/* enable DMA RB */
1248	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL, RB_ENABLE, 1);
1249	WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl);
1250
1251	ib_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL);
1252	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL, IB_ENABLE, 1);
1253#ifdef __BIG_ENDIAN
1254	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL, IB_SWAP_ENABLE, 1);
1255#endif
1256	/* enable DMA IBs */
1257	WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl);
1258
1259	ring->sched.ready = true;
1260}
1261
1262static void
1263sdma_v4_1_update_power_gating(struct amdgpu_device *adev, bool enable)
1264{
1265	uint32_t def, data;
1266
1267	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_SDMA)) {
1268		/* enable idle interrupt */
1269		def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL));
1270		data |= SDMA0_CNTL__CTXEMPTY_INT_ENABLE_MASK;
1271
1272		if (data != def)
1273			WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL), data);
1274	} else {
1275		/* disable idle interrupt */
1276		def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL));
1277		data &= ~SDMA0_CNTL__CTXEMPTY_INT_ENABLE_MASK;
1278		if (data != def)
1279			WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL), data);
1280	}
1281}
1282
1283static void sdma_v4_1_init_power_gating(struct amdgpu_device *adev)
1284{
1285	uint32_t def, data;
1286
1287	/* Enable HW based PG. */
1288	def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL));
1289	data |= SDMA0_POWER_CNTL__PG_CNTL_ENABLE_MASK;
1290	if (data != def)
1291		WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data);
1292
1293	/* enable interrupt */
1294	def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL));
1295	data |= SDMA0_CNTL__CTXEMPTY_INT_ENABLE_MASK;
1296	if (data != def)
1297		WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL), data);
1298
1299	/* Configure hold time to filter out invalid power on/off requests. Use the default for now */
1300	def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL));
1301	data &= ~SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK;
1302	data |= (mmSDMA0_POWER_CNTL_DEFAULT & SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK);
1303	/* Configure switch time for hysteresis purposes. Use the default for now */
1304	data &= ~SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK;
1305	data |= (mmSDMA0_POWER_CNTL_DEFAULT & SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK);
1306	if (data != def)
1307		WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data);
1308}
1309
1310static void sdma_v4_0_init_pg(struct amdgpu_device *adev)
1311{
1312	if (!(adev->pg_flags & AMD_PG_SUPPORT_SDMA))
1313		return;
1314
1315	switch (adev->asic_type) {
1316	case CHIP_RAVEN:
1317	case CHIP_RENOIR:
1318		sdma_v4_1_init_power_gating(adev);
1319		sdma_v4_1_update_power_gating(adev, true);
1320		break;
1321	default:
1322		break;
1323	}
1324}
1325
1326/**
1327 * sdma_v4_0_rlc_resume - setup and start the async dma engines
1328 *
1329 * @adev: amdgpu_device pointer
1330 *
1331 * Set up the compute DMA queues and enable them (VEGA10).
1332 * Returns 0 for success, error for failure.
1333 */
1334static int sdma_v4_0_rlc_resume(struct amdgpu_device *adev)
1335{
1336	sdma_v4_0_init_pg(adev);
1337
1338	return 0;
1339}
1340
1341/**
1342 * sdma_v4_0_load_microcode - load the sDMA ME ucode
1343 *
1344 * @adev: amdgpu_device pointer
1345 *
1346 * Loads the sDMA ucode for each instance.
1347 * Returns 0 for success, -EINVAL if the ucode is not available.
1348 */
1349static int sdma_v4_0_load_microcode(struct amdgpu_device *adev)
1350{
1351	const struct sdma_firmware_header_v1_0 *hdr;
1352	const __le32 *fw_data;
1353	u32 fw_size;
1354	int i, j;
1355
1356	/* halt the MEs */
1357	sdma_v4_0_enable(adev, false);
1358
1359	for (i = 0; i < adev->sdma.num_instances; i++) {
1360		if (!adev->sdma.instance[i].fw)
1361			return -EINVAL;
1362
1363		hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
1364		amdgpu_ucode_print_sdma_hdr(&hdr->header);
1365		fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1366
1367		fw_data = (const __le32 *)
1368			(adev->sdma.instance[i].fw->data +
1369				le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1370
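		/* reset the ucode write address, then stream the image
		 * dword by dword through the UCODE_DATA register */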
1371		WREG32_SDMA(i, mmSDMA0_UCODE_ADDR, 0);
1372
1373		for (j = 0; j < fw_size; j++)
1374			WREG32_SDMA(i, mmSDMA0_UCODE_DATA,
1375				    le32_to_cpup(fw_data++));
1376
1377		WREG32_SDMA(i, mmSDMA0_UCODE_ADDR,
1378			    adev->sdma.instance[i].fw_version);
1379	}
1380
1381	return 0;
1382}
1383
1384/**
1385 * sdma_v4_0_start - setup and start the async dma engines
1386 *
1387 * @adev: amdgpu_device pointer
1388 *
1389 * Set up the DMA engines and enable them (VEGA10).
1390 * Returns 0 for success, error for failure.
1391 */
1392static int sdma_v4_0_start(struct amdgpu_device *adev)
1393{
1394	struct amdgpu_ring *ring;
1395	int i, r = 0;
1396
1397	if (amdgpu_sriov_vf(adev)) {
1398		sdma_v4_0_ctx_switch_enable(adev, false);
1399		sdma_v4_0_enable(adev, false);
1400	} else {
1401
1402		if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
1403			r = sdma_v4_0_load_microcode(adev);
1404			if (r)
1405				return r;
1406		}
1407
1408		/* unhalt the MEs */
1409		sdma_v4_0_enable(adev, true);
1410		/* enable sdma ring preemption */
1411		sdma_v4_0_ctx_switch_enable(adev, true);
1412	}
1413
1414	/* start the gfx rings and rlc compute queues */
1415	for (i = 0; i < adev->sdma.num_instances; i++) {
1416		uint32_t temp;
1417
1418		WREG32_SDMA(i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL, 0);
1419		sdma_v4_0_gfx_resume(adev, i);
1420		if (adev->sdma.has_page_queue)
1421			sdma_v4_0_page_resume(adev, i);
1422
1423		/* always set the UTC L1 enable flag to 1 */
1424		temp = RREG32_SDMA(i, mmSDMA0_CNTL);
1425		temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
1426		WREG32_SDMA(i, mmSDMA0_CNTL, temp);
1427
1428		if (!amdgpu_sriov_vf(adev)) {
1429			/* unhalt engine */
1430			temp = RREG32_SDMA(i, mmSDMA0_F32_CNTL);
1431			temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
1432			WREG32_SDMA(i, mmSDMA0_F32_CNTL, temp);
1433		}
1434	}
1435
1436	if (amdgpu_sriov_vf(adev)) {
1437		sdma_v4_0_ctx_switch_enable(adev, true);
1438		sdma_v4_0_enable(adev, true);
1439	} else {
1440		r = sdma_v4_0_rlc_resume(adev);
1441		if (r)
1442			return r;
1443	}
1444
1445	for (i = 0; i < adev->sdma.num_instances; i++) {
1446		ring = &adev->sdma.instance[i].ring;
1447
1448		r = amdgpu_ring_test_helper(ring);
1449		if (r)
1450			return r;
1451
1452		if (adev->sdma.has_page_queue) {
1453			struct amdgpu_ring *page = &adev->sdma.instance[i].page;
1454
1455			r = amdgpu_ring_test_helper(page);
1456			if (r)
1457				return r;
1458
1459			if (adev->mman.buffer_funcs_ring == page)
1460				amdgpu_ttm_set_buffer_funcs_status(adev, true);
1461		}
1462
1463		if (adev->mman.buffer_funcs_ring == ring)
1464			amdgpu_ttm_set_buffer_funcs_status(adev, true);
1465	}
1466
1467	return r;
1468}
1469
1470/**
1471 * sdma_v4_0_ring_test_ring - simple async dma engine test
1472 *
1473 * @ring: amdgpu_ring structure holding ring information
1474 *
1475 * Test the DMA engine by using it to write a value to
1476 * memory (VEGA10).
1477 * Returns 0 for success, error for failure.
1478 */
1479static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring)
1480{
1481	struct amdgpu_device *adev = ring->adev;
1482	unsigned i;
1483	unsigned index;
1484	int r;
1485	u32 tmp;
1486	u64 gpu_addr;
1487
1488	r = amdgpu_device_wb_get(adev, &index);
1489	if (r)
1490		return r;
1491
1492	gpu_addr = adev->wb.gpu_addr + (index * 4);
1493	tmp = 0xCAFEDEAD;
1494	adev->wb.wb[index] = cpu_to_le32(tmp);
1495
1496	r = amdgpu_ring_alloc(ring, 5);
1497	if (r)
1498		goto error_free_wb;
1499
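	/* emit a single 5-dword WRITE_LINEAR packet that stores 0xDEADBEEF
	 * into the writeback slot allocated above */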
1500	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
1501			  SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
1502	amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
1503	amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
1504	amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0));
1505	amdgpu_ring_write(ring, 0xDEADBEEF);
1506	amdgpu_ring_commit(ring);
1507
1508	for (i = 0; i < adev->usec_timeout; i++) {
1509		tmp = le32_to_cpu(adev->wb.wb[index]);
1510		if (tmp == 0xDEADBEEF)
1511			break;
1512		udelay(1);
1513	}
1514
1515	if (i >= adev->usec_timeout)
1516		r = -ETIMEDOUT;
1517
1518error_free_wb:
1519	amdgpu_device_wb_free(adev, index);
1520	return r;
1521}
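
/*
 * Illustrative sketch (not driver code): the 5-dword WRITE_LINEAR packet
 * emitted by the ring test above is laid out as
 *
 *	dw0: SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
 *	     SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR)
 *	dw1: lower_32_bits(gpu_addr)
 *	dw2: upper_32_bits(gpu_addr)
 *	dw3: SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0)   - a single data dword
 *	dw4: 0xDEADBEEF                             - the payload
 *
 * and the test then busy-waits on the write-back slot for up to
 * adev->usec_timeout microseconds until the payload shows up.
 */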
1522
1523/**
1524 * sdma_v4_0_ring_test_ib - test an IB on the DMA engine
1525 *
 * @ring: amdgpu_ring structure holding ring information
 * @timeout: timeout in jiffies to wait for the test fence to signal
 *
1528 * Test a simple IB in the DMA ring (VEGA10).
1529 * Returns 0 on success, error on failure.
1530 */
1531static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1532{
1533	struct amdgpu_device *adev = ring->adev;
1534	struct amdgpu_ib ib;
1535	struct dma_fence *f = NULL;
1536	unsigned index;
1537	long r;
1538	u32 tmp = 0;
1539	u64 gpu_addr;
1540
1541	r = amdgpu_device_wb_get(adev, &index);
1542	if (r)
1543		return r;
1544
1545	gpu_addr = adev->wb.gpu_addr + (index * 4);
1546	tmp = 0xCAFEDEAD;
1547	adev->wb.wb[index] = cpu_to_le32(tmp);
1548	memset(&ib, 0, sizeof(ib));
1549	r = amdgpu_ib_get(adev, NULL, 256, &ib);
1550	if (r)
1551		goto err0;
1552
1553	ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
1554		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
1555	ib.ptr[1] = lower_32_bits(gpu_addr);
1556	ib.ptr[2] = upper_32_bits(gpu_addr);
1557	ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0);
1558	ib.ptr[4] = 0xDEADBEEF;
1559	ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
1560	ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
1561	ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
1562	ib.length_dw = 8;
1563
1564	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1565	if (r)
1566		goto err1;
1567
1568	r = dma_fence_wait_timeout(f, false, timeout);
1569	if (r == 0) {
1570		r = -ETIMEDOUT;
1571		goto err1;
1572	} else if (r < 0) {
1573		goto err1;
1574	}
1575	tmp = le32_to_cpu(adev->wb.wb[index]);
1576	if (tmp == 0xDEADBEEF)
1577		r = 0;
1578	else
1579		r = -EINVAL;
1580
1581err1:
1582	amdgpu_ib_free(adev, &ib, NULL);
1583	dma_fence_put(f);
1584err0:
1585	amdgpu_device_wb_free(adev, index);
1586	return r;
1587}
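
/*
 * A note on the IB test above (sketch, not normative): the payload is the
 * same WRITE_LINEAR packet as in the ring test, followed by three NOP
 * headers so that ib.length_dw comes out as 8 - the same multiple-of-8
 * alignment that sdma_v4_0_ring_pad_ib() below enforces.
 */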
1588
1589
1590/**
1591 * sdma_v4_0_vm_copy_pte - update PTEs by copying them from the GART
1592 *
1593 * @ib: indirect buffer to fill with commands
1594 * @pe: addr of the page entry
1595 * @src: src addr to copy from
1596 * @count: number of page entries to update
1597 *
1598 * Update PTEs by copying them from the GART using sDMA (VEGA10).
1599 */
1600static void sdma_v4_0_vm_copy_pte(struct amdgpu_ib *ib,
1601				  uint64_t pe, uint64_t src,
1602				  unsigned count)
1603{
1604	unsigned bytes = count * 8;
1605
1606	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
1607		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
1608	ib->ptr[ib->length_dw++] = bytes - 1;
1609	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
1610	ib->ptr[ib->length_dw++] = lower_32_bits(src);
1611	ib->ptr[ib->length_dw++] = upper_32_bits(src);
1612	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
1613	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
1614
1615}
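
/*
 * Sizing sketch (not driver code): each PTE is 8 bytes, so one COPY_LINEAR
 * packet built above moves count * 8 bytes and always occupies 7 dwords in
 * the IB - which is where .copy_pte_num_dw = 7 in sdma_v4_0_vm_pte_funcs
 * below comes from.  Copying 512 PTEs, for example, moves 4096 bytes with a
 * single 7-dword packet.
 */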
1616
1617/**
1618 * sdma_v4_0_vm_write_pte - update PTEs by writing them manually
1619 *
1620 * @ib: indirect buffer to fill with commands
1621 * @pe: addr of the page entry
 * @value: value to write into the page table entries
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
1626 *
1627 * Update PTEs by writing them manually using sDMA (VEGA10).
1628 */
1629static void sdma_v4_0_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
1630				   uint64_t value, unsigned count,
1631				   uint32_t incr)
1632{
1633	unsigned ndw = count * 2;
1634
1635	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
1636		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
1637	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
1638	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
1639	ib->ptr[ib->length_dw++] = ndw - 1;
1640	for (; ndw > 0; ndw -= 2) {
1641		ib->ptr[ib->length_dw++] = lower_32_bits(value);
1642		ib->ptr[ib->length_dw++] = upper_32_bits(value);
1643		value += incr;
1644	}
1645}
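
/*
 * Sizing sketch (not driver code): a WRITE_LINEAR packet produced above
 * costs 4 header dwords plus 2 data dwords per PTE, and the written value
 * advances by incr bytes per entry, e.g.
 *
 *	count = 3, value = V, incr = 0x1000
 *	-> 10 dwords total, writing V, V + 0x1000, V + 0x2000
 */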
1646
1647/**
1648 * sdma_v4_0_vm_set_pte_pde - update the page tables using sDMA
1649 *
1650 * @ib: indirect buffer to fill with commands
1651 * @pe: addr of the page entry
1652 * @addr: dst addr to write into pe
1653 * @count: number of page entries to update
1654 * @incr: increase next addr by incr bytes
1655 * @flags: access flags
1656 *
1657 * Update the page tables using sDMA (VEGA10).
1658 */
1659static void sdma_v4_0_vm_set_pte_pde(struct amdgpu_ib *ib,
1660				     uint64_t pe,
1661				     uint64_t addr, unsigned count,
1662				     uint32_t incr, uint64_t flags)
1663{
1664	/* for physically contiguous pages (vram) */
1665	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_PTEPDE);
1666	ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
1667	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
1668	ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
1669	ib->ptr[ib->length_dw++] = upper_32_bits(flags);
1670	ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
1671	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
1672	ib->ptr[ib->length_dw++] = incr; /* increment size */
1673	ib->ptr[ib->length_dw++] = 0;
1674	ib->ptr[ib->length_dw++] = count - 1; /* number of entries */
1675}
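
/*
 * Layout sketch (not driver code): the PTEPDE packet built above is always
 * 10 dwords - header, destination address (2), mask/flags (2), starting
 * value (2), increment, a zero dword, and "count - 1".  The engine is then
 * assumed to generate the entries itself, stepping the value by incr bytes
 * per entry, which is why this path is reserved for physically contiguous
 * (vram) ranges.
 */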
1676
1677/**
1678 * sdma_v4_0_ring_pad_ib - pad the IB to the required number of dw
1679 *
 * @ring: amdgpu_ring pointer
 * @ib: indirect buffer to fill with padding
 *
 * Pad the IB with NOPs to a multiple of 8 dwords.
 */
1683static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
1684{
1685	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
1686	u32 pad_count;
1687	int i;
1688
1689	pad_count = (-ib->length_dw) & 7;
1690	for (i = 0; i < pad_count; i++)
1691		if (sdma && sdma->burst_nop && (i == 0))
1692			ib->ptr[ib->length_dw++] =
1693				SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
1694				SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
1695		else
1696			ib->ptr[ib->length_dw++] =
1697				SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
1698}
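
/*
 * Padding arithmetic, as a sketch: pad_count = (-length_dw) & 7 rounds the
 * IB up to the next multiple of 8 dwords, e.g. length_dw == 13 gives
 * pad_count == 3 and a final length of 16.  When the firmware supports
 * burst NOPs, the first pad dword carries
 * SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1) so the engine can consume the
 * remaining pad dwords in one go; otherwise every pad dword is a plain NOP
 * header.
 */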
1699
1700
1701/**
1702 * sdma_v4_0_ring_emit_pipeline_sync - sync the pipeline
1703 *
1704 * @ring: amdgpu_ring pointer
1705 *
 * Make sure all previous operations are completed (VEGA10).
1707 */
1708static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
1709{
1710	uint32_t seq = ring->fence_drv.sync_seq;
1711	uint64_t addr = ring->fence_drv.gpu_addr;
1712
1713	/* wait for idle */
1714	sdma_v4_0_wait_reg_mem(ring, 1, 0,
1715			       addr & 0xfffffffc,
1716			       upper_32_bits(addr) & 0xffffffff,
1717			       seq, 0xffffffff, 4);
1718}
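
/*
 * Mechanism sketch (not normative): the pipeline sync is a polling wait
 * emitted into the ring.  sdma_v4_0_wait_reg_mem() is pointed at the fence
 * location in memory (the leading argument of 1 appears to select a memory
 * rather than a register poll, judging by sdma_v4_0_ring_emit_reg_wait()
 * below, which passes 0) and waits until the value there reaches
 * ring->fence_drv.sync_seq, so packets queued after this point only run
 * once every previously emitted fence has signalled.
 */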
1719
1720
1721/**
1722 * sdma_v4_0_ring_emit_vm_flush - vm flush using sDMA
1723 *
1724 * @ring: amdgpu_ring pointer
 * @vmid: VMID of the flush
 * @pd_addr: page directory base address
1726 *
1727 * Update the page table base and flush the VM TLB
1728 * using sDMA (VEGA10).
1729 */
1730static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
1731					 unsigned vmid, uint64_t pd_addr)
1732{
1733	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
1734}
1735
1736static void sdma_v4_0_ring_emit_wreg(struct amdgpu_ring *ring,
1737				     uint32_t reg, uint32_t val)
1738{
1739	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
1740			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
1741	amdgpu_ring_write(ring, reg);
1742	amdgpu_ring_write(ring, val);
1743}
1744
1745static void sdma_v4_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
1746					 uint32_t val, uint32_t mask)
1747{
1748	sdma_v4_0_wait_reg_mem(ring, 0, 0, reg, 0, val, mask, 10);
1749}
1750
1751static bool sdma_v4_0_fw_support_paging_queue(struct amdgpu_device *adev)
1752{
1753	uint fw_version = adev->sdma.instance[0].fw_version;
1754
1755	switch (adev->asic_type) {
1756	case CHIP_VEGA10:
1757		return fw_version >= 430;
1758	case CHIP_VEGA12:
1759		/*return fw_version >= 31;*/
1760		return false;
1761	case CHIP_VEGA20:
1762		return fw_version >= 123;
1763	default:
1764		return false;
1765	}
1766}
1767
1768static int sdma_v4_0_early_init(void *handle)
1769{
1770	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1771	int r;
1772
1773	if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR)
1774		adev->sdma.num_instances = 1;
1775	else if (adev->asic_type == CHIP_ARCTURUS)
1776		adev->sdma.num_instances = 8;
1777	else
1778		adev->sdma.num_instances = 2;
1779
1780	r = sdma_v4_0_init_microcode(adev);
1781	if (r) {
1782		DRM_ERROR("Failed to load sdma firmware!\n");
1783		return r;
1784	}
1785
1786	/* TODO: Page queue breaks driver reload under SRIOV */
1787	if ((adev->asic_type == CHIP_VEGA10) && amdgpu_sriov_vf((adev)))
1788		adev->sdma.has_page_queue = false;
1789	else if (sdma_v4_0_fw_support_paging_queue(adev))
1790		adev->sdma.has_page_queue = true;
1791
1792	sdma_v4_0_set_ring_funcs(adev);
1793	sdma_v4_0_set_buffer_funcs(adev);
1794	sdma_v4_0_set_vm_pte_funcs(adev);
1795	sdma_v4_0_set_irq_funcs(adev);
1796	sdma_v4_0_set_ras_funcs(adev);
1797
1798	return 0;
1799}
1800
1801static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev,
1802		void *err_data,
1803		struct amdgpu_iv_entry *entry);
1804
1805static int sdma_v4_0_late_init(void *handle)
1806{
1807	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1808	struct ras_ih_if ih_info = {
1809		.cb = sdma_v4_0_process_ras_data_cb,
1810	};
1811	int i;
1812
1813	/* read back edc counter registers to clear the counters */
1814	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
1815		for (i = 0; i < adev->sdma.num_instances; i++)
1816			RREG32_SDMA(i, mmSDMA0_EDC_COUNTER);
1817	}
1818
1819	if (adev->sdma.funcs && adev->sdma.funcs->ras_late_init)
1820		return adev->sdma.funcs->ras_late_init(adev, &ih_info);
1821	else
1822		return 0;
1823}
1824
1825static int sdma_v4_0_sw_init(void *handle)
1826{
1827	struct amdgpu_ring *ring;
1828	int r, i;
1829	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1830
1831	/* SDMA trap event */
1832	for (i = 0; i < adev->sdma.num_instances; i++) {
1833		r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i),
1834				      SDMA0_4_0__SRCID__SDMA_TRAP,
1835				      &adev->sdma.trap_irq);
1836		if (r)
1837			return r;
1838	}
1839
1840	/* SDMA SRAM ECC event */
1841	for (i = 0; i < adev->sdma.num_instances; i++) {
1842		r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i),
1843				      SDMA0_4_0__SRCID__SDMA_SRAM_ECC,
1844				      &adev->sdma.ecc_irq);
1845		if (r)
1846			return r;
1847	}
1848
1849	for (i = 0; i < adev->sdma.num_instances; i++) {
1850		ring = &adev->sdma.instance[i].ring;
1851		ring->ring_obj = NULL;
1852		ring->use_doorbell = true;
1853
1854		DRM_INFO("use_doorbell being set to: [%s]\n",
1855				ring->use_doorbell?"true":"false");
1856
1857		/* doorbell size is 2 dwords, get DWORD offset */
1858		ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1;
1859
1860		snprintf(ring->name, sizeof(ring->name), "sdma%d", i);
1861		r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq,
1862				     AMDGPU_SDMA_IRQ_INSTANCE0 + i);
1863		if (r)
1864			return r;
1865
1866		if (adev->sdma.has_page_queue) {
1867			ring = &adev->sdma.instance[i].page;
1868			ring->ring_obj = NULL;
1869			ring->use_doorbell = true;
1870
			/* the paging queue uses the same doorbell index/routing
			 * as the gfx queue, with a 0x400 dword offset that lands
			 * on the second doorbell page
			 */
1874			ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1;
1875			ring->doorbell_index += 0x400;
1876
1877			snprintf(ring->name, sizeof(ring->name), "page%d", i);
1878			r = amdgpu_ring_init(adev, ring, 1024,
1879					     &adev->sdma.trap_irq,
1880					     AMDGPU_SDMA_IRQ_INSTANCE0 + i);
1881			if (r)
1882				return r;
1883		}
1884	}
1885
1886	return r;
1887}
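
/*
 * Doorbell layout sketch: doorbell entries are 64-bit (2 dwords), so the
 * gfx ring's dword doorbell index is
 *
 *	ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1;
 *
 * and the paging queue reuses the same routing at a +0x400 dword offset,
 * which places it on the second doorbell page.
 */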
1888
1889static int sdma_v4_0_sw_fini(void *handle)
1890{
1891	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1892	int i;
1893
1894	if (adev->sdma.funcs && adev->sdma.funcs->ras_fini)
1895		adev->sdma.funcs->ras_fini(adev);
1896
1897	for (i = 0; i < adev->sdma.num_instances; i++) {
1898		amdgpu_ring_fini(&adev->sdma.instance[i].ring);
1899		if (adev->sdma.has_page_queue)
1900			amdgpu_ring_fini(&adev->sdma.instance[i].page);
1901	}
1902
1903	sdma_v4_0_destroy_inst_ctx(adev);
1904
1905	return 0;
1906}
1907
1908static int sdma_v4_0_hw_init(void *handle)
1909{
1910	int r;
1911	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1912
1913	if ((adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs &&
1914			adev->powerplay.pp_funcs->set_powergating_by_smu) ||
1915			(adev->asic_type == CHIP_RENOIR && !adev->in_gpu_reset))
1916		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false);
1917
1918	if (!amdgpu_sriov_vf(adev))
1919		sdma_v4_0_init_golden_registers(adev);
1920
1921	r = sdma_v4_0_start(adev);
1922
1923	return r;
1924}
1925
1926static int sdma_v4_0_hw_fini(void *handle)
1927{
1928	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1929	int i;
1930
1931	if (amdgpu_sriov_vf(adev))
1932		return 0;
1933
1934	for (i = 0; i < adev->sdma.num_instances; i++) {
1935		amdgpu_irq_put(adev, &adev->sdma.ecc_irq,
1936			       AMDGPU_SDMA_IRQ_INSTANCE0 + i);
1937	}
1938
1939	sdma_v4_0_ctx_switch_enable(adev, false);
1940	sdma_v4_0_enable(adev, false);
1941
1942	if ((adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs
1943			&& adev->powerplay.pp_funcs->set_powergating_by_smu) ||
1944			adev->asic_type == CHIP_RENOIR)
1945		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true);
1946
1947	return 0;
1948}
1949
1950static int sdma_v4_0_suspend(void *handle)
1951{
1952	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1953
1954	return sdma_v4_0_hw_fini(adev);
1955}
1956
1957static int sdma_v4_0_resume(void *handle)
1958{
1959	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1960
1961	return sdma_v4_0_hw_init(adev);
1962}
1963
1964static bool sdma_v4_0_is_idle(void *handle)
1965{
1966	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1967	u32 i;
1968
1969	for (i = 0; i < adev->sdma.num_instances; i++) {
1970		u32 tmp = RREG32_SDMA(i, mmSDMA0_STATUS_REG);
1971
1972		if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
1973			return false;
1974	}
1975
1976	return true;
1977}
1978
1979static int sdma_v4_0_wait_for_idle(void *handle)
1980{
1981	unsigned i, j;
1982	u32 sdma[AMDGPU_MAX_SDMA_INSTANCES];
1983	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1984
1985	for (i = 0; i < adev->usec_timeout; i++) {
1986		for (j = 0; j < adev->sdma.num_instances; j++) {
1987			sdma[j] = RREG32_SDMA(j, mmSDMA0_STATUS_REG);
1988			if (!(sdma[j] & SDMA0_STATUS_REG__IDLE_MASK))
1989				break;
1990		}
1991		if (j == adev->sdma.num_instances)
1992			return 0;
1993		udelay(1);
1994	}
1995	return -ETIMEDOUT;
1996}
1997
1998static int sdma_v4_0_soft_reset(void *handle)
1999{
2000	/* todo */
2001
2002	return 0;
2003}
2004
2005static int sdma_v4_0_set_trap_irq_state(struct amdgpu_device *adev,
2006					struct amdgpu_irq_src *source,
2007					unsigned type,
2008					enum amdgpu_interrupt_state state)
2009{
2010	u32 sdma_cntl;
2011
2012	sdma_cntl = RREG32_SDMA(type, mmSDMA0_CNTL);
2013	sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
2014		       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
2015	WREG32_SDMA(type, mmSDMA0_CNTL, sdma_cntl);
2016
2017	return 0;
2018}
2019
2020static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev,
2021				      struct amdgpu_irq_src *source,
2022				      struct amdgpu_iv_entry *entry)
2023{
2024	uint32_t instance;
2025
2026	DRM_DEBUG("IH: SDMA trap\n");
2027	instance = sdma_v4_0_irq_id_to_seq(entry->client_id);
2028	switch (entry->ring_id) {
2029	case 0:
2030		amdgpu_fence_process(&adev->sdma.instance[instance].ring);
2031		break;
2032	case 1:
2033		if (adev->asic_type == CHIP_VEGA20)
2034			amdgpu_fence_process(&adev->sdma.instance[instance].page);
2035		break;
2036	case 2:
2037		/* XXX compute */
2038		break;
2039	case 3:
2040		if (adev->asic_type != CHIP_VEGA20)
2041			amdgpu_fence_process(&adev->sdma.instance[instance].page);
2042		break;
2043	}
2044	return 0;
2045}
2046
2047static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev,
2048		void *err_data,
2049		struct amdgpu_iv_entry *entry)
2050{
2051	int instance;
2052
	/* When "Full RAS" is enabled, the per-IP interrupt sources should
	 * be disabled and the driver should only look for the aggregated
	 * interrupt via sync flood.
2056	 */
2057	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
2058		goto out;
2059
2060	instance = sdma_v4_0_irq_id_to_seq(entry->client_id);
2061	if (instance < 0)
2062		goto out;
2063
2064	amdgpu_sdma_process_ras_data_cb(adev, err_data, entry);
2065
2066out:
2067	return AMDGPU_RAS_SUCCESS;
2068}
2069
2070static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device *adev,
2071					      struct amdgpu_irq_src *source,
2072					      struct amdgpu_iv_entry *entry)
2073{
2074	int instance;
2075
2076	DRM_ERROR("Illegal instruction in SDMA command stream\n");
2077
2078	instance = sdma_v4_0_irq_id_to_seq(entry->client_id);
2079	if (instance < 0)
2080		return 0;
2081
2082	switch (entry->ring_id) {
2083	case 0:
2084		drm_sched_fault(&adev->sdma.instance[instance].ring.sched);
2085		break;
2086	}
2087	return 0;
2088}
2089
2090static int sdma_v4_0_set_ecc_irq_state(struct amdgpu_device *adev,
2091					struct amdgpu_irq_src *source,
2092					unsigned type,
2093					enum amdgpu_interrupt_state state)
2094{
2095	u32 sdma_edc_config;
2096
2097	sdma_edc_config = RREG32_SDMA(type, mmSDMA0_EDC_CONFIG);
2098	sdma_edc_config = REG_SET_FIELD(sdma_edc_config, SDMA0_EDC_CONFIG, ECC_INT_ENABLE,
2099		       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
2100	WREG32_SDMA(type, mmSDMA0_EDC_CONFIG, sdma_edc_config);
2101
2102	return 0;
2103}
2104
2105static void sdma_v4_0_update_medium_grain_clock_gating(
2106		struct amdgpu_device *adev,
2107		bool enable)
2108{
2109	uint32_t data, def;
2110	int i;
2111
2112	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
2113		for (i = 0; i < adev->sdma.num_instances; i++) {
2114			def = data = RREG32_SDMA(i, mmSDMA0_CLK_CTRL);
2115			data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
2116				  SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
2117				  SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
2118				  SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
2119				  SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
2120				  SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
2121				  SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
2122				  SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);
2123			if (def != data)
2124				WREG32_SDMA(i, mmSDMA0_CLK_CTRL, data);
2125		}
2126	} else {
2127		for (i = 0; i < adev->sdma.num_instances; i++) {
2128			def = data = RREG32_SDMA(i, mmSDMA0_CLK_CTRL);
2129			data |= (SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
2130				 SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
2131				 SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
2132				 SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
2133				 SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
2134				 SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
2135				 SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
2136				 SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);
2137			if (def != data)
2138				WREG32_SDMA(i, mmSDMA0_CLK_CTRL, data);
2139		}
2140	}
2141}
2142
2143
2144static void sdma_v4_0_update_medium_grain_light_sleep(
2145		struct amdgpu_device *adev,
2146		bool enable)
2147{
2148	uint32_t data, def;
2149	int i;
2150
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) {
		for (i = 0; i < adev->sdma.num_instances; i++) {
			/* 1-not override: enable sdma mem light sleep */
			def = data = RREG32_SDMA(i, mmSDMA0_POWER_CNTL);
			data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
			if (def != data)
				WREG32_SDMA(i, mmSDMA0_POWER_CNTL, data);
		}
	} else {
		for (i = 0; i < adev->sdma.num_instances; i++) {
			/* 0-override: disable sdma mem light sleep */
			def = data = RREG32_SDMA(i, mmSDMA0_POWER_CNTL);
			data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
			if (def != data)
				WREG32_SDMA(i, mmSDMA0_POWER_CNTL, data);
		}
	}
2168}
2169
2170static int sdma_v4_0_set_clockgating_state(void *handle,
2171					  enum amd_clockgating_state state)
2172{
2173	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2174
2175	if (amdgpu_sriov_vf(adev))
2176		return 0;
2177
2178	switch (adev->asic_type) {
2179	case CHIP_VEGA10:
2180	case CHIP_VEGA12:
2181	case CHIP_VEGA20:
2182	case CHIP_RAVEN:
2183	case CHIP_ARCTURUS:
2184	case CHIP_RENOIR:
2185		sdma_v4_0_update_medium_grain_clock_gating(adev,
2186				state == AMD_CG_STATE_GATE);
2187		sdma_v4_0_update_medium_grain_light_sleep(adev,
2188				state == AMD_CG_STATE_GATE);
2189		break;
2190	default:
2191		break;
2192	}
2193	return 0;
2194}
2195
2196static int sdma_v4_0_set_powergating_state(void *handle,
2197					  enum amd_powergating_state state)
2198{
2199	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2200
2201	switch (adev->asic_type) {
2202	case CHIP_RAVEN:
2203		sdma_v4_1_update_power_gating(adev,
				state == AMD_PG_STATE_GATE);
2205		break;
2206	default:
2207		break;
2208	}
2209
2210	return 0;
2211}
2212
2213static void sdma_v4_0_get_clockgating_state(void *handle, u32 *flags)
2214{
2215	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2216	int data;
2217
2218	if (amdgpu_sriov_vf(adev))
2219		*flags = 0;
2220
2221	/* AMD_CG_SUPPORT_SDMA_MGCG */
2222	data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL));
2223	if (!(data & SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK))
2224		*flags |= AMD_CG_SUPPORT_SDMA_MGCG;
2225
2226	/* AMD_CG_SUPPORT_SDMA_LS */
2227	data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL));
2228	if (data & SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK)
2229		*flags |= AMD_CG_SUPPORT_SDMA_LS;
2230}
2231
2232const struct amd_ip_funcs sdma_v4_0_ip_funcs = {
2233	.name = "sdma_v4_0",
2234	.early_init = sdma_v4_0_early_init,
2235	.late_init = sdma_v4_0_late_init,
2236	.sw_init = sdma_v4_0_sw_init,
2237	.sw_fini = sdma_v4_0_sw_fini,
2238	.hw_init = sdma_v4_0_hw_init,
2239	.hw_fini = sdma_v4_0_hw_fini,
2240	.suspend = sdma_v4_0_suspend,
2241	.resume = sdma_v4_0_resume,
2242	.is_idle = sdma_v4_0_is_idle,
2243	.wait_for_idle = sdma_v4_0_wait_for_idle,
2244	.soft_reset = sdma_v4_0_soft_reset,
2245	.set_clockgating_state = sdma_v4_0_set_clockgating_state,
2246	.set_powergating_state = sdma_v4_0_set_powergating_state,
2247	.get_clockgating_state = sdma_v4_0_get_clockgating_state,
2248};
2249
2250static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
2251	.type = AMDGPU_RING_TYPE_SDMA,
2252	.align_mask = 0xf,
2253	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
2254	.support_64bit_ptrs = true,
2255	.vmhub = AMDGPU_MMHUB_0,
2256	.get_rptr = sdma_v4_0_ring_get_rptr,
2257	.get_wptr = sdma_v4_0_ring_get_wptr,
2258	.set_wptr = sdma_v4_0_ring_set_wptr,
2259	.emit_frame_size =
2260		6 + /* sdma_v4_0_ring_emit_hdp_flush */
2261		3 + /* hdp invalidate */
2262		6 + /* sdma_v4_0_ring_emit_pipeline_sync */
2263		/* sdma_v4_0_ring_emit_vm_flush */
2264		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
2265		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
2266		10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
2267	.emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
2268	.emit_ib = sdma_v4_0_ring_emit_ib,
2269	.emit_fence = sdma_v4_0_ring_emit_fence,
2270	.emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync,
2271	.emit_vm_flush = sdma_v4_0_ring_emit_vm_flush,
2272	.emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush,
2273	.test_ring = sdma_v4_0_ring_test_ring,
2274	.test_ib = sdma_v4_0_ring_test_ib,
2275	.insert_nop = sdma_v4_0_ring_insert_nop,
2276	.pad_ib = sdma_v4_0_ring_pad_ib,
2277	.emit_wreg = sdma_v4_0_ring_emit_wreg,
2278	.emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
2279	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
2280};
2281
2282/*
 * On Arcturus, SDMA instances 5~7 use a different vmhub type (AMDGPU_MMHUB_1),
 * so create an individual constant ring_funcs for those instances.
2285 */
2286static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs_2nd_mmhub = {
2287	.type = AMDGPU_RING_TYPE_SDMA,
2288	.align_mask = 0xf,
2289	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
2290	.support_64bit_ptrs = true,
2291	.vmhub = AMDGPU_MMHUB_1,
2292	.get_rptr = sdma_v4_0_ring_get_rptr,
2293	.get_wptr = sdma_v4_0_ring_get_wptr,
2294	.set_wptr = sdma_v4_0_ring_set_wptr,
2295	.emit_frame_size =
2296		6 + /* sdma_v4_0_ring_emit_hdp_flush */
2297		3 + /* hdp invalidate */
2298		6 + /* sdma_v4_0_ring_emit_pipeline_sync */
2299		/* sdma_v4_0_ring_emit_vm_flush */
2300		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
2301		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
2302		10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
2303	.emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
2304	.emit_ib = sdma_v4_0_ring_emit_ib,
2305	.emit_fence = sdma_v4_0_ring_emit_fence,
2306	.emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync,
2307	.emit_vm_flush = sdma_v4_0_ring_emit_vm_flush,
2308	.emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush,
2309	.test_ring = sdma_v4_0_ring_test_ring,
2310	.test_ib = sdma_v4_0_ring_test_ib,
2311	.insert_nop = sdma_v4_0_ring_insert_nop,
2312	.pad_ib = sdma_v4_0_ring_pad_ib,
2313	.emit_wreg = sdma_v4_0_ring_emit_wreg,
2314	.emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
2315	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
2316};
2317
2318static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs = {
2319	.type = AMDGPU_RING_TYPE_SDMA,
2320	.align_mask = 0xf,
2321	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
2322	.support_64bit_ptrs = true,
2323	.vmhub = AMDGPU_MMHUB_0,
2324	.get_rptr = sdma_v4_0_ring_get_rptr,
2325	.get_wptr = sdma_v4_0_page_ring_get_wptr,
2326	.set_wptr = sdma_v4_0_page_ring_set_wptr,
2327	.emit_frame_size =
2328		6 + /* sdma_v4_0_ring_emit_hdp_flush */
2329		3 + /* hdp invalidate */
2330		6 + /* sdma_v4_0_ring_emit_pipeline_sync */
2331		/* sdma_v4_0_ring_emit_vm_flush */
2332		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
2333		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
2334		10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
2335	.emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
2336	.emit_ib = sdma_v4_0_ring_emit_ib,
2337	.emit_fence = sdma_v4_0_ring_emit_fence,
2338	.emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync,
2339	.emit_vm_flush = sdma_v4_0_ring_emit_vm_flush,
2340	.emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush,
2341	.test_ring = sdma_v4_0_ring_test_ring,
2342	.test_ib = sdma_v4_0_ring_test_ib,
2343	.insert_nop = sdma_v4_0_ring_insert_nop,
2344	.pad_ib = sdma_v4_0_ring_pad_ib,
2345	.emit_wreg = sdma_v4_0_ring_emit_wreg,
2346	.emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
2347	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
2348};
2349
2350static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs_2nd_mmhub = {
2351	.type = AMDGPU_RING_TYPE_SDMA,
2352	.align_mask = 0xf,
2353	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
2354	.support_64bit_ptrs = true,
2355	.vmhub = AMDGPU_MMHUB_1,
2356	.get_rptr = sdma_v4_0_ring_get_rptr,
2357	.get_wptr = sdma_v4_0_page_ring_get_wptr,
2358	.set_wptr = sdma_v4_0_page_ring_set_wptr,
2359	.emit_frame_size =
2360		6 + /* sdma_v4_0_ring_emit_hdp_flush */
2361		3 + /* hdp invalidate */
2362		6 + /* sdma_v4_0_ring_emit_pipeline_sync */
2363		/* sdma_v4_0_ring_emit_vm_flush */
2364		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
2365		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
2366		10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
2367	.emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
2368	.emit_ib = sdma_v4_0_ring_emit_ib,
2369	.emit_fence = sdma_v4_0_ring_emit_fence,
2370	.emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync,
2371	.emit_vm_flush = sdma_v4_0_ring_emit_vm_flush,
2372	.emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush,
2373	.test_ring = sdma_v4_0_ring_test_ring,
2374	.test_ib = sdma_v4_0_ring_test_ib,
2375	.insert_nop = sdma_v4_0_ring_insert_nop,
2376	.pad_ib = sdma_v4_0_ring_pad_ib,
2377	.emit_wreg = sdma_v4_0_ring_emit_wreg,
2378	.emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
2379	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
2380};
2381
2382static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev)
2383{
2384	int i;
2385
2386	for (i = 0; i < adev->sdma.num_instances; i++) {
2387		if (adev->asic_type == CHIP_ARCTURUS && i >= 5)
2388			adev->sdma.instance[i].ring.funcs =
2389					&sdma_v4_0_ring_funcs_2nd_mmhub;
2390		else
2391			adev->sdma.instance[i].ring.funcs =
2392					&sdma_v4_0_ring_funcs;
2393		adev->sdma.instance[i].ring.me = i;
2394		if (adev->sdma.has_page_queue) {
2395			if (adev->asic_type == CHIP_ARCTURUS && i >= 5)
2396				adev->sdma.instance[i].page.funcs =
2397					&sdma_v4_0_page_ring_funcs_2nd_mmhub;
2398			else
2399				adev->sdma.instance[i].page.funcs =
2400					&sdma_v4_0_page_ring_funcs;
2401			adev->sdma.instance[i].page.me = i;
2402		}
2403	}
2404}
2405
2406static const struct amdgpu_irq_src_funcs sdma_v4_0_trap_irq_funcs = {
2407	.set = sdma_v4_0_set_trap_irq_state,
2408	.process = sdma_v4_0_process_trap_irq,
2409};
2410
2411static const struct amdgpu_irq_src_funcs sdma_v4_0_illegal_inst_irq_funcs = {
2412	.process = sdma_v4_0_process_illegal_inst_irq,
2413};
2414
2415static const struct amdgpu_irq_src_funcs sdma_v4_0_ecc_irq_funcs = {
2416	.set = sdma_v4_0_set_ecc_irq_state,
2417	.process = amdgpu_sdma_process_ecc_irq,
2418};
2419
2420
2421
2422static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev)
2423{
2424	switch (adev->sdma.num_instances) {
2425	case 1:
2426		adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1;
2427		adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1;
2428		break;
2429	case 8:
2430		adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
2431		adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
2432		break;
2433	case 2:
2434	default:
2435		adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2;
2436		adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2;
2437		break;
2438	}
2439	adev->sdma.trap_irq.funcs = &sdma_v4_0_trap_irq_funcs;
2440	adev->sdma.illegal_inst_irq.funcs = &sdma_v4_0_illegal_inst_irq_funcs;
2441	adev->sdma.ecc_irq.funcs = &sdma_v4_0_ecc_irq_funcs;
2442}
2443
2444/**
2445 * sdma_v4_0_emit_copy_buffer - copy buffer using the sDMA engine
2446 *
 * @ib: indirect buffer to fill with commands
2448 * @src_offset: src GPU address
2449 * @dst_offset: dst GPU address
2450 * @byte_count: number of bytes to xfer
2451 *
2452 * Copy GPU buffers using the DMA engine (VEGA10/12).
2453 * Used by the amdgpu ttm implementation to move pages if
2454 * registered as the asic copy callback.
2455 */
2456static void sdma_v4_0_emit_copy_buffer(struct amdgpu_ib *ib,
2457				       uint64_t src_offset,
2458				       uint64_t dst_offset,
2459				       uint32_t byte_count)
2460{
2461	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
2462		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
2463	ib->ptr[ib->length_dw++] = byte_count - 1;
2464	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
2465	ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
2466	ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
2467	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
2468	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
2469}
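
/*
 * Sizing note, as a sketch: a COPY_LINEAR packet is 7 dwords and moves at
 * most copy_max_bytes (0x400000, i.e. 4 MiB) per packet - the limits
 * advertised in sdma_v4_0_buffer_funcs below - so the TTM copy helpers are
 * expected to split larger transfers into multiple packets.
 */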
2470
2471/**
2472 * sdma_v4_0_emit_fill_buffer - fill buffer using the sDMA engine
2473 *
 * @ib: indirect buffer to fill with commands
2475 * @src_data: value to write to buffer
2476 * @dst_offset: dst GPU address
2477 * @byte_count: number of bytes to xfer
2478 *
2479 * Fill GPU buffers using the DMA engine (VEGA10/12).
2480 */
2481static void sdma_v4_0_emit_fill_buffer(struct amdgpu_ib *ib,
2482				       uint32_t src_data,
2483				       uint64_t dst_offset,
2484				       uint32_t byte_count)
2485{
2486	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL);
2487	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
2488	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
2489	ib->ptr[ib->length_dw++] = src_data;
2490	ib->ptr[ib->length_dw++] = byte_count - 1;
2491}
2492
2493static const struct amdgpu_buffer_funcs sdma_v4_0_buffer_funcs = {
2494	.copy_max_bytes = 0x400000,
2495	.copy_num_dw = 7,
2496	.emit_copy_buffer = sdma_v4_0_emit_copy_buffer,
2497
2498	.fill_max_bytes = 0x400000,
2499	.fill_num_dw = 5,
2500	.emit_fill_buffer = sdma_v4_0_emit_fill_buffer,
2501};
2502
2503static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev)
2504{
2505	adev->mman.buffer_funcs = &sdma_v4_0_buffer_funcs;
2506	if (adev->sdma.has_page_queue)
2507		adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].page;
2508	else
2509		adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
2510}
2511
2512static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = {
2513	.copy_pte_num_dw = 7,
2514	.copy_pte = sdma_v4_0_vm_copy_pte,
2515
2516	.write_pte = sdma_v4_0_vm_write_pte,
2517	.set_pte_pde = sdma_v4_0_vm_set_pte_pde,
2518};
2519
2520static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev)
2521{
2522	struct drm_gpu_scheduler *sched;
2523	unsigned i;
2524
2525	adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs;
2526	for (i = 0; i < adev->sdma.num_instances; i++) {
2527		if (adev->sdma.has_page_queue)
2528			sched = &adev->sdma.instance[i].page.sched;
2529		else
2530			sched = &adev->sdma.instance[i].ring.sched;
2531		adev->vm_manager.vm_pte_scheds[i] = sched;
2532	}
2533	adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
2534}
2535
2536static void sdma_v4_0_get_ras_error_count(uint32_t value,
2537					uint32_t instance,
2538					uint32_t *sec_count)
2539{
2540	uint32_t i;
2541	uint32_t sec_cnt;
2542
	/* double bit (multi-bit) error detection is not supported */
2544	for (i = 0; i < ARRAY_SIZE(sdma_v4_0_ras_fields); i++) {
		/* the SDMA_EDC_COUNTER register in each sdma instance
		 * shares the same SED shift/mask
		 */
2548		sec_cnt = (value &
2549			sdma_v4_0_ras_fields[i].sec_count_mask) >>
2550			sdma_v4_0_ras_fields[i].sec_count_shift;
2551		if (sec_cnt) {
2552			DRM_INFO("Detected %s in SDMA%d, SED %d\n",
2553				sdma_v4_0_ras_fields[i].name,
2554				instance, sec_cnt);
2555			*sec_count += sec_cnt;
2556		}
2557	}
2558}
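
/*
 * Extraction sketch (not driver code): each sdma_v4_0_ras_fields entry is
 * assumed to pair a field name with a mask/shift into SDMA0_EDC_COUNTER,
 * so a single-error-corrected (SEC) count is recovered as
 *
 *	sec_cnt = (value & field->sec_count_mask) >> field->sec_count_shift;
 *
 * and the per-field counts are simply summed into *sec_count.
 */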
2559
2560static int sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev,
2561			uint32_t instance, void *ras_error_status)
2562{
2563	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
2564	uint32_t sec_count = 0;
2565	uint32_t reg_value = 0;
2566
2567	reg_value = RREG32_SDMA(instance, mmSDMA0_EDC_COUNTER);
2568	/* double bit error is not supported */
2569	if (reg_value)
2570		sdma_v4_0_get_ras_error_count(reg_value,
2571				instance, &sec_count);
2572	/* err_data->ce_count should be initialized to 0
2573	 * before calling into this function */
2574	err_data->ce_count += sec_count;
2575	/* double bit error is not supported
2576	 * set ue count to 0 */
2577	err_data->ue_count = 0;
2578
2579	return 0;
2580};
2581
2582static const struct amdgpu_sdma_ras_funcs sdma_v4_0_ras_funcs = {
2583	.ras_late_init = amdgpu_sdma_ras_late_init,
2584	.ras_fini = amdgpu_sdma_ras_fini,
2585	.query_ras_error_count = sdma_v4_0_query_ras_error_count,
2586};
2587
2588static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev)
2589{
2590	switch (adev->asic_type) {
2591	case CHIP_VEGA20:
2592	case CHIP_ARCTURUS:
2593		adev->sdma.funcs = &sdma_v4_0_ras_funcs;
2594		break;
2595	default:
2596		break;
2597	}
2598}
2599
2600const struct amdgpu_ip_block_version sdma_v4_0_ip_block = {
2601	.type = AMD_IP_BLOCK_TYPE_SDMA,
2602	.major = 4,
2603	.minor = 0,
2604	.rev = 0,
2605	.funcs = &sdma_v4_0_ip_funcs,
2606};
2607