/* cik.c revision 1.4 */
1/*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Alex Deucher
23 */
24#include <linux/firmware.h>
25#include <linux/slab.h>
26#include <linux/module.h>
27#include <drm/drmP.h>
28#include "radeon.h"
29#include "radeon_asic.h"
30#include "radeon_audio.h"
31#include "cikd.h"
32#include "atom.h"
33#include "cik_blit_shaders.h"
34#include "radeon_ucode.h"
35#include "clearstate_ci.h"
36
/* Default SH_MEM_CONFIG value for gfx: select unaligned alignment mode. */
#define SH_MEM_CONFIG_GFX_DEFAULT \
	ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
39
/*
 * Firmware images this driver may request.  Each CIK ASIC family is
 * listed twice: once with the legacy uppercase file names and once with
 * the newer lowercase names (the two sets can differ slightly, e.g.
 * mc2/mec2 images or the "_k_smc" SMC variants).
 */

/* Bonaire — legacy names */
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");

/* Bonaire — current names */
MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
MODULE_FIRMWARE("radeon/bonaire_me.bin");
MODULE_FIRMWARE("radeon/bonaire_ce.bin");
MODULE_FIRMWARE("radeon/bonaire_mec.bin");
MODULE_FIRMWARE("radeon/bonaire_mc.bin");
MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
MODULE_FIRMWARE("radeon/bonaire_smc.bin");
MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");

/* Hawaii — legacy names */
MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");

/* Hawaii — current names */
MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
MODULE_FIRMWARE("radeon/hawaii_me.bin");
MODULE_FIRMWARE("radeon/hawaii_ce.bin");
MODULE_FIRMWARE("radeon/hawaii_mec.bin");
MODULE_FIRMWARE("radeon/hawaii_mc.bin");
MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
MODULE_FIRMWARE("radeon/hawaii_smc.bin");
MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");

/* Kaveri — legacy names (APU: no MC/SMC images) */
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");

/* Kaveri — current names */
MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
MODULE_FIRMWARE("radeon/kaveri_me.bin");
MODULE_FIRMWARE("radeon/kaveri_ce.bin");
MODULE_FIRMWARE("radeon/kaveri_mec.bin");
MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
MODULE_FIRMWARE("radeon/kaveri_sdma.bin");

/* Kabini — legacy names */
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

/* Kabini — current names */
MODULE_FIRMWARE("radeon/kabini_pfp.bin");
MODULE_FIRMWARE("radeon/kabini_me.bin");
MODULE_FIRMWARE("radeon/kabini_ce.bin");
MODULE_FIRMWARE("radeon/kabini_mec.bin");
MODULE_FIRMWARE("radeon/kabini_rlc.bin");
MODULE_FIRMWARE("radeon/kabini_sdma.bin");

/* Mullins — legacy names */
MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
MODULE_FIRMWARE("radeon/MULLINS_me.bin");
MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");

/* Mullins — current names */
MODULE_FIRMWARE("radeon/mullins_pfp.bin");
MODULE_FIRMWARE("radeon/mullins_me.bin");
MODULE_FIRMWARE("radeon/mullins_ce.bin");
MODULE_FIRMWARE("radeon/mullins_mec.bin");
MODULE_FIRMWARE("radeon/mullins_rlc.bin");
MODULE_FIRMWARE("radeon/mullins_sdma.bin");
122
123extern int r600_ih_ring_alloc(struct radeon_device *rdev);
124extern void r600_ih_ring_fini(struct radeon_device *rdev);
125extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
126extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
127extern bool evergreen_is_display_hung(struct radeon_device *rdev);
128extern void sumo_rlc_fini(struct radeon_device *rdev);
129extern int sumo_rlc_init(struct radeon_device *rdev);
130extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
131extern void si_rlc_reset(struct radeon_device *rdev);
132extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
133static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
134extern int cik_sdma_resume(struct radeon_device *rdev);
135extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
136extern void cik_sdma_fini(struct radeon_device *rdev);
137extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
138static void cik_rlc_stop(struct radeon_device *rdev);
139static void cik_pcie_gen3_enable(struct radeon_device *rdev);
140static void cik_program_aspm(struct radeon_device *rdev);
141static void cik_init_pg(struct radeon_device *rdev);
142static void cik_init_cg(struct radeon_device *rdev);
143static void cik_fini_pg(struct radeon_device *rdev);
144static void cik_fini_cg(struct radeon_device *rdev);
145static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
146					  bool enable);
147
148/**
149 * cik_get_allowed_info_register - fetch the register for the info ioctl
150 *
151 * @rdev: radeon_device pointer
152 * @reg: register offset in bytes
153 * @val: register value
154 *
155 * Returns 0 for success or -EINVAL for an invalid register
156 *
157 */
158int cik_get_allowed_info_register(struct radeon_device *rdev,
159				  u32 reg, u32 *val)
160{
161	switch (reg) {
162	case GRBM_STATUS:
163	case GRBM_STATUS2:
164	case GRBM_STATUS_SE0:
165	case GRBM_STATUS_SE1:
166	case GRBM_STATUS_SE2:
167	case GRBM_STATUS_SE3:
168	case SRBM_STATUS:
169	case SRBM_STATUS2:
170	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
171	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
172	case UVD_STATUS:
173	/* TODO VCE */
174		*val = RREG32(reg);
175		return 0;
176	default:
177		return -EINVAL;
178	}
179}
180
181/*
182 * Indirect registers accessor
183 */
184u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
185{
186	unsigned long flags;
187	u32 r;
188
189	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
190	WREG32(CIK_DIDT_IND_INDEX, (reg));
191	r = RREG32(CIK_DIDT_IND_DATA);
192	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
193	return r;
194}
195
196void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
197{
198	unsigned long flags;
199
200	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
201	WREG32(CIK_DIDT_IND_INDEX, (reg));
202	WREG32(CIK_DIDT_IND_DATA, (v));
203	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
204}
205
206/* get temperature in millidegrees */
207int ci_get_temp(struct radeon_device *rdev)
208{
209	u32 temp;
210	int actual_temp = 0;
211
212	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
213		CTF_TEMP_SHIFT;
214
215	if (temp & 0x200)
216		actual_temp = 255;
217	else
218		actual_temp = temp & 0x1ff;
219
220	actual_temp = actual_temp * 1000;
221
222	return actual_temp;
223}
224
225/* get temperature in millidegrees */
226int kv_get_temp(struct radeon_device *rdev)
227{
228	u32 temp;
229	int actual_temp = 0;
230
231	temp = RREG32_SMC(0xC0300E0C);
232
233	if (temp)
234		actual_temp = (temp / 8) - 49;
235	else
236		actual_temp = 0;
237
238	actual_temp = actual_temp * 1000;
239
240	return actual_temp;
241}
242
243/*
244 * Indirect registers accessor
245 */
246u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
247{
248	unsigned long flags;
249	u32 r;
250
251	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
252	WREG32(PCIE_INDEX, reg);
253	(void)RREG32(PCIE_INDEX);
254	r = RREG32(PCIE_DATA);
255	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
256	return r;
257}
258
259void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
260{
261	unsigned long flags;
262
263	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
264	WREG32(PCIE_INDEX, reg);
265	(void)RREG32(PCIE_INDEX);
266	WREG32(PCIE_DATA, v);
267	(void)RREG32(PCIE_DATA);
268	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
269}
270
/*
 * RLC save/restore register list for Spectre (Kaveri) parts.
 * Each entry encodes a selector in the high 16 bits and a dword register
 * offset (byte offset >> 2) in the low 16 bits, typically followed by a
 * 0x00000000 placeholder.  The bare 0x3/0x5 words and the unpaired
 * trailing entries are part of the list format.
 * NOTE(review): the exact format is defined by the RLC save/restore
 * consumer elsewhere in the driver — confirm there before editing.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
717
/*
 * RLC save/restore register list for Kalindi (Kabini/Mullins) parts.
 * Same entry encoding as the Spectre list above, with a smaller set of
 * instance selectors (0x4e00..0x7e00 only).
 * NOTE(review): the exact format is defined by the RLC save/restore
 * consumer elsewhere in the driver — confirm there before editing.
 */
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
1042
/* "Golden" SPM register settings for Bonaire.
 * Rows appear to be {register offset, mask, value} triples —
 * NOTE(review): confirm against the golden-register programming helper. */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1047
/* "Golden" common register settings for Bonaire.
 * Rows appear to be {register offset, mask, value} triples —
 * NOTE(review): confirm against the golden-register programming helper. */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1055
/* "Golden" register settings for Bonaire.
 * Rows appear to be {register offset, mask, value} triples —
 * NOTE(review): confirm against the golden-register programming helper. */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
1100
/* Bonaire medium-grain/coarse-grain clock gating init sequence.
 * {register offset, mask, value} triples supplied by AMD; programmed by
 * cik_init_golden_registers().  Do not edit values by hand. */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1186
/* Kaveri (Spectre) SPM golden setting: {offset, mask, value}. */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1191
/* Kaveri (Spectre) common golden settings: {offset, mask, value}. */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1199
/* Kaveri (Spectre) golden register settings: {offset, mask, value}
 * triples supplied by AMD; programmed by cik_init_golden_registers(). */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1228
/* Kaveri (Spectre) clock gating init sequence: {offset, mask, value}
 * triples supplied by AMD; programmed by cik_init_golden_registers(). */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1319
/* Kabini (Kalindi) SPM golden setting: {offset, mask, value}. */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1324
/* Kabini (Kalindi) common golden settings: {offset, mask, value}. */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1332
/* Kabini (Kalindi) golden register settings: {offset, mask, value}
 * triples supplied by AMD; programmed by cik_init_golden_registers(). */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1366
/* Kabini (Kalindi) clock gating init sequence: {offset, mask, value}
 * triples supplied by AMD; also reused for Mullins.  Programmed by
 * cik_init_golden_registers(). */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1425
/* Hawaii SPM golden setting: {offset, mask, value}. */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1430
/* Hawaii common golden settings: {offset, mask, value}. */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1439
/* Hawaii golden register settings: {offset, mask, value} triples
 * supplied by AMD; programmed by cik_init_golden_registers(). */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1479
/* Hawaii clock gating init sequence: {offset, mask, value} triples
 * supplied by AMD; programmed by cik_init_golden_registers(). */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1590
/* Mullins (Godavari) golden register settings: {offset, mask, value}
 * triples supplied by AMD; programmed by cik_init_golden_registers(). */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	/* NOTE(review): 0x98302 is not dword-aligned and matches the mask/value
	 * used for 0x9834 in the other tables — looks like an upstream typo
	 * for 0x9834; kept as-is to match Linux, verify before changing. */
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1626
1627
1628static void cik_init_golden_registers(struct radeon_device *rdev)
1629{
1630	switch (rdev->family) {
1631	case CHIP_BONAIRE:
1632		radeon_program_register_sequence(rdev,
1633						 bonaire_mgcg_cgcg_init,
1634						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1635		radeon_program_register_sequence(rdev,
1636						 bonaire_golden_registers,
1637						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1638		radeon_program_register_sequence(rdev,
1639						 bonaire_golden_common_registers,
1640						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1641		radeon_program_register_sequence(rdev,
1642						 bonaire_golden_spm_registers,
1643						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1644		break;
1645	case CHIP_KABINI:
1646		radeon_program_register_sequence(rdev,
1647						 kalindi_mgcg_cgcg_init,
1648						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1649		radeon_program_register_sequence(rdev,
1650						 kalindi_golden_registers,
1651						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1652		radeon_program_register_sequence(rdev,
1653						 kalindi_golden_common_registers,
1654						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1655		radeon_program_register_sequence(rdev,
1656						 kalindi_golden_spm_registers,
1657						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1658		break;
1659	case CHIP_MULLINS:
1660		radeon_program_register_sequence(rdev,
1661						 kalindi_mgcg_cgcg_init,
1662						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1663		radeon_program_register_sequence(rdev,
1664						 godavari_golden_registers,
1665						 (const u32)ARRAY_SIZE(godavari_golden_registers));
1666		radeon_program_register_sequence(rdev,
1667						 kalindi_golden_common_registers,
1668						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1669		radeon_program_register_sequence(rdev,
1670						 kalindi_golden_spm_registers,
1671						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1672		break;
1673	case CHIP_KAVERI:
1674		radeon_program_register_sequence(rdev,
1675						 spectre_mgcg_cgcg_init,
1676						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1677		radeon_program_register_sequence(rdev,
1678						 spectre_golden_registers,
1679						 (const u32)ARRAY_SIZE(spectre_golden_registers));
1680		radeon_program_register_sequence(rdev,
1681						 spectre_golden_common_registers,
1682						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1683		radeon_program_register_sequence(rdev,
1684						 spectre_golden_spm_registers,
1685						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1686		break;
1687	case CHIP_HAWAII:
1688		radeon_program_register_sequence(rdev,
1689						 hawaii_mgcg_cgcg_init,
1690						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1691		radeon_program_register_sequence(rdev,
1692						 hawaii_golden_registers,
1693						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1694		radeon_program_register_sequence(rdev,
1695						 hawaii_golden_common_registers,
1696						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1697		radeon_program_register_sequence(rdev,
1698						 hawaii_golden_spm_registers,
1699						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1700		break;
1701	default:
1702		break;
1703	}
1704}
1705
1706/**
1707 * cik_get_xclk - get the xclk
1708 *
1709 * @rdev: radeon_device pointer
1710 *
1711 * Returns the reference clock used by the gfx engine
1712 * (CIK).
1713 */
1714u32 cik_get_xclk(struct radeon_device *rdev)
1715{
1716	u32 reference_clock = rdev->clock.spll.reference_freq;
1717
1718	if (rdev->flags & RADEON_IS_IGP) {
1719		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1720			return reference_clock / 2;
1721	} else {
1722		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1723			return reference_clock / 4;
1724	}
1725	return reference_clock;
1726}
1727
1728/**
1729 * cik_mm_rdoorbell - read a doorbell dword
1730 *
1731 * @rdev: radeon_device pointer
1732 * @index: doorbell index
1733 *
1734 * Returns the value in the doorbell aperture at the
1735 * requested doorbell index (CIK).
1736 */
1737u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1738{
1739	if (index < rdev->doorbell.num_doorbells) {
1740		return bus_space_read_4(rdev->memt, rdev->doorbell.bsh, index * 4);
1741	} else {
1742		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1743		return 0;
1744	}
1745}
1746
1747/**
1748 * cik_mm_wdoorbell - write a doorbell dword
1749 *
1750 * @rdev: radeon_device pointer
1751 * @index: doorbell index
1752 * @v: value to write
1753 *
1754 * Writes @v to the doorbell aperture at the
1755 * requested doorbell index (CIK).
1756 */
1757void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1758{
1759	if (index < rdev->doorbell.num_doorbells) {
1760		bus_space_write_4(rdev->memt, rdev->doorbell.bsh, index * 4, v);
1761	} else {
1762		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1763	}
1764}
1765
#define BONAIRE_IO_MC_REGS_SIZE 36

/* Bonaire MC io-debug register pairs {index, data} programmed into
 * MC_SEQ_IO_DEBUG_INDEX/DATA by ci_mc_load_microcode() when using the
 * legacy (header-less) MC firmware image. */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1807
#define HAWAII_IO_MC_REGS_SIZE 22

/* Hawaii MC io-debug register pairs {index, data} programmed into
 * MC_SEQ_IO_DEBUG_INDEX/DATA by ci_mc_load_microcode() when using the
 * legacy (header-less) MC firmware image. */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1835
1836
1837/**
1838 * cik_srbm_select - select specific register instances
1839 *
1840 * @rdev: radeon_device pointer
1841 * @me: selected ME (micro engine)
1842 * @pipe: pipe
1843 * @queue: queue
1844 * @vmid: VMID
1845 *
1846 * Switches the currently active registers instances.  Some
1847 * registers are instanced per VMID, others are instanced per
1848 * me/pipe/queue combination.
1849 */
1850static void cik_srbm_select(struct radeon_device *rdev,
1851			    u32 me, u32 pipe, u32 queue, u32 vmid)
1852{
1853	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1854			     MEID(me & 0x3) |
1855			     VMID(vmid & 0xf) |
1856			     QUEUEID(queue & 0x7));
1857	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1858}
1859
/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).  Supports both the
 * new header-carrying firmware images (rdev->new_fw) and the
 * legacy header-less images, which need the built-in io-debug
 * tables (Bonaire/Hawaii only).
 * Returns 0 on success, error on failure.
 */
int ci_mc_load_microcode(struct radeon_device *rdev)
{
	/* legacy images are big-endian, new images little-endian */
	const __be32 *fw_data = NULL;
	const __le32 *new_fw_data = NULL;
	u32 running, tmp;
	u32 *io_mc_regs = NULL;
	const __le32 *new_io_mc_regs = NULL;
	int i, regs_size, ucode_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	if (rdev->new_fw) {
		const struct mc_firmware_header_v1_0 *hdr =
			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;

		radeon_ucode_print_mc_hdr(&hdr->header);

		/* io_debug entries are {index, data} pairs of 32-bit words */
		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
		new_io_mc_regs = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		new_fw_data = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	} else {
		/* legacy image: the whole file is ucode, io regs come from
		 * the built-in per-family tables above */
		ucode_size = rdev->mc_fw->size / 4;

		switch (rdev->family) {
		case CHIP_BONAIRE:
			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
			regs_size = BONAIRE_IO_MC_REGS_SIZE;
			break;
		case CHIP_HAWAII:
			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
			regs_size = HAWAII_IO_MC_REGS_SIZE;
			break;
		default:
			return -EINVAL;
		}
		fw_data = (const __be32 *)rdev->mc_fw->data;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	/* only load the ucode when the MC sequencer is not already running */
	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			if (rdev->new_fw) {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
			} else {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
			}
		}

		tmp = RREG32(MC_SEQ_MISC0);
		/* extra io regs for one specific Bonaire variant (device 0x6649);
		 * NOTE(review): presumably keyed on the memory revision encoded
		 * in MC_SEQ_MISC0 bits 8-15 — confirm against AMD reference */
		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
		}

		/* load the MC ucode */
		for (i = 0; i < ucode_size; i++) {
			if (rdev->new_fw)
				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
			else
				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
		}

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		/* channel D0 first, then D1; timeouts fall through silently */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}
1965
1966/**
1967 * cik_init_microcode - load ucode images from disk
1968 *
1969 * @rdev: radeon_device pointer
1970 *
1971 * Use the firmware interface to load the ucode images into
1972 * the driver (not loaded into hw).
1973 * Returns 0 on success, error on failure.
1974 */
1975static int cik_init_microcode(struct radeon_device *rdev)
1976{
1977	const char *chip_name;
1978	const char *new_chip_name;
1979	size_t pfp_req_size, me_req_size, ce_req_size,
1980		mec_req_size, rlc_req_size, mc_req_size = 0,
1981		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1982	char fw_name[30];
1983	int new_fw = 0;
1984	int err;
1985	int num_fw;
1986	bool new_smc = false;
1987
1988	DRM_DEBUG("\n");
1989
1990	switch (rdev->family) {
1991	case CHIP_BONAIRE:
1992		chip_name = "BONAIRE";
1993		if ((rdev->pdev->revision == 0x80) ||
1994		    (rdev->pdev->revision == 0x81) ||
1995		    (rdev->pdev->device == 0x665f))
1996			new_smc = true;
1997		new_chip_name = "bonaire";
1998		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1999		me_req_size = CIK_ME_UCODE_SIZE * 4;
2000		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2001		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2002		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2003		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2004		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2005		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2006		smc_req_size = roundup2(BONAIRE_SMC_UCODE_SIZE, 4);
2007		num_fw = 8;
2008		break;
2009	case CHIP_HAWAII:
2010		chip_name = "HAWAII";
2011		if (rdev->pdev->revision == 0x80)
2012			new_smc = true;
2013		new_chip_name = "hawaii";
2014		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2015		me_req_size = CIK_ME_UCODE_SIZE * 4;
2016		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2017		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2018		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2019		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2020		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2021		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2022		smc_req_size = roundup2(HAWAII_SMC_UCODE_SIZE, 4);
2023		num_fw = 8;
2024		break;
2025	case CHIP_KAVERI:
2026		chip_name = "KAVERI";
2027		new_chip_name = "kaveri";
2028		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2029		me_req_size = CIK_ME_UCODE_SIZE * 4;
2030		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2031		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2032		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2033		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2034		num_fw = 7;
2035		break;
2036	case CHIP_KABINI:
2037		chip_name = "KABINI";
2038		new_chip_name = "kabini";
2039		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2040		me_req_size = CIK_ME_UCODE_SIZE * 4;
2041		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2042		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2043		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2044		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2045		num_fw = 6;
2046		break;
2047	case CHIP_MULLINS:
2048		chip_name = "MULLINS";
2049		new_chip_name = "mullins";
2050		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2051		me_req_size = CIK_ME_UCODE_SIZE * 4;
2052		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2053		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2054		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2055		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2056		num_fw = 6;
2057		break;
2058	default: BUG();
2059	}
2060
2061	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2062
2063	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2064	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2065	if (err) {
2066		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2067		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2068		if (err)
2069			goto out;
2070		if (rdev->pfp_fw->size != pfp_req_size) {
2071			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2072			       rdev->pfp_fw->size, fw_name);
2073			err = -EINVAL;
2074			goto out;
2075		}
2076	} else {
2077		err = radeon_ucode_validate(rdev->pfp_fw);
2078		if (err) {
2079			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2080			       fw_name);
2081			goto out;
2082		} else {
2083			new_fw++;
2084		}
2085	}
2086
2087	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2088	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2089	if (err) {
2090		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2091		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2092		if (err)
2093			goto out;
2094		if (rdev->me_fw->size != me_req_size) {
2095			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2096			       rdev->me_fw->size, fw_name);
2097			err = -EINVAL;
2098		}
2099	} else {
2100		err = radeon_ucode_validate(rdev->me_fw);
2101		if (err) {
2102			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2103			       fw_name);
2104			goto out;
2105		} else {
2106			new_fw++;
2107		}
2108	}
2109
2110	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2111	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2112	if (err) {
2113		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2114		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2115		if (err)
2116			goto out;
2117		if (rdev->ce_fw->size != ce_req_size) {
2118			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2119			       rdev->ce_fw->size, fw_name);
2120			err = -EINVAL;
2121		}
2122	} else {
2123		err = radeon_ucode_validate(rdev->ce_fw);
2124		if (err) {
2125			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2126			       fw_name);
2127			goto out;
2128		} else {
2129			new_fw++;
2130		}
2131	}
2132
2133	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2134	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2135	if (err) {
2136		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2137		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2138		if (err)
2139			goto out;
2140		if (rdev->mec_fw->size != mec_req_size) {
2141			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2142			       rdev->mec_fw->size, fw_name);
2143			err = -EINVAL;
2144		}
2145	} else {
2146		err = radeon_ucode_validate(rdev->mec_fw);
2147		if (err) {
2148			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2149			       fw_name);
2150			goto out;
2151		} else {
2152			new_fw++;
2153		}
2154	}
2155
2156	if (rdev->family == CHIP_KAVERI) {
2157		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2158		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2159		if (err) {
2160			goto out;
2161		} else {
2162			err = radeon_ucode_validate(rdev->mec2_fw);
2163			if (err) {
2164				goto out;
2165			} else {
2166				new_fw++;
2167			}
2168		}
2169	}
2170
2171	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2172	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2173	if (err) {
2174		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2175		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2176		if (err)
2177			goto out;
2178		if (rdev->rlc_fw->size != rlc_req_size) {
2179			pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2180			       rdev->rlc_fw->size, fw_name);
2181			err = -EINVAL;
2182		}
2183	} else {
2184		err = radeon_ucode_validate(rdev->rlc_fw);
2185		if (err) {
2186			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2187			       fw_name);
2188			goto out;
2189		} else {
2190			new_fw++;
2191		}
2192	}
2193
2194	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2195	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2196	if (err) {
2197		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2198		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2199		if (err)
2200			goto out;
2201		if (rdev->sdma_fw->size != sdma_req_size) {
2202			pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2203			       rdev->sdma_fw->size, fw_name);
2204			err = -EINVAL;
2205		}
2206	} else {
2207		err = radeon_ucode_validate(rdev->sdma_fw);
2208		if (err) {
2209			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2210			       fw_name);
2211			goto out;
2212		} else {
2213			new_fw++;
2214		}
2215	}
2216
2217	/* No SMC, MC ucode on APUs */
2218	if (!(rdev->flags & RADEON_IS_IGP)) {
2219		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2220		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2221		if (err) {
2222			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2223			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2224			if (err) {
2225				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2226				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2227				if (err)
2228					goto out;
2229			}
2230			if ((rdev->mc_fw->size != mc_req_size) &&
2231			    (rdev->mc_fw->size != mc2_req_size)){
2232				pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n",
2233				       rdev->mc_fw->size, fw_name);
2234				err = -EINVAL;
2235			}
2236			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2237		} else {
2238			err = radeon_ucode_validate(rdev->mc_fw);
2239			if (err) {
2240				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2241				       fw_name);
2242				goto out;
2243			} else {
2244				new_fw++;
2245			}
2246		}
2247
2248		if (new_smc)
2249			snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
2250		else
2251			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2252		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2253		if (err) {
2254			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2255			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2256			if (err) {
2257				pr_err("smc: error loading firmware \"%s\"\n",
2258				       fw_name);
2259				release_firmware(rdev->smc_fw);
2260				rdev->smc_fw = NULL;
2261				err = 0;
2262			} else if (rdev->smc_fw->size != smc_req_size) {
2263				pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n",
2264				       rdev->smc_fw->size, fw_name);
2265				err = -EINVAL;
2266			}
2267		} else {
2268			err = radeon_ucode_validate(rdev->smc_fw);
2269			if (err) {
2270				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2271				       fw_name);
2272				goto out;
2273			} else {
2274				new_fw++;
2275			}
2276		}
2277	}
2278
2279	if (new_fw == 0) {
2280		rdev->new_fw = false;
2281	} else if (new_fw < num_fw) {
2282		pr_err("ci_fw: mixing new and old firmware!\n");
2283		err = -EINVAL;
2284	} else {
2285		rdev->new_fw = true;
2286	}
2287
2288out:
2289	if (err) {
2290		if (err != -EINVAL)
2291			pr_err("cik_cp: Failed to load firmware \"%s\"\n",
2292			       fw_name);
2293		release_firmware(rdev->pfp_fw);
2294		rdev->pfp_fw = NULL;
2295		release_firmware(rdev->me_fw);
2296		rdev->me_fw = NULL;
2297		release_firmware(rdev->ce_fw);
2298		rdev->ce_fw = NULL;
2299		release_firmware(rdev->mec_fw);
2300		rdev->mec_fw = NULL;
2301		release_firmware(rdev->mec2_fw);
2302		rdev->mec2_fw = NULL;
2303		release_firmware(rdev->rlc_fw);
2304		rdev->rlc_fw = NULL;
2305		release_firmware(rdev->sdma_fw);
2306		rdev->sdma_fw = NULL;
2307		release_firmware(rdev->mc_fw);
2308		rdev->mc_fw = NULL;
2309		release_firmware(rdev->smc_fw);
2310		rdev->smc_fw = NULL;
2311	}
2312	return err;
2313}
2314
2315/*
2316 * Core functions
2317 */
2318/**
2319 * cik_tiling_mode_table_init - init the hw tiling table
2320 *
2321 * @rdev: radeon_device pointer
2322 *
2323 * Starting with SI, the tiling setup is done globally in a
2324 * set of 32 tiling modes.  Rather than selecting each set of
2325 * parameters per surface as on older asics, we just select
2326 * which index in the tiling table we want to use, and the
2327 * surface uses those parameters (CIK).
2328 */
2329static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2330{
2331	u32 *tile = rdev->config.cik.tile_mode_array;
2332	u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2333	const u32 num_tile_mode_states =
2334			ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2335	const u32 num_secondary_tile_mode_states =
2336			ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2337	u32 reg_offset, split_equal_to_row_size;
2338	u32 num_pipe_configs;
2339	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2340		rdev->config.cik.max_shader_engines;
2341
2342	switch (rdev->config.cik.mem_row_size_in_kb) {
2343	case 1:
2344		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2345		break;
2346	case 2:
2347	default:
2348		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2349		break;
2350	case 4:
2351		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2352		break;
2353	}
2354
2355	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2356	if (num_pipe_configs > 8)
2357		num_pipe_configs = 16;
2358
2359	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2360		tile[reg_offset] = 0;
2361	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2362		macrotile[reg_offset] = 0;
2363
2364	switch(num_pipe_configs) {
2365	case 16:
2366		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2367			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2368			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2369			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2370		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2371			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2372			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2373			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2374		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2375			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2376			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2377			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2378		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2379			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2380			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2381			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2382		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2383			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2384			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2385			   TILE_SPLIT(split_equal_to_row_size));
2386		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2387			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2388			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2389		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2390			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2391			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2393		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2394			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2395			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396			   TILE_SPLIT(split_equal_to_row_size));
2397		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2398			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2399		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2400			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2401			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2402		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2403			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2404			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2405			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2406		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2407			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2408			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2409			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2410		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2411			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2412			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2413			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2414		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2415			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2416			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2417		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2418			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2419			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2421		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2422			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2423			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2424			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2425		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2426			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2427			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2428			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2430			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2431			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2432		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2433			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2434			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2436		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2437			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2438			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2439			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2441			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2442			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2443			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2444
2445		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2446			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2447			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2448			   NUM_BANKS(ADDR_SURF_16_BANK));
2449		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2451			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2452			   NUM_BANKS(ADDR_SURF_16_BANK));
2453		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2455			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2456			   NUM_BANKS(ADDR_SURF_16_BANK));
2457		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2459			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2460			   NUM_BANKS(ADDR_SURF_16_BANK));
2461		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2463			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2464			   NUM_BANKS(ADDR_SURF_8_BANK));
2465		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2467			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2468			   NUM_BANKS(ADDR_SURF_4_BANK));
2469		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2471			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2472			   NUM_BANKS(ADDR_SURF_2_BANK));
2473		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2475			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2476			   NUM_BANKS(ADDR_SURF_16_BANK));
2477		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2479			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2480			   NUM_BANKS(ADDR_SURF_16_BANK));
2481		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2483			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2484			    NUM_BANKS(ADDR_SURF_16_BANK));
2485		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2487			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2488			    NUM_BANKS(ADDR_SURF_8_BANK));
2489		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2490			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2491			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2492			    NUM_BANKS(ADDR_SURF_4_BANK));
2493		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2494			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2495			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2496			    NUM_BANKS(ADDR_SURF_2_BANK));
2497		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2498			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2499			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2500			    NUM_BANKS(ADDR_SURF_2_BANK));
2501
2502		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2503			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2504		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2505			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2506		break;
2507
2508	case 8:
2509		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2510			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2511			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2512			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2513		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2514			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2515			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2516			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2517		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2518			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2519			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2520			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2521		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2522			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2523			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2524			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2525		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2526			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2527			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2528			   TILE_SPLIT(split_equal_to_row_size));
2529		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2530			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2532		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2533			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2534			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2536		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2537			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2538			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539			   TILE_SPLIT(split_equal_to_row_size));
2540		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2541			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2542		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2543			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2544			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2545		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2546			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2547			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2549		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2550			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2551			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2552			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2553		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2554			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2555			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2556			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2557		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2558			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2560		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2561			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2562			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2564		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2565			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2566			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2567			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2568		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2569			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2570			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2571			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2572		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2573			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2574			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2575		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2576			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2577			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2578			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2579		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2580			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2581			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2582			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2583		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2584			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2585			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2586			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2587
2588		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2589				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2590				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2591				NUM_BANKS(ADDR_SURF_16_BANK));
2592		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2593				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2594				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2595				NUM_BANKS(ADDR_SURF_16_BANK));
2596		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2597				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2598				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2599				NUM_BANKS(ADDR_SURF_16_BANK));
2600		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2601				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2602				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2603				NUM_BANKS(ADDR_SURF_16_BANK));
2604		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2605				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2606				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2607				NUM_BANKS(ADDR_SURF_8_BANK));
2608		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2609				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2610				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2611				NUM_BANKS(ADDR_SURF_4_BANK));
2612		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2613				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2614				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2615				NUM_BANKS(ADDR_SURF_2_BANK));
2616		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2617				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2618				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2619				NUM_BANKS(ADDR_SURF_16_BANK));
2620		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2621				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2622				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2623				NUM_BANKS(ADDR_SURF_16_BANK));
2624		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2625				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2626				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2627				NUM_BANKS(ADDR_SURF_16_BANK));
2628		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2629				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2630				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2631				NUM_BANKS(ADDR_SURF_16_BANK));
2632		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2633				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2634				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2635				NUM_BANKS(ADDR_SURF_8_BANK));
2636		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2637				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2638				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2639				NUM_BANKS(ADDR_SURF_4_BANK));
2640		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2641				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2642				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2643				NUM_BANKS(ADDR_SURF_2_BANK));
2644
2645		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2646			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2647		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2648			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2649		break;
2650
2651	case 4:
2652		if (num_rbs == 4) {
2653		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2654			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2655			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2656			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2657		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2658			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2659			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2660			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2661		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2662			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2663			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2664			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2665		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2666			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2667			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2668			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2669		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2670			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2671			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2672			   TILE_SPLIT(split_equal_to_row_size));
2673		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2674			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2676		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2677			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2678			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2680		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2681			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2682			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683			   TILE_SPLIT(split_equal_to_row_size));
2684		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2685			   PIPE_CONFIG(ADDR_SURF_P4_16x16));
2686		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2687			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2688			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2689		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2690			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2691			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2692			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2693		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2694			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2695			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2696			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2697		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2698			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2699			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2700			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2701		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2702			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2703			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2704		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2705			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2706			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2708		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2709			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2710			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2711			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2712		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2713			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2714			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2715			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2716		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2717			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2718			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2719		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2720			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2721			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2722			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2723		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2724			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2725			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2726			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2728			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2729			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2730			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2731
2732		} else if (num_rbs < 4) {
2733		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2734			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2735			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2736			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2737		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2738			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2739			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2740			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2741		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2742			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2743			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2744			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2745		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2746			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2747			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2748			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2749		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2750			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2751			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2752			   TILE_SPLIT(split_equal_to_row_size));
2753		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2754			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2755			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2756		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2757			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2758			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2760		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2761			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2762			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763			   TILE_SPLIT(split_equal_to_row_size));
2764		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2765			   PIPE_CONFIG(ADDR_SURF_P4_8x16));
2766		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2767			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2768			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2769		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2770			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2771			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2772			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2773		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2774			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2775			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2776			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2777		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2778			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2779			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2780			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2781		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2782			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2783			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2784		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2785			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2786			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2788		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2789			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2790			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2791			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2792		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2793			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2794			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2795			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2796		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2797			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2798			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2799		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2800			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2801			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2802			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2803		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2804			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2805			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2806			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2807		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2808			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2809			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2810			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2811		}
2812
2813		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2814				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2815				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2816				NUM_BANKS(ADDR_SURF_16_BANK));
2817		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2818				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2819				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2820				NUM_BANKS(ADDR_SURF_16_BANK));
2821		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2822				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2823				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2824				NUM_BANKS(ADDR_SURF_16_BANK));
2825		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2826				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2827				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2828				NUM_BANKS(ADDR_SURF_16_BANK));
2829		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2830				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2831				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2832				NUM_BANKS(ADDR_SURF_16_BANK));
2833		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2834				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2835				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2836				NUM_BANKS(ADDR_SURF_8_BANK));
2837		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2839				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2840				NUM_BANKS(ADDR_SURF_4_BANK));
2841		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2842				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2843				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2844				NUM_BANKS(ADDR_SURF_16_BANK));
2845		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2846				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2847				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2848				NUM_BANKS(ADDR_SURF_16_BANK));
2849		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2850				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2851				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2852				NUM_BANKS(ADDR_SURF_16_BANK));
2853		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2854				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2855				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2856				NUM_BANKS(ADDR_SURF_16_BANK));
2857		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2858				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2859				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2860				NUM_BANKS(ADDR_SURF_16_BANK));
2861		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2862				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2863				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2864				NUM_BANKS(ADDR_SURF_8_BANK));
2865		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2866				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2867				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2868				NUM_BANKS(ADDR_SURF_4_BANK));
2869
2870		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2871			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2872		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2873			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2874		break;
2875
2876	case 2:
2877		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2878			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2879			   PIPE_CONFIG(ADDR_SURF_P2) |
2880			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2881		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2882			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2883			   PIPE_CONFIG(ADDR_SURF_P2) |
2884			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2885		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2886			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2887			   PIPE_CONFIG(ADDR_SURF_P2) |
2888			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2889		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2890			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2891			   PIPE_CONFIG(ADDR_SURF_P2) |
2892			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2893		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2894			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2895			   PIPE_CONFIG(ADDR_SURF_P2) |
2896			   TILE_SPLIT(split_equal_to_row_size));
2897		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2898			   PIPE_CONFIG(ADDR_SURF_P2) |
2899			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2900		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2901			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2902			   PIPE_CONFIG(ADDR_SURF_P2) |
2903			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2904		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2905			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2906			   PIPE_CONFIG(ADDR_SURF_P2) |
2907			   TILE_SPLIT(split_equal_to_row_size));
2908		tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2909			   PIPE_CONFIG(ADDR_SURF_P2);
2910		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2911			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2912			   PIPE_CONFIG(ADDR_SURF_P2));
2913		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2914			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2915			    PIPE_CONFIG(ADDR_SURF_P2) |
2916			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2917		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2918			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2919			    PIPE_CONFIG(ADDR_SURF_P2) |
2920			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2921		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2922			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2923			    PIPE_CONFIG(ADDR_SURF_P2) |
2924			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2925		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2926			    PIPE_CONFIG(ADDR_SURF_P2) |
2927			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2928		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2929			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2930			    PIPE_CONFIG(ADDR_SURF_P2) |
2931			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2932		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2933			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2934			    PIPE_CONFIG(ADDR_SURF_P2) |
2935			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2936		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2937			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2938			    PIPE_CONFIG(ADDR_SURF_P2) |
2939			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2940		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2941			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2942			    PIPE_CONFIG(ADDR_SURF_P2));
2943		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2944			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2945			    PIPE_CONFIG(ADDR_SURF_P2) |
2946			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2947		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2948			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2949			    PIPE_CONFIG(ADDR_SURF_P2) |
2950			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2951		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2952			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2953			    PIPE_CONFIG(ADDR_SURF_P2) |
2954			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2955
2956		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2957				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2958				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2959				NUM_BANKS(ADDR_SURF_16_BANK));
2960		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2961				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2962				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2963				NUM_BANKS(ADDR_SURF_16_BANK));
2964		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2965				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2966				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2967				NUM_BANKS(ADDR_SURF_16_BANK));
2968		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2969				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2970				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2971				NUM_BANKS(ADDR_SURF_16_BANK));
2972		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2973				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2974				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2975				NUM_BANKS(ADDR_SURF_16_BANK));
2976		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2977				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2978				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2979				NUM_BANKS(ADDR_SURF_16_BANK));
2980		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2981				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2982				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2983				NUM_BANKS(ADDR_SURF_8_BANK));
2984		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2985				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2986				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2987				NUM_BANKS(ADDR_SURF_16_BANK));
2988		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2989				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2990				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2991				NUM_BANKS(ADDR_SURF_16_BANK));
2992		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2993				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2994				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2995				NUM_BANKS(ADDR_SURF_16_BANK));
2996		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2997				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2998				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2999				NUM_BANKS(ADDR_SURF_16_BANK));
3000		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3001				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3002				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3003				NUM_BANKS(ADDR_SURF_16_BANK));
3004		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3005				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3006				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3007				NUM_BANKS(ADDR_SURF_16_BANK));
3008		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3009				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3010				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3011				NUM_BANKS(ADDR_SURF_8_BANK));
3012
3013		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3014			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3015		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3016			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3017		break;
3018
3019	default:
3020		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3021	}
3022}
3023
3024/**
3025 * cik_select_se_sh - select which SE, SH to address
3026 *
3027 * @rdev: radeon_device pointer
3028 * @se_num: shader engine to address
3029 * @sh_num: sh block to address
3030 *
3031 * Select which SE, SH combinations to address. Certain
3032 * registers are instanced per SE or SH.  0xffffffff means
3033 * broadcast to all SEs or SHs (CIK).
3034 */
3035static void cik_select_se_sh(struct radeon_device *rdev,
3036			     u32 se_num, u32 sh_num)
3037{
3038	u32 data = INSTANCE_BROADCAST_WRITES;
3039
3040	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3041		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3042	else if (se_num == 0xffffffff)
3043		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3044	else if (sh_num == 0xffffffff)
3045		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3046	else
3047		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3048	WREG32(GRBM_GFX_INDEX, data);
3049}
3050
3051/**
3052 * cik_create_bitmask - create a bitmask
3053 *
3054 * @bit_width: length of the mask
3055 *
3056 * create a variable length bit mask (CIK).
3057 * Returns the bitmask.
3058 */
3059static u32 cik_create_bitmask(u32 bit_width)
3060{
3061	u32 i, mask = 0;
3062
3063	for (i = 0; i < bit_width; i++) {
3064		mask <<= 1;
3065		mask |= 1;
3066	}
3067	return mask;
3068}
3069
3070/**
3071 * cik_get_rb_disabled - computes the mask of disabled RBs
3072 *
3073 * @rdev: radeon_device pointer
3074 * @max_rb_num: max RBs (render backends) for the asic
3075 * @se_num: number of SEs (shader engines) for the asic
3076 * @sh_per_se: number of SH blocks per SE for the asic
3077 *
3078 * Calculates the bitmask of disabled RBs (CIK).
3079 * Returns the disabled RB bitmask.
3080 */
3081static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3082			      u32 max_rb_num_per_se,
3083			      u32 sh_per_se)
3084{
3085	u32 data, mask;
3086
3087	data = RREG32(CC_RB_BACKEND_DISABLE);
3088	if (data & 1)
3089		data &= BACKEND_DISABLE_MASK;
3090	else
3091		data = 0;
3092	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3093
3094	data >>= BACKEND_DISABLE_SHIFT;
3095
3096	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3097
3098	return data & mask;
3099}
3100
3101/**
3102 * cik_setup_rb - setup the RBs on the asic
3103 *
3104 * @rdev: radeon_device pointer
3105 * @se_num: number of SEs (shader engines) for the asic
3106 * @sh_per_se: number of SH blocks per SE for the asic
3107 * @max_rb_num: max RBs (render backends) for the asic
3108 *
3109 * Configures per-SE/SH RB registers (CIK).
3110 */
3111static void cik_setup_rb(struct radeon_device *rdev,
3112			 u32 se_num, u32 sh_per_se,
3113			 u32 max_rb_num_per_se)
3114{
3115	int i, j;
3116	u32 data, mask;
3117	u32 disabled_rbs = 0;
3118	u32 enabled_rbs = 0;
3119
3120	for (i = 0; i < se_num; i++) {
3121		for (j = 0; j < sh_per_se; j++) {
3122			cik_select_se_sh(rdev, i, j);
3123			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3124			if (rdev->family == CHIP_HAWAII)
3125				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3126			else
3127				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3128		}
3129	}
3130	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3131
3132	mask = 1;
3133	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3134		if (!(disabled_rbs & mask))
3135			enabled_rbs |= mask;
3136		mask <<= 1;
3137	}
3138
3139	rdev->config.cik.backend_enable_mask = enabled_rbs;
3140
3141	for (i = 0; i < se_num; i++) {
3142		cik_select_se_sh(rdev, i, 0xffffffff);
3143		data = 0;
3144		for (j = 0; j < sh_per_se; j++) {
3145			switch (enabled_rbs & 3) {
3146			case 0:
3147				if (j == 0)
3148					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3149				else
3150					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3151				break;
3152			case 1:
3153				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3154				break;
3155			case 2:
3156				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3157				break;
3158			case 3:
3159			default:
3160				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3161				break;
3162			}
3163			enabled_rbs >>= 2;
3164		}
3165		WREG32(PA_SC_RASTER_CONFIG, data);
3166	}
3167	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3168}
3169
3170/**
3171 * cik_gpu_init - setup the 3D engine
3172 *
3173 * @rdev: radeon_device pointer
3174 *
3175 * Configures the 3D engine and tiling configuration
3176 * registers so that the 3D engine is usable.
3177 */
3178static void cik_gpu_init(struct radeon_device *rdev)
3179{
3180	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3181	u32 mc_shared_chmap, mc_arb_ramcfg;
3182	u32 hdp_host_path_cntl;
3183	u32 tmp;
3184	int i, j;
3185
3186	switch (rdev->family) {
3187	case CHIP_BONAIRE:
3188		rdev->config.cik.max_shader_engines = 2;
3189		rdev->config.cik.max_tile_pipes = 4;
3190		rdev->config.cik.max_cu_per_sh = 7;
3191		rdev->config.cik.max_sh_per_se = 1;
3192		rdev->config.cik.max_backends_per_se = 2;
3193		rdev->config.cik.max_texture_channel_caches = 4;
3194		rdev->config.cik.max_gprs = 256;
3195		rdev->config.cik.max_gs_threads = 32;
3196		rdev->config.cik.max_hw_contexts = 8;
3197
3198		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3199		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3200		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3201		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3202		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3203		break;
3204	case CHIP_HAWAII:
3205		rdev->config.cik.max_shader_engines = 4;
3206		rdev->config.cik.max_tile_pipes = 16;
3207		rdev->config.cik.max_cu_per_sh = 11;
3208		rdev->config.cik.max_sh_per_se = 1;
3209		rdev->config.cik.max_backends_per_se = 4;
3210		rdev->config.cik.max_texture_channel_caches = 16;
3211		rdev->config.cik.max_gprs = 256;
3212		rdev->config.cik.max_gs_threads = 32;
3213		rdev->config.cik.max_hw_contexts = 8;
3214
3215		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3216		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3217		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3218		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3219		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3220		break;
3221	case CHIP_KAVERI:
3222		rdev->config.cik.max_shader_engines = 1;
3223		rdev->config.cik.max_tile_pipes = 4;
3224		rdev->config.cik.max_cu_per_sh = 8;
3225		rdev->config.cik.max_backends_per_se = 2;
3226		rdev->config.cik.max_sh_per_se = 1;
3227		rdev->config.cik.max_texture_channel_caches = 4;
3228		rdev->config.cik.max_gprs = 256;
3229		rdev->config.cik.max_gs_threads = 16;
3230		rdev->config.cik.max_hw_contexts = 8;
3231
3232		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3233		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3234		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3235		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3236		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3237		break;
3238	case CHIP_KABINI:
3239	case CHIP_MULLINS:
3240	default:
3241		rdev->config.cik.max_shader_engines = 1;
3242		rdev->config.cik.max_tile_pipes = 2;
3243		rdev->config.cik.max_cu_per_sh = 2;
3244		rdev->config.cik.max_sh_per_se = 1;
3245		rdev->config.cik.max_backends_per_se = 1;
3246		rdev->config.cik.max_texture_channel_caches = 2;
3247		rdev->config.cik.max_gprs = 256;
3248		rdev->config.cik.max_gs_threads = 16;
3249		rdev->config.cik.max_hw_contexts = 8;
3250
3251		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3252		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3253		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3254		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3255		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3256		break;
3257	}
3258
3259	/* Initialize HDP */
3260	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3261		WREG32((0x2c14 + j), 0x00000000);
3262		WREG32((0x2c18 + j), 0x00000000);
3263		WREG32((0x2c1c + j), 0x00000000);
3264		WREG32((0x2c20 + j), 0x00000000);
3265		WREG32((0x2c24 + j), 0x00000000);
3266	}
3267
3268	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3269	WREG32(SRBM_INT_CNTL, 0x1);
3270	WREG32(SRBM_INT_ACK, 0x1);
3271
3272	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3273
3274	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3275	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3276
3277	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3278	rdev->config.cik.mem_max_burst_length_bytes = 256;
3279	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3280	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3281	if (rdev->config.cik.mem_row_size_in_kb > 4)
3282		rdev->config.cik.mem_row_size_in_kb = 4;
3283	/* XXX use MC settings? */
3284	rdev->config.cik.shader_engine_tile_size = 32;
3285	rdev->config.cik.num_gpus = 1;
3286	rdev->config.cik.multi_gpu_tile_size = 64;
3287
3288	/* fix up row size */
3289	gb_addr_config &= ~ROW_SIZE_MASK;
3290	switch (rdev->config.cik.mem_row_size_in_kb) {
3291	case 1:
3292	default:
3293		gb_addr_config |= ROW_SIZE(0);
3294		break;
3295	case 2:
3296		gb_addr_config |= ROW_SIZE(1);
3297		break;
3298	case 4:
3299		gb_addr_config |= ROW_SIZE(2);
3300		break;
3301	}
3302
3303	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3304	 * not have bank info, so create a custom tiling dword.
3305	 * bits 3:0   num_pipes
3306	 * bits 7:4   num_banks
3307	 * bits 11:8  group_size
3308	 * bits 15:12 row_size
3309	 */
3310	rdev->config.cik.tile_config = 0;
3311	switch (rdev->config.cik.num_tile_pipes) {
3312	case 1:
3313		rdev->config.cik.tile_config |= (0 << 0);
3314		break;
3315	case 2:
3316		rdev->config.cik.tile_config |= (1 << 0);
3317		break;
3318	case 4:
3319		rdev->config.cik.tile_config |= (2 << 0);
3320		break;
3321	case 8:
3322	default:
3323		/* XXX what about 12? */
3324		rdev->config.cik.tile_config |= (3 << 0);
3325		break;
3326	}
3327	rdev->config.cik.tile_config |=
3328		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3329	rdev->config.cik.tile_config |=
3330		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3331	rdev->config.cik.tile_config |=
3332		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3333
3334	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3335	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3336	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3337	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3338	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3339	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3340	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3341	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3342
3343	cik_tiling_mode_table_init(rdev);
3344
3345	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3346		     rdev->config.cik.max_sh_per_se,
3347		     rdev->config.cik.max_backends_per_se);
3348
3349	rdev->config.cik.active_cus = 0;
3350	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3351		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3352			rdev->config.cik.active_cus +=
3353				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3354		}
3355	}
3356
3357	/* set HW defaults for 3D engine */
3358	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3359
3360	WREG32(SX_DEBUG_1, 0x20);
3361
3362	WREG32(TA_CNTL_AUX, 0x00010000);
3363
3364	tmp = RREG32(SPI_CONFIG_CNTL);
3365	tmp |= 0x03000000;
3366	WREG32(SPI_CONFIG_CNTL, tmp);
3367
3368	WREG32(SQ_CONFIG, 1);
3369
3370	WREG32(DB_DEBUG, 0);
3371
3372	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3373	tmp |= 0x00000400;
3374	WREG32(DB_DEBUG2, tmp);
3375
3376	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3377	tmp |= 0x00020200;
3378	WREG32(DB_DEBUG3, tmp);
3379
3380	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3381	tmp |= 0x00018208;
3382	WREG32(CB_HW_CONTROL, tmp);
3383
3384	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3385
3386	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3387				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3388				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3389				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3390
3391	WREG32(VGT_NUM_INSTANCES, 1);
3392
3393	WREG32(CP_PERFMON_CNTL, 0);
3394
3395	WREG32(SQ_CONFIG, 0);
3396
3397	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3398					  FORCE_EOV_MAX_REZ_CNT(255)));
3399
3400	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3401	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3402
3403	WREG32(VGT_GS_VERTEX_REUSE, 16);
3404	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3405
3406	tmp = RREG32(HDP_MISC_CNTL);
3407	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3408	WREG32(HDP_MISC_CNTL, tmp);
3409
3410	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3411	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3412
3413	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3414	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3415
3416	udelay(50);
3417}
3418
3419/*
3420 * GPU scratch registers helpers function.
3421 */
3422/**
3423 * cik_scratch_init - setup driver info for CP scratch regs
3424 *
3425 * @rdev: radeon_device pointer
3426 *
3427 * Set up the number and offset of the CP scratch registers.
3428 * NOTE: use of CP scratch registers is a legacy inferface and
3429 * is not used by default on newer asics (r6xx+).  On newer asics,
3430 * memory buffers are used for fences rather than scratch regs.
3431 */
3432static void cik_scratch_init(struct radeon_device *rdev)
3433{
3434	int i;
3435
3436	rdev->scratch.num_reg = 7;
3437	rdev->scratch.reg_base = SCRATCH_REG0;
3438	for (i = 0; i < rdev->scratch.num_reg; i++) {
3439		rdev->scratch.free[i] = true;
3440		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3441	}
3442}
3443
3444/**
3445 * cik_ring_test - basic gfx ring test
3446 *
3447 * @rdev: radeon_device pointer
3448 * @ring: radeon_ring structure holding ring information
3449 *
3450 * Allocate a scratch register and write to it using the gfx ring (CIK).
3451 * Provides a basic gfx ring test to verify that the ring is working.
3452 * Used by cik_cp_gfx_resume();
3453 * Returns 0 on success, error on failure.
3454 */
3455int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3456{
3457	uint32_t scratch;
3458	uint32_t tmp = 0;
3459	unsigned i;
3460	int r;
3461
3462	r = radeon_scratch_get(rdev, &scratch);
3463	if (r) {
3464		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3465		return r;
3466	}
3467	WREG32(scratch, 0xCAFEDEAD);
3468	r = radeon_ring_lock(rdev, ring, 3);
3469	if (r) {
3470		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3471		radeon_scratch_free(rdev, scratch);
3472		return r;
3473	}
3474	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3475	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3476	radeon_ring_write(ring, 0xDEADBEEF);
3477	radeon_ring_unlock_commit(rdev, ring, false);
3478
3479	for (i = 0; i < rdev->usec_timeout; i++) {
3480		tmp = RREG32(scratch);
3481		if (tmp == 0xDEADBEEF)
3482			break;
3483		DRM_UDELAY(1);
3484	}
3485	if (i < rdev->usec_timeout) {
3486		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3487	} else {
3488		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3489			  ring->idx, scratch, tmp);
3490		r = -EINVAL;
3491	}
3492	radeon_scratch_free(rdev, scratch);
3493	return r;
3494}
3495
3496/**
3497 * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3498 *
3499 * @rdev: radeon_device pointer
3500 * @ridx: radeon ring index
3501 *
3502 * Emits an hdp flush on the cp.
3503 */
3504static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3505				       int ridx)
3506{
3507	struct radeon_ring *ring = &rdev->ring[ridx];
3508	u32 ref_and_mask;
3509
3510	switch (ring->idx) {
3511	case CAYMAN_RING_TYPE_CP1_INDEX:
3512	case CAYMAN_RING_TYPE_CP2_INDEX:
3513	default:
3514		switch (ring->me) {
3515		case 0:
3516			ref_and_mask = CP2 << ring->pipe;
3517			break;
3518		case 1:
3519			ref_and_mask = CP6 << ring->pipe;
3520			break;
3521		default:
3522			return;
3523		}
3524		break;
3525	case RADEON_RING_TYPE_GFX_INDEX:
3526		ref_and_mask = CP0;
3527		break;
3528	}
3529
3530	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3531	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3532				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3533				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3534	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3535	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3536	radeon_ring_write(ring, ref_and_mask);
3537	radeon_ring_write(ring, ref_and_mask);
3538	radeon_ring_write(ring, 0x20); /* poll interval */
3539}
3540
3541/**
3542 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3543 *
3544 * @rdev: radeon_device pointer
3545 * @fence: radeon fence object
3546 *
3547 * Emits a fence sequnce number on the gfx ring and flushes
3548 * GPU caches.
3549 */
3550void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3551			     struct radeon_fence *fence)
3552{
3553	struct radeon_ring *ring = &rdev->ring[fence->ring];
3554	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3555
3556	/* Workaround for cache flush problems. First send a dummy EOP
3557	 * event down the pipe with seq one below.
3558	 */
3559	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3560	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3561				 EOP_TC_ACTION_EN |
3562				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3563				 EVENT_INDEX(5)));
3564	radeon_ring_write(ring, addr & 0xfffffffc);
3565	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3566				DATA_SEL(1) | INT_SEL(0));
3567	radeon_ring_write(ring, fence->seq - 1);
3568	radeon_ring_write(ring, 0);
3569
3570	/* Then send the real EOP event down the pipe. */
3571	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3572	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3573				 EOP_TC_ACTION_EN |
3574				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3575				 EVENT_INDEX(5)));
3576	radeon_ring_write(ring, addr & 0xfffffffc);
3577	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3578	radeon_ring_write(ring, fence->seq);
3579	radeon_ring_write(ring, 0);
3580}
3581
3582/**
3583 * cik_fence_compute_ring_emit - emit a fence on the compute ring
3584 *
3585 * @rdev: radeon_device pointer
3586 * @fence: radeon fence object
3587 *
3588 * Emits a fence sequnce number on the compute ring and flushes
3589 * GPU caches.
3590 */
3591void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3592				 struct radeon_fence *fence)
3593{
3594	struct radeon_ring *ring = &rdev->ring[fence->ring];
3595	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3596
3597	/* RELEASE_MEM - flush caches, send int */
3598	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3599	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3600				 EOP_TC_ACTION_EN |
3601				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3602				 EVENT_INDEX(5)));
3603	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3604	radeon_ring_write(ring, addr & 0xfffffffc);
3605	radeon_ring_write(ring, upper_32_bits(addr));
3606	radeon_ring_write(ring, fence->seq);
3607	radeon_ring_write(ring, 0);
3608}
3609
3610/**
3611 * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3612 *
3613 * @rdev: radeon_device pointer
3614 * @ring: radeon ring buffer object
3615 * @semaphore: radeon semaphore object
3616 * @emit_wait: Is this a sempahore wait?
3617 *
3618 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3619 * from running ahead of semaphore waits.
3620 */
bool cik_semaphore_ring_emit(struct radeon_device *rdev,
			     struct radeon_ring *ring,
			     struct radeon_semaphore *semaphore,
			     bool emit_wait)
{
	uint64_t addr = semaphore->gpu_addr;
	/* select whether this packet waits on or signals the semaphore */
	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
	radeon_ring_write(ring, lower_32_bits(addr));
	/* only 16 bits of the upper address fit; the rest of the dword
	 * carries the wait/signal select */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);

	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
		/* Prevent the PFP from running ahead of the semaphore wait */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}

	/* emission always succeeds on CIK */
	return true;
}
3641
3642/**
3643 * cik_copy_cpdma - copy pages using the CP DMA engine
3644 *
3645 * @rdev: radeon_device pointer
3646 * @src_offset: src GPU address
3647 * @dst_offset: dst GPU address
3648 * @num_gpu_pages: number of GPU pages to xfer
3649 * @resv: reservation object to sync to
3650 *
3651 * Copy GPU paging using the CP DMA engine (CIK+).
3652 * Used by the radeon ttm implementation to move pages if
3653 * registered as the asic copy callback.
3654 */
struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
				    uint64_t src_offset, uint64_t dst_offset,
				    unsigned num_gpu_pages,
				    struct reservation_object *resv)
{
	struct radeon_fence *fence;
	struct radeon_sync sync;
	int ring_index = rdev->asic->copy.blit_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes, control;
	int i, num_loops;
	int r = 0;

	radeon_sync_create(&sync);

	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	/* a single DMA_DATA packet copies at most 0x1fffff bytes */
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
	/* 7 dwords per copy packet plus headroom for sync/fence packets */
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	/* wait for any other users of the buffer before copying */
	radeon_sync_resv(rdev, &sync, resv, false);
	radeon_sync_rings(rdev, &sync, ring->idx);

	for (i = 0; i < num_loops; i++) {
		/* clamp this iteration to the packet's maximum size */
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0x1fffff)
			cur_size_in_bytes = 0x1fffff;
		size_in_bytes -= cur_size_in_bytes;
		control = 0;
		/* request CP sync only on the final packet */
		if (size_in_bytes == 0)
			control |= PACKET3_DMA_DATA_CP_SYNC;
		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
		radeon_ring_write(ring, control);
		radeon_ring_write(ring, lower_32_bits(src_offset));
		radeon_ring_write(ring, upper_32_bits(src_offset));
		radeon_ring_write(ring, lower_32_bits(dst_offset));
		radeon_ring_write(ring, upper_32_bits(dst_offset));
		radeon_ring_write(ring, cur_size_in_bytes);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

	r = radeon_fence_emit(rdev, &fence, ring->idx);
	if (r) {
		/* back out everything written to the ring so far */
		radeon_ring_unlock_undo(rdev, ring);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	radeon_ring_unlock_commit(rdev, ring, false);
	/* release the sync object, handing it the emitted fence */
	radeon_sync_free(rdev, &sync, fence);

	return fence;
}
3713
3714/*
3715 * IB stuff
3716 */
3717/**
3718 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3719 *
3720 * @rdev: radeon_device pointer
3721 * @ib: radeon indirect buffer object
3722 *
3723 * Emits a DE (drawing engine) or CE (constant engine) IB
3724 * on the gfx ring.  IBs are usually generated by userspace
3725 * acceleration drivers and submitted to the kernel for
3726 * scheduling on the ring.  This function schedules the IB
3727 * on the gfx ring for execution by the GPU.
3728 */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	/* VMID 0 is used when the IB is not associated with a VM */
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for this write + 4 for the IB packet below */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for this write + 4 for the IB packet below */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* IB length in dwords plus the VMID in the top byte */
	control |= ib->length_dw | (vm_id << 24);

	radeon_ring_write(ring, header);
	/* IB address must be 4-byte aligned */
	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
3768
3769/**
3770 * cik_ib_test - basic gfx ring IB test
3771 *
3772 * @rdev: radeon_device pointer
3773 * @ring: radeon_ring structure holding ring information
3774 *
3775 * Allocate an IB and execute it on the gfx ring (CIK).
3776 * Provides a basic gfx ring test to verify that IBs are working.
3777 * Returns 0 on success, error on failure.
3778 */
3779int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3780{
3781	struct radeon_ib ib;
3782	uint32_t scratch;
3783	uint32_t tmp = 0;
3784	unsigned i;
3785	int r;
3786
3787	r = radeon_scratch_get(rdev, &scratch);
3788	if (r) {
3789		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3790		return r;
3791	}
3792	WREG32(scratch, 0xCAFEDEAD);
3793	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3794	if (r) {
3795		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3796		radeon_scratch_free(rdev, scratch);
3797		return r;
3798	}
3799	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3800	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3801	ib.ptr[2] = 0xDEADBEEF;
3802	ib.length_dw = 3;
3803	r = radeon_ib_schedule(rdev, &ib, NULL, false);
3804	if (r) {
3805		radeon_scratch_free(rdev, scratch);
3806		radeon_ib_free(rdev, &ib);
3807		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3808		return r;
3809	}
3810	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3811		RADEON_USEC_IB_TEST_TIMEOUT));
3812	if (r < 0) {
3813		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3814		radeon_scratch_free(rdev, scratch);
3815		radeon_ib_free(rdev, &ib);
3816		return r;
3817	} else if (r == 0) {
3818		DRM_ERROR("radeon: fence wait timed out.\n");
3819		radeon_scratch_free(rdev, scratch);
3820		radeon_ib_free(rdev, &ib);
3821		return -ETIMEDOUT;
3822	}
3823	r = 0;
3824	for (i = 0; i < rdev->usec_timeout; i++) {
3825		tmp = RREG32(scratch);
3826		if (tmp == 0xDEADBEEF)
3827			break;
3828		DRM_UDELAY(1);
3829	}
3830	if (i < rdev->usec_timeout) {
3831		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3832	} else {
3833		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3834			  scratch, tmp);
3835		r = -EINVAL;
3836	}
3837	radeon_scratch_free(rdev, scratch);
3838	radeon_ib_free(rdev, &ib);
3839	return r;
3840}
3841
3842/*
3843 * CP.
 * On CIK, gfx and compute now have independent command processors.
3845 *
3846 * GFX
3847 * Gfx consists of a single ring and can process both gfx jobs and
3848 * compute jobs.  The gfx CP consists of three microengines (ME):
3849 * PFP - Pre-Fetch Parser
3850 * ME - Micro Engine
3851 * CE - Constant Engine
3852 * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3854 * used by the DE so that they can be loaded into cache in parallel
3855 * while the DE is processing state update packets.
3856 *
3857 * Compute
3858 * The compute CP consists of two microengines (ME):
3859 * MEC1 - Compute MicroEngine 1
3860 * MEC2 - Compute MicroEngine 2
3861 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3862 * The queues are exposed to userspace and are programmed directly
3863 * by the compute runtime.
3864 */
3865/**
3866 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3867 *
3868 * @rdev: radeon_device pointer
3869 * @enable: enable or disable the MEs
3870 *
3871 * Halts or unhalts the gfx MEs.
3872 */
3873static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3874{
3875	if (enable)
3876		WREG32(CP_ME_CNTL, 0);
3877	else {
3878		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3879			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3880		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3881		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3882	}
3883	udelay(50);
3884}
3885
3886/**
3887 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3888 *
3889 * @rdev: radeon_device pointer
3890 *
3891 * Loads the gfx PFP, ME, and CE ucode.
3892 * Returns 0 for success, -EINVAL if the ucode is not available.
3893 */
3894static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3895{
3896	int i;
3897
3898	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3899		return -EINVAL;
3900
3901	cik_cp_gfx_enable(rdev, false);
3902
3903	if (rdev->new_fw) {
3904		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3905			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3906		const struct gfx_firmware_header_v1_0 *ce_hdr =
3907			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3908		const struct gfx_firmware_header_v1_0 *me_hdr =
3909			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3910		const __le32 *fw_data;
3911		u32 fw_size;
3912
3913		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3914		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3915		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3916
3917		/* PFP */
3918		fw_data = (const __le32 *)
3919			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3920		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3921		WREG32(CP_PFP_UCODE_ADDR, 0);
3922		for (i = 0; i < fw_size; i++)
3923			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3924		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3925
3926		/* CE */
3927		fw_data = (const __le32 *)
3928			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3929		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3930		WREG32(CP_CE_UCODE_ADDR, 0);
3931		for (i = 0; i < fw_size; i++)
3932			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3933		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3934
3935		/* ME */
3936		fw_data = (const __be32 *)
3937			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3938		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3939		WREG32(CP_ME_RAM_WADDR, 0);
3940		for (i = 0; i < fw_size; i++)
3941			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3942		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3943		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3944	} else {
3945		const __be32 *fw_data;
3946
3947		/* PFP */
3948		fw_data = (const __be32 *)rdev->pfp_fw->data;
3949		WREG32(CP_PFP_UCODE_ADDR, 0);
3950		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3951			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3952		WREG32(CP_PFP_UCODE_ADDR, 0);
3953
3954		/* CE */
3955		fw_data = (const __be32 *)rdev->ce_fw->data;
3956		WREG32(CP_CE_UCODE_ADDR, 0);
3957		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3958			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3959		WREG32(CP_CE_UCODE_ADDR, 0);
3960
3961		/* ME */
3962		fw_data = (const __be32 *)rdev->me_fw->data;
3963		WREG32(CP_ME_RAM_WADDR, 0);
3964		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3965			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3966		WREG32(CP_ME_RAM_WADDR, 0);
3967	}
3968
3969	return 0;
3970}
3971
3972/**
3973 * cik_cp_gfx_start - start the gfx ring
3974 *
3975 * @rdev: radeon_device pointer
3976 *
3977 * Enables the ring and loads the clear state context and other
3978 * packets required to init the ring.
3979 * Returns 0 for success, error for failure.
3980 */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* reserve the clear state dwords plus 17 dwords of setup packets below */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0x8000);
	radeon_ring_write(ring, 0x8000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* emit the default (clear) state for this asic */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring, false);

	return 0;
}
4032
4033/**
4034 * cik_cp_gfx_fini - stop the gfx ring
4035 *
4036 * @rdev: radeon_device pointer
4037 *
4038 * Stop the gfx ring and tear down the driver ring
4039 * info.
4040 */
4041static void cik_cp_gfx_fini(struct radeon_device *rdev)
4042{
4043	cik_cp_gfx_enable(rdev, false);
4044	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4045}
4046
4047/**
4048 * cik_cp_gfx_resume - setup the gfx ring buffer registers
4049 *
4050 * @rdev: radeon_device pointer
4051 *
4052 * Program the location and size of the gfx ring buffer
4053 * and test it to make sure it's working.
4054 * Returns 0 for success, error for failure.
4055 */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	/* without writeback the CP must not update the rptr shadow */
	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	/* program the ring base address (256-byte aligned, hence >> 8) */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}

	/* the gfx ring is usable again, so expose all of VRAM for bo moves */
	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
4122
4123u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4124		     struct radeon_ring *ring)
4125{
4126	u32 rptr;
4127
4128	if (rdev->wb.enabled)
4129		rptr = rdev->wb.wb[ring->rptr_offs/4];
4130	else
4131		rptr = RREG32(CP_RB0_RPTR);
4132
4133	return rptr;
4134}
4135
4136u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4137		     struct radeon_ring *ring)
4138{
4139	return RREG32(CP_RB0_WPTR);
4140}
4141
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	/* NOTE(review): discarded readback likely flushes the posted
	 * register write - common MMIO idiom, confirm intent */
	(void)RREG32(CP_RB0_WPTR);
}
4148
u32 cik_compute_get_rptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 rptr;

	if (rdev->wb.enabled) {
		/* read pointer is shadowed to memory via writeback */
		rptr = rdev->wb.wb[ring->rptr_offs/4];
	} else {
		/* fall back to the HQD register; SRBM queue selection
		 * must be serialized under srbm_mutex */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		rptr = RREG32(CP_HQD_PQ_RPTR);
		/* deselect the queue again */
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}

	return rptr;
}
4166
u32 cik_compute_get_wptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 wptr;

	if (rdev->wb.enabled) {
		/* XXX check if swapping is necessary on BE */
		/* write pointer is shadowed to memory via writeback */
		wptr = rdev->wb.wb[ring->wptr_offs/4];
	} else {
		/* fall back to the HQD register; SRBM queue selection
		 * must be serialized under srbm_mutex */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		wptr = RREG32(CP_HQD_PQ_WPTR);
		/* deselect the queue again */
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}

	return wptr;
}
4185
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	/* update the wptr shadow in writeback memory, then ring the
	 * queue's doorbell to notify the CP */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4193
/* stop a single compute queue; caller must hold srbm_mutex
 * (see cik_cp_compute_enable) since this changes the SRBM selection
 */
static void cik_compute_stop(struct radeon_device *rdev,
			     struct radeon_ring *ring)
{
	u32 j, tmp;

	/* select this ring's queue for the register accesses below */
	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
	/* Disable wptr polling. */
	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
	tmp &= ~WPTR_POLL_EN;
	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
	/* Disable HQD. */
	if (RREG32(CP_HQD_ACTIVE) & 1) {
		/* request a dequeue and wait (bounded) for the queue to drain */
		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
		for (j = 0; j < rdev->usec_timeout; j++) {
			if (!(RREG32(CP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
		WREG32(CP_HQD_PQ_RPTR, 0);
		WREG32(CP_HQD_PQ_WPTR, 0);
	}
	/* deselect the queue */
	cik_srbm_select(rdev, 0, 0, 0, 0);
}
4218
4219/**
4220 * cik_cp_compute_enable - enable/disable the compute CP MEs
4221 *
4222 * @rdev: radeon_device pointer
4223 * @enable: enable or disable the MEs
4224 *
4225 * Halts or unhalts the compute MEs.
4226 */
static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
{
	if (enable)
		WREG32(CP_MEC_CNTL, 0);
	else {
		/*
		 * To make hibernation reliable we need to clear compute ring
		 * configuration before halting the compute ring.
		 */
		mutex_lock(&rdev->srbm_mutex);
		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
		mutex_unlock(&rdev->srbm_mutex);

		/* halt both MECs and mark the compute rings unusable */
		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}
	/* short delay after changing the halt state */
	udelay(50);
}
4247
4248/**
4249 * cik_cp_compute_load_microcode - load the compute CP ME ucode
4250 *
4251 * @rdev: radeon_device pointer
4252 *
4253 * Loads the compute MEC1&2 ucode.
4254 * Returns 0 for success, -EINVAL if the ucode is not available.
4255 */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	/* halt the MECs before touching their ucode RAM */
	cik_cp_compute_enable(rdev, false);

	if (rdev->new_fw) {
		const struct gfx_firmware_header_v1_0 *mec_hdr =
			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
		const __le32 *fw_data;
		u32 fw_size;

		radeon_ucode_print_gfx_hdr(&mec_hdr->header);

		/* MEC1 */
		fw_data = (const __le32 *)
			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));

		/* MEC2 - only Kaveri has a second MEC */
		if (rdev->family == CHIP_KAVERI) {
			/* NOTE(review): assumes mec2_fw was fetched whenever
			 * new_fw is set on Kaveri - confirm in the fw load path */
			const struct gfx_firmware_header_v1_0 *mec2_hdr =
				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;

			fw_data = (const __le32 *)
				(rdev->mec2_fw->data +
				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < fw_size; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
		}
	} else {
		const __be32 *fw_data;

		/* old-style firmware is big endian with a fixed size */
		/* MEC1 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

		if (rdev->family == CHIP_KAVERI) {
			/* MEC2 - old firmware reuses the MEC1 image */
			fw_data = (const __be32 *)rdev->mec_fw->data;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		}
	}

	return 0;
}
4318
4319/**
4320 * cik_cp_compute_start - start the compute queues
4321 *
4322 * @rdev: radeon_device pointer
4323 *
4324 * Enable the compute queues.
4325 * Returns 0 for success, error for failure.
4326 */
4327static int cik_cp_compute_start(struct radeon_device *rdev)
4328{
4329	cik_cp_compute_enable(rdev, true);
4330
4331	return 0;
4332}
4333
4334/**
4335 * cik_cp_compute_fini - stop the compute queues
4336 *
4337 * @rdev: radeon_device pointer
4338 *
4339 * Stop the compute queues and tear down the driver queue
4340 * info.
4341 */
4342static void cik_cp_compute_fini(struct radeon_device *rdev)
4343{
4344	int i, idx, r;
4345
4346	cik_cp_compute_enable(rdev, false);
4347
4348	for (i = 0; i < 2; i++) {
4349		if (i == 0)
4350			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4351		else
4352			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4353
4354		if (rdev->ring[idx].mqd_obj) {
4355			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4356			if (unlikely(r != 0))
4357				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4358
4359			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4360			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4361
4362			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4363			rdev->ring[idx].mqd_obj = NULL;
4364		}
4365	}
4366}
4367
4368static void cik_mec_fini(struct radeon_device *rdev)
4369{
4370	int r;
4371
4372	if (rdev->mec.hpd_eop_obj) {
4373		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4374		if (unlikely(r != 0))
4375			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4376		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4377		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4378
4379		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4380		rdev->mec.hpd_eop_obj = NULL;
4381	}
4382}
4383
4384#define MEC_HPD_SIZE 2048
4385
static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 */
	if (rdev->family == CHIP_KAVERI)
		rdev->mec.num_mec = 2;
	else
		rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 4;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	/* allocate the HPD EOP buffer: 2 * MEC_HPD_SIZE bytes per pipe */
	if (rdev->mec.hpd_eop_obj == NULL) {
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	/* pin it in GTT and map it so it can be cleared */
	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}
4441
/* CPU-side mirror of the CP hardware queue descriptor (HQD) registers.
 * Embedded in struct bonaire_mqd; field order follows the register set,
 * so do not reorder.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4480
/* Memory queue descriptor (MQD) for Bonaire-class compute queues.
 * The driver fills this structure in a GPU-visible buffer object; the
 * layout is presumably consumed by the CP firmware, so do not change it.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	/* saved/programmed HQD register state for this queue */
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4508
4509/**
4510 * cik_cp_compute_resume - setup the compute queue registers
4511 *
4512 * @rdev: radeon_device pointer
4513 *
4514 * Program the compute queues and test them to make sure they
4515 * are working.
4516 * Returns 0 for success, error for failure.
4517 */
4518static int cik_cp_compute_resume(struct radeon_device *rdev)
4519{
4520	int r, i, j, idx;
4521	u32 tmp;
4522	bool use_doorbell = true;
4523	u64 hqd_gpu_addr;
4524	u64 mqd_gpu_addr;
4525	u64 eop_gpu_addr;
4526	u64 wb_gpu_addr;
4527	u32 *buf;
4528	struct bonaire_mqd *mqd;
4529
4530	r = cik_cp_compute_start(rdev);
4531	if (r)
4532		return r;
4533
4534	/* fix up chicken bits */
4535	tmp = RREG32(CP_CPF_DEBUG);
4536	tmp |= (1 << 23);
4537	WREG32(CP_CPF_DEBUG, tmp);
4538
4539	/* init the pipes */
4540	mutex_lock(&rdev->srbm_mutex);
4541
4542	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); ++i) {
4543		int me = (i < 4) ? 1 : 2;
4544		int pipe = (i < 4) ? i : (i - 4);
4545
4546		cik_srbm_select(rdev, me, pipe, 0, 0);
4547
4548		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2) ;
4549		/* write the EOP addr */
4550		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4551		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4552
4553		/* set the VMID assigned */
4554		WREG32(CP_HPD_EOP_VMID, 0);
4555
4556		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4557		tmp = RREG32(CP_HPD_EOP_CONTROL);
4558		tmp &= ~EOP_SIZE_MASK;
4559		tmp |= order_base_2(MEC_HPD_SIZE / 8);
4560		WREG32(CP_HPD_EOP_CONTROL, tmp);
4561
4562	}
4563	cik_srbm_select(rdev, 0, 0, 0, 0);
4564	mutex_unlock(&rdev->srbm_mutex);
4565
4566	/* init the queues.  Just two for now. */
4567	for (i = 0; i < 2; i++) {
4568		if (i == 0)
4569			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4570		else
4571			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4572
4573		if (rdev->ring[idx].mqd_obj == NULL) {
4574			r = radeon_bo_create(rdev,
4575					     sizeof(struct bonaire_mqd),
4576					     PAGE_SIZE, true,
4577					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4578					     NULL, &rdev->ring[idx].mqd_obj);
4579			if (r) {
4580				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4581				return r;
4582			}
4583		}
4584
4585		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4586		if (unlikely(r != 0)) {
4587			cik_cp_compute_fini(rdev);
4588			return r;
4589		}
4590		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4591				  &mqd_gpu_addr);
4592		if (r) {
4593			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4594			cik_cp_compute_fini(rdev);
4595			return r;
4596		}
4597		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4598		if (r) {
4599			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4600			cik_cp_compute_fini(rdev);
4601			return r;
4602		}
4603
4604		/* init the mqd struct */
4605		memset(buf, 0, sizeof(struct bonaire_mqd));
4606
4607		mqd = (struct bonaire_mqd *)buf;
4608		mqd->header = 0xC0310800;
4609		mqd->static_thread_mgmt01[0] = 0xffffffff;
4610		mqd->static_thread_mgmt01[1] = 0xffffffff;
4611		mqd->static_thread_mgmt23[0] = 0xffffffff;
4612		mqd->static_thread_mgmt23[1] = 0xffffffff;
4613
4614		mutex_lock(&rdev->srbm_mutex);
4615		cik_srbm_select(rdev, rdev->ring[idx].me,
4616				rdev->ring[idx].pipe,
4617				rdev->ring[idx].queue, 0);
4618
4619		/* disable wptr polling */
4620		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4621		tmp &= ~WPTR_POLL_EN;
4622		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4623
4624		/* enable doorbell? */
4625		mqd->queue_state.cp_hqd_pq_doorbell_control =
4626			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4627		if (use_doorbell)
4628			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4629		else
4630			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4631		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4632		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4633
4634		/* disable the queue if it's active */
4635		mqd->queue_state.cp_hqd_dequeue_request = 0;
4636		mqd->queue_state.cp_hqd_pq_rptr = 0;
4637		mqd->queue_state.cp_hqd_pq_wptr= 0;
4638		if (RREG32(CP_HQD_ACTIVE) & 1) {
4639			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4640			for (j = 0; j < rdev->usec_timeout; j++) {
4641				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4642					break;
4643				udelay(1);
4644			}
4645			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4646			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4647			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4648		}
4649
4650		/* set the pointer to the MQD */
4651		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4652		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4653		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4654		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4655		/* set MQD vmid to 0 */
4656		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4657		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4658		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4659
4660		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4661		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4662		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4663		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4664		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4665		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4666
4667		/* set up the HQD, this is similar to CP_RB0_CNTL */
4668		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4669		mqd->queue_state.cp_hqd_pq_control &=
4670			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4671
4672		mqd->queue_state.cp_hqd_pq_control |=
4673			order_base_2(rdev->ring[idx].ring_size / 8);
4674		mqd->queue_state.cp_hqd_pq_control |=
4675			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4676#ifdef __BIG_ENDIAN
4677		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4678#endif
4679		mqd->queue_state.cp_hqd_pq_control &=
4680			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4681		mqd->queue_state.cp_hqd_pq_control |=
4682			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4683		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4684
4685		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4686		if (i == 0)
4687			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4688		else
4689			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4690		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4691		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4692		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4693		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4694		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4695
		/* set the wb address whether it's enabled or not */
4697		if (i == 0)
4698			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4699		else
4700			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4701		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4702		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4703			upper_32_bits(wb_gpu_addr) & 0xffff;
4704		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4705		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4706		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4707		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4708
4709		/* enable the doorbell if requested */
4710		if (use_doorbell) {
4711			mqd->queue_state.cp_hqd_pq_doorbell_control =
4712				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4713			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4714			mqd->queue_state.cp_hqd_pq_doorbell_control |=
4715				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4716			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4717			mqd->queue_state.cp_hqd_pq_doorbell_control &=
4718				~(DOORBELL_SOURCE | DOORBELL_HIT);
4719
4720		} else {
4721			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4722		}
4723		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4724		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4725
4726		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4727		rdev->ring[idx].wptr = 0;
4728		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4729		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4730		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4731
4732		/* set the vmid for the queue */
4733		mqd->queue_state.cp_hqd_vmid = 0;
4734		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4735
4736		/* activate the queue */
4737		mqd->queue_state.cp_hqd_active = 1;
4738		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4739
4740		cik_srbm_select(rdev, 0, 0, 0, 0);
4741		mutex_unlock(&rdev->srbm_mutex);
4742
4743		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4744		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4745
4746		rdev->ring[idx].ready = true;
4747		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4748		if (r)
4749			rdev->ring[idx].ready = false;
4750	}
4751
4752	return 0;
4753}
4754
/**
 * cik_cp_enable - enable/disable both command processors
 *
 * @rdev: radeon_device pointer
 * @enable: true to enable, false to halt
 *
 * Toggles the GFX CP and the compute CP (MEC) together.
 */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
4760
/**
 * cik_cp_load_microcode - load the gfx and compute CP microcode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the GFX CP microcode first, then the compute CP (MEC)
 * microcode, stopping at the first failure.
 * Returns 0 on success, negative error code on failure.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int ret;

	ret = cik_cp_gfx_load_microcode(rdev);
	if (!ret)
		ret = cik_cp_compute_load_microcode(rdev);

	return ret;
}
4774
/**
 * cik_cp_fini - tear down both command processors
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the GFX CP state, then the compute CP (MEC) state.
 */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4780
4781static int cik_cp_resume(struct radeon_device *rdev)
4782{
4783	int r;
4784
4785	cik_enable_gui_idle_interrupt(rdev, false);
4786
4787	r = cik_cp_load_microcode(rdev);
4788	if (r)
4789		return r;
4790
4791	r = cik_cp_gfx_resume(rdev);
4792	if (r)
4793		return r;
4794	r = cik_cp_compute_resume(rdev);
4795	if (r)
4796		return r;
4797
4798	cik_enable_gui_idle_interrupt(rdev, true);
4799
4800	return 0;
4801}
4802
/**
 * cik_print_gpu_status_regs - dump GPU status registers to the log
 *
 * @rdev: radeon_device pointer
 *
 * Dumps the GRBM, SRBM, SDMA and CP status/stall registers via
 * dev_info().  Called before and after a soft reset to help diagnose
 * which block is hung.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	/* both SDMA engines share the same register layout at different offsets */
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4842
4843/**
4844 * cik_gpu_check_soft_reset - check which blocks are busy
4845 *
4846 * @rdev: radeon_device pointer
4847 *
4848 * Check which blocks are busy and return the relevant reset
4849 * mask to be used by cik_gpu_soft_reset().
4850 * Returns a mask of the blocks to be reset.
4851 */
4852u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4853{
4854	u32 reset_mask = 0;
4855	u32 tmp;
4856
4857	/* GRBM_STATUS */
4858	tmp = RREG32(GRBM_STATUS);
4859	if (tmp & (PA_BUSY | SC_BUSY |
4860		   BCI_BUSY | SX_BUSY |
4861		   TA_BUSY | VGT_BUSY |
4862		   DB_BUSY | CB_BUSY |
4863		   GDS_BUSY | SPI_BUSY |
4864		   IA_BUSY | IA_BUSY_NO_DMA))
4865		reset_mask |= RADEON_RESET_GFX;
4866
4867	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4868		reset_mask |= RADEON_RESET_CP;
4869
4870	/* GRBM_STATUS2 */
4871	tmp = RREG32(GRBM_STATUS2);
4872	if (tmp & RLC_BUSY)
4873		reset_mask |= RADEON_RESET_RLC;
4874
4875	/* SDMA0_STATUS_REG */
4876	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4877	if (!(tmp & SDMA_IDLE))
4878		reset_mask |= RADEON_RESET_DMA;
4879
4880	/* SDMA1_STATUS_REG */
4881	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4882	if (!(tmp & SDMA_IDLE))
4883		reset_mask |= RADEON_RESET_DMA1;
4884
4885	/* SRBM_STATUS2 */
4886	tmp = RREG32(SRBM_STATUS2);
4887	if (tmp & SDMA_BUSY)
4888		reset_mask |= RADEON_RESET_DMA;
4889
4890	if (tmp & SDMA1_BUSY)
4891		reset_mask |= RADEON_RESET_DMA1;
4892
4893	/* SRBM_STATUS */
4894	tmp = RREG32(SRBM_STATUS);
4895
4896	if (tmp & IH_BUSY)
4897		reset_mask |= RADEON_RESET_IH;
4898
4899	if (tmp & SEM_BUSY)
4900		reset_mask |= RADEON_RESET_SEM;
4901
4902	if (tmp & GRBM_RQ_PENDING)
4903		reset_mask |= RADEON_RESET_GRBM;
4904
4905	if (tmp & VMC_BUSY)
4906		reset_mask |= RADEON_RESET_VMC;
4907
4908	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4909		   MCC_BUSY | MCD_BUSY))
4910		reset_mask |= RADEON_RESET_MC;
4911
4912	if (evergreen_is_display_hung(rdev))
4913		reset_mask |= RADEON_RESET_DISPLAY;
4914
4915	/* Skip MC reset as it's mostly likely not hung, just busy */
4916	if (reset_mask & RADEON_RESET_MC) {
4917		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4918		reset_mask &= ~RADEON_RESET_MC;
4919	}
4920
4921	return reset_mask;
4922}
4923
4924/**
4925 * cik_gpu_soft_reset - soft reset GPU
4926 *
4927 * @rdev: radeon_device pointer
4928 * @reset_mask: mask of which blocks to reset
4929 *
4930 * Soft reset the blocks specified in @reset_mask.
4931 */
4932static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4933{
4934	struct evergreen_mc_save save;
4935	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4936	u32 tmp;
4937
4938	if (reset_mask == 0)
4939		return;
4940
4941	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4942
4943	cik_print_gpu_status_regs(rdev);
4944	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4945		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4946	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4947		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4948
4949	/* disable CG/PG */
4950	cik_fini_pg(rdev);
4951	cik_fini_cg(rdev);
4952
4953	/* stop the rlc */
4954	cik_rlc_stop(rdev);
4955
4956	/* Disable GFX parsing/prefetching */
4957	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4958
4959	/* Disable MEC parsing/prefetching */
4960	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4961
4962	if (reset_mask & RADEON_RESET_DMA) {
4963		/* sdma0 */
4964		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4965		tmp |= SDMA_HALT;
4966		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4967	}
4968	if (reset_mask & RADEON_RESET_DMA1) {
4969		/* sdma1 */
4970		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4971		tmp |= SDMA_HALT;
4972		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4973	}
4974
4975	evergreen_mc_stop(rdev, &save);
4976	if (evergreen_mc_wait_for_idle(rdev)) {
4977		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4978	}
4979
4980	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4981		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4982
4983	if (reset_mask & RADEON_RESET_CP) {
4984		grbm_soft_reset |= SOFT_RESET_CP;
4985
4986		srbm_soft_reset |= SOFT_RESET_GRBM;
4987	}
4988
4989	if (reset_mask & RADEON_RESET_DMA)
4990		srbm_soft_reset |= SOFT_RESET_SDMA;
4991
4992	if (reset_mask & RADEON_RESET_DMA1)
4993		srbm_soft_reset |= SOFT_RESET_SDMA1;
4994
4995	if (reset_mask & RADEON_RESET_DISPLAY)
4996		srbm_soft_reset |= SOFT_RESET_DC;
4997
4998	if (reset_mask & RADEON_RESET_RLC)
4999		grbm_soft_reset |= SOFT_RESET_RLC;
5000
5001	if (reset_mask & RADEON_RESET_SEM)
5002		srbm_soft_reset |= SOFT_RESET_SEM;
5003
5004	if (reset_mask & RADEON_RESET_IH)
5005		srbm_soft_reset |= SOFT_RESET_IH;
5006
5007	if (reset_mask & RADEON_RESET_GRBM)
5008		srbm_soft_reset |= SOFT_RESET_GRBM;
5009
5010	if (reset_mask & RADEON_RESET_VMC)
5011		srbm_soft_reset |= SOFT_RESET_VMC;
5012
5013	if (!(rdev->flags & RADEON_IS_IGP)) {
5014		if (reset_mask & RADEON_RESET_MC)
5015			srbm_soft_reset |= SOFT_RESET_MC;
5016	}
5017
5018	if (grbm_soft_reset) {
5019		tmp = RREG32(GRBM_SOFT_RESET);
5020		tmp |= grbm_soft_reset;
5021		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5022		WREG32(GRBM_SOFT_RESET, tmp);
5023		tmp = RREG32(GRBM_SOFT_RESET);
5024
5025		udelay(50);
5026
5027		tmp &= ~grbm_soft_reset;
5028		WREG32(GRBM_SOFT_RESET, tmp);
5029		tmp = RREG32(GRBM_SOFT_RESET);
5030	}
5031
5032	if (srbm_soft_reset) {
5033		tmp = RREG32(SRBM_SOFT_RESET);
5034		tmp |= srbm_soft_reset;
5035		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5036		WREG32(SRBM_SOFT_RESET, tmp);
5037		tmp = RREG32(SRBM_SOFT_RESET);
5038
5039		udelay(50);
5040
5041		tmp &= ~srbm_soft_reset;
5042		WREG32(SRBM_SOFT_RESET, tmp);
5043		tmp = RREG32(SRBM_SOFT_RESET);
5044	}
5045
5046	/* Wait a little for things to settle down */
5047	udelay(50);
5048
5049	evergreen_mc_resume(rdev, &save);
5050	udelay(50);
5051
5052	cik_print_gpu_status_regs(rdev);
5053}
5054
/* GMCON register state saved across a pci config reset on IGPs (KV/KB) */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;
	u32 gmcon_misc;
	u32 gmcon_misc3;
};
5060
/**
 * kv_save_regs_for_reset - save GMCON state before a pci config reset
 *
 * @rdev: radeon_device pointer
 * @save: storage for the current GMCON register values
 *
 * Saves GMCON_RENG_EXECUTE/MISC/MISC3, then clears the restore-engine
 * trigger bits and stutter enable so they do not fire during the reset.
 * Restored afterwards by kv_restore_regs_for_reset().
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5072
5073static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5074				      struct kv_reset_save_regs *save)
5075{
5076	int i;
5077
5078	WREG32(GMCON_PGFSM_WRITE, 0);
5079	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5080
5081	for (i = 0; i < 5; i++)
5082		WREG32(GMCON_PGFSM_WRITE, 0);
5083
5084	WREG32(GMCON_PGFSM_WRITE, 0);
5085	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5086
5087	for (i = 0; i < 5; i++)
5088		WREG32(GMCON_PGFSM_WRITE, 0);
5089
5090	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5091	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5092
5093	for (i = 0; i < 5; i++)
5094		WREG32(GMCON_PGFSM_WRITE, 0);
5095
5096	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5097	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5098
5099	for (i = 0; i < 5; i++)
5100		WREG32(GMCON_PGFSM_WRITE, 0);
5101
5102	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5103	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5104
5105	for (i = 0; i < 5; i++)
5106		WREG32(GMCON_PGFSM_WRITE, 0);
5107
5108	WREG32(GMCON_PGFSM_WRITE, 0);
5109	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5110
5111	for (i = 0; i < 5; i++)
5112		WREG32(GMCON_PGFSM_WRITE, 0);
5113
5114	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5115	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5116
5117	for (i = 0; i < 5; i++)
5118		WREG32(GMCON_PGFSM_WRITE, 0);
5119
5120	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5121	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5122
5123	for (i = 0; i < 5; i++)
5124		WREG32(GMCON_PGFSM_WRITE, 0);
5125
5126	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5127	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5128
5129	for (i = 0; i < 5; i++)
5130		WREG32(GMCON_PGFSM_WRITE, 0);
5131
5132	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5133	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5134
5135	for (i = 0; i < 5; i++)
5136		WREG32(GMCON_PGFSM_WRITE, 0);
5137
5138	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5139	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5140
5141	WREG32(GMCON_MISC3, save->gmcon_misc3);
5142	WREG32(GMCON_MISC, save->gmcon_misc);
5143	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5144}
5145
/**
 * cik_gpu_pci_config_reset - reset the asic via the pci config space
 *
 * @rdev: radeon_device pointer
 *
 * Halts the CP, MEC, SDMA and RLC engines, quiesces the memory
 * controller, then resets the whole asic through a PCI config reset
 * and waits for it to come back.  On IGPs the GMCON registers are
 * saved beforehand and restored afterwards.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset;
	 * CONFIG_MEMSIZE reads all-ones while the asic is still in reset
	 */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5208
5209/**
5210 * cik_asic_reset - soft reset GPU
5211 *
5212 * @rdev: radeon_device pointer
5213 * @hard: force hard reset
5214 *
5215 * Look up which blocks are hung and attempt
5216 * to reset them.
5217 * Returns 0 for success.
5218 */
5219int cik_asic_reset(struct radeon_device *rdev, bool hard)
5220{
5221	u32 reset_mask;
5222
5223	if (hard) {
5224		cik_gpu_pci_config_reset(rdev);
5225		return 0;
5226	}
5227
5228	reset_mask = cik_gpu_check_soft_reset(rdev);
5229
5230	if (reset_mask)
5231		r600_set_bios_scratch_engine_hung(rdev, true);
5232
5233	/* try soft reset */
5234	cik_gpu_soft_reset(rdev, reset_mask);
5235
5236	reset_mask = cik_gpu_check_soft_reset(rdev);
5237
5238	/* try pci config reset */
5239	if (reset_mask && radeon_hard_reset)
5240		cik_gpu_pci_config_reset(rdev);
5241
5242	reset_mask = cik_gpu_check_soft_reset(rdev);
5243
5244	if (!reset_mask)
5245		r600_set_bios_scratch_engine_hung(rdev, false);
5246
5247	return 0;
5248}
5249
5250/**
5251 * cik_gfx_is_lockup - check if the 3D engine is locked up
5252 *
5253 * @rdev: radeon_device pointer
5254 * @ring: radeon_ring structure holding ring information
5255 *
5256 * Check if the 3D engine is locked up (CIK).
5257 * Returns true if the engine is locked, false if not.
5258 */
5259bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5260{
5261	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5262
5263	if (!(reset_mask & (RADEON_RESET_GFX |
5264			    RADEON_RESET_COMPUTE |
5265			    RADEON_RESET_CP))) {
5266		radeon_ring_lockup_update(rdev, ring);
5267		return false;
5268	}
5269	return radeon_ring_test_lockup(rdev, ring);
5270}
5271
/* MC */
/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).  The MC is stopped while the
 * apertures are reprogrammed and resumed afterwards.
 */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: end in the high half-word, start in the low (16MB units) */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* AGP is unused: BOT above TOP disables the aperture */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
5328
5329/**
5330 * cik_mc_init - initialize the memory controller driver params
5331 *
5332 * @rdev: radeon_device pointer
5333 *
5334 * Look up the amount of vram, vram width, and decide how to place
5335 * vram and gart within the GPU's physical address space (CIK).
5336 * Returns 0 for success.
5337 */
5338static int cik_mc_init(struct radeon_device *rdev)
5339{
5340	u32 tmp;
5341	int chansize, numchan;
5342
5343	/* Get VRAM informations */
5344	rdev->mc.vram_is_ddr = true;
5345	tmp = RREG32(MC_ARB_RAMCFG);
5346	if (tmp & CHANSIZE_MASK) {
5347		chansize = 64;
5348	} else {
5349		chansize = 32;
5350	}
5351	tmp = RREG32(MC_SHARED_CHMAP);
5352	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5353	case 0:
5354	default:
5355		numchan = 1;
5356		break;
5357	case 1:
5358		numchan = 2;
5359		break;
5360	case 2:
5361		numchan = 4;
5362		break;
5363	case 3:
5364		numchan = 8;
5365		break;
5366	case 4:
5367		numchan = 3;
5368		break;
5369	case 5:
5370		numchan = 6;
5371		break;
5372	case 6:
5373		numchan = 10;
5374		break;
5375	case 7:
5376		numchan = 12;
5377		break;
5378	case 8:
5379		numchan = 16;
5380		break;
5381	}
5382	rdev->mc.vram_width = numchan * chansize;
5383	/* Could aper size report 0 ? */
5384	rdev->mc.aper_base = rdev->fb_aper_offset;
5385	rdev->mc.aper_size = rdev->fb_aper_size;
5386	/* size in MB on si */
5387	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5388	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5389	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5390	si_vram_gtt_location(rdev, &rdev->mc);
5391	radeon_update_bandwidth_info(rdev);
5392
5393	return 0;
5394}
5395
5396/*
5397 * GART
5398 * VMID 0 is the physical GPU addresses as used by the kernel.
5399 * VMIDs 1-15 are used for userspace clients and are handled
5400 * by the radeon vm/hsa code.
5401 */
5402/**
5403 * cik_pcie_gart_tlb_flush - gart tlb flush callback
5404 *
5405 * @rdev: radeon_device pointer
5406 *
5407 * Flush the TLB for the VMID 0 page table (CIK).
5408 */
5409void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5410{
5411	/* flush hdp cache */
5412	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5413
5414	/* bits 0-15 are the VM contexts0-15 */
5415	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5416}
5417
5418/**
5419 * cik_pcie_gart_enable - gart enable
5420 *
5421 * @rdev: radeon_device pointer
5422 *
5423 * This sets up the TLBs, programs the page tables for VMID0,
5424 * sets up the hw for VMIDs 1-15 which are allocated on
5425 * demand, and sets up the global locations for the LDS, GDS,
5426 * and GPUVM for FSA64 clients (CIK).
5427 * Returns 0 for success, errors for failure.
5428 */
5429static int cik_pcie_gart_enable(struct radeon_device *rdev)
5430{
5431	int r, i;
5432
5433	if (rdev->gart.robj == NULL) {
5434		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5435		return -EINVAL;
5436	}
5437	r = radeon_gart_table_vram_pin(rdev);
5438	if (r)
5439		return r;
5440	/* Setup TLB control */
5441	WREG32(MC_VM_MX_L1_TLB_CNTL,
5442	       (0xA << 7) |
5443	       ENABLE_L1_TLB |
5444	       ENABLE_L1_FRAGMENT_PROCESSING |
5445	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5446	       ENABLE_ADVANCED_DRIVER_MODEL |
5447	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5448	/* Setup L2 cache */
5449	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5450	       ENABLE_L2_FRAGMENT_PROCESSING |
5451	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5452	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5453	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5454	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5455	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5456	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5457	       BANK_SELECT(4) |
5458	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5459	/* setup context0 */
5460	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5461	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5462	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5463	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5464			(u32)(rdev->dummy_page.addr >> 12));
5465	WREG32(VM_CONTEXT0_CNTL2, 0);
5466	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5467				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5468
5469	WREG32(0x15D4, 0);
5470	WREG32(0x15D8, 0);
5471	WREG32(0x15DC, 0);
5472
5473	/* restore context1-15 */
5474	/* set vm size, must be a multiple of 4 */
5475	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5476	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5477	for (i = 1; i < 16; i++) {
5478		if (i < 8)
5479			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5480			       rdev->vm_manager.saved_table_addr[i]);
5481		else
5482			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5483			       rdev->vm_manager.saved_table_addr[i]);
5484	}
5485
5486	/* enable context1-15 */
5487	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5488	       (u32)(rdev->dummy_page.addr >> 12));
5489	WREG32(VM_CONTEXT1_CNTL2, 4);
5490	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5491				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5492				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5493				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5494				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5495				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5496				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5497				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5498				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5499				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5500				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5501				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5502				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5503				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5504
5505	if (rdev->family == CHIP_KAVERI) {
5506		u32 tmp = RREG32(CHUB_CONTROL);
5507		tmp &= ~BYPASS_VM;
5508		WREG32(CHUB_CONTROL, tmp);
5509	}
5510
5511	/* XXX SH_MEM regs */
5512	/* where to put LDS, scratch, GPUVM in FSA64 space */
5513	mutex_lock(&rdev->srbm_mutex);
5514	for (i = 0; i < 16; i++) {
5515		cik_srbm_select(rdev, 0, 0, 0, i);
5516		/* CP and shaders */
5517		WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
5518		WREG32(SH_MEM_APE1_BASE, 1);
5519		WREG32(SH_MEM_APE1_LIMIT, 0);
5520		WREG32(SH_MEM_BASES, 0);
5521		/* SDMA GFX */
5522		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5523		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5524		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5525		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5526		/* XXX SDMA RLC - todo */
5527	}
5528	cik_srbm_select(rdev, 0, 0, 0, 0);
5529	mutex_unlock(&rdev->srbm_mutex);
5530
5531	cik_pcie_gart_tlb_flush(rdev);
5532	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5533		 (unsigned)(rdev->mc.gtt_size >> 20),
5534		 (unsigned long long)rdev->gart.table_addr);
5535	rdev->gart.ready = true;
5536	return 0;
5537}
5538
5539/**
5540 * cik_pcie_gart_disable - gart disable
5541 *
5542 * @rdev: radeon_device pointer
5543 *
5544 * This disables all VM page table (CIK).
5545 */
5546static void cik_pcie_gart_disable(struct radeon_device *rdev)
5547{
5548	unsigned i;
5549
5550	for (i = 1; i < 16; ++i) {
5551		uint32_t reg;
5552		if (i < 8)
5553			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5554		else
5555			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5556		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5557	}
5558
5559	/* Disable all tables */
5560	WREG32(VM_CONTEXT0_CNTL, 0);
5561	WREG32(VM_CONTEXT1_CNTL, 0);
5562	/* Setup TLB control */
5563	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5564	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5565	/* Setup L2 cache */
5566	WREG32(VM_L2_CNTL,
5567	       ENABLE_L2_FRAGMENT_PROCESSING |
5568	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5569	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5570	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5571	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5572	WREG32(VM_L2_CNTL2, 0);
5573	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5574	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5575	radeon_gart_table_vram_unpin(rdev);
5576}
5577
5578/**
5579 * cik_pcie_gart_fini - vm fini callback
5580 *
5581 * @rdev: radeon_device pointer
5582 *
5583 * Tears down the driver GART/VM setup (CIK).
5584 */
5585static void cik_pcie_gart_fini(struct radeon_device *rdev)
5586{
5587	cik_pcie_gart_disable(rdev);
5588	radeon_gart_table_vram_free(rdev);
5589	radeon_gart_fini(rdev);
5590}
5591
5592/* vm parser */
5593/**
5594 * cik_ib_parse - vm ib_parse callback
5595 *
5596 * @rdev: radeon_device pointer
5597 * @ib: indirect buffer pointer
5598 *
5599 * CIK uses hw IB checking so this is a nop (CIK).
5600 */
5601int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5602{
5603	return 0;
5604}
5605
5606/*
5607 * vm
5608 * VMID 0 is the physical GPU addresses as used by the kernel.
5609 * VMIDs 1-15 are used for userspace clients and are handled
5610 * by the radeon vm/hsa code.
5611 */
5612/**
5613 * cik_vm_init - cik vm init callback
5614 *
5615 * @rdev: radeon_device pointer
5616 *
5617 * Inits cik specific vm parameters (number of VMs, base of vram for
5618 * VMIDs 1-15) (CIK).
5619 * Returns 0 for success.
5620 */
5621int cik_vm_init(struct radeon_device *rdev)
5622{
5623	/*
5624	 * number of VMs
5625	 * VMID 0 is reserved for System
5626	 * radeon graphics/compute will use VMIDs 1-15
5627	 */
5628	rdev->vm_manager.nvm = 16;
5629	/* base offset of vram pages */
5630	if (rdev->flags & RADEON_IS_IGP) {
5631		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5632		tmp <<= 22;
5633		rdev->vm_manager.vram_base_offset = tmp;
5634	} else
5635		rdev->vm_manager.vram_base_offset = 0;
5636
5637	return 0;
5638}
5639
5640/**
5641 * cik_vm_fini - cik vm fini callback
5642 *
5643 * @rdev: radeon_device pointer
5644 *
5645 * Tear down any asic specific VM setup (CIK).
5646 */
5647void cik_vm_fini(struct radeon_device *rdev)
5648{
5649}
5650
5651/**
5652 * cik_vm_decode_fault - print human readable fault info
5653 *
5654 * @rdev: radeon_device pointer
5655 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5656 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5657 *
5658 * Print human readable fault information (CIK).
5659 */
static void cik_vm_decode_fault(struct radeon_device *rdev,
				u32 status, u32 addr, u32 mc_client)
{
	u32 mc_id;
	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
	/* mc_client packs a 4-character block tag, MSB first; unpack to a string */
	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };

	/* Hawaii uses a wider memory-client-ID field than the other CIK parts */
	if (rdev->family == CHIP_HAWAII)
		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
	else
		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;

	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
	       protections, vmid, addr,
	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
	       block, mc_client, mc_id);
}
5679
5680/**
5681 * cik_vm_flush - cik vm flush using the CP
5682 *
5683 * @rdev: radeon_device pointer
5684 *
5685 * Update the page table base and flush the VM TLB
5686 * using the CP (CIK).
5687 */
void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
		  unsigned vm_id, uint64_t pd_addr)
{
	/* only the GFX ring has a PFP engine; compute rings write via ME */
	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);

	/* update this VMID's page-directory base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	/* VMIDs 0-7 and 8-15 live in two separate register banks */
	if (vm_id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, pd_addr >> 12);

	/* update SH_MEM_* regs */
	/* select the target VMID in SRBM before touching its SH_MEM_* regs */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm_id));

	/* four consecutive SH_MEM registers starting at SH_MEM_BASES */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	/* restore the SRBM selection back to VMID 0 (kernel context) */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0); /* ref */
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
5761
5762/*
5763 * RLC
5764 * The RLC is a multi-purpose microengine that handles a
5765 * variety of functions, the most important of which is
5766 * the interrupt controller.
5767 */
5768static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5769					  bool enable)
5770{
5771	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5772
5773	if (enable)
5774		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5775	else
5776		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5777	WREG32(CP_INT_CNTL_RING0, tmp);
5778}
5779
5780static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5781{
5782	u32 tmp;
5783
5784	tmp = RREG32(RLC_LB_CNTL);
5785	if (enable)
5786		tmp |= LOAD_BALANCE_ENABLE;
5787	else
5788		tmp &= ~LOAD_BALANCE_ENABLE;
5789	WREG32(RLC_LB_CNTL, tmp);
5790}
5791
/* Poll until the RLC serdes masters (per-CU and non-CU) report idle. */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	/* per-SE/SH: wait for the CU master serdes of each shader array */
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast mode so later register writes hit all SEs/SHs */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* then wait for the global (non-CU) serdes masters */
	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
5816
5817static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5818{
5819	u32 tmp;
5820
5821	tmp = RREG32(RLC_CNTL);
5822	if (tmp != rlc)
5823		WREG32(RLC_CNTL, rlc);
5824}
5825
/*
 * Halt the RLC if it is running and wait for it to go idle.
 * Returns the previous RLC_CNTL value so the caller can restore it
 * later via cik_update_rlc().
 */
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		/* wait for the GPM micro engine to drain */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		cik_wait_for_rlc_serdes(rdev);
	}

	return orig;
}
5849
/*
 * Request the RLC to enter safe mode and wait until it confirms.
 * Used to quiesce the RLC before changing clock/power-gating state.
 */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	/* post the ENTER_SAFE_MODE message to the RLC mailbox */
	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	/* wait until GFX power and clocks are reported stable */
	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* wait for the RLC to acknowledge the request (REQ bit clears) */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
5870
5871void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5872{
5873	u32 tmp;
5874
5875	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5876	WREG32(RLC_GPR_REG2, tmp);
5877}
5878
5879/**
5880 * cik_rlc_stop - stop the RLC ME
5881 *
5882 * @rdev: radeon_device pointer
5883 *
5884 * Halt the RLC ME (MicroEngine) (CIK).
5885 */
static void cik_rlc_stop(struct radeon_device *rdev)
{
	/* disable the RLC micro engine outright */
	WREG32(RLC_CNTL, 0);

	cik_enable_gui_idle_interrupt(rdev, false);

	/* make sure all serdes transactions have drained before returning */
	cik_wait_for_rlc_serdes(rdev);
}
5894
5895/**
5896 * cik_rlc_start - start the RLC ME
5897 *
5898 * @rdev: radeon_device pointer
5899 *
5900 * Unhalt the RLC ME (MicroEngine) (CIK).
5901 */
static void cik_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	cik_enable_gui_idle_interrupt(rdev, true);

	/* give the RLC a moment to come up before programming it further */
	udelay(50);
}
5910
5911/**
5912 * cik_rlc_resume - setup the RLC hw
5913 *
5914 * @rdev: radeon_device pointer
5915 *
5916 * Initialize the RLC registers, load the ucode,
5917 * and start the RLC (CIK).
5918 * Returns 0 for success, -EINVAL if the ucode is not available.
5919 */
static int cik_rlc_resume(struct radeon_device *rdev)
{
	u32 i, size, tmp;

	if (!rdev->rlc_fw)
		return -EINVAL;

	cik_rlc_stop(rdev);

	/* disable CG */
	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
	WREG32(RLC_CGCG_CGLS_CTRL, tmp);

	si_rlc_reset(rdev);

	/* powergating and clockgating must be set up before the RLC runs */
	cik_init_pg(rdev);

	cik_init_cg(rdev);

	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_CNTR_MAX, 0x00008000);

	/* program load-balancing params with all SEs/SHs broadcast-selected */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
	WREG32(RLC_LB_PARAMS, 0x00600408);
	WREG32(RLC_LB_CNTL, 0x80000004);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	if (rdev->new_fw) {
		/* new-style firmware: header describes offset/size; data is LE */
		const struct rlc_firmware_header_v1_0 *hdr =
			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
		const __le32 *fw_data = (const __le32 *)
			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		radeon_ucode_print_rlc_hdr(&hdr->header);

		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		WREG32(RLC_GPM_UCODE_ADDR, 0);
		for (i = 0; i < size; i++)
			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
	} else {
		/* legacy firmware: fixed per-ASIC sizes; data is big-endian */
		const __be32 *fw_data;

		switch (rdev->family) {
		case CHIP_BONAIRE:
		case CHIP_HAWAII:
		default:
			size = BONAIRE_RLC_UCODE_SIZE;
			break;
		case CHIP_KAVERI:
			size = KV_RLC_UCODE_SIZE;
			break;
		case CHIP_KABINI:
			size = KB_RLC_UCODE_SIZE;
			break;
		case CHIP_MULLINS:
			size = ML_RLC_UCODE_SIZE;
			break;
		}

		fw_data = (const __be32 *)rdev->rlc_fw->data;
		WREG32(RLC_GPM_UCODE_ADDR, 0);
		for (i = 0; i < size; i++)
			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(RLC_GPM_UCODE_ADDR, 0);
	}

	/* XXX - find out what chips support lbpw */
	cik_enable_lbpw(rdev, false);

	if (rdev->family == CHIP_BONAIRE)
		WREG32(RLC_DRIVER_DMA_STATUS, 0);

	cik_rlc_start(rdev);

	return 0;
}
6000
/* Enable/disable coarse-grain clock gating (CGCG) + clock light sleep (CGLS). */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		/* the RLC must be halted while reprogramming the serdes */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);

		/* restore the previous RLC state */
		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* dummy reads to flush pending CB clock-gating transactions */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
6036
/* Enable/disable medium-grain clock gating (MGCG) and related light sleep. */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				/* CP memory light sleep */
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		/* set bit 0, clear bit 1 of the MGCG override */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000001;
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* serdes reprogramming requires the RLC to be halted */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			/* CGTS (texture system) clock gating */
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		/* force both override bits on to defeat MGCG */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* turn off RLC and CP memory light sleep */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);
	}
}
6116
/* Memory-controller registers that carry the per-block CG/LS enable bits. */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
6129
6130static void cik_enable_mc_ls(struct radeon_device *rdev,
6131			     bool enable)
6132{
6133	int i;
6134	u32 orig, data;
6135
6136	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6137		orig = data = RREG32(mc_cg_registers[i]);
6138		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6139			data |= MC_LS_ENABLE;
6140		else
6141			data &= ~MC_LS_ENABLE;
6142		if (data != orig)
6143			WREG32(mc_cg_registers[i], data);
6144	}
6145}
6146
6147static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6148			       bool enable)
6149{
6150	int i;
6151	u32 orig, data;
6152
6153	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6154		orig = data = RREG32(mc_cg_registers[i]);
6155		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6156			data |= MC_CG_ENABLE;
6157		else
6158			data &= ~MC_CG_ENABLE;
6159		if (data != orig)
6160			WREG32(mc_cg_registers[i], data);
6161	}
6162}
6163
6164static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6165				 bool enable)
6166{
6167	u32 orig, data;
6168
6169	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6170		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6171		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6172	} else {
6173		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6174		data |= 0xff000000;
6175		if (data != orig)
6176			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6177
6178		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6179		data |= 0xff000000;
6180		if (data != orig)
6181			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6182	}
6183}
6184
6185static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6186				 bool enable)
6187{
6188	u32 orig, data;
6189
6190	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6191		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6192		data |= 0x100;
6193		if (orig != data)
6194			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6195
6196		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6197		data |= 0x100;
6198		if (orig != data)
6199			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6200	} else {
6201		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6202		data &= ~0x100;
6203		if (orig != data)
6204			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6205
6206		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6207		data &= ~0x100;
6208		if (orig != data)
6209			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6210	}
6211}
6212
/* Toggle UVD medium-grain clock gating. */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		/* NOTE(review): the read above is discarded — this stores 0xfff
		 * outright rather than OR-ing it in. Looks intentional (matches
		 * the mirrored "&= ~0xfff" in the disable path), but confirm. */
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
6238
6239static void cik_enable_bif_mgls(struct radeon_device *rdev,
6240			       bool enable)
6241{
6242	u32 orig, data;
6243
6244	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6245
6246	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6247		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6248			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6249	else
6250		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6251			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6252
6253	if (orig != data)
6254		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6255}
6256
6257static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6258				bool enable)
6259{
6260	u32 orig, data;
6261
6262	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6263
6264	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6265		data &= ~CLOCK_GATING_DIS;
6266	else
6267		data |= CLOCK_GATING_DIS;
6268
6269	if (orig != data)
6270		WREG32(HDP_HOST_PATH_CNTL, data);
6271}
6272
6273static void cik_enable_hdp_ls(struct radeon_device *rdev,
6274			      bool enable)
6275{
6276	u32 orig, data;
6277
6278	orig = data = RREG32(HDP_MEM_POWER_LS);
6279
6280	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6281		data |= HDP_LS_ENABLE;
6282	else
6283		data &= ~HDP_LS_ENABLE;
6284
6285	if (orig != data)
6286		WREG32(HDP_MEM_POWER_LS, data);
6287}
6288
/*
 * Enable or disable clock gating for the hardware blocks selected by the
 * @block bitmask (RADEON_CG_BLOCK_*).
 */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			/* MGCG must be on before CGCG */
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			/* disable in the reverse order */
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* MC CG is handled elsewhere (SMU) on APUs */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_VCE) {
		vce_v2_0_enable_mgcg(rdev, enable);
	}
}
6336
/* Enable clock gating on all supported blocks (GFX first). */
static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
6351
/* Disable clock gating, in the reverse order of cik_init_cg() (GFX last). */
static void cik_fini_cg(struct radeon_device *rdev)
{
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
6362
6363static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6364					  bool enable)
6365{
6366	u32 data, orig;
6367
6368	orig = data = RREG32(RLC_PG_CNTL);
6369	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6370		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6371	else
6372		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6373	if (orig != data)
6374		WREG32(RLC_PG_CNTL, data);
6375}
6376
6377static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6378					  bool enable)
6379{
6380	u32 data, orig;
6381
6382	orig = data = RREG32(RLC_PG_CNTL);
6383	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6384		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6385	else
6386		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6387	if (orig != data)
6388		WREG32(RLC_PG_CNTL, data);
6389}
6390
6391static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6392{
6393	u32 data, orig;
6394
6395	orig = data = RREG32(RLC_PG_CNTL);
6396	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6397		data &= ~DISABLE_CP_PG;
6398	else
6399		data |= DISABLE_CP_PG;
6400	if (orig != data)
6401		WREG32(RLC_PG_CNTL, data);
6402}
6403
6404static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6405{
6406	u32 data, orig;
6407
6408	orig = data = RREG32(RLC_PG_CNTL);
6409	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6410		data &= ~DISABLE_GDS_PG;
6411	else
6412		data |= DISABLE_GDS_PG;
6413	if (orig != data)
6414		WREG32(RLC_PG_CNTL, data);
6415}
6416
6417#define CP_ME_TABLE_SIZE    96
6418#define CP_ME_TABLE_OFFSET  2048
6419#define CP_MEC_TABLE_OFFSET 4096
6420
6421void cik_init_cp_pg_table(struct radeon_device *rdev)
6422{
6423	volatile u32 *dst_ptr;
6424	int me, i, max_me = 4;
6425	u32 bo_offset = 0;
6426	u32 table_offset, table_size;
6427
6428	if (rdev->family == CHIP_KAVERI)
6429		max_me = 5;
6430
6431	if (rdev->rlc.cp_table_ptr == NULL)
6432		return;
6433
6434	/* write the cp table buffer */
6435	dst_ptr = rdev->rlc.cp_table_ptr;
6436	for (me = 0; me < max_me; me++) {
6437		if (rdev->new_fw) {
6438			const __le32 *fw_data;
6439			const struct gfx_firmware_header_v1_0 *hdr;
6440
6441			if (me == 0) {
6442				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6443				fw_data = (const __le32 *)
6444					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6445				table_offset = le32_to_cpu(hdr->jt_offset);
6446				table_size = le32_to_cpu(hdr->jt_size);
6447			} else if (me == 1) {
6448				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6449				fw_data = (const __le32 *)
6450					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6451				table_offset = le32_to_cpu(hdr->jt_offset);
6452				table_size = le32_to_cpu(hdr->jt_size);
6453			} else if (me == 2) {
6454				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6455				fw_data = (const __le32 *)
6456					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6457				table_offset = le32_to_cpu(hdr->jt_offset);
6458				table_size = le32_to_cpu(hdr->jt_size);
6459			} else if (me == 3) {
6460				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6461				fw_data = (const __le32 *)
6462					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6463				table_offset = le32_to_cpu(hdr->jt_offset);
6464				table_size = le32_to_cpu(hdr->jt_size);
6465			} else {
6466				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6467				fw_data = (const __le32 *)
6468					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6469				table_offset = le32_to_cpu(hdr->jt_offset);
6470				table_size = le32_to_cpu(hdr->jt_size);
6471			}
6472
6473			for (i = 0; i < table_size; i ++) {
6474				dst_ptr[bo_offset + i] =
6475					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6476			}
6477			bo_offset += table_size;
6478		} else {
6479			const __be32 *fw_data;
6480			table_size = CP_ME_TABLE_SIZE;
6481
6482			if (me == 0) {
6483				fw_data = (const __be32 *)rdev->ce_fw->data;
6484				table_offset = CP_ME_TABLE_OFFSET;
6485			} else if (me == 1) {
6486				fw_data = (const __be32 *)rdev->pfp_fw->data;
6487				table_offset = CP_ME_TABLE_OFFSET;
6488			} else if (me == 2) {
6489				fw_data = (const __be32 *)rdev->me_fw->data;
6490				table_offset = CP_ME_TABLE_OFFSET;
6491			} else {
6492				fw_data = (const __be32 *)rdev->mec_fw->data;
6493				table_offset = CP_MEC_TABLE_OFFSET;
6494			}
6495
6496			for (i = 0; i < table_size; i ++) {
6497				dst_ptr[bo_offset + i] =
6498					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6499			}
6500			bo_offset += table_size;
6501		}
6502	}
6503}
6504
/* Enable/disable GFX coarse-grain powergating and RLC auto-powergating. */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* NOTE(review): dummy read, value discarded — presumably forces
		 * the GFX block out of its gated state; confirm against HW docs. */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
6534
6535static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6536{
6537	u32 mask = 0, tmp, tmp1;
6538	int i;
6539
6540	cik_select_se_sh(rdev, se, sh);
6541	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6542	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6543	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6544
6545	tmp &= 0xffff0000;
6546
6547	tmp |= tmp1;
6548	tmp >>= 16;
6549
6550	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6551		mask <<= 1;
6552		mask |= 1;
6553	}
6554
6555	return (~tmp) & mask;
6556}
6557
6558static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6559{
6560	u32 i, j, k, active_cu_number = 0;
6561	u32 mask, counter, cu_bitmap;
6562	u32 tmp = 0;
6563
6564	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6565		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6566			mask = 1;
6567			cu_bitmap = 0;
6568			counter = 0;
6569			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6570				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6571					if (counter < 2)
6572						cu_bitmap |= mask;
6573					counter ++;
6574				}
6575				mask <<= 1;
6576			}
6577
6578			active_cu_number += counter;
6579			tmp |= (cu_bitmap << (i * 16 + j * 8));
6580		}
6581	}
6582
6583	WREG32(RLC_PG_AO_CU_MASK, tmp);
6584
6585	tmp = RREG32(RLC_MAX_PG_CU);
6586	tmp &= ~MAX_PU_CU_MASK;
6587	tmp |= MAX_PU_CU(active_cu_number);
6588	WREG32(RLC_MAX_PG_CU, tmp);
6589}
6590
6591static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6592				       bool enable)
6593{
6594	u32 data, orig;
6595
6596	orig = data = RREG32(RLC_PG_CNTL);
6597	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6598		data |= STATIC_PER_CU_PG_ENABLE;
6599	else
6600		data &= ~STATIC_PER_CU_PG_ENABLE;
6601	if (orig != data)
6602		WREG32(RLC_PG_CNTL, data);
6603}
6604
6605static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6606					bool enable)
6607{
6608	u32 data, orig;
6609
6610	orig = data = RREG32(RLC_PG_CNTL);
6611	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6612		data |= DYN_PER_CU_PG_ENABLE;
6613	else
6614		data &= ~DYN_PER_CU_PG_ENABLE;
6615	if (orig != data)
6616		WREG32(RLC_PG_CNTL, data);
6617}
6618
#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D

/*
 * Program the RLC scratch memory with the clear-state descriptor and the
 * save/restore register list, then set up the powergating delays/thresholds.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* descriptor: clear-state buffer address (hi/lo) and size */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear-state buffer: zero out the descriptor */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	/* per-stage powergating entry delays */
	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	/* GRBM sample-gating idle threshold */
	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
6670
/* Enable/disable all three GFX powergating mechanisms together. */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6677
6678u32 cik_get_csb_size(struct radeon_device *rdev)
6679{
6680	u32 count = 0;
6681	const struct cs_section_def *sect = NULL;
6682	const struct cs_extent_def *ext = NULL;
6683
6684	if (rdev->rlc.cs_data == NULL)
6685		return 0;
6686
6687	/* begin clear state */
6688	count += 2;
6689	/* context control state */
6690	count += 3;
6691
6692	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6693		for (ext = sect->section; ext->extent != NULL; ++ext) {
6694			if (sect->id == SECT_CONTEXT)
6695				count += 2 + ext->reg_count;
6696			else
6697				return 0;
6698		}
6699	}
6700	/* pa_sc_raster_config/pa_sc_raster_config1 */
6701	count += 4;
6702	/* end clear state */
6703	count += 2;
6704	/* clear state */
6705	count += 2;
6706
6707	return count;
6708}
6709
/*
 * Fill @buffer with the clear-state command stream; the layout must match
 * the dword counts in cik_get_csb_size().  All values are written LE as the
 * GPU reads the buffer directly.
 */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	/* begin clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	/* context control */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* one SET_CONTEXT_REG packet per context extent */
	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* per-ASIC pa_sc_raster_config/pa_sc_raster_config1 values */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KAVERI:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_HAWAII:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	/* end clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
6774
6775static void cik_init_pg(struct radeon_device *rdev)
6776{
6777	if (rdev->pg_flags) {
6778		cik_enable_sck_slowdown_on_pu(rdev, true);
6779		cik_enable_sck_slowdown_on_pd(rdev, true);
6780		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6781			cik_init_gfx_cgpg(rdev);
6782			cik_enable_cp_pg(rdev, true);
6783			cik_enable_gds_pg(rdev, true);
6784		}
6785		cik_init_ao_cu_mask(rdev);
6786		cik_update_gfx_pg(rdev, true);
6787	}
6788}
6789
6790static void cik_fini_pg(struct radeon_device *rdev)
6791{
6792	if (rdev->pg_flags) {
6793		cik_update_gfx_pg(rdev, false);
6794		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6795			cik_enable_cp_pg(rdev, false);
6796			cik_enable_gds_pg(rdev, false);
6797		}
6798	}
6799}
6800
6801/*
6802 * Interrupts
6803 * Starting with r6xx, interrupts are handled via a ring buffer.
6804 * Ring buffers are areas of GPU accessible memory that the GPU
6805 * writes interrupt vectors into and the host reads vectors out of.
6806 * There is a rptr (read pointer) that determines where the
6807 * host is currently reading, and a wptr (write pointer)
6808 * which determines where the GPU has written.  When the
6809 * pointers are equal, the ring is idle.  When the GPU
6810 * writes vectors to the ring buffer, it increments the
6811 * wptr.  When there is an interrupt, the host then starts
6812 * fetching commands and processing them until the pointers are
6813 * equal again at which point it updates the rptr.
6814 */
6815
6816/**
6817 * cik_enable_interrupts - Enable the interrupt ring buffer
6818 *
6819 * @rdev: radeon_device pointer
6820 *
6821 * Enable the interrupt ring buffer (CIK).
6822 */
6823static void cik_enable_interrupts(struct radeon_device *rdev)
6824{
6825	u32 ih_cntl = RREG32(IH_CNTL);
6826	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6827
6828	ih_cntl |= ENABLE_INTR;
6829	ih_rb_cntl |= IH_RB_ENABLE;
6830	WREG32(IH_CNTL, ih_cntl);
6831	WREG32(IH_RB_CNTL, ih_rb_cntl);
6832	rdev->ih.enabled = true;
6833}
6834
6835/**
6836 * cik_disable_interrupts - Disable the interrupt ring buffer
6837 *
6838 * @rdev: radeon_device pointer
6839 *
6840 * Disable the interrupt ring buffer (CIK).
6841 */
6842static void cik_disable_interrupts(struct radeon_device *rdev)
6843{
6844	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6845	u32 ih_cntl = RREG32(IH_CNTL);
6846
6847	ih_rb_cntl &= ~IH_RB_ENABLE;
6848	ih_cntl &= ~ENABLE_INTR;
6849	WREG32(IH_RB_CNTL, ih_rb_cntl);
6850	WREG32(IH_CNTL, ih_cntl);
6851	/* set rptr, wptr to 0 */
6852	WREG32(IH_RB_RPTR, 0);
6853	WREG32(IH_RB_WPTR, 0);
6854	rdev->ih.enabled = false;
6855	rdev->ih.rptr = 0;
6856}
6857
6858/**
6859 * cik_disable_interrupt_state - Disable all interrupt sources
6860 *
6861 * @rdev: radeon_device pointer
6862 *
6863 * Clear all interrupt enable bits used by the driver (CIK).
6864 */
6865static void cik_disable_interrupt_state(struct radeon_device *rdev)
6866{
6867	u32 tmp;
6868
6869	/* gfx ring */
6870	tmp = RREG32(CP_INT_CNTL_RING0) &
6871		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6872	WREG32(CP_INT_CNTL_RING0, tmp);
6873	/* sdma */
6874	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6875	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6876	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6877	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6878	/* compute queues */
6879	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6880	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6881	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6882	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6883	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6884	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6885	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6886	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6887	/* grbm */
6888	WREG32(GRBM_INT_CNTL, 0);
6889	/* SRBM */
6890	WREG32(SRBM_INT_CNTL, 0);
6891	/* vline/vblank, etc. */
6892	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6893	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6894	if (rdev->num_crtc >= 4) {
6895		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6896		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6897	}
6898	if (rdev->num_crtc >= 6) {
6899		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6900		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6901	}
6902	/* pflip */
6903	if (rdev->num_crtc >= 2) {
6904		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6905		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6906	}
6907	if (rdev->num_crtc >= 4) {
6908		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6909		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6910	}
6911	if (rdev->num_crtc >= 6) {
6912		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6913		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6914	}
6915
6916	/* dac hotplug */
6917	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6918
6919	/* digital hotplug */
6920	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6921	WREG32(DC_HPD1_INT_CONTROL, tmp);
6922	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6923	WREG32(DC_HPD2_INT_CONTROL, tmp);
6924	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6925	WREG32(DC_HPD3_INT_CONTROL, tmp);
6926	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6927	WREG32(DC_HPD4_INT_CONTROL, tmp);
6928	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6929	WREG32(DC_HPD5_INT_CONTROL, tmp);
6930	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6931	WREG32(DC_HPD6_INT_CONTROL, tmp);
6932
6933}
6934
6935/**
6936 * cik_irq_init - init and enable the interrupt ring
6937 *
6938 * @rdev: radeon_device pointer
6939 *
6940 * Allocate a ring buffer for the interrupt controller,
6941 * enable the RLC, disable interrupts, enable the IH
6942 * ring buffer and enable it (CIK).
6943 * Called at device load and reume.
6944 * Returns 0 for success, errors for failure.
6945 */
6946static int cik_irq_init(struct radeon_device *rdev)
6947{
6948	int ret = 0;
6949	int rb_bufsz;
6950	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6951
6952	/* allocate ring */
6953	ret = r600_ih_ring_alloc(rdev);
6954	if (ret)
6955		return ret;
6956
6957	/* disable irqs */
6958	cik_disable_interrupts(rdev);
6959
6960	/* init rlc */
6961	ret = cik_rlc_resume(rdev);
6962	if (ret) {
6963		r600_ih_ring_fini(rdev);
6964		return ret;
6965	}
6966
6967	/* setup interrupt control */
6968	/* XXX this should actually be a bus address, not an MC address. same on older asics */
6969	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6970	interrupt_cntl = RREG32(INTERRUPT_CNTL);
6971	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6972	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6973	 */
6974	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6975	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6976	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6977	WREG32(INTERRUPT_CNTL, interrupt_cntl);
6978
6979	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6980	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6981
6982	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6983		      IH_WPTR_OVERFLOW_CLEAR |
6984		      (rb_bufsz << 1));
6985
6986	if (rdev->wb.enabled)
6987		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6988
6989	/* set the writeback address whether it's enabled or not */
6990	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6991	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6992
6993	WREG32(IH_RB_CNTL, ih_rb_cntl);
6994
6995	/* set rptr, wptr to 0 */
6996	WREG32(IH_RB_RPTR, 0);
6997	WREG32(IH_RB_WPTR, 0);
6998
6999	/* Default settings for IH_CNTL (disabled at first) */
7000	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7001	/* RPTR_REARM only works if msi's are enabled */
7002	if (rdev->msi_enabled)
7003		ih_cntl |= RPTR_REARM;
7004	WREG32(IH_CNTL, ih_cntl);
7005
7006	/* force the active interrupt state to all disabled */
7007	cik_disable_interrupt_state(rdev);
7008
7009	pci_set_master(rdev->pdev);
7010
7011	/* enable irqs */
7012	cik_enable_interrupts(rdev);
7013
7014	return ret;
7015}
7016
7017/**
7018 * cik_irq_set - enable/disable interrupt sources
7019 *
7020 * @rdev: radeon_device pointer
7021 *
7022 * Enable interrupt sources on the GPU (vblanks, hpd,
7023 * etc.) (CIK).
7024 * Returns 0 for success, errors for failure.
7025 */
7026int cik_irq_set(struct radeon_device *rdev)
7027{
7028	u32 cp_int_cntl;
7029	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
7030	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
7031	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7032	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7033	u32 grbm_int_cntl = 0;
7034	u32 dma_cntl, dma_cntl1;
7035
7036	if (!rdev->irq.installed) {
7037		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7038		return -EINVAL;
7039	}
7040	/* don't enable anything if the ih is disabled */
7041	if (!rdev->ih.enabled) {
7042		cik_disable_interrupts(rdev);
7043		/* force the active interrupt state to all disabled */
7044		cik_disable_interrupt_state(rdev);
7045		return 0;
7046	}
7047
7048	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7049		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7050	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7051
7052	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7053	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7054	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7055	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7056	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7057	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7058
7059	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7060	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7061
7062	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7063	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7064	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7065	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7066	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7067	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7068	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7069	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7070
7071	/* enable CP interrupts on all rings */
7072	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7073		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7074		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7075	}
7076	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7077		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7078		DRM_DEBUG("si_irq_set: sw int cp1\n");
7079		if (ring->me == 1) {
7080			switch (ring->pipe) {
7081			case 0:
7082				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7083				break;
7084			case 1:
7085				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7086				break;
7087			case 2:
7088				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7089				break;
7090			case 3:
7091				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7092				break;
7093			default:
7094				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7095				break;
7096			}
7097		} else if (ring->me == 2) {
7098			switch (ring->pipe) {
7099			case 0:
7100				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7101				break;
7102			case 1:
7103				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7104				break;
7105			case 2:
7106				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7107				break;
7108			case 3:
7109				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7110				break;
7111			default:
7112				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7113				break;
7114			}
7115		} else {
7116			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7117		}
7118	}
7119	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7120		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7121		DRM_DEBUG("si_irq_set: sw int cp2\n");
7122		if (ring->me == 1) {
7123			switch (ring->pipe) {
7124			case 0:
7125				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7126				break;
7127			case 1:
7128				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7129				break;
7130			case 2:
7131				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7132				break;
7133			case 3:
7134				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7135				break;
7136			default:
7137				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7138				break;
7139			}
7140		} else if (ring->me == 2) {
7141			switch (ring->pipe) {
7142			case 0:
7143				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7144				break;
7145			case 1:
7146				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7147				break;
7148			case 2:
7149				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7150				break;
7151			case 3:
7152				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7153				break;
7154			default:
7155				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7156				break;
7157			}
7158		} else {
7159			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7160		}
7161	}
7162
7163	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7164		DRM_DEBUG("cik_irq_set: sw int dma\n");
7165		dma_cntl |= TRAP_ENABLE;
7166	}
7167
7168	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7169		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7170		dma_cntl1 |= TRAP_ENABLE;
7171	}
7172
7173	if (rdev->irq.crtc_vblank_int[0] ||
7174	    atomic_read(&rdev->irq.pflip[0])) {
7175		DRM_DEBUG("cik_irq_set: vblank 0\n");
7176		crtc1 |= VBLANK_INTERRUPT_MASK;
7177	}
7178	if (rdev->irq.crtc_vblank_int[1] ||
7179	    atomic_read(&rdev->irq.pflip[1])) {
7180		DRM_DEBUG("cik_irq_set: vblank 1\n");
7181		crtc2 |= VBLANK_INTERRUPT_MASK;
7182	}
7183	if (rdev->irq.crtc_vblank_int[2] ||
7184	    atomic_read(&rdev->irq.pflip[2])) {
7185		DRM_DEBUG("cik_irq_set: vblank 2\n");
7186		crtc3 |= VBLANK_INTERRUPT_MASK;
7187	}
7188	if (rdev->irq.crtc_vblank_int[3] ||
7189	    atomic_read(&rdev->irq.pflip[3])) {
7190		DRM_DEBUG("cik_irq_set: vblank 3\n");
7191		crtc4 |= VBLANK_INTERRUPT_MASK;
7192	}
7193	if (rdev->irq.crtc_vblank_int[4] ||
7194	    atomic_read(&rdev->irq.pflip[4])) {
7195		DRM_DEBUG("cik_irq_set: vblank 4\n");
7196		crtc5 |= VBLANK_INTERRUPT_MASK;
7197	}
7198	if (rdev->irq.crtc_vblank_int[5] ||
7199	    atomic_read(&rdev->irq.pflip[5])) {
7200		DRM_DEBUG("cik_irq_set: vblank 5\n");
7201		crtc6 |= VBLANK_INTERRUPT_MASK;
7202	}
7203	if (rdev->irq.hpd[0]) {
7204		DRM_DEBUG("cik_irq_set: hpd 1\n");
7205		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7206	}
7207	if (rdev->irq.hpd[1]) {
7208		DRM_DEBUG("cik_irq_set: hpd 2\n");
7209		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7210	}
7211	if (rdev->irq.hpd[2]) {
7212		DRM_DEBUG("cik_irq_set: hpd 3\n");
7213		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7214	}
7215	if (rdev->irq.hpd[3]) {
7216		DRM_DEBUG("cik_irq_set: hpd 4\n");
7217		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7218	}
7219	if (rdev->irq.hpd[4]) {
7220		DRM_DEBUG("cik_irq_set: hpd 5\n");
7221		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7222	}
7223	if (rdev->irq.hpd[5]) {
7224		DRM_DEBUG("cik_irq_set: hpd 6\n");
7225		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7226	}
7227
7228	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7229
7230	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7231	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7232
7233	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7234	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7235	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7236	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7237	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7238	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7239	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7240	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7241
7242	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7243
7244	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7245	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7246	if (rdev->num_crtc >= 4) {
7247		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7248		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7249	}
7250	if (rdev->num_crtc >= 6) {
7251		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7252		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7253	}
7254
7255	if (rdev->num_crtc >= 2) {
7256		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7257		       GRPH_PFLIP_INT_MASK);
7258		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7259		       GRPH_PFLIP_INT_MASK);
7260	}
7261	if (rdev->num_crtc >= 4) {
7262		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7263		       GRPH_PFLIP_INT_MASK);
7264		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7265		       GRPH_PFLIP_INT_MASK);
7266	}
7267	if (rdev->num_crtc >= 6) {
7268		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7269		       GRPH_PFLIP_INT_MASK);
7270		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7271		       GRPH_PFLIP_INT_MASK);
7272	}
7273
7274	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7275	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7276	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7277	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7278	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7279	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7280
7281	/* posting read */
7282	RREG32(SRBM_STATUS);
7283
7284	return 0;
7285}
7286
7287/**
7288 * cik_irq_ack - ack interrupt sources
7289 *
7290 * @rdev: radeon_device pointer
7291 *
7292 * Ack interrupt sources on the GPU (vblanks, hpd,
7293 * etc.) (CIK).  Certain interrupts sources are sw
7294 * generated and do not require an explicit ack.
7295 */
7296static inline void cik_irq_ack(struct radeon_device *rdev)
7297{
7298	u32 tmp;
7299
7300	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7301	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7302	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7303	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7304	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7305	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7306	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7307
7308	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7309		EVERGREEN_CRTC0_REGISTER_OFFSET);
7310	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7311		EVERGREEN_CRTC1_REGISTER_OFFSET);
7312	if (rdev->num_crtc >= 4) {
7313		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7314			EVERGREEN_CRTC2_REGISTER_OFFSET);
7315		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7316			EVERGREEN_CRTC3_REGISTER_OFFSET);
7317	}
7318	if (rdev->num_crtc >= 6) {
7319		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7320			EVERGREEN_CRTC4_REGISTER_OFFSET);
7321		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7322			EVERGREEN_CRTC5_REGISTER_OFFSET);
7323	}
7324
7325	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7326		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7327		       GRPH_PFLIP_INT_CLEAR);
7328	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7329		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7330		       GRPH_PFLIP_INT_CLEAR);
7331	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7332		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7333	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7334		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7335	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7336		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7337	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7338		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7339
7340	if (rdev->num_crtc >= 4) {
7341		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7342			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7343			       GRPH_PFLIP_INT_CLEAR);
7344		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7345			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7346			       GRPH_PFLIP_INT_CLEAR);
7347		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7348			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7349		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7350			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7351		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7352			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7353		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7354			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7355	}
7356
7357	if (rdev->num_crtc >= 6) {
7358		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7359			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7360			       GRPH_PFLIP_INT_CLEAR);
7361		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7362			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7363			       GRPH_PFLIP_INT_CLEAR);
7364		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7365			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7366		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7367			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7368		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7369			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7370		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7371			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7372	}
7373
7374	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7375		tmp = RREG32(DC_HPD1_INT_CONTROL);
7376		tmp |= DC_HPDx_INT_ACK;
7377		WREG32(DC_HPD1_INT_CONTROL, tmp);
7378	}
7379	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7380		tmp = RREG32(DC_HPD2_INT_CONTROL);
7381		tmp |= DC_HPDx_INT_ACK;
7382		WREG32(DC_HPD2_INT_CONTROL, tmp);
7383	}
7384	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7385		tmp = RREG32(DC_HPD3_INT_CONTROL);
7386		tmp |= DC_HPDx_INT_ACK;
7387		WREG32(DC_HPD3_INT_CONTROL, tmp);
7388	}
7389	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7390		tmp = RREG32(DC_HPD4_INT_CONTROL);
7391		tmp |= DC_HPDx_INT_ACK;
7392		WREG32(DC_HPD4_INT_CONTROL, tmp);
7393	}
7394	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7395		tmp = RREG32(DC_HPD5_INT_CONTROL);
7396		tmp |= DC_HPDx_INT_ACK;
7397		WREG32(DC_HPD5_INT_CONTROL, tmp);
7398	}
7399	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7400		tmp = RREG32(DC_HPD6_INT_CONTROL);
7401		tmp |= DC_HPDx_INT_ACK;
7402		WREG32(DC_HPD6_INT_CONTROL, tmp);
7403	}
7404	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7405		tmp = RREG32(DC_HPD1_INT_CONTROL);
7406		tmp |= DC_HPDx_RX_INT_ACK;
7407		WREG32(DC_HPD1_INT_CONTROL, tmp);
7408	}
7409	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7410		tmp = RREG32(DC_HPD2_INT_CONTROL);
7411		tmp |= DC_HPDx_RX_INT_ACK;
7412		WREG32(DC_HPD2_INT_CONTROL, tmp);
7413	}
7414	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7415		tmp = RREG32(DC_HPD3_INT_CONTROL);
7416		tmp |= DC_HPDx_RX_INT_ACK;
7417		WREG32(DC_HPD3_INT_CONTROL, tmp);
7418	}
7419	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7420		tmp = RREG32(DC_HPD4_INT_CONTROL);
7421		tmp |= DC_HPDx_RX_INT_ACK;
7422		WREG32(DC_HPD4_INT_CONTROL, tmp);
7423	}
7424	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7425		tmp = RREG32(DC_HPD5_INT_CONTROL);
7426		tmp |= DC_HPDx_RX_INT_ACK;
7427		WREG32(DC_HPD5_INT_CONTROL, tmp);
7428	}
7429	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7430		tmp = RREG32(DC_HPD6_INT_CONTROL);
7431		tmp |= DC_HPDx_RX_INT_ACK;
7432		WREG32(DC_HPD6_INT_CONTROL, tmp);
7433	}
7434}
7435
7436/**
7437 * cik_irq_disable - disable interrupts
7438 *
7439 * @rdev: radeon_device pointer
7440 *
7441 * Disable interrupts on the hw (CIK).
7442 */
7443static void cik_irq_disable(struct radeon_device *rdev)
7444{
7445	cik_disable_interrupts(rdev);
7446	/* Wait and acknowledge irq */
7447	mdelay(1);
7448	cik_irq_ack(rdev);
7449	cik_disable_interrupt_state(rdev);
7450}
7451
7452/**
7453 * cik_irq_disable - disable interrupts for suspend
7454 *
7455 * @rdev: radeon_device pointer
7456 *
7457 * Disable interrupts and stop the RLC (CIK).
7458 * Used for suspend.
7459 */
7460static void cik_irq_suspend(struct radeon_device *rdev)
7461{
7462	cik_irq_disable(rdev);
7463	cik_rlc_stop(rdev);
7464}
7465
7466/**
7467 * cik_irq_fini - tear down interrupt support
7468 *
7469 * @rdev: radeon_device pointer
7470 *
7471 * Disable interrupts on the hw and free the IH ring
7472 * buffer (CIK).
7473 * Used for driver unload.
7474 */
7475static void cik_irq_fini(struct radeon_device *rdev)
7476{
7477	cik_irq_suspend(rdev);
7478	r600_ih_ring_fini(rdev);
7479}
7480
7481/**
7482 * cik_get_ih_wptr - get the IH ring buffer wptr
7483 *
7484 * @rdev: radeon_device pointer
7485 *
7486 * Get the IH ring buffer wptr from either the register
7487 * or the writeback memory buffer (CIK).  Also check for
7488 * ring buffer overflow and deal with it.
7489 * Used by cik_irq_process().
7490 * Returns the value of the wptr.
7491 */
7492static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7493{
7494	u32 wptr, tmp;
7495
7496	if (rdev->wb.enabled)
7497		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7498	else
7499		wptr = RREG32(IH_RB_WPTR);
7500
7501	if (wptr & RB_OVERFLOW) {
7502		wptr &= ~RB_OVERFLOW;
7503		/* When a ring buffer overflow happen start parsing interrupt
7504		 * from the last not overwritten vector (wptr + 16). Hopefully
7505		 * this should allow us to catchup.
7506		 */
7507		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7508			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7509		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7510		tmp = RREG32(IH_RB_CNTL);
7511		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7512		WREG32(IH_RB_CNTL, tmp);
7513	}
7514	return (wptr & rdev->ih.ptr_mask);
7515}
7516
7517/*        CIK IV Ring
7518 * Each IV ring entry is 128 bits:
7519 * [7:0]    - interrupt source id
7520 * [31:8]   - reserved
7521 * [59:32]  - interrupt source data
7522 * [63:60]  - reserved
7523 * [71:64]  - RINGID
7524 *            CP:
7525 *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7526 *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7527 *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7528 *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7529 *            PIPE_ID - ME0 0=3D
7530 *                    - ME1&2 compute dispatcher (4 pipes each)
7531 *            SDMA:
7532 *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7533 *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7534 *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7535 * [79:72]  - VMID
7536 * [95:80]  - PASID
7537 * [127:96] - reserved
7538 */
7539/**
7540 * cik_irq_process - interrupt handler
7541 *
7542 * @rdev: radeon_device pointer
7543 *
 * Interrupt handler (CIK).  Walk the IH ring,
7545 * ack interrupts and schedule work to handle
7546 * interrupt events.
7547 * Returns irq process return code.
7548 */
7549int cik_irq_process(struct radeon_device *rdev)
7550{
7551	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7552	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7553	u32 wptr;
7554	u32 rptr;
7555	u32 src_id, src_data, ring_id;
7556	u8 me_id, pipe_id, queue_id;
7557	u32 ring_index;
7558	bool queue_hotplug = false;
7559	bool queue_dp = false;
7560	bool queue_reset = false;
7561	u32 addr, status, mc_client;
7562	bool queue_thermal = false;
7563
7564	if (!rdev->ih.enabled || rdev->shutdown)
7565		return IRQ_NONE;
7566
7567	wptr = cik_get_ih_wptr(rdev);
7568
7569	if (wptr == rdev->ih.rptr)
7570		return IRQ_NONE;
7571restart_ih:
7572	/* is somebody else already processing irqs? */
7573	if (atomic_xchg(&rdev->ih.lock, 1))
7574		return IRQ_NONE;
7575
7576	rptr = rdev->ih.rptr;
7577	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7578
7579	/* Order reading of wptr vs. reading of IH ring data */
7580	rmb();
7581
7582	/* display interrupts */
7583	cik_irq_ack(rdev);
7584
7585	while (rptr != wptr) {
7586		/* wptr/rptr are in bytes! */
7587		ring_index = rptr / 4;
7588
7589		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7590		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7591		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7592
7593		switch (src_id) {
7594		case 1: /* D1 vblank/vline */
7595			switch (src_data) {
7596			case 0: /* D1 vblank */
7597				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7598					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7599
7600				if (rdev->irq.crtc_vblank_int[0]) {
7601					drm_handle_vblank(rdev->ddev, 0);
7602					rdev->pm.vblank_sync = true;
7603					wake_up(&rdev->irq.vblank_queue);
7604				}
7605				if (atomic_read(&rdev->irq.pflip[0]))
7606					radeon_crtc_handle_vblank(rdev, 0);
7607				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7608				DRM_DEBUG("IH: D1 vblank\n");
7609
7610				break;
7611			case 1: /* D1 vline */
7612				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7613					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7614
7615				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7616				DRM_DEBUG("IH: D1 vline\n");
7617
7618				break;
7619			default:
7620				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7621				break;
7622			}
7623			break;
7624		case 2: /* D2 vblank/vline */
7625			switch (src_data) {
7626			case 0: /* D2 vblank */
7627				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7628					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7629
7630				if (rdev->irq.crtc_vblank_int[1]) {
7631					drm_handle_vblank(rdev->ddev, 1);
7632					rdev->pm.vblank_sync = true;
7633					wake_up(&rdev->irq.vblank_queue);
7634				}
7635				if (atomic_read(&rdev->irq.pflip[1]))
7636					radeon_crtc_handle_vblank(rdev, 1);
7637				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7638				DRM_DEBUG("IH: D2 vblank\n");
7639
7640				break;
7641			case 1: /* D2 vline */
7642				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7643					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7644
7645				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7646				DRM_DEBUG("IH: D2 vline\n");
7647
7648				break;
7649			default:
7650				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7651				break;
7652			}
7653			break;
7654		case 3: /* D3 vblank/vline */
7655			switch (src_data) {
7656			case 0: /* D3 vblank */
7657				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7658					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7659
7660				if (rdev->irq.crtc_vblank_int[2]) {
7661					drm_handle_vblank(rdev->ddev, 2);
7662					rdev->pm.vblank_sync = true;
7663					wake_up(&rdev->irq.vblank_queue);
7664				}
7665				if (atomic_read(&rdev->irq.pflip[2]))
7666					radeon_crtc_handle_vblank(rdev, 2);
7667				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7668				DRM_DEBUG("IH: D3 vblank\n");
7669
7670				break;
7671			case 1: /* D3 vline */
7672				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7673					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7674
7675				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7676				DRM_DEBUG("IH: D3 vline\n");
7677
7678				break;
7679			default:
7680				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7681				break;
7682			}
7683			break;
7684		case 4: /* D4 vblank/vline */
7685			switch (src_data) {
7686			case 0: /* D4 vblank */
7687				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7688					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7689
7690				if (rdev->irq.crtc_vblank_int[3]) {
7691					drm_handle_vblank(rdev->ddev, 3);
7692					rdev->pm.vblank_sync = true;
7693					wake_up(&rdev->irq.vblank_queue);
7694				}
7695				if (atomic_read(&rdev->irq.pflip[3]))
7696					radeon_crtc_handle_vblank(rdev, 3);
7697				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7698				DRM_DEBUG("IH: D4 vblank\n");
7699
7700				break;
7701			case 1: /* D4 vline */
7702				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7703					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7704
7705				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7706				DRM_DEBUG("IH: D4 vline\n");
7707
7708				break;
7709			default:
7710				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7711				break;
7712			}
7713			break;
7714		case 5: /* D5 vblank/vline */
7715			switch (src_data) {
7716			case 0: /* D5 vblank */
7717				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7718					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7719
7720				if (rdev->irq.crtc_vblank_int[4]) {
7721					drm_handle_vblank(rdev->ddev, 4);
7722					rdev->pm.vblank_sync = true;
7723					wake_up(&rdev->irq.vblank_queue);
7724				}
7725				if (atomic_read(&rdev->irq.pflip[4]))
7726					radeon_crtc_handle_vblank(rdev, 4);
7727				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7728				DRM_DEBUG("IH: D5 vblank\n");
7729
7730				break;
7731			case 1: /* D5 vline */
7732				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7733					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7734
7735				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7736				DRM_DEBUG("IH: D5 vline\n");
7737
7738				break;
7739			default:
7740				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7741				break;
7742			}
7743			break;
7744		case 6: /* D6 vblank/vline */
7745			switch (src_data) {
7746			case 0: /* D6 vblank */
7747				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7748					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7749
7750				if (rdev->irq.crtc_vblank_int[5]) {
7751					drm_handle_vblank(rdev->ddev, 5);
7752					rdev->pm.vblank_sync = true;
7753					wake_up(&rdev->irq.vblank_queue);
7754				}
7755				if (atomic_read(&rdev->irq.pflip[5]))
7756					radeon_crtc_handle_vblank(rdev, 5);
7757				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7758				DRM_DEBUG("IH: D6 vblank\n");
7759
7760				break;
7761			case 1: /* D6 vline */
7762				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7763					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7764
7765				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7766				DRM_DEBUG("IH: D6 vline\n");
7767
7768				break;
7769			default:
7770				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7771				break;
7772			}
7773			break;
7774		case 8: /* D1 page flip */
7775		case 10: /* D2 page flip */
7776		case 12: /* D3 page flip */
7777		case 14: /* D4 page flip */
7778		case 16: /* D5 page flip */
7779		case 18: /* D6 page flip */
7780			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7781			if (radeon_use_pflipirq > 0)
7782				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7783			break;
7784		case 42: /* HPD hotplug */
7785			switch (src_data) {
7786			case 0:
7787				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7788					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7789
7790				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7791				queue_hotplug = true;
7792				DRM_DEBUG("IH: HPD1\n");
7793
7794				break;
7795			case 1:
7796				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7797					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7798
7799				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7800				queue_hotplug = true;
7801				DRM_DEBUG("IH: HPD2\n");
7802
7803				break;
7804			case 2:
7805				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7806					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7807
7808				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7809				queue_hotplug = true;
7810				DRM_DEBUG("IH: HPD3\n");
7811
7812				break;
7813			case 3:
7814				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7815					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7816
7817				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7818				queue_hotplug = true;
7819				DRM_DEBUG("IH: HPD4\n");
7820
7821				break;
7822			case 4:
7823				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7824					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7825
7826				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7827				queue_hotplug = true;
7828				DRM_DEBUG("IH: HPD5\n");
7829
7830				break;
7831			case 5:
7832				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7833					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7834
7835				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7836				queue_hotplug = true;
7837				DRM_DEBUG("IH: HPD6\n");
7838
7839				break;
7840			case 6:
7841				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7842					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7843
7844				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7845				queue_dp = true;
7846				DRM_DEBUG("IH: HPD_RX 1\n");
7847
7848				break;
7849			case 7:
7850				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7851					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7852
7853				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7854				queue_dp = true;
7855				DRM_DEBUG("IH: HPD_RX 2\n");
7856
7857				break;
7858			case 8:
7859				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7860					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7861
7862				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7863				queue_dp = true;
7864				DRM_DEBUG("IH: HPD_RX 3\n");
7865
7866				break;
7867			case 9:
7868				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7869					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7870
7871				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7872				queue_dp = true;
7873				DRM_DEBUG("IH: HPD_RX 4\n");
7874
7875				break;
7876			case 10:
7877				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7878					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7879
7880				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7881				queue_dp = true;
7882				DRM_DEBUG("IH: HPD_RX 5\n");
7883
7884				break;
7885			case 11:
7886				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7887					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7888
7889				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7890				queue_dp = true;
7891				DRM_DEBUG("IH: HPD_RX 6\n");
7892
7893				break;
7894			default:
7895				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7896				break;
7897			}
7898			break;
7899		case 96:
7900			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7901			WREG32(SRBM_INT_ACK, 0x1);
7902			break;
7903		case 124: /* UVD */
7904			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7905			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7906			break;
7907		case 146:
7908		case 147:
7909			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7910			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7911			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7912			/* reset addr and status */
7913			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7914			if (addr == 0x0 && status == 0x0)
7915				break;
7916			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7917			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7918				addr);
7919			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7920				status);
7921			cik_vm_decode_fault(rdev, status, addr, mc_client);
7922			break;
7923		case 167: /* VCE */
7924			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7925			switch (src_data) {
7926			case 0:
7927				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7928				break;
7929			case 1:
7930				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7931				break;
7932			default:
7933				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7934				break;
7935			}
7936			break;
7937		case 176: /* GFX RB CP_INT */
7938		case 177: /* GFX IB CP_INT */
7939			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7940			break;
7941		case 181: /* CP EOP event */
7942			DRM_DEBUG("IH: CP EOP\n");
7943			/* XXX check the bitfield order! */
7944			me_id = (ring_id & 0x60) >> 5;
7945			pipe_id = (ring_id & 0x18) >> 3;
7946			queue_id = (ring_id & 0x7) >> 0;
7947			switch (me_id) {
7948			case 0:
7949				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7950				break;
7951			case 1:
7952			case 2:
7953				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7954					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7955				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7956					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7957				break;
7958			}
7959			break;
7960		case 184: /* CP Privileged reg access */
7961			DRM_ERROR("Illegal register access in command stream\n");
7962			/* XXX check the bitfield order! */
7963			me_id = (ring_id & 0x60) >> 5;
7964			pipe_id = (ring_id & 0x18) >> 3;
7965			queue_id = (ring_id & 0x7) >> 0;
7966			switch (me_id) {
7967			case 0:
7968				/* This results in a full GPU reset, but all we need to do is soft
7969				 * reset the CP for gfx
7970				 */
7971				queue_reset = true;
7972				break;
7973			case 1:
7974				/* XXX compute */
7975				queue_reset = true;
7976				break;
7977			case 2:
7978				/* XXX compute */
7979				queue_reset = true;
7980				break;
7981			}
7982			break;
7983		case 185: /* CP Privileged inst */
7984			DRM_ERROR("Illegal instruction in command stream\n");
7985			/* XXX check the bitfield order! */
7986			me_id = (ring_id & 0x60) >> 5;
7987			pipe_id = (ring_id & 0x18) >> 3;
7988			queue_id = (ring_id & 0x7) >> 0;
7989			switch (me_id) {
7990			case 0:
7991				/* This results in a full GPU reset, but all we need to do is soft
7992				 * reset the CP for gfx
7993				 */
7994				queue_reset = true;
7995				break;
7996			case 1:
7997				/* XXX compute */
7998				queue_reset = true;
7999				break;
8000			case 2:
8001				/* XXX compute */
8002				queue_reset = true;
8003				break;
8004			}
8005			break;
8006		case 224: /* SDMA trap event */
8007			/* XXX check the bitfield order! */
8008			me_id = (ring_id & 0x3) >> 0;
8009			queue_id = (ring_id & 0xc) >> 2;
8010			DRM_DEBUG("IH: SDMA trap\n");
8011			switch (me_id) {
8012			case 0:
8013				switch (queue_id) {
8014				case 0:
8015					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8016					break;
8017				case 1:
8018					/* XXX compute */
8019					break;
8020				case 2:
8021					/* XXX compute */
8022					break;
8023				}
8024				break;
8025			case 1:
8026				switch (queue_id) {
8027				case 0:
8028					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8029					break;
8030				case 1:
8031					/* XXX compute */
8032					break;
8033				case 2:
8034					/* XXX compute */
8035					break;
8036				}
8037				break;
8038			}
8039			break;
8040		case 230: /* thermal low to high */
8041			DRM_DEBUG("IH: thermal low to high\n");
8042			rdev->pm.dpm.thermal.high_to_low = false;
8043			queue_thermal = true;
8044			break;
8045		case 231: /* thermal high to low */
8046			DRM_DEBUG("IH: thermal high to low\n");
8047			rdev->pm.dpm.thermal.high_to_low = true;
8048			queue_thermal = true;
8049			break;
8050		case 233: /* GUI IDLE */
8051			DRM_DEBUG("IH: GUI idle\n");
8052			break;
8053		case 241: /* SDMA Privileged inst */
8054		case 247: /* SDMA Privileged inst */
8055			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8056			/* XXX check the bitfield order! */
8057			me_id = (ring_id & 0x3) >> 0;
8058			queue_id = (ring_id & 0xc) >> 2;
8059			switch (me_id) {
8060			case 0:
8061				switch (queue_id) {
8062				case 0:
8063					queue_reset = true;
8064					break;
8065				case 1:
8066					/* XXX compute */
8067					queue_reset = true;
8068					break;
8069				case 2:
8070					/* XXX compute */
8071					queue_reset = true;
8072					break;
8073				}
8074				break;
8075			case 1:
8076				switch (queue_id) {
8077				case 0:
8078					queue_reset = true;
8079					break;
8080				case 1:
8081					/* XXX compute */
8082					queue_reset = true;
8083					break;
8084				case 2:
8085					/* XXX compute */
8086					queue_reset = true;
8087					break;
8088				}
8089				break;
8090			}
8091			break;
8092		default:
8093			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8094			break;
8095		}
8096
8097		/* wptr/rptr are in bytes! */
8098		rptr += 16;
8099		rptr &= rdev->ih.ptr_mask;
8100		WREG32(IH_RB_RPTR, rptr);
8101	}
8102	if (queue_dp)
8103		schedule_work(&rdev->dp_work);
8104	if (queue_hotplug)
8105		schedule_delayed_work(&rdev->hotplug_work, 0);
8106	if (queue_reset) {
8107		rdev->needs_reset = true;
8108		wake_up_all(&rdev->fence_queue);
8109	}
8110	if (queue_thermal)
8111		schedule_work(&rdev->pm.dpm.thermal.work);
8112	rdev->ih.rptr = rptr;
8113	atomic_set(&rdev->ih.lock, 0);
8114
8115	/* make sure wptr hasn't changed while processing */
8116	wptr = cik_get_ih_wptr(rdev);
8117	if (wptr != rptr)
8118		goto restart_ih;
8119
8120	return IRQ_HANDLED;
8121}
8122
8123/*
8124 * startup/shutdown callbacks
8125 */
8126static void cik_uvd_init(struct radeon_device *rdev)
8127{
8128	int r;
8129
8130	if (!rdev->has_uvd)
8131		return;
8132
8133	r = radeon_uvd_init(rdev);
8134	if (r) {
8135		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8136		/*
8137		 * At this point rdev->uvd.vcpu_bo is NULL which trickles down
8138		 * to early fails cik_uvd_start() and thus nothing happens
8139		 * there. So it is pointless to try to go through that code
8140		 * hence why we disable uvd here.
8141		 */
8142		rdev->has_uvd = 0;
8143		return;
8144	}
8145	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8146	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8147}
8148
8149static void cik_uvd_start(struct radeon_device *rdev)
8150{
8151	int r;
8152
8153	if (!rdev->has_uvd)
8154		return;
8155
8156	r = radeon_uvd_resume(rdev);
8157	if (r) {
8158		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8159		goto error;
8160	}
8161	r = uvd_v4_2_resume(rdev);
8162	if (r) {
8163		dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8164		goto error;
8165	}
8166	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8167	if (r) {
8168		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8169		goto error;
8170	}
8171	return;
8172
8173error:
8174	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8175}
8176
8177static void cik_uvd_resume(struct radeon_device *rdev)
8178{
8179	struct radeon_ring *ring;
8180	int r;
8181
8182	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8183		return;
8184
8185	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8186	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8187	if (r) {
8188		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8189		return;
8190	}
8191	r = uvd_v1_0_init(rdev);
8192	if (r) {
8193		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8194		return;
8195	}
8196}
8197
8198static void cik_vce_init(struct radeon_device *rdev)
8199{
8200	int r;
8201
8202	if (!rdev->has_vce)
8203		return;
8204
8205	r = radeon_vce_init(rdev);
8206	if (r) {
8207		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8208		/*
8209		 * At this point rdev->vce.vcpu_bo is NULL which trickles down
8210		 * to early fails cik_vce_start() and thus nothing happens
8211		 * there. So it is pointless to try to go through that code
8212		 * hence why we disable vce here.
8213		 */
8214		rdev->has_vce = 0;
8215		return;
8216	}
8217	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8218	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8219	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8220	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8221}
8222
8223static void cik_vce_start(struct radeon_device *rdev)
8224{
8225	int r;
8226
8227	if (!rdev->has_vce)
8228		return;
8229
8230	r = radeon_vce_resume(rdev);
8231	if (r) {
8232		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8233		goto error;
8234	}
8235	r = vce_v2_0_resume(rdev);
8236	if (r) {
8237		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8238		goto error;
8239	}
8240	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8241	if (r) {
8242		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8243		goto error;
8244	}
8245	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8246	if (r) {
8247		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8248		goto error;
8249	}
8250	return;
8251
8252error:
8253	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8254	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8255}
8256
8257static void cik_vce_resume(struct radeon_device *rdev)
8258{
8259	struct radeon_ring *ring;
8260	int r;
8261
8262	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8263		return;
8264
8265	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8266	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8267	if (r) {
8268		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8269		return;
8270	}
8271	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8272	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8273	if (r) {
8274		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8275		return;
8276	}
8277	r = vce_v1_0_init(rdev);
8278	if (r) {
8279		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8280		return;
8281	}
8282}
8283
8284/**
8285 * cik_startup - program the asic to a functional state
8286 *
8287 * @rdev: radeon_device pointer
8288 *
8289 * Programs the asic to a functional state (CIK).
8290 * Called by cik_init() and cik_resume().
8291 * Returns 0 for success, error for failure.
8292 */
8293static int cik_startup(struct radeon_device *rdev)
8294{
8295	struct radeon_ring *ring;
8296	u32 nop;
8297	int r;
8298
8299	/* enable pcie gen2/3 link */
8300	cik_pcie_gen3_enable(rdev);
8301	/* enable aspm */
8302	cik_program_aspm(rdev);
8303
8304	/* scratch needs to be initialized before MC */
8305	r = r600_vram_scratch_init(rdev);
8306	if (r)
8307		return r;
8308
8309	cik_mc_program(rdev);
8310
8311	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8312		r = ci_mc_load_microcode(rdev);
8313		if (r) {
8314			DRM_ERROR("Failed to load MC firmware!\n");
8315			return r;
8316		}
8317	}
8318
8319	r = cik_pcie_gart_enable(rdev);
8320	if (r)
8321		return r;
8322	cik_gpu_init(rdev);
8323
8324	/* allocate rlc buffers */
8325	if (rdev->flags & RADEON_IS_IGP) {
8326		if (rdev->family == CHIP_KAVERI) {
8327			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8328			rdev->rlc.reg_list_size =
8329				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8330		} else {
8331			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8332			rdev->rlc.reg_list_size =
8333				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8334		}
8335	}
8336	rdev->rlc.cs_data = ci_cs_data;
8337	rdev->rlc.cp_table_size = roundup2(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
8338	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
8339	r = sumo_rlc_init(rdev);
8340	if (r) {
8341		DRM_ERROR("Failed to init rlc BOs!\n");
8342		return r;
8343	}
8344
8345	/* allocate wb buffer */
8346	r = radeon_wb_init(rdev);
8347	if (r)
8348		return r;
8349
8350	/* allocate mec buffers */
8351	r = cik_mec_init(rdev);
8352	if (r) {
8353		DRM_ERROR("Failed to init MEC BOs!\n");
8354		return r;
8355	}
8356
8357	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8358	if (r) {
8359		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8360		return r;
8361	}
8362
8363	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8364	if (r) {
8365		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8366		return r;
8367	}
8368
8369	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8370	if (r) {
8371		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8372		return r;
8373	}
8374
8375	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8376	if (r) {
8377		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8378		return r;
8379	}
8380
8381	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8382	if (r) {
8383		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8384		return r;
8385	}
8386
8387	cik_uvd_start(rdev);
8388	cik_vce_start(rdev);
8389
8390	/* Enable IRQ */
8391	if (!rdev->irq.installed) {
8392		r = radeon_irq_kms_init(rdev);
8393		if (r)
8394			return r;
8395	}
8396
8397	r = cik_irq_init(rdev);
8398	if (r) {
8399		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8400		radeon_irq_kms_fini(rdev);
8401		return r;
8402	}
8403	cik_irq_set(rdev);
8404
8405	if (rdev->family == CHIP_HAWAII) {
8406		if (rdev->new_fw)
8407			nop = PACKET3(PACKET3_NOP, 0x3FFF);
8408		else
8409			nop = RADEON_CP_PACKET2;
8410	} else {
8411		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8412	}
8413
8414	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8415	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8416			     nop);
8417	if (r)
8418		return r;
8419
8420	/* set up the compute queues */
8421	/* type-2 packets are deprecated on MEC, use type-3 instead */
8422	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8423	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8424			     nop);
8425	if (r)
8426		return r;
8427	ring->me = 1; /* first MEC */
8428	ring->pipe = 0; /* first pipe */
8429	ring->queue = 0; /* first queue */
8430	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8431
8432	/* type-2 packets are deprecated on MEC, use type-3 instead */
8433	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8434	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8435			     nop);
8436	if (r)
8437		return r;
8438	/* dGPU only have 1 MEC */
8439	ring->me = 1; /* first MEC */
8440	ring->pipe = 0; /* first pipe */
8441	ring->queue = 1; /* second queue */
8442	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8443
8444	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8445	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8446			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8447	if (r)
8448		return r;
8449
8450	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8451	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8452			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8453	if (r)
8454		return r;
8455
8456	r = cik_cp_resume(rdev);
8457	if (r)
8458		return r;
8459
8460	r = cik_sdma_resume(rdev);
8461	if (r)
8462		return r;
8463
8464	cik_uvd_resume(rdev);
8465	cik_vce_resume(rdev);
8466
8467	r = radeon_ib_pool_init(rdev);
8468	if (r) {
8469		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8470		return r;
8471	}
8472
8473	r = radeon_vm_manager_init(rdev);
8474	if (r) {
8475		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8476		return r;
8477	}
8478
8479	r = radeon_audio_init(rdev);
8480	if (r)
8481		return r;
8482
8483	return 0;
8484}
8485
8486/**
8487 * cik_resume - resume the asic to a functional state
8488 *
8489 * @rdev: radeon_device pointer
8490 *
8491 * Programs the asic to a functional state (CIK).
8492 * Called at resume.
8493 * Returns 0 for success, error for failure.
8494 */
8495int cik_resume(struct radeon_device *rdev)
8496{
8497	int r;
8498
8499	/* post card */
8500	atom_asic_init(rdev->mode_info.atom_context);
8501
8502	/* init golden registers */
8503	cik_init_golden_registers(rdev);
8504
8505	if (rdev->pm.pm_method == PM_METHOD_DPM)
8506		radeon_pm_resume(rdev);
8507
8508	rdev->accel_working = true;
8509	r = cik_startup(rdev);
8510	if (r) {
8511		DRM_ERROR("cik startup failed on resume\n");
8512		rdev->accel_working = false;
8513		return r;
8514	}
8515
8516	return r;
8517
8518}
8519
8520/**
8521 * cik_suspend - suspend the asic
8522 *
8523 * @rdev: radeon_device pointer
8524 *
8525 * Bring the chip into a state suitable for suspend (CIK).
8526 * Called at suspend.
8527 * Returns 0 for success.
8528 */
8529int cik_suspend(struct radeon_device *rdev)
8530{
8531	radeon_pm_suspend(rdev);
8532	radeon_audio_fini(rdev);
8533	radeon_vm_manager_fini(rdev);
8534	cik_cp_enable(rdev, false);
8535	cik_sdma_enable(rdev, false);
8536	if (rdev->has_uvd) {
8537		uvd_v1_0_fini(rdev);
8538		radeon_uvd_suspend(rdev);
8539	}
8540	if (rdev->has_vce)
8541		radeon_vce_suspend(rdev);
8542	cik_fini_pg(rdev);
8543	cik_fini_cg(rdev);
8544	cik_irq_suspend(rdev);
8545	radeon_wb_disable(rdev);
8546	cik_pcie_gart_disable(rdev);
8547	return 0;
8548}
8549
8550/* Plan is to move initialization in that function and use
8551 * helper function so that radeon_device_init pretty much
8552 * do nothing more than calling asic specific function. This
8553 * should also allow to remove a bunch of callback function
8554 * like vram_info.
8555 */
8556/**
8557 * cik_init - asic specific driver and hw init
8558 *
8559 * @rdev: radeon_device pointer
8560 *
8561 * Setup asic specific driver variables and program the hw
8562 * to a functional state (CIK).
8563 * Called at driver startup.
8564 * Returns 0 for success, errors for failure.
8565 */
8566int cik_init(struct radeon_device *rdev)
8567{
8568	struct radeon_ring *ring;
8569	int r;
8570
8571	/* Read BIOS */
8572	if (!radeon_get_bios(rdev)) {
8573		if (ASIC_IS_AVIVO(rdev))
8574			return -EINVAL;
8575	}
8576	/* Must be an ATOMBIOS */
8577	if (!rdev->is_atom_bios) {
8578		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8579		return -EINVAL;
8580	}
8581	r = radeon_atombios_init(rdev);
8582	if (r)
8583		return r;
8584
8585	/* Post card if necessary */
8586	if (!radeon_card_posted(rdev)) {
8587		if (!rdev->bios) {
8588			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8589			return -EINVAL;
8590		}
8591		DRM_INFO("GPU not posted. posting now...\n");
8592		atom_asic_init(rdev->mode_info.atom_context);
8593	}
8594	/* init golden registers */
8595	cik_init_golden_registers(rdev);
8596	/* Initialize scratch registers */
8597	cik_scratch_init(rdev);
8598	/* Initialize surface registers */
8599	radeon_surface_init(rdev);
8600	/* Initialize clocks */
8601	radeon_get_clock_info(rdev->ddev);
8602
8603	/* Fence driver */
8604	r = radeon_fence_driver_init(rdev);
8605	if (r)
8606		return r;
8607
8608	/* initialize memory controller */
8609	r = cik_mc_init(rdev);
8610	if (r)
8611		return r;
8612	/* Memory manager */
8613	r = radeon_bo_init(rdev);
8614	if (r)
8615		return r;
8616
8617	if (rdev->flags & RADEON_IS_IGP) {
8618		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8619		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8620			r = cik_init_microcode(rdev);
8621			if (r) {
8622				DRM_ERROR("Failed to load firmware!\n");
8623				return r;
8624			}
8625		}
8626	} else {
8627		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8628		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8629		    !rdev->mc_fw) {
8630			r = cik_init_microcode(rdev);
8631			if (r) {
8632				DRM_ERROR("Failed to load firmware!\n");
8633				return r;
8634			}
8635		}
8636	}
8637
8638	/* Initialize power management */
8639	radeon_pm_init(rdev);
8640
8641	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8642	ring->ring_obj = NULL;
8643	r600_ring_init(rdev, ring, 1024 * 1024);
8644
8645	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8646	ring->ring_obj = NULL;
8647	r600_ring_init(rdev, ring, 1024 * 1024);
8648	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8649	if (r)
8650		return r;
8651
8652	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8653	ring->ring_obj = NULL;
8654	r600_ring_init(rdev, ring, 1024 * 1024);
8655	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8656	if (r)
8657		return r;
8658
8659	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8660	ring->ring_obj = NULL;
8661	r600_ring_init(rdev, ring, 256 * 1024);
8662
8663	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8664	ring->ring_obj = NULL;
8665	r600_ring_init(rdev, ring, 256 * 1024);
8666
8667	cik_uvd_init(rdev);
8668	cik_vce_init(rdev);
8669
8670	rdev->ih.ring_obj = NULL;
8671	r600_ih_ring_init(rdev, 64 * 1024);
8672
8673	r = r600_pcie_gart_init(rdev);
8674	if (r)
8675		return r;
8676
8677	rdev->accel_working = true;
8678	r = cik_startup(rdev);
8679	if (r) {
8680		dev_err(rdev->dev, "disabling GPU acceleration\n");
8681		cik_cp_fini(rdev);
8682		cik_sdma_fini(rdev);
8683		cik_irq_fini(rdev);
8684		sumo_rlc_fini(rdev);
8685		cik_mec_fini(rdev);
8686		radeon_wb_fini(rdev);
8687		radeon_ib_pool_fini(rdev);
8688		radeon_vm_manager_fini(rdev);
8689		radeon_irq_kms_fini(rdev);
8690		cik_pcie_gart_fini(rdev);
8691		rdev->accel_working = false;
8692	}
8693
8694	/* Don't start up if the MC ucode is missing.
8695	 * The default clocks and voltages before the MC ucode
8696	 * is loaded are not suffient for advanced operations.
8697	 */
8698	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8699		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8700		return -EINVAL;
8701	}
8702
8703	return 0;
8704}
8705
8706/**
8707 * cik_fini - asic specific driver and hw fini
8708 *
8709 * @rdev: radeon_device pointer
8710 *
8711 * Tear down the asic specific driver variables and program the hw
8712 * to an idle state (CIK).
8713 * Called at driver unload.
8714 */
8715void cik_fini(struct radeon_device *rdev)
8716{
8717	radeon_pm_fini(rdev);
8718	cik_cp_fini(rdev);
8719	cik_sdma_fini(rdev);
8720	cik_fini_pg(rdev);
8721	cik_fini_cg(rdev);
8722	cik_irq_fini(rdev);
8723	sumo_rlc_fini(rdev);
8724	cik_mec_fini(rdev);
8725	radeon_wb_fini(rdev);
8726	radeon_vm_manager_fini(rdev);
8727	radeon_ib_pool_fini(rdev);
8728	radeon_irq_kms_fini(rdev);
8729	uvd_v1_0_fini(rdev);
8730	radeon_uvd_fini(rdev);
8731	radeon_vce_fini(rdev);
8732	cik_pcie_gart_fini(rdev);
8733	r600_vram_scratch_fini(rdev);
8734	radeon_gem_fini(rdev);
8735	radeon_fence_driver_fini(rdev);
8736	radeon_bo_fini(rdev);
8737	radeon_atombios_fini(rdev);
8738	kfree(rdev->bios);
8739	rdev->bios = NULL;
8740}
8741
8742void dce8_program_fmt(struct drm_encoder *encoder)
8743{
8744	struct drm_device *dev = encoder->dev;
8745	struct radeon_device *rdev = dev->dev_private;
8746	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8747	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8748	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8749	int bpc = 0;
8750	u32 tmp = 0;
8751	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8752
8753	if (connector) {
8754		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8755		bpc = radeon_get_monitor_bpc(connector);
8756		dither = radeon_connector->dither;
8757	}
8758
8759	/* LVDS/eDP FMT is set up by atom */
8760	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8761		return;
8762
8763	/* not needed for analog */
8764	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8765	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8766		return;
8767
8768	if (bpc == 0)
8769		return;
8770
8771	switch (bpc) {
8772	case 6:
8773		if (dither == RADEON_FMT_DITHER_ENABLE)
8774			/* XXX sort out optimal dither settings */
8775			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8776				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8777		else
8778			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8779		break;
8780	case 8:
8781		if (dither == RADEON_FMT_DITHER_ENABLE)
8782			/* XXX sort out optimal dither settings */
8783			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8784				FMT_RGB_RANDOM_ENABLE |
8785				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8786		else
8787			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8788		break;
8789	case 10:
8790		if (dither == RADEON_FMT_DITHER_ENABLE)
8791			/* XXX sort out optimal dither settings */
8792			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8793				FMT_RGB_RANDOM_ENABLE |
8794				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8795		else
8796			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8797		break;
8798	default:
8799		/* not needed */
8800		break;
8801	}
8802
8803	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8804}
8805
8806/* display watermark setup */
8807/**
8808 * dce8_line_buffer_adjust - Set up the line buffer
8809 *
8810 * @rdev: radeon_device pointer
8811 * @radeon_crtc: the selected display controller
8812 * @mode: the current display mode on the selected display
8813 * controller
8814 *
8815 * Setup up the line buffer allocation for
8816 * the selected display controller (CIK).
8817 * Returns the line buffer size in pixels.
8818 */
8819static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8820				   struct radeon_crtc *radeon_crtc,
8821				   struct drm_display_mode *mode)
8822{
8823	u32 tmp, buffer_alloc, i;
8824	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8825	/*
8826	 * Line Buffer Setup
8827	 * There are 6 line buffers, one for each display controllers.
8828	 * There are 3 partitions per LB. Select the number of partitions
8829	 * to enable based on the display width.  For display widths larger
8830	 * than 4096, you need use to use 2 display controllers and combine
8831	 * them using the stereo blender.
8832	 */
8833	if (radeon_crtc->base.enabled && mode) {
8834		if (mode->crtc_hdisplay < 1920) {
8835			tmp = 1;
8836			buffer_alloc = 2;
8837		} else if (mode->crtc_hdisplay < 2560) {
8838			tmp = 2;
8839			buffer_alloc = 2;
8840		} else if (mode->crtc_hdisplay < 4096) {
8841			tmp = 0;
8842			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8843		} else {
8844			DRM_DEBUG_KMS("Mode too big for LB!\n");
8845			tmp = 0;
8846			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8847		}
8848	} else {
8849		tmp = 1;
8850		buffer_alloc = 0;
8851	}
8852
8853	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8854	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8855
8856	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8857	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8858	for (i = 0; i < rdev->usec_timeout; i++) {
8859		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8860		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8861			break;
8862		udelay(1);
8863	}
8864
8865	if (radeon_crtc->base.enabled && mode) {
8866		switch (tmp) {
8867		case 0:
8868		default:
8869			return 4096 * 2;
8870		case 1:
8871			return 1920 * 2;
8872		case 2:
8873			return 2560 * 2;
8874		}
8875	}
8876
8877	/* controller not enabled, so no lb used */
8878	return 0;
8879}
8880
8881/**
8882 * cik_get_number_of_dram_channels - get the number of dram channels
8883 *
8884 * @rdev: radeon_device pointer
8885 *
8886 * Look up the number of video ram channels (CIK).
8887 * Used for display watermark bandwidth calculations
8888 * Returns the number of dram channels
8889 */
8890static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8891{
8892	u32 tmp = RREG32(MC_SHARED_CHMAP);
8893
8894	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8895	case 0:
8896	default:
8897		return 1;
8898	case 1:
8899		return 2;
8900	case 2:
8901		return 4;
8902	case 3:
8903		return 8;
8904	case 4:
8905		return 3;
8906	case 5:
8907		return 6;
8908	case 6:
8909		return 10;
8910	case 7:
8911		return 12;
8912	case 8:
8913		return 16;
8914	}
8915}
8916
/* per-head input parameters for the DCE8 watermark calculations below */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8932
8933/**
8934 * dce8_dram_bandwidth - get the dram bandwidth
8935 *
8936 * @wm: watermark calculation data
8937 *
8938 * Calculate the raw dram bandwidth (CIK).
8939 * Used for display watermark bandwidth calculations
8940 * Returns the dram bandwidth in MBytes/s
8941 */
8942static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8943{
8944	/* Calculate raw DRAM Bandwidth */
8945	fixed20_12 dram_efficiency; /* 0.7 */
8946	fixed20_12 yclk, dram_channels, bandwidth;
8947	fixed20_12 a;
8948
8949	a.full = dfixed_const(1000);
8950	yclk.full = dfixed_const(wm->yclk);
8951	yclk.full = dfixed_div(yclk, a);
8952	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8953	a.full = dfixed_const(10);
8954	dram_efficiency.full = dfixed_const(7);
8955	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8956	bandwidth.full = dfixed_mul(dram_channels, yclk);
8957	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8958
8959	return dfixed_trunc(bandwidth);
8960}
8961
8962/**
8963 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8964 *
8965 * @wm: watermark calculation data
8966 *
8967 * Calculate the dram bandwidth used for display (CIK).
8968 * Used for display watermark bandwidth calculations
8969 * Returns the dram bandwidth for display in MBytes/s
8970 */
8971static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8972{
8973	/* Calculate DRAM Bandwidth and the part allocated to display. */
8974	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8975	fixed20_12 yclk, dram_channels, bandwidth;
8976	fixed20_12 a;
8977
8978	a.full = dfixed_const(1000);
8979	yclk.full = dfixed_const(wm->yclk);
8980	yclk.full = dfixed_div(yclk, a);
8981	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8982	a.full = dfixed_const(10);
8983	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8984	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8985	bandwidth.full = dfixed_mul(dram_channels, yclk);
8986	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8987
8988	return dfixed_trunc(bandwidth);
8989}
8990
8991/**
8992 * dce8_data_return_bandwidth - get the data return bandwidth
8993 *
8994 * @wm: watermark calculation data
8995 *
8996 * Calculate the data return bandwidth used for display (CIK).
8997 * Used for display watermark bandwidth calculations
8998 * Returns the data return bandwidth in MBytes/s
8999 */
9000static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9001{
9002	/* Calculate the display Data return Bandwidth */
9003	fixed20_12 return_efficiency; /* 0.8 */
9004	fixed20_12 sclk, bandwidth;
9005	fixed20_12 a;
9006
9007	a.full = dfixed_const(1000);
9008	sclk.full = dfixed_const(wm->sclk);
9009	sclk.full = dfixed_div(sclk, a);
9010	a.full = dfixed_const(10);
9011	return_efficiency.full = dfixed_const(8);
9012	return_efficiency.full = dfixed_div(return_efficiency, a);
9013	a.full = dfixed_const(32);
9014	bandwidth.full = dfixed_mul(a, sclk);
9015	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9016
9017	return dfixed_trunc(bandwidth);
9018}
9019
9020/**
9021 * dce8_dmif_request_bandwidth - get the dmif bandwidth
9022 *
9023 * @wm: watermark calculation data
9024 *
9025 * Calculate the dmif bandwidth used for display (CIK).
9026 * Used for display watermark bandwidth calculations
9027 * Returns the dmif bandwidth in MBytes/s
9028 */
9029static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9030{
9031	/* Calculate the DMIF Request Bandwidth */
9032	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9033	fixed20_12 disp_clk, bandwidth;
9034	fixed20_12 a, b;
9035
9036	a.full = dfixed_const(1000);
9037	disp_clk.full = dfixed_const(wm->disp_clk);
9038	disp_clk.full = dfixed_div(disp_clk, a);
9039	a.full = dfixed_const(32);
9040	b.full = dfixed_mul(a, disp_clk);
9041
9042	a.full = dfixed_const(10);
9043	disp_clk_request_efficiency.full = dfixed_const(8);
9044	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9045
9046	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9047
9048	return dfixed_trunc(bandwidth);
9049}
9050
9051/**
9052 * dce8_available_bandwidth - get the min available bandwidth
9053 *
9054 * @wm: watermark calculation data
9055 *
9056 * Calculate the min available bandwidth used for display (CIK).
9057 * Used for display watermark bandwidth calculations
9058 * Returns the min available bandwidth in MBytes/s
9059 */
9060static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9061{
9062	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9063	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9064	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9065	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9066
9067	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9068}
9069
9070/**
9071 * dce8_average_bandwidth - get the average available bandwidth
9072 *
9073 * @wm: watermark calculation data
9074 *
9075 * Calculate the average available bandwidth used for display (CIK).
9076 * Used for display watermark bandwidth calculations
9077 * Returns the average available bandwidth in MBytes/s
9078 */
9079static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9080{
9081	/* Calculate the display mode Average Bandwidth
9082	 * DisplayMode should contain the source and destination dimensions,
9083	 * timing, etc.
9084	 */
9085	fixed20_12 bpp;
9086	fixed20_12 line_time;
9087	fixed20_12 src_width;
9088	fixed20_12 bandwidth;
9089	fixed20_12 a;
9090
9091	a.full = dfixed_const(1000);
9092	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9093	line_time.full = dfixed_div(line_time, a);
9094	bpp.full = dfixed_const(wm->bytes_per_pixel);
9095	src_width.full = dfixed_const(wm->src_width);
9096	bandwidth.full = dfixed_mul(src_width, bpp);
9097	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9098	bandwidth.full = dfixed_div(bandwidth, line_time);
9099
9100	return dfixed_trunc(bandwidth);
9101}
9102
9103/**
9104 * dce8_latency_watermark - get the latency watermark
9105 *
9106 * @wm: watermark calculation data
9107 *
9108 * Calculate the latency watermark (CIK).
9109 * Used for display watermark bandwidth calculations
9110 * Returns the latency watermark in ns
9111 */
9112static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9113{
9114	/* First calculate the latency in ns */
9115	u32 mc_latency = 2000; /* 2000 ns. */
9116	u32 available_bandwidth = dce8_available_bandwidth(wm);
9117	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9118	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9119	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9120	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9121		(wm->num_heads * cursor_line_pair_return_time);
9122	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9123	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9124	u32 tmp, dmif_size = 12288;
9125	fixed20_12 a, b, c;
9126
9127	if (wm->num_heads == 0)
9128		return 0;
9129
9130	a.full = dfixed_const(2);
9131	b.full = dfixed_const(1);
9132	if ((wm->vsc.full > a.full) ||
9133	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9134	    (wm->vtaps >= 5) ||
9135	    ((wm->vsc.full >= a.full) && wm->interlaced))
9136		max_src_lines_per_dst_line = 4;
9137	else
9138		max_src_lines_per_dst_line = 2;
9139
9140	a.full = dfixed_const(available_bandwidth);
9141	b.full = dfixed_const(wm->num_heads);
9142	a.full = dfixed_div(a, b);
9143	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
9144	tmp = min(dfixed_trunc(a), tmp);
9145
9146	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
9147
9148	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9149	b.full = dfixed_const(1000);
9150	c.full = dfixed_const(lb_fill_bw);
9151	b.full = dfixed_div(c, b);
9152	a.full = dfixed_div(a, b);
9153	line_fill_time = dfixed_trunc(a);
9154
9155	if (line_fill_time < wm->active_time)
9156		return latency;
9157	else
9158		return latency + (line_fill_time - wm->active_time);
9159
9160}
9161
9162/**
9163 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9164 * average and available dram bandwidth
9165 *
9166 * @wm: watermark calculation data
9167 *
9168 * Check if the display average bandwidth fits in the display
9169 * dram bandwidth (CIK).
9170 * Used for display watermark bandwidth calculations
9171 * Returns true if the display fits, false if not.
9172 */
9173static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9174{
9175	if (dce8_average_bandwidth(wm) <=
9176	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9177		return true;
9178	else
9179		return false;
9180}
9181
9182/**
9183 * dce8_average_bandwidth_vs_available_bandwidth - check
9184 * average and available bandwidth
9185 *
9186 * @wm: watermark calculation data
9187 *
9188 * Check if the display average bandwidth fits in the display
9189 * available bandwidth (CIK).
9190 * Used for display watermark bandwidth calculations
9191 * Returns true if the display fits, false if not.
9192 */
9193static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9194{
9195	if (dce8_average_bandwidth(wm) <=
9196	    (dce8_available_bandwidth(wm) / wm->num_heads))
9197		return true;
9198	else
9199		return false;
9200}
9201
9202/**
9203 * dce8_check_latency_hiding - check latency hiding
9204 *
9205 * @wm: watermark calculation data
9206 *
9207 * Check latency hiding (CIK).
9208 * Used for display watermark bandwidth calculations
9209 * Returns true if the display fits, false if not.
9210 */
9211static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9212{
9213	u32 lb_partitions = wm->lb_size / wm->src_width;
9214	u32 line_time = wm->active_time + wm->blank_time;
9215	u32 latency_tolerant_lines;
9216	u32 latency_hiding;
9217	fixed20_12 a;
9218
9219	a.full = dfixed_const(1);
9220	if (wm->vsc.full > a.full)
9221		latency_tolerant_lines = 1;
9222	else {
9223		if (lb_partitions <= (wm->vtaps + 1))
9224			latency_tolerant_lines = 1;
9225		else
9226			latency_tolerant_lines = 2;
9227	}
9228
9229	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9230
9231	if (dce8_latency_watermark(wm) <= latency_hiding)
9232		return true;
9233	else
9234		return false;
9235}
9236
9237/**
9238 * dce8_program_watermarks - program display watermarks
9239 *
9240 * @rdev: radeon_device pointer
9241 * @radeon_crtc: the selected display controller
9242 * @lb_size: line buffer size
9243 * @num_heads: number of display controllers in use
9244 *
9245 * Calculate and program the display watermarks for the
9246 * selected display controller (CIK).
9247 */
9248static void dce8_program_watermarks(struct radeon_device *rdev,
9249				    struct radeon_crtc *radeon_crtc,
9250				    u32 lb_size, u32 num_heads)
9251{
9252	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9253	struct dce8_wm_params wm_low, wm_high;
9254	u32 active_time;
9255	u32 line_time = 0;
9256	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9257	u32 tmp, wm_mask;
9258
9259	if (radeon_crtc->base.enabled && num_heads && mode) {
9260		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
9261					    (u32)mode->clock);
9262		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
9263					  (u32)mode->clock);
9264		line_time = min(line_time, (u32)65535);
9265
9266		/* watermark for high clocks */
9267		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9268		    rdev->pm.dpm_enabled) {
9269			wm_high.yclk =
9270				radeon_dpm_get_mclk(rdev, false) * 10;
9271			wm_high.sclk =
9272				radeon_dpm_get_sclk(rdev, false) * 10;
9273		} else {
9274			wm_high.yclk = rdev->pm.current_mclk * 10;
9275			wm_high.sclk = rdev->pm.current_sclk * 10;
9276		}
9277
9278		wm_high.disp_clk = mode->clock;
9279		wm_high.src_width = mode->crtc_hdisplay;
9280		wm_high.active_time = active_time;
9281		wm_high.blank_time = line_time - wm_high.active_time;
9282		wm_high.interlaced = false;
9283		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9284			wm_high.interlaced = true;
9285		wm_high.vsc = radeon_crtc->vsc;
9286		wm_high.vtaps = 1;
9287		if (radeon_crtc->rmx_type != RMX_OFF)
9288			wm_high.vtaps = 2;
9289		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9290		wm_high.lb_size = lb_size;
9291		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9292		wm_high.num_heads = num_heads;
9293
9294		/* set for high clocks */
9295		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9296
9297		/* possibly force display priority to high */
9298		/* should really do this at mode validation time... */
9299		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9300		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9301		    !dce8_check_latency_hiding(&wm_high) ||
9302		    (rdev->disp_priority == 2)) {
9303			DRM_DEBUG_KMS("force priority to high\n");
9304		}
9305
9306		/* watermark for low clocks */
9307		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9308		    rdev->pm.dpm_enabled) {
9309			wm_low.yclk =
9310				radeon_dpm_get_mclk(rdev, true) * 10;
9311			wm_low.sclk =
9312				radeon_dpm_get_sclk(rdev, true) * 10;
9313		} else {
9314			wm_low.yclk = rdev->pm.current_mclk * 10;
9315			wm_low.sclk = rdev->pm.current_sclk * 10;
9316		}
9317
9318		wm_low.disp_clk = mode->clock;
9319		wm_low.src_width = mode->crtc_hdisplay;
9320		wm_low.active_time = active_time;
9321		wm_low.blank_time = line_time - wm_low.active_time;
9322		wm_low.interlaced = false;
9323		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9324			wm_low.interlaced = true;
9325		wm_low.vsc = radeon_crtc->vsc;
9326		wm_low.vtaps = 1;
9327		if (radeon_crtc->rmx_type != RMX_OFF)
9328			wm_low.vtaps = 2;
9329		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9330		wm_low.lb_size = lb_size;
9331		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9332		wm_low.num_heads = num_heads;
9333
9334		/* set for low clocks */
9335		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9336
9337		/* possibly force display priority to high */
9338		/* should really do this at mode validation time... */
9339		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9340		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9341		    !dce8_check_latency_hiding(&wm_low) ||
9342		    (rdev->disp_priority == 2)) {
9343			DRM_DEBUG_KMS("force priority to high\n");
9344		}
9345
9346		/* Save number of lines the linebuffer leads before the scanout */
9347		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9348	}
9349
9350	/* select wm A */
9351	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9352	tmp = wm_mask;
9353	tmp &= ~LATENCY_WATERMARK_MASK(3);
9354	tmp |= LATENCY_WATERMARK_MASK(1);
9355	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9356	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9357	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9358		LATENCY_HIGH_WATERMARK(line_time)));
9359	/* select wm B */
9360	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9361	tmp &= ~LATENCY_WATERMARK_MASK(3);
9362	tmp |= LATENCY_WATERMARK_MASK(2);
9363	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9364	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9365	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9366		LATENCY_HIGH_WATERMARK(line_time)));
9367	/* restore original selection */
9368	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9369
9370	/* save values for DPM */
9371	radeon_crtc->line_time = line_time;
9372	radeon_crtc->wm_high = latency_watermark_a;
9373	radeon_crtc->wm_low = latency_watermark_b;
9374}
9375
9376/**
9377 * dce8_bandwidth_update - program display watermarks
9378 *
9379 * @rdev: radeon_device pointer
9380 *
9381 * Calculate and program the display watermarks and line
9382 * buffer allocation (CIK).
9383 */
9384void dce8_bandwidth_update(struct radeon_device *rdev)
9385{
9386	struct drm_display_mode *mode = NULL;
9387	u32 num_heads = 0, lb_size;
9388	int i;
9389
9390	if (!rdev->mode_info.mode_config_initialized)
9391		return;
9392
9393	radeon_update_display_priority(rdev);
9394
9395	for (i = 0; i < rdev->num_crtc; i++) {
9396		if (rdev->mode_info.crtcs[i]->base.enabled)
9397			num_heads++;
9398	}
9399	for (i = 0; i < rdev->num_crtc; i++) {
9400		mode = &rdev->mode_info.crtcs[i]->base.mode;
9401		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9402		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9403	}
9404}
9405
9406/**
9407 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9408 *
9409 * @rdev: radeon_device pointer
9410 *
9411 * Fetches a GPU clock counter snapshot (SI).
9412 * Returns the 64 bit clock counter snapshot.
9413 */
9414uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9415{
9416	uint64_t clock;
9417
9418	mutex_lock(&rdev->gpu_clock_mutex);
9419	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9420	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9421		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9422	mutex_unlock(&rdev->gpu_clock_mutex);
9423	return clock;
9424}
9425
9426static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9427			     u32 cntl_reg, u32 status_reg)
9428{
9429	int r, i;
9430	struct atom_clock_dividers dividers;
9431	uint32_t tmp;
9432
9433	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9434					   clock, false, &dividers);
9435	if (r)
9436		return r;
9437
9438	tmp = RREG32_SMC(cntl_reg);
9439	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9440	tmp |= dividers.post_divider;
9441	WREG32_SMC(cntl_reg, tmp);
9442
9443	for (i = 0; i < 100; i++) {
9444		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9445			break;
9446		mdelay(10);
9447	}
9448	if (i == 100)
9449		return -ETIMEDOUT;
9450
9451	return 0;
9452}
9453
9454int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9455{
9456	int r = 0;
9457
9458	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9459	if (r)
9460		return r;
9461
9462	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9463	return r;
9464}
9465
9466int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9467{
9468	int r, i;
9469	struct atom_clock_dividers dividers;
9470	u32 tmp;
9471
9472	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9473					   ecclk, false, &dividers);
9474	if (r)
9475		return r;
9476
9477	for (i = 0; i < 100; i++) {
9478		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9479			break;
9480		mdelay(10);
9481	}
9482	if (i == 100)
9483		return -ETIMEDOUT;
9484
9485	tmp = RREG32_SMC(CG_ECLK_CNTL);
9486	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9487	tmp |= dividers.post_divider;
9488	WREG32_SMC(CG_ECLK_CNTL, tmp);
9489
9490	for (i = 0; i < 100; i++) {
9491		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9492			break;
9493		mdelay(10);
9494	}
9495	if (i == 100)
9496		return -ETIMEDOUT;
9497
9498	return 0;
9499}
9500
/* cik_pcie_gen3_enable - bring the PCIe link up to gen2/gen3 speed.
 * Skipped for IGPs, non-PCIE parts, devices on the root bus, or when
 * disabled via the radeon.pcie_gen2 module parameter.  For gen3 the
 * link equalization is redone (up to 10 tries) before the speed change
 * is initiated. */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	enum pci_bus_speed speed_cap;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, current_data_rate;
	int i;
	u16 tmp16;

	if (pci_is_root_bus(rdev->pdev->bus))
		return;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* only proceed if the upstream bridge supports gen2 or gen3 */
	speed_cap = pcie_get_speed_cap(root);
	if (speed_cap == PCI_SPEED_UNKNOWN)
		return;

	if ((speed_cap != PCIE_SPEED_8_0GT) &&
	    (speed_cap != PCIE_SPEED_5_0GT))
		return;

	/* nothing to do if the link already runs at the target rate */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (speed_cap == PCIE_SPEED_8_0GT) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (speed_cap == PCIE_SPEED_5_0GT) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (speed_cap == PCIE_SPEED_8_0GT) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save both link control registers, then set HAWD
			 * (hw autonomous width disable) on both ends */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* renegotiate to the maximum detected width if the
			 * link came up narrower and renegotiation is supported */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* redo equalization, up to 10 attempts or until no
			 * transactions are pending on the gpu */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl: restore only the saved HAWD bit */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2: restore bit 4 and bits 9-11 from the
				 * saved values (presumably enter-compliance and
				 * compliance de-emphasis — confirm against the
				 * PCIe spec) */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed (low 4 bits of LNKCTL2) */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (speed_cap == PCIE_SPEED_8_0GT)
		tmp16 |= 3; /* gen3 */
	else if (speed_cap == PCIE_SPEED_5_0GT)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change and wait for it to complete */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
9662
/**
 * cik_program_aspm - program PCIE ASPM-related registers (CIK)
 *
 * @rdev: radeon_device pointer
 *
 * Configures the PCIE link power management features: L0s/L1 inactivity
 * timers, PLL power-down while in L1, dynamic lane power states, and
 * CLKREQ#-based reference-clock gating (the latter only when the root
 * port advertises Clock Power Management in its Link Capabilities).
 * Bails out early when ASPM is disabled via the radeon.aspm module
 * parameter, on IGPs, or on non-PCIE parts.
 *
 * All register updates follow a read-modify-write pattern and are only
 * written back when the value actually changes.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	/* Local policy knobs; all features are currently enabled on CIK. */
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* Override the transmitted N_FTS (fast training sequence) count
	 * with 0x24 — presumably a hardware-validated value for L0s exit;
	 * TODO(review): confirm against CIK register documentation. */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* Clear both inactivity timers, then re-enable per policy below.
	 * LC_PMI_TO_L1_DIS is set by default and cleared again only when
	 * L1 is actually enabled. */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* Allow the PCIE PLLs (both PIF instances, lane
			 * groups 0 and 1) to power down in the OFF and
			 * TXS2 link states. */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* CLKREQ# gating is only usable when the upstream
			 * bridge advertises Clock Power Management support
			 * (PCI_EXP_LNKCAP_CLKPM).  A root-bus device has no
			 * upstream bridge to query, so it is disabled there. */
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* Switch various auxiliary clocks off the BIF
				 * reference clock so it can be gated while the
				 * link is idle.  The SEL(..) values are taken
				 * as given — TODO(review): confirm the clock
				 * mux encodings against the CIK SMC docs. */
				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: commit the L0s/PMI settings prepared above. */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* Enable light sleep for the BIF memories regardless of L0s/L1. */
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* If the received N_FTS field is saturated and the link is
		 * lane-reversed in both directions, turn the L0s inactivity
		 * timer back off — presumably a hardware erratum workaround;
		 * TODO(review): confirm the exact condition being avoided. */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
9811