cik.c revision 1.11
/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */

#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/slab.h>

#include <drm/drm_vblank.h>

#include "atom.h"
#include "evergreen.h"
#include "cik_blit_shaders.h"
#include "cik.h"
#include "cikd.h"
#include "clearstate_ci.h"
#include "r600.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_audio.h"
#include "radeon_ucode.h"
#include "si.h"
#include "vce.h"

#define SH_MEM_CONFIG_GFX_DEFAULT \
	ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)

49MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
50MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
51MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
52MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
53MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
54MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
55MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
56MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
57MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
58
59MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
60MODULE_FIRMWARE("radeon/bonaire_me.bin");
61MODULE_FIRMWARE("radeon/bonaire_ce.bin");
62MODULE_FIRMWARE("radeon/bonaire_mec.bin");
63MODULE_FIRMWARE("radeon/bonaire_mc.bin");
64MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
65MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
66MODULE_FIRMWARE("radeon/bonaire_smc.bin");
67MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");
68
69MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
70MODULE_FIRMWARE("radeon/HAWAII_me.bin");
71MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
72MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
73MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
74MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
75MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
76MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
77MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
78
79MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
80MODULE_FIRMWARE("radeon/hawaii_me.bin");
81MODULE_FIRMWARE("radeon/hawaii_ce.bin");
82MODULE_FIRMWARE("radeon/hawaii_mec.bin");
83MODULE_FIRMWARE("radeon/hawaii_mc.bin");
84MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
85MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
86MODULE_FIRMWARE("radeon/hawaii_smc.bin");
87MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");
88
89MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
90MODULE_FIRMWARE("radeon/KAVERI_me.bin");
91MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
92MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
93MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
94MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
95
96MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
97MODULE_FIRMWARE("radeon/kaveri_me.bin");
98MODULE_FIRMWARE("radeon/kaveri_ce.bin");
99MODULE_FIRMWARE("radeon/kaveri_mec.bin");
100MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
101MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
102MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
103
104MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
105MODULE_FIRMWARE("radeon/KABINI_me.bin");
106MODULE_FIRMWARE("radeon/KABINI_ce.bin");
107MODULE_FIRMWARE("radeon/KABINI_mec.bin");
108MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
109MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
110
111MODULE_FIRMWARE("radeon/kabini_pfp.bin");
112MODULE_FIRMWARE("radeon/kabini_me.bin");
113MODULE_FIRMWARE("radeon/kabini_ce.bin");
114MODULE_FIRMWARE("radeon/kabini_mec.bin");
115MODULE_FIRMWARE("radeon/kabini_rlc.bin");
116MODULE_FIRMWARE("radeon/kabini_sdma.bin");
117
118MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
119MODULE_FIRMWARE("radeon/MULLINS_me.bin");
120MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
121MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
122MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
123MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
124
125MODULE_FIRMWARE("radeon/mullins_pfp.bin");
126MODULE_FIRMWARE("radeon/mullins_me.bin");
127MODULE_FIRMWARE("radeon/mullins_ce.bin");
128MODULE_FIRMWARE("radeon/mullins_mec.bin");
129MODULE_FIRMWARE("radeon/mullins_rlc.bin");
130MODULE_FIRMWARE("radeon/mullins_sdma.bin");
131
132static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
133static void cik_rlc_stop(struct radeon_device *rdev);
134static void cik_pcie_gen3_enable(struct radeon_device *rdev);
135static void cik_program_aspm(struct radeon_device *rdev);
136static void cik_init_pg(struct radeon_device *rdev);
137static void cik_init_cg(struct radeon_device *rdev);
138static void cik_fini_pg(struct radeon_device *rdev);
139static void cik_fini_cg(struct radeon_device *rdev);
140static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
141					  bool enable);
142
/**
 * cik_get_allowed_info_register - fetch the register for the info ioctl
 *
 * @rdev: radeon_device pointer
 * @reg: register offset in bytes
 * @val: register value
 *
 * Returns 0 for success or -EINVAL for an invalid register
 */
int cik_get_allowed_info_register(struct radeon_device *rdev,
				  u32 reg, u32 *val)
{
	switch (reg) {
	case GRBM_STATUS:
	case GRBM_STATUS2:
	case GRBM_STATUS_SE0:
	case GRBM_STATUS_SE1:
	case GRBM_STATUS_SE2:
	case GRBM_STATUS_SE3:
	case SRBM_STATUS:
	case SRBM_STATUS2:
	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
	case UVD_STATUS:
	/* TODO VCE */
		*val = RREG32(reg);
		return 0;
	default:
		return -EINVAL;
	}
}
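/*
 * A minimal usage sketch (hypothetical caller, for illustration only):
 * the info ioctl path is expected to validate a userspace-supplied
 * register offset through this helper before reading it:
 *
 *	u32 val;
 *
 *	if (cik_get_allowed_info_register(rdev, GRBM_STATUS, &val) == 0)
 *		DRM_INFO("GRBM_STATUS: 0x%08x\n", val);
 */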

/*
 * Indirect registers accessor
 */
u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	WREG32(CIK_DIDT_IND_INDEX, (reg));
	r = RREG32(CIK_DIDT_IND_DATA);
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
	return r;
}

void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	WREG32(CIK_DIDT_IND_INDEX, (reg));
	WREG32(CIK_DIDT_IND_DATA, (v));
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
}

/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	/* CTF_TEMP is a 9-bit field in CG_MULT_THERMAL_STATUS */
	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	/* readings with bit 9 set are clamped to the 255 C maximum */
	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	/* convert degrees to millidegrees */
	return actual_temp * 1000;
}

/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = RREG32_SMC(0xC0300E0C);

	/* scale the raw SMC reading: degrees C = raw / 8 - 49;
	 * a zero reading is reported as 0 C
	 */
	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	/* convert degrees to millidegrees */
	return actual_temp * 1000;
}

/*
 * Indirect registers accessor
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX); /* posting read to flush the index write */
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX); /* posting read to flush the index write */
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA); /* posting read to flush the data write */
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}

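/*
 * RLC save/restore register lists.  Each list entry below is a pair:
 * an encoded descriptor, (select << 16) | (byte_offset >> 2), whose
 * upper half picks the broadcast/instance target and whose lower half
 * is the register's dword offset, followed by a default value of 0.
 * The bare counts (0x3, 0x5) mark the start of special segments.
 * This layout is inferred from how the RLC init code consumes
 * rdev->rlc.reg_list; treat it as a reading aid, not a hardware
 * specification.
 */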
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

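/*
 * "Golden" register tables.  Each table below is a flat list of
 * { offset, and_mask, or_value } triples consumed by
 * radeon_program_register_sequence(): the masked bits are cleared,
 * or_value is merged in, and the result is written back (an and_mask
 * of 0xffffffff simply overwrites the register).  A simplified sketch
 * of that consumer, paraphrased from radeon_device.c:
 *
 *	for (i = 0; i < array_size; i += 3) {
 *		reg = registers[i + 0];
 *		and_mask = registers[i + 1];
 *		or_mask = registers[i + 2];
 *		if (and_mask == 0xffffffff) {
 *			tmp = or_mask;
 *		} else {
 *			tmp = RREG32(reg);
 *			tmp &= ~and_mask;
 *			tmp |= (or_mask & and_mask);
 *		}
 *		WREG32(reg, tmp);
 *	}
 */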
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};

static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};

static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_MULLINS:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 godavari_golden_registers,
						 (const u32)ARRAY_SIZE(godavari_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	case CHIP_HAWAII:
		radeon_program_register_sequence(rdev,
						 hawaii_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_common_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_spm_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
		break;
	default:
		break;
	}
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}
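/*
 * Note: as elsewhere in radeon, the clock value above is in 10 kHz
 * units (e.g. 10000 for a 100 MHz reference), taken from the SPLL
 * reference frequency and optionally divided down when the GPU
 * counter clock or XTALIN divider is active.
 */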

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
{
	if (index < rdev->doorbell.num_doorbells) {
		return readl(rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
{
	if (index < rdev->doorbell.num_doorbells) {
		writel(v, rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}
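/*
 * In both doorbell helpers above, rdev->doorbell.ptr is a u32 __iomem
 * mapping of the doorbell aperture, so @index addresses 32-bit slots;
 * out-of-range indices are rejected instead of touching memory beyond
 * the aperture.
 */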

#define BONAIRE_IO_MC_REGS_SIZE 36

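/*
 * MC "IO debug" tables: each row is an { MC_SEQ_IO_DEBUG_INDEX,
 * MC_SEQ_IO_DEBUG_DATA } pair that ci_mc_load_microcode() below
 * writes out verbatim before uploading the MC ucode.  These tables
 * are only used with the older, non-packaged firmware images; the
 * newer images carry their own register list in the firmware header.
 */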
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};

#define HAWAII_IO_MC_REGS_SIZE 22

static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};

/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active register instances.  Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
 */
static void cik_srbm_select(struct radeon_device *rdev,
			    u32 me, u32 pipe, u32 queue, u32 vmid)
{
	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
			     MEID(me & 0x3) |
			     VMID(vmid & 0xf) |
			     QUEUEID(queue & 0x7));
	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}
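/*
 * Callers are expected to serialize cik_srbm_select() against each
 * other (the rest of the driver does so under rdev->srbm_mutex) and
 * to restore the 0/0/0/0 selection when finished, since the SRBM
 * selection is global chip state.
 */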
1850
1851/* ucode loading */
1852/**
1853 * ci_mc_load_microcode - load MC ucode into the hw
1854 *
1855 * @rdev: radeon_device pointer
1856 *
1857 * Load the GDDR MC ucode into the hw (CIK).
1858 * Returns 0 on success, error on failure.
1859 */
1860int ci_mc_load_microcode(struct radeon_device *rdev)
1861{
1862	const __be32 *fw_data = NULL;
1863	const __le32 *new_fw_data = NULL;
1864	u32 running, tmp;
1865	u32 *io_mc_regs = NULL;
1866	const __le32 *new_io_mc_regs = NULL;
1867	int i, regs_size, ucode_size;
1868
1869	if (!rdev->mc_fw)
1870		return -EINVAL;
1871
1872	if (rdev->new_fw) {
1873		const struct mc_firmware_header_v1_0 *hdr =
1874			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1875
1876		radeon_ucode_print_mc_hdr(&hdr->header);
1877
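		/*
		 * The io_debug section is an array of (index, data) register
		 * pairs, i.e. 8 bytes per entry, hence the divide by (4 * 2).
		 */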
1878		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1879		new_io_mc_regs = (const __le32 *)
1880			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1881		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1882		new_fw_data = (const __le32 *)
1883			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1884	} else {
1885		ucode_size = rdev->mc_fw->size / 4;
1886
1887		switch (rdev->family) {
1888		case CHIP_BONAIRE:
1889			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1890			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1891			break;
1892		case CHIP_HAWAII:
1893			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1894			regs_size = HAWAII_IO_MC_REGS_SIZE;
1895			break;
1896		default:
1897			return -EINVAL;
1898		}
1899		fw_data = (const __be32 *)rdev->mc_fw->data;
1900	}
1901
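	/*
	 * Only upload the ucode if the MC sequencer is not already running,
	 * e.g. when it was not already loaded by the vbios.
	 */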
1902	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1903
1904	if (running == 0) {
1905		/* reset the engine and set to writable */
1906		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1907		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1908
1909		/* load mc io regs */
1910		for (i = 0; i < regs_size; i++) {
1911			if (rdev->new_fw) {
1912				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1913				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1914			} else {
1915				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1916				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1917			}
1918		}
1919
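		/*
		 * Board-specific MC tuning: extra IO debug overrides for
		 * 0x6649 (Bonaire based) boards with a matching MC_SEQ_MISC0
		 * value.
		 */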
1920		tmp = RREG32(MC_SEQ_MISC0);
1921		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1922			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1923			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1924			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1925			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1926		}
1927
1928		/* load the MC ucode */
1929		for (i = 0; i < ucode_size; i++) {
1930			if (rdev->new_fw)
1931				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1932			else
1933				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1934		}
1935
1936		/* put the engine back into the active state */
1937		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1938		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1939		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1940
1941		/* wait for training to complete */
1942		for (i = 0; i < rdev->usec_timeout; i++) {
1943			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1944				break;
1945			udelay(1);
1946		}
1947		for (i = 0; i < rdev->usec_timeout; i++) {
1948			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1949				break;
1950			udelay(1);
1951		}
1952	}
1953
1954	return 0;
1955}
1956
1957/**
1958 * cik_init_microcode - load ucode images from disk
1959 *
1960 * @rdev: radeon_device pointer
1961 *
1962 * Use the firmware interface to load the ucode images into
1963 * the driver (not loaded into hw).
1964 * Returns 0 on success, error on failure.
1965 */
1966static int cik_init_microcode(struct radeon_device *rdev)
1967{
1968	const char *chip_name;
1969	const char *new_chip_name;
1970	size_t pfp_req_size, me_req_size, ce_req_size,
1971		mec_req_size, rlc_req_size, mc_req_size = 0,
1972		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1973	char fw_name[30];
1974	int new_fw = 0;
1975	int err;
1976	int num_fw;
1977	bool new_smc = false;
1978
1979	DRM_DEBUG("\n");
1980
1981	switch (rdev->family) {
1982	case CHIP_BONAIRE:
1983		chip_name = "BONAIRE";
1984		if ((rdev->pdev->revision == 0x80) ||
1985		    (rdev->pdev->revision == 0x81) ||
1986		    (rdev->pdev->device == 0x665f))
1987			new_smc = true;
1988		new_chip_name = "bonaire";
1989		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1990		me_req_size = CIK_ME_UCODE_SIZE * 4;
1991		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1992		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1993		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1994		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1995		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1996		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1997		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1998		num_fw = 8;
1999		break;
2000	case CHIP_HAWAII:
2001		chip_name = "HAWAII";
2002		if (rdev->pdev->revision == 0x80)
2003			new_smc = true;
2004		new_chip_name = "hawaii";
2005		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2006		me_req_size = CIK_ME_UCODE_SIZE * 4;
2007		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2008		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2009		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2010		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2011		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2012		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2013		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2014		num_fw = 8;
2015		break;
2016	case CHIP_KAVERI:
2017		chip_name = "KAVERI";
2018		new_chip_name = "kaveri";
2019		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2020		me_req_size = CIK_ME_UCODE_SIZE * 4;
2021		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2022		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2023		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2024		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2025		num_fw = 7;
2026		break;
2027	case CHIP_KABINI:
2028		chip_name = "KABINI";
2029		new_chip_name = "kabini";
2030		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2031		me_req_size = CIK_ME_UCODE_SIZE * 4;
2032		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2033		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2034		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2035		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2036		num_fw = 6;
2037		break;
2038	case CHIP_MULLINS:
2039		chip_name = "MULLINS";
2040		new_chip_name = "mullins";
2041		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2042		me_req_size = CIK_ME_UCODE_SIZE * 4;
2043		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2044		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2045		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2046		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2047		num_fw = 6;
2048		break;
	default:
		BUG();
2050	}
2051
2052	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2053
2054	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2055	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2056	if (err) {
2057		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2058		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2059		if (err)
2060			goto out;
2061		if (rdev->pfp_fw->size != pfp_req_size) {
2062			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2063			       rdev->pfp_fw->size, fw_name);
2064			err = -EINVAL;
2065			goto out;
2066		}
2067	} else {
2068		err = radeon_ucode_validate(rdev->pfp_fw);
2069		if (err) {
2070			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2071			       fw_name);
2072			goto out;
2073		} else {
2074			new_fw++;
2075		}
2076	}
2077
2078	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2079	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2080	if (err) {
2081		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2082		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2083		if (err)
2084			goto out;
		if (rdev->me_fw->size != me_req_size) {
			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->me_fw->size, fw_name);
			err = -EINVAL;
			goto out;
		}
2090	} else {
2091		err = radeon_ucode_validate(rdev->me_fw);
2092		if (err) {
2093			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2094			       fw_name);
2095			goto out;
2096		} else {
2097			new_fw++;
2098		}
2099	}
2100
2101	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2102	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2103	if (err) {
2104		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2105		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2106		if (err)
2107			goto out;
		if (rdev->ce_fw->size != ce_req_size) {
			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->ce_fw->size, fw_name);
			err = -EINVAL;
			goto out;
		}
2113	} else {
2114		err = radeon_ucode_validate(rdev->ce_fw);
2115		if (err) {
2116			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2117			       fw_name);
2118			goto out;
2119		} else {
2120			new_fw++;
2121		}
2122	}
2123
2124	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2125	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2126	if (err) {
2127		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2128		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2129		if (err)
2130			goto out;
		if (rdev->mec_fw->size != mec_req_size) {
			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mec_fw->size, fw_name);
			err = -EINVAL;
			goto out;
		}
2136	} else {
2137		err = radeon_ucode_validate(rdev->mec_fw);
2138		if (err) {
2139			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2140			       fw_name);
2141			goto out;
2142		} else {
2143			new_fw++;
2144		}
2145	}
2146
	if (rdev->family == CHIP_KAVERI) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		err = radeon_ucode_validate(rdev->mec2_fw);
		if (err)
			goto out;
		new_fw++;
	}
2161
2162	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2163	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2164	if (err) {
2165		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2166		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2167		if (err)
2168			goto out;
		if (rdev->rlc_fw->size != rlc_req_size) {
			pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->rlc_fw->size, fw_name);
			err = -EINVAL;
			goto out;
		}
2174	} else {
2175		err = radeon_ucode_validate(rdev->rlc_fw);
2176		if (err) {
2177			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2178			       fw_name);
2179			goto out;
2180		} else {
2181			new_fw++;
2182		}
2183	}
2184
2185	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2186	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2187	if (err) {
2188		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2189		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2190		if (err)
2191			goto out;
		if (rdev->sdma_fw->size != sdma_req_size) {
			pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n",
			       rdev->sdma_fw->size, fw_name);
			err = -EINVAL;
			goto out;
		}
2197	} else {
2198		err = radeon_ucode_validate(rdev->sdma_fw);
2199		if (err) {
2200			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2201			       fw_name);
2202			goto out;
2203		} else {
2204			new_fw++;
2205		}
2206	}
2207
	/* No SMC or MC ucode on APUs */
2209	if (!(rdev->flags & RADEON_IS_IGP)) {
2210		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2211		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2212		if (err) {
2213			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2214			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2215			if (err) {
2216				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2217				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2218				if (err)
2219					goto out;
2220			}
			if ((rdev->mc_fw->size != mc_req_size) &&
			    (rdev->mc_fw->size != mc2_req_size)) {
				pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n",
				       rdev->mc_fw->size, fw_name);
				err = -EINVAL;
				goto out;
			}
			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2228		} else {
2229			err = radeon_ucode_validate(rdev->mc_fw);
2230			if (err) {
2231				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2232				       fw_name);
2233				goto out;
2234			} else {
2235				new_fw++;
2236			}
2237		}
2238
2239		if (new_smc)
2240			snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
2241		else
2242			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2243		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2244		if (err) {
2245			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2246			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2247			if (err) {
2248				pr_err("smc: error loading firmware \"%s\"\n",
2249				       fw_name);
2250				release_firmware(rdev->smc_fw);
2251				rdev->smc_fw = NULL;
2252				err = 0;
2253			} else if (rdev->smc_fw->size != smc_req_size) {
2254				pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n",
2255				       rdev->smc_fw->size, fw_name);
2256				err = -EINVAL;
2257			}
2258		} else {
2259			err = radeon_ucode_validate(rdev->smc_fw);
2260			if (err) {
2261				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2262				       fw_name);
2263				goto out;
2264			} else {
2265				new_fw++;
2266			}
2267		}
2268	}
2269
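	/*
	 * rdev->new_fw is a single device-wide flag, so the images must
	 * either all be new-style or all legacy; a mix is rejected.
	 */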
2270	if (new_fw == 0) {
2271		rdev->new_fw = false;
2272	} else if (new_fw < num_fw) {
2273		pr_err("ci_fw: mixing new and old firmware!\n");
2274		err = -EINVAL;
2275	} else {
2276		rdev->new_fw = true;
2277	}
2278
2279out:
2280	if (err) {
2281		if (err != -EINVAL)
2282			pr_err("cik_cp: Failed to load firmware \"%s\"\n",
2283			       fw_name);
2284		release_firmware(rdev->pfp_fw);
2285		rdev->pfp_fw = NULL;
2286		release_firmware(rdev->me_fw);
2287		rdev->me_fw = NULL;
2288		release_firmware(rdev->ce_fw);
2289		rdev->ce_fw = NULL;
2290		release_firmware(rdev->mec_fw);
2291		rdev->mec_fw = NULL;
2292		release_firmware(rdev->mec2_fw);
2293		rdev->mec2_fw = NULL;
2294		release_firmware(rdev->rlc_fw);
2295		rdev->rlc_fw = NULL;
2296		release_firmware(rdev->sdma_fw);
2297		rdev->sdma_fw = NULL;
2298		release_firmware(rdev->mc_fw);
2299		rdev->mc_fw = NULL;
2300		release_firmware(rdev->smc_fw);
2301		rdev->smc_fw = NULL;
2302	}
2303	return err;
2304}
2305
2306/*
2307 * Core functions
2308 */
2309/**
2310 * cik_tiling_mode_table_init - init the hw tiling table
2311 *
2312 * @rdev: radeon_device pointer
2313 *
2314 * Starting with SI, the tiling setup is done globally in a
2315 * set of 32 tiling modes.  Rather than selecting each set of
2316 * parameters per surface as on older asics, we just select
2317 * which index in the tiling table we want to use, and the
2318 * surface uses those parameters (CIK).
2319 */
2320static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2321{
2322	u32 *tile = rdev->config.cik.tile_mode_array;
2323	u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2324	const u32 num_tile_mode_states =
2325			ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2326	const u32 num_secondary_tile_mode_states =
2327			ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2328	u32 reg_offset, split_equal_to_row_size;
2329	u32 num_pipe_configs;
2330	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2331		rdev->config.cik.max_shader_engines;
2332
2333	switch (rdev->config.cik.mem_row_size_in_kb) {
2334	case 1:
2335		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2336		break;
2337	case 2:
2338	default:
2339		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2340		break;
2341	case 4:
2342		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2343		break;
2344	}
2345
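	/*
	 * Only the 2/4/8/16 pipe layouts below are programmed; anything
	 * above 8 pipes is treated as the 16-pipe (Hawaii) configuration.
	 */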
2346	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2347	if (num_pipe_configs > 8)
2348		num_pipe_configs = 16;
2349
2350	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2351		tile[reg_offset] = 0;
2352	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2353		macrotile[reg_offset] = 0;
2354
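	/*
	 * Table entries not programmed below (e.g. tile modes 15 and 18-26)
	 * keep the zero written above, i.e. ARRAY_LINEAR_GENERAL.
	 */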
	switch (num_pipe_configs) {
2356	case 16:
2357		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2358			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2359			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2360			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2361		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2362			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2363			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2364			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2365		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2366			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2367			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2368			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2369		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2370			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2371			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2372			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2373		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2374			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2375			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2376			   TILE_SPLIT(split_equal_to_row_size));
2377		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2378			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2379			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2380		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2381			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2382			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2383			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2384		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2385			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2386			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2387			   TILE_SPLIT(split_equal_to_row_size));
2388		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2389			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2390		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2391			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2393		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2394			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2395			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2397		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2398			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2399			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2400			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2401		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2402			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2403			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2404			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2405		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2406			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2408		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2409			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2410			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2411			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2412		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2413			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2414			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2415			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2416		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2417			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2418			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2419			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2420		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2421			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2422			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2423		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2424			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2425			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2426			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2427		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2428			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2429			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2430			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2431		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2432			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2433			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2434			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2435
2436		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2437			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2438			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2439			   NUM_BANKS(ADDR_SURF_16_BANK));
2440		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2441			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2442			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2443			   NUM_BANKS(ADDR_SURF_16_BANK));
2444		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2445			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2446			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2447			   NUM_BANKS(ADDR_SURF_16_BANK));
2448		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2449			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2450			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2451			   NUM_BANKS(ADDR_SURF_16_BANK));
2452		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2453			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2454			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2455			   NUM_BANKS(ADDR_SURF_8_BANK));
2456		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2457			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2458			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2459			   NUM_BANKS(ADDR_SURF_4_BANK));
2460		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2461			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2462			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2463			   NUM_BANKS(ADDR_SURF_2_BANK));
2464		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2466			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2467			   NUM_BANKS(ADDR_SURF_16_BANK));
2468		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2470			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2471			   NUM_BANKS(ADDR_SURF_16_BANK));
2472		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2474			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2475			    NUM_BANKS(ADDR_SURF_16_BANK));
2476		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2478			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2479			    NUM_BANKS(ADDR_SURF_8_BANK));
2480		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2482			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2483			    NUM_BANKS(ADDR_SURF_4_BANK));
2484		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2487			    NUM_BANKS(ADDR_SURF_2_BANK));
2488		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2490			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2491			    NUM_BANKS(ADDR_SURF_2_BANK));
2492
2493		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2494			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2495		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2496			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2497		break;
2498
2499	case 8:
2500		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2501			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2502			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2503			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2504		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2505			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2506			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2507			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2508		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2509			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2510			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2511			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2512		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2513			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2514			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2515			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2516		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2517			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2518			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2519			   TILE_SPLIT(split_equal_to_row_size));
2520		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2521			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2522			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2523		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2524			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2525			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2526			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2527		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2528			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2529			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2530			   TILE_SPLIT(split_equal_to_row_size));
2531		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2532			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2533		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2534			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2536		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2537			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2538			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2540		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2541			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2542			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2543			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2544		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2545			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2546			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2548		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2549			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2550			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2551		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2552			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2553			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2554			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2555		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2556			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2557			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2558			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2559		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2560			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2561			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2562			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2563		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2564			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2565			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2566		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2567			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2568			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2569			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2570		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2571			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2572			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2573			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2574		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2575			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2576			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2577			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2578
2579		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2580				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2581				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2582				NUM_BANKS(ADDR_SURF_16_BANK));
2583		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2584				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2585				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2586				NUM_BANKS(ADDR_SURF_16_BANK));
2587		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2588				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2589				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2590				NUM_BANKS(ADDR_SURF_16_BANK));
2591		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2592				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2593				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2594				NUM_BANKS(ADDR_SURF_16_BANK));
2595		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2596				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2597				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2598				NUM_BANKS(ADDR_SURF_8_BANK));
2599		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2600				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2601				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2602				NUM_BANKS(ADDR_SURF_4_BANK));
2603		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2604				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2605				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2606				NUM_BANKS(ADDR_SURF_2_BANK));
2607		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2608				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2609				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2610				NUM_BANKS(ADDR_SURF_16_BANK));
2611		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2612				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2613				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2614				NUM_BANKS(ADDR_SURF_16_BANK));
2615		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2616				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2617				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2618				NUM_BANKS(ADDR_SURF_16_BANK));
2619		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2621				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2622				NUM_BANKS(ADDR_SURF_16_BANK));
2623		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2624				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2625				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2626				NUM_BANKS(ADDR_SURF_8_BANK));
2627		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2629				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2630				NUM_BANKS(ADDR_SURF_4_BANK));
2631		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2633				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2634				NUM_BANKS(ADDR_SURF_2_BANK));
2635
2636		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2637			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2638		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2639			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2640		break;
2641
2642	case 4:
2643		if (num_rbs == 4) {
2644		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2645			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2646			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2647			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2648		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2649			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2650			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2651			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2652		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2653			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2654			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2655			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2656		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2657			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2658			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2659			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2660		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2661			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2662			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2663			   TILE_SPLIT(split_equal_to_row_size));
2664		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2665			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2666			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2667		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2668			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2669			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2670			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2671		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2672			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2673			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2674			   TILE_SPLIT(split_equal_to_row_size));
2675		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2676			   PIPE_CONFIG(ADDR_SURF_P4_16x16));
2677		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2678			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2680		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2681			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2682			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2684		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2685			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2686			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2687			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2688		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2689			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2690			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2692		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2693			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2694			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2695		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2696			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2697			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2698			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2699		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2700			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2701			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2702			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2703		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2704			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2705			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2706			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2707		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2708			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2709			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2710		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2711			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2712			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2713			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2714		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2715			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2716			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2717			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2718		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2719			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2720			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2721			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2722
2723		} else if (num_rbs < 4) {
2724		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2725			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2726			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2727			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2728		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2729			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2730			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2731			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2732		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2733			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2734			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2735			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2736		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2737			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2738			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2739			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2740		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2741			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2742			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2743			   TILE_SPLIT(split_equal_to_row_size));
2744		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2745			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2746			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2747		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2748			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2749			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2750			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2751		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2752			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2753			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2754			   TILE_SPLIT(split_equal_to_row_size));
2755		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2756			   PIPE_CONFIG(ADDR_SURF_P4_8x16));
2757		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2758			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2760		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2761			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2762			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2764		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2765			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2766			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2767			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2768		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2769			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2770			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2771			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2772		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2773			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2774			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2775		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2776			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2777			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2778			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2779		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2780			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2781			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2782			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2783		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2784			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2785			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2786			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2787		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2788			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2789			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2790		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2791			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2792			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2793			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2794		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2795			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2796			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2797			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2798		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2799			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2800			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2801			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2802		}
2803
2804		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2805				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2806				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2807				NUM_BANKS(ADDR_SURF_16_BANK));
2808		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2809				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2810				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2811				NUM_BANKS(ADDR_SURF_16_BANK));
2812		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2813				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2814				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2815				NUM_BANKS(ADDR_SURF_16_BANK));
2816		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2817				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2818				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2819				NUM_BANKS(ADDR_SURF_16_BANK));
2820		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2821				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2822				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2823				NUM_BANKS(ADDR_SURF_16_BANK));
2824		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2825				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2826				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2827				NUM_BANKS(ADDR_SURF_8_BANK));
2828		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2829				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2830				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2831				NUM_BANKS(ADDR_SURF_4_BANK));
2832		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2833				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2834				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2835				NUM_BANKS(ADDR_SURF_16_BANK));
2836		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2837				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2838				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2839				NUM_BANKS(ADDR_SURF_16_BANK));
2840		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2841				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2842				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2843				NUM_BANKS(ADDR_SURF_16_BANK));
2844		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2845				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2846				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2847				NUM_BANKS(ADDR_SURF_16_BANK));
2848		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2849				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2850				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2851				NUM_BANKS(ADDR_SURF_16_BANK));
2852		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2854				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2855				NUM_BANKS(ADDR_SURF_8_BANK));
2856		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2857				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2858				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2859				NUM_BANKS(ADDR_SURF_4_BANK));
2860
2861		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2862			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2863		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2864			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2865		break;
2866
2867	case 2:
2868		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2869			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2870			   PIPE_CONFIG(ADDR_SURF_P2) |
2871			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2872		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2873			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2874			   PIPE_CONFIG(ADDR_SURF_P2) |
2875			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2876		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2877			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2878			   PIPE_CONFIG(ADDR_SURF_P2) |
2879			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2880		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2881			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2882			   PIPE_CONFIG(ADDR_SURF_P2) |
2883			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2884		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2885			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2886			   PIPE_CONFIG(ADDR_SURF_P2) |
2887			   TILE_SPLIT(split_equal_to_row_size));
2888		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2889			   PIPE_CONFIG(ADDR_SURF_P2) |
2890			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2891		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2892			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2893			   PIPE_CONFIG(ADDR_SURF_P2) |
2894			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2895		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2896			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2897			   PIPE_CONFIG(ADDR_SURF_P2) |
2898			   TILE_SPLIT(split_equal_to_row_size));
		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
			   PIPE_CONFIG(ADDR_SURF_P2));
2901		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2902			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2903			   PIPE_CONFIG(ADDR_SURF_P2));
2904		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2906			    PIPE_CONFIG(ADDR_SURF_P2) |
2907			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2908		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2909			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2910			    PIPE_CONFIG(ADDR_SURF_P2) |
2911			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2912		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2913			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2914			    PIPE_CONFIG(ADDR_SURF_P2) |
2915			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2916		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2917			    PIPE_CONFIG(ADDR_SURF_P2) |
2918			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2919		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2920			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2921			    PIPE_CONFIG(ADDR_SURF_P2) |
2922			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2923		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2924			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2925			    PIPE_CONFIG(ADDR_SURF_P2) |
2926			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2927		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2928			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2929			    PIPE_CONFIG(ADDR_SURF_P2) |
2930			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2931		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2932			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2933			    PIPE_CONFIG(ADDR_SURF_P2));
2934		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2935			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2936			    PIPE_CONFIG(ADDR_SURF_P2) |
2937			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2938		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2939			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2940			    PIPE_CONFIG(ADDR_SURF_P2) |
2941			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2942		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2943			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2944			    PIPE_CONFIG(ADDR_SURF_P2) |
2945			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2946
2947		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2948				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2949				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2950				NUM_BANKS(ADDR_SURF_16_BANK));
2951		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2952				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2953				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2954				NUM_BANKS(ADDR_SURF_16_BANK));
2955		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2956				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2957				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2958				NUM_BANKS(ADDR_SURF_16_BANK));
2959		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2960				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2961				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2962				NUM_BANKS(ADDR_SURF_16_BANK));
2963		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2964				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2965				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2966				NUM_BANKS(ADDR_SURF_16_BANK));
2967		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2968				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2969				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2970				NUM_BANKS(ADDR_SURF_16_BANK));
2971		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2972				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2973				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2974				NUM_BANKS(ADDR_SURF_8_BANK));
2975		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2976				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2977				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2978				NUM_BANKS(ADDR_SURF_16_BANK));
2979		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2980				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2981				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2982				NUM_BANKS(ADDR_SURF_16_BANK));
2983		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2984				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2985				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2986				NUM_BANKS(ADDR_SURF_16_BANK));
2987		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2988				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2989				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2990				NUM_BANKS(ADDR_SURF_16_BANK));
2991		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2992				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2993				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2994				NUM_BANKS(ADDR_SURF_16_BANK));
2995		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2996				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2997				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2998				NUM_BANKS(ADDR_SURF_16_BANK));
2999		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3000				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3001				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3002				NUM_BANKS(ADDR_SURF_8_BANK));
3003
3004		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3005			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3006		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3007			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3008		break;
3009
3010	default:
3011		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3012	}
3013}
3014
3015/**
3016 * cik_select_se_sh - select which SE, SH to address
3017 *
3018 * @rdev: radeon_device pointer
3019 * @se_num: shader engine to address
3020 * @sh_num: sh block to address
3021 *
3022 * Select which SE, SH combinations to address. Certain
3023 * registers are instanced per SE or SH.  0xffffffff means
3024 * broadcast to all SEs or SHs (CIK).
3025 */
3026static void cik_select_se_sh(struct radeon_device *rdev,
3027			     u32 se_num, u32 sh_num)
3028{
3029	u32 data = INSTANCE_BROADCAST_WRITES;
3030
3031	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3032		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3033	else if (se_num == 0xffffffff)
3034		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3035	else if (sh_num == 0xffffffff)
3036		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3037	else
3038		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3039	WREG32(GRBM_GFX_INDEX, data);
3040}
3041
3042/**
3043 * cik_create_bitmask - create a bitmask
3044 *
3045 * @bit_width: length of the mask
3046 *
3047 * create a variable length bit mask (CIK).
3048 * Returns the bitmask.
3049 */
3050static u32 cik_create_bitmask(u32 bit_width)
3051{
3052	u32 i, mask = 0;
3053
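	/*
	 * Builds ((1 << bit_width) - 1) iteratively so that bit_width == 32
	 * yields 0xffffffff rather than relying on an undefined 32-bit
	 * shift.
	 */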
3054	for (i = 0; i < bit_width; i++) {
3055		mask <<= 1;
3056		mask |= 1;
3057	}
3058	return mask;
3059}
3060
3061/**
3062 * cik_get_rb_disabled - computes the mask of disabled RBs
3063 *
3064 * @rdev: radeon_device pointer
3065 * @max_rb_num_per_se: max RBs (render backends) per SE (shader engine) for the asic
3066 * @sh_per_se: number of SH blocks per SE for the asic
3067 *
3068 * Calculates the bitmask of disabled RBs (CIK).
3069 * Returns the disabled RB bitmask.
3070 */
3071static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3072			      u32 max_rb_num_per_se,
3073			      u32 sh_per_se)
3074{
3075	u32 data, mask;
3076
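	/* Combine the fuse-disabled (CC_*) and driver-disabled (GC_USER_*)
	 * render backend masks.
	 */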
3077	data = RREG32(CC_RB_BACKEND_DISABLE);
3078	if (data & 1)
3079		data &= BACKEND_DISABLE_MASK;
3080	else
3081		data = 0;
3082	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3083
3084	data >>= BACKEND_DISABLE_SHIFT;
3085
3086	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3087
3088	return data & mask;
3089}
3090
3091/**
3092 * cik_setup_rb - setup the RBs on the asic
3093 *
3094 * @rdev: radeon_device pointer
3095 * @se_num: number of SEs (shader engines) for the asic
3096 * @sh_per_se: number of SH blocks per SE for the asic
3097 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3098 *
3099 * Configures per-SE/SH RB registers (CIK).
3100 */
3101static void cik_setup_rb(struct radeon_device *rdev,
3102			 u32 se_num, u32 sh_per_se,
3103			 u32 max_rb_num_per_se)
3104{
3105	int i, j;
3106	u32 data, mask;
3107	u32 disabled_rbs = 0;
3108	u32 enabled_rbs = 0;
3109
3110	for (i = 0; i < se_num; i++) {
3111		for (j = 0; j < sh_per_se; j++) {
3112			cik_select_se_sh(rdev, i, j);
3113			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3114			if (rdev->family == CHIP_HAWAII)
3115				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3116			else
3117				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3118		}
3119	}
3120	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3121
3122	mask = 1;
3123	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3124		if (!(disabled_rbs & mask))
3125			enabled_rbs |= mask;
3126		mask <<= 1;
3127	}
3128
3129	rdev->config.cik.backend_enable_mask = enabled_rbs;
3130
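	/*
	 * Program PA_SC_RASTER_CONFIG per SE: each SH consumes two bits of
	 * enabled_rbs and selects an RB mapping for its packers based on
	 * which of its RBs are still enabled.
	 */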
3131	for (i = 0; i < se_num; i++) {
3132		cik_select_se_sh(rdev, i, 0xffffffff);
3133		data = 0;
3134		for (j = 0; j < sh_per_se; j++) {
3135			switch (enabled_rbs & 3) {
3136			case 0:
3137				if (j == 0)
3138					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3139				else
3140					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3141				break;
3142			case 1:
3143				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3144				break;
3145			case 2:
3146				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3147				break;
3148			case 3:
3149			default:
3150				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3151				break;
3152			}
3153			enabled_rbs >>= 2;
3154		}
3155		WREG32(PA_SC_RASTER_CONFIG, data);
3156	}
3157	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3158}
3159
3160/**
3161 * cik_gpu_init - setup the 3D engine
3162 *
3163 * @rdev: radeon_device pointer
3164 *
3165 * Configures the 3D engine and tiling configuration
3166 * registers so that the 3D engine is usable.
3167 */
3168static void cik_gpu_init(struct radeon_device *rdev)
3169{
3170	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3171	u32 mc_arb_ramcfg;
3172	u32 hdp_host_path_cntl;
3173	u32 tmp;
3174	int i, j;
3175
3176	switch (rdev->family) {
3177	case CHIP_BONAIRE:
3178		rdev->config.cik.max_shader_engines = 2;
3179		rdev->config.cik.max_tile_pipes = 4;
3180		rdev->config.cik.max_cu_per_sh = 7;
3181		rdev->config.cik.max_sh_per_se = 1;
3182		rdev->config.cik.max_backends_per_se = 2;
3183		rdev->config.cik.max_texture_channel_caches = 4;
3184		rdev->config.cik.max_gprs = 256;
3185		rdev->config.cik.max_gs_threads = 32;
3186		rdev->config.cik.max_hw_contexts = 8;
3187
3188		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3189		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3190		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3191		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3192		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3193		break;
3194	case CHIP_HAWAII:
3195		rdev->config.cik.max_shader_engines = 4;
3196		rdev->config.cik.max_tile_pipes = 16;
3197		rdev->config.cik.max_cu_per_sh = 11;
3198		rdev->config.cik.max_sh_per_se = 1;
3199		rdev->config.cik.max_backends_per_se = 4;
3200		rdev->config.cik.max_texture_channel_caches = 16;
3201		rdev->config.cik.max_gprs = 256;
3202		rdev->config.cik.max_gs_threads = 32;
3203		rdev->config.cik.max_hw_contexts = 8;
3204
3205		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3206		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3207		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3208		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3209		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3210		break;
3211	case CHIP_KAVERI:
3212		rdev->config.cik.max_shader_engines = 1;
3213		rdev->config.cik.max_tile_pipes = 4;
3214		rdev->config.cik.max_cu_per_sh = 8;
3215		rdev->config.cik.max_backends_per_se = 2;
3216		rdev->config.cik.max_sh_per_se = 1;
3217		rdev->config.cik.max_texture_channel_caches = 4;
3218		rdev->config.cik.max_gprs = 256;
3219		rdev->config.cik.max_gs_threads = 16;
3220		rdev->config.cik.max_hw_contexts = 8;
3221
3222		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3223		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3224		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3225		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3226		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3227		break;
3228	case CHIP_KABINI:
3229	case CHIP_MULLINS:
3230	default:
3231		rdev->config.cik.max_shader_engines = 1;
3232		rdev->config.cik.max_tile_pipes = 2;
3233		rdev->config.cik.max_cu_per_sh = 2;
3234		rdev->config.cik.max_sh_per_se = 1;
3235		rdev->config.cik.max_backends_per_se = 1;
3236		rdev->config.cik.max_texture_channel_caches = 2;
3237		rdev->config.cik.max_gprs = 256;
3238		rdev->config.cik.max_gs_threads = 16;
3239		rdev->config.cik.max_hw_contexts = 8;
3240
3241		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3242		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3243		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3244		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3245		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3246		break;
3247	}
3248
3249	/* Initialize HDP */
3250	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3251		WREG32((0x2c14 + j), 0x00000000);
3252		WREG32((0x2c18 + j), 0x00000000);
3253		WREG32((0x2c1c + j), 0x00000000);
3254		WREG32((0x2c20 + j), 0x00000000);
3255		WREG32((0x2c24 + j), 0x00000000);
3256	}
3257
3258	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3259	WREG32(SRBM_INT_CNTL, 0x1);
3260	WREG32(SRBM_INT_ACK, 0x1);
3261
3262	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3263
3264	RREG32(MC_SHARED_CHMAP);
3265	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3266
3267	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3268	rdev->config.cik.mem_max_burst_length_bytes = 256;
3269	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3270	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3271	if (rdev->config.cik.mem_row_size_in_kb > 4)
3272		rdev->config.cik.mem_row_size_in_kb = 4;
3273	/* XXX use MC settings? */
3274	rdev->config.cik.shader_engine_tile_size = 32;
3275	rdev->config.cik.num_gpus = 1;
3276	rdev->config.cik.multi_gpu_tile_size = 64;
3277
3278	/* fix up row size */
3279	gb_addr_config &= ~ROW_SIZE_MASK;
3280	switch (rdev->config.cik.mem_row_size_in_kb) {
3281	case 1:
3282	default:
3283		gb_addr_config |= ROW_SIZE(0);
3284		break;
3285	case 2:
3286		gb_addr_config |= ROW_SIZE(1);
3287		break;
3288	case 4:
3289		gb_addr_config |= ROW_SIZE(2);
3290		break;
3291	}
3292
3293	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3294	 * not have bank info, so create a custom tiling dword.
3295	 * bits 3:0   num_pipes
3296	 * bits 7:4   num_banks
3297	 * bits 11:8  group_size
3298	 * bits 15:12 row_size
3299	 */
3300	rdev->config.cik.tile_config = 0;
3301	switch (rdev->config.cik.num_tile_pipes) {
3302	case 1:
3303		rdev->config.cik.tile_config |= (0 << 0);
3304		break;
3305	case 2:
3306		rdev->config.cik.tile_config |= (1 << 0);
3307		break;
3308	case 4:
3309		rdev->config.cik.tile_config |= (2 << 0);
3310		break;
3311	case 8:
3312	default:
3313		/* XXX what about 12? */
3314		rdev->config.cik.tile_config |= (3 << 0);
3315		break;
3316	}
3317	rdev->config.cik.tile_config |=
3318		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3319	rdev->config.cik.tile_config |=
3320		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3321	rdev->config.cik.tile_config |=
3322		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3323
3324	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3325	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3326	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3327	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3328	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3329	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3330	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3331	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3332
3333	cik_tiling_mode_table_init(rdev);
3334
3335	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3336		     rdev->config.cik.max_sh_per_se,
3337		     rdev->config.cik.max_backends_per_se);
3338
3339	rdev->config.cik.active_cus = 0;
3340	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3341		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3342			rdev->config.cik.active_cus +=
3343				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3344		}
3345	}
3346
3347	/* set HW defaults for 3D engine */
3348	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3349
3350	WREG32(SX_DEBUG_1, 0x20);
3351
3352	WREG32(TA_CNTL_AUX, 0x00010000);
3353
3354	tmp = RREG32(SPI_CONFIG_CNTL);
3355	tmp |= 0x03000000;
3356	WREG32(SPI_CONFIG_CNTL, tmp);
3357
3358	WREG32(SQ_CONFIG, 1);
3359
3360	WREG32(DB_DEBUG, 0);
3361
3362	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3363	tmp |= 0x00000400;
3364	WREG32(DB_DEBUG2, tmp);
3365
3366	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3367	tmp |= 0x00020200;
3368	WREG32(DB_DEBUG3, tmp);
3369
3370	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3371	tmp |= 0x00018208;
3372	WREG32(CB_HW_CONTROL, tmp);
3373
3374	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3375
3376	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3377				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3378				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3379				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3380
3381	WREG32(VGT_NUM_INSTANCES, 1);
3382
3383	WREG32(CP_PERFMON_CNTL, 0);
3384
3385	WREG32(SQ_CONFIG, 0);
3386
3387	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3388					  FORCE_EOV_MAX_REZ_CNT(255)));
3389
3390	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3391	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3392
3393	WREG32(VGT_GS_VERTEX_REUSE, 16);
3394	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3395
3396	tmp = RREG32(HDP_MISC_CNTL);
3397	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3398	WREG32(HDP_MISC_CNTL, tmp);
3399
3400	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3401	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3402
3403	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3404	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3405
3406	udelay(50);
3407}
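
/*
 * Editor's note: a minimal sketch (not in the original driver) that
 * unpacks the custom tiling dword assembled above, following the bit
 * layout documented in cik_gpu_init(): bits 3:0 num_pipes (encoded),
 * bits 7:4 num_banks, bits 11:8 group_size, bits 15:12 row_size.
 * The struct and function names are hypothetical.
 */
struct cik_tile_config_sketch {
	u32 num_pipes_enc;	/* 0 = 1, 1 = 2, 2 = 4, 3 = 8 pipes */
	u32 num_banks_enc;
	u32 group_size_enc;
	u32 row_size_enc;
};

static inline void cik_decode_tile_config_sketch(u32 tile_config,
						 struct cik_tile_config_sketch *tc)
{
	tc->num_pipes_enc  = (tile_config >> 0) & 0xf;
	tc->num_banks_enc  = (tile_config >> 4) & 0xf;
	tc->group_size_enc = (tile_config >> 8) & 0xf;
	tc->row_size_enc   = (tile_config >> 12) & 0xf;
}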
3408
3409/*
3410 * GPU scratch registers helpers function.
3411 */
3412/**
3413 * cik_scratch_init - setup driver info for CP scratch regs
3414 *
3415 * @rdev: radeon_device pointer
3416 *
3417 * Set up the number and offset of the CP scratch registers.
3418 * NOTE: use of CP scratch registers is a legacy interface and
3419 * is not used by default on newer asics (r6xx+).  On newer asics,
3420 * memory buffers are used for fences rather than scratch regs.
3421 */
3422static void cik_scratch_init(struct radeon_device *rdev)
3423{
3424	int i;
3425
3426	rdev->scratch.num_reg = 7;
3427	rdev->scratch.reg_base = SCRATCH_REG0;
3428	for (i = 0; i < rdev->scratch.num_reg; i++) {
3429		rdev->scratch.free[i] = true;
3430		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3431	}
3432}
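
/*
 * Editor's note: minimal usage sketch, not part of the original
 * driver.  radeon_scratch_get()/radeon_scratch_free() hand out one of
 * the seven registers set up above; callers access it with plain
 * WREG32()/RREG32().  The function name is hypothetical.
 */
static int __maybe_unused cik_scratch_usage_sketch(struct radeon_device *rdev)
{
	uint32_t reg;
	int r;

	r = radeon_scratch_get(rdev, &reg);	/* reserve a free register */
	if (r)
		return r;
	WREG32(reg, 0x12345678);		/* write a marker value */
	r = (RREG32(reg) == 0x12345678) ? 0 : -EINVAL;
	radeon_scratch_free(rdev, reg);		/* return it to the pool */
	return r;
}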
3433
3434/**
3435 * cik_ring_test - basic gfx ring test
3436 *
3437 * @rdev: radeon_device pointer
3438 * @ring: radeon_ring structure holding ring information
3439 *
3440 * Allocate a scratch register and write to it using the gfx ring (CIK).
3441 * Provides a basic gfx ring test to verify that the ring is working.
3442 * Used by cik_cp_gfx_resume();
3443 * Returns 0 on success, error on failure.
3444 */
3445int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3446{
3447	uint32_t scratch;
3448	uint32_t tmp = 0;
3449	unsigned i;
3450	int r;
3451
3452	r = radeon_scratch_get(rdev, &scratch);
3453	if (r) {
3454		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3455		return r;
3456	}
3457	WREG32(scratch, 0xCAFEDEAD);
3458	r = radeon_ring_lock(rdev, ring, 3);
3459	if (r) {
3460		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3461		radeon_scratch_free(rdev, scratch);
3462		return r;
3463	}
3464	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3465	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3466	radeon_ring_write(ring, 0xDEADBEEF);
3467	radeon_ring_unlock_commit(rdev, ring, false);
3468
3469	for (i = 0; i < rdev->usec_timeout; i++) {
3470		tmp = RREG32(scratch);
3471		if (tmp == 0xDEADBEEF)
3472			break;
3473		udelay(1);
3474	}
3475	if (i < rdev->usec_timeout) {
3476		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3477	} else {
3478		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3479			  ring->idx, scratch, tmp);
3480		r = -EINVAL;
3481	}
3482	radeon_scratch_free(rdev, scratch);
3483	return r;
3484}
3485
3486/**
3487 * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3488 *
3489 * @rdev: radeon_device pointer
3490 * @ridx: radeon ring index
3491 *
3492 * Emits an hdp flush on the cp.
3493 */
3494static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3495				       int ridx)
3496{
3497	struct radeon_ring *ring = &rdev->ring[ridx];
3498	u32 ref_and_mask;
3499
3500	switch (ring->idx) {
3501	case CAYMAN_RING_TYPE_CP1_INDEX:
3502	case CAYMAN_RING_TYPE_CP2_INDEX:
3503	default:
3504		switch (ring->me) {
3505		case 0:
3506			ref_and_mask = CP2 << ring->pipe;
3507			break;
3508		case 1:
3509			ref_and_mask = CP6 << ring->pipe;
3510			break;
3511		default:
3512			return;
3513		}
3514		break;
3515	case RADEON_RING_TYPE_GFX_INDEX:
3516		ref_and_mask = CP0;
3517		break;
3518	}
3519
3520	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3521	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3522				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3523				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3524	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3525	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3526	radeon_ring_write(ring, ref_and_mask);
3527	radeon_ring_write(ring, ref_and_mask);
3528	radeon_ring_write(ring, 0x20); /* poll interval */
3529}
3530
3531/**
3532 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3533 *
3534 * @rdev: radeon_device pointer
3535 * @fence: radeon fence object
3536 *
3537 * Emits a fence sequence number on the gfx ring and flushes
3538 * GPU caches.
3539 */
3540void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3541			     struct radeon_fence *fence)
3542{
3543	struct radeon_ring *ring = &rdev->ring[fence->ring];
3544	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3545
3546	/* Workaround for cache flush problems. First send a dummy EOP
3547	 * event down the pipe with seq one below.
3548	 */
3549	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3550	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3551				 EOP_TC_ACTION_EN |
3552				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3553				 EVENT_INDEX(5)));
3554	radeon_ring_write(ring, addr & 0xfffffffc);
3555	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3556				DATA_SEL(1) | INT_SEL(0));
3557	radeon_ring_write(ring, fence->seq - 1);
3558	radeon_ring_write(ring, 0);
3559
3560	/* Then send the real EOP event down the pipe. */
3561	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3562	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3563				 EOP_TC_ACTION_EN |
3564				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3565				 EVENT_INDEX(5)));
3566	radeon_ring_write(ring, addr & 0xfffffffc);
3567	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3568	radeon_ring_write(ring, fence->seq);
3569	radeon_ring_write(ring, 0);
3570}
3571
3572/**
3573 * cik_fence_compute_ring_emit - emit a fence on the compute ring
3574 *
3575 * @rdev: radeon_device pointer
3576 * @fence: radeon fence object
3577 *
3578 * Emits a fence sequence number on the compute ring and flushes
3579 * GPU caches.
3580 */
3581void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3582				 struct radeon_fence *fence)
3583{
3584	struct radeon_ring *ring = &rdev->ring[fence->ring];
3585	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3586
3587	/* RELEASE_MEM - flush caches, send int */
3588	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3589	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3590				 EOP_TC_ACTION_EN |
3591				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3592				 EVENT_INDEX(5)));
3593	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3594	radeon_ring_write(ring, addr & 0xfffffffc);
3595	radeon_ring_write(ring, upper_32_bits(addr));
3596	radeon_ring_write(ring, fence->seq);
3597	radeon_ring_write(ring, 0);
3598}
3599
3600/**
3601 * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3602 *
3603 * @rdev: radeon_device pointer
3604 * @ring: radeon ring buffer object
3605 * @semaphore: radeon semaphore object
3606 * @emit_wait: Is this a semaphore wait?
3607 *
3608 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3609 * from running ahead of semaphore waits.
3610 */
3611bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3612			     struct radeon_ring *ring,
3613			     struct radeon_semaphore *semaphore,
3614			     bool emit_wait)
3615{
3616	uint64_t addr = semaphore->gpu_addr;
3617	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3618
3619	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3620	radeon_ring_write(ring, lower_32_bits(addr));
3621	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3622
3623	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3624		/* Prevent the PFP from running ahead of the semaphore wait */
3625		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3626		radeon_ring_write(ring, 0x0);
3627	}
3628
3629	return true;
3630}
3631
3632/**
3633 * cik_copy_cpdma - copy pages using the CP DMA engine
3634 *
3635 * @rdev: radeon_device pointer
3636 * @src_offset: src GPU address
3637 * @dst_offset: dst GPU address
3638 * @num_gpu_pages: number of GPU pages to xfer
3639 * @resv: reservation object to sync to
3640 *
3641 * Copy GPU pages using the CP DMA engine (CIK+).
3642 * Used by the radeon ttm implementation to move pages if
3643 * registered as the asic copy callback.
3644 */
3645struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3646				    uint64_t src_offset, uint64_t dst_offset,
3647				    unsigned num_gpu_pages,
3648				    struct dma_resv *resv)
3649{
3650	struct radeon_fence *fence;
3651	struct radeon_sync sync;
3652	int ring_index = rdev->asic->copy.blit_ring_index;
3653	struct radeon_ring *ring = &rdev->ring[ring_index];
3654	u32 size_in_bytes, cur_size_in_bytes, control;
3655	int i, num_loops;
3656	int r = 0;
3657
3658	radeon_sync_create(&sync);
3659
3660	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3661	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3662	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3663	if (r) {
3664		DRM_ERROR("radeon: moving bo (%d).\n", r);
3665		radeon_sync_free(rdev, &sync, NULL);
3666		return ERR_PTR(r);
3667	}
3668
3669	radeon_sync_resv(rdev, &sync, resv, false);
3670	radeon_sync_rings(rdev, &sync, ring->idx);
3671
3672	for (i = 0; i < num_loops; i++) {
3673		cur_size_in_bytes = size_in_bytes;
3674		if (cur_size_in_bytes > 0x1fffff)
3675			cur_size_in_bytes = 0x1fffff;
3676		size_in_bytes -= cur_size_in_bytes;
3677		control = 0;
3678		if (size_in_bytes == 0)
3679			control |= PACKET3_DMA_DATA_CP_SYNC;
3680		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3681		radeon_ring_write(ring, control);
3682		radeon_ring_write(ring, lower_32_bits(src_offset));
3683		radeon_ring_write(ring, upper_32_bits(src_offset));
3684		radeon_ring_write(ring, lower_32_bits(dst_offset));
3685		radeon_ring_write(ring, upper_32_bits(dst_offset));
3686		radeon_ring_write(ring, cur_size_in_bytes);
3687		src_offset += cur_size_in_bytes;
3688		dst_offset += cur_size_in_bytes;
3689	}
3690
3691	r = radeon_fence_emit(rdev, &fence, ring->idx);
3692	if (r) {
3693		radeon_ring_unlock_undo(rdev, ring);
3694		radeon_sync_free(rdev, &sync, NULL);
3695		return ERR_PTR(r);
3696	}
3697
3698	radeon_ring_unlock_commit(rdev, ring, false);
3699	radeon_sync_free(rdev, &sync, fence);
3700
3701	return fence;
3702}
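
/*
 * Editor's note: worked example, not part of the original driver.
 * Each PACKET3_DMA_DATA above moves at most 0x1fffff bytes, so 1024
 * GPU pages (4 MiB) split into DIV_ROUND_UP(0x400000, 0x1fffff) = 3
 * packets: 0x1fffff + 0x1fffff + 2 bytes.  The helper below (name
 * hypothetical) returns the ring space such a copy needs, matching
 * the "num_loops * 7 + 18" in cik_copy_cpdma().
 */
static unsigned __maybe_unused cik_cpdma_ring_dw_sketch(unsigned num_gpu_pages)
{
	u32 size_in_bytes = num_gpu_pages << RADEON_GPU_PAGE_SHIFT;
	int num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);

	/* 7 dwords per DMA_DATA packet plus 18 for sync/fence overhead */
	return num_loops * 7 + 18;
}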
3703
3704/*
3705 * IB stuff
3706 */
3707/**
3708 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3709 *
3710 * @rdev: radeon_device pointer
3711 * @ib: radeon indirect buffer object
3712 *
3713 * Emits a DE (drawing engine) or CE (constant engine) IB
3714 * on the gfx ring.  IBs are usually generated by userspace
3715 * acceleration drivers and submitted to the kernel for
3716 * scheduling on the ring.  This function schedules the IB
3717 * on the gfx ring for execution by the GPU.
3718 */
3719void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3720{
3721	struct radeon_ring *ring = &rdev->ring[ib->ring];
3722	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3723	u32 header, control = INDIRECT_BUFFER_VALID;
3724
3725	if (ib->is_const_ib) {
3726		/* set switch buffer packet before const IB */
3727		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3728		radeon_ring_write(ring, 0);
3729
3730		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3731	} else {
3732		u32 next_rptr;
3733		if (ring->rptr_save_reg) {
3734			next_rptr = ring->wptr + 3 + 4;
3735			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3736			radeon_ring_write(ring, ((ring->rptr_save_reg -
3737						  PACKET3_SET_UCONFIG_REG_START) >> 2));
3738			radeon_ring_write(ring, next_rptr);
3739		} else if (rdev->wb.enabled) {
3740			next_rptr = ring->wptr + 5 + 4;
3741			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3742			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3743			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3744			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3745			radeon_ring_write(ring, next_rptr);
3746		}
3747
3748		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3749	}
3750
3751	control |= ib->length_dw | (vm_id << 24);
3752
3753	radeon_ring_write(ring, header);
3754	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
3755	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3756	radeon_ring_write(ring, control);
3757}
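
/*
 * Editor's note: illustrative sketch, not part of the original
 * driver.  The control dword built above packs the IB length in
 * dwords into the low bits and the VM id at bit 24, on top of
 * INDIRECT_BUFFER_VALID.  The helper name is hypothetical.
 */
static u32 __maybe_unused cik_ib_control_sketch(u32 length_dw, unsigned vm_id)
{
	return INDIRECT_BUFFER_VALID | length_dw | (vm_id << 24);
}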
3758
3759/**
3760 * cik_ib_test - basic gfx ring IB test
3761 *
3762 * @rdev: radeon_device pointer
3763 * @ring: radeon_ring structure holding ring information
3764 *
3765 * Allocate an IB and execute it on the gfx ring (CIK).
3766 * Provides a basic gfx ring test to verify that IBs are working.
3767 * Returns 0 on success, error on failure.
3768 */
3769int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3770{
3771	struct radeon_ib ib;
3772	uint32_t scratch;
3773	uint32_t tmp = 0;
3774	unsigned i;
3775	int r;
3776
3777	r = radeon_scratch_get(rdev, &scratch);
3778	if (r) {
3779		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3780		return r;
3781	}
3782	WREG32(scratch, 0xCAFEDEAD);
3783	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3784	if (r) {
3785		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3786		radeon_scratch_free(rdev, scratch);
3787		return r;
3788	}
3789	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3790	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3791	ib.ptr[2] = 0xDEADBEEF;
3792	ib.length_dw = 3;
3793	r = radeon_ib_schedule(rdev, &ib, NULL, false);
3794	if (r) {
3795		radeon_scratch_free(rdev, scratch);
3796		radeon_ib_free(rdev, &ib);
3797		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3798		return r;
3799	}
3800	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3801		RADEON_USEC_IB_TEST_TIMEOUT));
3802	if (r < 0) {
3803		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3804		radeon_scratch_free(rdev, scratch);
3805		radeon_ib_free(rdev, &ib);
3806		return r;
3807	} else if (r == 0) {
3808		DRM_ERROR("radeon: fence wait timed out.\n");
3809		radeon_scratch_free(rdev, scratch);
3810		radeon_ib_free(rdev, &ib);
3811		return -ETIMEDOUT;
3812	}
3813	r = 0;
3814	for (i = 0; i < rdev->usec_timeout; i++) {
3815		tmp = RREG32(scratch);
3816		if (tmp == 0xDEADBEEF)
3817			break;
3818		udelay(1);
3819	}
3820	if (i < rdev->usec_timeout) {
3821		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3822	} else {
3823		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3824			  scratch, tmp);
3825		r = -EINVAL;
3826	}
3827	radeon_scratch_free(rdev, scratch);
3828	radeon_ib_free(rdev, &ib);
3829	return r;
3830}
3831
3832/*
3833 * CP.
3834 * On CIK, gfx and compute now have independent command processors.
3835 *
3836 * GFX
3837 * Gfx consists of a single ring and can process both gfx jobs and
3838 * compute jobs.  The gfx CP consists of three microengines (ME):
3839 * PFP - Pre-Fetch Parser
3840 * ME - Micro Engine
3841 * CE - Constant Engine
3842 * The PFP and ME make up what is considered the Drawing Engine (DE).
3843 * The CE is an asynchronous engine used for updating buffer descriptors
3844 * used by the DE so that they can be loaded into cache in parallel
3845 * while the DE is processing state update packets.
3846 *
3847 * Compute
3848 * The compute CP consists of two microengines (ME):
3849 * MEC1 - Compute MicroEngine 1
3850 * MEC2 - Compute MicroEngine 2
3851 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3852 * The queues are exposed to userspace and are programmed directly
3853 * by the compute runtime.
3854 */
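/*
 * Editor's note: minimal sketch (not in the original driver) of the
 * queue topology described above: 4 pipes per MEC and 8 queues per
 * pipe, so KV (2 MECs) exposes 2 * 4 * 8 = 64 queues and CI/KB
 * (1 MEC) expose 32.  The function name is hypothetical.
 */
static u32 __maybe_unused cik_compute_queue_count_sketch(u32 num_mec)
{
	const u32 pipes_per_mec = 4, queues_per_pipe = 8;

	return num_mec * pipes_per_mec * queues_per_pipe;
}
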
3855/**
3856 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3857 *
3858 * @rdev: radeon_device pointer
3859 * @enable: enable or disable the MEs
3860 *
3861 * Halts or unhalts the gfx MEs.
3862 */
3863static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3864{
3865	if (enable)
3866		WREG32(CP_ME_CNTL, 0);
3867	else {
3868		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3869			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3870		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3871		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3872	}
3873	udelay(50);
3874}
3875
3876/**
3877 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3878 *
3879 * @rdev: radeon_device pointer
3880 *
3881 * Loads the gfx PFP, ME, and CE ucode.
3882 * Returns 0 for success, -EINVAL if the ucode is not available.
3883 */
3884static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3885{
3886	int i;
3887
3888	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3889		return -EINVAL;
3890
3891	cik_cp_gfx_enable(rdev, false);
3892
3893	if (rdev->new_fw) {
3894		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3895			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3896		const struct gfx_firmware_header_v1_0 *ce_hdr =
3897			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3898		const struct gfx_firmware_header_v1_0 *me_hdr =
3899			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3900		const __le32 *fw_data;
3901		u32 fw_size;
3902
3903		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3904		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3905		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3906
3907		/* PFP */
3908		fw_data = (const __le32 *)
3909			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3910		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3911		WREG32(CP_PFP_UCODE_ADDR, 0);
3912		for (i = 0; i < fw_size; i++)
3913			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3914		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3915
3916		/* CE */
3917		fw_data = (const __le32 *)
3918			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3919		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3920		WREG32(CP_CE_UCODE_ADDR, 0);
3921		for (i = 0; i < fw_size; i++)
3922			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3923		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3924
3925		/* ME */
3926		fw_data = (const __le32 *)
3927			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3928		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3929		WREG32(CP_ME_RAM_WADDR, 0);
3930		for (i = 0; i < fw_size; i++)
3931			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3932		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3933		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3934	} else {
3935		const __be32 *fw_data;
3936
3937		/* PFP */
3938		fw_data = (const __be32 *)rdev->pfp_fw->data;
3939		WREG32(CP_PFP_UCODE_ADDR, 0);
3940		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3941			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3942		WREG32(CP_PFP_UCODE_ADDR, 0);
3943
3944		/* CE */
3945		fw_data = (const __be32 *)rdev->ce_fw->data;
3946		WREG32(CP_CE_UCODE_ADDR, 0);
3947		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3948			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3949		WREG32(CP_CE_UCODE_ADDR, 0);
3950
3951		/* ME */
3952		fw_data = (const __be32 *)rdev->me_fw->data;
3953		WREG32(CP_ME_RAM_WADDR, 0);
3954		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3955			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3956		WREG32(CP_ME_RAM_WADDR, 0);
3957	}
3958
3959	return 0;
3960}
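
/*
 * Editor's note: illustrative refactoring sketch, not in the original
 * driver.  Every engine above follows the same pattern: zero the
 * ucode address register, stream the words through the data register,
 * then write the start address (0 for old firmware, the ucode version
 * for new firmware).  The helper and its parameter names are
 * hypothetical and assume little-endian firmware words.
 */
static void __maybe_unused cik_stream_ucode_sketch(struct radeon_device *rdev,
						   u32 addr_reg, u32 data_reg,
						   const __le32 *data,
						   u32 count, u32 start)
{
	u32 k;

	WREG32(addr_reg, 0);
	for (k = 0; k < count; k++)
		WREG32(data_reg, le32_to_cpup(data++));
	WREG32(addr_reg, start);
}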
3961
3962/**
3963 * cik_cp_gfx_start - start the gfx ring
3964 *
3965 * @rdev: radeon_device pointer
3966 *
3967 * Enables the ring and loads the clear state context and other
3968 * packets required to init the ring.
3969 * Returns 0 for success, error for failure.
3970 */
3971static int cik_cp_gfx_start(struct radeon_device *rdev)
3972{
3973	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3974	int r, i;
3975
3976	/* init the CP */
3977	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3978	WREG32(CP_ENDIAN_SWAP, 0);
3979	WREG32(CP_DEVICE_ID, 1);
3980
3981	cik_cp_gfx_enable(rdev, true);
3982
3983	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3984	if (r) {
3985		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3986		return r;
3987	}
3988
3989	/* init the CE partitions.  CE is only used for gfx on CIK */
3990	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3991	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3992	radeon_ring_write(ring, 0x8000);
3993	radeon_ring_write(ring, 0x8000);
3994
3995	/* setup clear context state */
3996	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3997	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3998
3999	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4000	radeon_ring_write(ring, 0x80000000);
4001	radeon_ring_write(ring, 0x80000000);
4002
4003	for (i = 0; i < cik_default_size; i++)
4004		radeon_ring_write(ring, cik_default_state[i]);
4005
4006	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4007	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4008
4009	/* set clear context state */
4010	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4011	radeon_ring_write(ring, 0);
4012
4013	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4014	radeon_ring_write(ring, 0x00000316);
4015	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4016	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4017
4018	radeon_ring_unlock_commit(rdev, ring, false);
4019
4020	return 0;
4021}
4022
4023/**
4024 * cik_cp_gfx_fini - stop the gfx ring
4025 *
4026 * @rdev: radeon_device pointer
4027 *
4028 * Stop the gfx ring and tear down the driver ring
4029 * info.
4030 */
4031static void cik_cp_gfx_fini(struct radeon_device *rdev)
4032{
4033	cik_cp_gfx_enable(rdev, false);
4034	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4035}
4036
4037/**
4038 * cik_cp_gfx_resume - setup the gfx ring buffer registers
4039 *
4040 * @rdev: radeon_device pointer
4041 *
4042 * Program the location and size of the gfx ring buffer
4043 * and test it to make sure it's working.
4044 * Returns 0 for success, error for failure.
4045 */
4046static int cik_cp_gfx_resume(struct radeon_device *rdev)
4047{
4048	struct radeon_ring *ring;
4049	u32 tmp;
4050	u32 rb_bufsz;
4051	u64 rb_addr;
4052	int r;
4053
4054	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4055	if (rdev->family != CHIP_HAWAII)
4056		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4057
4058	/* Set the write pointer delay */
4059	WREG32(CP_RB_WPTR_DELAY, 0);
4060
4061	/* set the RB to use vmid 0 */
4062	WREG32(CP_RB_VMID, 0);
4063
4064	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4065
4066	/* ring 0 - compute and gfx */
4067	/* Set ring buffer size */
4068	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4069	rb_bufsz = order_base_2(ring->ring_size / 8);
4070	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4071#ifdef __BIG_ENDIAN
4072	tmp |= BUF_SWAP_32BIT;
4073#endif
4074	WREG32(CP_RB0_CNTL, tmp);
4075
4076	/* Initialize the ring buffer's read and write pointers */
4077	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4078	ring->wptr = 0;
4079	WREG32(CP_RB0_WPTR, ring->wptr);
4080
4081	/* set the wb address whether it's enabled or not */
4082	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4083	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4084
4085	/* scratch register shadowing is no longer supported */
4086	WREG32(SCRATCH_UMSK, 0);
4087
4088	if (!rdev->wb.enabled)
4089		tmp |= RB_NO_UPDATE;
4090
4091	mdelay(1);
4092	WREG32(CP_RB0_CNTL, tmp);
4093
4094	rb_addr = ring->gpu_addr >> 8;
4095	WREG32(CP_RB0_BASE, rb_addr);
4096	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4097
4098	/* start the ring */
4099	cik_cp_gfx_start(rdev);
4100	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4101	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4102	if (r) {
4103		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4104		return r;
4105	}
4106
4107	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4108		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4109
4110	return 0;
4111}
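
/*
 * Editor's note: worked example, not part of the original driver.
 * For a 1 MiB ring, ring_size / 8 = 131072, so rb_bufsz =
 * order_base_2(131072) = 17; with 4 KiB pages the rptr update block
 * is order_base_2(4096 / 8) = 9, shifted to bit 8, matching the
 * CP_RB0_CNTL value built in cik_cp_gfx_resume().  The helper name
 * is hypothetical.
 */
static u32 __maybe_unused cik_rb_cntl_sketch(u32 ring_size)
{
	return (order_base_2(RADEON_GPU_PAGE_SIZE / 8) << 8) |
		order_base_2(ring_size / 8);
}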
4112
4113u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4114		     struct radeon_ring *ring)
4115{
4116	u32 rptr;
4117
4118	if (rdev->wb.enabled)
4119		rptr = rdev->wb.wb[ring->rptr_offs/4];
4120	else
4121		rptr = RREG32(CP_RB0_RPTR);
4122
4123	return rptr;
4124}
4125
4126u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4127		     struct radeon_ring *ring)
4128{
4129	return RREG32(CP_RB0_WPTR);
4130}
4131
4132void cik_gfx_set_wptr(struct radeon_device *rdev,
4133		      struct radeon_ring *ring)
4134{
4135	WREG32(CP_RB0_WPTR, ring->wptr);
4136	(void)RREG32(CP_RB0_WPTR);
4137}
4138
4139u32 cik_compute_get_rptr(struct radeon_device *rdev,
4140			 struct radeon_ring *ring)
4141{
4142	u32 rptr;
4143
4144	if (rdev->wb.enabled) {
4145		rptr = rdev->wb.wb[ring->rptr_offs/4];
4146	} else {
4147		mutex_lock(&rdev->srbm_mutex);
4148		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4149		rptr = RREG32(CP_HQD_PQ_RPTR);
4150		cik_srbm_select(rdev, 0, 0, 0, 0);
4151		mutex_unlock(&rdev->srbm_mutex);
4152	}
4153
4154	return rptr;
4155}
4156
4157u32 cik_compute_get_wptr(struct radeon_device *rdev,
4158			 struct radeon_ring *ring)
4159{
4160	u32 wptr;
4161
4162	if (rdev->wb.enabled) {
4163		/* XXX check if swapping is necessary on BE */
4164		wptr = rdev->wb.wb[ring->wptr_offs/4];
4165	} else {
4166		mutex_lock(&rdev->srbm_mutex);
4167		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4168		wptr = RREG32(CP_HQD_PQ_WPTR);
4169		cik_srbm_select(rdev, 0, 0, 0, 0);
4170		mutex_unlock(&rdev->srbm_mutex);
4171	}
4172
4173	return wptr;
4174}
4175
4176void cik_compute_set_wptr(struct radeon_device *rdev,
4177			  struct radeon_ring *ring)
4178{
4179	/* XXX check if swapping is necessary on BE */
4180	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4181	WDOORBELL32(ring->doorbell_index, ring->wptr);
4182}
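
/*
 * Editor's note (not in the original driver): the write-back shadow
 * update above keeps the CPU-visible wptr copy current for the
 * polling paths, while the WDOORBELL32() write is what actually
 * notifies the compute engine; writing the shadow first presumably
 * avoids the engine polling a stale value.
 */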
4183
4184static void cik_compute_stop(struct radeon_device *rdev,
4185			     struct radeon_ring *ring)
4186{
4187	u32 j, tmp;
4188
4189	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4190	/* Disable wptr polling. */
4191	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4192	tmp &= ~WPTR_POLL_EN;
4193	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4194	/* Disable HQD. */
4195	if (RREG32(CP_HQD_ACTIVE) & 1) {
4196		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4197		for (j = 0; j < rdev->usec_timeout; j++) {
4198			if (!(RREG32(CP_HQD_ACTIVE) & 1))
4199				break;
4200			udelay(1);
4201		}
4202		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
4203		WREG32(CP_HQD_PQ_RPTR, 0);
4204		WREG32(CP_HQD_PQ_WPTR, 0);
4205	}
4206	cik_srbm_select(rdev, 0, 0, 0, 0);
4207}
4208
4209/**
4210 * cik_cp_compute_enable - enable/disable the compute CP MEs
4211 *
4212 * @rdev: radeon_device pointer
4213 * @enable: enable or disable the MEs
4214 *
4215 * Halts or unhalts the compute MEs.
4216 */
4217static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4218{
4219	if (enable)
4220		WREG32(CP_MEC_CNTL, 0);
4221	else {
4222		/*
4223		 * To make hibernation reliable, we need to clear the compute
4224		 * ring configuration before halting the compute rings.
4225		 */
4226		mutex_lock(&rdev->srbm_mutex);
4227		cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4228		cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4229		mutex_unlock(&rdev->srbm_mutex);
4230
4231		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4232		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4233		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4234	}
4235	udelay(50);
4236}
4237
4238/**
4239 * cik_cp_compute_load_microcode - load the compute CP ME ucode
4240 *
4241 * @rdev: radeon_device pointer
4242 *
4243 * Loads the compute MEC1&2 ucode.
4244 * Returns 0 for success, -EINVAL if the ucode is not available.
4245 */
4246static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4247{
4248	int i;
4249
4250	if (!rdev->mec_fw)
4251		return -EINVAL;
4252
4253	cik_cp_compute_enable(rdev, false);
4254
4255	if (rdev->new_fw) {
4256		const struct gfx_firmware_header_v1_0 *mec_hdr =
4257			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4258		const __le32 *fw_data;
4259		u32 fw_size;
4260
4261		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4262
4263		/* MEC1 */
4264		fw_data = (const __le32 *)
4265			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4266		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4267		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4268		for (i = 0; i < fw_size; i++)
4269			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4270		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4271
4272		/* MEC2 */
4273		if (rdev->family == CHIP_KAVERI) {
4274			const struct gfx_firmware_header_v1_0 *mec2_hdr =
4275				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4276
4277			fw_data = (const __le32 *)
4278				(rdev->mec2_fw->data +
4279				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4280			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4281			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4282			for (i = 0; i < fw_size; i++)
4283				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4284			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4285		}
4286	} else {
4287		const __be32 *fw_data;
4288
4289		/* MEC1 */
4290		fw_data = (const __be32 *)rdev->mec_fw->data;
4291		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4292		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4293			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4294		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4295
4296		if (rdev->family == CHIP_KAVERI) {
4297			/* MEC2 */
4298			fw_data = (const __be32 *)rdev->mec_fw->data;
4299			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4300			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4301				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4302			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4303		}
4304	}
4305
4306	return 0;
4307}
4308
4309/**
4310 * cik_cp_compute_start - start the compute queues
4311 *
4312 * @rdev: radeon_device pointer
4313 *
4314 * Enable the compute queues.
4315 * Returns 0 for success, error for failure.
4316 */
4317static int cik_cp_compute_start(struct radeon_device *rdev)
4318{
4319	cik_cp_compute_enable(rdev, true);
4320
4321	return 0;
4322}
4323
4324/**
4325 * cik_cp_compute_fini - stop the compute queues
4326 *
4327 * @rdev: radeon_device pointer
4328 *
4329 * Stop the compute queues and tear down the driver queue
4330 * info.
4331 */
4332static void cik_cp_compute_fini(struct radeon_device *rdev)
4333{
4334	int i, idx, r;
4335
4336	cik_cp_compute_enable(rdev, false);
4337
4338	for (i = 0; i < 2; i++) {
4339		if (i == 0)
4340			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4341		else
4342			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4343
4344		if (rdev->ring[idx].mqd_obj) {
4345			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4346			if (unlikely(r != 0))
4347				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4348
4349			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4350			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4351
4352			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4353			rdev->ring[idx].mqd_obj = NULL;
4354		}
4355	}
4356}
4357
4358static void cik_mec_fini(struct radeon_device *rdev)
4359{
4360	int r;
4361
4362	if (rdev->mec.hpd_eop_obj) {
4363		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4364		if (unlikely(r != 0))
4365			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4366		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4367		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4368
4369		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4370		rdev->mec.hpd_eop_obj = NULL;
4371	}
4372}
4373
4374#define MEC_HPD_SIZE 2048
4375
4376static int cik_mec_init(struct radeon_device *rdev)
4377{
4378	int r;
4379	u32 *hpd;
4380
4381	/*
4382	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4383	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4384	 */
4385	if (rdev->family == CHIP_KAVERI)
4386		rdev->mec.num_mec = 2;
4387	else
4388		rdev->mec.num_mec = 1;
4389	rdev->mec.num_pipe = 4;
4390	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4391
4392	if (rdev->mec.hpd_eop_obj == NULL) {
4393		r = radeon_bo_create(rdev,
4394				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4395				     PAGE_SIZE, true,
4396				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4397				     &rdev->mec.hpd_eop_obj);
4398		if (r) {
4399			dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
4400			return r;
4401		}
4402	}
4403
4404	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4405	if (unlikely(r != 0)) {
4406		cik_mec_fini(rdev);
4407		return r;
4408	}
4409	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4410			  &rdev->mec.hpd_eop_gpu_addr);
4411	if (r) {
4412		dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
4413		cik_mec_fini(rdev);
4414		return r;
4415	}
4416	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4417	if (r) {
4418		dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
4419		cik_mec_fini(rdev);
4420		return r;
4421	}
4422
4423	/* clear memory.  Not sure if this is required or not */
4424	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4425
4426	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4427	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4428
4429	return 0;
4430}
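
/*
 * Editor's note: worked sizing example, not part of the original
 * driver.  The EOP buffer above reserves MEC_HPD_SIZE * 2 bytes per
 * pipe, so KAVERI (2 MECs * 4 pipes) allocates 2 * 4 * 2048 * 2 =
 * 32 KiB and CI/KB (1 MEC) allocate 16 KiB.
 */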
4431
4432struct hqd_registers
4433{
4434	u32 cp_mqd_base_addr;
4435	u32 cp_mqd_base_addr_hi;
4436	u32 cp_hqd_active;
4437	u32 cp_hqd_vmid;
4438	u32 cp_hqd_persistent_state;
4439	u32 cp_hqd_pipe_priority;
4440	u32 cp_hqd_queue_priority;
4441	u32 cp_hqd_quantum;
4442	u32 cp_hqd_pq_base;
4443	u32 cp_hqd_pq_base_hi;
4444	u32 cp_hqd_pq_rptr;
4445	u32 cp_hqd_pq_rptr_report_addr;
4446	u32 cp_hqd_pq_rptr_report_addr_hi;
4447	u32 cp_hqd_pq_wptr_poll_addr;
4448	u32 cp_hqd_pq_wptr_poll_addr_hi;
4449	u32 cp_hqd_pq_doorbell_control;
4450	u32 cp_hqd_pq_wptr;
4451	u32 cp_hqd_pq_control;
4452	u32 cp_hqd_ib_base_addr;
4453	u32 cp_hqd_ib_base_addr_hi;
4454	u32 cp_hqd_ib_rptr;
4455	u32 cp_hqd_ib_control;
4456	u32 cp_hqd_iq_timer;
4457	u32 cp_hqd_iq_rptr;
4458	u32 cp_hqd_dequeue_request;
4459	u32 cp_hqd_dma_offload;
4460	u32 cp_hqd_sema_cmd;
4461	u32 cp_hqd_msg_type;
4462	u32 cp_hqd_atomic0_preop_lo;
4463	u32 cp_hqd_atomic0_preop_hi;
4464	u32 cp_hqd_atomic1_preop_lo;
4465	u32 cp_hqd_atomic1_preop_hi;
4466	u32 cp_hqd_hq_scheduler0;
4467	u32 cp_hqd_hq_scheduler1;
4468	u32 cp_mqd_control;
4469};
4470
4471struct bonaire_mqd
4472{
4473	u32 header;
4474	u32 dispatch_initiator;
4475	u32 dimensions[3];
4476	u32 start_idx[3];
4477	u32 num_threads[3];
4478	u32 pipeline_stat_enable;
4479	u32 perf_counter_enable;
4480	u32 pgm[2];
4481	u32 tba[2];
4482	u32 tma[2];
4483	u32 pgm_rsrc[2];
4484	u32 vmid;
4485	u32 resource_limits;
4486	u32 static_thread_mgmt01[2];
4487	u32 tmp_ring_size;
4488	u32 static_thread_mgmt23[2];
4489	u32 restart[3];
4490	u32 thread_trace_enable;
4491	u32 reserved1;
4492	u32 user_data[16];
4493	u32 vgtcs_invoke_count[2];
4494	struct hqd_registers queue_state;
4495	u32 dequeue_cntr;
4496	u32 interrupt_queue[64];
4497};
4498
4499/**
4500 * cik_cp_compute_resume - setup the compute queue registers
4501 *
4502 * @rdev: radeon_device pointer
4503 *
4504 * Program the compute queues and test them to make sure they
4505 * are working.
4506 * Returns 0 for success, error for failure.
4507 */
4508static int cik_cp_compute_resume(struct radeon_device *rdev)
4509{
4510	int r, i, j, idx;
4511	u32 tmp;
4512	bool use_doorbell = true;
4513	u64 hqd_gpu_addr;
4514	u64 mqd_gpu_addr;
4515	u64 eop_gpu_addr;
4516	u64 wb_gpu_addr;
4517	u32 *buf;
4518	struct bonaire_mqd *mqd;
4519
4520	r = cik_cp_compute_start(rdev);
4521	if (r)
4522		return r;
4523
4524	/* fix up chicken bits */
4525	tmp = RREG32(CP_CPF_DEBUG);
4526	tmp |= (1 << 23);
4527	WREG32(CP_CPF_DEBUG, tmp);
4528
4529	/* init the pipes */
4530	mutex_lock(&rdev->srbm_mutex);
4531
4532	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); ++i) {
4533		int me = (i < 4) ? 1 : 2;
4534		int pipe = (i < 4) ? i : (i - 4);
4535
4536		cik_srbm_select(rdev, me, pipe, 0, 0);
4537
4538		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4539		/* write the EOP addr */
4540		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4541		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4542
4543		/* set the VMID assigned */
4544		WREG32(CP_HPD_EOP_VMID, 0);
4545
4546		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4547		tmp = RREG32(CP_HPD_EOP_CONTROL);
4548		tmp &= ~EOP_SIZE_MASK;
4549		tmp |= order_base_2(MEC_HPD_SIZE / 8);
4550		WREG32(CP_HPD_EOP_CONTROL, tmp);
4551
4552	}
4553	cik_srbm_select(rdev, 0, 0, 0, 0);
4554	mutex_unlock(&rdev->srbm_mutex);
4555
4556	/* init the queues.  Just two for now. */
4557	for (i = 0; i < 2; i++) {
4558		if (i == 0)
4559			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4560		else
4561			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4562
4563		if (rdev->ring[idx].mqd_obj == NULL) {
4564			r = radeon_bo_create(rdev,
4565					     sizeof(struct bonaire_mqd),
4566					     PAGE_SIZE, true,
4567					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4568					     NULL, &rdev->ring[idx].mqd_obj);
4569			if (r) {
4570				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4571				return r;
4572			}
4573		}
4574
4575		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4576		if (unlikely(r != 0)) {
4577			cik_cp_compute_fini(rdev);
4578			return r;
4579		}
4580		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4581				  &mqd_gpu_addr);
4582		if (r) {
4583			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4584			cik_cp_compute_fini(rdev);
4585			return r;
4586		}
4587		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4588		if (r) {
4589			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4590			cik_cp_compute_fini(rdev);
4591			return r;
4592		}
4593
4594		/* init the mqd struct */
4595		memset(buf, 0, sizeof(struct bonaire_mqd));
4596
4597		mqd = (struct bonaire_mqd *)buf;
4598		mqd->header = 0xC0310800;
4599		mqd->static_thread_mgmt01[0] = 0xffffffff;
4600		mqd->static_thread_mgmt01[1] = 0xffffffff;
4601		mqd->static_thread_mgmt23[0] = 0xffffffff;
4602		mqd->static_thread_mgmt23[1] = 0xffffffff;
4603
4604		mutex_lock(&rdev->srbm_mutex);
4605		cik_srbm_select(rdev, rdev->ring[idx].me,
4606				rdev->ring[idx].pipe,
4607				rdev->ring[idx].queue, 0);
4608
4609		/* disable wptr polling */
4610		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4611		tmp &= ~WPTR_POLL_EN;
4612		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4613
4614		/* enable doorbell? */
4615		mqd->queue_state.cp_hqd_pq_doorbell_control =
4616			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4617		if (use_doorbell)
4618			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4619		else
4620			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4621		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4622		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4623
4624		/* disable the queue if it's active */
4625		mqd->queue_state.cp_hqd_dequeue_request = 0;
4626		mqd->queue_state.cp_hqd_pq_rptr = 0;
4627		mqd->queue_state.cp_hqd_pq_wptr = 0;
4628		if (RREG32(CP_HQD_ACTIVE) & 1) {
4629			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4630			for (j = 0; j < rdev->usec_timeout; j++) {
4631				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4632					break;
4633				udelay(1);
4634			}
4635			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4636			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4637			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4638		}
4639
4640		/* set the pointer to the MQD */
4641		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4642		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4643		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4644		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4645		/* set MQD vmid to 0 */
4646		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4647		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4648		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4649
4650		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4651		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4652		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4653		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4654		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4655		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4656
4657		/* set up the HQD, this is similar to CP_RB0_CNTL */
4658		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4659		mqd->queue_state.cp_hqd_pq_control &=
4660			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4661
4662		mqd->queue_state.cp_hqd_pq_control |=
4663			order_base_2(rdev->ring[idx].ring_size / 8);
4664		mqd->queue_state.cp_hqd_pq_control |=
4665			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4666#ifdef __BIG_ENDIAN
4667		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4668#endif
4669		mqd->queue_state.cp_hqd_pq_control &=
4670			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4671		mqd->queue_state.cp_hqd_pq_control |=
4672			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4673		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4674
4675		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4676		if (i == 0)
4677			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4678		else
4679			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4680		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4681		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4682		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4683		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4684		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4685
4686		/* set the wb address whether it's enabled or not */
4687		if (i == 0)
4688			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4689		else
4690			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4691		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4692		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4693			upper_32_bits(wb_gpu_addr) & 0xffff;
4694		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4695		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4696		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4697		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4698
4699		/* enable the doorbell if requested */
4700		if (use_doorbell) {
4701			mqd->queue_state.cp_hqd_pq_doorbell_control =
4702				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4703			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4704			mqd->queue_state.cp_hqd_pq_doorbell_control |=
4705				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4706			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4707			mqd->queue_state.cp_hqd_pq_doorbell_control &=
4708				~(DOORBELL_SOURCE | DOORBELL_HIT);
4709
4710		} else {
4711			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4712		}
4713		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4714		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4715
4716		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4717		rdev->ring[idx].wptr = 0;
4718		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4719		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4720		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4721
4722		/* set the vmid for the queue */
4723		mqd->queue_state.cp_hqd_vmid = 0;
4724		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4725
4726		/* activate the queue */
4727		mqd->queue_state.cp_hqd_active = 1;
4728		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4729
4730		cik_srbm_select(rdev, 0, 0, 0, 0);
4731		mutex_unlock(&rdev->srbm_mutex);
4732
4733		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4734		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4735
4736		rdev->ring[idx].ready = true;
4737		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4738		if (r)
4739			rdev->ring[idx].ready = false;
4740	}
4741
4742	return 0;
4743}
4744
4745static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4746{
4747	cik_cp_gfx_enable(rdev, enable);
4748	cik_cp_compute_enable(rdev, enable);
4749}
4750
4751static int cik_cp_load_microcode(struct radeon_device *rdev)
4752{
4753	int r;
4754
4755	r = cik_cp_gfx_load_microcode(rdev);
4756	if (r)
4757		return r;
4758	r = cik_cp_compute_load_microcode(rdev);
4759	if (r)
4760		return r;
4761
4762	return 0;
4763}
4764
4765static void cik_cp_fini(struct radeon_device *rdev)
4766{
4767	cik_cp_gfx_fini(rdev);
4768	cik_cp_compute_fini(rdev);
4769}
4770
4771static int cik_cp_resume(struct radeon_device *rdev)
4772{
4773	int r;
4774
4775	cik_enable_gui_idle_interrupt(rdev, false);
4776
4777	r = cik_cp_load_microcode(rdev);
4778	if (r)
4779		return r;
4780
4781	r = cik_cp_gfx_resume(rdev);
4782	if (r)
4783		return r;
4784	r = cik_cp_compute_resume(rdev);
4785	if (r)
4786		return r;
4787
4788	cik_enable_gui_idle_interrupt(rdev, true);
4789
4790	return 0;
4791}
4792
4793static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4794{
4795	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4796		RREG32(GRBM_STATUS));
4797	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4798		RREG32(GRBM_STATUS2));
4799	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4800		RREG32(GRBM_STATUS_SE0));
4801	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4802		RREG32(GRBM_STATUS_SE1));
4803	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4804		RREG32(GRBM_STATUS_SE2));
4805	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4806		RREG32(GRBM_STATUS_SE3));
4807	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4808		RREG32(SRBM_STATUS));
4809	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4810		RREG32(SRBM_STATUS2));
4811	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4812		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4813	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4814		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4815	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4816	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4817		 RREG32(CP_STALLED_STAT1));
4818	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4819		 RREG32(CP_STALLED_STAT2));
4820	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4821		 RREG32(CP_STALLED_STAT3));
4822	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4823		 RREG32(CP_CPF_BUSY_STAT));
4824	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4825		 RREG32(CP_CPF_STALLED_STAT1));
4826	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4827	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4828	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4829		 RREG32(CP_CPC_STALLED_STAT1));
4830	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4831}
4832
4833/**
4834 * cik_gpu_check_soft_reset - check which blocks are busy
4835 *
4836 * @rdev: radeon_device pointer
4837 *
4838 * Check which blocks are busy and return the relevant reset
4839 * mask to be used by cik_gpu_soft_reset().
4840 * Returns a mask of the blocks to be reset.
4841 */
4842u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4843{
4844	u32 reset_mask = 0;
4845	u32 tmp;
4846
4847	/* GRBM_STATUS */
4848	tmp = RREG32(GRBM_STATUS);
4849	if (tmp & (PA_BUSY | SC_BUSY |
4850		   BCI_BUSY | SX_BUSY |
4851		   TA_BUSY | VGT_BUSY |
4852		   DB_BUSY | CB_BUSY |
4853		   GDS_BUSY | SPI_BUSY |
4854		   IA_BUSY | IA_BUSY_NO_DMA))
4855		reset_mask |= RADEON_RESET_GFX;
4856
4857	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4858		reset_mask |= RADEON_RESET_CP;
4859
4860	/* GRBM_STATUS2 */
4861	tmp = RREG32(GRBM_STATUS2);
4862	if (tmp & RLC_BUSY)
4863		reset_mask |= RADEON_RESET_RLC;
4864
4865	/* SDMA0_STATUS_REG */
4866	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4867	if (!(tmp & SDMA_IDLE))
4868		reset_mask |= RADEON_RESET_DMA;
4869
4870	/* SDMA1_STATUS_REG */
4871	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4872	if (!(tmp & SDMA_IDLE))
4873		reset_mask |= RADEON_RESET_DMA1;
4874
4875	/* SRBM_STATUS2 */
4876	tmp = RREG32(SRBM_STATUS2);
4877	if (tmp & SDMA_BUSY)
4878		reset_mask |= RADEON_RESET_DMA;
4879
4880	if (tmp & SDMA1_BUSY)
4881		reset_mask |= RADEON_RESET_DMA1;
4882
4883	/* SRBM_STATUS */
4884	tmp = RREG32(SRBM_STATUS);
4885
4886	if (tmp & IH_BUSY)
4887		reset_mask |= RADEON_RESET_IH;
4888
4889	if (tmp & SEM_BUSY)
4890		reset_mask |= RADEON_RESET_SEM;
4891
4892	if (tmp & GRBM_RQ_PENDING)
4893		reset_mask |= RADEON_RESET_GRBM;
4894
4895	if (tmp & VMC_BUSY)
4896		reset_mask |= RADEON_RESET_VMC;
4897
4898	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4899		   MCC_BUSY | MCD_BUSY))
4900		reset_mask |= RADEON_RESET_MC;
4901
4902	if (evergreen_is_display_hung(rdev))
4903		reset_mask |= RADEON_RESET_DISPLAY;
4904
4905	/* Skip MC reset as it's most likely not hung, just busy */
4906	if (reset_mask & RADEON_RESET_MC) {
4907		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4908		reset_mask &= ~RADEON_RESET_MC;
4909	}
4910
4911	return reset_mask;
4912}
4913
4914/**
4915 * cik_gpu_soft_reset - soft reset GPU
4916 *
4917 * @rdev: radeon_device pointer
4918 * @reset_mask: mask of which blocks to reset
4919 *
4920 * Soft reset the blocks specified in @reset_mask.
4921 */
4922static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4923{
4924	struct evergreen_mc_save save;
4925	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4926	u32 tmp;
4927
4928	if (reset_mask == 0)
4929		return;
4930
4931	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4932
4933	cik_print_gpu_status_regs(rdev);
4934	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4935		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4936	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4937		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4938
4939	/* disable CG/PG */
4940	cik_fini_pg(rdev);
4941	cik_fini_cg(rdev);
4942
4943	/* stop the rlc */
4944	cik_rlc_stop(rdev);
4945
4946	/* Disable GFX parsing/prefetching */
4947	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4948
4949	/* Disable MEC parsing/prefetching */
4950	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4951
4952	if (reset_mask & RADEON_RESET_DMA) {
4953		/* sdma0 */
4954		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4955		tmp |= SDMA_HALT;
4956		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4957	}
4958	if (reset_mask & RADEON_RESET_DMA1) {
4959		/* sdma1 */
4960		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4961		tmp |= SDMA_HALT;
4962		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4963	}
4964
4965	evergreen_mc_stop(rdev, &save);
4966	if (evergreen_mc_wait_for_idle(rdev)) {
4967		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4968	}
4969
4970	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4971		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4972
4973	if (reset_mask & RADEON_RESET_CP) {
4974		grbm_soft_reset |= SOFT_RESET_CP;
4975
4976		srbm_soft_reset |= SOFT_RESET_GRBM;
4977	}
4978
4979	if (reset_mask & RADEON_RESET_DMA)
4980		srbm_soft_reset |= SOFT_RESET_SDMA;
4981
4982	if (reset_mask & RADEON_RESET_DMA1)
4983		srbm_soft_reset |= SOFT_RESET_SDMA1;
4984
4985	if (reset_mask & RADEON_RESET_DISPLAY)
4986		srbm_soft_reset |= SOFT_RESET_DC;
4987
4988	if (reset_mask & RADEON_RESET_RLC)
4989		grbm_soft_reset |= SOFT_RESET_RLC;
4990
4991	if (reset_mask & RADEON_RESET_SEM)
4992		srbm_soft_reset |= SOFT_RESET_SEM;
4993
4994	if (reset_mask & RADEON_RESET_IH)
4995		srbm_soft_reset |= SOFT_RESET_IH;
4996
4997	if (reset_mask & RADEON_RESET_GRBM)
4998		srbm_soft_reset |= SOFT_RESET_GRBM;
4999
5000	if (reset_mask & RADEON_RESET_VMC)
5001		srbm_soft_reset |= SOFT_RESET_VMC;
5002
5003	if (!(rdev->flags & RADEON_IS_IGP)) {
5004		if (reset_mask & RADEON_RESET_MC)
5005			srbm_soft_reset |= SOFT_RESET_MC;
5006	}
5007
5008	if (grbm_soft_reset) {
5009		tmp = RREG32(GRBM_SOFT_RESET);
5010		tmp |= grbm_soft_reset;
5011		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5012		WREG32(GRBM_SOFT_RESET, tmp);
5013		tmp = RREG32(GRBM_SOFT_RESET);
5014
5015		udelay(50);
5016
5017		tmp &= ~grbm_soft_reset;
5018		WREG32(GRBM_SOFT_RESET, tmp);
5019		tmp = RREG32(GRBM_SOFT_RESET);
5020	}
5021
5022	if (srbm_soft_reset) {
5023		tmp = RREG32(SRBM_SOFT_RESET);
5024		tmp |= srbm_soft_reset;
5025		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5026		WREG32(SRBM_SOFT_RESET, tmp);
5027		tmp = RREG32(SRBM_SOFT_RESET);
5028
5029		udelay(50);
5030
5031		tmp &= ~srbm_soft_reset;
5032		WREG32(SRBM_SOFT_RESET, tmp);
5033		tmp = RREG32(SRBM_SOFT_RESET);
5034	}
5035
5036	/* Wait a little for things to settle down */
5037	udelay(50);
5038
5039	evergreen_mc_resume(rdev, &save);
5040	udelay(50);
5041
5042	cik_print_gpu_status_regs(rdev);
5043}
5044
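/*
 * On Kaveri (IGP) the GMCON state is not preserved across a PCI config
 * reset, so the registers below are saved before the reset and
 * restored afterwards.
 */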
5045struct kv_reset_save_regs {
5046	u32 gmcon_reng_execute;
5047	u32 gmcon_misc;
5048	u32 gmcon_misc3;
5049};
5050
5051static void kv_save_regs_for_reset(struct radeon_device *rdev,
5052				   struct kv_reset_save_regs *save)
5053{
5054	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5055	save->gmcon_misc = RREG32(GMCON_MISC);
5056	save->gmcon_misc3 = RREG32(GMCON_MISC3);
5057
5058	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5059	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5060						STCTRL_STUTTER_EN));
5061}
5062
5063static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5064				      struct kv_reset_save_regs *save)
5065{
5066	int i;
5067
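	/*
	 * Replay the GMCON power-gating FSM restore sequence.  The
	 * config/write pairs are magic values carried over from the
	 * reference driver; each config presumably selects an FSM stage
	 * and the dummy writes clock it forward.
	 */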
5068	WREG32(GMCON_PGFSM_WRITE, 0);
5069	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5070
5071	for (i = 0; i < 5; i++)
5072		WREG32(GMCON_PGFSM_WRITE, 0);
5073
5074	WREG32(GMCON_PGFSM_WRITE, 0);
5075	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5076
5077	for (i = 0; i < 5; i++)
5078		WREG32(GMCON_PGFSM_WRITE, 0);
5079
5080	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5081	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5082
5083	for (i = 0; i < 5; i++)
5084		WREG32(GMCON_PGFSM_WRITE, 0);
5085
5086	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5087	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5088
5089	for (i = 0; i < 5; i++)
5090		WREG32(GMCON_PGFSM_WRITE, 0);
5091
5092	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5093	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5094
5095	for (i = 0; i < 5; i++)
5096		WREG32(GMCON_PGFSM_WRITE, 0);
5097
5098	WREG32(GMCON_PGFSM_WRITE, 0);
5099	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5100
5101	for (i = 0; i < 5; i++)
5102		WREG32(GMCON_PGFSM_WRITE, 0);
5103
5104	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5105	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5106
5107	for (i = 0; i < 5; i++)
5108		WREG32(GMCON_PGFSM_WRITE, 0);
5109
5110	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5111	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5112
5113	for (i = 0; i < 5; i++)
5114		WREG32(GMCON_PGFSM_WRITE, 0);
5115
5116	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5117	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5118
5119	for (i = 0; i < 5; i++)
5120		WREG32(GMCON_PGFSM_WRITE, 0);
5121
5122	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5123	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5124
5125	for (i = 0; i < 5; i++)
5126		WREG32(GMCON_PGFSM_WRITE, 0);
5127
5128	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5129	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5130
5131	WREG32(GMCON_MISC3, save->gmcon_misc3);
5132	WREG32(GMCON_MISC, save->gmcon_misc);
5133	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5134}
5135
5136static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5137{
5138	struct evergreen_mc_save save;
5139	struct kv_reset_save_regs kv_save = { 0 };
5140	u32 tmp, i;
5141
5142	dev_info(rdev->dev, "GPU pci config reset\n");
5143
5144	/* disable dpm? */
5145
5146	/* disable cg/pg */
5147	cik_fini_pg(rdev);
5148	cik_fini_cg(rdev);
5149
5150	/* Disable GFX parsing/prefetching */
5151	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5152
5153	/* Disable MEC parsing/prefetching */
5154	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5155
5156	/* sdma0 */
5157	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5158	tmp |= SDMA_HALT;
5159	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5160	/* sdma1 */
5161	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5162	tmp |= SDMA_HALT;
5163	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5164	/* XXX other engines? */
5165
5166	/* halt the rlc, disable cp internal ints */
5167	cik_rlc_stop(rdev);
5168
5169	udelay(50);
5170
5171	/* disable mem access */
5172	evergreen_mc_stop(rdev, &save);
5173	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5175	}
5176
5177	if (rdev->flags & RADEON_IS_IGP)
5178		kv_save_regs_for_reset(rdev, &kv_save);
5179
5180	/* disable BM */
5181	pci_clear_master(rdev->pdev);
5182	/* reset */
5183	radeon_pci_config_reset(rdev);
5184
5185	udelay(100);
5186
5187	/* wait for asic to come out of reset */
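	/* config reads return all 1s while the device is still in reset */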
5188	for (i = 0; i < rdev->usec_timeout; i++) {
5189		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5190			break;
5191		udelay(1);
5192	}
5193
5194	/* does asic init need to be run first??? */
5195	if (rdev->flags & RADEON_IS_IGP)
5196		kv_restore_regs_for_reset(rdev, &kv_save);
5197}
5198
5199/**
 * cik_asic_reset - soft or hard reset GPU
5201 *
5202 * @rdev: radeon_device pointer
5203 * @hard: force hard reset
5204 *
5205 * Look up which blocks are hung and attempt
5206 * to reset them.
5207 * Returns 0 for success.
5208 */
5209int cik_asic_reset(struct radeon_device *rdev, bool hard)
5210{
5211	u32 reset_mask;
5212
5213	if (hard) {
5214		cik_gpu_pci_config_reset(rdev);
5215		return 0;
5216	}
5217
5218	reset_mask = cik_gpu_check_soft_reset(rdev);
5219
5220	if (reset_mask)
5221		r600_set_bios_scratch_engine_hung(rdev, true);
5222
5223	/* try soft reset */
5224	cik_gpu_soft_reset(rdev, reset_mask);
5225
5226	reset_mask = cik_gpu_check_soft_reset(rdev);
5227
5228	/* try pci config reset */
5229	if (reset_mask && radeon_hard_reset)
5230		cik_gpu_pci_config_reset(rdev);
5231
5232	reset_mask = cik_gpu_check_soft_reset(rdev);
5233
5234	if (!reset_mask)
5235		r600_set_bios_scratch_engine_hung(rdev, false);
5236
5237	return 0;
5238}
5239
5240/**
5241 * cik_gfx_is_lockup - check if the 3D engine is locked up
5242 *
5243 * @rdev: radeon_device pointer
5244 * @ring: radeon_ring structure holding ring information
5245 *
5246 * Check if the 3D engine is locked up (CIK).
5247 * Returns true if the engine is locked, false if not.
5248 */
5249bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5250{
5251	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5252
5253	if (!(reset_mask & (RADEON_RESET_GFX |
5254			    RADEON_RESET_COMPUTE |
5255			    RADEON_RESET_CP))) {
5256		radeon_ring_lockup_update(rdev, ring);
5257		return false;
5258	}
5259	return radeon_ring_test_lockup(rdev, ring);
5260}
5261
5262/* MC */
5263/**
5264 * cik_mc_program - program the GPU memory controller
5265 *
5266 * @rdev: radeon_device pointer
5267 *
5268 * Set the location of vram, gart, and AGP in the GPU's
5269 * physical address space (CIK).
5270 */
5271static void cik_mc_program(struct radeon_device *rdev)
5272{
5273	struct evergreen_mc_save save;
5274	u32 tmp;
5275	int i, j;
5276
5277	/* Initialize HDP */
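	/* the 0x2c14-0x2c24 block appears to be per-slot HDP state; clear all 32 slots */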
5278	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5279		WREG32((0x2c14 + j), 0x00000000);
5280		WREG32((0x2c18 + j), 0x00000000);
5281		WREG32((0x2c1c + j), 0x00000000);
5282		WREG32((0x2c20 + j), 0x00000000);
5283		WREG32((0x2c24 + j), 0x00000000);
5284	}
5285	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5286
5287	evergreen_mc_stop(rdev, &save);
5288	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5290	}
	/* Lock out access through VGA aperture */
5292	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5293	/* Update configuration */
5294	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5295	       rdev->mc.vram_start >> 12);
5296	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5297	       rdev->mc.vram_end >> 12);
5298	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5299	       rdev->vram_scratch.gpu_addr >> 12);
5300	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5301	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5302	WREG32(MC_VM_FB_LOCATION, tmp);
5303	/* XXX double check these! */
5304	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5305	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5306	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5307	WREG32(MC_VM_AGP_BASE, 0);
5308	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5309	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5310	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5312	}
5313	evergreen_mc_resume(rdev, &save);
5314	/* we need to own VRAM, so turn off the VGA renderer here
5315	 * to stop it overwriting our objects */
5316	rv515_vga_render_disable(rdev);
5317}
5318
5319/**
5320 * cik_mc_init - initialize the memory controller driver params
5321 *
5322 * @rdev: radeon_device pointer
5323 *
5324 * Look up the amount of vram, vram width, and decide how to place
5325 * vram and gart within the GPU's physical address space (CIK).
5326 * Returns 0 for success.
5327 */
5328static int cik_mc_init(struct radeon_device *rdev)
5329{
5330	u32 tmp;
5331	int chansize, numchan;
5332
	/* Get VRAM information */
5334	rdev->mc.vram_is_ddr = true;
5335	tmp = RREG32(MC_ARB_RAMCFG);
5336	if (tmp & CHANSIZE_MASK) {
5337		chansize = 64;
5338	} else {
5339		chansize = 32;
5340	}
5341	tmp = RREG32(MC_SHARED_CHMAP);
5342	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5343	case 0:
5344	default:
5345		numchan = 1;
5346		break;
5347	case 1:
5348		numchan = 2;
5349		break;
5350	case 2:
5351		numchan = 4;
5352		break;
5353	case 3:
5354		numchan = 8;
5355		break;
5356	case 4:
5357		numchan = 3;
5358		break;
5359	case 5:
5360		numchan = 6;
5361		break;
5362	case 6:
5363		numchan = 10;
5364		break;
5365	case 7:
5366		numchan = 12;
5367		break;
5368	case 8:
5369		numchan = 16;
5370		break;
5371	}
5372	rdev->mc.vram_width = numchan * chansize;
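	/* e.g. 16 channels of 32 bits gives the 512-bit interface on Hawaii */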
	/* Could the aperture size report 0? */
5374	rdev->mc.aper_base = rdev->fb_aper_offset;
5375	rdev->mc.aper_size = rdev->fb_aper_size;
	/* CONFIG_MEMSIZE reports the size in MB, as on SI */
5377	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5378	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5379	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5380	si_vram_gtt_location(rdev, &rdev->mc);
5381	radeon_update_bandwidth_info(rdev);
5382
5383	return 0;
5384}
5385
5386/*
5387 * GART
5388 * VMID 0 is the physical GPU addresses as used by the kernel.
5389 * VMIDs 1-15 are used for userspace clients and are handled
5390 * by the radeon vm/hsa code.
5391 */
5392/**
5393 * cik_pcie_gart_tlb_flush - gart tlb flush callback
5394 *
5395 * @rdev: radeon_device pointer
5396 *
5397 * Flush the TLB for the VMID 0 page table (CIK).
5398 */
5399void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5400{
5401	/* flush hdp cache */
5402	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5403
	/* bits 0-15 are the VM contexts 0-15 */
5405	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5406}
5407
5408/**
5409 * cik_pcie_gart_enable - gart enable
5410 *
5411 * @rdev: radeon_device pointer
5412 *
5413 * This sets up the TLBs, programs the page tables for VMID0,
5414 * sets up the hw for VMIDs 1-15 which are allocated on
5415 * demand, and sets up the global locations for the LDS, GDS,
5416 * and GPUVM for FSA64 clients (CIK).
5417 * Returns 0 for success, errors for failure.
5418 */
5419static int cik_pcie_gart_enable(struct radeon_device *rdev)
5420{
5421	int r, i;
5422
5423	if (rdev->gart.robj == NULL) {
5424		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5425		return -EINVAL;
5426	}
5427	r = radeon_gart_table_vram_pin(rdev);
5428	if (r)
5429		return r;
5430	/* Setup TLB control */
5431	WREG32(MC_VM_MX_L1_TLB_CNTL,
5432	       (0xA << 7) |
5433	       ENABLE_L1_TLB |
5434	       ENABLE_L1_FRAGMENT_PROCESSING |
5435	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5436	       ENABLE_ADVANCED_DRIVER_MODEL |
5437	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5438	/* Setup L2 cache */
5439	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5440	       ENABLE_L2_FRAGMENT_PROCESSING |
5441	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5442	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5443	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5444	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5445	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5446	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5447	       BANK_SELECT(4) |
5448	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5449	/* setup context0 */
5450	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5451	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5452	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5453	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5454			(u32)(rdev->dummy_page.addr >> 12));
5455	WREG32(VM_CONTEXT0_CNTL2, 0);
5456	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5457				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5458
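	/* clear three VM registers that have no symbolic names in cikd.h */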
5459	WREG32(0x15D4, 0);
5460	WREG32(0x15D8, 0);
5461	WREG32(0x15DC, 0);
5462
5463	/* restore context1-15 */
5464	/* set vm size, must be a multiple of 4 */
5465	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5466	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5467	for (i = 1; i < 16; i++) {
5468		if (i < 8)
5469			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5470			       rdev->vm_manager.saved_table_addr[i]);
5471		else
5472			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5473			       rdev->vm_manager.saved_table_addr[i]);
5474	}
5475
5476	/* enable context1-15 */
5477	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5478	       (u32)(rdev->dummy_page.addr >> 12));
5479	WREG32(VM_CONTEXT1_CNTL2, 4);
5480	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5481				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5482				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5483				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5484				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5485				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5486				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5487				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5488				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5489				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5490				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5491				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5492				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5493				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5494
5495	if (rdev->family == CHIP_KAVERI) {
5496		u32 tmp = RREG32(CHUB_CONTROL);
5497		tmp &= ~BYPASS_VM;
5498		WREG32(CHUB_CONTROL, tmp);
5499	}
5500
5501	/* XXX SH_MEM regs */
5502	/* where to put LDS, scratch, GPUVM in FSA64 space */
5503	mutex_lock(&rdev->srbm_mutex);
5504	for (i = 0; i < 16; i++) {
5505		cik_srbm_select(rdev, 0, 0, 0, i);
5506		/* CP and shaders */
5507		WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
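		/* APE1 base > limit leaves the aperture disabled */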
5508		WREG32(SH_MEM_APE1_BASE, 1);
5509		WREG32(SH_MEM_APE1_LIMIT, 0);
5510		WREG32(SH_MEM_BASES, 0);
5511		/* SDMA GFX */
5512		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5513		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5514		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5515		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5516		/* XXX SDMA RLC - todo */
5517	}
5518	cik_srbm_select(rdev, 0, 0, 0, 0);
5519	mutex_unlock(&rdev->srbm_mutex);
5520
5521	cik_pcie_gart_tlb_flush(rdev);
5522	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5523		 (unsigned)(rdev->mc.gtt_size >> 20),
5524		 (unsigned long long)rdev->gart.table_addr);
5525	rdev->gart.ready = true;
5526	return 0;
5527}
5528
5529/**
5530 * cik_pcie_gart_disable - gart disable
5531 *
5532 * @rdev: radeon_device pointer
5533 *
 * Disables all VM page tables (CIK).
5535 */
5536static void cik_pcie_gart_disable(struct radeon_device *rdev)
5537{
5538	unsigned i;
5539
5540	for (i = 1; i < 16; ++i) {
5541		uint32_t reg;
5542		if (i < 8)
5543			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5544		else
5545			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5546		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5547	}
5548
5549	/* Disable all tables */
5550	WREG32(VM_CONTEXT0_CNTL, 0);
5551	WREG32(VM_CONTEXT1_CNTL, 0);
5552	/* Setup TLB control */
5553	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5554	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5555	/* Setup L2 cache */
5556	WREG32(VM_L2_CNTL,
5557	       ENABLE_L2_FRAGMENT_PROCESSING |
5558	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5559	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5560	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5561	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5562	WREG32(VM_L2_CNTL2, 0);
5563	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5564	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5565	radeon_gart_table_vram_unpin(rdev);
5566}
5567
5568/**
5569 * cik_pcie_gart_fini - vm fini callback
5570 *
5571 * @rdev: radeon_device pointer
5572 *
5573 * Tears down the driver GART/VM setup (CIK).
5574 */
5575static void cik_pcie_gart_fini(struct radeon_device *rdev)
5576{
5577	cik_pcie_gart_disable(rdev);
5578	radeon_gart_table_vram_free(rdev);
5579	radeon_gart_fini(rdev);
5580}
5581
5582/* vm parser */
5583/**
5584 * cik_ib_parse - vm ib_parse callback
5585 *
5586 * @rdev: radeon_device pointer
5587 * @ib: indirect buffer pointer
5588 *
5589 * CIK uses hw IB checking so this is a nop (CIK).
5590 */
5591int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5592{
5593	return 0;
5594}
5595
5596/*
5597 * vm
5598 * VMID 0 is the physical GPU addresses as used by the kernel.
5599 * VMIDs 1-15 are used for userspace clients and are handled
5600 * by the radeon vm/hsa code.
5601 */
5602/**
5603 * cik_vm_init - cik vm init callback
5604 *
5605 * @rdev: radeon_device pointer
5606 *
 * Init CIK-specific VM parameters (number of VMs, base of vram for
5608 * VMIDs 1-15) (CIK).
5609 * Returns 0 for success.
5610 */
5611int cik_vm_init(struct radeon_device *rdev)
5612{
5613	/*
5614	 * number of VMs
5615	 * VMID 0 is reserved for System
5616	 * radeon graphics/compute will use VMIDs 1-15
5617	 */
5618	rdev->vm_manager.nvm = 16;
5619	/* base offset of vram pages */
5620	if (rdev->flags & RADEON_IS_IGP) {
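		/* MC_VM_FB_OFFSET is in units of 4MB, hence the shift by 22 */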
5621		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5622		tmp <<= 22;
5623		rdev->vm_manager.vram_base_offset = tmp;
5624	} else
5625		rdev->vm_manager.vram_base_offset = 0;
5626
5627	return 0;
5628}
5629
5630/**
5631 * cik_vm_fini - cik vm fini callback
5632 *
5633 * @rdev: radeon_device pointer
5634 *
5635 * Tear down any asic specific VM setup (CIK).
5636 */
5637void cik_vm_fini(struct radeon_device *rdev)
5638{
5639}
5640
5641/**
5642 * cik_vm_decode_fault - print human readable fault info
5643 *
5644 * @rdev: radeon_device pointer
5645 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5646 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5647 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
5648 *
5649 * Print human readable fault information (CIK).
5650 */
5651static void cik_vm_decode_fault(struct radeon_device *rdev,
5652				u32 status, u32 addr, u32 mc_client)
5653{
5654	u32 mc_id;
5655	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5656	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5657	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5658		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5659
5660	if (rdev->family == CHIP_HAWAII)
5661		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5662	else
5663		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5664
5665	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5666	       protections, vmid, addr,
5667	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5668	       block, mc_client, mc_id);
5669}
5670
/**
 * cik_vm_flush - cik vm flush using the CP
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @vm_id: VMID of the page table to flush
 * @pd_addr: page directory base address
 *
 * Update the page table base and flush the VM TLB
 * using the CP (CIK).
 */
5677void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5678		  unsigned vm_id, uint64_t pd_addr)
5679{
5680	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
5681
5682	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5683	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5684				 WRITE_DATA_DST_SEL(0)));
5685	if (vm_id < 8) {
5686		radeon_ring_write(ring,
5687				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5688	} else {
5689		radeon_ring_write(ring,
5690				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5691	}
5692	radeon_ring_write(ring, 0);
5693	radeon_ring_write(ring, pd_addr >> 12);
5694
5695	/* update SH_MEM_* regs */
5696	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5697	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5698				 WRITE_DATA_DST_SEL(0)));
5699	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5700	radeon_ring_write(ring, 0);
5701	radeon_ring_write(ring, VMID(vm_id));
5702
5703	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5704	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5705				 WRITE_DATA_DST_SEL(0)));
5706	radeon_ring_write(ring, SH_MEM_BASES >> 2);
5707	radeon_ring_write(ring, 0);
5708
5709	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5710	radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
5711	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5712	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5713
5714	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5715	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5716				 WRITE_DATA_DST_SEL(0)));
5717	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5718	radeon_ring_write(ring, 0);
5719	radeon_ring_write(ring, VMID(0));
5720
5721	/* HDP flush */
5722	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
5723
	/* bits 0-15 are the VM contexts 0-15 */
5725	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5726	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5727				 WRITE_DATA_DST_SEL(0)));
5728	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5729	radeon_ring_write(ring, 0);
5730	radeon_ring_write(ring, 1 << vm_id);
5731
5732	/* wait for the invalidate to complete */
5733	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5734	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5735				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
5736				 WAIT_REG_MEM_ENGINE(0))); /* me */
5737	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5738	radeon_ring_write(ring, 0);
5739	radeon_ring_write(ring, 0); /* ref */
5740	radeon_ring_write(ring, 0); /* mask */
5741	radeon_ring_write(ring, 0x20); /* poll interval */
5742
5743	/* compute doesn't have PFP */
5744	if (usepfp) {
5745		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5746		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5747		radeon_ring_write(ring, 0x0);
5748	}
5749}
5750
5751/*
5752 * RLC
5753 * The RLC is a multi-purpose microengine that handles a
5754 * variety of functions, the most important of which is
5755 * the interrupt controller.
5756 */
5757static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5758					  bool enable)
5759{
5760	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5761
5762	if (enable)
5763		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5764	else
5765		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5766	WREG32(CP_INT_CNTL_RING0, tmp);
5767}
5768
5769static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5770{
5771	u32 tmp;
5772
5773	tmp = RREG32(RLC_LB_CNTL);
5774	if (enable)
5775		tmp |= LOAD_BALANCE_ENABLE;
5776	else
5777		tmp &= ~LOAD_BALANCE_ENABLE;
5778	WREG32(RLC_LB_CNTL, tmp);
5779}
5780
5781static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5782{
5783	u32 i, j, k;
5784	u32 mask;
5785
5786	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5787		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5788			cik_select_se_sh(rdev, i, j);
5789			for (k = 0; k < rdev->usec_timeout; k++) {
5790				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5791					break;
5792				udelay(1);
5793			}
5794		}
5795	}
5796	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5797
5798	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5799	for (k = 0; k < rdev->usec_timeout; k++) {
5800		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5801			break;
5802		udelay(1);
5803	}
5804}
5805
5806static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5807{
5808	u32 tmp;
5809
5810	tmp = RREG32(RLC_CNTL);
5811	if (tmp != rlc)
5812		WREG32(RLC_CNTL, rlc);
5813}
5814
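/*
 * Halt the RLC and return the previous RLC_CNTL value so the caller
 * can restore it later with cik_update_rlc().
 */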
5815static u32 cik_halt_rlc(struct radeon_device *rdev)
5816{
5817	u32 data, orig;
5818
5819	orig = data = RREG32(RLC_CNTL);
5820
5821	if (data & RLC_ENABLE) {
5822		u32 i;
5823
5824		data &= ~RLC_ENABLE;
5825		WREG32(RLC_CNTL, data);
5826
5827		for (i = 0; i < rdev->usec_timeout; i++) {
5828			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5829				break;
5830			udelay(1);
5831		}
5832
5833		cik_wait_for_rlc_serdes(rdev);
5834	}
5835
5836	return orig;
5837}
5838
5839void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5840{
5841	u32 tmp, i, mask;
5842
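	/*
	 * Safe mode handshake: request entry, wait for the GFX power and
	 * clock status bits, then wait for the RLC to ack by clearing REQ.
	 */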
5843	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5844	WREG32(RLC_GPR_REG2, tmp);
5845
5846	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5847	for (i = 0; i < rdev->usec_timeout; i++) {
5848		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5849			break;
5850		udelay(1);
5851	}
5852
5853	for (i = 0; i < rdev->usec_timeout; i++) {
5854		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5855			break;
5856		udelay(1);
5857	}
5858}
5859
5860void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5861{
5862	u32 tmp;
5863
5864	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5865	WREG32(RLC_GPR_REG2, tmp);
5866}
5867
5868/**
5869 * cik_rlc_stop - stop the RLC ME
5870 *
5871 * @rdev: radeon_device pointer
5872 *
5873 * Halt the RLC ME (MicroEngine) (CIK).
5874 */
5875static void cik_rlc_stop(struct radeon_device *rdev)
5876{
5877	WREG32(RLC_CNTL, 0);
5878
5879	cik_enable_gui_idle_interrupt(rdev, false);
5880
5881	cik_wait_for_rlc_serdes(rdev);
5882}
5883
5884/**
5885 * cik_rlc_start - start the RLC ME
5886 *
5887 * @rdev: radeon_device pointer
5888 *
5889 * Unhalt the RLC ME (MicroEngine) (CIK).
5890 */
5891static void cik_rlc_start(struct radeon_device *rdev)
5892{
5893	WREG32(RLC_CNTL, RLC_ENABLE);
5894
5895	cik_enable_gui_idle_interrupt(rdev, true);
5896
5897	udelay(50);
5898}
5899
5900/**
5901 * cik_rlc_resume - setup the RLC hw
5902 *
5903 * @rdev: radeon_device pointer
5904 *
5905 * Initialize the RLC registers, load the ucode,
5906 * and start the RLC (CIK).
5907 * Returns 0 for success, -EINVAL if the ucode is not available.
5908 */
5909static int cik_rlc_resume(struct radeon_device *rdev)
5910{
5911	u32 i, size, tmp;
5912
5913	if (!rdev->rlc_fw)
5914		return -EINVAL;
5915
5916	cik_rlc_stop(rdev);
5917
5918	/* disable CG */
5919	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5920	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5921
5922	si_rlc_reset(rdev);
5923
5924	cik_init_pg(rdev);
5925
5926	cik_init_cg(rdev);
5927
5928	WREG32(RLC_LB_CNTR_INIT, 0);
5929	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5930
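	/* program the load balancing parameters; magic values from the reference driver */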
5931	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5932	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5933	WREG32(RLC_LB_PARAMS, 0x00600408);
5934	WREG32(RLC_LB_CNTL, 0x80000004);
5935
5936	WREG32(RLC_MC_CNTL, 0);
5937	WREG32(RLC_UCODE_CNTL, 0);
5938
5939	if (rdev->new_fw) {
5940		const struct rlc_firmware_header_v1_0 *hdr =
5941			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5942		const __le32 *fw_data = (const __le32 *)
5943			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5944
5945		radeon_ucode_print_rlc_hdr(&hdr->header);
5946
5947		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5948		WREG32(RLC_GPM_UCODE_ADDR, 0);
5949		for (i = 0; i < size; i++)
5950			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
5951		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
5952	} else {
5953		const __be32 *fw_data;
5954
5955		switch (rdev->family) {
5956		case CHIP_BONAIRE:
5957		case CHIP_HAWAII:
5958		default:
5959			size = BONAIRE_RLC_UCODE_SIZE;
5960			break;
5961		case CHIP_KAVERI:
5962			size = KV_RLC_UCODE_SIZE;
5963			break;
5964		case CHIP_KABINI:
5965			size = KB_RLC_UCODE_SIZE;
5966			break;
5967		case CHIP_MULLINS:
5968			size = ML_RLC_UCODE_SIZE;
5969			break;
5970		}
5971
5972		fw_data = (const __be32 *)rdev->rlc_fw->data;
5973		WREG32(RLC_GPM_UCODE_ADDR, 0);
5974		for (i = 0; i < size; i++)
5975			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5976		WREG32(RLC_GPM_UCODE_ADDR, 0);
5977	}
5978
5979	/* XXX - find out what chips support lbpw */
5980	cik_enable_lbpw(rdev, false);
5981
5982	if (rdev->family == CHIP_BONAIRE)
5983		WREG32(RLC_DRIVER_DMA_STATUS, 0);
5984
5985	cik_rlc_start(rdev);
5986
5987	return 0;
5988}
5989
5990static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5991{
5992	u32 data, orig, tmp, tmp2;
5993
5994	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5995
5996	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5997		cik_enable_gui_idle_interrupt(rdev, true);
5998
5999		tmp = cik_halt_rlc(rdev);
6000
6001		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6002		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6003		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6004		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6005		WREG32(RLC_SERDES_WR_CTRL, tmp2);
6006
6007		cik_update_rlc(rdev, tmp);
6008
6009		data |= CGCG_EN | CGLS_EN;
6010	} else {
6011		cik_enable_gui_idle_interrupt(rdev, false);
6012
6013		RREG32(CB_CGTT_SCLK_CTRL);
6014		RREG32(CB_CGTT_SCLK_CTRL);
6015		RREG32(CB_CGTT_SCLK_CTRL);
6016		RREG32(CB_CGTT_SCLK_CTRL);
6017
6018		data &= ~(CGCG_EN | CGLS_EN);
6019	}
6020
6021	if (orig != data)
6022		WREG32(RLC_CGCG_CGLS_CTRL, data);
6023
6024}
6025
6026static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6027{
6028	u32 data, orig, tmp = 0;
6029
6030	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6031		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6032			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6033				orig = data = RREG32(CP_MEM_SLP_CNTL);
6034				data |= CP_MEM_LS_EN;
6035				if (orig != data)
6036					WREG32(CP_MEM_SLP_CNTL, data);
6037			}
6038		}
6039
6040		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
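		/* set override bit 0 and clear bit 1; these bits have no symbolic names in cikd.h */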
6041		data |= 0x00000001;
6042		data &= 0xfffffffd;
6043		if (orig != data)
6044			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6045
6046		tmp = cik_halt_rlc(rdev);
6047
6048		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6049		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6050		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6051		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6052		WREG32(RLC_SERDES_WR_CTRL, data);
6053
6054		cik_update_rlc(rdev, tmp);
6055
6056		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6057			orig = data = RREG32(CGTS_SM_CTRL_REG);
6058			data &= ~SM_MODE_MASK;
6059			data |= SM_MODE(0x2);
6060			data |= SM_MODE_ENABLE;
6061			data &= ~CGTS_OVERRIDE;
6062			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6063			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6064				data &= ~CGTS_LS_OVERRIDE;
6065			data &= ~ON_MONITOR_ADD_MASK;
6066			data |= ON_MONITOR_ADD_EN;
6067			data |= ON_MONITOR_ADD(0x96);
6068			if (orig != data)
6069				WREG32(CGTS_SM_CTRL_REG, data);
6070		}
6071	} else {
6072		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6073		data |= 0x00000003;
6074		if (orig != data)
6075			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6076
6077		data = RREG32(RLC_MEM_SLP_CNTL);
6078		if (data & RLC_MEM_LS_EN) {
6079			data &= ~RLC_MEM_LS_EN;
6080			WREG32(RLC_MEM_SLP_CNTL, data);
6081		}
6082
6083		data = RREG32(CP_MEM_SLP_CNTL);
6084		if (data & CP_MEM_LS_EN) {
6085			data &= ~CP_MEM_LS_EN;
6086			WREG32(CP_MEM_SLP_CNTL, data);
6087		}
6088
6089		orig = data = RREG32(CGTS_SM_CTRL_REG);
6090		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6091		if (orig != data)
6092			WREG32(CGTS_SM_CTRL_REG, data);
6093
6094		tmp = cik_halt_rlc(rdev);
6095
6096		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6097		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6098		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6099		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6100		WREG32(RLC_SERDES_WR_CTRL, data);
6101
6102		cik_update_rlc(rdev, tmp);
6103	}
6104}
6105
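/* MC hub/CITF/XPB/ATC/VM clock gating controls, toggled as a group below */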
6106static const u32 mc_cg_registers[] =
6107{
6108	MC_HUB_MISC_HUB_CG,
6109	MC_HUB_MISC_SIP_CG,
6110	MC_HUB_MISC_VM_CG,
6111	MC_XPB_CLK_GAT,
6112	ATC_MISC_CG,
6113	MC_CITF_MISC_WR_CG,
6114	MC_CITF_MISC_RD_CG,
6115	MC_CITF_MISC_VM_CG,
6116	VM_L2_CG,
6117};
6118
6119static void cik_enable_mc_ls(struct radeon_device *rdev,
6120			     bool enable)
6121{
6122	int i;
6123	u32 orig, data;
6124
6125	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6126		orig = data = RREG32(mc_cg_registers[i]);
6127		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6128			data |= MC_LS_ENABLE;
6129		else
6130			data &= ~MC_LS_ENABLE;
6131		if (data != orig)
6132			WREG32(mc_cg_registers[i], data);
6133	}
6134}
6135
6136static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6137			       bool enable)
6138{
6139	int i;
6140	u32 orig, data;
6141
6142	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6143		orig = data = RREG32(mc_cg_registers[i]);
6144		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6145			data |= MC_CG_ENABLE;
6146		else
6147			data &= ~MC_CG_ENABLE;
6148		if (data != orig)
6149			WREG32(mc_cg_registers[i], data);
6150	}
6151}
6152
6153static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6154				 bool enable)
6155{
6156	u32 orig, data;
6157
6158	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6159		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6160		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6161	} else {
6162		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6163		data |= 0xff000000;
6164		if (data != orig)
6165			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6166
6167		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6168		data |= 0xff000000;
6169		if (data != orig)
6170			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6171	}
6172}
6173
6174static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6175				 bool enable)
6176{
6177	u32 orig, data;
6178
6179	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6180		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6181		data |= 0x100;
6182		if (orig != data)
6183			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6184
6185		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6186		data |= 0x100;
6187		if (orig != data)
6188			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6189	} else {
6190		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6191		data &= ~0x100;
6192		if (orig != data)
6193			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6194
6195		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6196		data &= ~0x100;
6197		if (orig != data)
6198			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6199	}
6200}
6201
6202static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6203				bool enable)
6204{
6205	u32 orig, data;
6206
6207	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6208		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6209		data = 0xfff;
6210		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6211
6212		orig = data = RREG32(UVD_CGC_CTRL);
6213		data |= DCM;
6214		if (orig != data)
6215			WREG32(UVD_CGC_CTRL, data);
6216	} else {
6217		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6218		data &= ~0xfff;
6219		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6220
6221		orig = data = RREG32(UVD_CGC_CTRL);
6222		data &= ~DCM;
6223		if (orig != data)
6224			WREG32(UVD_CGC_CTRL, data);
6225	}
6226}
6227
6228static void cik_enable_bif_mgls(struct radeon_device *rdev,
6229			       bool enable)
6230{
6231	u32 orig, data;
6232
6233	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6234
6235	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6236		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6237			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6238	else
6239		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6240			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6241
6242	if (orig != data)
6243		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6244}
6245
6246static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6247				bool enable)
6248{
6249	u32 orig, data;
6250
6251	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6252
6253	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6254		data &= ~CLOCK_GATING_DIS;
6255	else
6256		data |= CLOCK_GATING_DIS;
6257
6258	if (orig != data)
6259		WREG32(HDP_HOST_PATH_CNTL, data);
6260}
6261
6262static void cik_enable_hdp_ls(struct radeon_device *rdev,
6263			      bool enable)
6264{
6265	u32 orig, data;
6266
6267	orig = data = RREG32(HDP_MEM_POWER_LS);
6268
6269	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6270		data |= HDP_LS_ENABLE;
6271	else
6272		data &= ~HDP_LS_ENABLE;
6273
6274	if (orig != data)
6275		WREG32(HDP_MEM_POWER_LS, data);
6276}
6277
6278void cik_update_cg(struct radeon_device *rdev,
6279		   u32 block, bool enable)
6280{
6281
6282	if (block & RADEON_CG_BLOCK_GFX) {
6283		cik_enable_gui_idle_interrupt(rdev, false);
6284		/* order matters! */
6285		if (enable) {
6286			cik_enable_mgcg(rdev, true);
6287			cik_enable_cgcg(rdev, true);
6288		} else {
6289			cik_enable_cgcg(rdev, false);
6290			cik_enable_mgcg(rdev, false);
6291		}
6292		cik_enable_gui_idle_interrupt(rdev, true);
6293	}
6294
6295	if (block & RADEON_CG_BLOCK_MC) {
6296		if (!(rdev->flags & RADEON_IS_IGP)) {
6297			cik_enable_mc_mgcg(rdev, enable);
6298			cik_enable_mc_ls(rdev, enable);
6299		}
6300	}
6301
6302	if (block & RADEON_CG_BLOCK_SDMA) {
6303		cik_enable_sdma_mgcg(rdev, enable);
6304		cik_enable_sdma_mgls(rdev, enable);
6305	}
6306
6307	if (block & RADEON_CG_BLOCK_BIF) {
6308		cik_enable_bif_mgls(rdev, enable);
6309	}
6310
6311	if (block & RADEON_CG_BLOCK_UVD) {
6312		if (rdev->has_uvd)
6313			cik_enable_uvd_mgcg(rdev, enable);
6314	}
6315
6316	if (block & RADEON_CG_BLOCK_HDP) {
6317		cik_enable_hdp_mgcg(rdev, enable);
6318		cik_enable_hdp_ls(rdev, enable);
6319	}
6320
6321	if (block & RADEON_CG_BLOCK_VCE) {
6322		vce_v2_0_enable_mgcg(rdev, enable);
6323	}
6324}
6325
6326static void cik_init_cg(struct radeon_device *rdev)
6327{
6328
6329	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6330
6331	if (rdev->has_uvd)
6332		si_init_uvd_internal_cg(rdev);
6333
6334	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6335			     RADEON_CG_BLOCK_SDMA |
6336			     RADEON_CG_BLOCK_BIF |
6337			     RADEON_CG_BLOCK_UVD |
6338			     RADEON_CG_BLOCK_HDP), true);
6339}
6340
6341static void cik_fini_cg(struct radeon_device *rdev)
6342{
6343	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6344			     RADEON_CG_BLOCK_SDMA |
6345			     RADEON_CG_BLOCK_BIF |
6346			     RADEON_CG_BLOCK_UVD |
6347			     RADEON_CG_BLOCK_HDP), false);
6348
6349	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6350}
6351
6352static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6353					  bool enable)
6354{
6355	u32 data, orig;
6356
6357	orig = data = RREG32(RLC_PG_CNTL);
6358	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6359		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6360	else
6361		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6362	if (orig != data)
6363		WREG32(RLC_PG_CNTL, data);
6364}
6365
6366static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6367					  bool enable)
6368{
6369	u32 data, orig;
6370
6371	orig = data = RREG32(RLC_PG_CNTL);
6372	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6373		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6374	else
6375		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6376	if (orig != data)
6377		WREG32(RLC_PG_CNTL, data);
6378}
6379
6380static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6381{
6382	u32 data, orig;
6383
6384	orig = data = RREG32(RLC_PG_CNTL);
6385	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6386		data &= ~DISABLE_CP_PG;
6387	else
6388		data |= DISABLE_CP_PG;
6389	if (orig != data)
6390		WREG32(RLC_PG_CNTL, data);
6391}
6392
6393static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6394{
6395	u32 data, orig;
6396
6397	orig = data = RREG32(RLC_PG_CNTL);
6398	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6399		data &= ~DISABLE_GDS_PG;
6400	else
6401		data |= DISABLE_GDS_PG;
6402	if (orig != data)
6403		WREG32(RLC_PG_CNTL, data);
6404}
6405
6406#define CP_ME_TABLE_SIZE    96
6407#define CP_ME_TABLE_OFFSET  2048
6408#define CP_MEC_TABLE_OFFSET 4096
6409
6410void cik_init_cp_pg_table(struct radeon_device *rdev)
6411{
6412	volatile u32 *dst_ptr;
6413	int me, i, max_me = 4;
6414	u32 bo_offset = 0;
6415	u32 table_offset, table_size;
6416
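	/* the jump tables are packed in order: CE, PFP, ME, MEC, and MEC2 on Kaveri */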
6417	if (rdev->family == CHIP_KAVERI)
6418		max_me = 5;
6419
6420	if (rdev->rlc.cp_table_ptr == NULL)
6421		return;
6422
6423	/* write the cp table buffer */
6424	dst_ptr = rdev->rlc.cp_table_ptr;
6425	for (me = 0; me < max_me; me++) {
6426		if (rdev->new_fw) {
6427			const __le32 *fw_data;
6428			const struct gfx_firmware_header_v1_0 *hdr;
6429
6430			if (me == 0) {
6431				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6432				fw_data = (const __le32 *)
6433					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6434				table_offset = le32_to_cpu(hdr->jt_offset);
6435				table_size = le32_to_cpu(hdr->jt_size);
6436			} else if (me == 1) {
6437				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6438				fw_data = (const __le32 *)
6439					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6440				table_offset = le32_to_cpu(hdr->jt_offset);
6441				table_size = le32_to_cpu(hdr->jt_size);
6442			} else if (me == 2) {
6443				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6444				fw_data = (const __le32 *)
6445					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6446				table_offset = le32_to_cpu(hdr->jt_offset);
6447				table_size = le32_to_cpu(hdr->jt_size);
6448			} else if (me == 3) {
6449				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6450				fw_data = (const __le32 *)
6451					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6452				table_offset = le32_to_cpu(hdr->jt_offset);
6453				table_size = le32_to_cpu(hdr->jt_size);
6454			} else {
6455				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6456				fw_data = (const __le32 *)
6457					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6458				table_offset = le32_to_cpu(hdr->jt_offset);
6459				table_size = le32_to_cpu(hdr->jt_size);
6460			}
6461
6462			for (i = 0; i < table_size; i ++) {
6463				dst_ptr[bo_offset + i] =
6464					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6465			}
6466			bo_offset += table_size;
6467		} else {
6468			const __be32 *fw_data;
6469			table_size = CP_ME_TABLE_SIZE;
6470
6471			if (me == 0) {
6472				fw_data = (const __be32 *)rdev->ce_fw->data;
6473				table_offset = CP_ME_TABLE_OFFSET;
6474			} else if (me == 1) {
6475				fw_data = (const __be32 *)rdev->pfp_fw->data;
6476				table_offset = CP_ME_TABLE_OFFSET;
6477			} else if (me == 2) {
6478				fw_data = (const __be32 *)rdev->me_fw->data;
6479				table_offset = CP_ME_TABLE_OFFSET;
6480			} else {
6481				fw_data = (const __be32 *)rdev->mec_fw->data;
6482				table_offset = CP_MEC_TABLE_OFFSET;
6483			}
6484
6485			for (i = 0; i < table_size; i ++) {
6486				dst_ptr[bo_offset + i] =
6487					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6488			}
6489			bo_offset += table_size;
6490		}
6491	}
6492}
6493
6494static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6495				bool enable)
6496{
6497	u32 data, orig;
6498
6499	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6500		orig = data = RREG32(RLC_PG_CNTL);
6501		data |= GFX_PG_ENABLE;
6502		if (orig != data)
6503			WREG32(RLC_PG_CNTL, data);
6504
6505		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6506		data |= AUTO_PG_EN;
6507		if (orig != data)
6508			WREG32(RLC_AUTO_PG_CTRL, data);
6509	} else {
6510		orig = data = RREG32(RLC_PG_CNTL);
6511		data &= ~GFX_PG_ENABLE;
6512		if (orig != data)
6513			WREG32(RLC_PG_CNTL, data);
6514
6515		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6516		data &= ~AUTO_PG_EN;
6517		if (orig != data)
6518			WREG32(RLC_AUTO_PG_CTRL, data);
6519
6520		data = RREG32(DB_RENDER_CONTROL);
6521	}
6522}
6523
6524static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6525{
6526	u32 mask = 0, tmp, tmp1;
6527	int i;
6528
6529	cik_select_se_sh(rdev, se, sh);
6530	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6531	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6532	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6533
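	/*
	 * The upper 16 bits hold the inactive CU bits (hw fused off plus
	 * user disabled); merge them, shift down, and invert to get the
	 * active CU bitmap for this SH.
	 */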
6534	tmp &= 0xffff0000;
6535
6536	tmp |= tmp1;
6537	tmp >>= 16;
6538
6539	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6540		mask <<= 1;
6541		mask |= 1;
6542	}
6543
6544	return (~tmp) & mask;
6545}
6546
6547static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6548{
6549	u32 i, j, k, active_cu_number = 0;
6550	u32 mask, counter, cu_bitmap;
6551	u32 tmp = 0;
6552
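	/* keep at most two active CUs per SH in the always-on mask */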
6553	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6554		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6555			mask = 1;
6556			cu_bitmap = 0;
6557			counter = 0;
6558			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6559				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6560					if (counter < 2)
6561						cu_bitmap |= mask;
6562					counter ++;
6563				}
6564				mask <<= 1;
6565			}
6566
6567			active_cu_number += counter;
6568			tmp |= (cu_bitmap << (i * 16 + j * 8));
6569		}
6570	}
6571
6572	WREG32(RLC_PG_AO_CU_MASK, tmp);
6573
6574	tmp = RREG32(RLC_MAX_PG_CU);
6575	tmp &= ~MAX_PU_CU_MASK;
6576	tmp |= MAX_PU_CU(active_cu_number);
6577	WREG32(RLC_MAX_PG_CU, tmp);
6578}
6579
6580static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6581				       bool enable)
6582{
6583	u32 data, orig;
6584
6585	orig = data = RREG32(RLC_PG_CNTL);
6586	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6587		data |= STATIC_PER_CU_PG_ENABLE;
6588	else
6589		data &= ~STATIC_PER_CU_PG_ENABLE;
6590	if (orig != data)
6591		WREG32(RLC_PG_CNTL, data);
6592}
6593
6594static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6595					bool enable)
6596{
6597	u32 data, orig;
6598
6599	orig = data = RREG32(RLC_PG_CNTL);
6600	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6601		data |= DYN_PER_CU_PG_ENABLE;
6602	else
6603		data &= ~DYN_PER_CU_PG_ENABLE;
6604	if (orig != data)
6605		WREG32(RLC_PG_CNTL, data);
6606}
6607
6608#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6609#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6610
6611static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6612{
6613	u32 data, orig;
6614	u32 i;
6615
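	/* publish the clear state buffer descriptor (address and size) via RLC scratch space */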
6616	if (rdev->rlc.cs_data) {
6617		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6618		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6619		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6620		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6621	} else {
6622		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6623		for (i = 0; i < 3; i++)
6624			WREG32(RLC_GPM_SCRATCH_DATA, 0);
6625	}
6626	if (rdev->rlc.reg_list) {
6627		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6628		for (i = 0; i < rdev->rlc.reg_list_size; i++)
6629			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6630	}
6631
6632	orig = data = RREG32(RLC_PG_CNTL);
6633	data |= GFX_PG_SRC;
6634	if (orig != data)
6635		WREG32(RLC_PG_CNTL, data);
6636
6637	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6638	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6639
6640	data = RREG32(CP_RB_WPTR_POLL_CNTL);
6641	data &= ~IDLE_POLL_COUNT_MASK;
6642	data |= IDLE_POLL_COUNT(0x60);
6643	WREG32(CP_RB_WPTR_POLL_CNTL, data);
6644
6645	data = 0x10101010;
6646	WREG32(RLC_PG_DELAY, data);
6647
6648	data = RREG32(RLC_PG_DELAY_2);
6649	data &= ~0xff;
6650	data |= 0x3;
6651	WREG32(RLC_PG_DELAY_2, data);
6652
6653	data = RREG32(RLC_AUTO_PG_CTRL);
6654	data &= ~GRBM_REG_SGIT_MASK;
6655	data |= GRBM_REG_SGIT(0x700);
6656	WREG32(RLC_AUTO_PG_CTRL, data);
6657
6658}
6659
6660static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6661{
6662	cik_enable_gfx_cgpg(rdev, enable);
6663	cik_enable_gfx_static_mgpg(rdev, enable);
6664	cik_enable_gfx_dynamic_mgpg(rdev, enable);
6665}
6666
6667u32 cik_get_csb_size(struct radeon_device *rdev)
6668{
6669	u32 count = 0;
6670	const struct cs_section_def *sect = NULL;
6671	const struct cs_extent_def *ext = NULL;
6672
6673	if (rdev->rlc.cs_data == NULL)
6674		return 0;
6675
6676	/* begin clear state */
6677	count += 2;
6678	/* context control state */
6679	count += 3;
6680
6681	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6682		for (ext = sect->section; ext->extent != NULL; ++ext) {
6683			if (sect->id == SECT_CONTEXT)
6684				count += 2 + ext->reg_count;
6685			else
6686				return 0;
6687		}
6688	}
6689	/* pa_sc_raster_config/pa_sc_raster_config1 */
6690	count += 4;
6691	/* end clear state */
6692	count += 2;
6693	/* clear state */
6694	count += 2;
6695
6696	return count;
6697}
6698
6699void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6700{
6701	u32 count = 0, i;
6702	const struct cs_section_def *sect = NULL;
6703	const struct cs_extent_def *ext = NULL;
6704
6705	if (rdev->rlc.cs_data == NULL)
6706		return;
6707	if (buffer == NULL)
6708		return;
6709
6710	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6711	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6712
6713	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6714	buffer[count++] = cpu_to_le32(0x80000000);
6715	buffer[count++] = cpu_to_le32(0x80000000);
6716
6717	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6718		for (ext = sect->section; ext->extent != NULL; ++ext) {
6719			if (sect->id == SECT_CONTEXT) {
6720				buffer[count++] =
6721					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6722				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6723				for (i = 0; i < ext->reg_count; i++)
6724					buffer[count++] = cpu_to_le32(ext->extent[i]);
6725			} else {
6726				return;
6727			}
6728		}
6729	}
6730
6731	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6732	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6733	switch (rdev->family) {
6734	case CHIP_BONAIRE:
6735		buffer[count++] = cpu_to_le32(0x16000012);
6736		buffer[count++] = cpu_to_le32(0x00000000);
6737		break;
6738	case CHIP_KAVERI:
6739		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6740		buffer[count++] = cpu_to_le32(0x00000000);
6741		break;
6742	case CHIP_KABINI:
6743	case CHIP_MULLINS:
6744		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6745		buffer[count++] = cpu_to_le32(0x00000000);
6746		break;
6747	case CHIP_HAWAII:
6748		buffer[count++] = cpu_to_le32(0x3a00161a);
6749		buffer[count++] = cpu_to_le32(0x0000002e);
6750		break;
6751	default:
6752		buffer[count++] = cpu_to_le32(0x00000000);
6753		buffer[count++] = cpu_to_le32(0x00000000);
6754		break;
6755	}
6756
6757	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6758	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6759
6760	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6761	buffer[count++] = cpu_to_le32(0);
6762}
6763
6764static void cik_init_pg(struct radeon_device *rdev)
6765{
6766	if (rdev->pg_flags) {
6767		cik_enable_sck_slowdown_on_pu(rdev, true);
6768		cik_enable_sck_slowdown_on_pd(rdev, true);
6769		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6770			cik_init_gfx_cgpg(rdev);
6771			cik_enable_cp_pg(rdev, true);
6772			cik_enable_gds_pg(rdev, true);
6773		}
6774		cik_init_ao_cu_mask(rdev);
6775		cik_update_gfx_pg(rdev, true);
6776	}
6777}
6778
6779static void cik_fini_pg(struct radeon_device *rdev)
6780{
6781	if (rdev->pg_flags) {
6782		cik_update_gfx_pg(rdev, false);
6783		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6784			cik_enable_cp_pg(rdev, false);
6785			cik_enable_gds_pg(rdev, false);
6786		}
6787	}
6788}
6789
6790/*
6791 * Interrupts
6792 * Starting with r6xx, interrupts are handled via a ring buffer.
6793 * Ring buffers are areas of GPU accessible memory that the GPU
6794 * writes interrupt vectors into and the host reads vectors out of.
6795 * There is a rptr (read pointer) that determines where the
6796 * host is currently reading, and a wptr (write pointer)
6797 * which determines where the GPU has written.  When the
6798 * pointers are equal, the ring is idle.  When the GPU
6799 * writes vectors to the ring buffer, it increments the
6800 * wptr.  When there is an interrupt, the host then starts
 * fetching vectors and processing them until the pointers are
6802 * equal again at which point it updates the rptr.
6803 */
6804
6805/**
6806 * cik_enable_interrupts - Enable the interrupt ring buffer
6807 *
6808 * @rdev: radeon_device pointer
6809 *
6810 * Enable the interrupt ring buffer (CIK).
6811 */
6812static void cik_enable_interrupts(struct radeon_device *rdev)
6813{
6814	u32 ih_cntl = RREG32(IH_CNTL);
6815	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6816
6817	ih_cntl |= ENABLE_INTR;
6818	ih_rb_cntl |= IH_RB_ENABLE;
6819	WREG32(IH_CNTL, ih_cntl);
6820	WREG32(IH_RB_CNTL, ih_rb_cntl);
6821	rdev->ih.enabled = true;
6822}
6823
6824/**
6825 * cik_disable_interrupts - Disable the interrupt ring buffer
6826 *
6827 * @rdev: radeon_device pointer
6828 *
6829 * Disable the interrupt ring buffer (CIK).
6830 */
6831static void cik_disable_interrupts(struct radeon_device *rdev)
6832{
6833	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6834	u32 ih_cntl = RREG32(IH_CNTL);
6835
6836	ih_rb_cntl &= ~IH_RB_ENABLE;
6837	ih_cntl &= ~ENABLE_INTR;
6838	WREG32(IH_RB_CNTL, ih_rb_cntl);
6839	WREG32(IH_CNTL, ih_cntl);
6840	/* set rptr, wptr to 0 */
6841	WREG32(IH_RB_RPTR, 0);
6842	WREG32(IH_RB_WPTR, 0);
6843	rdev->ih.enabled = false;
6844	rdev->ih.rptr = 0;
6845}
6846
6847/**
6848 * cik_disable_interrupt_state - Disable all interrupt sources
6849 *
6850 * @rdev: radeon_device pointer
6851 *
6852 * Clear all interrupt enable bits used by the driver (CIK).
6853 */
6854static void cik_disable_interrupt_state(struct radeon_device *rdev)
6855{
6856	u32 tmp;
6857
6858	/* gfx ring */
6859	tmp = RREG32(CP_INT_CNTL_RING0) &
6860		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6861	WREG32(CP_INT_CNTL_RING0, tmp);
6862	/* sdma */
6863	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6864	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6865	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6866	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6867	/* compute queues */
6868	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6869	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6870	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6871	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6872	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6873	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6874	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6875	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6876	/* grbm */
6877	WREG32(GRBM_INT_CNTL, 0);
6878	/* SRBM */
6879	WREG32(SRBM_INT_CNTL, 0);
6880	/* vline/vblank, etc. */
6881	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6882	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6883	if (rdev->num_crtc >= 4) {
6884		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6885		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6886	}
6887	if (rdev->num_crtc >= 6) {
6888		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6889		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6890	}
6891	/* pflip */
6892	if (rdev->num_crtc >= 2) {
6893		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6894		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6895	}
6896	if (rdev->num_crtc >= 4) {
6897		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6898		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6899	}
6900	if (rdev->num_crtc >= 6) {
6901		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6902		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6903	}
6904
6905	/* dac hotplug */
6906	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6907
6908	/* digital hotplug */
6909	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6910	WREG32(DC_HPD1_INT_CONTROL, tmp);
6911	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6912	WREG32(DC_HPD2_INT_CONTROL, tmp);
6913	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6914	WREG32(DC_HPD3_INT_CONTROL, tmp);
6915	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6916	WREG32(DC_HPD4_INT_CONTROL, tmp);
6917	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6918	WREG32(DC_HPD5_INT_CONTROL, tmp);
6919	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6920	WREG32(DC_HPD6_INT_CONTROL, tmp);
6922}
6923
6924/**
6925 * cik_irq_init - init and enable the interrupt ring
6926 *
6927 * @rdev: radeon_device pointer
6928 *
6929 * Allocate a ring buffer for the interrupt controller,
6930 * enable the RLC, disable interrupts, set up the IH
6931 * ring buffer and enable it (CIK).
6932 * Called at device load and resume.
6933 * Returns 0 for success, errors for failure.
6934 */
6935static int cik_irq_init(struct radeon_device *rdev)
6936{
6937	int ret = 0;
6938	int rb_bufsz;
6939	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6940
6941	/* allocate ring */
6942	ret = r600_ih_ring_alloc(rdev);
6943	if (ret)
6944		return ret;
6945
6946	/* disable irqs */
6947	cik_disable_interrupts(rdev);
6948
6949	/* init rlc */
6950	ret = cik_rlc_resume(rdev);
6951	if (ret) {
6952		r600_ih_ring_fini(rdev);
6953		return ret;
6954	}
6955
6956	/* setup interrupt control */
6957	/* set dummy read address to dummy page address */
6958	WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
6959	interrupt_cntl = RREG32(INTERRUPT_CNTL);
6960	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6961	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6962	 */
6963	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6964	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6965	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6966	WREG32(INTERRUPT_CNTL, interrupt_cntl);
6967
6968	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
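	/* RB size field is log2 of the ring size in dwords: e.g. with the
	 * 64KB IH ring allocated in cik_init(), ring_size / 4 = 16384
	 * dwords, so rb_bufsz = 14
	 */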
6969	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6970
6971	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6972		      IH_WPTR_OVERFLOW_CLEAR |
6973		      (rb_bufsz << 1));
6974
6975	if (rdev->wb.enabled)
6976		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6977
6978	/* set the writeback address whether it's enabled or not */
6979	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6980	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6981
6982	WREG32(IH_RB_CNTL, ih_rb_cntl);
6983
6984	/* set rptr, wptr to 0 */
6985	WREG32(IH_RB_RPTR, 0);
6986	WREG32(IH_RB_WPTR, 0);
6987
6988	/* Default settings for IH_CNTL (disabled at first) */
6989	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6990	/* RPTR_REARM only works if msi's are enabled */
6991	if (rdev->msi_enabled)
6992		ih_cntl |= RPTR_REARM;
6993	WREG32(IH_CNTL, ih_cntl);
6994
6995	/* force the active interrupt state to all disabled */
6996	cik_disable_interrupt_state(rdev);
6997
6998	pci_set_master(rdev->pdev);
6999
7000	/* enable irqs */
7001	cik_enable_interrupts(rdev);
7002
7003	return ret;
7004}
7005
7006/**
7007 * cik_irq_set - enable/disable interrupt sources
7008 *
7009 * @rdev: radeon_device pointer
7010 *
7011 * Enable interrupt sources on the GPU (vblanks, hpd,
7012 * etc.) (CIK).
7013 * Returns 0 for success, errors for failure.
7014 */
7015int cik_irq_set(struct radeon_device *rdev)
7016{
7017	u32 cp_int_cntl;
7018	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
7019	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
7020	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7021	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7022	u32 grbm_int_cntl = 0;
7023	u32 dma_cntl, dma_cntl1;
7024
7025	if (!rdev->irq.installed) {
7026		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7027		return -EINVAL;
7028	}
7029	/* don't enable anything if the ih is disabled */
7030	if (!rdev->ih.enabled) {
7031		cik_disable_interrupts(rdev);
7032		/* force the active interrupt state to all disabled */
7033		cik_disable_interrupt_state(rdev);
7034		return 0;
7035	}
7036
7037	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7038		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7039	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7040
7041	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7042	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7043	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7044	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7045	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7046	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7047
7048	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7049	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7050
7051	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7052	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7053	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7054	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7055	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7056	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7057	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7058	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7059
7060	/* enable CP interrupts on all rings */
7061	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7062		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7063		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7064	}
7065	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7066		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7067		DRM_DEBUG("cik_irq_set: sw int cp1\n");
7068		if (ring->me == 1) {
7069			switch (ring->pipe) {
7070			case 0:
7071				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7072				break;
7073			case 1:
7074				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7075				break;
7076			case 2:
7077				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7078				break;
7079			case 3:
7080				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
7081				break;
7082			default:
7083				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7084				break;
7085			}
7086		} else if (ring->me == 2) {
7087			switch (ring->pipe) {
7088			case 0:
7089				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7090				break;
7091			case 1:
7092				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7093				break;
7094			case 2:
7095				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7096				break;
7097			case 3:
7098				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
7099				break;
7100			default:
7101				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7102				break;
7103			}
7104		} else {
7105			DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
7106		}
7107	}
7108	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7109		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7110		DRM_DEBUG("cik_irq_set: sw int cp2\n");
7111		if (ring->me == 1) {
7112			switch (ring->pipe) {
7113			case 0:
7114				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7115				break;
7116			case 1:
7117				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7118				break;
7119			case 2:
7120				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7121				break;
7122			case 3:
7123				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
7124				break;
7125			default:
7126				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7127				break;
7128			}
7129		} else if (ring->me == 2) {
7130			switch (ring->pipe) {
7131			case 0:
7132				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7133				break;
7134			case 1:
7135				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7136				break;
7137			case 2:
7138				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7139				break;
7140			case 3:
7141				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
7142				break;
7143			default:
7144				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7145				break;
7146			}
7147		} else {
7148			DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
7149		}
7150	}
7151
7152	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7153		DRM_DEBUG("cik_irq_set: sw int dma\n");
7154		dma_cntl |= TRAP_ENABLE;
7155	}
7156
7157	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7158		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7159		dma_cntl1 |= TRAP_ENABLE;
7160	}
7161
7162	if (rdev->irq.crtc_vblank_int[0] ||
7163	    atomic_read(&rdev->irq.pflip[0])) {
7164		DRM_DEBUG("cik_irq_set: vblank 0\n");
7165		crtc1 |= VBLANK_INTERRUPT_MASK;
7166	}
7167	if (rdev->irq.crtc_vblank_int[1] ||
7168	    atomic_read(&rdev->irq.pflip[1])) {
7169		DRM_DEBUG("cik_irq_set: vblank 1\n");
7170		crtc2 |= VBLANK_INTERRUPT_MASK;
7171	}
7172	if (rdev->irq.crtc_vblank_int[2] ||
7173	    atomic_read(&rdev->irq.pflip[2])) {
7174		DRM_DEBUG("cik_irq_set: vblank 2\n");
7175		crtc3 |= VBLANK_INTERRUPT_MASK;
7176	}
7177	if (rdev->irq.crtc_vblank_int[3] ||
7178	    atomic_read(&rdev->irq.pflip[3])) {
7179		DRM_DEBUG("cik_irq_set: vblank 3\n");
7180		crtc4 |= VBLANK_INTERRUPT_MASK;
7181	}
7182	if (rdev->irq.crtc_vblank_int[4] ||
7183	    atomic_read(&rdev->irq.pflip[4])) {
7184		DRM_DEBUG("cik_irq_set: vblank 4\n");
7185		crtc5 |= VBLANK_INTERRUPT_MASK;
7186	}
7187	if (rdev->irq.crtc_vblank_int[5] ||
7188	    atomic_read(&rdev->irq.pflip[5])) {
7189		DRM_DEBUG("cik_irq_set: vblank 5\n");
7190		crtc6 |= VBLANK_INTERRUPT_MASK;
7191	}
7192	if (rdev->irq.hpd[0]) {
7193		DRM_DEBUG("cik_irq_set: hpd 1\n");
7194		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7195	}
7196	if (rdev->irq.hpd[1]) {
7197		DRM_DEBUG("cik_irq_set: hpd 2\n");
7198		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7199	}
7200	if (rdev->irq.hpd[2]) {
7201		DRM_DEBUG("cik_irq_set: hpd 3\n");
7202		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7203	}
7204	if (rdev->irq.hpd[3]) {
7205		DRM_DEBUG("cik_irq_set: hpd 4\n");
7206		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7207	}
7208	if (rdev->irq.hpd[4]) {
7209		DRM_DEBUG("cik_irq_set: hpd 5\n");
7210		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7211	}
7212	if (rdev->irq.hpd[5]) {
7213		DRM_DEBUG("cik_irq_set: hpd 6\n");
7214		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7215	}
7216
7217	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7218
7219	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7220	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7221
7222	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7223	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7224	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7225	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7226	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7227	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7228	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7229	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7230
7231	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7232
7233	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7234	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7235	if (rdev->num_crtc >= 4) {
7236		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7237		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7238	}
7239	if (rdev->num_crtc >= 6) {
7240		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7241		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7242	}
7243
7244	if (rdev->num_crtc >= 2) {
7245		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7246		       GRPH_PFLIP_INT_MASK);
7247		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7248		       GRPH_PFLIP_INT_MASK);
7249	}
7250	if (rdev->num_crtc >= 4) {
7251		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7252		       GRPH_PFLIP_INT_MASK);
7253		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7254		       GRPH_PFLIP_INT_MASK);
7255	}
7256	if (rdev->num_crtc >= 6) {
7257		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7258		       GRPH_PFLIP_INT_MASK);
7259		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7260		       GRPH_PFLIP_INT_MASK);
7261	}
7262
7263	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7264	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7265	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7266	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7267	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7268	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7269
7270	/* posting read */
7271	RREG32(SRBM_STATUS);
7272
7273	return 0;
7274}
7275
7276/**
7277 * cik_irq_ack - ack interrupt sources
7278 *
7279 * @rdev: radeon_device pointer
7280 *
7281 * Ack interrupt sources on the GPU (vblanks, hpd,
7282 * etc.) (CIK).  Certain interrupt sources are sw
7283 * generated and do not require an explicit ack.
7284 */
7285static inline void cik_irq_ack(struct radeon_device *rdev)
7286{
7287	u32 tmp;
7288
7289	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7290	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7291	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7292	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7293	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7294	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7295	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7296
7297	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7298		EVERGREEN_CRTC0_REGISTER_OFFSET);
7299	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7300		EVERGREEN_CRTC1_REGISTER_OFFSET);
7301	if (rdev->num_crtc >= 4) {
7302		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7303			EVERGREEN_CRTC2_REGISTER_OFFSET);
7304		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7305			EVERGREEN_CRTC3_REGISTER_OFFSET);
7306	}
7307	if (rdev->num_crtc >= 6) {
7308		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7309			EVERGREEN_CRTC4_REGISTER_OFFSET);
7310		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7311			EVERGREEN_CRTC5_REGISTER_OFFSET);
7312	}
7313
7314	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7315		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7316		       GRPH_PFLIP_INT_CLEAR);
7317	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7318		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7319		       GRPH_PFLIP_INT_CLEAR);
7320	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7321		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7322	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7323		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7324	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7325		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7326	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7327		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7328
7329	if (rdev->num_crtc >= 4) {
7330		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7331			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7332			       GRPH_PFLIP_INT_CLEAR);
7333		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7334			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7335			       GRPH_PFLIP_INT_CLEAR);
7336		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7337			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7338		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7339			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7340		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7341			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7342		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7343			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7344	}
7345
7346	if (rdev->num_crtc >= 6) {
7347		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7348			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7349			       GRPH_PFLIP_INT_CLEAR);
7350		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7351			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7352			       GRPH_PFLIP_INT_CLEAR);
7353		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7354			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7355		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7356			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7357		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7358			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7359		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7360			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7361	}
7362
7363	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7364		tmp = RREG32(DC_HPD1_INT_CONTROL);
7365		tmp |= DC_HPDx_INT_ACK;
7366		WREG32(DC_HPD1_INT_CONTROL, tmp);
7367	}
7368	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7369		tmp = RREG32(DC_HPD2_INT_CONTROL);
7370		tmp |= DC_HPDx_INT_ACK;
7371		WREG32(DC_HPD2_INT_CONTROL, tmp);
7372	}
7373	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7374		tmp = RREG32(DC_HPD3_INT_CONTROL);
7375		tmp |= DC_HPDx_INT_ACK;
7376		WREG32(DC_HPD3_INT_CONTROL, tmp);
7377	}
7378	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7379		tmp = RREG32(DC_HPD4_INT_CONTROL);
7380		tmp |= DC_HPDx_INT_ACK;
7381		WREG32(DC_HPD4_INT_CONTROL, tmp);
7382	}
7383	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7384		tmp = RREG32(DC_HPD5_INT_CONTROL);
7385		tmp |= DC_HPDx_INT_ACK;
7386		WREG32(DC_HPD5_INT_CONTROL, tmp);
7387	}
7388	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7389		tmp = RREG32(DC_HPD6_INT_CONTROL);
7390		tmp |= DC_HPDx_INT_ACK;
7391		WREG32(DC_HPD6_INT_CONTROL, tmp);
7392	}
7393	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7394		tmp = RREG32(DC_HPD1_INT_CONTROL);
7395		tmp |= DC_HPDx_RX_INT_ACK;
7396		WREG32(DC_HPD1_INT_CONTROL, tmp);
7397	}
7398	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7399		tmp = RREG32(DC_HPD2_INT_CONTROL);
7400		tmp |= DC_HPDx_RX_INT_ACK;
7401		WREG32(DC_HPD2_INT_CONTROL, tmp);
7402	}
7403	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7404		tmp = RREG32(DC_HPD3_INT_CONTROL);
7405		tmp |= DC_HPDx_RX_INT_ACK;
7406		WREG32(DC_HPD3_INT_CONTROL, tmp);
7407	}
7408	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7409		tmp = RREG32(DC_HPD4_INT_CONTROL);
7410		tmp |= DC_HPDx_RX_INT_ACK;
7411		WREG32(DC_HPD4_INT_CONTROL, tmp);
7412	}
7413	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7414		tmp = RREG32(DC_HPD5_INT_CONTROL);
7415		tmp |= DC_HPDx_RX_INT_ACK;
7416		WREG32(DC_HPD5_INT_CONTROL, tmp);
7417	}
7418	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7419		tmp = RREG32(DC_HPD6_INT_CONTROL);
7420		tmp |= DC_HPDx_RX_INT_ACK;
7421		WREG32(DC_HPD6_INT_CONTROL, tmp);
7422	}
7423}
7424
7425/**
7426 * cik_irq_disable - disable interrupts
7427 *
7428 * @rdev: radeon_device pointer
7429 *
7430 * Disable interrupts on the hw (CIK).
7431 */
7432static void cik_irq_disable(struct radeon_device *rdev)
7433{
7434	cik_disable_interrupts(rdev);
7435	/* Wait and acknowledge irq */
7436	mdelay(1);
7437	cik_irq_ack(rdev);
7438	cik_disable_interrupt_state(rdev);
7439}
7440
7441/**
7442 * cik_irq_suspend - disable interrupts for suspend
7443 *
7444 * @rdev: radeon_device pointer
7445 *
7446 * Disable interrupts and stop the RLC (CIK).
7447 * Used for suspend.
7448 */
7449static void cik_irq_suspend(struct radeon_device *rdev)
7450{
7451	cik_irq_disable(rdev);
7452	cik_rlc_stop(rdev);
7453}
7454
7455/**
7456 * cik_irq_fini - tear down interrupt support
7457 *
7458 * @rdev: radeon_device pointer
7459 *
7460 * Disable interrupts on the hw and free the IH ring
7461 * buffer (CIK).
7462 * Used for driver unload.
7463 */
7464static void cik_irq_fini(struct radeon_device *rdev)
7465{
7466	cik_irq_suspend(rdev);
7467	r600_ih_ring_fini(rdev);
7468}
7469
7470/**
7471 * cik_get_ih_wptr - get the IH ring buffer wptr
7472 *
7473 * @rdev: radeon_device pointer
7474 *
7475 * Get the IH ring buffer wptr from either the register
7476 * or the writeback memory buffer (CIK).  Also check for
7477 * ring buffer overflow and deal with it.
7478 * Used by cik_irq_process().
7479 * Returns the value of the wptr.
7480 */
7481static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7482{
7483	u32 wptr, tmp;
7484
7485	if (rdev->wb.enabled)
7486		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7487	else
7488		wptr = RREG32(IH_RB_WPTR);
7489
7490	if (wptr & RB_OVERFLOW) {
7491		wptr &= ~RB_OVERFLOW;
7492		/* When a ring buffer overflow happens, start parsing interrupts
7493		 * from the last vector that was not overwritten (wptr + 16).
7494		 * Hopefully this will allow us to catch up.
7495		 */
7496		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7497			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7498		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7499		tmp = RREG32(IH_RB_CNTL);
7500		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7501		WREG32(IH_RB_CNTL, tmp);
7502	}
7503	return (wptr & rdev->ih.ptr_mask);
7504}
7505
7506/* CIK IV Ring
7507 * Each IV ring entry is 128 bits:
7508 * [7:0]    - interrupt source id
7509 * [31:8]   - reserved
7510 * [59:32]  - interrupt source data
7511 * [63:60]  - reserved
7512 * [71:64]  - RINGID
7513 *            CP:
7514 *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7515 *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7516 *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7517 *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7518 *            PIPE_ID - ME0 0=3D
7519 *                    - ME1&2 compute dispatcher (4 pipes each)
7520 *            SDMA:
7521 *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7522 *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7523 *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7524 * [79:72]  - VMID
7525 * [95:80]  - PASID
7526 * [127:96] - reserved
7527 */
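
/*
 * A minimal sketch (not driver code, compiled out) of unpacking one
 * 128-bit IV ring entry per the layout above.  The struct and function
 * are hypothetical; cik_irq_process() below does the same unpacking
 * inline for the fields it needs.
 */
#if 0
struct cik_iv_entry_demo {
	u8 src_id;	/* [7:0] */
	u32 src_data;	/* [59:32] */
	u8 ring_id;	/* [71:64] */
	u8 vmid;	/* [79:72] */
	u16 pasid;	/* [95:80] */
};

static void cik_iv_decode_demo(const volatile u32 *dw,
			       struct cik_iv_entry_demo *e)
{
	e->src_id = le32_to_cpu(dw[0]) & 0xff;
	e->src_data = le32_to_cpu(dw[1]) & 0xfffffff;
	e->ring_id = le32_to_cpu(dw[2]) & 0xff;
	e->vmid = (le32_to_cpu(dw[2]) >> 8) & 0xff;
	e->pasid = le32_to_cpu(dw[2]) >> 16;
}
#endif
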
7528/**
7529 * cik_irq_process - interrupt handler
7530 *
7531 * @rdev: radeon_device pointer
7532 *
7533 * Interrupt handler (CIK).  Walk the IH ring,
7534 * ack interrupts and schedule work to handle
7535 * interrupt events.
7536 * Returns irq process return code.
7537 */
7538int cik_irq_process(struct radeon_device *rdev)
7539{
7540	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7541	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7542	u32 wptr;
7543	u32 rptr;
7544	u32 src_id, src_data, ring_id;
7545	u8 me_id, pipe_id, queue_id;
7546	u32 ring_index;
7547	bool queue_hotplug = false;
7548	bool queue_dp = false;
7549	bool queue_reset = false;
7550	u32 addr, status, mc_client;
7551	bool queue_thermal = false;
7552
7553	if (!rdev->ih.enabled || rdev->shutdown)
7554		return IRQ_NONE;
7555
7556	wptr = cik_get_ih_wptr(rdev);
7557
7558	if (wptr == rdev->ih.rptr)
7559		return IRQ_NONE;
7560restart_ih:
7561	/* is somebody else already processing irqs? */
7562	if (atomic_xchg(&rdev->ih.lock, 1))
7563		return IRQ_NONE;
7564
7565	rptr = rdev->ih.rptr;
7566	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7567
7568	/* Order reading of wptr vs. reading of IH ring data */
7569	rmb();
7570
7571	/* display interrupts */
7572	cik_irq_ack(rdev);
7573
7574	while (rptr != wptr) {
7575		/* wptr/rptr are in bytes! */
7576		ring_index = rptr / 4;
7577
7578		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7579		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7580		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7581
7582		switch (src_id) {
7583		case 1: /* D1 vblank/vline */
7584			switch (src_data) {
7585			case 0: /* D1 vblank */
7586				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7587					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7588
7589				if (rdev->irq.crtc_vblank_int[0]) {
7590					drm_handle_vblank(rdev->ddev, 0);
7591					rdev->pm.vblank_sync = true;
7592					wake_up(&rdev->irq.vblank_queue);
7593				}
7594				if (atomic_read(&rdev->irq.pflip[0]))
7595					radeon_crtc_handle_vblank(rdev, 0);
7596				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7597				DRM_DEBUG("IH: D1 vblank\n");
7598
7599				break;
7600			case 1: /* D1 vline */
7601				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7602					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7603
7604				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7605				DRM_DEBUG("IH: D1 vline\n");
7606
7607				break;
7608			default:
7609				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7610				break;
7611			}
7612			break;
7613		case 2: /* D2 vblank/vline */
7614			switch (src_data) {
7615			case 0: /* D2 vblank */
7616				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7617					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7618
7619				if (rdev->irq.crtc_vblank_int[1]) {
7620					drm_handle_vblank(rdev->ddev, 1);
7621					rdev->pm.vblank_sync = true;
7622					wake_up(&rdev->irq.vblank_queue);
7623				}
7624				if (atomic_read(&rdev->irq.pflip[1]))
7625					radeon_crtc_handle_vblank(rdev, 1);
7626				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7627				DRM_DEBUG("IH: D2 vblank\n");
7628
7629				break;
7630			case 1: /* D2 vline */
7631				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7632					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7633
7634				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7635				DRM_DEBUG("IH: D2 vline\n");
7636
7637				break;
7638			default:
7639				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7640				break;
7641			}
7642			break;
7643		case 3: /* D3 vblank/vline */
7644			switch (src_data) {
7645			case 0: /* D3 vblank */
7646				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7647					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7648
7649				if (rdev->irq.crtc_vblank_int[2]) {
7650					drm_handle_vblank(rdev->ddev, 2);
7651					rdev->pm.vblank_sync = true;
7652					wake_up(&rdev->irq.vblank_queue);
7653				}
7654				if (atomic_read(&rdev->irq.pflip[2]))
7655					radeon_crtc_handle_vblank(rdev, 2);
7656				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7657				DRM_DEBUG("IH: D3 vblank\n");
7658
7659				break;
7660			case 1: /* D3 vline */
7661				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7662					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7663
7664				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7665				DRM_DEBUG("IH: D3 vline\n");
7666
7667				break;
7668			default:
7669				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7670				break;
7671			}
7672			break;
7673		case 4: /* D4 vblank/vline */
7674			switch (src_data) {
7675			case 0: /* D4 vblank */
7676				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7677					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7678
7679				if (rdev->irq.crtc_vblank_int[3]) {
7680					drm_handle_vblank(rdev->ddev, 3);
7681					rdev->pm.vblank_sync = true;
7682					wake_up(&rdev->irq.vblank_queue);
7683				}
7684				if (atomic_read(&rdev->irq.pflip[3]))
7685					radeon_crtc_handle_vblank(rdev, 3);
7686				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7687				DRM_DEBUG("IH: D4 vblank\n");
7688
7689				break;
7690			case 1: /* D4 vline */
7691				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7692					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7693
7694				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7695				DRM_DEBUG("IH: D4 vline\n");
7696
7697				break;
7698			default:
7699				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7700				break;
7701			}
7702			break;
7703		case 5: /* D5 vblank/vline */
7704			switch (src_data) {
7705			case 0: /* D5 vblank */
7706				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7707					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7708
7709				if (rdev->irq.crtc_vblank_int[4]) {
7710					drm_handle_vblank(rdev->ddev, 4);
7711					rdev->pm.vblank_sync = true;
7712					wake_up(&rdev->irq.vblank_queue);
7713				}
7714				if (atomic_read(&rdev->irq.pflip[4]))
7715					radeon_crtc_handle_vblank(rdev, 4);
7716				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7717				DRM_DEBUG("IH: D5 vblank\n");
7718
7719				break;
7720			case 1: /* D5 vline */
7721				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7722					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7723
7724				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7725				DRM_DEBUG("IH: D5 vline\n");
7726
7727				break;
7728			default:
7729				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7730				break;
7731			}
7732			break;
7733		case 6: /* D6 vblank/vline */
7734			switch (src_data) {
7735			case 0: /* D6 vblank */
7736				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7737					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7738
7739				if (rdev->irq.crtc_vblank_int[5]) {
7740					drm_handle_vblank(rdev->ddev, 5);
7741					rdev->pm.vblank_sync = true;
7742					wake_up(&rdev->irq.vblank_queue);
7743				}
7744				if (atomic_read(&rdev->irq.pflip[5]))
7745					radeon_crtc_handle_vblank(rdev, 5);
7746				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7747				DRM_DEBUG("IH: D6 vblank\n");
7748
7749				break;
7750			case 1: /* D6 vline */
7751				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7752					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7753
7754				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7755				DRM_DEBUG("IH: D6 vline\n");
7756
7757				break;
7758			default:
7759				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7760				break;
7761			}
7762			break;
7763		case 8: /* D1 page flip */
7764		case 10: /* D2 page flip */
7765		case 12: /* D3 page flip */
7766		case 14: /* D4 page flip */
7767		case 16: /* D5 page flip */
7768		case 18: /* D6 page flip */
7769			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7770			if (radeon_use_pflipirq > 0)
7771				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7772			break;
7773		case 42: /* HPD hotplug */
7774			switch (src_data) {
7775			case 0:
7776				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7777					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7778
7779				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7780				queue_hotplug = true;
7781				DRM_DEBUG("IH: HPD1\n");
7782
7783				break;
7784			case 1:
7785				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7786					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7787
7788				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7789				queue_hotplug = true;
7790				DRM_DEBUG("IH: HPD2\n");
7791
7792				break;
7793			case 2:
7794				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7795					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7796
7797				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7798				queue_hotplug = true;
7799				DRM_DEBUG("IH: HPD3\n");
7800
7801				break;
7802			case 3:
7803				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7804					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7805
7806				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7807				queue_hotplug = true;
7808				DRM_DEBUG("IH: HPD4\n");
7809
7810				break;
7811			case 4:
7812				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7813					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7814
7815				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7816				queue_hotplug = true;
7817				DRM_DEBUG("IH: HPD5\n");
7818
7819				break;
7820			case 5:
7821				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7822					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7823
7824				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7825				queue_hotplug = true;
7826				DRM_DEBUG("IH: HPD6\n");
7827
7828				break;
7829			case 6:
7830				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7831					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7832
7833				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7834				queue_dp = true;
7835				DRM_DEBUG("IH: HPD_RX 1\n");
7836
7837				break;
7838			case 7:
7839				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7840					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7841
7842				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7843				queue_dp = true;
7844				DRM_DEBUG("IH: HPD_RX 2\n");
7845
7846				break;
7847			case 8:
7848				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7849					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7850
7851				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7852				queue_dp = true;
7853				DRM_DEBUG("IH: HPD_RX 3\n");
7854
7855				break;
7856			case 9:
7857				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7858					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7859
7860				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7861				queue_dp = true;
7862				DRM_DEBUG("IH: HPD_RX 4\n");
7863
7864				break;
7865			case 10:
7866				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7867					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7868
7869				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7870				queue_dp = true;
7871				DRM_DEBUG("IH: HPD_RX 5\n");
7872
7873				break;
7874			case 11:
7875				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7876					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7877
7878				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7879				queue_dp = true;
7880				DRM_DEBUG("IH: HPD_RX 6\n");
7881
7882				break;
7883			default:
7884				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7885				break;
7886			}
7887			break;
7888		case 96:
7889			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7890			WREG32(SRBM_INT_ACK, 0x1);
7891			break;
7892		case 124: /* UVD */
7893			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7894			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7895			break;
7896		case 146:
7897		case 147:
7898			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7899			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7900			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7901			/* reset addr and status */
7902			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7903			if (addr == 0x0 && status == 0x0)
7904				break;
7905			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7906			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7907				addr);
7908			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7909				status);
7910			cik_vm_decode_fault(rdev, status, addr, mc_client);
7911			break;
7912		case 167: /* VCE */
7913			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7914			switch (src_data) {
7915			case 0:
7916				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7917				break;
7918			case 1:
7919				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7920				break;
7921			default:
7922				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7923				break;
7924			}
7925			break;
7926		case 176: /* GFX RB CP_INT */
7927		case 177: /* GFX IB CP_INT */
7928			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7929			break;
7930		case 181: /* CP EOP event */
7931			DRM_DEBUG("IH: CP EOP\n");
7932			/* XXX check the bitfield order! */
7933			me_id = (ring_id & 0x60) >> 5;
7934			pipe_id = (ring_id & 0x18) >> 3;
7935			queue_id = (ring_id & 0x7) >> 0;
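			/* e.g. ring_id 0x29 decodes to me 1, pipe 1, queue 1 */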
7936			switch (me_id) {
7937			case 0:
7938				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7939				break;
7940			case 1:
7941			case 2:
7942				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
7943					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7944				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
7945					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7946				break;
7947			}
7948			break;
7949		case 184: /* CP Privileged reg access */
7950			DRM_ERROR("Illegal register access in command stream\n");
7951			/* XXX check the bitfield order! */
7952			me_id = (ring_id & 0x60) >> 5;
7953			switch (me_id) {
7954			case 0:
7955				/* This results in a full GPU reset, but all we need to do is soft
7956				 * reset the CP for gfx
7957				 */
7958				queue_reset = true;
7959				break;
7960			case 1:
7961				/* XXX compute */
7962				queue_reset = true;
7963				break;
7964			case 2:
7965				/* XXX compute */
7966				queue_reset = true;
7967				break;
7968			}
7969			break;
7970		case 185: /* CP Privileged inst */
7971			DRM_ERROR("Illegal instruction in command stream\n");
7972			/* XXX check the bitfield order! */
7973			me_id = (ring_id & 0x60) >> 5;
7974			switch (me_id) {
7975			case 0:
7976				/* This results in a full GPU reset, but all we need to do is soft
7977				 * reset the CP for gfx
7978				 */
7979				queue_reset = true;
7980				break;
7981			case 1:
7982				/* XXX compute */
7983				queue_reset = true;
7984				break;
7985			case 2:
7986				/* XXX compute */
7987				queue_reset = true;
7988				break;
7989			}
7990			break;
7991		case 224: /* SDMA trap event */
7992			/* XXX check the bitfield order! */
7993			me_id = (ring_id & 0x3) >> 0;
7994			queue_id = (ring_id & 0xc) >> 2;
7995			DRM_DEBUG("IH: SDMA trap\n");
7996			switch (me_id) {
7997			case 0:
7998				switch (queue_id) {
7999				case 0:
8000					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8001					break;
8002				case 1:
8003					/* XXX compute */
8004					break;
8005				case 2:
8006					/* XXX compute */
8007					break;
8008				}
8009				break;
8010			case 1:
8011				switch (queue_id) {
8012				case 0:
8013					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8014					break;
8015				case 1:
8016					/* XXX compute */
8017					break;
8018				case 2:
8019					/* XXX compute */
8020					break;
8021				}
8022				break;
8023			}
8024			break;
8025		case 230: /* thermal low to high */
8026			DRM_DEBUG("IH: thermal low to high\n");
8027			rdev->pm.dpm.thermal.high_to_low = false;
8028			queue_thermal = true;
8029			break;
8030		case 231: /* thermal high to low */
8031			DRM_DEBUG("IH: thermal high to low\n");
8032			rdev->pm.dpm.thermal.high_to_low = true;
8033			queue_thermal = true;
8034			break;
8035		case 233: /* GUI IDLE */
8036			DRM_DEBUG("IH: GUI idle\n");
8037			break;
8038		case 241: /* SDMA Privileged inst */
8039		case 247: /* SDMA Privileged inst */
8040			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8041			/* XXX check the bitfield order! */
8042			me_id = (ring_id & 0x3) >> 0;
8043			queue_id = (ring_id & 0xc) >> 2;
8044			switch (me_id) {
8045			case 0:
8046				switch (queue_id) {
8047				case 0:
8048					queue_reset = true;
8049					break;
8050				case 1:
8051					/* XXX compute */
8052					queue_reset = true;
8053					break;
8054				case 2:
8055					/* XXX compute */
8056					queue_reset = true;
8057					break;
8058				}
8059				break;
8060			case 1:
8061				switch (queue_id) {
8062				case 0:
8063					queue_reset = true;
8064					break;
8065				case 1:
8066					/* XXX compute */
8067					queue_reset = true;
8068					break;
8069				case 2:
8070					/* XXX compute */
8071					queue_reset = true;
8072					break;
8073				}
8074				break;
8075			}
8076			break;
8077		default:
8078			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8079			break;
8080		}
8081
8082		/* wptr/rptr are in bytes! */
8083		rptr += 16;
8084		rptr &= rdev->ih.ptr_mask;
8085		WREG32(IH_RB_RPTR, rptr);
8086	}
8087	if (queue_dp)
8088		schedule_work(&rdev->dp_work);
8089	if (queue_hotplug)
8090		schedule_delayed_work(&rdev->hotplug_work, 0);
8091	if (queue_reset) {
8092		rdev->needs_reset = true;
8093		wake_up_all(&rdev->fence_queue);
8094	}
8095	if (queue_thermal)
8096		schedule_work(&rdev->pm.dpm.thermal.work);
8097	rdev->ih.rptr = rptr;
8098	atomic_set(&rdev->ih.lock, 0);
8099
8100	/* make sure wptr hasn't changed while processing */
8101	wptr = cik_get_ih_wptr(rdev);
8102	if (wptr != rptr)
8103		goto restart_ih;
8104
8105	return IRQ_HANDLED;
8106}
8107
8108/*
8109 * startup/shutdown callbacks
8110 */
8111static void cik_uvd_init(struct radeon_device *rdev)
8112{
8113	int r;
8114
8115	if (!rdev->has_uvd)
8116		return;
8117
8118	r = radeon_uvd_init(rdev);
8119	if (r) {
8120		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8121		/*
8122		 * At this point rdev->uvd.vcpu_bo is NULL, which makes
8123		 * cik_uvd_start() fail early and do nothing there, so it
8124		 * is pointless to go through that code.  Hence we disable
8125		 * uvd here.
8126		 */
8127		rdev->has_uvd = false;
8128		return;
8129	}
8130	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8131	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8132}
8133
8134static void cik_uvd_start(struct radeon_device *rdev)
8135{
8136	int r;
8137
8138	if (!rdev->has_uvd)
8139		return;
8140
8141	r = radeon_uvd_resume(rdev);
8142	if (r) {
8143		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8144		goto error;
8145	}
8146	r = uvd_v4_2_resume(rdev);
8147	if (r) {
8148		dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8149		goto error;
8150	}
8151	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8152	if (r) {
8153		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8154		goto error;
8155	}
8156	return;
8157
8158error:
8159	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8160}
8161
8162static void cik_uvd_resume(struct radeon_device *rdev)
8163{
8164	struct radeon_ring *ring;
8165	int r;
8166
8167	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8168		return;
8169
8170	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8171	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8172	if (r) {
8173		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8174		return;
8175	}
8176	r = uvd_v1_0_init(rdev);
8177	if (r) {
8178		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8179		return;
8180	}
8181}
8182
8183static void cik_vce_init(struct radeon_device *rdev)
8184{
8185	int r;
8186
8187	if (!rdev->has_vce)
8188		return;
8189
8190	r = radeon_vce_init(rdev);
8191	if (r) {
8192		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8193		/*
8194		 * At this point rdev->vce.vcpu_bo is NULL, which makes
8195		 * cik_vce_start() fail early and do nothing there, so it
8196		 * is pointless to go through that code.  Hence we disable
8197		 * vce here.
8198		 */
8199		rdev->has_vce = false;
8200		return;
8201	}
8202	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8203	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8204	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8205	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8206}
8207
8208static void cik_vce_start(struct radeon_device *rdev)
8209{
8210	int r;
8211
8212	if (!rdev->has_vce)
8213		return;
8214
8215	r = radeon_vce_resume(rdev);
8216	if (r) {
8217		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8218		goto error;
8219	}
8220	r = vce_v2_0_resume(rdev);
8221	if (r) {
8222		dev_err(rdev->dev, "failed VCE 2.0 resume (%d).\n", r);
8223		goto error;
8224	}
8225	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8226	if (r) {
8227		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8228		goto error;
8229	}
8230	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8231	if (r) {
8232		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8233		goto error;
8234	}
8235	return;
8236
8237error:
8238	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8239	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8240}
8241
8242static void cik_vce_resume(struct radeon_device *rdev)
8243{
8244	struct radeon_ring *ring;
8245	int r;
8246
8247	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8248		return;
8249
8250	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8251	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8252	if (r) {
8253		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8254		return;
8255	}
8256	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8257	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8258	if (r) {
8259		dev_err(rdev->dev, "failed initializing VCE2 ring (%d).\n", r);
8260		return;
8261	}
8262	r = vce_v1_0_init(rdev);
8263	if (r) {
8264		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8265		return;
8266	}
8267}
8268
8269/**
8270 * cik_startup - program the asic to a functional state
8271 *
8272 * @rdev: radeon_device pointer
8273 *
8274 * Programs the asic to a functional state (CIK).
8275 * Called by cik_init() and cik_resume().
8276 * Returns 0 for success, error for failure.
8277 */
8278static int cik_startup(struct radeon_device *rdev)
8279{
8280	struct radeon_ring *ring;
8281	u32 nop;
8282	int r;
8283
8284	/* enable pcie gen2/3 link */
8285	cik_pcie_gen3_enable(rdev);
8286	/* enable aspm */
8287	cik_program_aspm(rdev);
8288
8289	/* scratch needs to be initialized before MC */
8290	r = r600_vram_scratch_init(rdev);
8291	if (r)
8292		return r;
8293
8294	cik_mc_program(rdev);
8295
8296	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8297		r = ci_mc_load_microcode(rdev);
8298		if (r) {
8299			DRM_ERROR("Failed to load MC firmware!\n");
8300			return r;
8301		}
8302	}
8303
8304	r = cik_pcie_gart_enable(rdev);
8305	if (r)
8306		return r;
8307	cik_gpu_init(rdev);
8308
8309	/* allocate rlc buffers */
8310	if (rdev->flags & RADEON_IS_IGP) {
8311		if (rdev->family == CHIP_KAVERI) {
8312			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8313			rdev->rlc.reg_list_size =
8314				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8315		} else {
8316			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8317			rdev->rlc.reg_list_size =
8318				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8319		}
8320	}
8321	rdev->rlc.cs_data = ci_cs_data;
8322	rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
8323	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
8324	r = sumo_rlc_init(rdev);
8325	if (r) {
8326		DRM_ERROR("Failed to init rlc BOs!\n");
8327		return r;
8328	}
8329
8330	/* allocate wb buffer */
8331	r = radeon_wb_init(rdev);
8332	if (r)
8333		return r;
8334
8335	/* allocate mec buffers */
8336	r = cik_mec_init(rdev);
8337	if (r) {
8338		DRM_ERROR("Failed to init MEC BOs!\n");
8339		return r;
8340	}
8341
8342	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8343	if (r) {
8344		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8345		return r;
8346	}
8347
8348	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8349	if (r) {
8350		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8351		return r;
8352	}
8353
8354	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8355	if (r) {
8356		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8357		return r;
8358	}
8359
8360	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8361	if (r) {
8362		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8363		return r;
8364	}
8365
8366	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8367	if (r) {
8368		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8369		return r;
8370	}
8371
8372	cik_uvd_start(rdev);
8373	cik_vce_start(rdev);
8374
8375	/* Enable IRQ */
8376	if (!rdev->irq.installed) {
8377		r = radeon_irq_kms_init(rdev);
8378		if (r)
8379			return r;
8380	}
8381
8382	r = cik_irq_init(rdev);
8383	if (r) {
8384		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8385		radeon_irq_kms_fini(rdev);
8386		return r;
8387	}
8388	cik_irq_set(rdev);
8389
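	/* Older Hawaii CP firmware appears not to cope with the type-3 NOP
	 * used to pad the rings, so fall back to the legacy type-2 packet
	 * there; everything else pads with PACKET3_NOP.
	 */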
8390	if (rdev->family == CHIP_HAWAII) {
8391		if (rdev->new_fw)
8392			nop = PACKET3(PACKET3_NOP, 0x3FFF);
8393		else
8394			nop = RADEON_CP_PACKET2;
8395	} else {
8396		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8397	}
8398
8399	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8400	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8401			     nop);
8402	if (r)
8403		return r;
8404
8405	/* set up the compute queues */
8406	/* type-2 packets are deprecated on MEC, use type-3 instead */
8407	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8408	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8409			     nop);
8410	if (r)
8411		return r;
8412	ring->me = 1; /* first MEC */
8413	ring->pipe = 0; /* first pipe */
8414	ring->queue = 0; /* first queue */
8415	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8416
8417	/* type-2 packets are deprecated on MEC, use type-3 instead */
8418	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8419	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8420			     nop);
8421	if (r)
8422		return r;
8423	/* dGPUs only have 1 MEC */
8424	ring->me = 1; /* first MEC */
8425	ring->pipe = 0; /* first pipe */
8426	ring->queue = 1; /* second queue */
8427	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8428
8429	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8430	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8431			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8432	if (r)
8433		return r;
8434
8435	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8436	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8437			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8438	if (r)
8439		return r;
8440
8441	r = cik_cp_resume(rdev);
8442	if (r)
8443		return r;
8444
8445	r = cik_sdma_resume(rdev);
8446	if (r)
8447		return r;
8448
8449	cik_uvd_resume(rdev);
8450	cik_vce_resume(rdev);
8451
8452	r = radeon_ib_pool_init(rdev);
8453	if (r) {
8454		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8455		return r;
8456	}
8457
8458	r = radeon_vm_manager_init(rdev);
8459	if (r) {
8460		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8461		return r;
8462	}
8463
8464	r = radeon_audio_init(rdev);
8465	if (r)
8466		return r;
8467
8468	return 0;
8469}
8470
8471/**
8472 * cik_resume - resume the asic to a functional state
8473 *
8474 * @rdev: radeon_device pointer
8475 *
8476 * Programs the asic to a functional state (CIK).
8477 * Called at resume.
8478 * Returns 0 for success, error for failure.
8479 */
8480int cik_resume(struct radeon_device *rdev)
8481{
8482	int r;
8483
8484	/* post card */
8485	atom_asic_init(rdev->mode_info.atom_context);
8486
8487	/* init golden registers */
8488	cik_init_golden_registers(rdev);
8489
8490	if (rdev->pm.pm_method == PM_METHOD_DPM)
8491		radeon_pm_resume(rdev);
8492
8493	rdev->accel_working = true;
8494	r = cik_startup(rdev);
8495	if (r) {
8496		DRM_ERROR("cik startup failed on resume\n");
8497		rdev->accel_working = false;
8498		return r;
8499	}
8500
8501	return r;
8503}
8504
8505/**
8506 * cik_suspend - suspend the asic
8507 *
8508 * @rdev: radeon_device pointer
8509 *
8510 * Bring the chip into a state suitable for suspend (CIK).
8511 * Called at suspend.
8512 * Returns 0 for success.
8513 */
8514int cik_suspend(struct radeon_device *rdev)
8515{
8516	radeon_pm_suspend(rdev);
8517	radeon_audio_fini(rdev);
8518	radeon_vm_manager_fini(rdev);
8519	cik_cp_enable(rdev, false);
8520	cik_sdma_enable(rdev, false);
8521	if (rdev->has_uvd) {
8522		radeon_uvd_suspend(rdev);
8523		uvd_v1_0_fini(rdev);
8524	}
8525	if (rdev->has_vce)
8526		radeon_vce_suspend(rdev);
8527	cik_fini_pg(rdev);
8528	cik_fini_cg(rdev);
8529	cik_irq_suspend(rdev);
8530	radeon_wb_disable(rdev);
8531	cik_pcie_gart_disable(rdev);
8532	return 0;
8533}
8534
8535/* The plan is to move initialization into this function and use
8536 * helper functions so that radeon_device_init does pretty much
8537 * nothing more than call asic-specific functions.  This should
8538 * also allow us to remove a bunch of callback functions
8539 * like vram_info.
8540 */
8541/**
8542 * cik_init - asic specific driver and hw init
8543 *
8544 * @rdev: radeon_device pointer
8545 *
8546 * Setup asic specific driver variables and program the hw
8547 * to a functional state (CIK).
8548 * Called at driver startup.
8549 * Returns 0 for success, errors for failure.
8550 */
8551int cik_init(struct radeon_device *rdev)
8552{
8553	struct radeon_ring *ring;
8554	int r;
8555
8556	/* Read BIOS */
8557	if (!radeon_get_bios(rdev)) {
8558		if (ASIC_IS_AVIVO(rdev))
8559			return -EINVAL;
8560	}
8561	/* Must be an ATOMBIOS */
8562	if (!rdev->is_atom_bios) {
8563		dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
8564		return -EINVAL;
8565	}
8566	r = radeon_atombios_init(rdev);
8567	if (r)
8568		return r;
8569
8570	/* Post card if necessary */
8571	if (!radeon_card_posted(rdev)) {
8572		if (!rdev->bios) {
8573			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8574			return -EINVAL;
8575		}
8576		DRM_INFO("GPU not posted. posting now...\n");
8577		atom_asic_init(rdev->mode_info.atom_context);
8578	}
8579	/* init golden registers */
8580	cik_init_golden_registers(rdev);
8581	/* Initialize scratch registers */
8582	cik_scratch_init(rdev);
8583	/* Initialize surface registers */
8584	radeon_surface_init(rdev);
8585	/* Initialize clocks */
8586	radeon_get_clock_info(rdev->ddev);
8587
8588	/* Fence driver */
8589	radeon_fence_driver_init(rdev);
8590
8591	/* initialize memory controller */
8592	r = cik_mc_init(rdev);
8593	if (r)
8594		return r;
8595	/* Memory manager */
8596	r = radeon_bo_init(rdev);
8597	if (r)
8598		return r;
8599
8600	if (rdev->flags & RADEON_IS_IGP) {
8601		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8602		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8603			r = cik_init_microcode(rdev);
8604			if (r) {
8605				DRM_ERROR("Failed to load firmware!\n");
8606				return r;
8607			}
8608		}
8609	} else {
8610		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8611		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8612		    !rdev->mc_fw) {
8613			r = cik_init_microcode(rdev);
8614			if (r) {
8615				DRM_ERROR("Failed to load firmware!\n");
8616				return r;
8617			}
8618		}
8619	}
8620
8621	/* Initialize power management */
8622	radeon_pm_init(rdev);
8623
8624	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8625	ring->ring_obj = NULL;
8626	r600_ring_init(rdev, ring, 1024 * 1024);
8627
8628	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8629	ring->ring_obj = NULL;
8630	r600_ring_init(rdev, ring, 1024 * 1024);
8631	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8632	if (r)
8633		return r;
8634
8635	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8636	ring->ring_obj = NULL;
8637	r600_ring_init(rdev, ring, 1024 * 1024);
8638	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8639	if (r)
8640		return r;
8641
8642	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8643	ring->ring_obj = NULL;
8644	r600_ring_init(rdev, ring, 256 * 1024);
8645
8646	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8647	ring->ring_obj = NULL;
8648	r600_ring_init(rdev, ring, 256 * 1024);
8649
8650	cik_uvd_init(rdev);
8651	cik_vce_init(rdev);
8652
8653	rdev->ih.ring_obj = NULL;
8654	r600_ih_ring_init(rdev, 64 * 1024);
8655
8656	r = r600_pcie_gart_init(rdev);
8657	if (r)
8658		return r;
8659
8660	rdev->accel_working = true;
8661	r = cik_startup(rdev);
8662	if (r) {
8663		dev_err(rdev->dev, "disabling GPU acceleration\n");
8664		cik_cp_fini(rdev);
8665		cik_sdma_fini(rdev);
8666		cik_irq_fini(rdev);
8667		sumo_rlc_fini(rdev);
8668		cik_mec_fini(rdev);
8669		radeon_wb_fini(rdev);
8670		radeon_ib_pool_fini(rdev);
8671		radeon_vm_manager_fini(rdev);
8672		radeon_irq_kms_fini(rdev);
8673		cik_pcie_gart_fini(rdev);
8674		rdev->accel_working = false;
8675	}
8676
8677	/* Don't start up if the MC ucode is missing.
8678	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
8680	 */
8681	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
		DRM_ERROR("radeon: MC ucode required on discrete CIK parts.\n");
8683		return -EINVAL;
8684	}
8685
8686	return 0;
8687}
8688
8689/**
8690 * cik_fini - asic specific driver and hw fini
8691 *
8692 * @rdev: radeon_device pointer
8693 *
8694 * Tear down the asic specific driver variables and program the hw
8695 * to an idle state (CIK).
8696 * Called at driver unload.
8697 */
8698void cik_fini(struct radeon_device *rdev)
8699{
8700	radeon_pm_fini(rdev);
8701	cik_cp_fini(rdev);
8702	cik_sdma_fini(rdev);
8703	cik_fini_pg(rdev);
8704	cik_fini_cg(rdev);
8705	cik_irq_fini(rdev);
8706	sumo_rlc_fini(rdev);
8707	cik_mec_fini(rdev);
8708	radeon_wb_fini(rdev);
8709	radeon_vm_manager_fini(rdev);
8710	radeon_ib_pool_fini(rdev);
8711	radeon_irq_kms_fini(rdev);
8712	uvd_v1_0_fini(rdev);
8713	radeon_uvd_fini(rdev);
8714	radeon_vce_fini(rdev);
8715	cik_pcie_gart_fini(rdev);
8716	r600_vram_scratch_fini(rdev);
8717	radeon_gem_fini(rdev);
8718	radeon_fence_driver_fini(rdev);
8719	radeon_bo_fini(rdev);
8720	radeon_atombios_fini(rdev);
8721	kfree(rdev->bios);
8722	rdev->bios = NULL;
8723}
8724
8725void dce8_program_fmt(struct drm_encoder *encoder)
8726{
8727	struct drm_device *dev = encoder->dev;
8728	struct radeon_device *rdev = dev->dev_private;
8729	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8730	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8731	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8732	int bpc = 0;
8733	u32 tmp = 0;
8734	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8735
8736	if (connector) {
8737		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8738		bpc = radeon_get_monitor_bpc(connector);
8739		dither = radeon_connector->dither;
8740	}
8741
8742	/* LVDS/eDP FMT is set up by atom */
8743	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8744		return;
8745
8746	/* not needed for analog */
8747	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8748	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8749		return;
8750
8751	if (bpc == 0)
8752		return;
8753
8754	switch (bpc) {
8755	case 6:
8756		if (dither == RADEON_FMT_DITHER_ENABLE)
8757			/* XXX sort out optimal dither settings */
8758			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8759				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8760		else
8761			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8762		break;
8763	case 8:
8764		if (dither == RADEON_FMT_DITHER_ENABLE)
8765			/* XXX sort out optimal dither settings */
8766			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8767				FMT_RGB_RANDOM_ENABLE |
8768				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8769		else
8770			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8771		break;
8772	case 10:
8773		if (dither == RADEON_FMT_DITHER_ENABLE)
8774			/* XXX sort out optimal dither settings */
8775			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8776				FMT_RGB_RANDOM_ENABLE |
8777				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8778		else
8779			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8780		break;
8781	default:
8782		/* not needed */
8783		break;
8784	}
8785
8786	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8787}
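
/*
 * Worked example (editor's addition): for a hypothetical 8 bpc
 * monitor with dithering enabled, the switch above composes
 *   tmp = FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
 *         FMT_RGB_RANDOM_ENABLE | FMT_SPATIAL_DITHER_EN |
 *         FMT_SPATIAL_DITHER_DEPTH(1)
 * which is then written to FMT_BIT_DEPTH_CONTROL for this crtc.
 */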
8788
8789/* display watermark setup */
8790/**
8791 * dce8_line_buffer_adjust - Set up the line buffer
8792 *
8793 * @rdev: radeon_device pointer
8794 * @radeon_crtc: the selected display controller
8795 * @mode: the current display mode on the selected display
8796 * controller
8797 *
 * Set up the line buffer allocation for
8799 * the selected display controller (CIK).
8800 * Returns the line buffer size in pixels.
8801 */
8802static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8803				   struct radeon_crtc *radeon_crtc,
8804				   struct drm_display_mode *mode)
8805{
8806	u32 tmp, buffer_alloc, i;
8807	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8808	/*
8809	 * Line Buffer Setup
	 * There are 6 line buffers, one for each display controller.
8811	 * There are 3 partitions per LB. Select the number of partitions
8812	 * to enable based on the display width.  For display widths larger
	 * than 4096, you need to use 2 display controllers and combine
8814	 * them using the stereo blender.
8815	 */
8816	if (radeon_crtc->base.enabled && mode) {
8817		if (mode->crtc_hdisplay < 1920) {
8818			tmp = 1;
8819			buffer_alloc = 2;
8820		} else if (mode->crtc_hdisplay < 2560) {
8821			tmp = 2;
8822			buffer_alloc = 2;
8823		} else if (mode->crtc_hdisplay < 4096) {
8824			tmp = 0;
8825			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8826		} else {
8827			DRM_DEBUG_KMS("Mode too big for LB!\n");
8828			tmp = 0;
8829			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8830		}
8831	} else {
8832		tmp = 1;
8833		buffer_alloc = 0;
8834	}
8835
8836	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8837	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8838
8839	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8840	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8841	for (i = 0; i < rdev->usec_timeout; i++) {
8842		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8843		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8844			break;
8845		udelay(1);
8846	}
8847
8848	if (radeon_crtc->base.enabled && mode) {
8849		switch (tmp) {
8850		case 0:
8851		default:
8852			return 4096 * 2;
8853		case 1:
8854			return 1920 * 2;
8855		case 2:
8856			return 2560 * 2;
8857		}
8858	}
8859
8860	/* controller not enabled, so no lb used */
8861	return 0;
8862}
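
/*
 * Worked example (editor's addition): a 1920x1080 mode takes the
 * crtc_hdisplay < 2560 branch above, so tmp = 2 and buffer_alloc = 2,
 * and the function reports 2560 * 2 = 5120 pixels of line buffer
 * for the pipe.
 */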
8863
8864/**
8865 * cik_get_number_of_dram_channels - get the number of dram channels
8866 *
8867 * @rdev: radeon_device pointer
8868 *
8869 * Look up the number of video ram channels (CIK).
8870 * Used for display watermark bandwidth calculations
8871 * Returns the number of dram channels
8872 */
8873static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8874{
8875	u32 tmp = RREG32(MC_SHARED_CHMAP);
8876
8877	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8878	case 0:
8879	default:
8880		return 1;
8881	case 1:
8882		return 2;
8883	case 2:
8884		return 4;
8885	case 3:
8886		return 8;
8887	case 4:
8888		return 3;
8889	case 5:
8890		return 6;
8891	case 6:
8892		return 10;
8893	case 7:
8894		return 12;
8895	case 8:
8896		return 16;
8897	}
8898}
8899
8900struct dce8_wm_params {
8901	u32 dram_channels; /* number of dram channels */
8902	u32 yclk;          /* bandwidth per dram data pin in kHz */
8903	u32 sclk;          /* engine clock in kHz */
8904	u32 disp_clk;      /* display clock in kHz */
8905	u32 src_width;     /* viewport width */
8906	u32 active_time;   /* active display time in ns */
8907	u32 blank_time;    /* blank time in ns */
8908	bool interlaced;    /* mode is interlaced */
8909	fixed20_12 vsc;    /* vertical scale ratio */
8910	u32 num_heads;     /* number of active crtcs */
8911	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8912	u32 lb_size;       /* line buffer allocated to pipe */
8913	u32 vtaps;         /* vertical scaler taps */
8914};
8915
8916/**
8917 * dce8_dram_bandwidth - get the dram bandwidth
8918 *
8919 * @wm: watermark calculation data
8920 *
8921 * Calculate the raw dram bandwidth (CIK).
8922 * Used for display watermark bandwidth calculations
8923 * Returns the dram bandwidth in MBytes/s
8924 */
8925static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8926{
8927	/* Calculate raw DRAM Bandwidth */
8928	fixed20_12 dram_efficiency; /* 0.7 */
8929	fixed20_12 yclk, dram_channels, bandwidth;
8930	fixed20_12 a;
8931
8932	a.full = dfixed_const(1000);
8933	yclk.full = dfixed_const(wm->yclk);
8934	yclk.full = dfixed_div(yclk, a);
8935	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8936	a.full = dfixed_const(10);
8937	dram_efficiency.full = dfixed_const(7);
8938	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8939	bandwidth.full = dfixed_mul(dram_channels, yclk);
8940	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8941
8942	return dfixed_trunc(bandwidth);
8943}
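
/*
 * Illustrative sketch (editor's addition, hypothetical helper that is
 * not called by the driver): the fixed-point math above reduces to
 * the plain integer formula below. With a yclk of 1000000 kHz and
 * 4 dram channels this gives 1000 MHz * 16 bytes * 0.7 = 11200 MBytes/s.
 */
static inline u32 dce8_dram_bandwidth_sketch(struct dce8_wm_params *wm)
{
	/* MBytes/s = (yclk in kHz / 1000) * (channels * 4 bytes) * 0.7 */
	return (wm->yclk / 1000) * wm->dram_channels * 4 * 7 / 10;
}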
8944
8945/**
8946 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8947 *
8948 * @wm: watermark calculation data
8949 *
8950 * Calculate the dram bandwidth used for display (CIK).
8951 * Used for display watermark bandwidth calculations
8952 * Returns the dram bandwidth for display in MBytes/s
8953 */
8954static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8955{
8956	/* Calculate DRAM Bandwidth and the part allocated to display. */
8957	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8958	fixed20_12 yclk, dram_channels, bandwidth;
8959	fixed20_12 a;
8960
8961	a.full = dfixed_const(1000);
8962	yclk.full = dfixed_const(wm->yclk);
8963	yclk.full = dfixed_div(yclk, a);
8964	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8965	a.full = dfixed_const(10);
	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
8967	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8968	bandwidth.full = dfixed_mul(dram_channels, yclk);
8969	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8970
8971	return dfixed_trunc(bandwidth);
8972}
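
/*
 * Worked example (editor's addition): with the same hypothetical
 * 1000000 kHz / 4 channel numbers as above but the 0.3 display
 * allocation, this evaluates to 1000 * 16 * 0.3 = 4800 MBytes/s.
 */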
8973
8974/**
8975 * dce8_data_return_bandwidth - get the data return bandwidth
8976 *
8977 * @wm: watermark calculation data
8978 *
8979 * Calculate the data return bandwidth used for display (CIK).
8980 * Used for display watermark bandwidth calculations
8981 * Returns the data return bandwidth in MBytes/s
8982 */
8983static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8984{
8985	/* Calculate the display Data return Bandwidth */
8986	fixed20_12 return_efficiency; /* 0.8 */
8987	fixed20_12 sclk, bandwidth;
8988	fixed20_12 a;
8989
8990	a.full = dfixed_const(1000);
8991	sclk.full = dfixed_const(wm->sclk);
8992	sclk.full = dfixed_div(sclk, a);
8993	a.full = dfixed_const(10);
8994	return_efficiency.full = dfixed_const(8);
8995	return_efficiency.full = dfixed_div(return_efficiency, a);
8996	a.full = dfixed_const(32);
8997	bandwidth.full = dfixed_mul(a, sclk);
8998	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8999
9000	return dfixed_trunc(bandwidth);
9001}
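
/*
 * Worked example (editor's addition): a hypothetical sclk of
 * 800000 kHz gives 800 MHz * 32 bytes * 0.8 = 20480 MBytes/s.
 */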
9002
9003/**
9004 * dce8_dmif_request_bandwidth - get the dmif bandwidth
9005 *
9006 * @wm: watermark calculation data
9007 *
9008 * Calculate the dmif bandwidth used for display (CIK).
9009 * Used for display watermark bandwidth calculations
9010 * Returns the dmif bandwidth in MBytes/s
9011 */
9012static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9013{
9014	/* Calculate the DMIF Request Bandwidth */
9015	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9016	fixed20_12 disp_clk, bandwidth;
9017	fixed20_12 a, b;
9018
9019	a.full = dfixed_const(1000);
9020	disp_clk.full = dfixed_const(wm->disp_clk);
9021	disp_clk.full = dfixed_div(disp_clk, a);
9022	a.full = dfixed_const(32);
9023	b.full = dfixed_mul(a, disp_clk);
9024
9025	a.full = dfixed_const(10);
9026	disp_clk_request_efficiency.full = dfixed_const(8);
9027	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9028
9029	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9030
9031	return dfixed_trunc(bandwidth);
9032}
9033
9034/**
9035 * dce8_available_bandwidth - get the min available bandwidth
9036 *
9037 * @wm: watermark calculation data
9038 *
9039 * Calculate the min available bandwidth used for display (CIK).
9040 * Used for display watermark bandwidth calculations
9041 * Returns the min available bandwidth in MBytes/s
9042 */
9043static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9044{
	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
9046	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9047	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9048	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9049
9050	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9051}
9052
9053/**
9054 * dce8_average_bandwidth - get the average available bandwidth
9055 *
9056 * @wm: watermark calculation data
9057 *
9058 * Calculate the average available bandwidth used for display (CIK).
9059 * Used for display watermark bandwidth calculations
9060 * Returns the average available bandwidth in MBytes/s
9061 */
9062static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9063{
9064	/* Calculate the display mode Average Bandwidth
9065	 * DisplayMode should contain the source and destination dimensions,
9066	 * timing, etc.
9067	 */
9068	fixed20_12 bpp;
9069	fixed20_12 line_time;
9070	fixed20_12 src_width;
9071	fixed20_12 bandwidth;
9072	fixed20_12 a;
9073
9074	a.full = dfixed_const(1000);
9075	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9076	line_time.full = dfixed_div(line_time, a);
9077	bpp.full = dfixed_const(wm->bytes_per_pixel);
9078	src_width.full = dfixed_const(wm->src_width);
9079	bandwidth.full = dfixed_mul(src_width, bpp);
9080	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9081	bandwidth.full = dfixed_div(bandwidth, line_time);
9082
9083	return dfixed_trunc(bandwidth);
9084}
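
/*
 * Worked example (editor's addition): a 1920 pixel wide source at
 * 4 bytes/pixel with vsc = 1 and a 14815 ns line time (a 1080p@60
 * style mode) averages 1920 * 4 / 14.815 us, roughly 518 MBytes/s.
 */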
9085
9086/**
9087 * dce8_latency_watermark - get the latency watermark
9088 *
9089 * @wm: watermark calculation data
9090 *
9091 * Calculate the latency watermark (CIK).
9092 * Used for display watermark bandwidth calculations
9093 * Returns the latency watermark in ns
9094 */
9095static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9096{
9097	/* First calculate the latency in ns */
9098	u32 mc_latency = 2000; /* 2000 ns. */
9099	u32 available_bandwidth = dce8_available_bandwidth(wm);
9100	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9101	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9102	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9103	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9104		(wm->num_heads * cursor_line_pair_return_time);
9105	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9106	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9107	u32 tmp, dmif_size = 12288;
9108	fixed20_12 a, b, c;
9109
9110	if (wm->num_heads == 0)
9111		return 0;
9112
9113	a.full = dfixed_const(2);
9114	b.full = dfixed_const(1);
9115	if ((wm->vsc.full > a.full) ||
9116	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9117	    (wm->vtaps >= 5) ||
9118	    ((wm->vsc.full >= a.full) && wm->interlaced))
9119		max_src_lines_per_dst_line = 4;
9120	else
9121		max_src_lines_per_dst_line = 2;
9122
9123	a.full = dfixed_const(available_bandwidth);
9124	b.full = dfixed_const(wm->num_heads);
9125	a.full = dfixed_div(a, b);
9126	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
9127	tmp = min(dfixed_trunc(a), tmp);
9128
9129	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
9130
9131	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9132	b.full = dfixed_const(1000);
9133	c.full = dfixed_const(lb_fill_bw);
9134	b.full = dfixed_div(c, b);
9135	a.full = dfixed_div(a, b);
9136	line_fill_time = dfixed_trunc(a);
9137
9138	if (line_fill_time < wm->active_time)
9139		return latency;
9140	else
9141		return latency + (line_fill_time - wm->active_time);
}
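
/*
 * Worked example (editor's addition, illustrative numbers): with
 * available_bandwidth = 20480 MBytes/s, disp_clk = 148500 kHz and
 * num_heads = 2, the terms above work out to
 *   worst_chunk_return_time      = 512 * 8 * 1000 / 20480 = 200 ns
 *   cursor_line_pair_return_time = 128 * 4 * 1000 / 20480 =  25 ns
 *   dc_latency                   = 40000000 / 148500     ~=  269 ns
 *   other_heads_data_return_time = 3 * 200 + 2 * 25      =  650 ns
 * so latency ~= 2000 + 650 + 269 = 2919 ns before any line fill
 * time correction.
 */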
9144
9145/**
9146 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9147 * average and available dram bandwidth
9148 *
9149 * @wm: watermark calculation data
9150 *
9151 * Check if the display average bandwidth fits in the display
9152 * dram bandwidth (CIK).
9153 * Used for display watermark bandwidth calculations
9154 * Returns true if the display fits, false if not.
9155 */
9156static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9157{
9158	if (dce8_average_bandwidth(wm) <=
9159	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9160		return true;
9161	else
9162		return false;
9163}
9164
9165/**
9166 * dce8_average_bandwidth_vs_available_bandwidth - check
9167 * average and available bandwidth
9168 *
9169 * @wm: watermark calculation data
9170 *
9171 * Check if the display average bandwidth fits in the display
9172 * available bandwidth (CIK).
9173 * Used for display watermark bandwidth calculations
9174 * Returns true if the display fits, false if not.
9175 */
9176static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9177{
9178	if (dce8_average_bandwidth(wm) <=
9179	    (dce8_available_bandwidth(wm) / wm->num_heads))
9180		return true;
9181	else
9182		return false;
9183}
9184
9185/**
9186 * dce8_check_latency_hiding - check latency hiding
9187 *
9188 * @wm: watermark calculation data
9189 *
9190 * Check latency hiding (CIK).
9191 * Used for display watermark bandwidth calculations
9192 * Returns true if the display fits, false if not.
9193 */
9194static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9195{
9196	u32 lb_partitions = wm->lb_size / wm->src_width;
9197	u32 line_time = wm->active_time + wm->blank_time;
9198	u32 latency_tolerant_lines;
9199	u32 latency_hiding;
9200	fixed20_12 a;
9201
9202	a.full = dfixed_const(1);
9203	if (wm->vsc.full > a.full)
9204		latency_tolerant_lines = 1;
9205	else {
9206		if (lb_partitions <= (wm->vtaps + 1))
9207			latency_tolerant_lines = 1;
9208		else
9209			latency_tolerant_lines = 2;
9210	}
9211
9212	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9213
9214	if (dce8_latency_watermark(wm) <= latency_hiding)
9215		return true;
9216	else
9217		return false;
9218}
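
/*
 * Worked example (editor's addition): with lb_size = 5120 and a 1920
 * pixel source, lb_partitions = 2; for vsc <= 1 and vtaps = 1 that
 * is <= vtaps + 1, so latency_tolerant_lines = 1 and latency_hiding
 * becomes line_time + blank_time, which the result of
 * dce8_latency_watermark() must not exceed for the mode to fit.
 */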
9219
9220/**
9221 * dce8_program_watermarks - program display watermarks
9222 *
9223 * @rdev: radeon_device pointer
9224 * @radeon_crtc: the selected display controller
9225 * @lb_size: line buffer size
9226 * @num_heads: number of display controllers in use
9227 *
9228 * Calculate and program the display watermarks for the
9229 * selected display controller (CIK).
9230 */
9231static void dce8_program_watermarks(struct radeon_device *rdev,
9232				    struct radeon_crtc *radeon_crtc,
9233				    u32 lb_size, u32 num_heads)
9234{
9235	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9236	struct dce8_wm_params wm_low, wm_high;
9237	u32 active_time;
9238	u32 line_time = 0;
9239	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9240	u32 tmp, wm_mask;
9241
9242	if (radeon_crtc->base.enabled && num_heads && mode) {
9243		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
9244					    (u32)mode->clock);
9245		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
9246					  (u32)mode->clock);
9247		line_time = min(line_time, (u32)65535);
9248
9249		/* watermark for high clocks */
9250		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9251		    rdev->pm.dpm_enabled) {
9252			wm_high.yclk =
9253				radeon_dpm_get_mclk(rdev, false) * 10;
9254			wm_high.sclk =
9255				radeon_dpm_get_sclk(rdev, false) * 10;
9256		} else {
9257			wm_high.yclk = rdev->pm.current_mclk * 10;
9258			wm_high.sclk = rdev->pm.current_sclk * 10;
9259		}
9260
9261		wm_high.disp_clk = mode->clock;
9262		wm_high.src_width = mode->crtc_hdisplay;
9263		wm_high.active_time = active_time;
9264		wm_high.blank_time = line_time - wm_high.active_time;
9265		wm_high.interlaced = false;
9266		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9267			wm_high.interlaced = true;
9268		wm_high.vsc = radeon_crtc->vsc;
9269		wm_high.vtaps = 1;
9270		if (radeon_crtc->rmx_type != RMX_OFF)
9271			wm_high.vtaps = 2;
9272		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9273		wm_high.lb_size = lb_size;
9274		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9275		wm_high.num_heads = num_heads;
9276
9277		/* set for high clocks */
9278		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9279
9280		/* possibly force display priority to high */
9281		/* should really do this at mode validation time... */
9282		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9283		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9284		    !dce8_check_latency_hiding(&wm_high) ||
9285		    (rdev->disp_priority == 2)) {
9286			DRM_DEBUG_KMS("force priority to high\n");
9287		}
9288
9289		/* watermark for low clocks */
9290		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9291		    rdev->pm.dpm_enabled) {
9292			wm_low.yclk =
9293				radeon_dpm_get_mclk(rdev, true) * 10;
9294			wm_low.sclk =
9295				radeon_dpm_get_sclk(rdev, true) * 10;
9296		} else {
9297			wm_low.yclk = rdev->pm.current_mclk * 10;
9298			wm_low.sclk = rdev->pm.current_sclk * 10;
9299		}
9300
9301		wm_low.disp_clk = mode->clock;
9302		wm_low.src_width = mode->crtc_hdisplay;
9303		wm_low.active_time = active_time;
9304		wm_low.blank_time = line_time - wm_low.active_time;
9305		wm_low.interlaced = false;
9306		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9307			wm_low.interlaced = true;
9308		wm_low.vsc = radeon_crtc->vsc;
9309		wm_low.vtaps = 1;
9310		if (radeon_crtc->rmx_type != RMX_OFF)
9311			wm_low.vtaps = 2;
9312		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9313		wm_low.lb_size = lb_size;
9314		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9315		wm_low.num_heads = num_heads;
9316
9317		/* set for low clocks */
9318		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9319
9320		/* possibly force display priority to high */
9321		/* should really do this at mode validation time... */
9322		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9323		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9324		    !dce8_check_latency_hiding(&wm_low) ||
9325		    (rdev->disp_priority == 2)) {
9326			DRM_DEBUG_KMS("force priority to high\n");
9327		}
9328
9329		/* Save number of lines the linebuffer leads before the scanout */
9330		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9331	}
9332
9333	/* select wm A */
9334	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9335	tmp = wm_mask;
9336	tmp &= ~LATENCY_WATERMARK_MASK(3);
9337	tmp |= LATENCY_WATERMARK_MASK(1);
9338	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9339	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9340	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9341		LATENCY_HIGH_WATERMARK(line_time)));
9342	/* select wm B */
9343	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9344	tmp &= ~LATENCY_WATERMARK_MASK(3);
9345	tmp |= LATENCY_WATERMARK_MASK(2);
9346	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9347	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9348	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9349		LATENCY_HIGH_WATERMARK(line_time)));
9350	/* restore original selection */
9351	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9352
9353	/* save values for DPM */
9354	radeon_crtc->line_time = line_time;
9355	radeon_crtc->wm_high = latency_watermark_a;
9356	radeon_crtc->wm_low = latency_watermark_b;
9357}
9358
9359/**
9360 * dce8_bandwidth_update - program display watermarks
9361 *
9362 * @rdev: radeon_device pointer
9363 *
9364 * Calculate and program the display watermarks and line
9365 * buffer allocation (CIK).
9366 */
9367void dce8_bandwidth_update(struct radeon_device *rdev)
9368{
9369	struct drm_display_mode *mode = NULL;
9370	u32 num_heads = 0, lb_size;
9371	int i;
9372
9373	if (!rdev->mode_info.mode_config_initialized)
9374		return;
9375
9376	radeon_update_display_priority(rdev);
9377
9378	for (i = 0; i < rdev->num_crtc; i++) {
9379		if (rdev->mode_info.crtcs[i]->base.enabled)
9380			num_heads++;
9381	}
9382	for (i = 0; i < rdev->num_crtc; i++) {
9383		mode = &rdev->mode_info.crtcs[i]->base.mode;
9384		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9385		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9386	}
9387}
9388
9389/**
9390 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9391 *
9392 * @rdev: radeon_device pointer
9393 *
 * Fetches a GPU clock counter snapshot (CIK).
9395 * Returns the 64 bit clock counter snapshot.
9396 */
9397uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9398{
9399	uint64_t clock;
9400
9401	mutex_lock(&rdev->gpu_clock_mutex);
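	/* writing 1 below latches the full 64-bit counter so that the
	 * two 32-bit reads that follow see one consistent snapshot */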
9402	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9403	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9404		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9405	mutex_unlock(&rdev->gpu_clock_mutex);
9406	return clock;
9407}
9408
9409static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9410			     u32 cntl_reg, u32 status_reg)
9411{
9412	int r, i;
9413	struct atom_clock_dividers dividers;
9414	uint32_t tmp;
9415
9416	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9417					   clock, false, &dividers);
9418	if (r)
9419		return r;
9420
9421	tmp = RREG32_SMC(cntl_reg);
9422	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9423	tmp |= dividers.post_divider;
9424	WREG32_SMC(cntl_reg, tmp);
9425
9426	for (i = 0; i < 100; i++) {
9427		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9428			break;
9429		mdelay(10);
9430	}
9431	if (i == 100)
9432		return -ETIMEDOUT;
9433
9434	return 0;
9435}
9436
9437int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9438{
9439	int r = 0;
9440
9441	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9442	if (r)
9443		return r;
9444
9445	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9446	return r;
9447}
9448
9449int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9450{
9451	int r, i;
9452	struct atom_clock_dividers dividers;
9453	u32 tmp;
9454
9455	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9456					   ecclk, false, &dividers);
9457	if (r)
9458		return r;
9459
9460	for (i = 0; i < 100; i++) {
9461		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9462			break;
9463		mdelay(10);
9464	}
9465	if (i == 100)
9466		return -ETIMEDOUT;
9467
9468	tmp = RREG32_SMC(CG_ECLK_CNTL);
9469	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9470	tmp |= dividers.post_divider;
9471	WREG32_SMC(CG_ECLK_CNTL, tmp);
9472
9473	for (i = 0; i < 100; i++) {
9474		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9475			break;
9476		mdelay(10);
9477	}
9478	if (i == 100)
9479		return -ETIMEDOUT;
9480
9481	return 0;
9482}
9483
9484static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9485{
9486	struct pci_dev *root = rdev->pdev->bus->self;
9487	enum pci_bus_speed speed_cap;
9488	u32 speed_cntl, current_data_rate;
9489	int i;
9490	u16 tmp16;
9491
9492	if (pci_is_root_bus(rdev->pdev->bus))
9493		return;
9494
9495	if (radeon_pcie_gen2 == 0)
9496		return;
9497
9498	if (rdev->flags & RADEON_IS_IGP)
9499		return;
9500
9501	if (!(rdev->flags & RADEON_IS_PCIE))
9502		return;
9503
9504	speed_cap = pcie_get_speed_cap(root);
9505	if (speed_cap == PCI_SPEED_UNKNOWN)
9506		return;
9507
9508	if ((speed_cap != PCIE_SPEED_8_0GT) &&
9509	    (speed_cap != PCIE_SPEED_5_0GT))
9510		return;
9511
9512	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9513	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9514		LC_CURRENT_DATA_RATE_SHIFT;
9515	if (speed_cap == PCIE_SPEED_8_0GT) {
9516		if (current_data_rate == 2) {
9517			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9518			return;
9519		}
9520		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9521	} else if (speed_cap == PCIE_SPEED_5_0GT) {
9522		if (current_data_rate == 1) {
9523			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9524			return;
9525		}
9526		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9527	}
9528
9529	if (!pci_is_pcie(root) || !pci_is_pcie(rdev->pdev))
9530		return;
9531
9532	if (speed_cap == PCIE_SPEED_8_0GT) {
9533		/* re-try equalization if gen3 is not already enabled */
9534		if (current_data_rate != 2) {
9535			u16 bridge_cfg, gpu_cfg;
9536			u16 bridge_cfg2, gpu_cfg2;
9537			u32 max_lw, current_lw, tmp;
9538
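			/* pin the link width (disable hardware autonomous
			 * width changes) on both ends while retraining */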
9539			pcie_capability_set_word(root, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
9540			pcie_capability_set_word(rdev->pdev, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
9541
9542			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9543			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9544			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9545
9546			if (current_lw < max_lw) {
9547				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9548				if (tmp & LC_RENEGOTIATION_SUPPORT) {
9549					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9550					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9551					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9552					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9553				}
9554			}
9555
9556			for (i = 0; i < 10; i++) {
9557				/* check status */
9558				pcie_capability_read_word(rdev->pdev,
9559							  PCI_EXP_DEVSTA,
9560							  &tmp16);
9561				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9562					break;
9563
9564				pcie_capability_read_word(root, PCI_EXP_LNKCTL,
9565							  &bridge_cfg);
9566				pcie_capability_read_word(rdev->pdev,
9567							  PCI_EXP_LNKCTL,
9568							  &gpu_cfg);
9569
9570				pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
9571							  &bridge_cfg2);
9572				pcie_capability_read_word(rdev->pdev,
9573							  PCI_EXP_LNKCTL2,
9574							  &gpu_cfg2);
9575
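				/* quiesce the link, then ask the link
				 * controller to redo gen3 equalization */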
9576				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9577				tmp |= LC_SET_QUIESCE;
9578				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9579
9580				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9581				tmp |= LC_REDO_EQ;
9582				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9583
9584				drm_msleep(100);
9585
9586				/* linkctl */
9587				pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL,
9588								   PCI_EXP_LNKCTL_HAWD,
9589								   bridge_cfg &
9590								   PCI_EXP_LNKCTL_HAWD);
9591				pcie_capability_clear_and_set_word(rdev->pdev, PCI_EXP_LNKCTL,
9592								   PCI_EXP_LNKCTL_HAWD,
9593								   gpu_cfg &
9594								   PCI_EXP_LNKCTL_HAWD);
9595
9596				/* linkctl2 */
9597				pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
9598							  &tmp16);
9599				tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
9600					   PCI_EXP_LNKCTL2_TX_MARGIN);
9601				tmp16 |= (bridge_cfg2 &
9602					  (PCI_EXP_LNKCTL2_ENTER_COMP |
9603					   PCI_EXP_LNKCTL2_TX_MARGIN));
9604				pcie_capability_write_word(root,
9605							   PCI_EXP_LNKCTL2,
9606							   tmp16);
9607
9608				pcie_capability_read_word(rdev->pdev,
9609							  PCI_EXP_LNKCTL2,
9610							  &tmp16);
9611				tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
9612					   PCI_EXP_LNKCTL2_TX_MARGIN);
9613				tmp16 |= (gpu_cfg2 &
9614					  (PCI_EXP_LNKCTL2_ENTER_COMP |
9615					   PCI_EXP_LNKCTL2_TX_MARGIN));
9616				pcie_capability_write_word(rdev->pdev,
9617							   PCI_EXP_LNKCTL2,
9618							   tmp16);
9619
9620				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9621				tmp &= ~LC_SET_QUIESCE;
9622				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9623			}
9624		}
9625	}
9626
9627	/* set the link speed */
9628	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9629	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9630	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9631
9632	pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL2, &tmp16);
9633	tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
9634	if (speed_cap == PCIE_SPEED_8_0GT)
9635		tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
9636	else if (speed_cap == PCIE_SPEED_5_0GT)
9637		tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
9638	else
9639		tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
9640	pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL2, tmp16);
9641
9642	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9643	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9644	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9645
9646	for (i = 0; i < rdev->usec_timeout; i++) {
9647		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9648		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9649			break;
9650		udelay(1);
9651	}
9652}
9653
9654static void cik_program_aspm(struct radeon_device *rdev)
9655{
9656	u32 data, orig;
9657	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9658	bool disable_clkreq = false;
9659
9660	if (radeon_aspm == 0)
9661		return;
9662
9663	/* XXX double check IGPs */
9664	if (rdev->flags & RADEON_IS_IGP)
9665		return;
9666
9667	if (!(rdev->flags & RADEON_IS_PCIE))
9668		return;
9669
9670	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9671	data &= ~LC_XMIT_N_FTS_MASK;
9672	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9673	if (orig != data)
9674		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9675
9676	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9677	data |= LC_GO_TO_RECOVERY;
9678	if (orig != data)
9679		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9680
9681	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9682	data |= P_IGNORE_EDB_ERR;
9683	if (orig != data)
9684		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9685
9686	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9687	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9688	data |= LC_PMI_TO_L1_DIS;
9689	if (!disable_l0s)
9690		data |= LC_L0S_INACTIVITY(7);
9691
9692	if (!disable_l1) {
9693		data |= LC_L1_INACTIVITY(7);
9694		data &= ~LC_PMI_TO_L1_DIS;
9695		if (orig != data)
9696			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9697
9698		if (!disable_plloff_in_l1) {
9699			bool clk_req_support;
9700
9701			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9702			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9703			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9704			if (orig != data)
9705				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9706
9707			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9708			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9709			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9710			if (orig != data)
9711				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9712
9713			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9714			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9715			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9716			if (orig != data)
9717				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9718
9719			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9720			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9721			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9722			if (orig != data)
9723				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9724
9725			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9726			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9727			data |= LC_DYN_LANES_PWR_STATE(3);
9728			if (orig != data)
9729				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9730
9731			if (!disable_clkreq &&
9732			    !pci_is_root_bus(rdev->pdev->bus)) {
9733				struct pci_dev *root = rdev->pdev->bus->self;
9734				u32 lnkcap;
9735
9736				clk_req_support = false;
9737				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9738				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9739					clk_req_support = true;
9740			} else {
9741				clk_req_support = false;
9742			}
9743
9744			if (clk_req_support) {
9745				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9746				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9747				if (orig != data)
9748					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9749
9750				orig = data = RREG32_SMC(THM_CLK_CNTL);
9751				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9752				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9753				if (orig != data)
9754					WREG32_SMC(THM_CLK_CNTL, data);
9755
9756				orig = data = RREG32_SMC(MISC_CLK_CTRL);
9757				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9758				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9759				if (orig != data)
9760					WREG32_SMC(MISC_CLK_CTRL, data);
9761
9762				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9763				data &= ~BCLK_AS_XCLK;
9764				if (orig != data)
9765					WREG32_SMC(CG_CLKPIN_CNTL, data);
9766
9767				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9768				data &= ~FORCE_BIF_REFCLK_EN;
9769				if (orig != data)
9770					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9771
9772				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9773				data &= ~MPLL_CLKOUT_SEL_MASK;
9774				data |= MPLL_CLKOUT_SEL(4);
9775				if (orig != data)
9776					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9777			}
9778		}
9779	} else {
9780		if (orig != data)
9781			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9782	}
9783
9784	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9785	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9786	if (orig != data)
9787		WREG32_PCIE_PORT(PCIE_CNTL2, data);
9788
9789	if (!disable_l0s) {
9790		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9792			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9793			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9794				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9795				data &= ~LC_L0S_INACTIVITY_MASK;
9796				if (orig != data)
9797					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9798			}
9799		}
9800	}
9801}
9802