1/*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Alex Deucher
23 */
24
25#include <linux/firmware.h>
26#include <linux/module.h>
27#include <linux/pci.h>
28#include <linux/slab.h>
29
30#include <drm/drm_vblank.h>
31
32#include "atom.h"
33#include "evergreen.h"
34#include "cik_blit_shaders.h"
35#include "cik.h"
36#include "cikd.h"
37#include "clearstate_ci.h"
38#include "r600.h"
39#include "radeon.h"
40#include "radeon_asic.h"
41#include "radeon_audio.h"
42#include "radeon_ucode.h"
43#include "si.h"
44#include "vce.h"
45
46#define SH_MEM_CONFIG_GFX_DEFAULT \
47	ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
48
49MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
50MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
51MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
52MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
53MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
54MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
55MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
56MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
57MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
58
59MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
60MODULE_FIRMWARE("radeon/bonaire_me.bin");
61MODULE_FIRMWARE("radeon/bonaire_ce.bin");
62MODULE_FIRMWARE("radeon/bonaire_mec.bin");
63MODULE_FIRMWARE("radeon/bonaire_mc.bin");
64MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
65MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
66MODULE_FIRMWARE("radeon/bonaire_smc.bin");
67MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");
68
69MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
70MODULE_FIRMWARE("radeon/HAWAII_me.bin");
71MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
72MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
73MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
74MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
75MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
76MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
77MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
78
79MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
80MODULE_FIRMWARE("radeon/hawaii_me.bin");
81MODULE_FIRMWARE("radeon/hawaii_ce.bin");
82MODULE_FIRMWARE("radeon/hawaii_mec.bin");
83MODULE_FIRMWARE("radeon/hawaii_mc.bin");
84MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
85MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
86MODULE_FIRMWARE("radeon/hawaii_smc.bin");
87MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");
88
89MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
90MODULE_FIRMWARE("radeon/KAVERI_me.bin");
91MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
92MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
93MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
94MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
95
96MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
97MODULE_FIRMWARE("radeon/kaveri_me.bin");
98MODULE_FIRMWARE("radeon/kaveri_ce.bin");
99MODULE_FIRMWARE("radeon/kaveri_mec.bin");
100MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
101MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
102MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
103
104MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
105MODULE_FIRMWARE("radeon/KABINI_me.bin");
106MODULE_FIRMWARE("radeon/KABINI_ce.bin");
107MODULE_FIRMWARE("radeon/KABINI_mec.bin");
108MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
109MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
110
111MODULE_FIRMWARE("radeon/kabini_pfp.bin");
112MODULE_FIRMWARE("radeon/kabini_me.bin");
113MODULE_FIRMWARE("radeon/kabini_ce.bin");
114MODULE_FIRMWARE("radeon/kabini_mec.bin");
115MODULE_FIRMWARE("radeon/kabini_rlc.bin");
116MODULE_FIRMWARE("radeon/kabini_sdma.bin");
117
118MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
119MODULE_FIRMWARE("radeon/MULLINS_me.bin");
120MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
121MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
122MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
123MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
124
125MODULE_FIRMWARE("radeon/mullins_pfp.bin");
126MODULE_FIRMWARE("radeon/mullins_me.bin");
127MODULE_FIRMWARE("radeon/mullins_ce.bin");
128MODULE_FIRMWARE("radeon/mullins_mec.bin");
129MODULE_FIRMWARE("radeon/mullins_rlc.bin");
130MODULE_FIRMWARE("radeon/mullins_sdma.bin");
131
132static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
133static void cik_rlc_stop(struct radeon_device *rdev);
134static void cik_pcie_gen3_enable(struct radeon_device *rdev);
135static void cik_program_aspm(struct radeon_device *rdev);
136static void cik_init_pg(struct radeon_device *rdev);
137static void cik_init_cg(struct radeon_device *rdev);
138static void cik_fini_pg(struct radeon_device *rdev);
139static void cik_fini_cg(struct radeon_device *rdev);
140static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
141					  bool enable);
142
143/**
144 * cik_get_allowed_info_register - fetch the register for the info ioctl
145 *
146 * @rdev: radeon_device pointer
147 * @reg: register offset in bytes
148 * @val: register value
149 *
150 * Returns 0 for success or -EINVAL for an invalid register
151 *
152 */
153int cik_get_allowed_info_register(struct radeon_device *rdev,
154				  u32 reg, u32 *val)
155{
156	switch (reg) {
157	case GRBM_STATUS:
158	case GRBM_STATUS2:
159	case GRBM_STATUS_SE0:
160	case GRBM_STATUS_SE1:
161	case GRBM_STATUS_SE2:
162	case GRBM_STATUS_SE3:
163	case SRBM_STATUS:
164	case SRBM_STATUS2:
165	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
166	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
167	case UVD_STATUS:
168	/* TODO VCE */
169		*val = RREG32(reg);
170		return 0;
171	default:
172		return -EINVAL;
173	}
174}
175
176/*
177 * Indirect registers accessor
178 */
179u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
180{
181	unsigned long flags;
182	u32 r;
183
184	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
185	WREG32(CIK_DIDT_IND_INDEX, (reg));
186	r = RREG32(CIK_DIDT_IND_DATA);
187	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
188	return r;
189}
190
191void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
192{
193	unsigned long flags;
194
195	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
196	WREG32(CIK_DIDT_IND_INDEX, (reg));
197	WREG32(CIK_DIDT_IND_DATA, (v));
198	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
199}
200
201/* get temperature in millidegrees */
202int ci_get_temp(struct radeon_device *rdev)
203{
204	u32 temp;
205	int actual_temp = 0;
206
207	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
208		CTF_TEMP_SHIFT;
209
210	if (temp & 0x200)
211		actual_temp = 255;
212	else
213		actual_temp = temp & 0x1ff;
214
215	return actual_temp * 1000;
216}
217
218/* get temperature in millidegrees */
219int kv_get_temp(struct radeon_device *rdev)
220{
221	u32 temp;
222	int actual_temp = 0;
223
224	temp = RREG32_SMC(0xC0300E0C);
225
226	if (temp)
227		actual_temp = (temp / 8) - 49;
228	else
229		actual_temp = 0;
230
231	return actual_temp * 1000;
232}
233
234/*
235 * Indirect registers accessor
236 */
237u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
238{
239	unsigned long flags;
240	u32 r;
241
242	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
243	WREG32(PCIE_INDEX, reg);
244	(void)RREG32(PCIE_INDEX);
245	r = RREG32(PCIE_DATA);
246	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
247	return r;
248}
249
250void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
251{
252	unsigned long flags;
253
254	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
255	WREG32(PCIE_INDEX, reg);
256	(void)RREG32(PCIE_INDEX);
257	WREG32(PCIE_DATA, v);
258	(void)RREG32(PCIE_DATA);
259	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
260}
261
262static const u32 spectre_rlc_save_restore_register_list[] =
263{
264	(0x0e00 << 16) | (0xc12c >> 2),
265	0x00000000,
266	(0x0e00 << 16) | (0xc140 >> 2),
267	0x00000000,
268	(0x0e00 << 16) | (0xc150 >> 2),
269	0x00000000,
270	(0x0e00 << 16) | (0xc15c >> 2),
271	0x00000000,
272	(0x0e00 << 16) | (0xc168 >> 2),
273	0x00000000,
274	(0x0e00 << 16) | (0xc170 >> 2),
275	0x00000000,
276	(0x0e00 << 16) | (0xc178 >> 2),
277	0x00000000,
278	(0x0e00 << 16) | (0xc204 >> 2),
279	0x00000000,
280	(0x0e00 << 16) | (0xc2b4 >> 2),
281	0x00000000,
282	(0x0e00 << 16) | (0xc2b8 >> 2),
283	0x00000000,
284	(0x0e00 << 16) | (0xc2bc >> 2),
285	0x00000000,
286	(0x0e00 << 16) | (0xc2c0 >> 2),
287	0x00000000,
288	(0x0e00 << 16) | (0x8228 >> 2),
289	0x00000000,
290	(0x0e00 << 16) | (0x829c >> 2),
291	0x00000000,
292	(0x0e00 << 16) | (0x869c >> 2),
293	0x00000000,
294	(0x0600 << 16) | (0x98f4 >> 2),
295	0x00000000,
296	(0x0e00 << 16) | (0x98f8 >> 2),
297	0x00000000,
298	(0x0e00 << 16) | (0x9900 >> 2),
299	0x00000000,
300	(0x0e00 << 16) | (0xc260 >> 2),
301	0x00000000,
302	(0x0e00 << 16) | (0x90e8 >> 2),
303	0x00000000,
304	(0x0e00 << 16) | (0x3c000 >> 2),
305	0x00000000,
306	(0x0e00 << 16) | (0x3c00c >> 2),
307	0x00000000,
308	(0x0e00 << 16) | (0x8c1c >> 2),
309	0x00000000,
310	(0x0e00 << 16) | (0x9700 >> 2),
311	0x00000000,
312	(0x0e00 << 16) | (0xcd20 >> 2),
313	0x00000000,
314	(0x4e00 << 16) | (0xcd20 >> 2),
315	0x00000000,
316	(0x5e00 << 16) | (0xcd20 >> 2),
317	0x00000000,
318	(0x6e00 << 16) | (0xcd20 >> 2),
319	0x00000000,
320	(0x7e00 << 16) | (0xcd20 >> 2),
321	0x00000000,
322	(0x8e00 << 16) | (0xcd20 >> 2),
323	0x00000000,
324	(0x9e00 << 16) | (0xcd20 >> 2),
325	0x00000000,
326	(0xae00 << 16) | (0xcd20 >> 2),
327	0x00000000,
328	(0xbe00 << 16) | (0xcd20 >> 2),
329	0x00000000,
330	(0x0e00 << 16) | (0x89bc >> 2),
331	0x00000000,
332	(0x0e00 << 16) | (0x8900 >> 2),
333	0x00000000,
334	0x3,
335	(0x0e00 << 16) | (0xc130 >> 2),
336	0x00000000,
337	(0x0e00 << 16) | (0xc134 >> 2),
338	0x00000000,
339	(0x0e00 << 16) | (0xc1fc >> 2),
340	0x00000000,
341	(0x0e00 << 16) | (0xc208 >> 2),
342	0x00000000,
343	(0x0e00 << 16) | (0xc264 >> 2),
344	0x00000000,
345	(0x0e00 << 16) | (0xc268 >> 2),
346	0x00000000,
347	(0x0e00 << 16) | (0xc26c >> 2),
348	0x00000000,
349	(0x0e00 << 16) | (0xc270 >> 2),
350	0x00000000,
351	(0x0e00 << 16) | (0xc274 >> 2),
352	0x00000000,
353	(0x0e00 << 16) | (0xc278 >> 2),
354	0x00000000,
355	(0x0e00 << 16) | (0xc27c >> 2),
356	0x00000000,
357	(0x0e00 << 16) | (0xc280 >> 2),
358	0x00000000,
359	(0x0e00 << 16) | (0xc284 >> 2),
360	0x00000000,
361	(0x0e00 << 16) | (0xc288 >> 2),
362	0x00000000,
363	(0x0e00 << 16) | (0xc28c >> 2),
364	0x00000000,
365	(0x0e00 << 16) | (0xc290 >> 2),
366	0x00000000,
367	(0x0e00 << 16) | (0xc294 >> 2),
368	0x00000000,
369	(0x0e00 << 16) | (0xc298 >> 2),
370	0x00000000,
371	(0x0e00 << 16) | (0xc29c >> 2),
372	0x00000000,
373	(0x0e00 << 16) | (0xc2a0 >> 2),
374	0x00000000,
375	(0x0e00 << 16) | (0xc2a4 >> 2),
376	0x00000000,
377	(0x0e00 << 16) | (0xc2a8 >> 2),
378	0x00000000,
379	(0x0e00 << 16) | (0xc2ac  >> 2),
380	0x00000000,
381	(0x0e00 << 16) | (0xc2b0 >> 2),
382	0x00000000,
383	(0x0e00 << 16) | (0x301d0 >> 2),
384	0x00000000,
385	(0x0e00 << 16) | (0x30238 >> 2),
386	0x00000000,
387	(0x0e00 << 16) | (0x30250 >> 2),
388	0x00000000,
389	(0x0e00 << 16) | (0x30254 >> 2),
390	0x00000000,
391	(0x0e00 << 16) | (0x30258 >> 2),
392	0x00000000,
393	(0x0e00 << 16) | (0x3025c >> 2),
394	0x00000000,
395	(0x4e00 << 16) | (0xc900 >> 2),
396	0x00000000,
397	(0x5e00 << 16) | (0xc900 >> 2),
398	0x00000000,
399	(0x6e00 << 16) | (0xc900 >> 2),
400	0x00000000,
401	(0x7e00 << 16) | (0xc900 >> 2),
402	0x00000000,
403	(0x8e00 << 16) | (0xc900 >> 2),
404	0x00000000,
405	(0x9e00 << 16) | (0xc900 >> 2),
406	0x00000000,
407	(0xae00 << 16) | (0xc900 >> 2),
408	0x00000000,
409	(0xbe00 << 16) | (0xc900 >> 2),
410	0x00000000,
411	(0x4e00 << 16) | (0xc904 >> 2),
412	0x00000000,
413	(0x5e00 << 16) | (0xc904 >> 2),
414	0x00000000,
415	(0x6e00 << 16) | (0xc904 >> 2),
416	0x00000000,
417	(0x7e00 << 16) | (0xc904 >> 2),
418	0x00000000,
419	(0x8e00 << 16) | (0xc904 >> 2),
420	0x00000000,
421	(0x9e00 << 16) | (0xc904 >> 2),
422	0x00000000,
423	(0xae00 << 16) | (0xc904 >> 2),
424	0x00000000,
425	(0xbe00 << 16) | (0xc904 >> 2),
426	0x00000000,
427	(0x4e00 << 16) | (0xc908 >> 2),
428	0x00000000,
429	(0x5e00 << 16) | (0xc908 >> 2),
430	0x00000000,
431	(0x6e00 << 16) | (0xc908 >> 2),
432	0x00000000,
433	(0x7e00 << 16) | (0xc908 >> 2),
434	0x00000000,
435	(0x8e00 << 16) | (0xc908 >> 2),
436	0x00000000,
437	(0x9e00 << 16) | (0xc908 >> 2),
438	0x00000000,
439	(0xae00 << 16) | (0xc908 >> 2),
440	0x00000000,
441	(0xbe00 << 16) | (0xc908 >> 2),
442	0x00000000,
443	(0x4e00 << 16) | (0xc90c >> 2),
444	0x00000000,
445	(0x5e00 << 16) | (0xc90c >> 2),
446	0x00000000,
447	(0x6e00 << 16) | (0xc90c >> 2),
448	0x00000000,
449	(0x7e00 << 16) | (0xc90c >> 2),
450	0x00000000,
451	(0x8e00 << 16) | (0xc90c >> 2),
452	0x00000000,
453	(0x9e00 << 16) | (0xc90c >> 2),
454	0x00000000,
455	(0xae00 << 16) | (0xc90c >> 2),
456	0x00000000,
457	(0xbe00 << 16) | (0xc90c >> 2),
458	0x00000000,
459	(0x4e00 << 16) | (0xc910 >> 2),
460	0x00000000,
461	(0x5e00 << 16) | (0xc910 >> 2),
462	0x00000000,
463	(0x6e00 << 16) | (0xc910 >> 2),
464	0x00000000,
465	(0x7e00 << 16) | (0xc910 >> 2),
466	0x00000000,
467	(0x8e00 << 16) | (0xc910 >> 2),
468	0x00000000,
469	(0x9e00 << 16) | (0xc910 >> 2),
470	0x00000000,
471	(0xae00 << 16) | (0xc910 >> 2),
472	0x00000000,
473	(0xbe00 << 16) | (0xc910 >> 2),
474	0x00000000,
475	(0x0e00 << 16) | (0xc99c >> 2),
476	0x00000000,
477	(0x0e00 << 16) | (0x9834 >> 2),
478	0x00000000,
479	(0x0000 << 16) | (0x30f00 >> 2),
480	0x00000000,
481	(0x0001 << 16) | (0x30f00 >> 2),
482	0x00000000,
483	(0x0000 << 16) | (0x30f04 >> 2),
484	0x00000000,
485	(0x0001 << 16) | (0x30f04 >> 2),
486	0x00000000,
487	(0x0000 << 16) | (0x30f08 >> 2),
488	0x00000000,
489	(0x0001 << 16) | (0x30f08 >> 2),
490	0x00000000,
491	(0x0000 << 16) | (0x30f0c >> 2),
492	0x00000000,
493	(0x0001 << 16) | (0x30f0c >> 2),
494	0x00000000,
495	(0x0600 << 16) | (0x9b7c >> 2),
496	0x00000000,
497	(0x0e00 << 16) | (0x8a14 >> 2),
498	0x00000000,
499	(0x0e00 << 16) | (0x8a18 >> 2),
500	0x00000000,
501	(0x0600 << 16) | (0x30a00 >> 2),
502	0x00000000,
503	(0x0e00 << 16) | (0x8bf0 >> 2),
504	0x00000000,
505	(0x0e00 << 16) | (0x8bcc >> 2),
506	0x00000000,
507	(0x0e00 << 16) | (0x8b24 >> 2),
508	0x00000000,
509	(0x0e00 << 16) | (0x30a04 >> 2),
510	0x00000000,
511	(0x0600 << 16) | (0x30a10 >> 2),
512	0x00000000,
513	(0x0600 << 16) | (0x30a14 >> 2),
514	0x00000000,
515	(0x0600 << 16) | (0x30a18 >> 2),
516	0x00000000,
517	(0x0600 << 16) | (0x30a2c >> 2),
518	0x00000000,
519	(0x0e00 << 16) | (0xc700 >> 2),
520	0x00000000,
521	(0x0e00 << 16) | (0xc704 >> 2),
522	0x00000000,
523	(0x0e00 << 16) | (0xc708 >> 2),
524	0x00000000,
525	(0x0e00 << 16) | (0xc768 >> 2),
526	0x00000000,
527	(0x0400 << 16) | (0xc770 >> 2),
528	0x00000000,
529	(0x0400 << 16) | (0xc774 >> 2),
530	0x00000000,
531	(0x0400 << 16) | (0xc778 >> 2),
532	0x00000000,
533	(0x0400 << 16) | (0xc77c >> 2),
534	0x00000000,
535	(0x0400 << 16) | (0xc780 >> 2),
536	0x00000000,
537	(0x0400 << 16) | (0xc784 >> 2),
538	0x00000000,
539	(0x0400 << 16) | (0xc788 >> 2),
540	0x00000000,
541	(0x0400 << 16) | (0xc78c >> 2),
542	0x00000000,
543	(0x0400 << 16) | (0xc798 >> 2),
544	0x00000000,
545	(0x0400 << 16) | (0xc79c >> 2),
546	0x00000000,
547	(0x0400 << 16) | (0xc7a0 >> 2),
548	0x00000000,
549	(0x0400 << 16) | (0xc7a4 >> 2),
550	0x00000000,
551	(0x0400 << 16) | (0xc7a8 >> 2),
552	0x00000000,
553	(0x0400 << 16) | (0xc7ac >> 2),
554	0x00000000,
555	(0x0400 << 16) | (0xc7b0 >> 2),
556	0x00000000,
557	(0x0400 << 16) | (0xc7b4 >> 2),
558	0x00000000,
559	(0x0e00 << 16) | (0x9100 >> 2),
560	0x00000000,
561	(0x0e00 << 16) | (0x3c010 >> 2),
562	0x00000000,
563	(0x0e00 << 16) | (0x92a8 >> 2),
564	0x00000000,
565	(0x0e00 << 16) | (0x92ac >> 2),
566	0x00000000,
567	(0x0e00 << 16) | (0x92b4 >> 2),
568	0x00000000,
569	(0x0e00 << 16) | (0x92b8 >> 2),
570	0x00000000,
571	(0x0e00 << 16) | (0x92bc >> 2),
572	0x00000000,
573	(0x0e00 << 16) | (0x92c0 >> 2),
574	0x00000000,
575	(0x0e00 << 16) | (0x92c4 >> 2),
576	0x00000000,
577	(0x0e00 << 16) | (0x92c8 >> 2),
578	0x00000000,
579	(0x0e00 << 16) | (0x92cc >> 2),
580	0x00000000,
581	(0x0e00 << 16) | (0x92d0 >> 2),
582	0x00000000,
583	(0x0e00 << 16) | (0x8c00 >> 2),
584	0x00000000,
585	(0x0e00 << 16) | (0x8c04 >> 2),
586	0x00000000,
587	(0x0e00 << 16) | (0x8c20 >> 2),
588	0x00000000,
589	(0x0e00 << 16) | (0x8c38 >> 2),
590	0x00000000,
591	(0x0e00 << 16) | (0x8c3c >> 2),
592	0x00000000,
593	(0x0e00 << 16) | (0xae00 >> 2),
594	0x00000000,
595	(0x0e00 << 16) | (0x9604 >> 2),
596	0x00000000,
597	(0x0e00 << 16) | (0xac08 >> 2),
598	0x00000000,
599	(0x0e00 << 16) | (0xac0c >> 2),
600	0x00000000,
601	(0x0e00 << 16) | (0xac10 >> 2),
602	0x00000000,
603	(0x0e00 << 16) | (0xac14 >> 2),
604	0x00000000,
605	(0x0e00 << 16) | (0xac58 >> 2),
606	0x00000000,
607	(0x0e00 << 16) | (0xac68 >> 2),
608	0x00000000,
609	(0x0e00 << 16) | (0xac6c >> 2),
610	0x00000000,
611	(0x0e00 << 16) | (0xac70 >> 2),
612	0x00000000,
613	(0x0e00 << 16) | (0xac74 >> 2),
614	0x00000000,
615	(0x0e00 << 16) | (0xac78 >> 2),
616	0x00000000,
617	(0x0e00 << 16) | (0xac7c >> 2),
618	0x00000000,
619	(0x0e00 << 16) | (0xac80 >> 2),
620	0x00000000,
621	(0x0e00 << 16) | (0xac84 >> 2),
622	0x00000000,
623	(0x0e00 << 16) | (0xac88 >> 2),
624	0x00000000,
625	(0x0e00 << 16) | (0xac8c >> 2),
626	0x00000000,
627	(0x0e00 << 16) | (0x970c >> 2),
628	0x00000000,
629	(0x0e00 << 16) | (0x9714 >> 2),
630	0x00000000,
631	(0x0e00 << 16) | (0x9718 >> 2),
632	0x00000000,
633	(0x0e00 << 16) | (0x971c >> 2),
634	0x00000000,
635	(0x0e00 << 16) | (0x31068 >> 2),
636	0x00000000,
637	(0x4e00 << 16) | (0x31068 >> 2),
638	0x00000000,
639	(0x5e00 << 16) | (0x31068 >> 2),
640	0x00000000,
641	(0x6e00 << 16) | (0x31068 >> 2),
642	0x00000000,
643	(0x7e00 << 16) | (0x31068 >> 2),
644	0x00000000,
645	(0x8e00 << 16) | (0x31068 >> 2),
646	0x00000000,
647	(0x9e00 << 16) | (0x31068 >> 2),
648	0x00000000,
649	(0xae00 << 16) | (0x31068 >> 2),
650	0x00000000,
651	(0xbe00 << 16) | (0x31068 >> 2),
652	0x00000000,
653	(0x0e00 << 16) | (0xcd10 >> 2),
654	0x00000000,
655	(0x0e00 << 16) | (0xcd14 >> 2),
656	0x00000000,
657	(0x0e00 << 16) | (0x88b0 >> 2),
658	0x00000000,
659	(0x0e00 << 16) | (0x88b4 >> 2),
660	0x00000000,
661	(0x0e00 << 16) | (0x88b8 >> 2),
662	0x00000000,
663	(0x0e00 << 16) | (0x88bc >> 2),
664	0x00000000,
665	(0x0400 << 16) | (0x89c0 >> 2),
666	0x00000000,
667	(0x0e00 << 16) | (0x88c4 >> 2),
668	0x00000000,
669	(0x0e00 << 16) | (0x88c8 >> 2),
670	0x00000000,
671	(0x0e00 << 16) | (0x88d0 >> 2),
672	0x00000000,
673	(0x0e00 << 16) | (0x88d4 >> 2),
674	0x00000000,
675	(0x0e00 << 16) | (0x88d8 >> 2),
676	0x00000000,
677	(0x0e00 << 16) | (0x8980 >> 2),
678	0x00000000,
679	(0x0e00 << 16) | (0x30938 >> 2),
680	0x00000000,
681	(0x0e00 << 16) | (0x3093c >> 2),
682	0x00000000,
683	(0x0e00 << 16) | (0x30940 >> 2),
684	0x00000000,
685	(0x0e00 << 16) | (0x89a0 >> 2),
686	0x00000000,
687	(0x0e00 << 16) | (0x30900 >> 2),
688	0x00000000,
689	(0x0e00 << 16) | (0x30904 >> 2),
690	0x00000000,
691	(0x0e00 << 16) | (0x89b4 >> 2),
692	0x00000000,
693	(0x0e00 << 16) | (0x3c210 >> 2),
694	0x00000000,
695	(0x0e00 << 16) | (0x3c214 >> 2),
696	0x00000000,
697	(0x0e00 << 16) | (0x3c218 >> 2),
698	0x00000000,
699	(0x0e00 << 16) | (0x8904 >> 2),
700	0x00000000,
701	0x5,
702	(0x0e00 << 16) | (0x8c28 >> 2),
703	(0x0e00 << 16) | (0x8c2c >> 2),
704	(0x0e00 << 16) | (0x8c30 >> 2),
705	(0x0e00 << 16) | (0x8c34 >> 2),
706	(0x0e00 << 16) | (0x9600 >> 2),
707};
708
709static const u32 kalindi_rlc_save_restore_register_list[] =
710{
711	(0x0e00 << 16) | (0xc12c >> 2),
712	0x00000000,
713	(0x0e00 << 16) | (0xc140 >> 2),
714	0x00000000,
715	(0x0e00 << 16) | (0xc150 >> 2),
716	0x00000000,
717	(0x0e00 << 16) | (0xc15c >> 2),
718	0x00000000,
719	(0x0e00 << 16) | (0xc168 >> 2),
720	0x00000000,
721	(0x0e00 << 16) | (0xc170 >> 2),
722	0x00000000,
723	(0x0e00 << 16) | (0xc204 >> 2),
724	0x00000000,
725	(0x0e00 << 16) | (0xc2b4 >> 2),
726	0x00000000,
727	(0x0e00 << 16) | (0xc2b8 >> 2),
728	0x00000000,
729	(0x0e00 << 16) | (0xc2bc >> 2),
730	0x00000000,
731	(0x0e00 << 16) | (0xc2c0 >> 2),
732	0x00000000,
733	(0x0e00 << 16) | (0x8228 >> 2),
734	0x00000000,
735	(0x0e00 << 16) | (0x829c >> 2),
736	0x00000000,
737	(0x0e00 << 16) | (0x869c >> 2),
738	0x00000000,
739	(0x0600 << 16) | (0x98f4 >> 2),
740	0x00000000,
741	(0x0e00 << 16) | (0x98f8 >> 2),
742	0x00000000,
743	(0x0e00 << 16) | (0x9900 >> 2),
744	0x00000000,
745	(0x0e00 << 16) | (0xc260 >> 2),
746	0x00000000,
747	(0x0e00 << 16) | (0x90e8 >> 2),
748	0x00000000,
749	(0x0e00 << 16) | (0x3c000 >> 2),
750	0x00000000,
751	(0x0e00 << 16) | (0x3c00c >> 2),
752	0x00000000,
753	(0x0e00 << 16) | (0x8c1c >> 2),
754	0x00000000,
755	(0x0e00 << 16) | (0x9700 >> 2),
756	0x00000000,
757	(0x0e00 << 16) | (0xcd20 >> 2),
758	0x00000000,
759	(0x4e00 << 16) | (0xcd20 >> 2),
760	0x00000000,
761	(0x5e00 << 16) | (0xcd20 >> 2),
762	0x00000000,
763	(0x6e00 << 16) | (0xcd20 >> 2),
764	0x00000000,
765	(0x7e00 << 16) | (0xcd20 >> 2),
766	0x00000000,
767	(0x0e00 << 16) | (0x89bc >> 2),
768	0x00000000,
769	(0x0e00 << 16) | (0x8900 >> 2),
770	0x00000000,
771	0x3,
772	(0x0e00 << 16) | (0xc130 >> 2),
773	0x00000000,
774	(0x0e00 << 16) | (0xc134 >> 2),
775	0x00000000,
776	(0x0e00 << 16) | (0xc1fc >> 2),
777	0x00000000,
778	(0x0e00 << 16) | (0xc208 >> 2),
779	0x00000000,
780	(0x0e00 << 16) | (0xc264 >> 2),
781	0x00000000,
782	(0x0e00 << 16) | (0xc268 >> 2),
783	0x00000000,
784	(0x0e00 << 16) | (0xc26c >> 2),
785	0x00000000,
786	(0x0e00 << 16) | (0xc270 >> 2),
787	0x00000000,
788	(0x0e00 << 16) | (0xc274 >> 2),
789	0x00000000,
790	(0x0e00 << 16) | (0xc28c >> 2),
791	0x00000000,
792	(0x0e00 << 16) | (0xc290 >> 2),
793	0x00000000,
794	(0x0e00 << 16) | (0xc294 >> 2),
795	0x00000000,
796	(0x0e00 << 16) | (0xc298 >> 2),
797	0x00000000,
798	(0x0e00 << 16) | (0xc2a0 >> 2),
799	0x00000000,
800	(0x0e00 << 16) | (0xc2a4 >> 2),
801	0x00000000,
802	(0x0e00 << 16) | (0xc2a8 >> 2),
803	0x00000000,
804	(0x0e00 << 16) | (0xc2ac >> 2),
805	0x00000000,
806	(0x0e00 << 16) | (0x301d0 >> 2),
807	0x00000000,
808	(0x0e00 << 16) | (0x30238 >> 2),
809	0x00000000,
810	(0x0e00 << 16) | (0x30250 >> 2),
811	0x00000000,
812	(0x0e00 << 16) | (0x30254 >> 2),
813	0x00000000,
814	(0x0e00 << 16) | (0x30258 >> 2),
815	0x00000000,
816	(0x0e00 << 16) | (0x3025c >> 2),
817	0x00000000,
818	(0x4e00 << 16) | (0xc900 >> 2),
819	0x00000000,
820	(0x5e00 << 16) | (0xc900 >> 2),
821	0x00000000,
822	(0x6e00 << 16) | (0xc900 >> 2),
823	0x00000000,
824	(0x7e00 << 16) | (0xc900 >> 2),
825	0x00000000,
826	(0x4e00 << 16) | (0xc904 >> 2),
827	0x00000000,
828	(0x5e00 << 16) | (0xc904 >> 2),
829	0x00000000,
830	(0x6e00 << 16) | (0xc904 >> 2),
831	0x00000000,
832	(0x7e00 << 16) | (0xc904 >> 2),
833	0x00000000,
834	(0x4e00 << 16) | (0xc908 >> 2),
835	0x00000000,
836	(0x5e00 << 16) | (0xc908 >> 2),
837	0x00000000,
838	(0x6e00 << 16) | (0xc908 >> 2),
839	0x00000000,
840	(0x7e00 << 16) | (0xc908 >> 2),
841	0x00000000,
842	(0x4e00 << 16) | (0xc90c >> 2),
843	0x00000000,
844	(0x5e00 << 16) | (0xc90c >> 2),
845	0x00000000,
846	(0x6e00 << 16) | (0xc90c >> 2),
847	0x00000000,
848	(0x7e00 << 16) | (0xc90c >> 2),
849	0x00000000,
850	(0x4e00 << 16) | (0xc910 >> 2),
851	0x00000000,
852	(0x5e00 << 16) | (0xc910 >> 2),
853	0x00000000,
854	(0x6e00 << 16) | (0xc910 >> 2),
855	0x00000000,
856	(0x7e00 << 16) | (0xc910 >> 2),
857	0x00000000,
858	(0x0e00 << 16) | (0xc99c >> 2),
859	0x00000000,
860	(0x0e00 << 16) | (0x9834 >> 2),
861	0x00000000,
862	(0x0000 << 16) | (0x30f00 >> 2),
863	0x00000000,
864	(0x0000 << 16) | (0x30f04 >> 2),
865	0x00000000,
866	(0x0000 << 16) | (0x30f08 >> 2),
867	0x00000000,
868	(0x0000 << 16) | (0x30f0c >> 2),
869	0x00000000,
870	(0x0600 << 16) | (0x9b7c >> 2),
871	0x00000000,
872	(0x0e00 << 16) | (0x8a14 >> 2),
873	0x00000000,
874	(0x0e00 << 16) | (0x8a18 >> 2),
875	0x00000000,
876	(0x0600 << 16) | (0x30a00 >> 2),
877	0x00000000,
878	(0x0e00 << 16) | (0x8bf0 >> 2),
879	0x00000000,
880	(0x0e00 << 16) | (0x8bcc >> 2),
881	0x00000000,
882	(0x0e00 << 16) | (0x8b24 >> 2),
883	0x00000000,
884	(0x0e00 << 16) | (0x30a04 >> 2),
885	0x00000000,
886	(0x0600 << 16) | (0x30a10 >> 2),
887	0x00000000,
888	(0x0600 << 16) | (0x30a14 >> 2),
889	0x00000000,
890	(0x0600 << 16) | (0x30a18 >> 2),
891	0x00000000,
892	(0x0600 << 16) | (0x30a2c >> 2),
893	0x00000000,
894	(0x0e00 << 16) | (0xc700 >> 2),
895	0x00000000,
896	(0x0e00 << 16) | (0xc704 >> 2),
897	0x00000000,
898	(0x0e00 << 16) | (0xc708 >> 2),
899	0x00000000,
900	(0x0e00 << 16) | (0xc768 >> 2),
901	0x00000000,
902	(0x0400 << 16) | (0xc770 >> 2),
903	0x00000000,
904	(0x0400 << 16) | (0xc774 >> 2),
905	0x00000000,
906	(0x0400 << 16) | (0xc798 >> 2),
907	0x00000000,
908	(0x0400 << 16) | (0xc79c >> 2),
909	0x00000000,
910	(0x0e00 << 16) | (0x9100 >> 2),
911	0x00000000,
912	(0x0e00 << 16) | (0x3c010 >> 2),
913	0x00000000,
914	(0x0e00 << 16) | (0x8c00 >> 2),
915	0x00000000,
916	(0x0e00 << 16) | (0x8c04 >> 2),
917	0x00000000,
918	(0x0e00 << 16) | (0x8c20 >> 2),
919	0x00000000,
920	(0x0e00 << 16) | (0x8c38 >> 2),
921	0x00000000,
922	(0x0e00 << 16) | (0x8c3c >> 2),
923	0x00000000,
924	(0x0e00 << 16) | (0xae00 >> 2),
925	0x00000000,
926	(0x0e00 << 16) | (0x9604 >> 2),
927	0x00000000,
928	(0x0e00 << 16) | (0xac08 >> 2),
929	0x00000000,
930	(0x0e00 << 16) | (0xac0c >> 2),
931	0x00000000,
932	(0x0e00 << 16) | (0xac10 >> 2),
933	0x00000000,
934	(0x0e00 << 16) | (0xac14 >> 2),
935	0x00000000,
936	(0x0e00 << 16) | (0xac58 >> 2),
937	0x00000000,
938	(0x0e00 << 16) | (0xac68 >> 2),
939	0x00000000,
940	(0x0e00 << 16) | (0xac6c >> 2),
941	0x00000000,
942	(0x0e00 << 16) | (0xac70 >> 2),
943	0x00000000,
944	(0x0e00 << 16) | (0xac74 >> 2),
945	0x00000000,
946	(0x0e00 << 16) | (0xac78 >> 2),
947	0x00000000,
948	(0x0e00 << 16) | (0xac7c >> 2),
949	0x00000000,
950	(0x0e00 << 16) | (0xac80 >> 2),
951	0x00000000,
952	(0x0e00 << 16) | (0xac84 >> 2),
953	0x00000000,
954	(0x0e00 << 16) | (0xac88 >> 2),
955	0x00000000,
956	(0x0e00 << 16) | (0xac8c >> 2),
957	0x00000000,
958	(0x0e00 << 16) | (0x970c >> 2),
959	0x00000000,
960	(0x0e00 << 16) | (0x9714 >> 2),
961	0x00000000,
962	(0x0e00 << 16) | (0x9718 >> 2),
963	0x00000000,
964	(0x0e00 << 16) | (0x971c >> 2),
965	0x00000000,
966	(0x0e00 << 16) | (0x31068 >> 2),
967	0x00000000,
968	(0x4e00 << 16) | (0x31068 >> 2),
969	0x00000000,
970	(0x5e00 << 16) | (0x31068 >> 2),
971	0x00000000,
972	(0x6e00 << 16) | (0x31068 >> 2),
973	0x00000000,
974	(0x7e00 << 16) | (0x31068 >> 2),
975	0x00000000,
976	(0x0e00 << 16) | (0xcd10 >> 2),
977	0x00000000,
978	(0x0e00 << 16) | (0xcd14 >> 2),
979	0x00000000,
980	(0x0e00 << 16) | (0x88b0 >> 2),
981	0x00000000,
982	(0x0e00 << 16) | (0x88b4 >> 2),
983	0x00000000,
984	(0x0e00 << 16) | (0x88b8 >> 2),
985	0x00000000,
986	(0x0e00 << 16) | (0x88bc >> 2),
987	0x00000000,
988	(0x0400 << 16) | (0x89c0 >> 2),
989	0x00000000,
990	(0x0e00 << 16) | (0x88c4 >> 2),
991	0x00000000,
992	(0x0e00 << 16) | (0x88c8 >> 2),
993	0x00000000,
994	(0x0e00 << 16) | (0x88d0 >> 2),
995	0x00000000,
996	(0x0e00 << 16) | (0x88d4 >> 2),
997	0x00000000,
998	(0x0e00 << 16) | (0x88d8 >> 2),
999	0x00000000,
1000	(0x0e00 << 16) | (0x8980 >> 2),
1001	0x00000000,
1002	(0x0e00 << 16) | (0x30938 >> 2),
1003	0x00000000,
1004	(0x0e00 << 16) | (0x3093c >> 2),
1005	0x00000000,
1006	(0x0e00 << 16) | (0x30940 >> 2),
1007	0x00000000,
1008	(0x0e00 << 16) | (0x89a0 >> 2),
1009	0x00000000,
1010	(0x0e00 << 16) | (0x30900 >> 2),
1011	0x00000000,
1012	(0x0e00 << 16) | (0x30904 >> 2),
1013	0x00000000,
1014	(0x0e00 << 16) | (0x89b4 >> 2),
1015	0x00000000,
1016	(0x0e00 << 16) | (0x3e1fc >> 2),
1017	0x00000000,
1018	(0x0e00 << 16) | (0x3c210 >> 2),
1019	0x00000000,
1020	(0x0e00 << 16) | (0x3c214 >> 2),
1021	0x00000000,
1022	(0x0e00 << 16) | (0x3c218 >> 2),
1023	0x00000000,
1024	(0x0e00 << 16) | (0x8904 >> 2),
1025	0x00000000,
1026	0x5,
1027	(0x0e00 << 16) | (0x8c28 >> 2),
1028	(0x0e00 << 16) | (0x8c2c >> 2),
1029	(0x0e00 << 16) | (0x8c30 >> 2),
1030	(0x0e00 << 16) | (0x8c34 >> 2),
1031	(0x0e00 << 16) | (0x9600 >> 2),
1032};
1033
1034static const u32 bonaire_golden_spm_registers[] =
1035{
1036	0x30800, 0xe0ffffff, 0xe0000000
1037};
1038
1039static const u32 bonaire_golden_common_registers[] =
1040{
1041	0xc770, 0xffffffff, 0x00000800,
1042	0xc774, 0xffffffff, 0x00000800,
1043	0xc798, 0xffffffff, 0x00007fbf,
1044	0xc79c, 0xffffffff, 0x00007faf
1045};
1046
1047static const u32 bonaire_golden_registers[] =
1048{
1049	0x3354, 0x00000333, 0x00000333,
1050	0x3350, 0x000c0fc0, 0x00040200,
1051	0x9a10, 0x00010000, 0x00058208,
1052	0x3c000, 0xffff1fff, 0x00140000,
1053	0x3c200, 0xfdfc0fff, 0x00000100,
1054	0x3c234, 0x40000000, 0x40000200,
1055	0x9830, 0xffffffff, 0x00000000,
1056	0x9834, 0xf00fffff, 0x00000400,
1057	0x9838, 0x0002021c, 0x00020200,
1058	0xc78, 0x00000080, 0x00000000,
1059	0x5bb0, 0x000000f0, 0x00000070,
1060	0x5bc0, 0xf0311fff, 0x80300000,
1061	0x98f8, 0x73773777, 0x12010001,
1062	0x350c, 0x00810000, 0x408af000,
1063	0x7030, 0x31000111, 0x00000011,
1064	0x2f48, 0x73773777, 0x12010001,
1065	0x220c, 0x00007fb6, 0x0021a1b1,
1066	0x2210, 0x00007fb6, 0x002021b1,
1067	0x2180, 0x00007fb6, 0x00002191,
1068	0x2218, 0x00007fb6, 0x002121b1,
1069	0x221c, 0x00007fb6, 0x002021b1,
1070	0x21dc, 0x00007fb6, 0x00002191,
1071	0x21e0, 0x00007fb6, 0x00002191,
1072	0x3628, 0x0000003f, 0x0000000a,
1073	0x362c, 0x0000003f, 0x0000000a,
1074	0x2ae4, 0x00073ffe, 0x000022a2,
1075	0x240c, 0x000007ff, 0x00000000,
1076	0x8a14, 0xf000003f, 0x00000007,
1077	0x8bf0, 0x00002001, 0x00000001,
1078	0x8b24, 0xffffffff, 0x00ffffff,
1079	0x30a04, 0x0000ff0f, 0x00000000,
1080	0x28a4c, 0x07ffffff, 0x06000000,
1081	0x4d8, 0x00000fff, 0x00000100,
1082	0x3e78, 0x00000001, 0x00000002,
1083	0x9100, 0x03000000, 0x0362c688,
1084	0x8c00, 0x000000ff, 0x00000001,
1085	0xe40, 0x00001fff, 0x00001fff,
1086	0x9060, 0x0000007f, 0x00000020,
1087	0x9508, 0x00010000, 0x00010000,
1088	0xac14, 0x000003ff, 0x000000f3,
1089	0xac0c, 0xffffffff, 0x00001032
1090};
1091
1092static const u32 bonaire_mgcg_cgcg_init[] =
1093{
1094	0xc420, 0xffffffff, 0xfffffffc,
1095	0x30800, 0xffffffff, 0xe0000000,
1096	0x3c2a0, 0xffffffff, 0x00000100,
1097	0x3c208, 0xffffffff, 0x00000100,
1098	0x3c2c0, 0xffffffff, 0xc0000100,
1099	0x3c2c8, 0xffffffff, 0xc0000100,
1100	0x3c2c4, 0xffffffff, 0xc0000100,
1101	0x55e4, 0xffffffff, 0x00600100,
1102	0x3c280, 0xffffffff, 0x00000100,
1103	0x3c214, 0xffffffff, 0x06000100,
1104	0x3c220, 0xffffffff, 0x00000100,
1105	0x3c218, 0xffffffff, 0x06000100,
1106	0x3c204, 0xffffffff, 0x00000100,
1107	0x3c2e0, 0xffffffff, 0x00000100,
1108	0x3c224, 0xffffffff, 0x00000100,
1109	0x3c200, 0xffffffff, 0x00000100,
1110	0x3c230, 0xffffffff, 0x00000100,
1111	0x3c234, 0xffffffff, 0x00000100,
1112	0x3c250, 0xffffffff, 0x00000100,
1113	0x3c254, 0xffffffff, 0x00000100,
1114	0x3c258, 0xffffffff, 0x00000100,
1115	0x3c25c, 0xffffffff, 0x00000100,
1116	0x3c260, 0xffffffff, 0x00000100,
1117	0x3c27c, 0xffffffff, 0x00000100,
1118	0x3c278, 0xffffffff, 0x00000100,
1119	0x3c210, 0xffffffff, 0x06000100,
1120	0x3c290, 0xffffffff, 0x00000100,
1121	0x3c274, 0xffffffff, 0x00000100,
1122	0x3c2b4, 0xffffffff, 0x00000100,
1123	0x3c2b0, 0xffffffff, 0x00000100,
1124	0x3c270, 0xffffffff, 0x00000100,
1125	0x30800, 0xffffffff, 0xe0000000,
1126	0x3c020, 0xffffffff, 0x00010000,
1127	0x3c024, 0xffffffff, 0x00030002,
1128	0x3c028, 0xffffffff, 0x00040007,
1129	0x3c02c, 0xffffffff, 0x00060005,
1130	0x3c030, 0xffffffff, 0x00090008,
1131	0x3c034, 0xffffffff, 0x00010000,
1132	0x3c038, 0xffffffff, 0x00030002,
1133	0x3c03c, 0xffffffff, 0x00040007,
1134	0x3c040, 0xffffffff, 0x00060005,
1135	0x3c044, 0xffffffff, 0x00090008,
1136	0x3c048, 0xffffffff, 0x00010000,
1137	0x3c04c, 0xffffffff, 0x00030002,
1138	0x3c050, 0xffffffff, 0x00040007,
1139	0x3c054, 0xffffffff, 0x00060005,
1140	0x3c058, 0xffffffff, 0x00090008,
1141	0x3c05c, 0xffffffff, 0x00010000,
1142	0x3c060, 0xffffffff, 0x00030002,
1143	0x3c064, 0xffffffff, 0x00040007,
1144	0x3c068, 0xffffffff, 0x00060005,
1145	0x3c06c, 0xffffffff, 0x00090008,
1146	0x3c070, 0xffffffff, 0x00010000,
1147	0x3c074, 0xffffffff, 0x00030002,
1148	0x3c078, 0xffffffff, 0x00040007,
1149	0x3c07c, 0xffffffff, 0x00060005,
1150	0x3c080, 0xffffffff, 0x00090008,
1151	0x3c084, 0xffffffff, 0x00010000,
1152	0x3c088, 0xffffffff, 0x00030002,
1153	0x3c08c, 0xffffffff, 0x00040007,
1154	0x3c090, 0xffffffff, 0x00060005,
1155	0x3c094, 0xffffffff, 0x00090008,
1156	0x3c098, 0xffffffff, 0x00010000,
1157	0x3c09c, 0xffffffff, 0x00030002,
1158	0x3c0a0, 0xffffffff, 0x00040007,
1159	0x3c0a4, 0xffffffff, 0x00060005,
1160	0x3c0a8, 0xffffffff, 0x00090008,
1161	0x3c000, 0xffffffff, 0x96e00200,
1162	0x8708, 0xffffffff, 0x00900100,
1163	0xc424, 0xffffffff, 0x0020003f,
1164	0x38, 0xffffffff, 0x0140001c,
1165	0x3c, 0x000f0000, 0x000f0000,
1166	0x220, 0xffffffff, 0xC060000C,
1167	0x224, 0xc0000fff, 0x00000100,
1168	0xf90, 0xffffffff, 0x00000100,
1169	0xf98, 0x00000101, 0x00000000,
1170	0x20a8, 0xffffffff, 0x00000104,
1171	0x55e4, 0xff000fff, 0x00000100,
1172	0x30cc, 0xc0000fff, 0x00000104,
1173	0xc1e4, 0x00000001, 0x00000001,
1174	0xd00c, 0xff000ff0, 0x00000100,
1175	0xd80c, 0xff000ff0, 0x00000100
1176};
1177
1178static const u32 spectre_golden_spm_registers[] =
1179{
1180	0x30800, 0xe0ffffff, 0xe0000000
1181};
1182
1183static const u32 spectre_golden_common_registers[] =
1184{
1185	0xc770, 0xffffffff, 0x00000800,
1186	0xc774, 0xffffffff, 0x00000800,
1187	0xc798, 0xffffffff, 0x00007fbf,
1188	0xc79c, 0xffffffff, 0x00007faf
1189};
1190
1191static const u32 spectre_golden_registers[] =
1192{
1193	0x3c000, 0xffff1fff, 0x96940200,
1194	0x3c00c, 0xffff0001, 0xff000000,
1195	0x3c200, 0xfffc0fff, 0x00000100,
1196	0x6ed8, 0x00010101, 0x00010000,
1197	0x9834, 0xf00fffff, 0x00000400,
1198	0x9838, 0xfffffffc, 0x00020200,
1199	0x5bb0, 0x000000f0, 0x00000070,
1200	0x5bc0, 0xf0311fff, 0x80300000,
1201	0x98f8, 0x73773777, 0x12010001,
1202	0x9b7c, 0x00ff0000, 0x00fc0000,
1203	0x2f48, 0x73773777, 0x12010001,
1204	0x8a14, 0xf000003f, 0x00000007,
1205	0x8b24, 0xffffffff, 0x00ffffff,
1206	0x28350, 0x3f3f3fff, 0x00000082,
1207	0x28354, 0x0000003f, 0x00000000,
1208	0x3e78, 0x00000001, 0x00000002,
1209	0x913c, 0xffff03df, 0x00000004,
1210	0xc768, 0x00000008, 0x00000008,
1211	0x8c00, 0x000008ff, 0x00000800,
1212	0x9508, 0x00010000, 0x00010000,
1213	0xac0c, 0xffffffff, 0x54763210,
1214	0x214f8, 0x01ff01ff, 0x00000002,
1215	0x21498, 0x007ff800, 0x00200000,
1216	0x2015c, 0xffffffff, 0x00000f40,
1217	0x30934, 0xffffffff, 0x00000001
1218};
1219
1220static const u32 spectre_mgcg_cgcg_init[] =
1221{
1222	0xc420, 0xffffffff, 0xfffffffc,
1223	0x30800, 0xffffffff, 0xe0000000,
1224	0x3c2a0, 0xffffffff, 0x00000100,
1225	0x3c208, 0xffffffff, 0x00000100,
1226	0x3c2c0, 0xffffffff, 0x00000100,
1227	0x3c2c8, 0xffffffff, 0x00000100,
1228	0x3c2c4, 0xffffffff, 0x00000100,
1229	0x55e4, 0xffffffff, 0x00600100,
1230	0x3c280, 0xffffffff, 0x00000100,
1231	0x3c214, 0xffffffff, 0x06000100,
1232	0x3c220, 0xffffffff, 0x00000100,
1233	0x3c218, 0xffffffff, 0x06000100,
1234	0x3c204, 0xffffffff, 0x00000100,
1235	0x3c2e0, 0xffffffff, 0x00000100,
1236	0x3c224, 0xffffffff, 0x00000100,
1237	0x3c200, 0xffffffff, 0x00000100,
1238	0x3c230, 0xffffffff, 0x00000100,
1239	0x3c234, 0xffffffff, 0x00000100,
1240	0x3c250, 0xffffffff, 0x00000100,
1241	0x3c254, 0xffffffff, 0x00000100,
1242	0x3c258, 0xffffffff, 0x00000100,
1243	0x3c25c, 0xffffffff, 0x00000100,
1244	0x3c260, 0xffffffff, 0x00000100,
1245	0x3c27c, 0xffffffff, 0x00000100,
1246	0x3c278, 0xffffffff, 0x00000100,
1247	0x3c210, 0xffffffff, 0x06000100,
1248	0x3c290, 0xffffffff, 0x00000100,
1249	0x3c274, 0xffffffff, 0x00000100,
1250	0x3c2b4, 0xffffffff, 0x00000100,
1251	0x3c2b0, 0xffffffff, 0x00000100,
1252	0x3c270, 0xffffffff, 0x00000100,
1253	0x30800, 0xffffffff, 0xe0000000,
1254	0x3c020, 0xffffffff, 0x00010000,
1255	0x3c024, 0xffffffff, 0x00030002,
1256	0x3c028, 0xffffffff, 0x00040007,
1257	0x3c02c, 0xffffffff, 0x00060005,
1258	0x3c030, 0xffffffff, 0x00090008,
1259	0x3c034, 0xffffffff, 0x00010000,
1260	0x3c038, 0xffffffff, 0x00030002,
1261	0x3c03c, 0xffffffff, 0x00040007,
1262	0x3c040, 0xffffffff, 0x00060005,
1263	0x3c044, 0xffffffff, 0x00090008,
1264	0x3c048, 0xffffffff, 0x00010000,
1265	0x3c04c, 0xffffffff, 0x00030002,
1266	0x3c050, 0xffffffff, 0x00040007,
1267	0x3c054, 0xffffffff, 0x00060005,
1268	0x3c058, 0xffffffff, 0x00090008,
1269	0x3c05c, 0xffffffff, 0x00010000,
1270	0x3c060, 0xffffffff, 0x00030002,
1271	0x3c064, 0xffffffff, 0x00040007,
1272	0x3c068, 0xffffffff, 0x00060005,
1273	0x3c06c, 0xffffffff, 0x00090008,
1274	0x3c070, 0xffffffff, 0x00010000,
1275	0x3c074, 0xffffffff, 0x00030002,
1276	0x3c078, 0xffffffff, 0x00040007,
1277	0x3c07c, 0xffffffff, 0x00060005,
1278	0x3c080, 0xffffffff, 0x00090008,
1279	0x3c084, 0xffffffff, 0x00010000,
1280	0x3c088, 0xffffffff, 0x00030002,
1281	0x3c08c, 0xffffffff, 0x00040007,
1282	0x3c090, 0xffffffff, 0x00060005,
1283	0x3c094, 0xffffffff, 0x00090008,
1284	0x3c098, 0xffffffff, 0x00010000,
1285	0x3c09c, 0xffffffff, 0x00030002,
1286	0x3c0a0, 0xffffffff, 0x00040007,
1287	0x3c0a4, 0xffffffff, 0x00060005,
1288	0x3c0a8, 0xffffffff, 0x00090008,
1289	0x3c0ac, 0xffffffff, 0x00010000,
1290	0x3c0b0, 0xffffffff, 0x00030002,
1291	0x3c0b4, 0xffffffff, 0x00040007,
1292	0x3c0b8, 0xffffffff, 0x00060005,
1293	0x3c0bc, 0xffffffff, 0x00090008,
1294	0x3c000, 0xffffffff, 0x96e00200,
1295	0x8708, 0xffffffff, 0x00900100,
1296	0xc424, 0xffffffff, 0x0020003f,
1297	0x38, 0xffffffff, 0x0140001c,
1298	0x3c, 0x000f0000, 0x000f0000,
1299	0x220, 0xffffffff, 0xC060000C,
1300	0x224, 0xc0000fff, 0x00000100,
1301	0xf90, 0xffffffff, 0x00000100,
1302	0xf98, 0x00000101, 0x00000000,
1303	0x20a8, 0xffffffff, 0x00000104,
1304	0x55e4, 0xff000fff, 0x00000100,
1305	0x30cc, 0xc0000fff, 0x00000104,
1306	0xc1e4, 0x00000001, 0x00000001,
1307	0xd00c, 0xff000ff0, 0x00000100,
1308	0xd80c, 0xff000ff0, 0x00000100
1309};
1310
1311static const u32 kalindi_golden_spm_registers[] =
1312{
1313	0x30800, 0xe0ffffff, 0xe0000000
1314};
1315
1316static const u32 kalindi_golden_common_registers[] =
1317{
1318	0xc770, 0xffffffff, 0x00000800,
1319	0xc774, 0xffffffff, 0x00000800,
1320	0xc798, 0xffffffff, 0x00007fbf,
1321	0xc79c, 0xffffffff, 0x00007faf
1322};
1323
1324static const u32 kalindi_golden_registers[] =
1325{
1326	0x3c000, 0xffffdfff, 0x6e944040,
1327	0x55e4, 0xff607fff, 0xfc000100,
1328	0x3c220, 0xff000fff, 0x00000100,
1329	0x3c224, 0xff000fff, 0x00000100,
1330	0x3c200, 0xfffc0fff, 0x00000100,
1331	0x6ed8, 0x00010101, 0x00010000,
1332	0x9830, 0xffffffff, 0x00000000,
1333	0x9834, 0xf00fffff, 0x00000400,
1334	0x5bb0, 0x000000f0, 0x00000070,
1335	0x5bc0, 0xf0311fff, 0x80300000,
1336	0x98f8, 0x73773777, 0x12010001,
1337	0x98fc, 0xffffffff, 0x00000010,
1338	0x9b7c, 0x00ff0000, 0x00fc0000,
1339	0x8030, 0x00001f0f, 0x0000100a,
1340	0x2f48, 0x73773777, 0x12010001,
1341	0x2408, 0x000fffff, 0x000c007f,
1342	0x8a14, 0xf000003f, 0x00000007,
1343	0x8b24, 0x3fff3fff, 0x00ffcfff,
1344	0x30a04, 0x0000ff0f, 0x00000000,
1345	0x28a4c, 0x07ffffff, 0x06000000,
1346	0x4d8, 0x00000fff, 0x00000100,
1347	0x3e78, 0x00000001, 0x00000002,
1348	0xc768, 0x00000008, 0x00000008,
1349	0x8c00, 0x000000ff, 0x00000003,
1350	0x214f8, 0x01ff01ff, 0x00000002,
1351	0x21498, 0x007ff800, 0x00200000,
1352	0x2015c, 0xffffffff, 0x00000f40,
1353	0x88c4, 0x001f3ae3, 0x00000082,
1354	0x88d4, 0x0000001f, 0x00000010,
1355	0x30934, 0xffffffff, 0x00000000
1356};
1357
1358static const u32 kalindi_mgcg_cgcg_init[] =
1359{
1360	0xc420, 0xffffffff, 0xfffffffc,
1361	0x30800, 0xffffffff, 0xe0000000,
1362	0x3c2a0, 0xffffffff, 0x00000100,
1363	0x3c208, 0xffffffff, 0x00000100,
1364	0x3c2c0, 0xffffffff, 0x00000100,
1365	0x3c2c8, 0xffffffff, 0x00000100,
1366	0x3c2c4, 0xffffffff, 0x00000100,
1367	0x55e4, 0xffffffff, 0x00600100,
1368	0x3c280, 0xffffffff, 0x00000100,
1369	0x3c214, 0xffffffff, 0x06000100,
1370	0x3c220, 0xffffffff, 0x00000100,
1371	0x3c218, 0xffffffff, 0x06000100,
1372	0x3c204, 0xffffffff, 0x00000100,
1373	0x3c2e0, 0xffffffff, 0x00000100,
1374	0x3c224, 0xffffffff, 0x00000100,
1375	0x3c200, 0xffffffff, 0x00000100,
1376	0x3c230, 0xffffffff, 0x00000100,
1377	0x3c234, 0xffffffff, 0x00000100,
1378	0x3c250, 0xffffffff, 0x00000100,
1379	0x3c254, 0xffffffff, 0x00000100,
1380	0x3c258, 0xffffffff, 0x00000100,
1381	0x3c25c, 0xffffffff, 0x00000100,
1382	0x3c260, 0xffffffff, 0x00000100,
1383	0x3c27c, 0xffffffff, 0x00000100,
1384	0x3c278, 0xffffffff, 0x00000100,
1385	0x3c210, 0xffffffff, 0x06000100,
1386	0x3c290, 0xffffffff, 0x00000100,
1387	0x3c274, 0xffffffff, 0x00000100,
1388	0x3c2b4, 0xffffffff, 0x00000100,
1389	0x3c2b0, 0xffffffff, 0x00000100,
1390	0x3c270, 0xffffffff, 0x00000100,
1391	0x30800, 0xffffffff, 0xe0000000,
1392	0x3c020, 0xffffffff, 0x00010000,
1393	0x3c024, 0xffffffff, 0x00030002,
1394	0x3c028, 0xffffffff, 0x00040007,
1395	0x3c02c, 0xffffffff, 0x00060005,
1396	0x3c030, 0xffffffff, 0x00090008,
1397	0x3c034, 0xffffffff, 0x00010000,
1398	0x3c038, 0xffffffff, 0x00030002,
1399	0x3c03c, 0xffffffff, 0x00040007,
1400	0x3c040, 0xffffffff, 0x00060005,
1401	0x3c044, 0xffffffff, 0x00090008,
1402	0x3c000, 0xffffffff, 0x96e00200,
1403	0x8708, 0xffffffff, 0x00900100,
1404	0xc424, 0xffffffff, 0x0020003f,
1405	0x38, 0xffffffff, 0x0140001c,
1406	0x3c, 0x000f0000, 0x000f0000,
1407	0x220, 0xffffffff, 0xC060000C,
1408	0x224, 0xc0000fff, 0x00000100,
1409	0x20a8, 0xffffffff, 0x00000104,
1410	0x55e4, 0xff000fff, 0x00000100,
1411	0x30cc, 0xc0000fff, 0x00000104,
1412	0xc1e4, 0x00000001, 0x00000001,
1413	0xd00c, 0xff000ff0, 0x00000100,
1414	0xd80c, 0xff000ff0, 0x00000100
1415};
1416
1417static const u32 hawaii_golden_spm_registers[] =
1418{
1419	0x30800, 0xe0ffffff, 0xe0000000
1420};
1421
1422static const u32 hawaii_golden_common_registers[] =
1423{
1424	0x30800, 0xffffffff, 0xe0000000,
1425	0x28350, 0xffffffff, 0x3a00161a,
1426	0x28354, 0xffffffff, 0x0000002e,
1427	0x9a10, 0xffffffff, 0x00018208,
1428	0x98f8, 0xffffffff, 0x12011003
1429};
1430
1431static const u32 hawaii_golden_registers[] =
1432{
1433	0x3354, 0x00000333, 0x00000333,
1434	0x9a10, 0x00010000, 0x00058208,
1435	0x9830, 0xffffffff, 0x00000000,
1436	0x9834, 0xf00fffff, 0x00000400,
1437	0x9838, 0x0002021c, 0x00020200,
1438	0xc78, 0x00000080, 0x00000000,
1439	0x5bb0, 0x000000f0, 0x00000070,
1440	0x5bc0, 0xf0311fff, 0x80300000,
1441	0x350c, 0x00810000, 0x408af000,
1442	0x7030, 0x31000111, 0x00000011,
1443	0x2f48, 0x73773777, 0x12010001,
1444	0x2120, 0x0000007f, 0x0000001b,
1445	0x21dc, 0x00007fb6, 0x00002191,
1446	0x3628, 0x0000003f, 0x0000000a,
1447	0x362c, 0x0000003f, 0x0000000a,
1448	0x2ae4, 0x00073ffe, 0x000022a2,
1449	0x240c, 0x000007ff, 0x00000000,
1450	0x8bf0, 0x00002001, 0x00000001,
1451	0x8b24, 0xffffffff, 0x00ffffff,
1452	0x30a04, 0x0000ff0f, 0x00000000,
1453	0x28a4c, 0x07ffffff, 0x06000000,
1454	0x3e78, 0x00000001, 0x00000002,
1455	0xc768, 0x00000008, 0x00000008,
1456	0xc770, 0x00000f00, 0x00000800,
1457	0xc774, 0x00000f00, 0x00000800,
1458	0xc798, 0x00ffffff, 0x00ff7fbf,
1459	0xc79c, 0x00ffffff, 0x00ff7faf,
1460	0x8c00, 0x000000ff, 0x00000800,
1461	0xe40, 0x00001fff, 0x00001fff,
1462	0x9060, 0x0000007f, 0x00000020,
1463	0x9508, 0x00010000, 0x00010000,
1464	0xae00, 0x00100000, 0x000ff07c,
1465	0xac14, 0x000003ff, 0x0000000f,
1466	0xac10, 0xffffffff, 0x7564fdec,
1467	0xac0c, 0xffffffff, 0x3120b9a8,
1468	0xac08, 0x20000000, 0x0f9c0000
1469};
1470
1471static const u32 hawaii_mgcg_cgcg_init[] =
1472{
1473	0xc420, 0xffffffff, 0xfffffffd,
1474	0x30800, 0xffffffff, 0xe0000000,
1475	0x3c2a0, 0xffffffff, 0x00000100,
1476	0x3c208, 0xffffffff, 0x00000100,
1477	0x3c2c0, 0xffffffff, 0x00000100,
1478	0x3c2c8, 0xffffffff, 0x00000100,
1479	0x3c2c4, 0xffffffff, 0x00000100,
1480	0x55e4, 0xffffffff, 0x00200100,
1481	0x3c280, 0xffffffff, 0x00000100,
1482	0x3c214, 0xffffffff, 0x06000100,
1483	0x3c220, 0xffffffff, 0x00000100,
1484	0x3c218, 0xffffffff, 0x06000100,
1485	0x3c204, 0xffffffff, 0x00000100,
1486	0x3c2e0, 0xffffffff, 0x00000100,
1487	0x3c224, 0xffffffff, 0x00000100,
1488	0x3c200, 0xffffffff, 0x00000100,
1489	0x3c230, 0xffffffff, 0x00000100,
1490	0x3c234, 0xffffffff, 0x00000100,
1491	0x3c250, 0xffffffff, 0x00000100,
1492	0x3c254, 0xffffffff, 0x00000100,
1493	0x3c258, 0xffffffff, 0x00000100,
1494	0x3c25c, 0xffffffff, 0x00000100,
1495	0x3c260, 0xffffffff, 0x00000100,
1496	0x3c27c, 0xffffffff, 0x00000100,
1497	0x3c278, 0xffffffff, 0x00000100,
1498	0x3c210, 0xffffffff, 0x06000100,
1499	0x3c290, 0xffffffff, 0x00000100,
1500	0x3c274, 0xffffffff, 0x00000100,
1501	0x3c2b4, 0xffffffff, 0x00000100,
1502	0x3c2b0, 0xffffffff, 0x00000100,
1503	0x3c270, 0xffffffff, 0x00000100,
1504	0x30800, 0xffffffff, 0xe0000000,
1505	0x3c020, 0xffffffff, 0x00010000,
1506	0x3c024, 0xffffffff, 0x00030002,
1507	0x3c028, 0xffffffff, 0x00040007,
1508	0x3c02c, 0xffffffff, 0x00060005,
1509	0x3c030, 0xffffffff, 0x00090008,
1510	0x3c034, 0xffffffff, 0x00010000,
1511	0x3c038, 0xffffffff, 0x00030002,
1512	0x3c03c, 0xffffffff, 0x00040007,
1513	0x3c040, 0xffffffff, 0x00060005,
1514	0x3c044, 0xffffffff, 0x00090008,
1515	0x3c048, 0xffffffff, 0x00010000,
1516	0x3c04c, 0xffffffff, 0x00030002,
1517	0x3c050, 0xffffffff, 0x00040007,
1518	0x3c054, 0xffffffff, 0x00060005,
1519	0x3c058, 0xffffffff, 0x00090008,
1520	0x3c05c, 0xffffffff, 0x00010000,
1521	0x3c060, 0xffffffff, 0x00030002,
1522	0x3c064, 0xffffffff, 0x00040007,
1523	0x3c068, 0xffffffff, 0x00060005,
1524	0x3c06c, 0xffffffff, 0x00090008,
1525	0x3c070, 0xffffffff, 0x00010000,
1526	0x3c074, 0xffffffff, 0x00030002,
1527	0x3c078, 0xffffffff, 0x00040007,
1528	0x3c07c, 0xffffffff, 0x00060005,
1529	0x3c080, 0xffffffff, 0x00090008,
1530	0x3c084, 0xffffffff, 0x00010000,
1531	0x3c088, 0xffffffff, 0x00030002,
1532	0x3c08c, 0xffffffff, 0x00040007,
1533	0x3c090, 0xffffffff, 0x00060005,
1534	0x3c094, 0xffffffff, 0x00090008,
1535	0x3c098, 0xffffffff, 0x00010000,
1536	0x3c09c, 0xffffffff, 0x00030002,
1537	0x3c0a0, 0xffffffff, 0x00040007,
1538	0x3c0a4, 0xffffffff, 0x00060005,
1539	0x3c0a8, 0xffffffff, 0x00090008,
1540	0x3c0ac, 0xffffffff, 0x00010000,
1541	0x3c0b0, 0xffffffff, 0x00030002,
1542	0x3c0b4, 0xffffffff, 0x00040007,
1543	0x3c0b8, 0xffffffff, 0x00060005,
1544	0x3c0bc, 0xffffffff, 0x00090008,
1545	0x3c0c0, 0xffffffff, 0x00010000,
1546	0x3c0c4, 0xffffffff, 0x00030002,
1547	0x3c0c8, 0xffffffff, 0x00040007,
1548	0x3c0cc, 0xffffffff, 0x00060005,
1549	0x3c0d0, 0xffffffff, 0x00090008,
1550	0x3c0d4, 0xffffffff, 0x00010000,
1551	0x3c0d8, 0xffffffff, 0x00030002,
1552	0x3c0dc, 0xffffffff, 0x00040007,
1553	0x3c0e0, 0xffffffff, 0x00060005,
1554	0x3c0e4, 0xffffffff, 0x00090008,
1555	0x3c0e8, 0xffffffff, 0x00010000,
1556	0x3c0ec, 0xffffffff, 0x00030002,
1557	0x3c0f0, 0xffffffff, 0x00040007,
1558	0x3c0f4, 0xffffffff, 0x00060005,
1559	0x3c0f8, 0xffffffff, 0x00090008,
1560	0xc318, 0xffffffff, 0x00020200,
1561	0x3350, 0xffffffff, 0x00000200,
1562	0x15c0, 0xffffffff, 0x00000400,
1563	0x55e8, 0xffffffff, 0x00000000,
1564	0x2f50, 0xffffffff, 0x00000902,
1565	0x3c000, 0xffffffff, 0x96940200,
1566	0x8708, 0xffffffff, 0x00900100,
1567	0xc424, 0xffffffff, 0x0020003f,
1568	0x38, 0xffffffff, 0x0140001c,
1569	0x3c, 0x000f0000, 0x000f0000,
1570	0x220, 0xffffffff, 0xc060000c,
1571	0x224, 0xc0000fff, 0x00000100,
1572	0xf90, 0xffffffff, 0x00000100,
1573	0xf98, 0x00000101, 0x00000000,
1574	0x20a8, 0xffffffff, 0x00000104,
1575	0x55e4, 0xff000fff, 0x00000100,
1576	0x30cc, 0xc0000fff, 0x00000104,
1577	0xc1e4, 0x00000001, 0x00000001,
1578	0xd00c, 0xff000ff0, 0x00000100,
1579	0xd80c, 0xff000ff0, 0x00000100
1580};
1581
1582static const u32 godavari_golden_registers[] =
1583{
1584	0x55e4, 0xff607fff, 0xfc000100,
1585	0x6ed8, 0x00010101, 0x00010000,
1586	0x9830, 0xffffffff, 0x00000000,
1587	0x98302, 0xf00fffff, 0x00000400,
1588	0x6130, 0xffffffff, 0x00010000,
1589	0x5bb0, 0x000000f0, 0x00000070,
1590	0x5bc0, 0xf0311fff, 0x80300000,
1591	0x98f8, 0x73773777, 0x12010001,
1592	0x98fc, 0xffffffff, 0x00000010,
1593	0x8030, 0x00001f0f, 0x0000100a,
1594	0x2f48, 0x73773777, 0x12010001,
1595	0x2408, 0x000fffff, 0x000c007f,
1596	0x8a14, 0xf000003f, 0x00000007,
1597	0x8b24, 0xffffffff, 0x00ff0fff,
1598	0x30a04, 0x0000ff0f, 0x00000000,
1599	0x28a4c, 0x07ffffff, 0x06000000,
1600	0x4d8, 0x00000fff, 0x00000100,
1601	0xd014, 0x00010000, 0x00810001,
1602	0xd814, 0x00010000, 0x00810001,
1603	0x3e78, 0x00000001, 0x00000002,
1604	0xc768, 0x00000008, 0x00000008,
1605	0xc770, 0x00000f00, 0x00000800,
1606	0xc774, 0x00000f00, 0x00000800,
1607	0xc798, 0x00ffffff, 0x00ff7fbf,
1608	0xc79c, 0x00ffffff, 0x00ff7faf,
1609	0x8c00, 0x000000ff, 0x00000001,
1610	0x214f8, 0x01ff01ff, 0x00000002,
1611	0x21498, 0x007ff800, 0x00200000,
1612	0x2015c, 0xffffffff, 0x00000f40,
1613	0x88c4, 0x001f3ae3, 0x00000082,
1614	0x88d4, 0x0000001f, 0x00000010,
1615	0x30934, 0xffffffff, 0x00000000
1616};
1617
1618
1619static void cik_init_golden_registers(struct radeon_device *rdev)
1620{
1621	switch (rdev->family) {
1622	case CHIP_BONAIRE:
1623		radeon_program_register_sequence(rdev,
1624						 bonaire_mgcg_cgcg_init,
1625						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1626		radeon_program_register_sequence(rdev,
1627						 bonaire_golden_registers,
1628						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1629		radeon_program_register_sequence(rdev,
1630						 bonaire_golden_common_registers,
1631						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1632		radeon_program_register_sequence(rdev,
1633						 bonaire_golden_spm_registers,
1634						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1635		break;
1636	case CHIP_KABINI:
1637		radeon_program_register_sequence(rdev,
1638						 kalindi_mgcg_cgcg_init,
1639						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1640		radeon_program_register_sequence(rdev,
1641						 kalindi_golden_registers,
1642						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1643		radeon_program_register_sequence(rdev,
1644						 kalindi_golden_common_registers,
1645						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1646		radeon_program_register_sequence(rdev,
1647						 kalindi_golden_spm_registers,
1648						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1649		break;
1650	case CHIP_MULLINS:
1651		radeon_program_register_sequence(rdev,
1652						 kalindi_mgcg_cgcg_init,
1653						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1654		radeon_program_register_sequence(rdev,
1655						 godavari_golden_registers,
1656						 (const u32)ARRAY_SIZE(godavari_golden_registers));
1657		radeon_program_register_sequence(rdev,
1658						 kalindi_golden_common_registers,
1659						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1660		radeon_program_register_sequence(rdev,
1661						 kalindi_golden_spm_registers,
1662						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1663		break;
1664	case CHIP_KAVERI:
1665		radeon_program_register_sequence(rdev,
1666						 spectre_mgcg_cgcg_init,
1667						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1668		radeon_program_register_sequence(rdev,
1669						 spectre_golden_registers,
1670						 (const u32)ARRAY_SIZE(spectre_golden_registers));
1671		radeon_program_register_sequence(rdev,
1672						 spectre_golden_common_registers,
1673						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1674		radeon_program_register_sequence(rdev,
1675						 spectre_golden_spm_registers,
1676						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1677		break;
1678	case CHIP_HAWAII:
1679		radeon_program_register_sequence(rdev,
1680						 hawaii_mgcg_cgcg_init,
1681						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1682		radeon_program_register_sequence(rdev,
1683						 hawaii_golden_registers,
1684						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1685		radeon_program_register_sequence(rdev,
1686						 hawaii_golden_common_registers,
1687						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1688		radeon_program_register_sequence(rdev,
1689						 hawaii_golden_spm_registers,
1690						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1691		break;
1692	default:
1693		break;
1694	}
1695}
1696
1697/**
1698 * cik_get_xclk - get the xclk
1699 *
1700 * @rdev: radeon_device pointer
1701 *
1702 * Returns the reference clock used by the gfx engine
1703 * (CIK).
1704 */
1705u32 cik_get_xclk(struct radeon_device *rdev)
1706{
1707	u32 reference_clock = rdev->clock.spll.reference_freq;
1708
1709	if (rdev->flags & RADEON_IS_IGP) {
1710		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1711			return reference_clock / 2;
1712	} else {
1713		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1714			return reference_clock / 4;
1715	}
1716	return reference_clock;
1717}
1718
1719/**
1720 * cik_mm_rdoorbell - read a doorbell dword
1721 *
1722 * @rdev: radeon_device pointer
1723 * @index: doorbell index
1724 *
1725 * Returns the value in the doorbell aperture at the
1726 * requested doorbell index (CIK).
1727 */
1728u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1729{
1730	if (index < rdev->doorbell.num_doorbells) {
1731		return readl(rdev->doorbell.ptr + index);
1732	} else {
1733		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1734		return 0;
1735	}
1736}
1737
1738/**
1739 * cik_mm_wdoorbell - write a doorbell dword
1740 *
1741 * @rdev: radeon_device pointer
1742 * @index: doorbell index
1743 * @v: value to write
1744 *
1745 * Writes @v to the doorbell aperture at the
1746 * requested doorbell index (CIK).
1747 */
1748void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1749{
1750	if (index < rdev->doorbell.num_doorbells) {
1751		writel(v, rdev->doorbell.ptr + index);
1752	} else {
1753		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1754	}
1755}
1756
1757#define BONAIRE_IO_MC_REGS_SIZE 36
1758
1759static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1760{
1761	{0x00000070, 0x04400000},
1762	{0x00000071, 0x80c01803},
1763	{0x00000072, 0x00004004},
1764	{0x00000073, 0x00000100},
1765	{0x00000074, 0x00ff0000},
1766	{0x00000075, 0x34000000},
1767	{0x00000076, 0x08000014},
1768	{0x00000077, 0x00cc08ec},
1769	{0x00000078, 0x00000400},
1770	{0x00000079, 0x00000000},
1771	{0x0000007a, 0x04090000},
1772	{0x0000007c, 0x00000000},
1773	{0x0000007e, 0x4408a8e8},
1774	{0x0000007f, 0x00000304},
1775	{0x00000080, 0x00000000},
1776	{0x00000082, 0x00000001},
1777	{0x00000083, 0x00000002},
1778	{0x00000084, 0xf3e4f400},
1779	{0x00000085, 0x052024e3},
1780	{0x00000087, 0x00000000},
1781	{0x00000088, 0x01000000},
1782	{0x0000008a, 0x1c0a0000},
1783	{0x0000008b, 0xff010000},
1784	{0x0000008d, 0xffffefff},
1785	{0x0000008e, 0xfff3efff},
1786	{0x0000008f, 0xfff3efbf},
1787	{0x00000092, 0xf7ffffff},
1788	{0x00000093, 0xffffff7f},
1789	{0x00000095, 0x00101101},
1790	{0x00000096, 0x00000fff},
1791	{0x00000097, 0x00116fff},
1792	{0x00000098, 0x60010000},
1793	{0x00000099, 0x10010000},
1794	{0x0000009a, 0x00006000},
1795	{0x0000009b, 0x00001000},
1796	{0x0000009f, 0x00b48000}
1797};
1798
1799#define HAWAII_IO_MC_REGS_SIZE 22
1800
1801static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1802{
1803	{0x0000007d, 0x40000000},
1804	{0x0000007e, 0x40180304},
1805	{0x0000007f, 0x0000ff00},
1806	{0x00000081, 0x00000000},
1807	{0x00000083, 0x00000800},
1808	{0x00000086, 0x00000000},
1809	{0x00000087, 0x00000100},
1810	{0x00000088, 0x00020100},
1811	{0x00000089, 0x00000000},
1812	{0x0000008b, 0x00040000},
1813	{0x0000008c, 0x00000100},
1814	{0x0000008e, 0xff010000},
1815	{0x00000090, 0xffffefff},
1816	{0x00000091, 0xfff3efff},
1817	{0x00000092, 0xfff3efbf},
1818	{0x00000093, 0xf7ffffff},
1819	{0x00000094, 0xffffff7f},
1820	{0x00000095, 0x00000fff},
1821	{0x00000096, 0x00116fff},
1822	{0x00000097, 0x60010000},
1823	{0x00000098, 0x10010000},
1824	{0x0000009f, 0x00c79000}
1825};
1826
1827
1828/**
1829 * cik_srbm_select - select specific register instances
1830 *
1831 * @rdev: radeon_device pointer
1832 * @me: selected ME (micro engine)
1833 * @pipe: pipe
1834 * @queue: queue
1835 * @vmid: VMID
1836 *
1837 * Switches the currently active registers instances.  Some
1838 * registers are instanced per VMID, others are instanced per
1839 * me/pipe/queue combination.
1840 */
1841static void cik_srbm_select(struct radeon_device *rdev,
1842			    u32 me, u32 pipe, u32 queue, u32 vmid)
1843{
1844	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1845			     MEID(me & 0x3) |
1846			     VMID(vmid & 0xf) |
1847			     QUEUEID(queue & 0x7));
1848	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1849}
1850
1851/* ucode loading */
1852/**
1853 * ci_mc_load_microcode - load MC ucode into the hw
1854 *
1855 * @rdev: radeon_device pointer
1856 *
1857 * Load the GDDR MC ucode into the hw (CIK).
1858 * Returns 0 on success, error on failure.
1859 */
1860int ci_mc_load_microcode(struct radeon_device *rdev)
1861{
1862	const __be32 *fw_data = NULL;
1863	const __le32 *new_fw_data = NULL;
1864	u32 running, tmp;
1865	u32 *io_mc_regs = NULL;
1866	const __le32 *new_io_mc_regs = NULL;
1867	int i, regs_size, ucode_size;
1868
1869	if (!rdev->mc_fw)
1870		return -EINVAL;
1871
1872	if (rdev->new_fw) {
1873		const struct mc_firmware_header_v1_0 *hdr =
1874			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1875
1876		radeon_ucode_print_mc_hdr(&hdr->header);
1877
1878		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1879		new_io_mc_regs = (const __le32 *)
1880			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1881		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1882		new_fw_data = (const __le32 *)
1883			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1884	} else {
1885		ucode_size = rdev->mc_fw->size / 4;
1886
1887		switch (rdev->family) {
1888		case CHIP_BONAIRE:
1889			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1890			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1891			break;
1892		case CHIP_HAWAII:
1893			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1894			regs_size = HAWAII_IO_MC_REGS_SIZE;
1895			break;
1896		default:
1897			return -EINVAL;
1898		}
1899		fw_data = (const __be32 *)rdev->mc_fw->data;
1900	}
1901
1902	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1903
1904	if (running == 0) {
1905		/* reset the engine and set to writable */
1906		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1907		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1908
1909		/* load mc io regs */
1910		for (i = 0; i < regs_size; i++) {
1911			if (rdev->new_fw) {
1912				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1913				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1914			} else {
1915				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1916				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1917			}
1918		}
1919
1920		tmp = RREG32(MC_SEQ_MISC0);
1921		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1922			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1923			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1924			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1925			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1926		}
1927
1928		/* load the MC ucode */
1929		for (i = 0; i < ucode_size; i++) {
1930			if (rdev->new_fw)
1931				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1932			else
1933				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1934		}
1935
1936		/* put the engine back into the active state */
1937		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1938		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1939		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1940
1941		/* wait for training to complete */
1942		for (i = 0; i < rdev->usec_timeout; i++) {
1943			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1944				break;
1945			udelay(1);
1946		}
1947		for (i = 0; i < rdev->usec_timeout; i++) {
1948			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1949				break;
1950			udelay(1);
1951		}
1952	}
1953
1954	return 0;
1955}
1956
1957/**
1958 * cik_init_microcode - load ucode images from disk
1959 *
1960 * @rdev: radeon_device pointer
1961 *
1962 * Use the firmware interface to load the ucode images into
1963 * the driver (not loaded into hw).
1964 * Returns 0 on success, error on failure.
1965 */
1966static int cik_init_microcode(struct radeon_device *rdev)
1967{
1968	const char *chip_name;
1969	const char *new_chip_name;
1970	size_t pfp_req_size, me_req_size, ce_req_size,
1971		mec_req_size, rlc_req_size, mc_req_size = 0,
1972		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1973	char fw_name[30];
1974	int new_fw = 0;
1975	int err;
1976	int num_fw;
1977	bool new_smc = false;
1978
1979	DRM_DEBUG("\n");
1980
1981	switch (rdev->family) {
1982	case CHIP_BONAIRE:
1983		chip_name = "BONAIRE";
1984		if ((rdev->pdev->revision == 0x80) ||
1985		    (rdev->pdev->revision == 0x81) ||
1986		    (rdev->pdev->device == 0x665f))
1987			new_smc = true;
1988		new_chip_name = "bonaire";
1989		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1990		me_req_size = CIK_ME_UCODE_SIZE * 4;
1991		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1992		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1993		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1994		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1995		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1996		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1997		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1998		num_fw = 8;
1999		break;
2000	case CHIP_HAWAII:
2001		chip_name = "HAWAII";
2002		if (rdev->pdev->revision == 0x80)
2003			new_smc = true;
2004		new_chip_name = "hawaii";
2005		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2006		me_req_size = CIK_ME_UCODE_SIZE * 4;
2007		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2008		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2009		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2010		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2011		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2012		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2013		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2014		num_fw = 8;
2015		break;
2016	case CHIP_KAVERI:
2017		chip_name = "KAVERI";
2018		new_chip_name = "kaveri";
2019		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2020		me_req_size = CIK_ME_UCODE_SIZE * 4;
2021		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2022		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2023		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2024		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2025		num_fw = 7;
2026		break;
2027	case CHIP_KABINI:
2028		chip_name = "KABINI";
2029		new_chip_name = "kabini";
2030		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2031		me_req_size = CIK_ME_UCODE_SIZE * 4;
2032		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2033		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2034		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2035		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2036		num_fw = 6;
2037		break;
2038	case CHIP_MULLINS:
2039		chip_name = "MULLINS";
2040		new_chip_name = "mullins";
2041		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2042		me_req_size = CIK_ME_UCODE_SIZE * 4;
2043		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2044		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2045		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2046		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2047		num_fw = 6;
2048		break;
2049	default: BUG();
2050	}
2051
2052	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2053
2054	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2055	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2056	if (err) {
2057		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2058		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2059		if (err)
2060			goto out;
2061		if (rdev->pfp_fw->size != pfp_req_size) {
2062			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2063			       rdev->pfp_fw->size, fw_name);
2064			err = -EINVAL;
2065			goto out;
2066		}
2067	} else {
2068		err = radeon_ucode_validate(rdev->pfp_fw);
2069		if (err) {
2070			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2071			       fw_name);
2072			goto out;
2073		} else {
2074			new_fw++;
2075		}
2076	}
2077
2078	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2079	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2080	if (err) {
2081		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2082		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2083		if (err)
2084			goto out;
2085		if (rdev->me_fw->size != me_req_size) {
2086			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2087			       rdev->me_fw->size, fw_name);
2088			err = -EINVAL;
2089		}
2090	} else {
2091		err = radeon_ucode_validate(rdev->me_fw);
2092		if (err) {
2093			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2094			       fw_name);
2095			goto out;
2096		} else {
2097			new_fw++;
2098		}
2099	}
2100
2101	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2102	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2103	if (err) {
2104		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2105		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2106		if (err)
2107			goto out;
2108		if (rdev->ce_fw->size != ce_req_size) {
2109			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2110			       rdev->ce_fw->size, fw_name);
2111			err = -EINVAL;
2112		}
2113	} else {
2114		err = radeon_ucode_validate(rdev->ce_fw);
2115		if (err) {
2116			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2117			       fw_name);
2118			goto out;
2119		} else {
2120			new_fw++;
2121		}
2122	}
2123
2124	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2125	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2126	if (err) {
2127		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2128		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2129		if (err)
2130			goto out;
2131		if (rdev->mec_fw->size != mec_req_size) {
2132			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2133			       rdev->mec_fw->size, fw_name);
2134			err = -EINVAL;
2135		}
2136	} else {
2137		err = radeon_ucode_validate(rdev->mec_fw);
2138		if (err) {
2139			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2140			       fw_name);
2141			goto out;
2142		} else {
2143			new_fw++;
2144		}
2145	}
2146
2147	if (rdev->family == CHIP_KAVERI) {
2148		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2149		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2150		if (err) {
2151			goto out;
2152		} else {
2153			err = radeon_ucode_validate(rdev->mec2_fw);
2154			if (err) {
2155				goto out;
2156			} else {
2157				new_fw++;
2158			}
2159		}
2160	}
2161
2162	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2163	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2164	if (err) {
2165		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2166		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2167		if (err)
2168			goto out;
2169		if (rdev->rlc_fw->size != rlc_req_size) {
2170			pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2171			       rdev->rlc_fw->size, fw_name);
2172			err = -EINVAL;
2173		}
2174	} else {
2175		err = radeon_ucode_validate(rdev->rlc_fw);
2176		if (err) {
2177			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2178			       fw_name);
2179			goto out;
2180		} else {
2181			new_fw++;
2182		}
2183	}
2184
2185	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2186	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2187	if (err) {
2188		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2189		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2190		if (err)
2191			goto out;
2192		if (rdev->sdma_fw->size != sdma_req_size) {
2193			pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2194			       rdev->sdma_fw->size, fw_name);
2195			err = -EINVAL;
2196		}
2197	} else {
2198		err = radeon_ucode_validate(rdev->sdma_fw);
2199		if (err) {
2200			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2201			       fw_name);
2202			goto out;
2203		} else {
2204			new_fw++;
2205		}
2206	}
2207
2208	/* No SMC, MC ucode on APUs */
2209	if (!(rdev->flags & RADEON_IS_IGP)) {
2210		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2211		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2212		if (err) {
2213			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2214			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2215			if (err) {
2216				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2217				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2218				if (err)
2219					goto out;
2220			}
2221			if ((rdev->mc_fw->size != mc_req_size) &&
2222			    (rdev->mc_fw->size != mc2_req_size)){
2223				pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n",
2224				       rdev->mc_fw->size, fw_name);
2225				err = -EINVAL;
2226			}
2227			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2228		} else {
2229			err = radeon_ucode_validate(rdev->mc_fw);
2230			if (err) {
2231				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2232				       fw_name);
2233				goto out;
2234			} else {
2235				new_fw++;
2236			}
2237		}
2238
2239		if (new_smc)
2240			snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
2241		else
2242			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2243		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2244		if (err) {
2245			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2246			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2247			if (err) {
2248				pr_err("smc: error loading firmware \"%s\"\n",
2249				       fw_name);
2250				release_firmware(rdev->smc_fw);
2251				rdev->smc_fw = NULL;
2252				err = 0;
2253			} else if (rdev->smc_fw->size != smc_req_size) {
2254				pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n",
2255				       rdev->smc_fw->size, fw_name);
2256				err = -EINVAL;
2257			}
2258		} else {
2259			err = radeon_ucode_validate(rdev->smc_fw);
2260			if (err) {
2261				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2262				       fw_name);
2263				goto out;
2264			} else {
2265				new_fw++;
2266			}
2267		}
2268	}
2269
2270	if (new_fw == 0) {
2271		rdev->new_fw = false;
2272	} else if (new_fw < num_fw) {
2273		pr_err("ci_fw: mixing new and old firmware!\n");
2274		err = -EINVAL;
2275	} else {
2276		rdev->new_fw = true;
2277	}
2278
2279out:
2280	if (err) {
2281		if (err != -EINVAL)
2282			pr_err("cik_cp: Failed to load firmware \"%s\"\n",
2283			       fw_name);
2284		release_firmware(rdev->pfp_fw);
2285		rdev->pfp_fw = NULL;
2286		release_firmware(rdev->me_fw);
2287		rdev->me_fw = NULL;
2288		release_firmware(rdev->ce_fw);
2289		rdev->ce_fw = NULL;
2290		release_firmware(rdev->mec_fw);
2291		rdev->mec_fw = NULL;
2292		release_firmware(rdev->mec2_fw);
2293		rdev->mec2_fw = NULL;
2294		release_firmware(rdev->rlc_fw);
2295		rdev->rlc_fw = NULL;
2296		release_firmware(rdev->sdma_fw);
2297		rdev->sdma_fw = NULL;
2298		release_firmware(rdev->mc_fw);
2299		rdev->mc_fw = NULL;
2300		release_firmware(rdev->smc_fw);
2301		rdev->smc_fw = NULL;
2302	}
2303	return err;
2304}
2305
2306/*
2307 * Core functions
2308 */
2309/**
2310 * cik_tiling_mode_table_init - init the hw tiling table
2311 *
2312 * @rdev: radeon_device pointer
2313 *
2314 * Starting with SI, the tiling setup is done globally in a
2315 * set of 32 tiling modes.  Rather than selecting each set of
2316 * parameters per surface as on older asics, we just select
2317 * which index in the tiling table we want to use, and the
2318 * surface uses those parameters (CIK).
2319 */
2320static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2321{
2322	u32 *tile = rdev->config.cik.tile_mode_array;
2323	u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2324	const u32 num_tile_mode_states =
2325			ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2326	const u32 num_secondary_tile_mode_states =
2327			ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2328	u32 reg_offset, split_equal_to_row_size;
2329	u32 num_pipe_configs;
2330	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2331		rdev->config.cik.max_shader_engines;
2332
2333	switch (rdev->config.cik.mem_row_size_in_kb) {
2334	case 1:
2335		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2336		break;
2337	case 2:
2338	default:
2339		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2340		break;
2341	case 4:
2342		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2343		break;
2344	}
2345
2346	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2347	if (num_pipe_configs > 8)
2348		num_pipe_configs = 16;
2349
2350	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2351		tile[reg_offset] = 0;
2352	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2353		macrotile[reg_offset] = 0;
2354
2355	switch(num_pipe_configs) {
2356	case 16:
2357		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2358			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2359			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2360			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2361		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2362			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2363			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2364			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2365		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2366			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2367			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2368			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2369		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2370			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2371			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2372			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2373		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2374			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2375			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2376			   TILE_SPLIT(split_equal_to_row_size));
2377		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2378			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2379			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2380		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2381			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2382			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2383			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2384		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2385			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2386			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2387			   TILE_SPLIT(split_equal_to_row_size));
2388		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2389			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2390		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2391			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2393		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2394			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2395			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2397		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2398			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2399			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2400			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2401		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2402			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2403			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2404			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2405		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2406			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2408		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2409			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2410			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2411			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2412		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2413			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2414			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2415			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2416		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2417			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2418			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2419			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2420		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2421			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2422			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2423		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2424			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2425			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2426			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2427		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2428			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2429			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2430			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2431		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2432			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2433			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2434			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2435
2436		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2437			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2438			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2439			   NUM_BANKS(ADDR_SURF_16_BANK));
2440		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2441			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2442			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2443			   NUM_BANKS(ADDR_SURF_16_BANK));
2444		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2445			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2446			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2447			   NUM_BANKS(ADDR_SURF_16_BANK));
2448		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2449			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2450			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2451			   NUM_BANKS(ADDR_SURF_16_BANK));
2452		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2453			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2454			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2455			   NUM_BANKS(ADDR_SURF_8_BANK));
2456		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2457			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2458			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2459			   NUM_BANKS(ADDR_SURF_4_BANK));
2460		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2461			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2462			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2463			   NUM_BANKS(ADDR_SURF_2_BANK));
2464		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2466			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2467			   NUM_BANKS(ADDR_SURF_16_BANK));
2468		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2470			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2471			   NUM_BANKS(ADDR_SURF_16_BANK));
2472		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2474			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2475			    NUM_BANKS(ADDR_SURF_16_BANK));
2476		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2478			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2479			    NUM_BANKS(ADDR_SURF_8_BANK));
2480		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2482			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2483			    NUM_BANKS(ADDR_SURF_4_BANK));
2484		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2487			    NUM_BANKS(ADDR_SURF_2_BANK));
2488		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2490			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2491			    NUM_BANKS(ADDR_SURF_2_BANK));
2492
2493		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2494			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2495		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2496			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2497		break;
2498
2499	case 8:
2500		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2501			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2502			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2503			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2504		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2505			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2506			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2507			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2508		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2509			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2510			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2511			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2512		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2513			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2514			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2515			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2516		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2517			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2518			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2519			   TILE_SPLIT(split_equal_to_row_size));
2520		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2521			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2522			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2523		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2524			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2525			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2526			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2527		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2528			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2529			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2530			   TILE_SPLIT(split_equal_to_row_size));
2531		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2532			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2533		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2534			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2536		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2537			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2538			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2540		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2541			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2542			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2543			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2544		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2545			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2546			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2548		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2549			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2550			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2551		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2552			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2553			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2554			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2555		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2556			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2557			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2558			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2559		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2560			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2561			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2562			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2563		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2564			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2565			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2566		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2567			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2568			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2569			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2570		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2571			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2572			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2573			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2574		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2575			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2576			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2577			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2578
2579		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2580				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2581				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2582				NUM_BANKS(ADDR_SURF_16_BANK));
2583		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2584				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2585				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2586				NUM_BANKS(ADDR_SURF_16_BANK));
2587		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2588				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2589				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2590				NUM_BANKS(ADDR_SURF_16_BANK));
2591		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2592				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2593				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2594				NUM_BANKS(ADDR_SURF_16_BANK));
2595		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2596				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2597				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2598				NUM_BANKS(ADDR_SURF_8_BANK));
2599		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2600				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2601				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2602				NUM_BANKS(ADDR_SURF_4_BANK));
2603		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2604				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2605				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2606				NUM_BANKS(ADDR_SURF_2_BANK));
2607		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2608				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2609				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2610				NUM_BANKS(ADDR_SURF_16_BANK));
2611		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2612				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2613				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2614				NUM_BANKS(ADDR_SURF_16_BANK));
2615		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2616				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2617				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2618				NUM_BANKS(ADDR_SURF_16_BANK));
2619		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2621				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2622				NUM_BANKS(ADDR_SURF_16_BANK));
2623		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2624				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2625				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2626				NUM_BANKS(ADDR_SURF_8_BANK));
2627		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2629				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2630				NUM_BANKS(ADDR_SURF_4_BANK));
2631		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2633				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2634				NUM_BANKS(ADDR_SURF_2_BANK));
2635
2636		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2637			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2638		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2639			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2640		break;
2641
2642	case 4:
2643		if (num_rbs == 4) {
2644		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2645			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2646			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2647			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2648		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2649			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2650			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2651			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2652		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2653			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2654			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2655			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2656		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2657			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2658			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2659			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2660		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2661			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2662			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2663			   TILE_SPLIT(split_equal_to_row_size));
2664		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2665			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2666			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2667		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2668			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2669			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2670			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2671		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2672			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2673			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2674			   TILE_SPLIT(split_equal_to_row_size));
2675		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2676			   PIPE_CONFIG(ADDR_SURF_P4_16x16));
2677		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2678			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2680		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2681			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2682			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2684		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2685			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2686			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2687			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2688		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2689			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2690			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2692		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2693			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2694			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2695		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2696			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2697			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2698			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2699		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2700			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2701			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2702			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2703		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2704			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2705			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2706			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2707		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2708			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2709			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2710		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2711			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2712			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2713			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2714		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2715			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2716			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2717			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2718		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2719			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2720			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2721			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2722
2723		} else if (num_rbs < 4) {
2724		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2725			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2726			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2727			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2728		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2729			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2730			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2731			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2732		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2733			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2734			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2735			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2736		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2737			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2738			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2739			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2740		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2741			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2742			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2743			   TILE_SPLIT(split_equal_to_row_size));
2744		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2745			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2746			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2747		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2748			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2749			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2750			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2751		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2752			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2753			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2754			   TILE_SPLIT(split_equal_to_row_size));
2755		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2756			   PIPE_CONFIG(ADDR_SURF_P4_8x16));
2757		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2758			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2760		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2761			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2762			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2764		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2765			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2766			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2767			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2768		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2769			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2770			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2771			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2772		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2773			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2774			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2775		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2776			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2777			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2778			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2779		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2780			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2781			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2782			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2783		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2784			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2785			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2786			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2787		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2788			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2789			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2790		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2791			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2792			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2793			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2794		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2795			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2796			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2797			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2798		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2799			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2800			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2801			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2802		}
2803
2804		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2805				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2806				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2807				NUM_BANKS(ADDR_SURF_16_BANK));
2808		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2809				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2810				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2811				NUM_BANKS(ADDR_SURF_16_BANK));
2812		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2813				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2814				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2815				NUM_BANKS(ADDR_SURF_16_BANK));
2816		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2817				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2818				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2819				NUM_BANKS(ADDR_SURF_16_BANK));
2820		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2821				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2822				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2823				NUM_BANKS(ADDR_SURF_16_BANK));
2824		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2825				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2826				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2827				NUM_BANKS(ADDR_SURF_8_BANK));
2828		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2829				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2830				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2831				NUM_BANKS(ADDR_SURF_4_BANK));
2832		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2833				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2834				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2835				NUM_BANKS(ADDR_SURF_16_BANK));
2836		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2837				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2838				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2839				NUM_BANKS(ADDR_SURF_16_BANK));
2840		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2841				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2842				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2843				NUM_BANKS(ADDR_SURF_16_BANK));
2844		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2845				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2846				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2847				NUM_BANKS(ADDR_SURF_16_BANK));
2848		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2849				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2850				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2851				NUM_BANKS(ADDR_SURF_16_BANK));
2852		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2854				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2855				NUM_BANKS(ADDR_SURF_8_BANK));
2856		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2857				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2858				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2859				NUM_BANKS(ADDR_SURF_4_BANK));
2860
2861		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2862			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2863		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2864			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2865		break;
2866
2867	case 2:
2868		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2869			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2870			   PIPE_CONFIG(ADDR_SURF_P2) |
2871			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2872		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2873			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2874			   PIPE_CONFIG(ADDR_SURF_P2) |
2875			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2876		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2877			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2878			   PIPE_CONFIG(ADDR_SURF_P2) |
2879			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2880		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2881			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2882			   PIPE_CONFIG(ADDR_SURF_P2) |
2883			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2884		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2885			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2886			   PIPE_CONFIG(ADDR_SURF_P2) |
2887			   TILE_SPLIT(split_equal_to_row_size));
2888		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2889			   PIPE_CONFIG(ADDR_SURF_P2) |
2890			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2891		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2892			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2893			   PIPE_CONFIG(ADDR_SURF_P2) |
2894			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2895		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2896			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2897			   PIPE_CONFIG(ADDR_SURF_P2) |
2898			   TILE_SPLIT(split_equal_to_row_size));
2899		tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2900			   PIPE_CONFIG(ADDR_SURF_P2);
2901		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2902			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2903			   PIPE_CONFIG(ADDR_SURF_P2));
2904		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2906			    PIPE_CONFIG(ADDR_SURF_P2) |
2907			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2908		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2909			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2910			    PIPE_CONFIG(ADDR_SURF_P2) |
2911			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2912		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2913			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2914			    PIPE_CONFIG(ADDR_SURF_P2) |
2915			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2916		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2917			    PIPE_CONFIG(ADDR_SURF_P2) |
2918			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2919		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2920			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2921			    PIPE_CONFIG(ADDR_SURF_P2) |
2922			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2923		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2924			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2925			    PIPE_CONFIG(ADDR_SURF_P2) |
2926			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2927		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2928			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2929			    PIPE_CONFIG(ADDR_SURF_P2) |
2930			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2931		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2932			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2933			    PIPE_CONFIG(ADDR_SURF_P2));
2934		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2935			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2936			    PIPE_CONFIG(ADDR_SURF_P2) |
2937			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2938		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2939			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2940			    PIPE_CONFIG(ADDR_SURF_P2) |
2941			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2942		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2943			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2944			    PIPE_CONFIG(ADDR_SURF_P2) |
2945			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2946
2947		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2948				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2949				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2950				NUM_BANKS(ADDR_SURF_16_BANK));
2951		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2952				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2953				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2954				NUM_BANKS(ADDR_SURF_16_BANK));
2955		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2956				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2957				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2958				NUM_BANKS(ADDR_SURF_16_BANK));
2959		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2960				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2961				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2962				NUM_BANKS(ADDR_SURF_16_BANK));
2963		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2964				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2965				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2966				NUM_BANKS(ADDR_SURF_16_BANK));
2967		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2968				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2969				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2970				NUM_BANKS(ADDR_SURF_16_BANK));
2971		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2972				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2973				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2974				NUM_BANKS(ADDR_SURF_8_BANK));
2975		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2976				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2977				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2978				NUM_BANKS(ADDR_SURF_16_BANK));
2979		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2980				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2981				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2982				NUM_BANKS(ADDR_SURF_16_BANK));
2983		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2984				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2985				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2986				NUM_BANKS(ADDR_SURF_16_BANK));
2987		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2988				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2989				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2990				NUM_BANKS(ADDR_SURF_16_BANK));
2991		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2992				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2993				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2994				NUM_BANKS(ADDR_SURF_16_BANK));
2995		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2996				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2997				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2998				NUM_BANKS(ADDR_SURF_16_BANK));
2999		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3000				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3001				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3002				NUM_BANKS(ADDR_SURF_8_BANK));
3003
3004		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3005			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3006		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3007			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3008		break;
3009
3010	default:
3011		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3012	}
3013}
3014
3015/**
3016 * cik_select_se_sh - select which SE, SH to address
3017 *
3018 * @rdev: radeon_device pointer
3019 * @se_num: shader engine to address
3020 * @sh_num: sh block to address
3021 *
3022 * Select which SE, SH combinations to address. Certain
3023 * registers are instanced per SE or SH.  0xffffffff means
3024 * broadcast to all SEs or SHs (CIK).
3025 */
3026static void cik_select_se_sh(struct radeon_device *rdev,
3027			     u32 se_num, u32 sh_num)
3028{
3029	u32 data = INSTANCE_BROADCAST_WRITES;
3030
3031	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3032		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3033	else if (se_num == 0xffffffff)
3034		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3035	else if (sh_num == 0xffffffff)
3036		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3037	else
3038		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3039	WREG32(GRBM_GFX_INDEX, data);
3040}
3041
3042/**
3043 * cik_create_bitmask - create a bitmask
3044 *
3045 * @bit_width: length of the mask
3046 *
3047 * create a variable length bit mask (CIK).
3048 * Returns the bitmask.
3049 */
3050static u32 cik_create_bitmask(u32 bit_width)
3051{
3052	u32 i, mask = 0;
3053
3054	for (i = 0; i < bit_width; i++) {
3055		mask <<= 1;
3056		mask |= 1;
3057	}
3058	return mask;
3059}
3060
3061/**
3062 * cik_get_rb_disabled - computes the mask of disabled RBs
3063 *
3064 * @rdev: radeon_device pointer
3065 * @max_rb_num_per_se: max RBs (render backends) per SE (shader engine) for the asic
3066 * @sh_per_se: number of SH blocks per SE for the asic
3067 *
3068 * Calculates the bitmask of disabled RBs (CIK).
3069 * Returns the disabled RB bitmask.
3070 */
3071static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3072			      u32 max_rb_num_per_se,
3073			      u32 sh_per_se)
3074{
3075	u32 data, mask;
3076
3077	data = RREG32(CC_RB_BACKEND_DISABLE);
3078	if (data & 1)
3079		data &= BACKEND_DISABLE_MASK;
3080	else
3081		data = 0;
3082	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3083
3084	data >>= BACKEND_DISABLE_SHIFT;
3085
3086	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3087
3088	return data & mask;
3089}
3090
3091/**
3092 * cik_setup_rb - setup the RBs on the asic
3093 *
3094 * @rdev: radeon_device pointer
3095 * @se_num: number of SEs (shader engines) for the asic
3096 * @sh_per_se: number of SH blocks per SE for the asic
3097 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3098 *
3099 * Configures per-SE/SH RB registers (CIK).
3100 */
3101static void cik_setup_rb(struct radeon_device *rdev,
3102			 u32 se_num, u32 sh_per_se,
3103			 u32 max_rb_num_per_se)
3104{
3105	int i, j;
3106	u32 data, mask;
3107	u32 disabled_rbs = 0;
3108	u32 enabled_rbs = 0;
3109
3110	for (i = 0; i < se_num; i++) {
3111		for (j = 0; j < sh_per_se; j++) {
3112			cik_select_se_sh(rdev, i, j);
3113			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3114			if (rdev->family == CHIP_HAWAII)
3115				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3116			else
3117				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3118		}
3119	}
3120	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3121
3122	mask = 1;
3123	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3124		if (!(disabled_rbs & mask))
3125			enabled_rbs |= mask;
3126		mask <<= 1;
3127	}
3128
3129	rdev->config.cik.backend_enable_mask = enabled_rbs;
3130
3131	for (i = 0; i < se_num; i++) {
3132		cik_select_se_sh(rdev, i, 0xffffffff);
3133		data = 0;
3134		for (j = 0; j < sh_per_se; j++) {
3135			switch (enabled_rbs & 3) {
3136			case 0:
3137				if (j == 0)
3138					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3139				else
3140					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3141				break;
3142			case 1:
3143				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3144				break;
3145			case 2:
3146				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3147				break;
3148			case 3:
3149			default:
3150				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3151				break;
3152			}
3153			enabled_rbs >>= 2;
3154		}
3155		WREG32(PA_SC_RASTER_CONFIG, data);
3156	}
3157	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3158}
3159
3160/**
3161 * cik_gpu_init - setup the 3D engine
3162 *
3163 * @rdev: radeon_device pointer
3164 *
3165 * Configures the 3D engine and tiling configuration
3166 * registers so that the 3D engine is usable.
3167 */
3168static void cik_gpu_init(struct radeon_device *rdev)
3169{
3170	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3171	u32 mc_arb_ramcfg;
3172	u32 hdp_host_path_cntl;
3173	u32 tmp;
3174	int i, j;
3175
3176	switch (rdev->family) {
3177	case CHIP_BONAIRE:
3178		rdev->config.cik.max_shader_engines = 2;
3179		rdev->config.cik.max_tile_pipes = 4;
3180		rdev->config.cik.max_cu_per_sh = 7;
3181		rdev->config.cik.max_sh_per_se = 1;
3182		rdev->config.cik.max_backends_per_se = 2;
3183		rdev->config.cik.max_texture_channel_caches = 4;
3184		rdev->config.cik.max_gprs = 256;
3185		rdev->config.cik.max_gs_threads = 32;
3186		rdev->config.cik.max_hw_contexts = 8;
3187
3188		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3189		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3190		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3191		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3192		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3193		break;
3194	case CHIP_HAWAII:
3195		rdev->config.cik.max_shader_engines = 4;
3196		rdev->config.cik.max_tile_pipes = 16;
3197		rdev->config.cik.max_cu_per_sh = 11;
3198		rdev->config.cik.max_sh_per_se = 1;
3199		rdev->config.cik.max_backends_per_se = 4;
3200		rdev->config.cik.max_texture_channel_caches = 16;
3201		rdev->config.cik.max_gprs = 256;
3202		rdev->config.cik.max_gs_threads = 32;
3203		rdev->config.cik.max_hw_contexts = 8;
3204
3205		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3206		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3207		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3208		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3209		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3210		break;
3211	case CHIP_KAVERI:
3212		rdev->config.cik.max_shader_engines = 1;
3213		rdev->config.cik.max_tile_pipes = 4;
3214		rdev->config.cik.max_cu_per_sh = 8;
3215		rdev->config.cik.max_backends_per_se = 2;
3216		rdev->config.cik.max_sh_per_se = 1;
3217		rdev->config.cik.max_texture_channel_caches = 4;
3218		rdev->config.cik.max_gprs = 256;
3219		rdev->config.cik.max_gs_threads = 16;
3220		rdev->config.cik.max_hw_contexts = 8;
3221
3222		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3223		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3224		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3225		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3226		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3227		break;
3228	case CHIP_KABINI:
3229	case CHIP_MULLINS:
3230	default:
3231		rdev->config.cik.max_shader_engines = 1;
3232		rdev->config.cik.max_tile_pipes = 2;
3233		rdev->config.cik.max_cu_per_sh = 2;
3234		rdev->config.cik.max_sh_per_se = 1;
3235		rdev->config.cik.max_backends_per_se = 1;
3236		rdev->config.cik.max_texture_channel_caches = 2;
3237		rdev->config.cik.max_gprs = 256;
3238		rdev->config.cik.max_gs_threads = 16;
3239		rdev->config.cik.max_hw_contexts = 8;
3240
3241		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3242		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3243		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3244		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3245		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3246		break;
3247	}
3248
3249	/* Initialize HDP */
3250	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3251		WREG32((0x2c14 + j), 0x00000000);
3252		WREG32((0x2c18 + j), 0x00000000);
3253		WREG32((0x2c1c + j), 0x00000000);
3254		WREG32((0x2c20 + j), 0x00000000);
3255		WREG32((0x2c24 + j), 0x00000000);
3256	}
3257
3258	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3259	WREG32(SRBM_INT_CNTL, 0x1);
3260	WREG32(SRBM_INT_ACK, 0x1);
3261
3262	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3263
3264	RREG32(MC_SHARED_CHMAP);
3265	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3266
3267	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3268	rdev->config.cik.mem_max_burst_length_bytes = 256;
3269	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3270	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3271	if (rdev->config.cik.mem_row_size_in_kb > 4)
3272		rdev->config.cik.mem_row_size_in_kb = 4;
3273	/* XXX use MC settings? */
3274	rdev->config.cik.shader_engine_tile_size = 32;
3275	rdev->config.cik.num_gpus = 1;
3276	rdev->config.cik.multi_gpu_tile_size = 64;
3277
3278	/* fix up row size */
3279	gb_addr_config &= ~ROW_SIZE_MASK;
3280	switch (rdev->config.cik.mem_row_size_in_kb) {
3281	case 1:
3282	default:
3283		gb_addr_config |= ROW_SIZE(0);
3284		break;
3285	case 2:
3286		gb_addr_config |= ROW_SIZE(1);
3287		break;
3288	case 4:
3289		gb_addr_config |= ROW_SIZE(2);
3290		break;
3291	}
3292
3293	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3294	 * not have bank info, so create a custom tiling dword.
3295	 * bits 3:0   num_pipes
3296	 * bits 7:4   num_banks
3297	 * bits 11:8  group_size
3298	 * bits 15:12 row_size
3299	 */
3300	rdev->config.cik.tile_config = 0;
3301	switch (rdev->config.cik.num_tile_pipes) {
3302	case 1:
3303		rdev->config.cik.tile_config |= (0 << 0);
3304		break;
3305	case 2:
3306		rdev->config.cik.tile_config |= (1 << 0);
3307		break;
3308	case 4:
3309		rdev->config.cik.tile_config |= (2 << 0);
3310		break;
3311	case 8:
3312	default:
3313		/* XXX what about 12? */
3314		rdev->config.cik.tile_config |= (3 << 0);
3315		break;
3316	}
3317	rdev->config.cik.tile_config |=
3318		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3319	rdev->config.cik.tile_config |=
3320		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3321	rdev->config.cik.tile_config |=
3322		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3323
3324	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3325	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3326	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3327	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3328	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3329	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3330	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3331	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3332
3333	cik_tiling_mode_table_init(rdev);
3334
3335	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3336		     rdev->config.cik.max_sh_per_se,
3337		     rdev->config.cik.max_backends_per_se);
3338
3339	rdev->config.cik.active_cus = 0;
3340	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3341		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3342			rdev->config.cik.active_cus +=
3343				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3344		}
3345	}
3346
3347	/* set HW defaults for 3D engine */
3348	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3349
3350	WREG32(SX_DEBUG_1, 0x20);
3351
3352	WREG32(TA_CNTL_AUX, 0x00010000);
3353
3354	tmp = RREG32(SPI_CONFIG_CNTL);
3355	tmp |= 0x03000000;
3356	WREG32(SPI_CONFIG_CNTL, tmp);
3357
3358	WREG32(SQ_CONFIG, 1);
3359
3360	WREG32(DB_DEBUG, 0);
3361
3362	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3363	tmp |= 0x00000400;
3364	WREG32(DB_DEBUG2, tmp);
3365
3366	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3367	tmp |= 0x00020200;
3368	WREG32(DB_DEBUG3, tmp);
3369
3370	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3371	tmp |= 0x00018208;
3372	WREG32(CB_HW_CONTROL, tmp);
3373
3374	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3375
3376	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3377				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3378				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3379				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3380
3381	WREG32(VGT_NUM_INSTANCES, 1);
3382
3383	WREG32(CP_PERFMON_CNTL, 0);
3384
3385	WREG32(SQ_CONFIG, 0);
3386
3387	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3388					  FORCE_EOV_MAX_REZ_CNT(255)));
3389
3390	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3391	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3392
3393	WREG32(VGT_GS_VERTEX_REUSE, 16);
3394	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3395
3396	tmp = RREG32(HDP_MISC_CNTL);
3397	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3398	WREG32(HDP_MISC_CNTL, tmp);
3399
3400	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3401	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3402
3403	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3404	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3405
3406	udelay(50);
3407}
3408
3409/*
3410 * GPU scratch registers helpers function.
3411 */
3412/**
3413 * cik_scratch_init - setup driver info for CP scratch regs
3414 *
3415 * @rdev: radeon_device pointer
3416 *
3417 * Set up the number and offset of the CP scratch registers.
3418 * NOTE: use of CP scratch registers is a legacy inferface and
3419 * is not used by default on newer asics (r6xx+).  On newer asics,
3420 * memory buffers are used for fences rather than scratch regs.
3421 */
3422static void cik_scratch_init(struct radeon_device *rdev)
3423{
3424	int i;
3425
3426	rdev->scratch.num_reg = 7;
3427	rdev->scratch.reg_base = SCRATCH_REG0;
3428	for (i = 0; i < rdev->scratch.num_reg; i++) {
3429		rdev->scratch.free[i] = true;
3430		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3431	}
3432}
3433
3434/**
3435 * cik_ring_test - basic gfx ring test
3436 *
3437 * @rdev: radeon_device pointer
3438 * @ring: radeon_ring structure holding ring information
3439 *
3440 * Allocate a scratch register and write to it using the gfx ring (CIK).
3441 * Provides a basic gfx ring test to verify that the ring is working.
3442 * Used by cik_cp_gfx_resume();
3443 * Returns 0 on success, error on failure.
3444 */
3445int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3446{
3447	uint32_t scratch;
3448	uint32_t tmp = 0;
3449	unsigned i;
3450	int r;
3451
3452	r = radeon_scratch_get(rdev, &scratch);
3453	if (r) {
3454		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3455		return r;
3456	}
3457	WREG32(scratch, 0xCAFEDEAD);
3458	r = radeon_ring_lock(rdev, ring, 3);
3459	if (r) {
3460		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3461		radeon_scratch_free(rdev, scratch);
3462		return r;
3463	}
3464	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3465	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3466	radeon_ring_write(ring, 0xDEADBEEF);
3467	radeon_ring_unlock_commit(rdev, ring, false);
3468
3469	for (i = 0; i < rdev->usec_timeout; i++) {
3470		tmp = RREG32(scratch);
3471		if (tmp == 0xDEADBEEF)
3472			break;
3473		udelay(1);
3474	}
3475	if (i < rdev->usec_timeout) {
3476		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3477	} else {
3478		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3479			  ring->idx, scratch, tmp);
3480		r = -EINVAL;
3481	}
3482	radeon_scratch_free(rdev, scratch);
3483	return r;
3484}
3485
3486/**
3487 * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3488 *
3489 * @rdev: radeon_device pointer
3490 * @ridx: radeon ring index
3491 *
3492 * Emits an hdp flush on the cp.
3493 */
3494static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3495				       int ridx)
3496{
3497	struct radeon_ring *ring = &rdev->ring[ridx];
3498	u32 ref_and_mask;
3499
3500	switch (ring->idx) {
3501	case CAYMAN_RING_TYPE_CP1_INDEX:
3502	case CAYMAN_RING_TYPE_CP2_INDEX:
3503	default:
3504		switch (ring->me) {
3505		case 0:
3506			ref_and_mask = CP2 << ring->pipe;
3507			break;
3508		case 1:
3509			ref_and_mask = CP6 << ring->pipe;
3510			break;
3511		default:
3512			return;
3513		}
3514		break;
3515	case RADEON_RING_TYPE_GFX_INDEX:
3516		ref_and_mask = CP0;
3517		break;
3518	}
3519
3520	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3521	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3522				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3523				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3524	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3525	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3526	radeon_ring_write(ring, ref_and_mask);
3527	radeon_ring_write(ring, ref_and_mask);
3528	radeon_ring_write(ring, 0x20); /* poll interval */
3529}
3530
3531/**
3532 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3533 *
3534 * @rdev: radeon_device pointer
3535 * @fence: radeon fence object
3536 *
3537 * Emits a fence sequnce number on the gfx ring and flushes
3538 * GPU caches.
3539 */
3540void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3541			     struct radeon_fence *fence)
3542{
3543	struct radeon_ring *ring = &rdev->ring[fence->ring];
3544	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3545
3546	/* Workaround for cache flush problems. First send a dummy EOP
3547	 * event down the pipe with seq one below.
3548	 */
3549	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3550	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3551				 EOP_TC_ACTION_EN |
3552				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3553				 EVENT_INDEX(5)));
3554	radeon_ring_write(ring, addr & 0xfffffffc);
3555	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3556				DATA_SEL(1) | INT_SEL(0));
3557	radeon_ring_write(ring, fence->seq - 1);
3558	radeon_ring_write(ring, 0);
3559
3560	/* Then send the real EOP event down the pipe. */
3561	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3562	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3563				 EOP_TC_ACTION_EN |
3564				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3565				 EVENT_INDEX(5)));
3566	radeon_ring_write(ring, addr & 0xfffffffc);
3567	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3568	radeon_ring_write(ring, fence->seq);
3569	radeon_ring_write(ring, 0);
3570}
3571
3572/**
3573 * cik_fence_compute_ring_emit - emit a fence on the compute ring
3574 *
3575 * @rdev: radeon_device pointer
3576 * @fence: radeon fence object
3577 *
3578 * Emits a fence sequnce number on the compute ring and flushes
3579 * GPU caches.
3580 */
3581void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3582				 struct radeon_fence *fence)
3583{
3584	struct radeon_ring *ring = &rdev->ring[fence->ring];
3585	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3586
3587	/* RELEASE_MEM - flush caches, send int */
3588	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3589	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3590				 EOP_TC_ACTION_EN |
3591				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3592				 EVENT_INDEX(5)));
3593	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3594	radeon_ring_write(ring, addr & 0xfffffffc);
3595	radeon_ring_write(ring, upper_32_bits(addr));
3596	radeon_ring_write(ring, fence->seq);
3597	radeon_ring_write(ring, 0);
3598}
3599
3600/**
3601 * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3602 *
3603 * @rdev: radeon_device pointer
3604 * @ring: radeon ring buffer object
3605 * @semaphore: radeon semaphore object
3606 * @emit_wait: Is this a semaphore wait?
3607 *
3608 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3609 * from running ahead of semaphore waits.
3610 */
3611bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3612			     struct radeon_ring *ring,
3613			     struct radeon_semaphore *semaphore,
3614			     bool emit_wait)
3615{
3616	uint64_t addr = semaphore->gpu_addr;
3617	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3618
3619	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3620	radeon_ring_write(ring, lower_32_bits(addr));
3621	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3622
3623	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3624		/* Prevent the PFP from running ahead of the semaphore wait */
3625		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3626		radeon_ring_write(ring, 0x0);
3627	}
3628
3629	return true;
3630}
3631
3632/**
3633 * cik_copy_cpdma - copy pages using the CP DMA engine
3634 *
3635 * @rdev: radeon_device pointer
3636 * @src_offset: src GPU address
3637 * @dst_offset: dst GPU address
3638 * @num_gpu_pages: number of GPU pages to xfer
3639 * @resv: reservation object to sync to
3640 *
3641 * Copy GPU paging using the CP DMA engine (CIK+).
3642 * Used by the radeon ttm implementation to move pages if
3643 * registered as the asic copy callback.
3644 */
3645struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3646				    uint64_t src_offset, uint64_t dst_offset,
3647				    unsigned num_gpu_pages,
3648				    struct dma_resv *resv)
3649{
3650	struct radeon_fence *fence;
3651	struct radeon_sync sync;
3652	int ring_index = rdev->asic->copy.blit_ring_index;
3653	struct radeon_ring *ring = &rdev->ring[ring_index];
3654	u32 size_in_bytes, cur_size_in_bytes, control;
3655	int i, num_loops;
3656	int r = 0;
3657
3658	radeon_sync_create(&sync);
3659
3660	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3661	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3662	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3663	if (r) {
3664		DRM_ERROR("radeon: moving bo (%d).\n", r);
3665		radeon_sync_free(rdev, &sync, NULL);
3666		return ERR_PTR(r);
3667	}
3668
3669	radeon_sync_resv(rdev, &sync, resv, false);
3670	radeon_sync_rings(rdev, &sync, ring->idx);
3671
3672	for (i = 0; i < num_loops; i++) {
3673		cur_size_in_bytes = size_in_bytes;
3674		if (cur_size_in_bytes > 0x1fffff)
3675			cur_size_in_bytes = 0x1fffff;
3676		size_in_bytes -= cur_size_in_bytes;
3677		control = 0;
3678		if (size_in_bytes == 0)
3679			control |= PACKET3_DMA_DATA_CP_SYNC;
3680		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3681		radeon_ring_write(ring, control);
3682		radeon_ring_write(ring, lower_32_bits(src_offset));
3683		radeon_ring_write(ring, upper_32_bits(src_offset));
3684		radeon_ring_write(ring, lower_32_bits(dst_offset));
3685		radeon_ring_write(ring, upper_32_bits(dst_offset));
3686		radeon_ring_write(ring, cur_size_in_bytes);
3687		src_offset += cur_size_in_bytes;
3688		dst_offset += cur_size_in_bytes;
3689	}
3690
3691	r = radeon_fence_emit(rdev, &fence, ring->idx);
3692	if (r) {
3693		radeon_ring_unlock_undo(rdev, ring);
3694		radeon_sync_free(rdev, &sync, NULL);
3695		return ERR_PTR(r);
3696	}
3697
3698	radeon_ring_unlock_commit(rdev, ring, false);
3699	radeon_sync_free(rdev, &sync, fence);
3700
3701	return fence;
3702}
3703
3704/*
3705 * IB stuff
3706 */
3707/**
3708 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3709 *
3710 * @rdev: radeon_device pointer
3711 * @ib: radeon indirect buffer object
3712 *
3713 * Emits a DE (drawing engine) or CE (constant engine) IB
3714 * on the gfx ring.  IBs are usually generated by userspace
3715 * acceleration drivers and submitted to the kernel for
3716 * scheduling on the ring.  This function schedules the IB
3717 * on the gfx ring for execution by the GPU.
3718 */
3719void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3720{
3721	struct radeon_ring *ring = &rdev->ring[ib->ring];
3722	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3723	u32 header, control = INDIRECT_BUFFER_VALID;
3724
3725	if (ib->is_const_ib) {
3726		/* set switch buffer packet before const IB */
3727		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3728		radeon_ring_write(ring, 0);
3729
3730		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3731	} else {
3732		u32 next_rptr;
3733		if (ring->rptr_save_reg) {
3734			next_rptr = ring->wptr + 3 + 4;
3735			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3736			radeon_ring_write(ring, ((ring->rptr_save_reg -
3737						  PACKET3_SET_UCONFIG_REG_START) >> 2));
3738			radeon_ring_write(ring, next_rptr);
3739		} else if (rdev->wb.enabled) {
3740			next_rptr = ring->wptr + 5 + 4;
3741			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3742			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3743			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3744			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3745			radeon_ring_write(ring, next_rptr);
3746		}
3747
3748		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3749	}
3750
3751	control |= ib->length_dw | (vm_id << 24);
3752
3753	radeon_ring_write(ring, header);
3754	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
3755	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3756	radeon_ring_write(ring, control);
3757}
3758
3759/**
3760 * cik_ib_test - basic gfx ring IB test
3761 *
3762 * @rdev: radeon_device pointer
3763 * @ring: radeon_ring structure holding ring information
3764 *
3765 * Allocate an IB and execute it on the gfx ring (CIK).
3766 * Provides a basic gfx ring test to verify that IBs are working.
3767 * Returns 0 on success, error on failure.
3768 */
3769int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3770{
3771	struct radeon_ib ib;
3772	uint32_t scratch;
3773	uint32_t tmp = 0;
3774	unsigned i;
3775	int r;
3776
3777	r = radeon_scratch_get(rdev, &scratch);
3778	if (r) {
3779		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3780		return r;
3781	}
3782	WREG32(scratch, 0xCAFEDEAD);
3783	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3784	if (r) {
3785		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3786		radeon_scratch_free(rdev, scratch);
3787		return r;
3788	}
3789	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3790	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3791	ib.ptr[2] = 0xDEADBEEF;
3792	ib.length_dw = 3;
3793	r = radeon_ib_schedule(rdev, &ib, NULL, false);
3794	if (r) {
3795		radeon_scratch_free(rdev, scratch);
3796		radeon_ib_free(rdev, &ib);
3797		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3798		return r;
3799	}
3800	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3801		RADEON_USEC_IB_TEST_TIMEOUT));
3802	if (r < 0) {
3803		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3804		radeon_scratch_free(rdev, scratch);
3805		radeon_ib_free(rdev, &ib);
3806		return r;
3807	} else if (r == 0) {
3808		DRM_ERROR("radeon: fence wait timed out.\n");
3809		radeon_scratch_free(rdev, scratch);
3810		radeon_ib_free(rdev, &ib);
3811		return -ETIMEDOUT;
3812	}
3813	r = 0;
3814	for (i = 0; i < rdev->usec_timeout; i++) {
3815		tmp = RREG32(scratch);
3816		if (tmp == 0xDEADBEEF)
3817			break;
3818		udelay(1);
3819	}
3820	if (i < rdev->usec_timeout) {
3821		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3822	} else {
3823		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3824			  scratch, tmp);
3825		r = -EINVAL;
3826	}
3827	radeon_scratch_free(rdev, scratch);
3828	radeon_ib_free(rdev, &ib);
3829	return r;
3830}
3831
3832/*
3833 * CP.
3834 * On CIK, gfx and compute now have independant command processors.
3835 *
3836 * GFX
3837 * Gfx consists of a single ring and can process both gfx jobs and
3838 * compute jobs.  The gfx CP consists of three microengines (ME):
3839 * PFP - Pre-Fetch Parser
3840 * ME - Micro Engine
3841 * CE - Constant Engine
3842 * The PFP and ME make up what is considered the Drawing Engine (DE).
3843 * The CE is an asynchronous engine used for updating buffer desciptors
3844 * used by the DE so that they can be loaded into cache in parallel
3845 * while the DE is processing state update packets.
3846 *
3847 * Compute
3848 * The compute CP consists of two microengines (ME):
3849 * MEC1 - Compute MicroEngine 1
3850 * MEC2 - Compute MicroEngine 2
3851 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3852 * The queues are exposed to userspace and are programmed directly
3853 * by the compute runtime.
3854 */
3855/**
3856 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3857 *
3858 * @rdev: radeon_device pointer
3859 * @enable: enable or disable the MEs
3860 *
3861 * Halts or unhalts the gfx MEs.
3862 */
3863static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3864{
3865	if (enable)
3866		WREG32(CP_ME_CNTL, 0);
3867	else {
3868		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3869			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3870		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3871		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3872	}
3873	udelay(50);
3874}
3875
3876/**
3877 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3878 *
3879 * @rdev: radeon_device pointer
3880 *
3881 * Loads the gfx PFP, ME, and CE ucode.
3882 * Returns 0 for success, -EINVAL if the ucode is not available.
3883 */
3884static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3885{
3886	int i;
3887
3888	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3889		return -EINVAL;
3890
3891	cik_cp_gfx_enable(rdev, false);
3892
3893	if (rdev->new_fw) {
3894		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3895			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3896		const struct gfx_firmware_header_v1_0 *ce_hdr =
3897			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3898		const struct gfx_firmware_header_v1_0 *me_hdr =
3899			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3900		const __le32 *fw_data;
3901		u32 fw_size;
3902
3903		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3904		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3905		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3906
3907		/* PFP */
3908		fw_data = (const __le32 *)
3909			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3910		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3911		WREG32(CP_PFP_UCODE_ADDR, 0);
3912		for (i = 0; i < fw_size; i++)
3913			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3914		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3915
3916		/* CE */
3917		fw_data = (const __le32 *)
3918			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3919		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3920		WREG32(CP_CE_UCODE_ADDR, 0);
3921		for (i = 0; i < fw_size; i++)
3922			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3923		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3924
3925		/* ME */
3926		fw_data = (const __be32 *)
3927			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3928		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3929		WREG32(CP_ME_RAM_WADDR, 0);
3930		for (i = 0; i < fw_size; i++)
3931			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3932		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3933		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3934	} else {
3935		const __be32 *fw_data;
3936
3937		/* PFP */
3938		fw_data = (const __be32 *)rdev->pfp_fw->data;
3939		WREG32(CP_PFP_UCODE_ADDR, 0);
3940		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3941			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3942		WREG32(CP_PFP_UCODE_ADDR, 0);
3943
3944		/* CE */
3945		fw_data = (const __be32 *)rdev->ce_fw->data;
3946		WREG32(CP_CE_UCODE_ADDR, 0);
3947		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3948			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3949		WREG32(CP_CE_UCODE_ADDR, 0);
3950
3951		/* ME */
3952		fw_data = (const __be32 *)rdev->me_fw->data;
3953		WREG32(CP_ME_RAM_WADDR, 0);
3954		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3955			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3956		WREG32(CP_ME_RAM_WADDR, 0);
3957	}
3958
3959	return 0;
3960}
3961
3962/**
3963 * cik_cp_gfx_start - start the gfx ring
3964 *
3965 * @rdev: radeon_device pointer
3966 *
3967 * Enables the ring and loads the clear state context and other
3968 * packets required to init the ring.
3969 * Returns 0 for success, error for failure.
3970 */
3971static int cik_cp_gfx_start(struct radeon_device *rdev)
3972{
3973	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3974	int r, i;
3975
3976	/* init the CP */
3977	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3978	WREG32(CP_ENDIAN_SWAP, 0);
3979	WREG32(CP_DEVICE_ID, 1);
3980
3981	cik_cp_gfx_enable(rdev, true);
3982
3983	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3984	if (r) {
3985		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3986		return r;
3987	}
3988
3989	/* init the CE partitions.  CE only used for gfx on CIK */
3990	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3991	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3992	radeon_ring_write(ring, 0x8000);
3993	radeon_ring_write(ring, 0x8000);
3994
3995	/* setup clear context state */
3996	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3997	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3998
3999	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4000	radeon_ring_write(ring, 0x80000000);
4001	radeon_ring_write(ring, 0x80000000);
4002
4003	for (i = 0; i < cik_default_size; i++)
4004		radeon_ring_write(ring, cik_default_state[i]);
4005
4006	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4007	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4008
4009	/* set clear context state */
4010	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4011	radeon_ring_write(ring, 0);
4012
4013	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4014	radeon_ring_write(ring, 0x00000316);
4015	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4016	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4017
4018	radeon_ring_unlock_commit(rdev, ring, false);
4019
4020	return 0;
4021}
4022
4023/**
4024 * cik_cp_gfx_fini - stop the gfx ring
4025 *
4026 * @rdev: radeon_device pointer
4027 *
4028 * Stop the gfx ring and tear down the driver ring
4029 * info.
4030 */
4031static void cik_cp_gfx_fini(struct radeon_device *rdev)
4032{
4033	cik_cp_gfx_enable(rdev, false);
4034	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4035}
4036
4037/**
4038 * cik_cp_gfx_resume - setup the gfx ring buffer registers
4039 *
4040 * @rdev: radeon_device pointer
4041 *
4042 * Program the location and size of the gfx ring buffer
4043 * and test it to make sure it's working.
4044 * Returns 0 for success, error for failure.
4045 */
4046static int cik_cp_gfx_resume(struct radeon_device *rdev)
4047{
4048	struct radeon_ring *ring;
4049	u32 tmp;
4050	u32 rb_bufsz;
4051	u64 rb_addr;
4052	int r;
4053
4054	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4055	if (rdev->family != CHIP_HAWAII)
4056		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4057
4058	/* Set the write pointer delay */
4059	WREG32(CP_RB_WPTR_DELAY, 0);
4060
4061	/* set the RB to use vmid 0 */
4062	WREG32(CP_RB_VMID, 0);
4063
4064	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4065
4066	/* ring 0 - compute and gfx */
4067	/* Set ring buffer size */
4068	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4069	rb_bufsz = order_base_2(ring->ring_size / 8);
4070	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4071#ifdef __BIG_ENDIAN
4072	tmp |= BUF_SWAP_32BIT;
4073#endif
4074	WREG32(CP_RB0_CNTL, tmp);
4075
4076	/* Initialize the ring buffer's read and write pointers */
4077	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4078	ring->wptr = 0;
4079	WREG32(CP_RB0_WPTR, ring->wptr);
4080
4081	/* set the wb address wether it's enabled or not */
4082	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4083	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4084
4085	/* scratch register shadowing is no longer supported */
4086	WREG32(SCRATCH_UMSK, 0);
4087
4088	if (!rdev->wb.enabled)
4089		tmp |= RB_NO_UPDATE;
4090
4091	mdelay(1);
4092	WREG32(CP_RB0_CNTL, tmp);
4093
4094	rb_addr = ring->gpu_addr >> 8;
4095	WREG32(CP_RB0_BASE, rb_addr);
4096	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4097
4098	/* start the ring */
4099	cik_cp_gfx_start(rdev);
4100	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4101	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4102	if (r) {
4103		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4104		return r;
4105	}
4106
4107	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4108		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4109
4110	return 0;
4111}
4112
4113u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4114		     struct radeon_ring *ring)
4115{
4116	u32 rptr;
4117
4118	if (rdev->wb.enabled)
4119		rptr = rdev->wb.wb[ring->rptr_offs/4];
4120	else
4121		rptr = RREG32(CP_RB0_RPTR);
4122
4123	return rptr;
4124}
4125
4126u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4127		     struct radeon_ring *ring)
4128{
4129	return RREG32(CP_RB0_WPTR);
4130}
4131
4132void cik_gfx_set_wptr(struct radeon_device *rdev,
4133		      struct radeon_ring *ring)
4134{
4135	WREG32(CP_RB0_WPTR, ring->wptr);
4136	(void)RREG32(CP_RB0_WPTR);
4137}
4138
4139u32 cik_compute_get_rptr(struct radeon_device *rdev,
4140			 struct radeon_ring *ring)
4141{
4142	u32 rptr;
4143
4144	if (rdev->wb.enabled) {
4145		rptr = rdev->wb.wb[ring->rptr_offs/4];
4146	} else {
4147		mutex_lock(&rdev->srbm_mutex);
4148		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4149		rptr = RREG32(CP_HQD_PQ_RPTR);
4150		cik_srbm_select(rdev, 0, 0, 0, 0);
4151		mutex_unlock(&rdev->srbm_mutex);
4152	}
4153
4154	return rptr;
4155}
4156
4157u32 cik_compute_get_wptr(struct radeon_device *rdev,
4158			 struct radeon_ring *ring)
4159{
4160	u32 wptr;
4161
4162	if (rdev->wb.enabled) {
4163		/* XXX check if swapping is necessary on BE */
4164		wptr = rdev->wb.wb[ring->wptr_offs/4];
4165	} else {
4166		mutex_lock(&rdev->srbm_mutex);
4167		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4168		wptr = RREG32(CP_HQD_PQ_WPTR);
4169		cik_srbm_select(rdev, 0, 0, 0, 0);
4170		mutex_unlock(&rdev->srbm_mutex);
4171	}
4172
4173	return wptr;
4174}
4175
4176void cik_compute_set_wptr(struct radeon_device *rdev,
4177			  struct radeon_ring *ring)
4178{
4179	/* XXX check if swapping is necessary on BE */
4180	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4181	WDOORBELL32(ring->doorbell_index, ring->wptr);
4182}
4183
4184static void cik_compute_stop(struct radeon_device *rdev,
4185			     struct radeon_ring *ring)
4186{
4187	u32 j, tmp;
4188
4189	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4190	/* Disable wptr polling. */
4191	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4192	tmp &= ~WPTR_POLL_EN;
4193	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4194	/* Disable HQD. */
4195	if (RREG32(CP_HQD_ACTIVE) & 1) {
4196		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4197		for (j = 0; j < rdev->usec_timeout; j++) {
4198			if (!(RREG32(CP_HQD_ACTIVE) & 1))
4199				break;
4200			udelay(1);
4201		}
4202		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
4203		WREG32(CP_HQD_PQ_RPTR, 0);
4204		WREG32(CP_HQD_PQ_WPTR, 0);
4205	}
4206	cik_srbm_select(rdev, 0, 0, 0, 0);
4207}
4208
4209/**
4210 * cik_cp_compute_enable - enable/disable the compute CP MEs
4211 *
4212 * @rdev: radeon_device pointer
4213 * @enable: enable or disable the MEs
4214 *
4215 * Halts or unhalts the compute MEs.
4216 */
4217static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4218{
4219	if (enable)
4220		WREG32(CP_MEC_CNTL, 0);
4221	else {
4222		/*
4223		 * To make hibernation reliable we need to clear compute ring
4224		 * configuration before halting the compute ring.
4225		 */
4226		mutex_lock(&rdev->srbm_mutex);
4227		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4228		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4229		mutex_unlock(&rdev->srbm_mutex);
4230
4231		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4232		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4233		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4234	}
4235	udelay(50);
4236}
4237
4238/**
4239 * cik_cp_compute_load_microcode - load the compute CP ME ucode
4240 *
4241 * @rdev: radeon_device pointer
4242 *
4243 * Loads the compute MEC1&2 ucode.
4244 * Returns 0 for success, -EINVAL if the ucode is not available.
4245 */
4246static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4247{
4248	int i;
4249
4250	if (!rdev->mec_fw)
4251		return -EINVAL;
4252
4253	cik_cp_compute_enable(rdev, false);
4254
4255	if (rdev->new_fw) {
4256		const struct gfx_firmware_header_v1_0 *mec_hdr =
4257			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4258		const __le32 *fw_data;
4259		u32 fw_size;
4260
4261		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4262
4263		/* MEC1 */
4264		fw_data = (const __le32 *)
4265			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4266		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4267		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4268		for (i = 0; i < fw_size; i++)
4269			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4270		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4271
4272		/* MEC2 */
4273		if (rdev->family == CHIP_KAVERI) {
4274			const struct gfx_firmware_header_v1_0 *mec2_hdr =
4275				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4276
4277			fw_data = (const __le32 *)
4278				(rdev->mec2_fw->data +
4279				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4280			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4281			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4282			for (i = 0; i < fw_size; i++)
4283				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4284			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4285		}
4286	} else {
4287		const __be32 *fw_data;
4288
4289		/* MEC1 */
4290		fw_data = (const __be32 *)rdev->mec_fw->data;
4291		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4292		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4293			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4294		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4295
4296		if (rdev->family == CHIP_KAVERI) {
4297			/* MEC2 */
4298			fw_data = (const __be32 *)rdev->mec_fw->data;
4299			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4300			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4301				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4302			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4303		}
4304	}
4305
4306	return 0;
4307}
4308
4309/**
4310 * cik_cp_compute_start - start the compute queues
4311 *
4312 * @rdev: radeon_device pointer
4313 *
4314 * Enable the compute queues.
4315 * Returns 0 for success, error for failure.
4316 */
4317static int cik_cp_compute_start(struct radeon_device *rdev)
4318{
4319	cik_cp_compute_enable(rdev, true);
4320
4321	return 0;
4322}
4323
4324/**
4325 * cik_cp_compute_fini - stop the compute queues
4326 *
4327 * @rdev: radeon_device pointer
4328 *
4329 * Stop the compute queues and tear down the driver queue
4330 * info.
4331 */
4332static void cik_cp_compute_fini(struct radeon_device *rdev)
4333{
4334	int i, idx, r;
4335
4336	cik_cp_compute_enable(rdev, false);
4337
4338	for (i = 0; i < 2; i++) {
4339		if (i == 0)
4340			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4341		else
4342			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4343
4344		if (rdev->ring[idx].mqd_obj) {
4345			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4346			if (unlikely(r != 0))
4347				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4348
4349			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4350			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4351
4352			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4353			rdev->ring[idx].mqd_obj = NULL;
4354		}
4355	}
4356}
4357
4358static void cik_mec_fini(struct radeon_device *rdev)
4359{
4360	int r;
4361
4362	if (rdev->mec.hpd_eop_obj) {
4363		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4364		if (unlikely(r != 0))
4365			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4366		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4367		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4368
4369		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4370		rdev->mec.hpd_eop_obj = NULL;
4371	}
4372}
4373
4374#define MEC_HPD_SIZE 2048
4375
4376static int cik_mec_init(struct radeon_device *rdev)
4377{
4378	int r;
4379	u32 *hpd;
4380
4381	/*
4382	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4383	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4384	 */
4385	if (rdev->family == CHIP_KAVERI)
4386		rdev->mec.num_mec = 2;
4387	else
4388		rdev->mec.num_mec = 1;
4389	rdev->mec.num_pipe = 4;
4390	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4391
4392	if (rdev->mec.hpd_eop_obj == NULL) {
4393		r = radeon_bo_create(rdev,
4394				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4395				     PAGE_SIZE, true,
4396				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4397				     &rdev->mec.hpd_eop_obj);
4398		if (r) {
4399			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4400			return r;
4401		}
4402	}
4403
4404	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4405	if (unlikely(r != 0)) {
4406		cik_mec_fini(rdev);
4407		return r;
4408	}
4409	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4410			  &rdev->mec.hpd_eop_gpu_addr);
4411	if (r) {
4412		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4413		cik_mec_fini(rdev);
4414		return r;
4415	}
4416	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4417	if (r) {
4418		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4419		cik_mec_fini(rdev);
4420		return r;
4421	}
4422
4423	/* clear memory.  Not sure if this is required or not */
4424	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4425
4426	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4427	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4428
4429	return 0;
4430}
4431
4432struct hqd_registers
4433{
4434	u32 cp_mqd_base_addr;
4435	u32 cp_mqd_base_addr_hi;
4436	u32 cp_hqd_active;
4437	u32 cp_hqd_vmid;
4438	u32 cp_hqd_persistent_state;
4439	u32 cp_hqd_pipe_priority;
4440	u32 cp_hqd_queue_priority;
4441	u32 cp_hqd_quantum;
4442	u32 cp_hqd_pq_base;
4443	u32 cp_hqd_pq_base_hi;
4444	u32 cp_hqd_pq_rptr;
4445	u32 cp_hqd_pq_rptr_report_addr;
4446	u32 cp_hqd_pq_rptr_report_addr_hi;
4447	u32 cp_hqd_pq_wptr_poll_addr;
4448	u32 cp_hqd_pq_wptr_poll_addr_hi;
4449	u32 cp_hqd_pq_doorbell_control;
4450	u32 cp_hqd_pq_wptr;
4451	u32 cp_hqd_pq_control;
4452	u32 cp_hqd_ib_base_addr;
4453	u32 cp_hqd_ib_base_addr_hi;
4454	u32 cp_hqd_ib_rptr;
4455	u32 cp_hqd_ib_control;
4456	u32 cp_hqd_iq_timer;
4457	u32 cp_hqd_iq_rptr;
4458	u32 cp_hqd_dequeue_request;
4459	u32 cp_hqd_dma_offload;
4460	u32 cp_hqd_sema_cmd;
4461	u32 cp_hqd_msg_type;
4462	u32 cp_hqd_atomic0_preop_lo;
4463	u32 cp_hqd_atomic0_preop_hi;
4464	u32 cp_hqd_atomic1_preop_lo;
4465	u32 cp_hqd_atomic1_preop_hi;
4466	u32 cp_hqd_hq_scheduler0;
4467	u32 cp_hqd_hq_scheduler1;
4468	u32 cp_mqd_control;
4469};
4470
4471struct bonaire_mqd
4472{
4473	u32 header;
4474	u32 dispatch_initiator;
4475	u32 dimensions[3];
4476	u32 start_idx[3];
4477	u32 num_threads[3];
4478	u32 pipeline_stat_enable;
4479	u32 perf_counter_enable;
4480	u32 pgm[2];
4481	u32 tba[2];
4482	u32 tma[2];
4483	u32 pgm_rsrc[2];
4484	u32 vmid;
4485	u32 resource_limits;
4486	u32 static_thread_mgmt01[2];
4487	u32 tmp_ring_size;
4488	u32 static_thread_mgmt23[2];
4489	u32 restart[3];
4490	u32 thread_trace_enable;
4491	u32 reserved1;
4492	u32 user_data[16];
4493	u32 vgtcs_invoke_count[2];
4494	struct hqd_registers queue_state;
4495	u32 dequeue_cntr;
4496	u32 interrupt_queue[64];
4497};
4498
4499/**
4500 * cik_cp_compute_resume - setup the compute queue registers
4501 *
4502 * @rdev: radeon_device pointer
4503 *
4504 * Program the compute queues and test them to make sure they
4505 * are working.
4506 * Returns 0 for success, error for failure.
4507 */
4508static int cik_cp_compute_resume(struct radeon_device *rdev)
4509{
4510	int r, i, j, idx;
4511	u32 tmp;
4512	bool use_doorbell = true;
4513	u64 hqd_gpu_addr;
4514	u64 mqd_gpu_addr;
4515	u64 eop_gpu_addr;
4516	u64 wb_gpu_addr;
4517	u32 *buf;
4518	struct bonaire_mqd *mqd;
4519
4520	r = cik_cp_compute_start(rdev);
4521	if (r)
4522		return r;
4523
4524	/* fix up chicken bits */
4525	tmp = RREG32(CP_CPF_DEBUG);
4526	tmp |= (1 << 23);
4527	WREG32(CP_CPF_DEBUG, tmp);
4528
4529	/* init the pipes */
4530	mutex_lock(&rdev->srbm_mutex);
4531
4532	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); ++i) {
4533		int me = (i < 4) ? 1 : 2;
4534		int pipe = (i < 4) ? i : (i - 4);
4535
4536		cik_srbm_select(rdev, me, pipe, 0, 0);
4537
4538		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2) ;
4539		/* write the EOP addr */
4540		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4541		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4542
4543		/* set the VMID assigned */
4544		WREG32(CP_HPD_EOP_VMID, 0);
4545
4546		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4547		tmp = RREG32(CP_HPD_EOP_CONTROL);
4548		tmp &= ~EOP_SIZE_MASK;
4549		tmp |= order_base_2(MEC_HPD_SIZE / 8);
4550		WREG32(CP_HPD_EOP_CONTROL, tmp);
4551
4552	}
4553	cik_srbm_select(rdev, 0, 0, 0, 0);
4554	mutex_unlock(&rdev->srbm_mutex);
4555
4556	/* init the queues.  Just two for now. */
4557	for (i = 0; i < 2; i++) {
4558		if (i == 0)
4559			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4560		else
4561			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4562
4563		if (rdev->ring[idx].mqd_obj == NULL) {
4564			r = radeon_bo_create(rdev,
4565					     sizeof(struct bonaire_mqd),
4566					     PAGE_SIZE, true,
4567					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4568					     NULL, &rdev->ring[idx].mqd_obj);
4569			if (r) {
4570				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4571				return r;
4572			}
4573		}
4574
4575		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4576		if (unlikely(r != 0)) {
4577			cik_cp_compute_fini(rdev);
4578			return r;
4579		}
4580		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4581				  &mqd_gpu_addr);
4582		if (r) {
4583			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4584			cik_cp_compute_fini(rdev);
4585			return r;
4586		}
4587		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4588		if (r) {
4589			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4590			cik_cp_compute_fini(rdev);
4591			return r;
4592		}
4593
4594		/* init the mqd struct */
4595		memset(buf, 0, sizeof(struct bonaire_mqd));
4596
4597		mqd = (struct bonaire_mqd *)buf;
4598		mqd->header = 0xC0310800;
4599		mqd->static_thread_mgmt01[0] = 0xffffffff;
4600		mqd->static_thread_mgmt01[1] = 0xffffffff;
4601		mqd->static_thread_mgmt23[0] = 0xffffffff;
4602		mqd->static_thread_mgmt23[1] = 0xffffffff;
4603
4604		mutex_lock(&rdev->srbm_mutex);
4605		cik_srbm_select(rdev, rdev->ring[idx].me,
4606				rdev->ring[idx].pipe,
4607				rdev->ring[idx].queue, 0);
4608
4609		/* disable wptr polling */
4610		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4611		tmp &= ~WPTR_POLL_EN;
4612		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4613
4614		/* enable doorbell? */
4615		mqd->queue_state.cp_hqd_pq_doorbell_control =
4616			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4617		if (use_doorbell)
4618			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4619		else
4620			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4621		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4622		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4623
4624		/* disable the queue if it's active */
4625		mqd->queue_state.cp_hqd_dequeue_request = 0;
4626		mqd->queue_state.cp_hqd_pq_rptr = 0;
4627		mqd->queue_state.cp_hqd_pq_wptr= 0;
4628		if (RREG32(CP_HQD_ACTIVE) & 1) {
4629			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4630			for (j = 0; j < rdev->usec_timeout; j++) {
4631				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4632					break;
4633				udelay(1);
4634			}
4635			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4636			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4637			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4638		}
4639
4640		/* set the pointer to the MQD */
4641		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4642		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4643		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4644		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4645		/* set MQD vmid to 0 */
4646		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4647		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4648		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4649
4650		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4651		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4652		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4653		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4654		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4655		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4656
4657		/* set up the HQD, this is similar to CP_RB0_CNTL */
4658		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4659		mqd->queue_state.cp_hqd_pq_control &=
4660			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4661
4662		mqd->queue_state.cp_hqd_pq_control |=
4663			order_base_2(rdev->ring[idx].ring_size / 8);
4664		mqd->queue_state.cp_hqd_pq_control |=
4665			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4666#ifdef __BIG_ENDIAN
4667		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4668#endif
4669		mqd->queue_state.cp_hqd_pq_control &=
4670			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4671		mqd->queue_state.cp_hqd_pq_control |=
4672			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4673		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4674
4675		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4676		if (i == 0)
4677			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4678		else
4679			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4680		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4681		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4682		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4683		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4684		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4685
4686		/* set the wb address wether it's enabled or not */
4687		if (i == 0)
4688			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4689		else
4690			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4691		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4692		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4693			upper_32_bits(wb_gpu_addr) & 0xffff;
4694		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4695		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4696		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4697		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4698
4699		/* enable the doorbell if requested */
4700		if (use_doorbell) {
4701			mqd->queue_state.cp_hqd_pq_doorbell_control =
4702				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4703			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4704			mqd->queue_state.cp_hqd_pq_doorbell_control |=
4705				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4706			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4707			mqd->queue_state.cp_hqd_pq_doorbell_control &=
4708				~(DOORBELL_SOURCE | DOORBELL_HIT);
4709
4710		} else {
4711			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4712		}
4713		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4714		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4715
4716		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4717		rdev->ring[idx].wptr = 0;
4718		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4719		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4720		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4721
4722		/* set the vmid for the queue */
4723		mqd->queue_state.cp_hqd_vmid = 0;
4724		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4725
4726		/* activate the queue */
4727		mqd->queue_state.cp_hqd_active = 1;
4728		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4729
4730		cik_srbm_select(rdev, 0, 0, 0, 0);
4731		mutex_unlock(&rdev->srbm_mutex);
4732
4733		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4734		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4735
4736		rdev->ring[idx].ready = true;
4737		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4738		if (r)
4739			rdev->ring[idx].ready = false;
4740	}
4741
4742	return 0;
4743}
4744
4745static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4746{
4747	cik_cp_gfx_enable(rdev, enable);
4748	cik_cp_compute_enable(rdev, enable);
4749}
4750
4751static int cik_cp_load_microcode(struct radeon_device *rdev)
4752{
4753	int r;
4754
4755	r = cik_cp_gfx_load_microcode(rdev);
4756	if (r)
4757		return r;
4758	r = cik_cp_compute_load_microcode(rdev);
4759	if (r)
4760		return r;
4761
4762	return 0;
4763}
4764
4765static void cik_cp_fini(struct radeon_device *rdev)
4766{
4767	cik_cp_gfx_fini(rdev);
4768	cik_cp_compute_fini(rdev);
4769}
4770
4771static int cik_cp_resume(struct radeon_device *rdev)
4772{
4773	int r;
4774
4775	cik_enable_gui_idle_interrupt(rdev, false);
4776
4777	r = cik_cp_load_microcode(rdev);
4778	if (r)
4779		return r;
4780
4781	r = cik_cp_gfx_resume(rdev);
4782	if (r)
4783		return r;
4784	r = cik_cp_compute_resume(rdev);
4785	if (r)
4786		return r;
4787
4788	cik_enable_gui_idle_interrupt(rdev, true);
4789
4790	return 0;
4791}
4792
4793static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4794{
4795	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4796		RREG32(GRBM_STATUS));
4797	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4798		RREG32(GRBM_STATUS2));
4799	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4800		RREG32(GRBM_STATUS_SE0));
4801	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4802		RREG32(GRBM_STATUS_SE1));
4803	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4804		RREG32(GRBM_STATUS_SE2));
4805	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4806		RREG32(GRBM_STATUS_SE3));
4807	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4808		RREG32(SRBM_STATUS));
4809	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4810		RREG32(SRBM_STATUS2));
4811	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4812		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4813	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4814		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4815	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4816	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4817		 RREG32(CP_STALLED_STAT1));
4818	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4819		 RREG32(CP_STALLED_STAT2));
4820	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4821		 RREG32(CP_STALLED_STAT3));
4822	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4823		 RREG32(CP_CPF_BUSY_STAT));
4824	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4825		 RREG32(CP_CPF_STALLED_STAT1));
4826	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4827	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4828	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4829		 RREG32(CP_CPC_STALLED_STAT1));
4830	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4831}
4832
4833/**
4834 * cik_gpu_check_soft_reset - check which blocks are busy
4835 *
4836 * @rdev: radeon_device pointer
4837 *
4838 * Check which blocks are busy and return the relevant reset
4839 * mask to be used by cik_gpu_soft_reset().
4840 * Returns a mask of the blocks to be reset.
4841 */
4842u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4843{
4844	u32 reset_mask = 0;
4845	u32 tmp;
4846
4847	/* GRBM_STATUS */
4848	tmp = RREG32(GRBM_STATUS);
4849	if (tmp & (PA_BUSY | SC_BUSY |
4850		   BCI_BUSY | SX_BUSY |
4851		   TA_BUSY | VGT_BUSY |
4852		   DB_BUSY | CB_BUSY |
4853		   GDS_BUSY | SPI_BUSY |
4854		   IA_BUSY | IA_BUSY_NO_DMA))
4855		reset_mask |= RADEON_RESET_GFX;
4856
4857	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4858		reset_mask |= RADEON_RESET_CP;
4859
4860	/* GRBM_STATUS2 */
4861	tmp = RREG32(GRBM_STATUS2);
4862	if (tmp & RLC_BUSY)
4863		reset_mask |= RADEON_RESET_RLC;
4864
4865	/* SDMA0_STATUS_REG */
4866	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4867	if (!(tmp & SDMA_IDLE))
4868		reset_mask |= RADEON_RESET_DMA;
4869
4870	/* SDMA1_STATUS_REG */
4871	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4872	if (!(tmp & SDMA_IDLE))
4873		reset_mask |= RADEON_RESET_DMA1;
4874
4875	/* SRBM_STATUS2 */
4876	tmp = RREG32(SRBM_STATUS2);
4877	if (tmp & SDMA_BUSY)
4878		reset_mask |= RADEON_RESET_DMA;
4879
4880	if (tmp & SDMA1_BUSY)
4881		reset_mask |= RADEON_RESET_DMA1;
4882
4883	/* SRBM_STATUS */
4884	tmp = RREG32(SRBM_STATUS);
4885
4886	if (tmp & IH_BUSY)
4887		reset_mask |= RADEON_RESET_IH;
4888
4889	if (tmp & SEM_BUSY)
4890		reset_mask |= RADEON_RESET_SEM;
4891
4892	if (tmp & GRBM_RQ_PENDING)
4893		reset_mask |= RADEON_RESET_GRBM;
4894
4895	if (tmp & VMC_BUSY)
4896		reset_mask |= RADEON_RESET_VMC;
4897
4898	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4899		   MCC_BUSY | MCD_BUSY))
4900		reset_mask |= RADEON_RESET_MC;
4901
4902	if (evergreen_is_display_hung(rdev))
4903		reset_mask |= RADEON_RESET_DISPLAY;
4904
4905	/* Skip MC reset as it's mostly likely not hung, just busy */
4906	if (reset_mask & RADEON_RESET_MC) {
4907		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4908		reset_mask &= ~RADEON_RESET_MC;
4909	}
4910
4911	return reset_mask;
4912}
4913
4914/**
4915 * cik_gpu_soft_reset - soft reset GPU
4916 *
4917 * @rdev: radeon_device pointer
4918 * @reset_mask: mask of which blocks to reset
4919 *
4920 * Soft reset the blocks specified in @reset_mask.
4921 */
4922static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4923{
4924	struct evergreen_mc_save save;
4925	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4926	u32 tmp;
4927
4928	if (reset_mask == 0)
4929		return;
4930
4931	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4932
4933	cik_print_gpu_status_regs(rdev);
4934	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4935		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4936	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4937		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4938
4939	/* disable CG/PG */
4940	cik_fini_pg(rdev);
4941	cik_fini_cg(rdev);
4942
4943	/* stop the rlc */
4944	cik_rlc_stop(rdev);
4945
4946	/* Disable GFX parsing/prefetching */
4947	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4948
4949	/* Disable MEC parsing/prefetching */
4950	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4951
4952	if (reset_mask & RADEON_RESET_DMA) {
4953		/* sdma0 */
4954		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4955		tmp |= SDMA_HALT;
4956		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4957	}
4958	if (reset_mask & RADEON_RESET_DMA1) {
4959		/* sdma1 */
4960		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4961		tmp |= SDMA_HALT;
4962		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4963	}
4964
4965	evergreen_mc_stop(rdev, &save);
4966	if (evergreen_mc_wait_for_idle(rdev)) {
4967		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4968	}
4969
4970	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4971		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4972
4973	if (reset_mask & RADEON_RESET_CP) {
4974		grbm_soft_reset |= SOFT_RESET_CP;
4975
4976		srbm_soft_reset |= SOFT_RESET_GRBM;
4977	}
4978
4979	if (reset_mask & RADEON_RESET_DMA)
4980		srbm_soft_reset |= SOFT_RESET_SDMA;
4981
4982	if (reset_mask & RADEON_RESET_DMA1)
4983		srbm_soft_reset |= SOFT_RESET_SDMA1;
4984
4985	if (reset_mask & RADEON_RESET_DISPLAY)
4986		srbm_soft_reset |= SOFT_RESET_DC;
4987
4988	if (reset_mask & RADEON_RESET_RLC)
4989		grbm_soft_reset |= SOFT_RESET_RLC;
4990
4991	if (reset_mask & RADEON_RESET_SEM)
4992		srbm_soft_reset |= SOFT_RESET_SEM;
4993
4994	if (reset_mask & RADEON_RESET_IH)
4995		srbm_soft_reset |= SOFT_RESET_IH;
4996
4997	if (reset_mask & RADEON_RESET_GRBM)
4998		srbm_soft_reset |= SOFT_RESET_GRBM;
4999
5000	if (reset_mask & RADEON_RESET_VMC)
5001		srbm_soft_reset |= SOFT_RESET_VMC;
5002
5003	if (!(rdev->flags & RADEON_IS_IGP)) {
5004		if (reset_mask & RADEON_RESET_MC)
5005			srbm_soft_reset |= SOFT_RESET_MC;
5006	}
5007
5008	if (grbm_soft_reset) {
5009		tmp = RREG32(GRBM_SOFT_RESET);
5010		tmp |= grbm_soft_reset;
5011		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5012		WREG32(GRBM_SOFT_RESET, tmp);
5013		tmp = RREG32(GRBM_SOFT_RESET);
5014
5015		udelay(50);
5016
5017		tmp &= ~grbm_soft_reset;
5018		WREG32(GRBM_SOFT_RESET, tmp);
5019		tmp = RREG32(GRBM_SOFT_RESET);
5020	}
5021
5022	if (srbm_soft_reset) {
5023		tmp = RREG32(SRBM_SOFT_RESET);
5024		tmp |= srbm_soft_reset;
5025		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5026		WREG32(SRBM_SOFT_RESET, tmp);
5027		tmp = RREG32(SRBM_SOFT_RESET);
5028
5029		udelay(50);
5030
5031		tmp &= ~srbm_soft_reset;
5032		WREG32(SRBM_SOFT_RESET, tmp);
5033		tmp = RREG32(SRBM_SOFT_RESET);
5034	}
5035
5036	/* Wait a little for things to settle down */
5037	udelay(50);
5038
5039	evergreen_mc_resume(rdev, &save);
5040	udelay(50);
5041
5042	cik_print_gpu_status_regs(rdev);
5043}
5044
5045struct kv_reset_save_regs {
5046	u32 gmcon_reng_execute;
5047	u32 gmcon_misc;
5048	u32 gmcon_misc3;
5049};
5050
5051static void kv_save_regs_for_reset(struct radeon_device *rdev,
5052				   struct kv_reset_save_regs *save)
5053{
5054	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5055	save->gmcon_misc = RREG32(GMCON_MISC);
5056	save->gmcon_misc3 = RREG32(GMCON_MISC3);
5057
5058	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5059	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5060						STCTRL_STUTTER_EN));
5061}
5062
5063static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5064				      struct kv_reset_save_regs *save)
5065{
5066	int i;
5067
5068	WREG32(GMCON_PGFSM_WRITE, 0);
5069	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5070
5071	for (i = 0; i < 5; i++)
5072		WREG32(GMCON_PGFSM_WRITE, 0);
5073
5074	WREG32(GMCON_PGFSM_WRITE, 0);
5075	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5076
5077	for (i = 0; i < 5; i++)
5078		WREG32(GMCON_PGFSM_WRITE, 0);
5079
5080	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5081	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5082
5083	for (i = 0; i < 5; i++)
5084		WREG32(GMCON_PGFSM_WRITE, 0);
5085
5086	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5087	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5088
5089	for (i = 0; i < 5; i++)
5090		WREG32(GMCON_PGFSM_WRITE, 0);
5091
5092	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5093	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5094
5095	for (i = 0; i < 5; i++)
5096		WREG32(GMCON_PGFSM_WRITE, 0);
5097
5098	WREG32(GMCON_PGFSM_WRITE, 0);
5099	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5100
5101	for (i = 0; i < 5; i++)
5102		WREG32(GMCON_PGFSM_WRITE, 0);
5103
5104	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5105	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5106
5107	for (i = 0; i < 5; i++)
5108		WREG32(GMCON_PGFSM_WRITE, 0);
5109
5110	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5111	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5112
5113	for (i = 0; i < 5; i++)
5114		WREG32(GMCON_PGFSM_WRITE, 0);
5115
5116	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5117	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5118
5119	for (i = 0; i < 5; i++)
5120		WREG32(GMCON_PGFSM_WRITE, 0);
5121
5122	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5123	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5124
5125	for (i = 0; i < 5; i++)
5126		WREG32(GMCON_PGFSM_WRITE, 0);
5127
5128	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5129	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5130
5131	WREG32(GMCON_MISC3, save->gmcon_misc3);
5132	WREG32(GMCON_MISC, save->gmcon_misc);
5133	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5134}
5135
5136static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5137{
5138	struct evergreen_mc_save save;
5139	struct kv_reset_save_regs kv_save = { 0 };
5140	u32 tmp, i;
5141
5142	dev_info(rdev->dev, "GPU pci config reset\n");
5143
5144	/* disable dpm? */
5145
5146	/* disable cg/pg */
5147	cik_fini_pg(rdev);
5148	cik_fini_cg(rdev);
5149
5150	/* Disable GFX parsing/prefetching */
5151	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5152
5153	/* Disable MEC parsing/prefetching */
5154	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5155
5156	/* sdma0 */
5157	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5158	tmp |= SDMA_HALT;
5159	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5160	/* sdma1 */
5161	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5162	tmp |= SDMA_HALT;
5163	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5164	/* XXX other engines? */
5165
5166	/* halt the rlc, disable cp internal ints */
5167	cik_rlc_stop(rdev);
5168
5169	udelay(50);
5170
5171	/* disable mem access */
5172	evergreen_mc_stop(rdev, &save);
5173	if (evergreen_mc_wait_for_idle(rdev)) {
5174		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
5175	}
5176
5177	if (rdev->flags & RADEON_IS_IGP)
5178		kv_save_regs_for_reset(rdev, &kv_save);
5179
5180	/* disable BM */
5181	pci_clear_master(rdev->pdev);
5182	/* reset */
5183	radeon_pci_config_reset(rdev);
5184
5185	udelay(100);
5186
5187	/* wait for asic to come out of reset */
5188	for (i = 0; i < rdev->usec_timeout; i++) {
5189		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5190			break;
5191		udelay(1);
5192	}
5193
5194	/* does asic init need to be run first??? */
5195	if (rdev->flags & RADEON_IS_IGP)
5196		kv_restore_regs_for_reset(rdev, &kv_save);
5197}
5198
5199/**
5200 * cik_asic_reset - soft reset GPU
5201 *
5202 * @rdev: radeon_device pointer
5203 * @hard: force hard reset
5204 *
5205 * Look up which blocks are hung and attempt
5206 * to reset them.
5207 * Returns 0 for success.
5208 */
5209int cik_asic_reset(struct radeon_device *rdev, bool hard)
5210{
5211	u32 reset_mask;
5212
5213	if (hard) {
5214		cik_gpu_pci_config_reset(rdev);
5215		return 0;
5216	}
5217
5218	reset_mask = cik_gpu_check_soft_reset(rdev);
5219
5220	if (reset_mask)
5221		r600_set_bios_scratch_engine_hung(rdev, true);
5222
5223	/* try soft reset */
5224	cik_gpu_soft_reset(rdev, reset_mask);
5225
5226	reset_mask = cik_gpu_check_soft_reset(rdev);
5227
5228	/* try pci config reset */
5229	if (reset_mask && radeon_hard_reset)
5230		cik_gpu_pci_config_reset(rdev);
5231
5232	reset_mask = cik_gpu_check_soft_reset(rdev);
5233
5234	if (!reset_mask)
5235		r600_set_bios_scratch_engine_hung(rdev, false);
5236
5237	return 0;
5238}
5239
5240/**
5241 * cik_gfx_is_lockup - check if the 3D engine is locked up
5242 *
5243 * @rdev: radeon_device pointer
5244 * @ring: radeon_ring structure holding ring information
5245 *
5246 * Check if the 3D engine is locked up (CIK).
5247 * Returns true if the engine is locked, false if not.
5248 */
5249bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5250{
5251	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5252
5253	if (!(reset_mask & (RADEON_RESET_GFX |
5254			    RADEON_RESET_COMPUTE |
5255			    RADEON_RESET_CP))) {
5256		radeon_ring_lockup_update(rdev, ring);
5257		return false;
5258	}
5259	return radeon_ring_test_lockup(rdev, ring);
5260}
5261
5262/* MC */
5263/**
5264 * cik_mc_program - program the GPU memory controller
5265 *
5266 * @rdev: radeon_device pointer
5267 *
5268 * Set the location of vram, gart, and AGP in the GPU's
5269 * physical address space (CIK).
5270 */
5271static void cik_mc_program(struct radeon_device *rdev)
5272{
5273	struct evergreen_mc_save save;
5274	u32 tmp;
5275	int i, j;
5276
5277	/* Initialize HDP */
5278	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5279		WREG32((0x2c14 + j), 0x00000000);
5280		WREG32((0x2c18 + j), 0x00000000);
5281		WREG32((0x2c1c + j), 0x00000000);
5282		WREG32((0x2c20 + j), 0x00000000);
5283		WREG32((0x2c24 + j), 0x00000000);
5284	}
5285	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5286
5287	evergreen_mc_stop(rdev, &save);
5288	if (radeon_mc_wait_for_idle(rdev)) {
5289		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5290	}
5291	/* Lockout access through VGA aperture*/
5292	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5293	/* Update configuration */
5294	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5295	       rdev->mc.vram_start >> 12);
5296	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5297	       rdev->mc.vram_end >> 12);
5298	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5299	       rdev->vram_scratch.gpu_addr >> 12);
5300	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5301	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5302	WREG32(MC_VM_FB_LOCATION, tmp);
5303	/* XXX double check these! */
5304	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5305	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5306	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5307	WREG32(MC_VM_AGP_BASE, 0);
5308	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5309	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5310	if (radeon_mc_wait_for_idle(rdev)) {
5311		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5312	}
5313	evergreen_mc_resume(rdev, &save);
5314	/* we need to own VRAM, so turn off the VGA renderer here
5315	 * to stop it overwriting our objects */
5316	rv515_vga_render_disable(rdev);
5317}
5318
5319/**
5320 * cik_mc_init - initialize the memory controller driver params
5321 *
5322 * @rdev: radeon_device pointer
5323 *
5324 * Look up the amount of vram, vram width, and decide how to place
5325 * vram and gart within the GPU's physical address space (CIK).
5326 * Returns 0 for success.
5327 */
5328static int cik_mc_init(struct radeon_device *rdev)
5329{
5330	u32 tmp;
5331	int chansize, numchan;
5332
5333	/* Get VRAM informations */
5334	rdev->mc.vram_is_ddr = true;
5335	tmp = RREG32(MC_ARB_RAMCFG);
5336	if (tmp & CHANSIZE_MASK) {
5337		chansize = 64;
5338	} else {
5339		chansize = 32;
5340	}
5341	tmp = RREG32(MC_SHARED_CHMAP);
5342	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5343	case 0:
5344	default:
5345		numchan = 1;
5346		break;
5347	case 1:
5348		numchan = 2;
5349		break;
5350	case 2:
5351		numchan = 4;
5352		break;
5353	case 3:
5354		numchan = 8;
5355		break;
5356	case 4:
5357		numchan = 3;
5358		break;
5359	case 5:
5360		numchan = 6;
5361		break;
5362	case 6:
5363		numchan = 10;
5364		break;
5365	case 7:
5366		numchan = 12;
5367		break;
5368	case 8:
5369		numchan = 16;
5370		break;
5371	}
5372	rdev->mc.vram_width = numchan * chansize;
5373	/* Could aper size report 0 ? */
5374	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5375	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5376	/* size in MB on si */
5377	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5378	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5379	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5380	si_vram_gtt_location(rdev, &rdev->mc);
5381	radeon_update_bandwidth_info(rdev);
5382
5383	return 0;
5384}
5385
5386/*
5387 * GART
5388 * VMID 0 is the physical GPU addresses as used by the kernel.
5389 * VMIDs 1-15 are used for userspace clients and are handled
5390 * by the radeon vm/hsa code.
5391 */
5392/**
5393 * cik_pcie_gart_tlb_flush - gart tlb flush callback
5394 *
5395 * @rdev: radeon_device pointer
5396 *
5397 * Flush the TLB for the VMID 0 page table (CIK).
5398 */
5399void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5400{
5401	/* flush hdp cache */
5402	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5403
5404	/* bits 0-15 are the VM contexts0-15 */
5405	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5406}
5407
5408/**
5409 * cik_pcie_gart_enable - gart enable
5410 *
5411 * @rdev: radeon_device pointer
5412 *
5413 * This sets up the TLBs, programs the page tables for VMID0,
5414 * sets up the hw for VMIDs 1-15 which are allocated on
5415 * demand, and sets up the global locations for the LDS, GDS,
5416 * and GPUVM for FSA64 clients (CIK).
5417 * Returns 0 for success, errors for failure.
5418 */
5419static int cik_pcie_gart_enable(struct radeon_device *rdev)
5420{
5421	int r, i;
5422
5423	if (rdev->gart.robj == NULL) {
5424		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5425		return -EINVAL;
5426	}
5427	r = radeon_gart_table_vram_pin(rdev);
5428	if (r)
5429		return r;
5430	/* Setup TLB control */
5431	WREG32(MC_VM_MX_L1_TLB_CNTL,
5432	       (0xA << 7) |
5433	       ENABLE_L1_TLB |
5434	       ENABLE_L1_FRAGMENT_PROCESSING |
5435	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5436	       ENABLE_ADVANCED_DRIVER_MODEL |
5437	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5438	/* Setup L2 cache */
5439	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5440	       ENABLE_L2_FRAGMENT_PROCESSING |
5441	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5442	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5443	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5444	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5445	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5446	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5447	       BANK_SELECT(4) |
5448	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5449	/* setup context0 */
5450	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5451	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5452	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5453	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5454			(u32)(rdev->dummy_page.addr >> 12));
5455	WREG32(VM_CONTEXT0_CNTL2, 0);
5456	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5457				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5458
5459	WREG32(0x15D4, 0);
5460	WREG32(0x15D8, 0);
5461	WREG32(0x15DC, 0);
5462
5463	/* restore context1-15 */
5464	/* set vm size, must be a multiple of 4 */
5465	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5466	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5467	for (i = 1; i < 16; i++) {
5468		if (i < 8)
5469			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5470			       rdev->vm_manager.saved_table_addr[i]);
5471		else
5472			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5473			       rdev->vm_manager.saved_table_addr[i]);
5474	}
5475
5476	/* enable context1-15 */
5477	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5478	       (u32)(rdev->dummy_page.addr >> 12));
5479	WREG32(VM_CONTEXT1_CNTL2, 4);
5480	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5481				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5482				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5483				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5484				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5485				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5486				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5487				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5488				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5489				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5490				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5491				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5492				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5493				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5494
5495	if (rdev->family == CHIP_KAVERI) {
5496		u32 tmp = RREG32(CHUB_CONTROL);
5497		tmp &= ~BYPASS_VM;
5498		WREG32(CHUB_CONTROL, tmp);
5499	}
5500
5501	/* XXX SH_MEM regs */
5502	/* where to put LDS, scratch, GPUVM in FSA64 space */
5503	mutex_lock(&rdev->srbm_mutex);
5504	for (i = 0; i < 16; i++) {
5505		cik_srbm_select(rdev, 0, 0, 0, i);
5506		/* CP and shaders */
5507		WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
5508		WREG32(SH_MEM_APE1_BASE, 1);
5509		WREG32(SH_MEM_APE1_LIMIT, 0);
5510		WREG32(SH_MEM_BASES, 0);
5511		/* SDMA GFX */
5512		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5513		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5514		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5515		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5516		/* XXX SDMA RLC - todo */
5517	}
5518	cik_srbm_select(rdev, 0, 0, 0, 0);
5519	mutex_unlock(&rdev->srbm_mutex);
5520
5521	cik_pcie_gart_tlb_flush(rdev);
5522	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5523		 (unsigned)(rdev->mc.gtt_size >> 20),
5524		 (unsigned long long)rdev->gart.table_addr);
5525	rdev->gart.ready = true;
5526	return 0;
5527}
5528
5529/**
5530 * cik_pcie_gart_disable - gart disable
5531 *
5532 * @rdev: radeon_device pointer
5533 *
5534 * This disables all VM page table (CIK).
5535 */
5536static void cik_pcie_gart_disable(struct radeon_device *rdev)
5537{
5538	unsigned i;
5539
5540	for (i = 1; i < 16; ++i) {
5541		uint32_t reg;
5542		if (i < 8)
5543			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5544		else
5545			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5546		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5547	}
5548
5549	/* Disable all tables */
5550	WREG32(VM_CONTEXT0_CNTL, 0);
5551	WREG32(VM_CONTEXT1_CNTL, 0);
5552	/* Setup TLB control */
5553	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5554	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5555	/* Setup L2 cache */
5556	WREG32(VM_L2_CNTL,
5557	       ENABLE_L2_FRAGMENT_PROCESSING |
5558	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5559	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5560	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5561	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5562	WREG32(VM_L2_CNTL2, 0);
5563	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5564	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5565	radeon_gart_table_vram_unpin(rdev);
5566}
5567
5568/**
5569 * cik_pcie_gart_fini - vm fini callback
5570 *
5571 * @rdev: radeon_device pointer
5572 *
5573 * Tears down the driver GART/VM setup (CIK).
5574 */
5575static void cik_pcie_gart_fini(struct radeon_device *rdev)
5576{
5577	cik_pcie_gart_disable(rdev);
5578	radeon_gart_table_vram_free(rdev);
5579	radeon_gart_fini(rdev);
5580}
5581
5582/* vm parser */
5583/**
5584 * cik_ib_parse - vm ib_parse callback
5585 *
5586 * @rdev: radeon_device pointer
5587 * @ib: indirect buffer pointer
5588 *
5589 * CIK uses hw IB checking so this is a nop (CIK).
5590 */
5591int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5592{
5593	return 0;
5594}
5595
5596/*
5597 * vm
5598 * VMID 0 is the physical GPU addresses as used by the kernel.
5599 * VMIDs 1-15 are used for userspace clients and are handled
5600 * by the radeon vm/hsa code.
5601 */
5602/**
5603 * cik_vm_init - cik vm init callback
5604 *
5605 * @rdev: radeon_device pointer
5606 *
5607 * Inits cik specific vm parameters (number of VMs, base of vram for
5608 * VMIDs 1-15) (CIK).
5609 * Returns 0 for success.
5610 */
5611int cik_vm_init(struct radeon_device *rdev)
5612{
5613	/*
5614	 * number of VMs
5615	 * VMID 0 is reserved for System
5616	 * radeon graphics/compute will use VMIDs 1-15
5617	 */
5618	rdev->vm_manager.nvm = 16;
5619	/* base offset of vram pages */
5620	if (rdev->flags & RADEON_IS_IGP) {
5621		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5622		tmp <<= 22;
5623		rdev->vm_manager.vram_base_offset = tmp;
5624	} else
5625		rdev->vm_manager.vram_base_offset = 0;
5626
5627	return 0;
5628}
5629
5630/**
5631 * cik_vm_fini - cik vm fini callback
5632 *
5633 * @rdev: radeon_device pointer
5634 *
5635 * Tear down any asic specific VM setup (CIK).
5636 */
5637void cik_vm_fini(struct radeon_device *rdev)
5638{
5639}
5640
5641/**
5642 * cik_vm_decode_fault - print human readable fault info
5643 *
5644 * @rdev: radeon_device pointer
5645 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5646 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5647 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
5648 *
5649 * Print human readable fault information (CIK).
5650 */
5651static void cik_vm_decode_fault(struct radeon_device *rdev,
5652				u32 status, u32 addr, u32 mc_client)
5653{
5654	u32 mc_id;
5655	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5656	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5657	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5658		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5659
5660	if (rdev->family == CHIP_HAWAII)
5661		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5662	else
5663		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5664
5665	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5666	       protections, vmid, addr,
5667	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5668	       block, mc_client, mc_id);
5669}
5670
5671/*
5672 * cik_vm_flush - cik vm flush using the CP
5673 *
5674 * Update the page table base and flush the VM TLB
5675 * using the CP (CIK).
5676 */
5677void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5678		  unsigned vm_id, uint64_t pd_addr)
5679{
5680	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
5681
5682	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5683	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5684				 WRITE_DATA_DST_SEL(0)));
5685	if (vm_id < 8) {
5686		radeon_ring_write(ring,
5687				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5688	} else {
5689		radeon_ring_write(ring,
5690				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5691	}
5692	radeon_ring_write(ring, 0);
5693	radeon_ring_write(ring, pd_addr >> 12);
5694
5695	/* update SH_MEM_* regs */
5696	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5697	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5698				 WRITE_DATA_DST_SEL(0)));
5699	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5700	radeon_ring_write(ring, 0);
5701	radeon_ring_write(ring, VMID(vm_id));
5702
5703	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5704	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5705				 WRITE_DATA_DST_SEL(0)));
5706	radeon_ring_write(ring, SH_MEM_BASES >> 2);
5707	radeon_ring_write(ring, 0);
5708
5709	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5710	radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
5711	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5712	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5713
5714	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5715	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5716				 WRITE_DATA_DST_SEL(0)));
5717	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5718	radeon_ring_write(ring, 0);
5719	radeon_ring_write(ring, VMID(0));
5720
5721	/* HDP flush */
5722	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
5723
5724	/* bits 0-15 are the VM contexts0-15 */
5725	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5726	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5727				 WRITE_DATA_DST_SEL(0)));
5728	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5729	radeon_ring_write(ring, 0);
5730	radeon_ring_write(ring, 1 << vm_id);
5731
5732	/* wait for the invalidate to complete */
5733	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5734	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5735				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
5736				 WAIT_REG_MEM_ENGINE(0))); /* me */
5737	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5738	radeon_ring_write(ring, 0);
5739	radeon_ring_write(ring, 0); /* ref */
5740	radeon_ring_write(ring, 0); /* mask */
5741	radeon_ring_write(ring, 0x20); /* poll interval */
5742
5743	/* compute doesn't have PFP */
5744	if (usepfp) {
5745		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5746		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5747		radeon_ring_write(ring, 0x0);
5748	}
5749}
5750
5751/*
5752 * RLC
5753 * The RLC is a multi-purpose microengine that handles a
5754 * variety of functions, the most important of which is
5755 * the interrupt controller.
5756 */
5757static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5758					  bool enable)
5759{
5760	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5761
5762	if (enable)
5763		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5764	else
5765		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5766	WREG32(CP_INT_CNTL_RING0, tmp);
5767}
5768
5769static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5770{
5771	u32 tmp;
5772
5773	tmp = RREG32(RLC_LB_CNTL);
5774	if (enable)
5775		tmp |= LOAD_BALANCE_ENABLE;
5776	else
5777		tmp &= ~LOAD_BALANCE_ENABLE;
5778	WREG32(RLC_LB_CNTL, tmp);
5779}
5780
5781static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5782{
5783	u32 i, j, k;
5784	u32 mask;
5785
5786	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5787		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5788			cik_select_se_sh(rdev, i, j);
5789			for (k = 0; k < rdev->usec_timeout; k++) {
5790				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5791					break;
5792				udelay(1);
5793			}
5794		}
5795	}
5796	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5797
5798	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5799	for (k = 0; k < rdev->usec_timeout; k++) {
5800		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5801			break;
5802		udelay(1);
5803	}
5804}
5805
5806static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5807{
5808	u32 tmp;
5809
5810	tmp = RREG32(RLC_CNTL);
5811	if (tmp != rlc)
5812		WREG32(RLC_CNTL, rlc);
5813}
5814
5815static u32 cik_halt_rlc(struct radeon_device *rdev)
5816{
5817	u32 data, orig;
5818
5819	orig = data = RREG32(RLC_CNTL);
5820
5821	if (data & RLC_ENABLE) {
5822		u32 i;
5823
5824		data &= ~RLC_ENABLE;
5825		WREG32(RLC_CNTL, data);
5826
5827		for (i = 0; i < rdev->usec_timeout; i++) {
5828			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5829				break;
5830			udelay(1);
5831		}
5832
5833		cik_wait_for_rlc_serdes(rdev);
5834	}
5835
5836	return orig;
5837}
5838
5839void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5840{
5841	u32 tmp, i, mask;
5842
5843	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5844	WREG32(RLC_GPR_REG2, tmp);
5845
5846	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5847	for (i = 0; i < rdev->usec_timeout; i++) {
5848		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5849			break;
5850		udelay(1);
5851	}
5852
5853	for (i = 0; i < rdev->usec_timeout; i++) {
5854		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5855			break;
5856		udelay(1);
5857	}
5858}
5859
5860void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5861{
5862	u32 tmp;
5863
5864	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5865	WREG32(RLC_GPR_REG2, tmp);
5866}
5867
5868/**
5869 * cik_rlc_stop - stop the RLC ME
5870 *
5871 * @rdev: radeon_device pointer
5872 *
5873 * Halt the RLC ME (MicroEngine) (CIK).
5874 */
5875static void cik_rlc_stop(struct radeon_device *rdev)
5876{
5877	WREG32(RLC_CNTL, 0);
5878
5879	cik_enable_gui_idle_interrupt(rdev, false);
5880
5881	cik_wait_for_rlc_serdes(rdev);
5882}
5883
5884/**
5885 * cik_rlc_start - start the RLC ME
5886 *
5887 * @rdev: radeon_device pointer
5888 *
5889 * Unhalt the RLC ME (MicroEngine) (CIK).
5890 */
5891static void cik_rlc_start(struct radeon_device *rdev)
5892{
5893	WREG32(RLC_CNTL, RLC_ENABLE);
5894
5895	cik_enable_gui_idle_interrupt(rdev, true);
5896
5897	udelay(50);
5898}
5899
5900/**
5901 * cik_rlc_resume - setup the RLC hw
5902 *
5903 * @rdev: radeon_device pointer
5904 *
5905 * Initialize the RLC registers, load the ucode,
5906 * and start the RLC (CIK).
5907 * Returns 0 for success, -EINVAL if the ucode is not available.
5908 */
5909static int cik_rlc_resume(struct radeon_device *rdev)
5910{
5911	u32 i, size, tmp;
5912
5913	if (!rdev->rlc_fw)
5914		return -EINVAL;
5915
5916	cik_rlc_stop(rdev);
5917
5918	/* disable CG */
5919	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5920	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5921
5922	si_rlc_reset(rdev);
5923
5924	cik_init_pg(rdev);
5925
5926	cik_init_cg(rdev);
5927
5928	WREG32(RLC_LB_CNTR_INIT, 0);
5929	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5930
5931	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5932	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5933	WREG32(RLC_LB_PARAMS, 0x00600408);
5934	WREG32(RLC_LB_CNTL, 0x80000004);
5935
5936	WREG32(RLC_MC_CNTL, 0);
5937	WREG32(RLC_UCODE_CNTL, 0);
5938
5939	if (rdev->new_fw) {
5940		const struct rlc_firmware_header_v1_0 *hdr =
5941			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5942		const __le32 *fw_data = (const __le32 *)
5943			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5944
5945		radeon_ucode_print_rlc_hdr(&hdr->header);
5946
5947		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5948		WREG32(RLC_GPM_UCODE_ADDR, 0);
5949		for (i = 0; i < size; i++)
5950			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
5951		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
5952	} else {
5953		const __be32 *fw_data;
5954
5955		switch (rdev->family) {
5956		case CHIP_BONAIRE:
5957		case CHIP_HAWAII:
5958		default:
5959			size = BONAIRE_RLC_UCODE_SIZE;
5960			break;
5961		case CHIP_KAVERI:
5962			size = KV_RLC_UCODE_SIZE;
5963			break;
5964		case CHIP_KABINI:
5965			size = KB_RLC_UCODE_SIZE;
5966			break;
5967		case CHIP_MULLINS:
5968			size = ML_RLC_UCODE_SIZE;
5969			break;
5970		}
5971
5972		fw_data = (const __be32 *)rdev->rlc_fw->data;
5973		WREG32(RLC_GPM_UCODE_ADDR, 0);
5974		for (i = 0; i < size; i++)
5975			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5976		WREG32(RLC_GPM_UCODE_ADDR, 0);
5977	}
5978
5979	/* XXX - find out what chips support lbpw */
5980	cik_enable_lbpw(rdev, false);
5981
5982	if (rdev->family == CHIP_BONAIRE)
5983		WREG32(RLC_DRIVER_DMA_STATUS, 0);
5984
5985	cik_rlc_start(rdev);
5986
5987	return 0;
5988}
5989
5990static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5991{
5992	u32 data, orig, tmp, tmp2;
5993
5994	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5995
5996	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5997		cik_enable_gui_idle_interrupt(rdev, true);
5998
5999		tmp = cik_halt_rlc(rdev);
6000
6001		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6002		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6003		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6004		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6005		WREG32(RLC_SERDES_WR_CTRL, tmp2);
6006
6007		cik_update_rlc(rdev, tmp);
6008
6009		data |= CGCG_EN | CGLS_EN;
6010	} else {
6011		cik_enable_gui_idle_interrupt(rdev, false);
6012
6013		RREG32(CB_CGTT_SCLK_CTRL);
6014		RREG32(CB_CGTT_SCLK_CTRL);
6015		RREG32(CB_CGTT_SCLK_CTRL);
6016		RREG32(CB_CGTT_SCLK_CTRL);
6017
6018		data &= ~(CGCG_EN | CGLS_EN);
6019	}
6020
6021	if (orig != data)
6022		WREG32(RLC_CGCG_CGLS_CTRL, data);
6023
6024}
6025
6026static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6027{
6028	u32 data, orig, tmp = 0;
6029
6030	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6031		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6032			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6033				orig = data = RREG32(CP_MEM_SLP_CNTL);
6034				data |= CP_MEM_LS_EN;
6035				if (orig != data)
6036					WREG32(CP_MEM_SLP_CNTL, data);
6037			}
6038		}
6039
6040		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6041		data |= 0x00000001;
6042		data &= 0xfffffffd;
6043		if (orig != data)
6044			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6045
6046		tmp = cik_halt_rlc(rdev);
6047
6048		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6049		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6050		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6051		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6052		WREG32(RLC_SERDES_WR_CTRL, data);
6053
6054		cik_update_rlc(rdev, tmp);
6055
6056		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6057			orig = data = RREG32(CGTS_SM_CTRL_REG);
6058			data &= ~SM_MODE_MASK;
6059			data |= SM_MODE(0x2);
6060			data |= SM_MODE_ENABLE;
6061			data &= ~CGTS_OVERRIDE;
6062			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6063			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6064				data &= ~CGTS_LS_OVERRIDE;
6065			data &= ~ON_MONITOR_ADD_MASK;
6066			data |= ON_MONITOR_ADD_EN;
6067			data |= ON_MONITOR_ADD(0x96);
6068			if (orig != data)
6069				WREG32(CGTS_SM_CTRL_REG, data);
6070		}
6071	} else {
6072		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6073		data |= 0x00000003;
6074		if (orig != data)
6075			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6076
6077		data = RREG32(RLC_MEM_SLP_CNTL);
6078		if (data & RLC_MEM_LS_EN) {
6079			data &= ~RLC_MEM_LS_EN;
6080			WREG32(RLC_MEM_SLP_CNTL, data);
6081		}
6082
6083		data = RREG32(CP_MEM_SLP_CNTL);
6084		if (data & CP_MEM_LS_EN) {
6085			data &= ~CP_MEM_LS_EN;
6086			WREG32(CP_MEM_SLP_CNTL, data);
6087		}
6088
6089		orig = data = RREG32(CGTS_SM_CTRL_REG);
6090		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6091		if (orig != data)
6092			WREG32(CGTS_SM_CTRL_REG, data);
6093
6094		tmp = cik_halt_rlc(rdev);
6095
6096		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6097		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6098		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6099		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6100		WREG32(RLC_SERDES_WR_CTRL, data);
6101
6102		cik_update_rlc(rdev, tmp);
6103	}
6104}
6105
6106static const u32 mc_cg_registers[] =
6107{
6108	MC_HUB_MISC_HUB_CG,
6109	MC_HUB_MISC_SIP_CG,
6110	MC_HUB_MISC_VM_CG,
6111	MC_XPB_CLK_GAT,
6112	ATC_MISC_CG,
6113	MC_CITF_MISC_WR_CG,
6114	MC_CITF_MISC_RD_CG,
6115	MC_CITF_MISC_VM_CG,
6116	VM_L2_CG,
6117};
6118
6119static void cik_enable_mc_ls(struct radeon_device *rdev,
6120			     bool enable)
6121{
6122	int i;
6123	u32 orig, data;
6124
6125	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6126		orig = data = RREG32(mc_cg_registers[i]);
6127		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6128			data |= MC_LS_ENABLE;
6129		else
6130			data &= ~MC_LS_ENABLE;
6131		if (data != orig)
6132			WREG32(mc_cg_registers[i], data);
6133	}
6134}
6135
6136static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6137			       bool enable)
6138{
6139	int i;
6140	u32 orig, data;
6141
6142	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6143		orig = data = RREG32(mc_cg_registers[i]);
6144		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6145			data |= MC_CG_ENABLE;
6146		else
6147			data &= ~MC_CG_ENABLE;
6148		if (data != orig)
6149			WREG32(mc_cg_registers[i], data);
6150	}
6151}
6152
6153static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6154				 bool enable)
6155{
6156	u32 orig, data;
6157
6158	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6159		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6160		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6161	} else {
6162		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6163		data |= 0xff000000;
6164		if (data != orig)
6165			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6166
6167		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6168		data |= 0xff000000;
6169		if (data != orig)
6170			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6171	}
6172}
6173
6174static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6175				 bool enable)
6176{
6177	u32 orig, data;
6178
6179	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6180		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6181		data |= 0x100;
6182		if (orig != data)
6183			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6184
6185		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6186		data |= 0x100;
6187		if (orig != data)
6188			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6189	} else {
6190		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6191		data &= ~0x100;
6192		if (orig != data)
6193			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6194
6195		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6196		data &= ~0x100;
6197		if (orig != data)
6198			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6199	}
6200}
6201
6202static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6203				bool enable)
6204{
6205	u32 orig, data;
6206
6207	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6208		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6209		data = 0xfff;
6210		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6211
6212		orig = data = RREG32(UVD_CGC_CTRL);
6213		data |= DCM;
6214		if (orig != data)
6215			WREG32(UVD_CGC_CTRL, data);
6216	} else {
6217		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6218		data &= ~0xfff;
6219		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6220
6221		orig = data = RREG32(UVD_CGC_CTRL);
6222		data &= ~DCM;
6223		if (orig != data)
6224			WREG32(UVD_CGC_CTRL, data);
6225	}
6226}
6227
6228static void cik_enable_bif_mgls(struct radeon_device *rdev,
6229			       bool enable)
6230{
6231	u32 orig, data;
6232
6233	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6234
6235	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6236		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6237			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6238	else
6239		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6240			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6241
6242	if (orig != data)
6243		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6244}
6245
6246static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6247				bool enable)
6248{
6249	u32 orig, data;
6250
6251	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6252
6253	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6254		data &= ~CLOCK_GATING_DIS;
6255	else
6256		data |= CLOCK_GATING_DIS;
6257
6258	if (orig != data)
6259		WREG32(HDP_HOST_PATH_CNTL, data);
6260}
6261
6262static void cik_enable_hdp_ls(struct radeon_device *rdev,
6263			      bool enable)
6264{
6265	u32 orig, data;
6266
6267	orig = data = RREG32(HDP_MEM_POWER_LS);
6268
6269	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6270		data |= HDP_LS_ENABLE;
6271	else
6272		data &= ~HDP_LS_ENABLE;
6273
6274	if (orig != data)
6275		WREG32(HDP_MEM_POWER_LS, data);
6276}
6277
6278void cik_update_cg(struct radeon_device *rdev,
6279		   u32 block, bool enable)
6280{
6281
6282	if (block & RADEON_CG_BLOCK_GFX) {
6283		cik_enable_gui_idle_interrupt(rdev, false);
6284		/* order matters! */
6285		if (enable) {
6286			cik_enable_mgcg(rdev, true);
6287			cik_enable_cgcg(rdev, true);
6288		} else {
6289			cik_enable_cgcg(rdev, false);
6290			cik_enable_mgcg(rdev, false);
6291		}
6292		cik_enable_gui_idle_interrupt(rdev, true);
6293	}
6294
6295	if (block & RADEON_CG_BLOCK_MC) {
6296		if (!(rdev->flags & RADEON_IS_IGP)) {
6297			cik_enable_mc_mgcg(rdev, enable);
6298			cik_enable_mc_ls(rdev, enable);
6299		}
6300	}
6301
6302	if (block & RADEON_CG_BLOCK_SDMA) {
6303		cik_enable_sdma_mgcg(rdev, enable);
6304		cik_enable_sdma_mgls(rdev, enable);
6305	}
6306
6307	if (block & RADEON_CG_BLOCK_BIF) {
6308		cik_enable_bif_mgls(rdev, enable);
6309	}
6310
6311	if (block & RADEON_CG_BLOCK_UVD) {
6312		if (rdev->has_uvd)
6313			cik_enable_uvd_mgcg(rdev, enable);
6314	}
6315
6316	if (block & RADEON_CG_BLOCK_HDP) {
6317		cik_enable_hdp_mgcg(rdev, enable);
6318		cik_enable_hdp_ls(rdev, enable);
6319	}
6320
6321	if (block & RADEON_CG_BLOCK_VCE) {
6322		vce_v2_0_enable_mgcg(rdev, enable);
6323	}
6324}
6325
6326static void cik_init_cg(struct radeon_device *rdev)
6327{
6328
6329	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6330
6331	if (rdev->has_uvd)
6332		si_init_uvd_internal_cg(rdev);
6333
6334	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6335			     RADEON_CG_BLOCK_SDMA |
6336			     RADEON_CG_BLOCK_BIF |
6337			     RADEON_CG_BLOCK_UVD |
6338			     RADEON_CG_BLOCK_HDP), true);
6339}
6340
6341static void cik_fini_cg(struct radeon_device *rdev)
6342{
6343	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6344			     RADEON_CG_BLOCK_SDMA |
6345			     RADEON_CG_BLOCK_BIF |
6346			     RADEON_CG_BLOCK_UVD |
6347			     RADEON_CG_BLOCK_HDP), false);
6348
6349	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6350}
6351
6352static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6353					  bool enable)
6354{
6355	u32 data, orig;
6356
6357	orig = data = RREG32(RLC_PG_CNTL);
6358	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6359		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6360	else
6361		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6362	if (orig != data)
6363		WREG32(RLC_PG_CNTL, data);
6364}
6365
6366static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6367					  bool enable)
6368{
6369	u32 data, orig;
6370
6371	orig = data = RREG32(RLC_PG_CNTL);
6372	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6373		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6374	else
6375		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6376	if (orig != data)
6377		WREG32(RLC_PG_CNTL, data);
6378}
6379
6380static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6381{
6382	u32 data, orig;
6383
6384	orig = data = RREG32(RLC_PG_CNTL);
6385	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6386		data &= ~DISABLE_CP_PG;
6387	else
6388		data |= DISABLE_CP_PG;
6389	if (orig != data)
6390		WREG32(RLC_PG_CNTL, data);
6391}
6392
6393static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6394{
6395	u32 data, orig;
6396
6397	orig = data = RREG32(RLC_PG_CNTL);
6398	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6399		data &= ~DISABLE_GDS_PG;
6400	else
6401		data |= DISABLE_GDS_PG;
6402	if (orig != data)
6403		WREG32(RLC_PG_CNTL, data);
6404}
6405
6406#define CP_ME_TABLE_SIZE    96
6407#define CP_ME_TABLE_OFFSET  2048
6408#define CP_MEC_TABLE_OFFSET 4096
6409
6410void cik_init_cp_pg_table(struct radeon_device *rdev)
6411{
6412	volatile u32 *dst_ptr;
6413	int me, i, max_me = 4;
6414	u32 bo_offset = 0;
6415	u32 table_offset, table_size;
6416
6417	if (rdev->family == CHIP_KAVERI)
6418		max_me = 5;
6419
6420	if (rdev->rlc.cp_table_ptr == NULL)
6421		return;
6422
6423	/* write the cp table buffer */
6424	dst_ptr = rdev->rlc.cp_table_ptr;
6425	for (me = 0; me < max_me; me++) {
6426		if (rdev->new_fw) {
6427			const __le32 *fw_data;
6428			const struct gfx_firmware_header_v1_0 *hdr;
6429
6430			if (me == 0) {
6431				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6432				fw_data = (const __le32 *)
6433					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6434				table_offset = le32_to_cpu(hdr->jt_offset);
6435				table_size = le32_to_cpu(hdr->jt_size);
6436			} else if (me == 1) {
6437				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6438				fw_data = (const __le32 *)
6439					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6440				table_offset = le32_to_cpu(hdr->jt_offset);
6441				table_size = le32_to_cpu(hdr->jt_size);
6442			} else if (me == 2) {
6443				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6444				fw_data = (const __le32 *)
6445					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6446				table_offset = le32_to_cpu(hdr->jt_offset);
6447				table_size = le32_to_cpu(hdr->jt_size);
6448			} else if (me == 3) {
6449				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6450				fw_data = (const __le32 *)
6451					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6452				table_offset = le32_to_cpu(hdr->jt_offset);
6453				table_size = le32_to_cpu(hdr->jt_size);
6454			} else {
6455				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6456				fw_data = (const __le32 *)
6457					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6458				table_offset = le32_to_cpu(hdr->jt_offset);
6459				table_size = le32_to_cpu(hdr->jt_size);
6460			}
6461
6462			for (i = 0; i < table_size; i ++) {
6463				dst_ptr[bo_offset + i] =
6464					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6465			}
6466			bo_offset += table_size;
6467		} else {
6468			const __be32 *fw_data;
6469			table_size = CP_ME_TABLE_SIZE;
6470
6471			if (me == 0) {
6472				fw_data = (const __be32 *)rdev->ce_fw->data;
6473				table_offset = CP_ME_TABLE_OFFSET;
6474			} else if (me == 1) {
6475				fw_data = (const __be32 *)rdev->pfp_fw->data;
6476				table_offset = CP_ME_TABLE_OFFSET;
6477			} else if (me == 2) {
6478				fw_data = (const __be32 *)rdev->me_fw->data;
6479				table_offset = CP_ME_TABLE_OFFSET;
6480			} else {
6481				fw_data = (const __be32 *)rdev->mec_fw->data;
6482				table_offset = CP_MEC_TABLE_OFFSET;
6483			}
6484
6485			for (i = 0; i < table_size; i ++) {
6486				dst_ptr[bo_offset + i] =
6487					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6488			}
6489			bo_offset += table_size;
6490		}
6491	}
6492}
6493
6494static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6495				bool enable)
6496{
6497	u32 data, orig;
6498
6499	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6500		orig = data = RREG32(RLC_PG_CNTL);
6501		data |= GFX_PG_ENABLE;
6502		if (orig != data)
6503			WREG32(RLC_PG_CNTL, data);
6504
6505		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6506		data |= AUTO_PG_EN;
6507		if (orig != data)
6508			WREG32(RLC_AUTO_PG_CTRL, data);
6509	} else {
6510		orig = data = RREG32(RLC_PG_CNTL);
6511		data &= ~GFX_PG_ENABLE;
6512		if (orig != data)
6513			WREG32(RLC_PG_CNTL, data);
6514
6515		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6516		data &= ~AUTO_PG_EN;
6517		if (orig != data)
6518			WREG32(RLC_AUTO_PG_CTRL, data);
6519
6520		data = RREG32(DB_RENDER_CONTROL);
6521	}
6522}
6523
6524static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6525{
6526	u32 mask = 0, tmp, tmp1;
6527	int i;
6528
6529	cik_select_se_sh(rdev, se, sh);
6530	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6531	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6532	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6533
6534	tmp &= 0xffff0000;
6535
6536	tmp |= tmp1;
6537	tmp >>= 16;
6538
6539	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6540		mask <<= 1;
6541		mask |= 1;
6542	}
6543
6544	return (~tmp) & mask;
6545}
6546
6547static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6548{
6549	u32 i, j, k, active_cu_number = 0;
6550	u32 mask, counter, cu_bitmap;
6551	u32 tmp = 0;
6552
6553	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6554		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6555			mask = 1;
6556			cu_bitmap = 0;
6557			counter = 0;
6558			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6559				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6560					if (counter < 2)
6561						cu_bitmap |= mask;
6562					counter ++;
6563				}
6564				mask <<= 1;
6565			}
6566
6567			active_cu_number += counter;
6568			tmp |= (cu_bitmap << (i * 16 + j * 8));
6569		}
6570	}
6571
6572	WREG32(RLC_PG_AO_CU_MASK, tmp);
6573
6574	tmp = RREG32(RLC_MAX_PG_CU);
6575	tmp &= ~MAX_PU_CU_MASK;
6576	tmp |= MAX_PU_CU(active_cu_number);
6577	WREG32(RLC_MAX_PG_CU, tmp);
6578}
6579
6580static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6581				       bool enable)
6582{
6583	u32 data, orig;
6584
6585	orig = data = RREG32(RLC_PG_CNTL);
6586	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6587		data |= STATIC_PER_CU_PG_ENABLE;
6588	else
6589		data &= ~STATIC_PER_CU_PG_ENABLE;
6590	if (orig != data)
6591		WREG32(RLC_PG_CNTL, data);
6592}
6593
6594static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6595					bool enable)
6596{
6597	u32 data, orig;
6598
6599	orig = data = RREG32(RLC_PG_CNTL);
6600	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6601		data |= DYN_PER_CU_PG_ENABLE;
6602	else
6603		data &= ~DYN_PER_CU_PG_ENABLE;
6604	if (orig != data)
6605		WREG32(RLC_PG_CNTL, data);
6606}
6607
6608#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6609#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6610
6611static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6612{
6613	u32 data, orig;
6614	u32 i;
6615
6616	if (rdev->rlc.cs_data) {
6617		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6618		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6619		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6620		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6621	} else {
6622		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6623		for (i = 0; i < 3; i++)
6624			WREG32(RLC_GPM_SCRATCH_DATA, 0);
6625	}
6626	if (rdev->rlc.reg_list) {
6627		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6628		for (i = 0; i < rdev->rlc.reg_list_size; i++)
6629			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6630	}
6631
6632	orig = data = RREG32(RLC_PG_CNTL);
6633	data |= GFX_PG_SRC;
6634	if (orig != data)
6635		WREG32(RLC_PG_CNTL, data);
6636
6637	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6638	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6639
6640	data = RREG32(CP_RB_WPTR_POLL_CNTL);
6641	data &= ~IDLE_POLL_COUNT_MASK;
6642	data |= IDLE_POLL_COUNT(0x60);
6643	WREG32(CP_RB_WPTR_POLL_CNTL, data);
6644
6645	data = 0x10101010;
6646	WREG32(RLC_PG_DELAY, data);
6647
6648	data = RREG32(RLC_PG_DELAY_2);
6649	data &= ~0xff;
6650	data |= 0x3;
6651	WREG32(RLC_PG_DELAY_2, data);
6652
6653	data = RREG32(RLC_AUTO_PG_CTRL);
6654	data &= ~GRBM_REG_SGIT_MASK;
6655	data |= GRBM_REG_SGIT(0x700);
6656	WREG32(RLC_AUTO_PG_CTRL, data);
6657
6658}
6659
6660static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6661{
6662	cik_enable_gfx_cgpg(rdev, enable);
6663	cik_enable_gfx_static_mgpg(rdev, enable);
6664	cik_enable_gfx_dynamic_mgpg(rdev, enable);
6665}
6666
6667u32 cik_get_csb_size(struct radeon_device *rdev)
6668{
6669	u32 count = 0;
6670	const struct cs_section_def *sect = NULL;
6671	const struct cs_extent_def *ext = NULL;
6672
6673	if (rdev->rlc.cs_data == NULL)
6674		return 0;
6675
6676	/* begin clear state */
6677	count += 2;
6678	/* context control state */
6679	count += 3;
6680
6681	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6682		for (ext = sect->section; ext->extent != NULL; ++ext) {
6683			if (sect->id == SECT_CONTEXT)
6684				count += 2 + ext->reg_count;
6685			else
6686				return 0;
6687		}
6688	}
6689	/* pa_sc_raster_config/pa_sc_raster_config1 */
6690	count += 4;
6691	/* end clear state */
6692	count += 2;
6693	/* clear state */
6694	count += 2;
6695
6696	return count;
6697}
6698
6699void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6700{
6701	u32 count = 0, i;
6702	const struct cs_section_def *sect = NULL;
6703	const struct cs_extent_def *ext = NULL;
6704
6705	if (rdev->rlc.cs_data == NULL)
6706		return;
6707	if (buffer == NULL)
6708		return;
6709
6710	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6711	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6712
6713	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6714	buffer[count++] = cpu_to_le32(0x80000000);
6715	buffer[count++] = cpu_to_le32(0x80000000);
6716
6717	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6718		for (ext = sect->section; ext->extent != NULL; ++ext) {
6719			if (sect->id == SECT_CONTEXT) {
6720				buffer[count++] =
6721					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6722				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6723				for (i = 0; i < ext->reg_count; i++)
6724					buffer[count++] = cpu_to_le32(ext->extent[i]);
6725			} else {
6726				return;
6727			}
6728		}
6729	}
6730
6731	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6732	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6733	switch (rdev->family) {
6734	case CHIP_BONAIRE:
6735		buffer[count++] = cpu_to_le32(0x16000012);
6736		buffer[count++] = cpu_to_le32(0x00000000);
6737		break;
6738	case CHIP_KAVERI:
6739		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6740		buffer[count++] = cpu_to_le32(0x00000000);
6741		break;
6742	case CHIP_KABINI:
6743	case CHIP_MULLINS:
6744		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6745		buffer[count++] = cpu_to_le32(0x00000000);
6746		break;
6747	case CHIP_HAWAII:
6748		buffer[count++] = cpu_to_le32(0x3a00161a);
6749		buffer[count++] = cpu_to_le32(0x0000002e);
6750		break;
6751	default:
6752		buffer[count++] = cpu_to_le32(0x00000000);
6753		buffer[count++] = cpu_to_le32(0x00000000);
6754		break;
6755	}
6756
6757	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6758	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6759
6760	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6761	buffer[count++] = cpu_to_le32(0);
6762}
6763
6764static void cik_init_pg(struct radeon_device *rdev)
6765{
6766	if (rdev->pg_flags) {
6767		cik_enable_sck_slowdown_on_pu(rdev, true);
6768		cik_enable_sck_slowdown_on_pd(rdev, true);
6769		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6770			cik_init_gfx_cgpg(rdev);
6771			cik_enable_cp_pg(rdev, true);
6772			cik_enable_gds_pg(rdev, true);
6773		}
6774		cik_init_ao_cu_mask(rdev);
6775		cik_update_gfx_pg(rdev, true);
6776	}
6777}
6778
6779static void cik_fini_pg(struct radeon_device *rdev)
6780{
6781	if (rdev->pg_flags) {
6782		cik_update_gfx_pg(rdev, false);
6783		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6784			cik_enable_cp_pg(rdev, false);
6785			cik_enable_gds_pg(rdev, false);
6786		}
6787	}
6788}
6789
6790/*
6791 * Interrupts
6792 * Starting with r6xx, interrupts are handled via a ring buffer.
6793 * Ring buffers are areas of GPU accessible memory that the GPU
6794 * writes interrupt vectors into and the host reads vectors out of.
6795 * There is a rptr (read pointer) that determines where the
6796 * host is currently reading, and a wptr (write pointer)
6797 * which determines where the GPU has written.  When the
6798 * pointers are equal, the ring is idle.  When the GPU
6799 * writes vectors to the ring buffer, it increments the
6800 * wptr.  When there is an interrupt, the host then starts
6801 * fetching commands and processing them until the pointers are
6802 * equal again at which point it updates the rptr.
6803 */
6804
6805/**
6806 * cik_enable_interrupts - Enable the interrupt ring buffer
6807 *
6808 * @rdev: radeon_device pointer
6809 *
6810 * Enable the interrupt ring buffer (CIK).
6811 */
6812static void cik_enable_interrupts(struct radeon_device *rdev)
6813{
6814	u32 ih_cntl = RREG32(IH_CNTL);
6815	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6816
6817	ih_cntl |= ENABLE_INTR;
6818	ih_rb_cntl |= IH_RB_ENABLE;
6819	WREG32(IH_CNTL, ih_cntl);
6820	WREG32(IH_RB_CNTL, ih_rb_cntl);
6821	rdev->ih.enabled = true;
6822}
6823
6824/**
6825 * cik_disable_interrupts - Disable the interrupt ring buffer
6826 *
6827 * @rdev: radeon_device pointer
6828 *
6829 * Disable the interrupt ring buffer (CIK).
6830 */
6831static void cik_disable_interrupts(struct radeon_device *rdev)
6832{
6833	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6834	u32 ih_cntl = RREG32(IH_CNTL);
6835
6836	ih_rb_cntl &= ~IH_RB_ENABLE;
6837	ih_cntl &= ~ENABLE_INTR;
6838	WREG32(IH_RB_CNTL, ih_rb_cntl);
6839	WREG32(IH_CNTL, ih_cntl);
6840	/* set rptr, wptr to 0 */
6841	WREG32(IH_RB_RPTR, 0);
6842	WREG32(IH_RB_WPTR, 0);
6843	rdev->ih.enabled = false;
6844	rdev->ih.rptr = 0;
6845}
6846
6847/**
6848 * cik_disable_interrupt_state - Disable all interrupt sources
6849 *
6850 * @rdev: radeon_device pointer
6851 *
6852 * Clear all interrupt enable bits used by the driver (CIK).
6853 */
6854static void cik_disable_interrupt_state(struct radeon_device *rdev)
6855{
6856	u32 tmp;
6857
6858	/* gfx ring */
6859	tmp = RREG32(CP_INT_CNTL_RING0) &
6860		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6861	WREG32(CP_INT_CNTL_RING0, tmp);
6862	/* sdma */
6863	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6864	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6865	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6866	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6867	/* compute queues */
6868	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6869	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6870	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6871	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6872	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6873	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6874	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6875	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6876	/* grbm */
6877	WREG32(GRBM_INT_CNTL, 0);
6878	/* SRBM */
6879	WREG32(SRBM_INT_CNTL, 0);
6880	/* vline/vblank, etc. */
6881	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6882	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6883	if (rdev->num_crtc >= 4) {
6884		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6885		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6886	}
6887	if (rdev->num_crtc >= 6) {
6888		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6889		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6890	}
6891	/* pflip */
6892	if (rdev->num_crtc >= 2) {
6893		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6894		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6895	}
6896	if (rdev->num_crtc >= 4) {
6897		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6898		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6899	}
6900	if (rdev->num_crtc >= 6) {
6901		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6902		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6903	}
6904
6905	/* dac hotplug */
6906	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6907
6908	/* digital hotplug */
6909	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6910	WREG32(DC_HPD1_INT_CONTROL, tmp);
6911	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6912	WREG32(DC_HPD2_INT_CONTROL, tmp);
6913	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6914	WREG32(DC_HPD3_INT_CONTROL, tmp);
6915	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6916	WREG32(DC_HPD4_INT_CONTROL, tmp);
6917	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6918	WREG32(DC_HPD5_INT_CONTROL, tmp);
6919	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6920	WREG32(DC_HPD6_INT_CONTROL, tmp);
6921
6922}
6923
6924/**
6925 * cik_irq_init - init and enable the interrupt ring
6926 *
6927 * @rdev: radeon_device pointer
6928 *
6929 * Allocate a ring buffer for the interrupt controller,
6930 * enable the RLC, disable interrupts, enable the IH
6931 * ring buffer and enable it (CIK).
6932 * Called at device load and reume.
6933 * Returns 0 for success, errors for failure.
6934 */
6935static int cik_irq_init(struct radeon_device *rdev)
6936{
6937	int ret = 0;
6938	int rb_bufsz;
6939	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6940
6941	/* allocate ring */
6942	ret = r600_ih_ring_alloc(rdev);
6943	if (ret)
6944		return ret;
6945
6946	/* disable irqs */
6947	cik_disable_interrupts(rdev);
6948
6949	/* init rlc */
6950	ret = cik_rlc_resume(rdev);
6951	if (ret) {
6952		r600_ih_ring_fini(rdev);
6953		return ret;
6954	}
6955
6956	/* setup interrupt control */
6957	/* set dummy read address to dummy page address */
6958	WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
6959	interrupt_cntl = RREG32(INTERRUPT_CNTL);
6960	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6961	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6962	 */
6963	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6964	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6965	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6966	WREG32(INTERRUPT_CNTL, interrupt_cntl);
6967
6968	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6969	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6970
6971	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6972		      IH_WPTR_OVERFLOW_CLEAR |
6973		      (rb_bufsz << 1));
6974
6975	if (rdev->wb.enabled)
6976		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6977
6978	/* set the writeback address whether it's enabled or not */
6979	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6980	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6981
6982	WREG32(IH_RB_CNTL, ih_rb_cntl);
6983
6984	/* set rptr, wptr to 0 */
6985	WREG32(IH_RB_RPTR, 0);
6986	WREG32(IH_RB_WPTR, 0);
6987
6988	/* Default settings for IH_CNTL (disabled at first) */
6989	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6990	/* RPTR_REARM only works if msi's are enabled */
6991	if (rdev->msi_enabled)
6992		ih_cntl |= RPTR_REARM;
6993	WREG32(IH_CNTL, ih_cntl);
6994
6995	/* force the active interrupt state to all disabled */
6996	cik_disable_interrupt_state(rdev);
6997
6998	pci_set_master(rdev->pdev);
6999
7000	/* enable irqs */
7001	cik_enable_interrupts(rdev);
7002
7003	return ret;
7004}
7005
7006/**
7007 * cik_irq_set - enable/disable interrupt sources
7008 *
7009 * @rdev: radeon_device pointer
7010 *
7011 * Enable interrupt sources on the GPU (vblanks, hpd,
7012 * etc.) (CIK).
7013 * Returns 0 for success, errors for failure.
7014 */
7015int cik_irq_set(struct radeon_device *rdev)
7016{
7017	u32 cp_int_cntl;
7018	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
7019	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
7020	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7021	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7022	u32 grbm_int_cntl = 0;
7023	u32 dma_cntl, dma_cntl1;
7024
7025	if (!rdev->irq.installed) {
7026		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7027		return -EINVAL;
7028	}
7029	/* don't enable anything if the ih is disabled */
7030	if (!rdev->ih.enabled) {
7031		cik_disable_interrupts(rdev);
7032		/* force the active interrupt state to all disabled */
7033		cik_disable_interrupt_state(rdev);
7034		return 0;
7035	}
7036
7037	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7038		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7039	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7040
7041	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7042	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7043	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7044	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7045	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7046	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7047
7048	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7049	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7050
7051	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7052	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7053	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7054	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7055	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7056	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7057	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7058	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7059
7060	/* enable CP interrupts on all rings */
7061	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7062		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7063		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7064	}
7065	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7066		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7067		DRM_DEBUG("si_irq_set: sw int cp1\n");
7068		if (ring->me == 1) {
7069			switch (ring->pipe) {
7070			case 0:
7071				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7072				break;
7073			case 1:
7074				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7075				break;
7076			case 2:
7077				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7078				break;
7079			case 3:
7080				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7081				break;
7082			default:
7083				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7084				break;
7085			}
7086		} else if (ring->me == 2) {
7087			switch (ring->pipe) {
7088			case 0:
7089				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7090				break;
7091			case 1:
7092				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7093				break;
7094			case 2:
7095				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7096				break;
7097			case 3:
7098				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7099				break;
7100			default:
7101				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7102				break;
7103			}
7104		} else {
7105			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7106		}
7107	}
7108	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7109		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7110		DRM_DEBUG("si_irq_set: sw int cp2\n");
7111		if (ring->me == 1) {
7112			switch (ring->pipe) {
7113			case 0:
7114				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7115				break;
7116			case 1:
7117				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7118				break;
7119			case 2:
7120				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7121				break;
7122			case 3:
7123				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7124				break;
7125			default:
7126				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7127				break;
7128			}
7129		} else if (ring->me == 2) {
7130			switch (ring->pipe) {
7131			case 0:
7132				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7133				break;
7134			case 1:
7135				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7136				break;
7137			case 2:
7138				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7139				break;
7140			case 3:
7141				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7142				break;
7143			default:
7144				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7145				break;
7146			}
7147		} else {
7148			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7149		}
7150	}
7151
7152	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7153		DRM_DEBUG("cik_irq_set: sw int dma\n");
7154		dma_cntl |= TRAP_ENABLE;
7155	}
7156
7157	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7158		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7159		dma_cntl1 |= TRAP_ENABLE;
7160	}
7161
7162	if (rdev->irq.crtc_vblank_int[0] ||
7163	    atomic_read(&rdev->irq.pflip[0])) {
7164		DRM_DEBUG("cik_irq_set: vblank 0\n");
7165		crtc1 |= VBLANK_INTERRUPT_MASK;
7166	}
7167	if (rdev->irq.crtc_vblank_int[1] ||
7168	    atomic_read(&rdev->irq.pflip[1])) {
7169		DRM_DEBUG("cik_irq_set: vblank 1\n");
7170		crtc2 |= VBLANK_INTERRUPT_MASK;
7171	}
7172	if (rdev->irq.crtc_vblank_int[2] ||
7173	    atomic_read(&rdev->irq.pflip[2])) {
7174		DRM_DEBUG("cik_irq_set: vblank 2\n");
7175		crtc3 |= VBLANK_INTERRUPT_MASK;
7176	}
7177	if (rdev->irq.crtc_vblank_int[3] ||
7178	    atomic_read(&rdev->irq.pflip[3])) {
7179		DRM_DEBUG("cik_irq_set: vblank 3\n");
7180		crtc4 |= VBLANK_INTERRUPT_MASK;
7181	}
7182	if (rdev->irq.crtc_vblank_int[4] ||
7183	    atomic_read(&rdev->irq.pflip[4])) {
7184		DRM_DEBUG("cik_irq_set: vblank 4\n");
7185		crtc5 |= VBLANK_INTERRUPT_MASK;
7186	}
7187	if (rdev->irq.crtc_vblank_int[5] ||
7188	    atomic_read(&rdev->irq.pflip[5])) {
7189		DRM_DEBUG("cik_irq_set: vblank 5\n");
7190		crtc6 |= VBLANK_INTERRUPT_MASK;
7191	}
7192	if (rdev->irq.hpd[0]) {
7193		DRM_DEBUG("cik_irq_set: hpd 1\n");
7194		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7195	}
7196	if (rdev->irq.hpd[1]) {
7197		DRM_DEBUG("cik_irq_set: hpd 2\n");
7198		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7199	}
7200	if (rdev->irq.hpd[2]) {
7201		DRM_DEBUG("cik_irq_set: hpd 3\n");
7202		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7203	}
7204	if (rdev->irq.hpd[3]) {
7205		DRM_DEBUG("cik_irq_set: hpd 4\n");
7206		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7207	}
7208	if (rdev->irq.hpd[4]) {
7209		DRM_DEBUG("cik_irq_set: hpd 5\n");
7210		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7211	}
7212	if (rdev->irq.hpd[5]) {
7213		DRM_DEBUG("cik_irq_set: hpd 6\n");
7214		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7215	}
7216
7217	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7218
7219	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7220	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7221
7222	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7223	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7224	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7225	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7226	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7227	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7228	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7229	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7230
7231	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7232
7233	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7234	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7235	if (rdev->num_crtc >= 4) {
7236		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7237		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7238	}
7239	if (rdev->num_crtc >= 6) {
7240		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7241		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7242	}
7243
7244	if (rdev->num_crtc >= 2) {
7245		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7246		       GRPH_PFLIP_INT_MASK);
7247		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7248		       GRPH_PFLIP_INT_MASK);
7249	}
7250	if (rdev->num_crtc >= 4) {
7251		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7252		       GRPH_PFLIP_INT_MASK);
7253		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7254		       GRPH_PFLIP_INT_MASK);
7255	}
7256	if (rdev->num_crtc >= 6) {
7257		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7258		       GRPH_PFLIP_INT_MASK);
7259		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7260		       GRPH_PFLIP_INT_MASK);
7261	}
7262
7263	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7264	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7265	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7266	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7267	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7268	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7269
7270	/* posting read */
7271	RREG32(SRBM_STATUS);
7272
7273	return 0;
7274}
7275
7276/**
7277 * cik_irq_ack - ack interrupt sources
7278 *
7279 * @rdev: radeon_device pointer
7280 *
7281 * Ack interrupt sources on the GPU (vblanks, hpd,
7282 * etc.) (CIK).  Certain interrupts sources are sw
7283 * generated and do not require an explicit ack.
7284 */
7285static inline void cik_irq_ack(struct radeon_device *rdev)
7286{
7287	u32 tmp;
7288
7289	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7290	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7291	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7292	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7293	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7294	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7295	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7296
7297	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7298		EVERGREEN_CRTC0_REGISTER_OFFSET);
7299	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7300		EVERGREEN_CRTC1_REGISTER_OFFSET);
7301	if (rdev->num_crtc >= 4) {
7302		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7303			EVERGREEN_CRTC2_REGISTER_OFFSET);
7304		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7305			EVERGREEN_CRTC3_REGISTER_OFFSET);
7306	}
7307	if (rdev->num_crtc >= 6) {
7308		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7309			EVERGREEN_CRTC4_REGISTER_OFFSET);
7310		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7311			EVERGREEN_CRTC5_REGISTER_OFFSET);
7312	}
7313
7314	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7315		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7316		       GRPH_PFLIP_INT_CLEAR);
7317	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7318		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7319		       GRPH_PFLIP_INT_CLEAR);
7320	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7321		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7322	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7323		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7324	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7325		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7326	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7327		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7328
7329	if (rdev->num_crtc >= 4) {
7330		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7331			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7332			       GRPH_PFLIP_INT_CLEAR);
7333		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7334			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7335			       GRPH_PFLIP_INT_CLEAR);
7336		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7337			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7338		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7339			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7340		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7341			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7342		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7343			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7344	}
7345
7346	if (rdev->num_crtc >= 6) {
7347		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7348			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7349			       GRPH_PFLIP_INT_CLEAR);
7350		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7351			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7352			       GRPH_PFLIP_INT_CLEAR);
7353		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7354			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7355		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7356			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7357		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7358			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7359		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7360			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7361	}
7362
7363	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7364		tmp = RREG32(DC_HPD1_INT_CONTROL);
7365		tmp |= DC_HPDx_INT_ACK;
7366		WREG32(DC_HPD1_INT_CONTROL, tmp);
7367	}
7368	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7369		tmp = RREG32(DC_HPD2_INT_CONTROL);
7370		tmp |= DC_HPDx_INT_ACK;
7371		WREG32(DC_HPD2_INT_CONTROL, tmp);
7372	}
7373	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7374		tmp = RREG32(DC_HPD3_INT_CONTROL);
7375		tmp |= DC_HPDx_INT_ACK;
7376		WREG32(DC_HPD3_INT_CONTROL, tmp);
7377	}
7378	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7379		tmp = RREG32(DC_HPD4_INT_CONTROL);
7380		tmp |= DC_HPDx_INT_ACK;
7381		WREG32(DC_HPD4_INT_CONTROL, tmp);
7382	}
7383	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7384		tmp = RREG32(DC_HPD5_INT_CONTROL);
7385		tmp |= DC_HPDx_INT_ACK;
7386		WREG32(DC_HPD5_INT_CONTROL, tmp);
7387	}
7388	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7389		tmp = RREG32(DC_HPD6_INT_CONTROL);
7390		tmp |= DC_HPDx_INT_ACK;
7391		WREG32(DC_HPD6_INT_CONTROL, tmp);
7392	}
7393	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7394		tmp = RREG32(DC_HPD1_INT_CONTROL);
7395		tmp |= DC_HPDx_RX_INT_ACK;
7396		WREG32(DC_HPD1_INT_CONTROL, tmp);
7397	}
7398	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7399		tmp = RREG32(DC_HPD2_INT_CONTROL);
7400		tmp |= DC_HPDx_RX_INT_ACK;
7401		WREG32(DC_HPD2_INT_CONTROL, tmp);
7402	}
7403	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7404		tmp = RREG32(DC_HPD3_INT_CONTROL);
7405		tmp |= DC_HPDx_RX_INT_ACK;
7406		WREG32(DC_HPD3_INT_CONTROL, tmp);
7407	}
7408	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7409		tmp = RREG32(DC_HPD4_INT_CONTROL);
7410		tmp |= DC_HPDx_RX_INT_ACK;
7411		WREG32(DC_HPD4_INT_CONTROL, tmp);
7412	}
7413	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7414		tmp = RREG32(DC_HPD5_INT_CONTROL);
7415		tmp |= DC_HPDx_RX_INT_ACK;
7416		WREG32(DC_HPD5_INT_CONTROL, tmp);
7417	}
7418	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7419		tmp = RREG32(DC_HPD6_INT_CONTROL);
7420		tmp |= DC_HPDx_RX_INT_ACK;
7421		WREG32(DC_HPD6_INT_CONTROL, tmp);
7422	}
7423}
7424
7425/**
7426 * cik_irq_disable - disable interrupts
7427 *
7428 * @rdev: radeon_device pointer
7429 *
7430 * Disable interrupts on the hw (CIK).
7431 */
7432static void cik_irq_disable(struct radeon_device *rdev)
7433{
7434	cik_disable_interrupts(rdev);
7435	/* Wait and acknowledge irq */
7436	mdelay(1);
7437	cik_irq_ack(rdev);
7438	cik_disable_interrupt_state(rdev);
7439}
7440
7441/**
7442 * cik_irq_suspend - disable interrupts for suspend
7443 *
7444 * @rdev: radeon_device pointer
7445 *
7446 * Disable interrupts and stop the RLC (CIK).
7447 * Used for suspend.
7448 */
7449static void cik_irq_suspend(struct radeon_device *rdev)
7450{
7451	cik_irq_disable(rdev);
7452	cik_rlc_stop(rdev);
7453}
7454
7455/**
7456 * cik_irq_fini - tear down interrupt support
7457 *
7458 * @rdev: radeon_device pointer
7459 *
7460 * Disable interrupts on the hw and free the IH ring
7461 * buffer (CIK).
7462 * Used for driver unload.
7463 */
7464static void cik_irq_fini(struct radeon_device *rdev)
7465{
7466	cik_irq_suspend(rdev);
7467	r600_ih_ring_fini(rdev);
7468}
7469
7470/**
7471 * cik_get_ih_wptr - get the IH ring buffer wptr
7472 *
7473 * @rdev: radeon_device pointer
7474 *
7475 * Get the IH ring buffer wptr from either the register
7476 * or the writeback memory buffer (CIK).  Also check for
7477 * ring buffer overflow and deal with it.
7478 * Used by cik_irq_process().
7479 * Returns the value of the wptr.
7480 */
7481static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7482{
7483	u32 wptr, tmp;
7484
7485	if (rdev->wb.enabled)
7486		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7487	else
7488		wptr = RREG32(IH_RB_WPTR);
7489
7490	if (wptr & RB_OVERFLOW) {
7491		wptr &= ~RB_OVERFLOW;
7492		/* When a ring buffer overflow happen start parsing interrupt
7493		 * from the last not overwritten vector (wptr + 16). Hopefully
7494		 * this should allow us to catchup.
7495		 */
7496		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7497			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7498		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7499		tmp = RREG32(IH_RB_CNTL);
7500		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7501		WREG32(IH_RB_CNTL, tmp);
7502	}
7503	return (wptr & rdev->ih.ptr_mask);
7504}
7505
7506/*        CIK IV Ring
7507 * Each IV ring entry is 128 bits:
7508 * [7:0]    - interrupt source id
7509 * [31:8]   - reserved
7510 * [59:32]  - interrupt source data
7511 * [63:60]  - reserved
7512 * [71:64]  - RINGID
7513 *            CP:
7514 *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7515 *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7516 *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7517 *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7518 *            PIPE_ID - ME0 0=3D
7519 *                    - ME1&2 compute dispatcher (4 pipes each)
7520 *            SDMA:
7521 *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7522 *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7523 *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7524 * [79:72]  - VMID
7525 * [95:80]  - PASID
7526 * [127:96] - reserved
7527 */
7528/**
7529 * cik_irq_process - interrupt handler
7530 *
7531 * @rdev: radeon_device pointer
7532 *
7533 * Interrupt hander (CIK).  Walk the IH ring,
7534 * ack interrupts and schedule work to handle
7535 * interrupt events.
7536 * Returns irq process return code.
7537 */
7538int cik_irq_process(struct radeon_device *rdev)
7539{
7540	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7541	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7542	u32 wptr;
7543	u32 rptr;
7544	u32 src_id, src_data, ring_id;
7545	u8 me_id, pipe_id, queue_id;
7546	u32 ring_index;
7547	bool queue_hotplug = false;
7548	bool queue_dp = false;
7549	bool queue_reset = false;
7550	u32 addr, status, mc_client;
7551	bool queue_thermal = false;
7552
7553	if (!rdev->ih.enabled || rdev->shutdown)
7554		return IRQ_NONE;
7555
7556	wptr = cik_get_ih_wptr(rdev);
7557
7558restart_ih:
7559	/* is somebody else already processing irqs? */
7560	if (atomic_xchg(&rdev->ih.lock, 1))
7561		return IRQ_NONE;
7562
7563	rptr = rdev->ih.rptr;
7564	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7565
7566	/* Order reading of wptr vs. reading of IH ring data */
7567	rmb();
7568
7569	/* display interrupts */
7570	cik_irq_ack(rdev);
7571
7572	while (rptr != wptr) {
7573		/* wptr/rptr are in bytes! */
7574		ring_index = rptr / 4;
7575
7576		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7577		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7578		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7579
7580		switch (src_id) {
7581		case 1: /* D1 vblank/vline */
7582			switch (src_data) {
7583			case 0: /* D1 vblank */
7584				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7585					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7586
7587				if (rdev->irq.crtc_vblank_int[0]) {
7588					drm_handle_vblank(rdev->ddev, 0);
7589					rdev->pm.vblank_sync = true;
7590					wake_up(&rdev->irq.vblank_queue);
7591				}
7592				if (atomic_read(&rdev->irq.pflip[0]))
7593					radeon_crtc_handle_vblank(rdev, 0);
7594				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7595				DRM_DEBUG("IH: D1 vblank\n");
7596
7597				break;
7598			case 1: /* D1 vline */
7599				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7600					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7601
7602				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7603				DRM_DEBUG("IH: D1 vline\n");
7604
7605				break;
7606			default:
7607				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7608				break;
7609			}
7610			break;
7611		case 2: /* D2 vblank/vline */
7612			switch (src_data) {
7613			case 0: /* D2 vblank */
7614				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7615					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7616
7617				if (rdev->irq.crtc_vblank_int[1]) {
7618					drm_handle_vblank(rdev->ddev, 1);
7619					rdev->pm.vblank_sync = true;
7620					wake_up(&rdev->irq.vblank_queue);
7621				}
7622				if (atomic_read(&rdev->irq.pflip[1]))
7623					radeon_crtc_handle_vblank(rdev, 1);
7624				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7625				DRM_DEBUG("IH: D2 vblank\n");
7626
7627				break;
7628			case 1: /* D2 vline */
7629				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7630					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7631
7632				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7633				DRM_DEBUG("IH: D2 vline\n");
7634
7635				break;
7636			default:
7637				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7638				break;
7639			}
7640			break;
7641		case 3: /* D3 vblank/vline */
7642			switch (src_data) {
7643			case 0: /* D3 vblank */
7644				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7645					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7646
7647				if (rdev->irq.crtc_vblank_int[2]) {
7648					drm_handle_vblank(rdev->ddev, 2);
7649					rdev->pm.vblank_sync = true;
7650					wake_up(&rdev->irq.vblank_queue);
7651				}
7652				if (atomic_read(&rdev->irq.pflip[2]))
7653					radeon_crtc_handle_vblank(rdev, 2);
7654				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7655				DRM_DEBUG("IH: D3 vblank\n");
7656
7657				break;
7658			case 1: /* D3 vline */
7659				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7660					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7661
7662				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7663				DRM_DEBUG("IH: D3 vline\n");
7664
7665				break;
7666			default:
7667				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7668				break;
7669			}
7670			break;
7671		case 4: /* D4 vblank/vline */
7672			switch (src_data) {
7673			case 0: /* D4 vblank */
7674				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7675					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7676
7677				if (rdev->irq.crtc_vblank_int[3]) {
7678					drm_handle_vblank(rdev->ddev, 3);
7679					rdev->pm.vblank_sync = true;
7680					wake_up(&rdev->irq.vblank_queue);
7681				}
7682				if (atomic_read(&rdev->irq.pflip[3]))
7683					radeon_crtc_handle_vblank(rdev, 3);
7684				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7685				DRM_DEBUG("IH: D4 vblank\n");
7686
7687				break;
7688			case 1: /* D4 vline */
7689				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7690					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7691
7692				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7693				DRM_DEBUG("IH: D4 vline\n");
7694
7695				break;
7696			default:
7697				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7698				break;
7699			}
7700			break;
7701		case 5: /* D5 vblank/vline */
7702			switch (src_data) {
7703			case 0: /* D5 vblank */
7704				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7705					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7706
7707				if (rdev->irq.crtc_vblank_int[4]) {
7708					drm_handle_vblank(rdev->ddev, 4);
7709					rdev->pm.vblank_sync = true;
7710					wake_up(&rdev->irq.vblank_queue);
7711				}
7712				if (atomic_read(&rdev->irq.pflip[4]))
7713					radeon_crtc_handle_vblank(rdev, 4);
7714				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7715				DRM_DEBUG("IH: D5 vblank\n");
7716
7717				break;
7718			case 1: /* D5 vline */
7719				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7720					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7721
7722				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7723				DRM_DEBUG("IH: D5 vline\n");
7724
7725				break;
7726			default:
7727				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7728				break;
7729			}
7730			break;
7731		case 6: /* D6 vblank/vline */
7732			switch (src_data) {
7733			case 0: /* D6 vblank */
7734				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7735					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7736
7737				if (rdev->irq.crtc_vblank_int[5]) {
7738					drm_handle_vblank(rdev->ddev, 5);
7739					rdev->pm.vblank_sync = true;
7740					wake_up(&rdev->irq.vblank_queue);
7741				}
7742				if (atomic_read(&rdev->irq.pflip[5]))
7743					radeon_crtc_handle_vblank(rdev, 5);
7744				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7745				DRM_DEBUG("IH: D6 vblank\n");
7746
7747				break;
7748			case 1: /* D6 vline */
7749				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7750					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7751
7752				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7753				DRM_DEBUG("IH: D6 vline\n");
7754
7755				break;
7756			default:
7757				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7758				break;
7759			}
7760			break;
7761		case 8: /* D1 page flip */
7762		case 10: /* D2 page flip */
7763		case 12: /* D3 page flip */
7764		case 14: /* D4 page flip */
7765		case 16: /* D5 page flip */
7766		case 18: /* D6 page flip */
7767			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7768			if (radeon_use_pflipirq > 0)
7769				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7770			break;
7771		case 42: /* HPD hotplug */
7772			switch (src_data) {
7773			case 0:
7774				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7775					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7776
7777				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7778				queue_hotplug = true;
7779				DRM_DEBUG("IH: HPD1\n");
7780
7781				break;
7782			case 1:
7783				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7784					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7785
7786				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7787				queue_hotplug = true;
7788				DRM_DEBUG("IH: HPD2\n");
7789
7790				break;
7791			case 2:
7792				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7793					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7794
7795				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7796				queue_hotplug = true;
7797				DRM_DEBUG("IH: HPD3\n");
7798
7799				break;
7800			case 3:
7801				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7802					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7803
7804				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7805				queue_hotplug = true;
7806				DRM_DEBUG("IH: HPD4\n");
7807
7808				break;
7809			case 4:
7810				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7811					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7812
7813				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7814				queue_hotplug = true;
7815				DRM_DEBUG("IH: HPD5\n");
7816
7817				break;
7818			case 5:
7819				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7820					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7821
7822				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7823				queue_hotplug = true;
7824				DRM_DEBUG("IH: HPD6\n");
7825
7826				break;
7827			case 6:
7828				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7829					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7830
7831				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7832				queue_dp = true;
7833				DRM_DEBUG("IH: HPD_RX 1\n");
7834
7835				break;
7836			case 7:
7837				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7838					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7839
7840				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7841				queue_dp = true;
7842				DRM_DEBUG("IH: HPD_RX 2\n");
7843
7844				break;
7845			case 8:
7846				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7847					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7848
7849				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7850				queue_dp = true;
7851				DRM_DEBUG("IH: HPD_RX 3\n");
7852
7853				break;
7854			case 9:
7855				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7856					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7857
7858				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7859				queue_dp = true;
7860				DRM_DEBUG("IH: HPD_RX 4\n");
7861
7862				break;
7863			case 10:
7864				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7865					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7866
7867				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7868				queue_dp = true;
7869				DRM_DEBUG("IH: HPD_RX 5\n");
7870
7871				break;
7872			case 11:
7873				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7874					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7875
7876				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7877				queue_dp = true;
7878				DRM_DEBUG("IH: HPD_RX 6\n");
7879
7880				break;
7881			default:
7882				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7883				break;
7884			}
7885			break;
7886		case 96:
7887			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7888			WREG32(SRBM_INT_ACK, 0x1);
7889			break;
7890		case 124: /* UVD */
7891			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7892			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7893			break;
7894		case 146:
7895		case 147:
7896			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7897			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7898			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7899			/* reset addr and status */
7900			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7901			if (addr == 0x0 && status == 0x0)
7902				break;
7903			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7904			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7905				addr);
7906			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7907				status);
7908			cik_vm_decode_fault(rdev, status, addr, mc_client);
7909			break;
7910		case 167: /* VCE */
7911			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7912			switch (src_data) {
7913			case 0:
7914				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7915				break;
7916			case 1:
7917				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7918				break;
7919			default:
7920				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7921				break;
7922			}
7923			break;
7924		case 176: /* GFX RB CP_INT */
7925		case 177: /* GFX IB CP_INT */
7926			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7927			break;
7928		case 181: /* CP EOP event */
7929			DRM_DEBUG("IH: CP EOP\n");
7930			/* XXX check the bitfield order! */
7931			me_id = (ring_id & 0x60) >> 5;
7932			pipe_id = (ring_id & 0x18) >> 3;
7933			queue_id = (ring_id & 0x7) >> 0;
7934			switch (me_id) {
7935			case 0:
7936				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7937				break;
7938			case 1:
7939			case 2:
7940				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7941					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7942				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7943					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7944				break;
7945			}
7946			break;
7947		case 184: /* CP Privileged reg access */
7948			DRM_ERROR("Illegal register access in command stream\n");
7949			/* XXX check the bitfield order! */
7950			me_id = (ring_id & 0x60) >> 5;
7951			switch (me_id) {
7952			case 0:
7953				/* This results in a full GPU reset, but all we need to do is soft
7954				 * reset the CP for gfx
7955				 */
7956				queue_reset = true;
7957				break;
7958			case 1:
7959				/* XXX compute */
7960				queue_reset = true;
7961				break;
7962			case 2:
7963				/* XXX compute */
7964				queue_reset = true;
7965				break;
7966			}
7967			break;
7968		case 185: /* CP Privileged inst */
7969			DRM_ERROR("Illegal instruction in command stream\n");
7970			/* XXX check the bitfield order! */
7971			me_id = (ring_id & 0x60) >> 5;
7972			switch (me_id) {
7973			case 0:
7974				/* This results in a full GPU reset, but all we need to do is soft
7975				 * reset the CP for gfx
7976				 */
7977				queue_reset = true;
7978				break;
7979			case 1:
7980				/* XXX compute */
7981				queue_reset = true;
7982				break;
7983			case 2:
7984				/* XXX compute */
7985				queue_reset = true;
7986				break;
7987			}
7988			break;
7989		case 224: /* SDMA trap event */
7990			/* XXX check the bitfield order! */
7991			me_id = (ring_id & 0x3) >> 0;
7992			queue_id = (ring_id & 0xc) >> 2;
7993			DRM_DEBUG("IH: SDMA trap\n");
7994			switch (me_id) {
7995			case 0:
7996				switch (queue_id) {
7997				case 0:
7998					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7999					break;
8000				case 1:
8001					/* XXX compute */
8002					break;
8003				case 2:
8004					/* XXX compute */
8005					break;
8006				}
8007				break;
8008			case 1:
8009				switch (queue_id) {
8010				case 0:
8011					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8012					break;
8013				case 1:
8014					/* XXX compute */
8015					break;
8016				case 2:
8017					/* XXX compute */
8018					break;
8019				}
8020				break;
8021			}
8022			break;
8023		case 230: /* thermal low to high */
8024			DRM_DEBUG("IH: thermal low to high\n");
8025			rdev->pm.dpm.thermal.high_to_low = false;
8026			queue_thermal = true;
8027			break;
8028		case 231: /* thermal high to low */
8029			DRM_DEBUG("IH: thermal high to low\n");
8030			rdev->pm.dpm.thermal.high_to_low = true;
8031			queue_thermal = true;
8032			break;
8033		case 233: /* GUI IDLE */
8034			DRM_DEBUG("IH: GUI idle\n");
8035			break;
8036		case 241: /* SDMA Privileged inst */
8037		case 247: /* SDMA Privileged inst */
8038			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8039			/* XXX check the bitfield order! */
8040			me_id = (ring_id & 0x3) >> 0;
8041			queue_id = (ring_id & 0xc) >> 2;
8042			switch (me_id) {
8043			case 0:
8044				switch (queue_id) {
8045				case 0:
8046					queue_reset = true;
8047					break;
8048				case 1:
8049					/* XXX compute */
8050					queue_reset = true;
8051					break;
8052				case 2:
8053					/* XXX compute */
8054					queue_reset = true;
8055					break;
8056				}
8057				break;
8058			case 1:
8059				switch (queue_id) {
8060				case 0:
8061					queue_reset = true;
8062					break;
8063				case 1:
8064					/* XXX compute */
8065					queue_reset = true;
8066					break;
8067				case 2:
8068					/* XXX compute */
8069					queue_reset = true;
8070					break;
8071				}
8072				break;
8073			}
8074			break;
8075		default:
8076			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8077			break;
8078		}
8079
8080		/* wptr/rptr are in bytes! */
8081		rptr += 16;
8082		rptr &= rdev->ih.ptr_mask;
8083		WREG32(IH_RB_RPTR, rptr);
8084	}
8085	if (queue_dp)
8086		schedule_work(&rdev->dp_work);
8087	if (queue_hotplug)
8088		schedule_delayed_work(&rdev->hotplug_work, 0);
8089	if (queue_reset) {
8090		rdev->needs_reset = true;
8091		wake_up_all(&rdev->fence_queue);
8092	}
8093	if (queue_thermal)
8094		schedule_work(&rdev->pm.dpm.thermal.work);
8095	rdev->ih.rptr = rptr;
8096	atomic_set(&rdev->ih.lock, 0);
8097
8098	/* make sure wptr hasn't changed while processing */
8099	wptr = cik_get_ih_wptr(rdev);
8100	if (wptr != rptr)
8101		goto restart_ih;
8102
8103	return IRQ_HANDLED;
8104}
8105
8106/*
8107 * startup/shutdown callbacks
8108 */
8109static void cik_uvd_init(struct radeon_device *rdev)
8110{
8111	int r;
8112
8113	if (!rdev->has_uvd)
8114		return;
8115
8116	r = radeon_uvd_init(rdev);
8117	if (r) {
8118		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8119		/*
8120		 * At this point rdev->uvd.vcpu_bo is NULL which trickles down
8121		 * to early fails cik_uvd_start() and thus nothing happens
8122		 * there. So it is pointless to try to go through that code
8123		 * hence why we disable uvd here.
8124		 */
8125		rdev->has_uvd = false;
8126		return;
8127	}
8128	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8129	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8130}
8131
8132static void cik_uvd_start(struct radeon_device *rdev)
8133{
8134	int r;
8135
8136	if (!rdev->has_uvd)
8137		return;
8138
8139	r = radeon_uvd_resume(rdev);
8140	if (r) {
8141		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8142		goto error;
8143	}
8144	r = uvd_v4_2_resume(rdev);
8145	if (r) {
8146		dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8147		goto error;
8148	}
8149	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8150	if (r) {
8151		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8152		goto error;
8153	}
8154	return;
8155
8156error:
8157	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8158}
8159
8160static void cik_uvd_resume(struct radeon_device *rdev)
8161{
8162	struct radeon_ring *ring;
8163	int r;
8164
8165	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8166		return;
8167
8168	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8169	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8170	if (r) {
8171		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8172		return;
8173	}
8174	r = uvd_v1_0_init(rdev);
8175	if (r) {
8176		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8177		return;
8178	}
8179}
8180
8181static void cik_vce_init(struct radeon_device *rdev)
8182{
8183	int r;
8184
8185	if (!rdev->has_vce)
8186		return;
8187
8188	r = radeon_vce_init(rdev);
8189	if (r) {
8190		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8191		/*
8192		 * At this point rdev->vce.vcpu_bo is NULL which trickles down
8193		 * to early fails cik_vce_start() and thus nothing happens
8194		 * there. So it is pointless to try to go through that code
8195		 * hence why we disable vce here.
8196		 */
8197		rdev->has_vce = false;
8198		return;
8199	}
8200	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8201	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8202	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8203	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8204}
8205
8206static void cik_vce_start(struct radeon_device *rdev)
8207{
8208	int r;
8209
8210	if (!rdev->has_vce)
8211		return;
8212
8213	r = radeon_vce_resume(rdev);
8214	if (r) {
8215		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8216		goto error;
8217	}
8218	r = vce_v2_0_resume(rdev);
8219	if (r) {
8220		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8221		goto error;
8222	}
8223	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8224	if (r) {
8225		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8226		goto error;
8227	}
8228	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8229	if (r) {
8230		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8231		goto error;
8232	}
8233	return;
8234
8235error:
8236	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8237	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8238}
8239
8240static void cik_vce_resume(struct radeon_device *rdev)
8241{
8242	struct radeon_ring *ring;
8243	int r;
8244
8245	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8246		return;
8247
8248	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8249	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8250	if (r) {
8251		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8252		return;
8253	}
8254	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8255	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8256	if (r) {
8257		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8258		return;
8259	}
8260	r = vce_v1_0_init(rdev);
8261	if (r) {
8262		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8263		return;
8264	}
8265}
8266
8267/**
8268 * cik_startup - program the asic to a functional state
8269 *
8270 * @rdev: radeon_device pointer
8271 *
8272 * Programs the asic to a functional state (CIK).
8273 * Called by cik_init() and cik_resume().
8274 * Returns 0 for success, error for failure.
8275 */
8276static int cik_startup(struct radeon_device *rdev)
8277{
8278	struct radeon_ring *ring;
8279	u32 nop;
8280	int r;
8281
8282	/* enable pcie gen2/3 link */
8283	cik_pcie_gen3_enable(rdev);
8284	/* enable aspm */
8285	cik_program_aspm(rdev);
8286
8287	/* scratch needs to be initialized before MC */
8288	r = r600_vram_scratch_init(rdev);
8289	if (r)
8290		return r;
8291
8292	cik_mc_program(rdev);
8293
8294	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8295		r = ci_mc_load_microcode(rdev);
8296		if (r) {
8297			DRM_ERROR("Failed to load MC firmware!\n");
8298			return r;
8299		}
8300	}
8301
8302	r = cik_pcie_gart_enable(rdev);
8303	if (r)
8304		return r;
8305	cik_gpu_init(rdev);
8306
8307	/* allocate rlc buffers */
8308	if (rdev->flags & RADEON_IS_IGP) {
8309		if (rdev->family == CHIP_KAVERI) {
8310			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8311			rdev->rlc.reg_list_size =
8312				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8313		} else {
8314			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8315			rdev->rlc.reg_list_size =
8316				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8317		}
8318	}
8319	rdev->rlc.cs_data = ci_cs_data;
8320	rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
8321	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
8322	r = sumo_rlc_init(rdev);
8323	if (r) {
8324		DRM_ERROR("Failed to init rlc BOs!\n");
8325		return r;
8326	}
8327
8328	/* allocate wb buffer */
8329	r = radeon_wb_init(rdev);
8330	if (r)
8331		return r;
8332
8333	/* allocate mec buffers */
8334	r = cik_mec_init(rdev);
8335	if (r) {
8336		DRM_ERROR("Failed to init MEC BOs!\n");
8337		return r;
8338	}
8339
8340	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8341	if (r) {
8342		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8343		return r;
8344	}
8345
8346	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8347	if (r) {
8348		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8349		return r;
8350	}
8351
8352	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8353	if (r) {
8354		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8355		return r;
8356	}
8357
8358	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8359	if (r) {
8360		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8361		return r;
8362	}
8363
8364	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8365	if (r) {
8366		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8367		return r;
8368	}
8369
8370	cik_uvd_start(rdev);
8371	cik_vce_start(rdev);
8372
8373	/* Enable IRQ */
8374	if (!rdev->irq.installed) {
8375		r = radeon_irq_kms_init(rdev);
8376		if (r)
8377			return r;
8378	}
8379
8380	r = cik_irq_init(rdev);
8381	if (r) {
8382		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8383		radeon_irq_kms_fini(rdev);
8384		return r;
8385	}
8386	cik_irq_set(rdev);
8387
8388	if (rdev->family == CHIP_HAWAII) {
8389		if (rdev->new_fw)
8390			nop = PACKET3(PACKET3_NOP, 0x3FFF);
8391		else
8392			nop = RADEON_CP_PACKET2;
8393	} else {
8394		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8395	}
8396
8397	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8398	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8399			     nop);
8400	if (r)
8401		return r;
8402
8403	/* set up the compute queues */
8404	/* type-2 packets are deprecated on MEC, use type-3 instead */
8405	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8406	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8407			     nop);
8408	if (r)
8409		return r;
8410	ring->me = 1; /* first MEC */
8411	ring->pipe = 0; /* first pipe */
8412	ring->queue = 0; /* first queue */
8413	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8414
8415	/* type-2 packets are deprecated on MEC, use type-3 instead */
8416	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8417	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8418			     nop);
8419	if (r)
8420		return r;
8421	/* dGPU only have 1 MEC */
8422	ring->me = 1; /* first MEC */
8423	ring->pipe = 0; /* first pipe */
8424	ring->queue = 1; /* second queue */
8425	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8426
8427	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8428	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8429			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8430	if (r)
8431		return r;
8432
8433	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8434	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8435			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8436	if (r)
8437		return r;
8438
8439	r = cik_cp_resume(rdev);
8440	if (r)
8441		return r;
8442
8443	r = cik_sdma_resume(rdev);
8444	if (r)
8445		return r;
8446
8447	cik_uvd_resume(rdev);
8448	cik_vce_resume(rdev);
8449
8450	r = radeon_ib_pool_init(rdev);
8451	if (r) {
8452		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8453		return r;
8454	}
8455
8456	r = radeon_vm_manager_init(rdev);
8457	if (r) {
8458		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8459		return r;
8460	}
8461
8462	r = radeon_audio_init(rdev);
8463	if (r)
8464		return r;
8465
8466	return 0;
8467}
8468
8469/**
8470 * cik_resume - resume the asic to a functional state
8471 *
8472 * @rdev: radeon_device pointer
8473 *
8474 * Programs the asic to a functional state (CIK).
8475 * Called at resume.
8476 * Returns 0 for success, error for failure.
8477 */
8478int cik_resume(struct radeon_device *rdev)
8479{
8480	int r;
8481
8482	/* post card */
8483	atom_asic_init(rdev->mode_info.atom_context);
8484
8485	/* init golden registers */
8486	cik_init_golden_registers(rdev);
8487
8488	if (rdev->pm.pm_method == PM_METHOD_DPM)
8489		radeon_pm_resume(rdev);
8490
8491	rdev->accel_working = true;
8492	r = cik_startup(rdev);
8493	if (r) {
8494		DRM_ERROR("cik startup failed on resume\n");
8495		rdev->accel_working = false;
8496		return r;
8497	}
8498
8499	return r;
8500
8501}
8502
8503/**
8504 * cik_suspend - suspend the asic
8505 *
8506 * @rdev: radeon_device pointer
8507 *
8508 * Bring the chip into a state suitable for suspend (CIK).
8509 * Called at suspend.
8510 * Returns 0 for success.
8511 */
8512int cik_suspend(struct radeon_device *rdev)
8513{
8514	radeon_pm_suspend(rdev);
8515	radeon_audio_fini(rdev);
8516	radeon_vm_manager_fini(rdev);
8517	cik_cp_enable(rdev, false);
8518	cik_sdma_enable(rdev, false);
8519	if (rdev->has_uvd) {
8520		radeon_uvd_suspend(rdev);
8521		uvd_v1_0_fini(rdev);
8522	}
8523	if (rdev->has_vce)
8524		radeon_vce_suspend(rdev);
8525	cik_fini_pg(rdev);
8526	cik_fini_cg(rdev);
8527	cik_irq_suspend(rdev);
8528	radeon_wb_disable(rdev);
8529	cik_pcie_gart_disable(rdev);
8530	return 0;
8531}
8532
8533/* Plan is to move initialization in that function and use
8534 * helper function so that radeon_device_init pretty much
8535 * do nothing more than calling asic specific function. This
8536 * should also allow to remove a bunch of callback function
8537 * like vram_info.
8538 */
8539/**
8540 * cik_init - asic specific driver and hw init
8541 *
8542 * @rdev: radeon_device pointer
8543 *
8544 * Setup asic specific driver variables and program the hw
8545 * to a functional state (CIK).
8546 * Called at driver startup.
8547 * Returns 0 for success, errors for failure.
8548 */
8549int cik_init(struct radeon_device *rdev)
8550{
8551	struct radeon_ring *ring;
8552	int r;
8553
8554	/* Read BIOS */
8555	if (!radeon_get_bios(rdev)) {
8556		if (ASIC_IS_AVIVO(rdev))
8557			return -EINVAL;
8558	}
8559	/* Must be an ATOMBIOS */
8560	if (!rdev->is_atom_bios) {
8561		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8562		return -EINVAL;
8563	}
8564	r = radeon_atombios_init(rdev);
8565	if (r)
8566		return r;
8567
8568	/* Post card if necessary */
8569	if (!radeon_card_posted(rdev)) {
8570		if (!rdev->bios) {
8571			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8572			return -EINVAL;
8573		}
8574		DRM_INFO("GPU not posted. posting now...\n");
8575		atom_asic_init(rdev->mode_info.atom_context);
8576	}
8577	/* init golden registers */
8578	cik_init_golden_registers(rdev);
8579	/* Initialize scratch registers */
8580	cik_scratch_init(rdev);
8581	/* Initialize surface registers */
8582	radeon_surface_init(rdev);
8583	/* Initialize clocks */
8584	radeon_get_clock_info(rdev->ddev);
8585
8586	/* Fence driver */
8587	radeon_fence_driver_init(rdev);
8588
8589	/* initialize memory controller */
8590	r = cik_mc_init(rdev);
8591	if (r)
8592		return r;
8593	/* Memory manager */
8594	r = radeon_bo_init(rdev);
8595	if (r)
8596		return r;
8597
8598	if (rdev->flags & RADEON_IS_IGP) {
8599		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8600		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8601			r = cik_init_microcode(rdev);
8602			if (r) {
8603				DRM_ERROR("Failed to load firmware!\n");
8604				return r;
8605			}
8606		}
8607	} else {
8608		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8609		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8610		    !rdev->mc_fw) {
8611			r = cik_init_microcode(rdev);
8612			if (r) {
8613				DRM_ERROR("Failed to load firmware!\n");
8614				return r;
8615			}
8616		}
8617	}
8618
8619	/* Initialize power management */
8620	radeon_pm_init(rdev);
8621
8622	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8623	ring->ring_obj = NULL;
8624	r600_ring_init(rdev, ring, 1024 * 1024);
8625
8626	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8627	ring->ring_obj = NULL;
8628	r600_ring_init(rdev, ring, 1024 * 1024);
8629	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8630	if (r)
8631		return r;
8632
8633	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8634	ring->ring_obj = NULL;
8635	r600_ring_init(rdev, ring, 1024 * 1024);
8636	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8637	if (r)
8638		return r;
8639
8640	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8641	ring->ring_obj = NULL;
8642	r600_ring_init(rdev, ring, 256 * 1024);
8643
8644	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8645	ring->ring_obj = NULL;
8646	r600_ring_init(rdev, ring, 256 * 1024);
8647
8648	cik_uvd_init(rdev);
8649	cik_vce_init(rdev);
8650
8651	rdev->ih.ring_obj = NULL;
8652	r600_ih_ring_init(rdev, 64 * 1024);
8653
8654	r = r600_pcie_gart_init(rdev);
8655	if (r)
8656		return r;
8657
8658	rdev->accel_working = true;
8659	r = cik_startup(rdev);
8660	if (r) {
8661		dev_err(rdev->dev, "disabling GPU acceleration\n");
8662		cik_cp_fini(rdev);
8663		cik_sdma_fini(rdev);
8664		cik_irq_fini(rdev);
8665		sumo_rlc_fini(rdev);
8666		cik_mec_fini(rdev);
8667		radeon_wb_fini(rdev);
8668		radeon_ib_pool_fini(rdev);
8669		radeon_vm_manager_fini(rdev);
8670		radeon_irq_kms_fini(rdev);
8671		cik_pcie_gart_fini(rdev);
8672		rdev->accel_working = false;
8673	}
8674
8675	/* Don't start up if the MC ucode is missing.
8676	 * The default clocks and voltages before the MC ucode
8677	 * is loaded are not suffient for advanced operations.
8678	 */
8679	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8680		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8681		return -EINVAL;
8682	}
8683
8684	return 0;
8685}
8686
8687/**
8688 * cik_fini - asic specific driver and hw fini
8689 *
8690 * @rdev: radeon_device pointer
8691 *
8692 * Tear down the asic specific driver variables and program the hw
8693 * to an idle state (CIK).
8694 * Called at driver unload.
8695 */
8696void cik_fini(struct radeon_device *rdev)
8697{
8698	radeon_pm_fini(rdev);
8699	cik_cp_fini(rdev);
8700	cik_sdma_fini(rdev);
8701	cik_fini_pg(rdev);
8702	cik_fini_cg(rdev);
8703	cik_irq_fini(rdev);
8704	sumo_rlc_fini(rdev);
8705	cik_mec_fini(rdev);
8706	radeon_wb_fini(rdev);
8707	radeon_vm_manager_fini(rdev);
8708	radeon_ib_pool_fini(rdev);
8709	radeon_irq_kms_fini(rdev);
8710	uvd_v1_0_fini(rdev);
8711	radeon_uvd_fini(rdev);
8712	radeon_vce_fini(rdev);
8713	cik_pcie_gart_fini(rdev);
8714	r600_vram_scratch_fini(rdev);
8715	radeon_gem_fini(rdev);
8716	radeon_fence_driver_fini(rdev);
8717	radeon_bo_fini(rdev);
8718	radeon_atombios_fini(rdev);
8719	kfree(rdev->bios);
8720	rdev->bios = NULL;
8721}
8722
8723void dce8_program_fmt(struct drm_encoder *encoder)
8724{
8725	struct drm_device *dev = encoder->dev;
8726	struct radeon_device *rdev = dev->dev_private;
8727	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8728	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8729	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8730	int bpc = 0;
8731	u32 tmp = 0;
8732	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8733
8734	if (connector) {
8735		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8736		bpc = radeon_get_monitor_bpc(connector);
8737		dither = radeon_connector->dither;
8738	}
8739
8740	/* LVDS/eDP FMT is set up by atom */
8741	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8742		return;
8743
8744	/* not needed for analog */
8745	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8746	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8747		return;
8748
8749	if (bpc == 0)
8750		return;
8751
8752	switch (bpc) {
8753	case 6:
8754		if (dither == RADEON_FMT_DITHER_ENABLE)
8755			/* XXX sort out optimal dither settings */
8756			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8757				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8758		else
8759			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8760		break;
8761	case 8:
8762		if (dither == RADEON_FMT_DITHER_ENABLE)
8763			/* XXX sort out optimal dither settings */
8764			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8765				FMT_RGB_RANDOM_ENABLE |
8766				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8767		else
8768			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8769		break;
8770	case 10:
8771		if (dither == RADEON_FMT_DITHER_ENABLE)
8772			/* XXX sort out optimal dither settings */
8773			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8774				FMT_RGB_RANDOM_ENABLE |
8775				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8776		else
8777			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8778		break;
8779	default:
8780		/* not needed */
8781		break;
8782	}
8783
8784	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8785}
8786
8787/* display watermark setup */
8788/**
8789 * dce8_line_buffer_adjust - Set up the line buffer
8790 *
8791 * @rdev: radeon_device pointer
8792 * @radeon_crtc: the selected display controller
8793 * @mode: the current display mode on the selected display
8794 * controller
8795 *
8796 * Setup up the line buffer allocation for
8797 * the selected display controller (CIK).
8798 * Returns the line buffer size in pixels.
8799 */
8800static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8801				   struct radeon_crtc *radeon_crtc,
8802				   struct drm_display_mode *mode)
8803{
8804	u32 tmp, buffer_alloc, i;
8805	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8806	/*
8807	 * Line Buffer Setup
8808	 * There are 6 line buffers, one for each display controllers.
8809	 * There are 3 partitions per LB. Select the number of partitions
8810	 * to enable based on the display width.  For display widths larger
8811	 * than 4096, you need use to use 2 display controllers and combine
8812	 * them using the stereo blender.
8813	 */
8814	if (radeon_crtc->base.enabled && mode) {
8815		if (mode->crtc_hdisplay < 1920) {
8816			tmp = 1;
8817			buffer_alloc = 2;
8818		} else if (mode->crtc_hdisplay < 2560) {
8819			tmp = 2;
8820			buffer_alloc = 2;
8821		} else if (mode->crtc_hdisplay < 4096) {
8822			tmp = 0;
8823			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8824		} else {
8825			DRM_DEBUG_KMS("Mode too big for LB!\n");
8826			tmp = 0;
8827			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8828		}
8829	} else {
8830		tmp = 1;
8831		buffer_alloc = 0;
8832	}
8833
8834	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8835	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8836
8837	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8838	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8839	for (i = 0; i < rdev->usec_timeout; i++) {
8840		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8841		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8842			break;
8843		udelay(1);
8844	}
8845
8846	if (radeon_crtc->base.enabled && mode) {
8847		switch (tmp) {
8848		case 0:
8849		default:
8850			return 4096 * 2;
8851		case 1:
8852			return 1920 * 2;
8853		case 2:
8854			return 2560 * 2;
8855		}
8856	}
8857
8858	/* controller not enabled, so no lb used */
8859	return 0;
8860}
8861
8862/**
8863 * cik_get_number_of_dram_channels - get the number of dram channels
8864 *
8865 * @rdev: radeon_device pointer
8866 *
8867 * Look up the number of video ram channels (CIK).
8868 * Used for display watermark bandwidth calculations
8869 * Returns the number of dram channels
8870 */
8871static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8872{
8873	u32 tmp = RREG32(MC_SHARED_CHMAP);
8874
8875	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8876	case 0:
8877	default:
8878		return 1;
8879	case 1:
8880		return 2;
8881	case 2:
8882		return 4;
8883	case 3:
8884		return 8;
8885	case 4:
8886		return 3;
8887	case 5:
8888		return 6;
8889	case 6:
8890		return 10;
8891	case 7:
8892		return 12;
8893	case 8:
8894		return 16;
8895	}
8896}
8897
8898struct dce8_wm_params {
8899	u32 dram_channels; /* number of dram channels */
8900	u32 yclk;          /* bandwidth per dram data pin in kHz */
8901	u32 sclk;          /* engine clock in kHz */
8902	u32 disp_clk;      /* display clock in kHz */
8903	u32 src_width;     /* viewport width */
8904	u32 active_time;   /* active display time in ns */
8905	u32 blank_time;    /* blank time in ns */
8906	bool interlaced;    /* mode is interlaced */
8907	fixed20_12 vsc;    /* vertical scale ratio */
8908	u32 num_heads;     /* number of active crtcs */
8909	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8910	u32 lb_size;       /* line buffer allocated to pipe */
8911	u32 vtaps;         /* vertical scaler taps */
8912};
8913
8914/**
8915 * dce8_dram_bandwidth - get the dram bandwidth
8916 *
8917 * @wm: watermark calculation data
8918 *
8919 * Calculate the raw dram bandwidth (CIK).
8920 * Used for display watermark bandwidth calculations
8921 * Returns the dram bandwidth in MBytes/s
8922 */
8923static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8924{
8925	/* Calculate raw DRAM Bandwidth */
8926	fixed20_12 dram_efficiency; /* 0.7 */
8927	fixed20_12 yclk, dram_channels, bandwidth;
8928	fixed20_12 a;
8929
8930	a.full = dfixed_const(1000);
8931	yclk.full = dfixed_const(wm->yclk);
8932	yclk.full = dfixed_div(yclk, a);
8933	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8934	a.full = dfixed_const(10);
8935	dram_efficiency.full = dfixed_const(7);
8936	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8937	bandwidth.full = dfixed_mul(dram_channels, yclk);
8938	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8939
8940	return dfixed_trunc(bandwidth);
8941}
8942
8943/**
8944 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8945 *
8946 * @wm: watermark calculation data
8947 *
8948 * Calculate the dram bandwidth used for display (CIK).
8949 * Used for display watermark bandwidth calculations
8950 * Returns the dram bandwidth for display in MBytes/s
8951 */
8952static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8953{
8954	/* Calculate DRAM Bandwidth and the part allocated to display. */
8955	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8956	fixed20_12 yclk, dram_channels, bandwidth;
8957	fixed20_12 a;
8958
8959	a.full = dfixed_const(1000);
8960	yclk.full = dfixed_const(wm->yclk);
8961	yclk.full = dfixed_div(yclk, a);
8962	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8963	a.full = dfixed_const(10);
8964	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8965	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8966	bandwidth.full = dfixed_mul(dram_channels, yclk);
8967	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8968
8969	return dfixed_trunc(bandwidth);
8970}
8971
8972/**
8973 * dce8_data_return_bandwidth - get the data return bandwidth
8974 *
8975 * @wm: watermark calculation data
8976 *
8977 * Calculate the data return bandwidth used for display (CIK).
8978 * Used for display watermark bandwidth calculations
8979 * Returns the data return bandwidth in MBytes/s
8980 */
8981static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8982{
8983	/* Calculate the display Data return Bandwidth */
8984	fixed20_12 return_efficiency; /* 0.8 */
8985	fixed20_12 sclk, bandwidth;
8986	fixed20_12 a;
8987
8988	a.full = dfixed_const(1000);
8989	sclk.full = dfixed_const(wm->sclk);
8990	sclk.full = dfixed_div(sclk, a);
8991	a.full = dfixed_const(10);
8992	return_efficiency.full = dfixed_const(8);
8993	return_efficiency.full = dfixed_div(return_efficiency, a);
8994	a.full = dfixed_const(32);
8995	bandwidth.full = dfixed_mul(a, sclk);
8996	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8997
8998	return dfixed_trunc(bandwidth);
8999}
9000
9001/**
9002 * dce8_dmif_request_bandwidth - get the dmif bandwidth
9003 *
9004 * @wm: watermark calculation data
9005 *
9006 * Calculate the dmif bandwidth used for display (CIK).
9007 * Used for display watermark bandwidth calculations
9008 * Returns the dmif bandwidth in MBytes/s
9009 */
9010static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9011{
9012	/* Calculate the DMIF Request Bandwidth */
9013	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9014	fixed20_12 disp_clk, bandwidth;
9015	fixed20_12 a, b;
9016
9017	a.full = dfixed_const(1000);
9018	disp_clk.full = dfixed_const(wm->disp_clk);
9019	disp_clk.full = dfixed_div(disp_clk, a);
9020	a.full = dfixed_const(32);
9021	b.full = dfixed_mul(a, disp_clk);
9022
9023	a.full = dfixed_const(10);
9024	disp_clk_request_efficiency.full = dfixed_const(8);
9025	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9026
9027	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9028
9029	return dfixed_trunc(bandwidth);
9030}
9031
9032/**
9033 * dce8_available_bandwidth - get the min available bandwidth
9034 *
9035 * @wm: watermark calculation data
9036 *
9037 * Calculate the min available bandwidth used for display (CIK).
9038 * Used for display watermark bandwidth calculations
9039 * Returns the min available bandwidth in MBytes/s
9040 */
9041static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9042{
9043	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9044	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9045	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9046	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9047
9048	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9049}
9050
9051/**
9052 * dce8_average_bandwidth - get the average available bandwidth
9053 *
9054 * @wm: watermark calculation data
9055 *
9056 * Calculate the average available bandwidth used for display (CIK).
9057 * Used for display watermark bandwidth calculations
9058 * Returns the average available bandwidth in MBytes/s
9059 */
9060static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9061{
9062	/* Calculate the display mode Average Bandwidth
9063	 * DisplayMode should contain the source and destination dimensions,
9064	 * timing, etc.
9065	 */
9066	fixed20_12 bpp;
9067	fixed20_12 line_time;
9068	fixed20_12 src_width;
9069	fixed20_12 bandwidth;
9070	fixed20_12 a;
9071
9072	a.full = dfixed_const(1000);
9073	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9074	line_time.full = dfixed_div(line_time, a);
9075	bpp.full = dfixed_const(wm->bytes_per_pixel);
9076	src_width.full = dfixed_const(wm->src_width);
9077	bandwidth.full = dfixed_mul(src_width, bpp);
9078	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9079	bandwidth.full = dfixed_div(bandwidth, line_time);
9080
9081	return dfixed_trunc(bandwidth);
9082}
9083
9084/**
9085 * dce8_latency_watermark - get the latency watermark
9086 *
9087 * @wm: watermark calculation data
9088 *
9089 * Calculate the latency watermark (CIK).
9090 * Used for display watermark bandwidth calculations
9091 * Returns the latency watermark in ns
9092 */
9093static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9094{
9095	/* First calculate the latency in ns */
9096	u32 mc_latency = 2000; /* 2000 ns. */
9097	u32 available_bandwidth = dce8_available_bandwidth(wm);
9098	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9099	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9100	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9101	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9102		(wm->num_heads * cursor_line_pair_return_time);
9103	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9104	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9105	u32 tmp, dmif_size = 12288;
9106	fixed20_12 a, b, c;
9107
9108	if (wm->num_heads == 0)
9109		return 0;
9110
9111	a.full = dfixed_const(2);
9112	b.full = dfixed_const(1);
9113	if ((wm->vsc.full > a.full) ||
9114	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9115	    (wm->vtaps >= 5) ||
9116	    ((wm->vsc.full >= a.full) && wm->interlaced))
9117		max_src_lines_per_dst_line = 4;
9118	else
9119		max_src_lines_per_dst_line = 2;
9120
9121	a.full = dfixed_const(available_bandwidth);
9122	b.full = dfixed_const(wm->num_heads);
9123	a.full = dfixed_div(a, b);
9124	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
9125	tmp = min(dfixed_trunc(a), tmp);
9126
9127	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
9128
9129	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9130	b.full = dfixed_const(1000);
9131	c.full = dfixed_const(lb_fill_bw);
9132	b.full = dfixed_div(c, b);
9133	a.full = dfixed_div(a, b);
9134	line_fill_time = dfixed_trunc(a);
9135
9136	if (line_fill_time < wm->active_time)
9137		return latency;
9138	else
9139		return latency + (line_fill_time - wm->active_time);
9140
9141}
9142
9143/**
9144 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9145 * average and available dram bandwidth
9146 *
9147 * @wm: watermark calculation data
9148 *
9149 * Check if the display average bandwidth fits in the display
9150 * dram bandwidth (CIK).
9151 * Used for display watermark bandwidth calculations
9152 * Returns true if the display fits, false if not.
9153 */
9154static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9155{
9156	if (dce8_average_bandwidth(wm) <=
9157	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9158		return true;
9159	else
9160		return false;
9161}
9162
9163/**
9164 * dce8_average_bandwidth_vs_available_bandwidth - check
9165 * average and available bandwidth
9166 *
9167 * @wm: watermark calculation data
9168 *
9169 * Check if the display average bandwidth fits in the display
9170 * available bandwidth (CIK).
9171 * Used for display watermark bandwidth calculations
9172 * Returns true if the display fits, false if not.
9173 */
9174static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9175{
9176	if (dce8_average_bandwidth(wm) <=
9177	    (dce8_available_bandwidth(wm) / wm->num_heads))
9178		return true;
9179	else
9180		return false;
9181}
9182
9183/**
9184 * dce8_check_latency_hiding - check latency hiding
9185 *
9186 * @wm: watermark calculation data
9187 *
9188 * Check latency hiding (CIK).
9189 * Used for display watermark bandwidth calculations
9190 * Returns true if the display fits, false if not.
9191 */
9192static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9193{
9194	u32 lb_partitions = wm->lb_size / wm->src_width;
9195	u32 line_time = wm->active_time + wm->blank_time;
9196	u32 latency_tolerant_lines;
9197	u32 latency_hiding;
9198	fixed20_12 a;
9199
9200	a.full = dfixed_const(1);
9201	if (wm->vsc.full > a.full)
9202		latency_tolerant_lines = 1;
9203	else {
9204		if (lb_partitions <= (wm->vtaps + 1))
9205			latency_tolerant_lines = 1;
9206		else
9207			latency_tolerant_lines = 2;
9208	}
9209
9210	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9211
9212	if (dce8_latency_watermark(wm) <= latency_hiding)
9213		return true;
9214	else
9215		return false;
9216}
9217
9218/**
9219 * dce8_program_watermarks - program display watermarks
9220 *
9221 * @rdev: radeon_device pointer
9222 * @radeon_crtc: the selected display controller
9223 * @lb_size: line buffer size
9224 * @num_heads: number of display controllers in use
9225 *
9226 * Calculate and program the display watermarks for the
9227 * selected display controller (CIK).
9228 */
9229static void dce8_program_watermarks(struct radeon_device *rdev,
9230				    struct radeon_crtc *radeon_crtc,
9231				    u32 lb_size, u32 num_heads)
9232{
9233	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9234	struct dce8_wm_params wm_low, wm_high;
9235	u32 active_time;
9236	u32 line_time = 0;
9237	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9238	u32 tmp, wm_mask;
9239
9240	if (radeon_crtc->base.enabled && num_heads && mode) {
9241		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
9242					    (u32)mode->clock);
9243		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
9244					  (u32)mode->clock);
9245		line_time = min(line_time, (u32)65535);
9246
9247		/* watermark for high clocks */
9248		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9249		    rdev->pm.dpm_enabled) {
9250			wm_high.yclk =
9251				radeon_dpm_get_mclk(rdev, false) * 10;
9252			wm_high.sclk =
9253				radeon_dpm_get_sclk(rdev, false) * 10;
9254		} else {
9255			wm_high.yclk = rdev->pm.current_mclk * 10;
9256			wm_high.sclk = rdev->pm.current_sclk * 10;
9257		}
9258
9259		wm_high.disp_clk = mode->clock;
9260		wm_high.src_width = mode->crtc_hdisplay;
9261		wm_high.active_time = active_time;
9262		wm_high.blank_time = line_time - wm_high.active_time;
9263		wm_high.interlaced = false;
9264		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9265			wm_high.interlaced = true;
9266		wm_high.vsc = radeon_crtc->vsc;
9267		wm_high.vtaps = 1;
9268		if (radeon_crtc->rmx_type != RMX_OFF)
9269			wm_high.vtaps = 2;
9270		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9271		wm_high.lb_size = lb_size;
9272		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9273		wm_high.num_heads = num_heads;
9274
9275		/* set for high clocks */
9276		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9277
9278		/* possibly force display priority to high */
9279		/* should really do this at mode validation time... */
9280		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9281		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9282		    !dce8_check_latency_hiding(&wm_high) ||
9283		    (rdev->disp_priority == 2)) {
9284			DRM_DEBUG_KMS("force priority to high\n");
9285		}
9286
9287		/* watermark for low clocks */
9288		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9289		    rdev->pm.dpm_enabled) {
9290			wm_low.yclk =
9291				radeon_dpm_get_mclk(rdev, true) * 10;
9292			wm_low.sclk =
9293				radeon_dpm_get_sclk(rdev, true) * 10;
9294		} else {
9295			wm_low.yclk = rdev->pm.current_mclk * 10;
9296			wm_low.sclk = rdev->pm.current_sclk * 10;
9297		}
9298
9299		wm_low.disp_clk = mode->clock;
9300		wm_low.src_width = mode->crtc_hdisplay;
9301		wm_low.active_time = active_time;
9302		wm_low.blank_time = line_time - wm_low.active_time;
9303		wm_low.interlaced = false;
9304		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9305			wm_low.interlaced = true;
9306		wm_low.vsc = radeon_crtc->vsc;
9307		wm_low.vtaps = 1;
9308		if (radeon_crtc->rmx_type != RMX_OFF)
9309			wm_low.vtaps = 2;
9310		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9311		wm_low.lb_size = lb_size;
9312		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9313		wm_low.num_heads = num_heads;
9314
9315		/* set for low clocks */
9316		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9317
9318		/* possibly force display priority to high */
9319		/* should really do this at mode validation time... */
9320		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9321		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9322		    !dce8_check_latency_hiding(&wm_low) ||
9323		    (rdev->disp_priority == 2)) {
9324			DRM_DEBUG_KMS("force priority to high\n");
9325		}
9326
9327		/* Save number of lines the linebuffer leads before the scanout */
9328		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9329	}
9330
9331	/* select wm A */
9332	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9333	tmp = wm_mask;
9334	tmp &= ~LATENCY_WATERMARK_MASK(3);
9335	tmp |= LATENCY_WATERMARK_MASK(1);
9336	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9337	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9338	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9339		LATENCY_HIGH_WATERMARK(line_time)));
9340	/* select wm B */
9341	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9342	tmp &= ~LATENCY_WATERMARK_MASK(3);
9343	tmp |= LATENCY_WATERMARK_MASK(2);
9344	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9345	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9346	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9347		LATENCY_HIGH_WATERMARK(line_time)));
9348	/* restore original selection */
9349	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9350
9351	/* save values for DPM */
9352	radeon_crtc->line_time = line_time;
9353	radeon_crtc->wm_high = latency_watermark_a;
9354	radeon_crtc->wm_low = latency_watermark_b;
9355}
9356
9357/**
9358 * dce8_bandwidth_update - program display watermarks
9359 *
9360 * @rdev: radeon_device pointer
9361 *
9362 * Calculate and program the display watermarks and line
9363 * buffer allocation (CIK).
9364 */
9365void dce8_bandwidth_update(struct radeon_device *rdev)
9366{
9367	struct drm_display_mode *mode = NULL;
9368	u32 num_heads = 0, lb_size;
9369	int i;
9370
9371	if (!rdev->mode_info.mode_config_initialized)
9372		return;
9373
9374	radeon_update_display_priority(rdev);
9375
9376	for (i = 0; i < rdev->num_crtc; i++) {
9377		if (rdev->mode_info.crtcs[i]->base.enabled)
9378			num_heads++;
9379	}
9380	for (i = 0; i < rdev->num_crtc; i++) {
9381		mode = &rdev->mode_info.crtcs[i]->base.mode;
9382		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9383		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9384	}
9385}
9386
9387/**
9388 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9389 *
9390 * @rdev: radeon_device pointer
9391 *
9392 * Fetches a GPU clock counter snapshot (SI).
9393 * Returns the 64 bit clock counter snapshot.
9394 */
9395uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9396{
9397	uint64_t clock;
9398
9399	mutex_lock(&rdev->gpu_clock_mutex);
9400	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9401	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9402		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9403	mutex_unlock(&rdev->gpu_clock_mutex);
9404	return clock;
9405}
9406
9407static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9408			     u32 cntl_reg, u32 status_reg)
9409{
9410	int r, i;
9411	struct atom_clock_dividers dividers;
9412	uint32_t tmp;
9413
9414	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9415					   clock, false, &dividers);
9416	if (r)
9417		return r;
9418
9419	tmp = RREG32_SMC(cntl_reg);
9420	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9421	tmp |= dividers.post_divider;
9422	WREG32_SMC(cntl_reg, tmp);
9423
9424	for (i = 0; i < 100; i++) {
9425		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9426			break;
9427		mdelay(10);
9428	}
9429	if (i == 100)
9430		return -ETIMEDOUT;
9431
9432	return 0;
9433}
9434
9435int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9436{
9437	int r = 0;
9438
9439	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9440	if (r)
9441		return r;
9442
9443	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9444	return r;
9445}
9446
9447int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9448{
9449	int r, i;
9450	struct atom_clock_dividers dividers;
9451	u32 tmp;
9452
9453	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9454					   ecclk, false, &dividers);
9455	if (r)
9456		return r;
9457
9458	for (i = 0; i < 100; i++) {
9459		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9460			break;
9461		mdelay(10);
9462	}
9463	if (i == 100)
9464		return -ETIMEDOUT;
9465
9466	tmp = RREG32_SMC(CG_ECLK_CNTL);
9467	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9468	tmp |= dividers.post_divider;
9469	WREG32_SMC(CG_ECLK_CNTL, tmp);
9470
9471	for (i = 0; i < 100; i++) {
9472		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9473			break;
9474		mdelay(10);
9475	}
9476	if (i == 100)
9477		return -ETIMEDOUT;
9478
9479	return 0;
9480}
9481
9482static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9483{
9484	struct pci_dev *root = rdev->pdev->bus->self;
9485	enum pci_bus_speed speed_cap;
9486	u32 speed_cntl, current_data_rate;
9487	int i;
9488	u16 tmp16;
9489
9490	if (pci_is_root_bus(rdev->pdev->bus))
9491		return;
9492
9493	if (radeon_pcie_gen2 == 0)
9494		return;
9495
9496	if (rdev->flags & RADEON_IS_IGP)
9497		return;
9498
9499	if (!(rdev->flags & RADEON_IS_PCIE))
9500		return;
9501
9502	speed_cap = pcie_get_speed_cap(root);
9503	if (speed_cap == PCI_SPEED_UNKNOWN)
9504		return;
9505
9506	if ((speed_cap != PCIE_SPEED_8_0GT) &&
9507	    (speed_cap != PCIE_SPEED_5_0GT))
9508		return;
9509
9510	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9511	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9512		LC_CURRENT_DATA_RATE_SHIFT;
9513	if (speed_cap == PCIE_SPEED_8_0GT) {
9514		if (current_data_rate == 2) {
9515			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9516			return;
9517		}
9518		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9519	} else if (speed_cap == PCIE_SPEED_5_0GT) {
9520		if (current_data_rate == 1) {
9521			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9522			return;
9523		}
9524		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9525	}
9526
9527	if (!pci_is_pcie(root) || !pci_is_pcie(rdev->pdev))
9528		return;
9529
9530	if (speed_cap == PCIE_SPEED_8_0GT) {
9531		/* re-try equalization if gen3 is not already enabled */
9532		if (current_data_rate != 2) {
9533			u16 bridge_cfg, gpu_cfg;
9534			u16 bridge_cfg2, gpu_cfg2;
9535			u32 max_lw, current_lw, tmp;
9536
9537			pcie_capability_set_word(root, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
9538			pcie_capability_set_word(rdev->pdev, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
9539
9540			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9541			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9542			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9543
9544			if (current_lw < max_lw) {
9545				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9546				if (tmp & LC_RENEGOTIATION_SUPPORT) {
9547					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9548					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9549					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9550					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9551				}
9552			}
9553
9554			for (i = 0; i < 10; i++) {
9555				/* check status */
9556				pcie_capability_read_word(rdev->pdev,
9557							  PCI_EXP_DEVSTA,
9558							  &tmp16);
9559				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9560					break;
9561
9562				pcie_capability_read_word(root, PCI_EXP_LNKCTL,
9563							  &bridge_cfg);
9564				pcie_capability_read_word(rdev->pdev,
9565							  PCI_EXP_LNKCTL,
9566							  &gpu_cfg);
9567
9568				pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
9569							  &bridge_cfg2);
9570				pcie_capability_read_word(rdev->pdev,
9571							  PCI_EXP_LNKCTL2,
9572							  &gpu_cfg2);
9573
9574				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9575				tmp |= LC_SET_QUIESCE;
9576				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9577
9578				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9579				tmp |= LC_REDO_EQ;
9580				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9581
9582				msleep(100);
9583
9584				/* linkctl */
9585				pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL,
9586								   PCI_EXP_LNKCTL_HAWD,
9587								   bridge_cfg &
9588								   PCI_EXP_LNKCTL_HAWD);
9589				pcie_capability_clear_and_set_word(rdev->pdev, PCI_EXP_LNKCTL,
9590								   PCI_EXP_LNKCTL_HAWD,
9591								   gpu_cfg &
9592								   PCI_EXP_LNKCTL_HAWD);
9593
9594				/* linkctl2 */
9595				pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL2,
9596								   PCI_EXP_LNKCTL2_ENTER_COMP |
9597								   PCI_EXP_LNKCTL2_TX_MARGIN,
9598								   bridge_cfg2 |
9599								   (PCI_EXP_LNKCTL2_ENTER_COMP |
9600								    PCI_EXP_LNKCTL2_TX_MARGIN));
9601				pcie_capability_clear_and_set_word(rdev->pdev, PCI_EXP_LNKCTL2,
9602								   PCI_EXP_LNKCTL2_ENTER_COMP |
9603								   PCI_EXP_LNKCTL2_TX_MARGIN,
9604								   gpu_cfg2 |
9605								   (PCI_EXP_LNKCTL2_ENTER_COMP |
9606								    PCI_EXP_LNKCTL2_TX_MARGIN));
9607
9608				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9609				tmp &= ~LC_SET_QUIESCE;
9610				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9611			}
9612		}
9613	}
9614
9615	/* set the link speed */
9616	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9617	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9618	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9619
9620	tmp16 = 0;
9621	if (speed_cap == PCIE_SPEED_8_0GT)
9622		tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
9623	else if (speed_cap == PCIE_SPEED_5_0GT)
9624		tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
9625	else
9626		tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
9627	pcie_capability_clear_and_set_word(rdev->pdev, PCI_EXP_LNKCTL2,
9628					   PCI_EXP_LNKCTL2_TLS, tmp16);
9629
9630	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9631	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9632	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9633
9634	for (i = 0; i < rdev->usec_timeout; i++) {
9635		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9636		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9637			break;
9638		udelay(1);
9639	}
9640}
9641
9642static void cik_program_aspm(struct radeon_device *rdev)
9643{
9644	u32 data, orig;
9645	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9646	bool disable_clkreq = false;
9647
9648	if (radeon_aspm == 0)
9649		return;
9650
9651	/* XXX double check IGPs */
9652	if (rdev->flags & RADEON_IS_IGP)
9653		return;
9654
9655	if (!(rdev->flags & RADEON_IS_PCIE))
9656		return;
9657
9658	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9659	data &= ~LC_XMIT_N_FTS_MASK;
9660	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9661	if (orig != data)
9662		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9663
9664	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9665	data |= LC_GO_TO_RECOVERY;
9666	if (orig != data)
9667		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9668
9669	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9670	data |= P_IGNORE_EDB_ERR;
9671	if (orig != data)
9672		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9673
9674	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9675	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9676	data |= LC_PMI_TO_L1_DIS;
9677	if (!disable_l0s)
9678		data |= LC_L0S_INACTIVITY(7);
9679
9680	if (!disable_l1) {
9681		data |= LC_L1_INACTIVITY(7);
9682		data &= ~LC_PMI_TO_L1_DIS;
9683		if (orig != data)
9684			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9685
9686		if (!disable_plloff_in_l1) {
9687			bool clk_req_support;
9688
9689			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9690			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9691			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9692			if (orig != data)
9693				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9694
9695			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9696			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9697			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9698			if (orig != data)
9699				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9700
9701			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9702			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9703			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9704			if (orig != data)
9705				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9706
9707			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9708			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9709			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9710			if (orig != data)
9711				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9712
9713			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9714			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9715			data |= LC_DYN_LANES_PWR_STATE(3);
9716			if (orig != data)
9717				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9718
9719			if (!disable_clkreq &&
9720			    !pci_is_root_bus(rdev->pdev->bus)) {
9721				struct pci_dev *root = rdev->pdev->bus->self;
9722				u32 lnkcap;
9723
9724				clk_req_support = false;
9725				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9726				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9727					clk_req_support = true;
9728			} else {
9729				clk_req_support = false;
9730			}
9731
9732			if (clk_req_support) {
9733				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9734				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9735				if (orig != data)
9736					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9737
9738				orig = data = RREG32_SMC(THM_CLK_CNTL);
9739				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9740				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9741				if (orig != data)
9742					WREG32_SMC(THM_CLK_CNTL, data);
9743
9744				orig = data = RREG32_SMC(MISC_CLK_CTRL);
9745				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9746				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9747				if (orig != data)
9748					WREG32_SMC(MISC_CLK_CTRL, data);
9749
9750				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9751				data &= ~BCLK_AS_XCLK;
9752				if (orig != data)
9753					WREG32_SMC(CG_CLKPIN_CNTL, data);
9754
9755				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9756				data &= ~FORCE_BIF_REFCLK_EN;
9757				if (orig != data)
9758					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9759
9760				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9761				data &= ~MPLL_CLKOUT_SEL_MASK;
9762				data |= MPLL_CLKOUT_SEL(4);
9763				if (orig != data)
9764					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9765			}
9766		}
9767	} else {
9768		if (orig != data)
9769			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9770	}
9771
9772	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9773	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9774	if (orig != data)
9775		WREG32_PCIE_PORT(PCIE_CNTL2, data);
9776
9777	if (!disable_l0s) {
9778		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9779		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9780			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9781			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9782				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9783				data &= ~LC_L0S_INACTIVITY_MASK;
9784				if (orig != data)
9785					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9786			}
9787		}
9788	}
9789}
9790