r600_cp.c revision 195501
1/*-
2 * Copyright 2008-2009 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 *     Dave Airlie <airlied@redhat.com>
26 *     Alex Deucher <alexander.deucher@amd.com>
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/drm/r600_cp.c 195501 2009-07-09 16:39:28Z rnoland $");
31
32#include "dev/drm/drmP.h"
33#include "dev/drm/drm.h"
34#include "dev/drm/radeon_drm.h"
35#include "dev/drm/radeon_drv.h"
36
37#include "dev/drm/r600_microcode.h"
38
39# define ATI_PCIGART_PAGE_SIZE		4096	/**< PCI GART page size */
40# define ATI_PCIGART_PAGE_MASK		(~(ATI_PCIGART_PAGE_SIZE-1))
41
42#define R600_PTE_VALID     (1 << 0)
43#define R600_PTE_SYSTEM    (1 << 1)
44#define R600_PTE_SNOOPED   (1 << 2)
45#define R600_PTE_READABLE  (1 << 5)
46#define R600_PTE_WRITEABLE (1 << 6)
47
48/* MAX values used for gfx init */
49#define R6XX_MAX_SH_GPRS           256
50#define R6XX_MAX_TEMP_GPRS         16
51#define R6XX_MAX_SH_THREADS        256
52#define R6XX_MAX_SH_STACK_ENTRIES  4096
53#define R6XX_MAX_BACKENDS          8
54#define R6XX_MAX_BACKENDS_MASK     0xff
55#define R6XX_MAX_SIMDS             8
56#define R6XX_MAX_SIMDS_MASK        0xff
57#define R6XX_MAX_PIPES             8
58#define R6XX_MAX_PIPES_MASK        0xff
59
60#define R7XX_MAX_SH_GPRS           256
61#define R7XX_MAX_TEMP_GPRS         16
62#define R7XX_MAX_SH_THREADS        256
63#define R7XX_MAX_SH_STACK_ENTRIES  4096
64#define R7XX_MAX_BACKENDS          8
65#define R7XX_MAX_BACKENDS_MASK     0xff
66#define R7XX_MAX_SIMDS             16
67#define R7XX_MAX_SIMDS_MASK        0xffff
68#define R7XX_MAX_PIPES             8
69#define R7XX_MAX_PIPES_MASK        0xff
70
/*
 * Poll GRBM_STATUS until at least 'entries' slots are free in the CP
 * command FIFO, or until dev_priv->usec_timeout microseconds elapse.
 *
 * Returns 0 on success, -EBUSY on timeout.  The FIFO-available field
 * sits behind a different mask on RV770+ than on earlier R6xx parts.
 */
static int r600_do_wait_for_fifo(drm_radeon_private_t *dev_priv, int entries)
{
	int i;

	dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;

	for (i = 0; i < dev_priv->usec_timeout; i++) {
		int slots;
		/* RV770 and newer report free FIFO slots in a wider bitfield */
		if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770)
			slots = (RADEON_READ(R600_GRBM_STATUS)
				 & R700_CMDFIFO_AVAIL_MASK);
		else
			slots = (RADEON_READ(R600_GRBM_STATUS)
				 & R600_CMDFIFO_AVAIL_MASK);
		if (slots >= entries)
			return 0;
		DRM_UDELAY(1);
	}
	DRM_INFO("wait for fifo failed status : 0x%08X 0x%08X\n",
		 RADEON_READ(R600_GRBM_STATUS),
		 RADEON_READ(R600_GRBM_STATUS2));

	return -EBUSY;
}
95
/*
 * Wait for the CP FIFO to drain and then for the GUI (graphics) engine
 * to go idle, each bounded by dev_priv->usec_timeout.
 *
 * Returns 0 when idle, or -EBUSY on timeout (from either the FIFO wait
 * or the GUI_ACTIVE poll).
 */
static int r600_do_wait_for_idle(drm_radeon_private_t *dev_priv)
{
	int i, ret;

	dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;

	/* RV770+ waits for 8 free FIFO slots, older R6xx parts for 16 */
	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770)
		ret = r600_do_wait_for_fifo(dev_priv, 8);
	else
		ret = r600_do_wait_for_fifo(dev_priv, 16);
	if (ret)
		return ret;
	for (i = 0; i < dev_priv->usec_timeout; i++) {
		if (!(RADEON_READ(R600_GRBM_STATUS) & R600_GUI_ACTIVE))
			return 0;
		DRM_UDELAY(1);
	}
	DRM_INFO("wait idle failed status : 0x%08X 0x%08X\n",
		 RADEON_READ(R600_GRBM_STATUS),
		 RADEON_READ(R600_GRBM_STATUS2));

	return -EBUSY;
}
119
/*
 * Undo r600_page_table_init().  On Linux the scatter/gather pages were
 * DMA-mapped with pci_map_single() and are unmapped here, stopping at
 * the first entry with no bus address; on FreeBSD no per-page teardown
 * is required.  The cached bus address is cleared only when the table
 * lives in main memory (DRM_ATI_GART_MAIN).
 */
void r600_page_table_cleanup(struct drm_device *dev, struct drm_ati_pcigart_info *gart_info)
{
#ifdef __linux__
	struct drm_sg_mem *entry = dev->sg;
	int max_pages;
	int pages;
	int i;
#endif
	if (gart_info->bus_addr) {
#ifdef __linux__
		/* NOTE(review): r600_page_table_init() sizes the table in
		 * u64 PTEs; verify the u32 divisor here is intentional. */
		max_pages = (gart_info->table_size / sizeof(u32));
		pages = (entry->pages <= max_pages)
		  ? entry->pages : max_pages;

		for (i = 0; i < pages; i++) {
			if (!entry->busaddr[i])
				break;
			pci_unmap_single(dev->pdev, entry->busaddr[i],
					 PAGE_SIZE, PCI_DMA_TODEVICE);
		}
#endif
		if (gart_info->gart_table_location == DRM_ATI_GART_MAIN)
			gart_info->bus_addr = 0;
	}
}
145
/* R600 has page table setup */
/*
 * Populate the R600 GART page table from the driver's scatter/gather
 * entry list.  Each system page is split into ATI_PCIGART_PAGE_SIZE
 * (4KB) chunks, and each chunk gets a 64-bit PTE with the VALID,
 * SYSTEM, SNOOPED, READABLE and WRITEABLE bits set.
 *
 * Returns 1 on success; 0 only on the Linux-specific error path where
 * pci_map_single() fails (the FreeBSD build always succeeds here).
 */
int r600_page_table_init(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_ati_pcigart_info *gart_info = &dev_priv->gart_info;
	struct drm_sg_mem *entry = dev->sg;
	int ret = 0;
	int i, j;
	int max_pages, pages;
	u64 *pci_gart, page_base;
	dma_addr_t entry_addr;

	/* okay page table is available - lets rock */

	/* PTEs are 64-bits */
	pci_gart = (u64 *)gart_info->addr;

	/* never write more PTEs than the table can hold */
	max_pages = (gart_info->table_size / sizeof(u64));
	pages = (entry->pages <= max_pages) ? entry->pages : max_pages;

	memset(pci_gart, 0, max_pages * sizeof(u64));

	for (i = 0; i < pages; i++) {
#ifdef __linux__
		entry->busaddr[i] = pci_map_single(dev->pdev,
						   page_address(entry->
								pagelist[i]),
						   PAGE_SIZE, PCI_DMA_TODEVICE);
		if (entry->busaddr[i] == 0) {
			DRM_ERROR("unable to map PCIGART pages!\n");
			r600_page_table_cleanup(dev, gart_info);
			goto done;
		}
#endif
		entry_addr = entry->busaddr[i];
		/* one system page may cover several 4KB GART pages */
		for (j = 0; j < (PAGE_SIZE / ATI_PCIGART_PAGE_SIZE); j++) {
			page_base = (u64) entry_addr & ATI_PCIGART_PAGE_MASK;
			page_base |= R600_PTE_VALID | R600_PTE_SYSTEM | R600_PTE_SNOOPED;
			page_base |= R600_PTE_READABLE | R600_PTE_WRITEABLE;

			*pci_gart = page_base;

			/* log one entry in 128 to keep debug output bounded */
			if ((i % 128) == 0)
				DRM_DEBUG("page entry %d: 0x%016llx\n",
				    i, (unsigned long long)page_base);
			pci_gart++;
			entry_addr += ATI_PCIGART_PAGE_SIZE;
		}
	}
	ret = 1;
#ifdef __linux__
done:
#endif
	return ret;
}
201
/*
 * Ask the VM block to invalidate context 0 TLB entries covering the
 * GART aperture, then busy-wait (at most ~1000 x 1us) until any of the
 * response bits in the 0xf0 field of REQUEST_RESPONSE comes back set.
 */
static void r600_vm_flush_gart_range(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	u32 resp, countdown = 1000;
	/* invalidation range is expressed in 4KB page numbers */
	RADEON_WRITE(R600_VM_CONTEXT0_INVALIDATION_LOW_ADDR, dev_priv->gart_vm_start >> 12);
	RADEON_WRITE(R600_VM_CONTEXT0_INVALIDATION_HIGH_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);
	/* writing 2 submits the invalidation request */
	RADEON_WRITE(R600_VM_CONTEXT0_REQUEST_RESPONSE, 2);

	do {
		resp = RADEON_READ(R600_VM_CONTEXT0_REQUEST_RESPONSE);
		countdown--;
		DRM_UDELAY(1);
	} while (((resp & 0xf0) == 0) && countdown);
}
216
/*
 * Program the R6xx VM hardware: point the system aperture at the GART
 * range, give every memory-controller client the same L1 TLB setup,
 * enable the VM L2 cache, and finally enable VM context 0 with a flat
 * page table (built by r600_page_table_init()).  Contexts 1-7 are
 * disabled, and the GART range is TLB-flushed at the end.
 */
static void r600_vm_init(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	/* initialise the VM to use the page table we constructed up there */
	u32 vm_c0, i;
	u32 mc_rd_a;
	u32 vm_l2_cntl, vm_l2_cntl3;
	/* okay set up the PCIE aperture type thingo */
	RADEON_WRITE(R600_MC_VM_SYSTEM_APERTURE_LOW_ADDR, dev_priv->gart_vm_start >> 12);
	RADEON_WRITE(R600_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);
	RADEON_WRITE(R600_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0);

	/* setup MC RD a */
	mc_rd_a = R600_MCD_L1_TLB | R600_MCD_L1_FRAG_PROC | R600_MCD_SYSTEM_ACCESS_MODE_IN_SYS |
		R600_MCD_SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU | R600_MCD_EFFECTIVE_L1_TLB_SIZE(5) |
		R600_MCD_EFFECTIVE_L1_QUEUE_SIZE(5) | R600_MCD_WAIT_L2_QUERY;

	/* apply the same TLB configuration to every MC client... */
	RADEON_WRITE(R600_MCD_RD_A_CNTL, mc_rd_a);
	RADEON_WRITE(R600_MCD_RD_B_CNTL, mc_rd_a);

	RADEON_WRITE(R600_MCD_WR_A_CNTL, mc_rd_a);
	RADEON_WRITE(R600_MCD_WR_B_CNTL, mc_rd_a);

	RADEON_WRITE(R600_MCD_RD_GFX_CNTL, mc_rd_a);
	RADEON_WRITE(R600_MCD_WR_GFX_CNTL, mc_rd_a);

	RADEON_WRITE(R600_MCD_RD_SYS_CNTL, mc_rd_a);
	RADEON_WRITE(R600_MCD_WR_SYS_CNTL, mc_rd_a);

	/* ...with per-client extras: strict ordering for HDP reads,
	 * semaphore mode for SEM reads */
	RADEON_WRITE(R600_MCD_RD_HDP_CNTL, mc_rd_a | R600_MCD_L1_STRICT_ORDERING);
	RADEON_WRITE(R600_MCD_WR_HDP_CNTL, mc_rd_a /*| R600_MCD_L1_STRICT_ORDERING*/);

	RADEON_WRITE(R600_MCD_RD_PDMA_CNTL, mc_rd_a);
	RADEON_WRITE(R600_MCD_WR_PDMA_CNTL, mc_rd_a);

	RADEON_WRITE(R600_MCD_RD_SEM_CNTL, mc_rd_a | R600_MCD_SEMAPHORE_MODE);
	RADEON_WRITE(R600_MCD_WR_SEM_CNTL, mc_rd_a);

	/* enable the VM L2 cache */
	vm_l2_cntl = R600_VM_L2_CACHE_EN | R600_VM_L2_FRAG_PROC | R600_VM_ENABLE_PTE_CACHE_LRU_W;
	vm_l2_cntl |= R600_VM_L2_CNTL_QUEUE_SIZE(7);
	RADEON_WRITE(R600_VM_L2_CNTL, vm_l2_cntl);

	RADEON_WRITE(R600_VM_L2_CNTL2, 0);
	vm_l2_cntl3 = (R600_VM_L2_CNTL3_BANK_SELECT_0(0) |
		       R600_VM_L2_CNTL3_BANK_SELECT_1(1) |
		       R600_VM_L2_CNTL3_CACHE_UPDATE_MODE(2));
	RADEON_WRITE(R600_VM_L2_CNTL3, vm_l2_cntl3);

	/* enable context 0 with a flat (single-level) page table */
	vm_c0 = R600_VM_ENABLE_CONTEXT | R600_VM_PAGE_TABLE_DEPTH_FLAT;

	RADEON_WRITE(R600_VM_CONTEXT0_CNTL, vm_c0);

	vm_c0 &= ~R600_VM_ENABLE_CONTEXT;

	/* disable all other contexts */
	for (i = 1; i < 8; i++)
		RADEON_WRITE(R600_VM_CONTEXT0_CNTL + (i * 4), vm_c0);

	/* table base and VM range, all in 4KB page numbers */
	RADEON_WRITE(R600_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, dev_priv->gart_info.bus_addr >> 12);
	RADEON_WRITE(R600_VM_CONTEXT0_PAGE_TABLE_START_ADDR, dev_priv->gart_vm_start >> 12);
	RADEON_WRITE(R600_VM_CONTEXT0_PAGE_TABLE_END_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);

	r600_vm_flush_gart_range(dev);
}
281
/* load r600 microcode */
/*
 * Upload the CP microcode for R6xx-family chips.  The PM4/ME image is
 * stored as dword triplets and streamed through CP_ME_RAM_DATA; the
 * PFP image is a flat dword array streamed through CP_PFP_UCODE_DATA.
 * The CP is stopped and soft-reset before the upload.  Chips without
 * an image in r600_microcode.h are silently ignored.
 */
static void r600_cp_load_microcode(drm_radeon_private_t *dev_priv)
{
	const u32 (*cp)[3];	/* ME image: PM4_UCODE_SIZE triplets */
	const u32 *pfp;		/* PFP image: PFP_UCODE_SIZE dwords */
	int i;

	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
	case CHIP_R600:
		DRM_INFO("Loading R600 Microcode\n");
		cp  = R600_cp_microcode;
		pfp = R600_pfp_microcode;
		break;
	case CHIP_RV610:
		DRM_INFO("Loading RV610 Microcode\n");
		cp  = RV610_cp_microcode;
		pfp = RV610_pfp_microcode;
		break;
	case CHIP_RV630:
		DRM_INFO("Loading RV630 Microcode\n");
		cp  = RV630_cp_microcode;
		pfp = RV630_pfp_microcode;
		break;
	case CHIP_RV620:
		DRM_INFO("Loading RV620 Microcode\n");
		cp  = RV620_cp_microcode;
		pfp = RV620_pfp_microcode;
		break;
	case CHIP_RV635:
		DRM_INFO("Loading RV635 Microcode\n");
		cp  = RV635_cp_microcode;
		pfp = RV635_pfp_microcode;
		break;
	case CHIP_RV670:
		DRM_INFO("Loading RV670 Microcode\n");
		cp  = RV670_cp_microcode;
		pfp = RV670_pfp_microcode;
		break;
	case CHIP_RS780:
		DRM_INFO("Loading RS780 Microcode\n");
		cp  = RS780_cp_microcode;
		pfp = RS780_pfp_microcode;
		break;
	default:
		/* no microcode available for this chip */
		return;
	}

	r600_do_cp_stop(dev_priv);

	/* park the ring: no rptr writeback, set block/buffer size fields */
	RADEON_WRITE(R600_CP_RB_CNTL,
		     R600_RB_NO_UPDATE |
		     R600_RB_BLKSZ(15) |
		     R600_RB_BUFSZ(3));

	/* pulse a CP soft reset before streaming in the new code */
	RADEON_WRITE(R600_GRBM_SOFT_RESET, R600_SOFT_RESET_CP);
	RADEON_READ(R600_GRBM_SOFT_RESET);	/* readback before the delay */
	DRM_UDELAY(15000);
	RADEON_WRITE(R600_GRBM_SOFT_RESET, 0);

	RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);

	for (i = 0; i < PM4_UCODE_SIZE; i++) {
		RADEON_WRITE(R600_CP_ME_RAM_DATA, cp[i][0]);
		RADEON_WRITE(R600_CP_ME_RAM_DATA, cp[i][1]);
		RADEON_WRITE(R600_CP_ME_RAM_DATA, cp[i][2]);
	}

	RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < PFP_UCODE_SIZE; i++)
		RADEON_WRITE(R600_CP_PFP_UCODE_DATA, pfp[i]);

	/* rewind the ucode address and ME RAM read/write pointers */
	RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
	RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
	RADEON_WRITE(R600_CP_ME_RAM_RADDR, 0);
}
357
/*
 * R7xx counterpart of r600_vm_init(): point the system aperture at the
 * GART range, program the MD/MB L1 TLBs, enable the VM L2 cache, and
 * enable VM context 0 with a flat page table.  Contexts 1-7 are
 * disabled and the GART range is TLB-flushed at the end.
 */
static void r700_vm_init(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	/* initialise the VM to use the page table we constructed up there */
	u32 vm_c0, i;
	u32 mc_vm_md_l1;
	u32 vm_l2_cntl, vm_l2_cntl3;
	/* okay set up the PCIE aperture type thingo */
	RADEON_WRITE(R700_MC_VM_SYSTEM_APERTURE_LOW_ADDR, dev_priv->gart_vm_start >> 12);
	RADEON_WRITE(R700_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);
	RADEON_WRITE(R700_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0);

	mc_vm_md_l1 = R700_ENABLE_L1_TLB |
	    R700_ENABLE_L1_FRAGMENT_PROCESSING |
	    R700_SYSTEM_ACCESS_MODE_IN_SYS |
	    R700_SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU |
	    R700_EFFECTIVE_L1_TLB_SIZE(5) |
	    R700_EFFECTIVE_L1_QUEUE_SIZE(5);

	/* same L1 TLB configuration for all MD and MB client TLBs */
	RADEON_WRITE(R700_MC_VM_MD_L1_TLB0_CNTL, mc_vm_md_l1);
	RADEON_WRITE(R700_MC_VM_MD_L1_TLB1_CNTL, mc_vm_md_l1);
	RADEON_WRITE(R700_MC_VM_MD_L1_TLB2_CNTL, mc_vm_md_l1);
	RADEON_WRITE(R700_MC_VM_MB_L1_TLB0_CNTL, mc_vm_md_l1);
	RADEON_WRITE(R700_MC_VM_MB_L1_TLB1_CNTL, mc_vm_md_l1);
	RADEON_WRITE(R700_MC_VM_MB_L1_TLB2_CNTL, mc_vm_md_l1);
	RADEON_WRITE(R700_MC_VM_MB_L1_TLB3_CNTL, mc_vm_md_l1);

	/* enable the VM L2 cache */
	vm_l2_cntl = R600_VM_L2_CACHE_EN | R600_VM_L2_FRAG_PROC | R600_VM_ENABLE_PTE_CACHE_LRU_W;
	vm_l2_cntl |= R700_VM_L2_CNTL_QUEUE_SIZE(7);
	RADEON_WRITE(R600_VM_L2_CNTL, vm_l2_cntl);

	RADEON_WRITE(R600_VM_L2_CNTL2, 0);
	vm_l2_cntl3 = R700_VM_L2_CNTL3_BANK_SELECT(0) | R700_VM_L2_CNTL3_CACHE_UPDATE_MODE(2);
	RADEON_WRITE(R600_VM_L2_CNTL3, vm_l2_cntl3);

	/* enable context 0 with a flat (single-level) page table */
	vm_c0 = R600_VM_ENABLE_CONTEXT | R600_VM_PAGE_TABLE_DEPTH_FLAT;

	RADEON_WRITE(R600_VM_CONTEXT0_CNTL, vm_c0);

	vm_c0 &= ~R600_VM_ENABLE_CONTEXT;

	/* disable all other contexts */
	for (i = 1; i < 8; i++)
		RADEON_WRITE(R600_VM_CONTEXT0_CNTL + (i * 4), vm_c0);

	/* table base and VM range, all in 4KB page numbers */
	RADEON_WRITE(R700_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, dev_priv->gart_info.bus_addr >> 12);
	RADEON_WRITE(R700_VM_CONTEXT0_PAGE_TABLE_START_ADDR, dev_priv->gart_vm_start >> 12);
	RADEON_WRITE(R700_VM_CONTEXT0_PAGE_TABLE_END_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);

	r600_vm_flush_gart_range(dev);
}
409
410/* load r600 microcode */
411static void r700_cp_load_microcode(drm_radeon_private_t *dev_priv)
412{
413	const u32 *pfp;
414	const u32 *cp;
415	int i;
416
417	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
418	case CHIP_RV770:
419		DRM_INFO("Loading RV770/RV790 Microcode\n");
420		pfp = RV770_pfp_microcode;
421		cp  = RV770_cp_microcode;
422		break;
423	case CHIP_RV730:
424	case CHIP_RV740:
425		DRM_INFO("Loading RV730/RV740 Microcode\n");
426		pfp = RV730_pfp_microcode;
427		cp  = RV730_cp_microcode;
428		break;
429	case CHIP_RV710:
430		DRM_INFO("Loading RV710 Microcode\n");
431		pfp = RV710_pfp_microcode;
432		cp  = RV710_cp_microcode;
433		break;
434	default:
435		return;
436	}
437
438	r600_do_cp_stop(dev_priv);
439
440	RADEON_WRITE(R600_CP_RB_CNTL,
441		     R600_RB_NO_UPDATE |
442		     (15 << 8) |
443		     (3 << 0));
444
445	RADEON_WRITE(R600_GRBM_SOFT_RESET, R600_SOFT_RESET_CP);
446	RADEON_READ(R600_GRBM_SOFT_RESET);
447	DRM_UDELAY(15000);
448	RADEON_WRITE(R600_GRBM_SOFT_RESET, 0);
449
450	RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
451	for (i = 0; i < R700_PFP_UCODE_SIZE; i++)
452		RADEON_WRITE(R600_CP_PFP_UCODE_DATA, pfp[i]);
453	RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
454
455	RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
456	for (i = 0; i < R700_PM4_UCODE_SIZE; i++)
457		RADEON_WRITE(R600_CP_ME_RAM_DATA, cp[i]);
458	RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
459
460	RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
461	RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
462	RADEON_WRITE(R600_CP_ME_RAM_RADDR, 0);
463}
464
/*
 * Probe whether ring read-pointer writeback to system memory works:
 * clear the writeback slot, write a magic value to SCRATCH_REG1, and
 * poll (up to usec_timeout) for the value to land in the writeback
 * area.  Sets dev_priv->writeback_works; the radeon_no_wb option
 * forces it off.  When writeback is unusable, scratch updates are
 * disabled so the chip does not bus-master to no purpose.
 */
static void r600_test_writeback(drm_radeon_private_t *dev_priv)
{
	u32 tmp;

	/* Start with assuming that writeback doesn't work */
	dev_priv->writeback_works = 0;

	/* Writeback doesn't seem to work everywhere, test it here and possibly
	 * enable it if it appears to work
	 */
	radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(1), 0);

	RADEON_WRITE(R600_SCRATCH_REG1, 0xdeadbeef);

	for (tmp = 0; tmp < dev_priv->usec_timeout; tmp++) {
		u32 val;

		val = radeon_read_ring_rptr(dev_priv, R600_SCRATCHOFF(1));
		if (val == 0xdeadbeef)
			break;
		DRM_UDELAY(1);
	}

	if (tmp < dev_priv->usec_timeout) {
		dev_priv->writeback_works = 1;
		DRM_INFO("writeback test succeeded in %d usecs\n", tmp);
	} else {
		dev_priv->writeback_works = 0;
		DRM_INFO("writeback test failed\n");
	}
	if (radeon_no_wb == 1) {
		dev_priv->writeback_works = 0;
		DRM_INFO("writeback forced off\n");
	}

	if (!dev_priv->writeback_works) {
		/* Disable writeback to avoid unnecessary bus master transfer */
		/* NOTE(review): this ORs in RADEON_RB_NO_UPDATE while the
		 * microcode loaders use R600_RB_NO_UPDATE for the same
		 * register — confirm the two macros name the same bit. */
		RADEON_WRITE(R600_CP_RB_CNTL, RADEON_READ(R600_CP_RB_CNTL) |
			     RADEON_RB_NO_UPDATE);
		RADEON_WRITE(R600_SCRATCH_UMSK, 0);
	}
}
507
/*
 * Soft-reset the GPU: halt the CP micro engine, pulse a soft reset of
 * the GRBM sub-blocks (mask 0x7fff), restore the saved ring pointers
 * and ME control, then reset the driver's CP bookkeeping and pending
 * buffer freelist.  Always returns 0.
 */
int r600_do_engine_reset(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	u32 cp_ptr, cp_me_cntl, cp_rb_cntl;

	DRM_INFO("Resetting GPU\n");

	/* save ring state, then halt the micro engine */
	cp_ptr = RADEON_READ(R600_CP_RB_WPTR);
	cp_me_cntl = RADEON_READ(R600_CP_ME_CNTL);
	RADEON_WRITE(R600_CP_ME_CNTL, R600_CP_ME_HALT);

	RADEON_WRITE(R600_GRBM_SOFT_RESET, 0x7fff);
	RADEON_READ(R600_GRBM_SOFT_RESET);	/* readback before the delay */
	DRM_UDELAY(50);
	RADEON_WRITE(R600_GRBM_SOFT_RESET, 0);
	RADEON_READ(R600_GRBM_SOFT_RESET);

	/* RPTR_WR_ENA lets us force the read pointer back to cp_ptr */
	RADEON_WRITE(R600_CP_RB_WPTR_DELAY, 0);
	cp_rb_cntl = RADEON_READ(R600_CP_RB_CNTL);
	RADEON_WRITE(R600_CP_RB_CNTL, R600_RB_RPTR_WR_ENA);

	RADEON_WRITE(R600_CP_RB_RPTR_WR, cp_ptr);
	RADEON_WRITE(R600_CP_RB_WPTR, cp_ptr);
	RADEON_WRITE(R600_CP_RB_CNTL, cp_rb_cntl);
	RADEON_WRITE(R600_CP_ME_CNTL, cp_me_cntl);

	/* Reset the CP ring */
	r600_do_cp_reset(dev_priv);

	/* The CP is no longer running after an engine reset */
	dev_priv->cp_running = 0;

	/* Reset any pending vertex, indirect buffers */
	radeon_freelist_reset(dev);

	return 0;

}
546
547static u32 r600_get_tile_pipe_to_backend_map(u32 num_tile_pipes,
548					     u32 num_backends,
549					     u32 backend_disable_mask)
550{
551	u32 backend_map = 0;
552	u32 enabled_backends_mask;
553	u32 enabled_backends_count;
554	u32 cur_pipe;
555	u32 swizzle_pipe[R6XX_MAX_PIPES];
556	u32 cur_backend;
557	u32 i;
558
559	if (num_tile_pipes > R6XX_MAX_PIPES)
560		num_tile_pipes = R6XX_MAX_PIPES;
561	if (num_tile_pipes < 1)
562		num_tile_pipes = 1;
563	if (num_backends > R6XX_MAX_BACKENDS)
564		num_backends = R6XX_MAX_BACKENDS;
565	if (num_backends < 1)
566		num_backends = 1;
567
568	enabled_backends_mask = 0;
569	enabled_backends_count = 0;
570	for (i = 0; i < R6XX_MAX_BACKENDS; ++i) {
571		if (((backend_disable_mask >> i) & 1) == 0) {
572			enabled_backends_mask |= (1 << i);
573			++enabled_backends_count;
574		}
575		if (enabled_backends_count == num_backends)
576			break;
577	}
578
579	if (enabled_backends_count == 0) {
580		enabled_backends_mask = 1;
581		enabled_backends_count = 1;
582	}
583
584	if (enabled_backends_count != num_backends)
585		num_backends = enabled_backends_count;
586
587	memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R6XX_MAX_PIPES);
588	switch (num_tile_pipes) {
589	case 1:
590		swizzle_pipe[0] = 0;
591		break;
592	case 2:
593		swizzle_pipe[0] = 0;
594		swizzle_pipe[1] = 1;
595		break;
596	case 3:
597		swizzle_pipe[0] = 0;
598		swizzle_pipe[1] = 1;
599		swizzle_pipe[2] = 2;
600		break;
601	case 4:
602		swizzle_pipe[0] = 0;
603		swizzle_pipe[1] = 1;
604		swizzle_pipe[2] = 2;
605		swizzle_pipe[3] = 3;
606		break;
607	case 5:
608		swizzle_pipe[0] = 0;
609		swizzle_pipe[1] = 1;
610		swizzle_pipe[2] = 2;
611		swizzle_pipe[3] = 3;
612		swizzle_pipe[4] = 4;
613		break;
614	case 6:
615		swizzle_pipe[0] = 0;
616		swizzle_pipe[1] = 2;
617		swizzle_pipe[2] = 4;
618		swizzle_pipe[3] = 5;
619		swizzle_pipe[4] = 1;
620		swizzle_pipe[5] = 3;
621		break;
622	case 7:
623		swizzle_pipe[0] = 0;
624		swizzle_pipe[1] = 2;
625		swizzle_pipe[2] = 4;
626		swizzle_pipe[3] = 6;
627		swizzle_pipe[4] = 1;
628		swizzle_pipe[5] = 3;
629		swizzle_pipe[6] = 5;
630		break;
631	case 8:
632		swizzle_pipe[0] = 0;
633		swizzle_pipe[1] = 2;
634		swizzle_pipe[2] = 4;
635		swizzle_pipe[3] = 6;
636		swizzle_pipe[4] = 1;
637		swizzle_pipe[5] = 3;
638		swizzle_pipe[6] = 5;
639		swizzle_pipe[7] = 7;
640		break;
641	}
642
643	cur_backend = 0;
644	for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) {
645		while (((1 << cur_backend) & enabled_backends_mask) == 0)
646			cur_backend = (cur_backend + 1) % R6XX_MAX_BACKENDS;
647
648		backend_map |= (u32)(((cur_backend & 3) << (swizzle_pipe[cur_pipe] * 2)));
649
650		cur_backend = (cur_backend + 1) % R6XX_MAX_BACKENDS;
651	}
652
653	return backend_map;
654}
655
/* Population count: number of bits set in val (Kernighan's method —
 * each iteration clears the lowest set bit). */
static int r600_count_pipe_bits(uint32_t val)
{
	int count = 0;

	while (val != 0) {
		val &= val - 1;
		count++;
	}
	return count;
}
665
666static void r600_gfx_init(struct drm_device *dev,
667			  drm_radeon_private_t *dev_priv)
668{
669	int i, j, num_qd_pipes;
670	u32 sx_debug_1;
671	u32 tc_cntl;
672	u32 arb_pop;
673	u32 num_gs_verts_per_thread;
674	u32 vgt_gs_per_es;
675	u32 gs_prim_buffer_depth = 0;
676	u32 sq_ms_fifo_sizes;
677	u32 sq_config;
678	u32 sq_gpr_resource_mgmt_1 = 0;
679	u32 sq_gpr_resource_mgmt_2 = 0;
680	u32 sq_thread_resource_mgmt = 0;
681	u32 sq_stack_resource_mgmt_1 = 0;
682	u32 sq_stack_resource_mgmt_2 = 0;
683	u32 hdp_host_path_cntl;
684	u32 backend_map;
685	u32 gb_tiling_config = 0;
686	u32 cc_rb_backend_disable = 0;
687	u32 cc_gc_shader_pipe_config = 0;
688	u32 ramcfg;
689
690	/* setup chip specs */
691	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
692	case CHIP_R600:
693		dev_priv->r600_max_pipes = 4;
694		dev_priv->r600_max_tile_pipes = 8;
695		dev_priv->r600_max_simds = 4;
696		dev_priv->r600_max_backends = 4;
697		dev_priv->r600_max_gprs = 256;
698		dev_priv->r600_max_threads = 192;
699		dev_priv->r600_max_stack_entries = 256;
700		dev_priv->r600_max_hw_contexts = 8;
701		dev_priv->r600_max_gs_threads = 16;
702		dev_priv->r600_sx_max_export_size = 128;
703		dev_priv->r600_sx_max_export_pos_size = 16;
704		dev_priv->r600_sx_max_export_smx_size = 128;
705		dev_priv->r600_sq_num_cf_insts = 2;
706		break;
707	case CHIP_RV630:
708	case CHIP_RV635:
709		dev_priv->r600_max_pipes = 2;
710		dev_priv->r600_max_tile_pipes = 2;
711		dev_priv->r600_max_simds = 3;
712		dev_priv->r600_max_backends = 1;
713		dev_priv->r600_max_gprs = 128;
714		dev_priv->r600_max_threads = 192;
715		dev_priv->r600_max_stack_entries = 128;
716		dev_priv->r600_max_hw_contexts = 8;
717		dev_priv->r600_max_gs_threads = 4;
718		dev_priv->r600_sx_max_export_size = 128;
719		dev_priv->r600_sx_max_export_pos_size = 16;
720		dev_priv->r600_sx_max_export_smx_size = 128;
721		dev_priv->r600_sq_num_cf_insts = 2;
722		break;
723	case CHIP_RV610:
724	case CHIP_RS780:
725	case CHIP_RV620:
726		dev_priv->r600_max_pipes = 1;
727		dev_priv->r600_max_tile_pipes = 1;
728		dev_priv->r600_max_simds = 2;
729		dev_priv->r600_max_backends = 1;
730		dev_priv->r600_max_gprs = 128;
731		dev_priv->r600_max_threads = 192;
732		dev_priv->r600_max_stack_entries = 128;
733		dev_priv->r600_max_hw_contexts = 4;
734		dev_priv->r600_max_gs_threads = 4;
735		dev_priv->r600_sx_max_export_size = 128;
736		dev_priv->r600_sx_max_export_pos_size = 16;
737		dev_priv->r600_sx_max_export_smx_size = 128;
738		dev_priv->r600_sq_num_cf_insts = 1;
739		break;
740	case CHIP_RV670:
741		dev_priv->r600_max_pipes = 4;
742		dev_priv->r600_max_tile_pipes = 4;
743		dev_priv->r600_max_simds = 4;
744		dev_priv->r600_max_backends = 4;
745		dev_priv->r600_max_gprs = 192;
746		dev_priv->r600_max_threads = 192;
747		dev_priv->r600_max_stack_entries = 256;
748		dev_priv->r600_max_hw_contexts = 8;
749		dev_priv->r600_max_gs_threads = 16;
750		dev_priv->r600_sx_max_export_size = 128;
751		dev_priv->r600_sx_max_export_pos_size = 16;
752		dev_priv->r600_sx_max_export_smx_size = 128;
753		dev_priv->r600_sq_num_cf_insts = 2;
754		break;
755	default:
756		break;
757	}
758
759	/* Initialize HDP */
760	j = 0;
761	for (i = 0; i < 32; i++) {
762		RADEON_WRITE((0x2c14 + j), 0x00000000);
763		RADEON_WRITE((0x2c18 + j), 0x00000000);
764		RADEON_WRITE((0x2c1c + j), 0x00000000);
765		RADEON_WRITE((0x2c20 + j), 0x00000000);
766		RADEON_WRITE((0x2c24 + j), 0x00000000);
767		j += 0x18;
768	}
769
770	RADEON_WRITE(R600_GRBM_CNTL, R600_GRBM_READ_TIMEOUT(0xff));
771
772	/* setup tiling, simd, pipe config */
773	ramcfg = RADEON_READ(R600_RAMCFG);
774
775	switch (dev_priv->r600_max_tile_pipes) {
776	case 1:
777		gb_tiling_config |= R600_PIPE_TILING(0);
778		break;
779	case 2:
780		gb_tiling_config |= R600_PIPE_TILING(1);
781		break;
782	case 4:
783		gb_tiling_config |= R600_PIPE_TILING(2);
784		break;
785	case 8:
786		gb_tiling_config |= R600_PIPE_TILING(3);
787		break;
788	default:
789		break;
790	}
791
792	gb_tiling_config |= R600_BANK_TILING((ramcfg >> R600_NOOFBANK_SHIFT) & R600_NOOFBANK_MASK);
793
794	gb_tiling_config |= R600_GROUP_SIZE(0);
795
796	if (((ramcfg >> R600_NOOFROWS_SHIFT) & R600_NOOFROWS_MASK) > 3) {
797		gb_tiling_config |= R600_ROW_TILING(3);
798		gb_tiling_config |= R600_SAMPLE_SPLIT(3);
799	} else {
800		gb_tiling_config |=
801			R600_ROW_TILING(((ramcfg >> R600_NOOFROWS_SHIFT) & R600_NOOFROWS_MASK));
802		gb_tiling_config |=
803			R600_SAMPLE_SPLIT(((ramcfg >> R600_NOOFROWS_SHIFT) & R600_NOOFROWS_MASK));
804	}
805
806	gb_tiling_config |= R600_BANK_SWAPS(1);
807
808	backend_map = r600_get_tile_pipe_to_backend_map(dev_priv->r600_max_tile_pipes,
809							dev_priv->r600_max_backends,
810							(0xff << dev_priv->r600_max_backends) & 0xff);
811	gb_tiling_config |= R600_BACKEND_MAP(backend_map);
812
813	cc_gc_shader_pipe_config =
814		R600_INACTIVE_QD_PIPES((R6XX_MAX_PIPES_MASK << dev_priv->r600_max_pipes) & R6XX_MAX_PIPES_MASK);
815	cc_gc_shader_pipe_config |=
816		R600_INACTIVE_SIMDS((R6XX_MAX_SIMDS_MASK << dev_priv->r600_max_simds) & R6XX_MAX_SIMDS_MASK);
817
818	cc_rb_backend_disable =
819		R600_BACKEND_DISABLE((R6XX_MAX_BACKENDS_MASK << dev_priv->r600_max_backends) & R6XX_MAX_BACKENDS_MASK);
820
821	RADEON_WRITE(R600_GB_TILING_CONFIG,      gb_tiling_config);
822	RADEON_WRITE(R600_DCP_TILING_CONFIG,    (gb_tiling_config & 0xffff));
823	RADEON_WRITE(R600_HDP_TILING_CONFIG,    (gb_tiling_config & 0xffff));
824
825	RADEON_WRITE(R600_CC_RB_BACKEND_DISABLE,      cc_rb_backend_disable);
826	RADEON_WRITE(R600_CC_GC_SHADER_PIPE_CONFIG,   cc_gc_shader_pipe_config);
827	RADEON_WRITE(R600_GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);
828
829	num_qd_pipes =
830		R6XX_MAX_BACKENDS - r600_count_pipe_bits(cc_gc_shader_pipe_config & R600_INACTIVE_QD_PIPES_MASK);
831	RADEON_WRITE(R600_VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & R600_DEALLOC_DIST_MASK);
832	RADEON_WRITE(R600_VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & R600_VTX_REUSE_DEPTH_MASK);
833
834	/* set HW defaults for 3D engine */
835	RADEON_WRITE(R600_CP_QUEUE_THRESHOLDS, (R600_ROQ_IB1_START(0x16) |
836						R600_ROQ_IB2_START(0x2b)));
837
838	RADEON_WRITE(R600_CP_MEQ_THRESHOLDS, (R600_MEQ_END(0x40) |
839					      R600_ROQ_END(0x40)));
840
841	RADEON_WRITE(R600_TA_CNTL_AUX, (R600_DISABLE_CUBE_ANISO |
842					R600_SYNC_GRADIENT |
843					R600_SYNC_WALKER |
844					R600_SYNC_ALIGNER));
845
846	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV670)
847		RADEON_WRITE(R600_ARB_GDEC_RD_CNTL, 0x00000021);
848
849	sx_debug_1 = RADEON_READ(R600_SX_DEBUG_1);
850	sx_debug_1 |= R600_SMX_EVENT_RELEASE;
851	if (((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_R600))
852		sx_debug_1 |= R600_ENABLE_NEW_SMX_ADDRESS;
853	RADEON_WRITE(R600_SX_DEBUG_1, sx_debug_1);
854
855	if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R600) ||
856	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630) ||
857	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
858	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
859	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780))
860		RADEON_WRITE(R600_DB_DEBUG, R600_PREZ_MUST_WAIT_FOR_POSTZ_DONE);
861	else
862		RADEON_WRITE(R600_DB_DEBUG, 0);
863
864	RADEON_WRITE(R600_DB_WATERMARKS, (R600_DEPTH_FREE(4) |
865					  R600_DEPTH_FLUSH(16) |
866					  R600_DEPTH_PENDING_FREE(4) |
867					  R600_DEPTH_CACHELINE_FREE(16)));
868	RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0);
869	RADEON_WRITE(R600_VGT_NUM_INSTANCES, 0);
870
871	RADEON_WRITE(R600_SPI_CONFIG_CNTL, R600_GPR_WRITE_PRIORITY(0));
872	RADEON_WRITE(R600_SPI_CONFIG_CNTL_1, R600_VTX_DONE_DELAY(0));
873
874	sq_ms_fifo_sizes = RADEON_READ(R600_SQ_MS_FIFO_SIZES);
875	if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
876	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
877	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780)) {
878		sq_ms_fifo_sizes = (R600_CACHE_FIFO_SIZE(0xa) |
879				    R600_FETCH_FIFO_HIWATER(0xa) |
880				    R600_DONE_FIFO_HIWATER(0xe0) |
881				    R600_ALU_UPDATE_FIFO_HIWATER(0x8));
882	} else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R600) ||
883		   ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630)) {
884		sq_ms_fifo_sizes &= ~R600_DONE_FIFO_HIWATER(0xff);
885		sq_ms_fifo_sizes |= R600_DONE_FIFO_HIWATER(0x4);
886	}
887	RADEON_WRITE(R600_SQ_MS_FIFO_SIZES, sq_ms_fifo_sizes);
888
889	/* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT
890	 * should be adjusted as needed by the 2D/3D drivers.  This just sets default values
891	 */
892	sq_config = RADEON_READ(R600_SQ_CONFIG);
893	sq_config &= ~(R600_PS_PRIO(3) |
894		       R600_VS_PRIO(3) |
895		       R600_GS_PRIO(3) |
896		       R600_ES_PRIO(3));
897	sq_config |= (R600_DX9_CONSTS |
898		      R600_VC_ENABLE |
899		      R600_PS_PRIO(0) |
900		      R600_VS_PRIO(1) |
901		      R600_GS_PRIO(2) |
902		      R600_ES_PRIO(3));
903
904	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R600) {
905		sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(124) |
906					  R600_NUM_VS_GPRS(124) |
907					  R600_NUM_CLAUSE_TEMP_GPRS(4));
908		sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(0) |
909					  R600_NUM_ES_GPRS(0));
910		sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(136) |
911					   R600_NUM_VS_THREADS(48) |
912					   R600_NUM_GS_THREADS(4) |
913					   R600_NUM_ES_THREADS(4));
914		sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(128) |
915					    R600_NUM_VS_STACK_ENTRIES(128));
916		sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(0) |
917					    R600_NUM_ES_STACK_ENTRIES(0));
918	} else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
919		   ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
920		   ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780)) {
921		/* no vertex cache */
922		sq_config &= ~R600_VC_ENABLE;
923
924		sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(44) |
925					  R600_NUM_VS_GPRS(44) |
926					  R600_NUM_CLAUSE_TEMP_GPRS(2));
927		sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(17) |
928					  R600_NUM_ES_GPRS(17));
929		sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(79) |
930					   R600_NUM_VS_THREADS(78) |
931					   R600_NUM_GS_THREADS(4) |
932					   R600_NUM_ES_THREADS(31));
933		sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(40) |
934					    R600_NUM_VS_STACK_ENTRIES(40));
935		sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(32) |
936					    R600_NUM_ES_STACK_ENTRIES(16));
937	} else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630) ||
938		   ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV635)) {
939		sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(44) |
940					  R600_NUM_VS_GPRS(44) |
941					  R600_NUM_CLAUSE_TEMP_GPRS(2));
942		sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(18) |
943					  R600_NUM_ES_GPRS(18));
944		sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(79) |
945					   R600_NUM_VS_THREADS(78) |
946					   R600_NUM_GS_THREADS(4) |
947					   R600_NUM_ES_THREADS(31));
948		sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(40) |
949					    R600_NUM_VS_STACK_ENTRIES(40));
950		sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(32) |
951					    R600_NUM_ES_STACK_ENTRIES(16));
952	} else if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV670) {
953		sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(44) |
954					  R600_NUM_VS_GPRS(44) |
955					  R600_NUM_CLAUSE_TEMP_GPRS(2));
956		sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(17) |
957					  R600_NUM_ES_GPRS(17));
958		sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(79) |
959					   R600_NUM_VS_THREADS(78) |
960					   R600_NUM_GS_THREADS(4) |
961					   R600_NUM_ES_THREADS(31));
962		sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(64) |
963					    R600_NUM_VS_STACK_ENTRIES(64));
964		sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(64) |
965					    R600_NUM_ES_STACK_ENTRIES(64));
966	}
967
968	RADEON_WRITE(R600_SQ_CONFIG, sq_config);
969	RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_1,  sq_gpr_resource_mgmt_1);
970	RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_2,  sq_gpr_resource_mgmt_2);
971	RADEON_WRITE(R600_SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt);
972	RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_1, sq_stack_resource_mgmt_1);
973	RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_2, sq_stack_resource_mgmt_2);
974
975	if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
976	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
977	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780))
978		RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, R600_CACHE_INVALIDATION(R600_TC_ONLY));
979	else
980		RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, R600_CACHE_INVALIDATION(R600_VC_AND_TC));
981
982	RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_2S, (R600_S0_X(0xc) |
983						    R600_S0_Y(0x4) |
984						    R600_S1_X(0x4) |
985						    R600_S1_Y(0xc)));
986	RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_4S, (R600_S0_X(0xe) |
987						    R600_S0_Y(0xe) |
988						    R600_S1_X(0x2) |
989						    R600_S1_Y(0x2) |
990						    R600_S2_X(0xa) |
991						    R600_S2_Y(0x6) |
992						    R600_S3_X(0x6) |
993						    R600_S3_Y(0xa)));
994	RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_8S_WD0, (R600_S0_X(0xe) |
995							R600_S0_Y(0xb) |
996							R600_S1_X(0x4) |
997							R600_S1_Y(0xc) |
998							R600_S2_X(0x1) |
999							R600_S2_Y(0x6) |
1000							R600_S3_X(0xa) |
1001							R600_S3_Y(0xe)));
1002	RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_8S_WD1, (R600_S4_X(0x6) |
1003							R600_S4_Y(0x1) |
1004							R600_S5_X(0x0) |
1005							R600_S5_Y(0x0) |
1006							R600_S6_X(0xb) |
1007							R600_S6_Y(0x4) |
1008							R600_S7_X(0x7) |
1009							R600_S7_Y(0x8)));
1010
1011
1012	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
1013	case CHIP_R600:
1014	case CHIP_RV630:
1015	case CHIP_RV635:
1016		gs_prim_buffer_depth = 0;
1017		break;
1018	case CHIP_RV610:
1019	case CHIP_RS780:
1020	case CHIP_RV620:
1021		gs_prim_buffer_depth = 32;
1022		break;
1023	case CHIP_RV670:
1024		gs_prim_buffer_depth = 128;
1025		break;
1026	default:
1027		break;
1028	}
1029
1030	num_gs_verts_per_thread = dev_priv->r600_max_pipes * 16;
1031	vgt_gs_per_es = gs_prim_buffer_depth + num_gs_verts_per_thread;
1032	/* Max value for this is 256 */
1033	if (vgt_gs_per_es > 256)
1034		vgt_gs_per_es = 256;
1035
1036	RADEON_WRITE(R600_VGT_ES_PER_GS, 128);
1037	RADEON_WRITE(R600_VGT_GS_PER_ES, vgt_gs_per_es);
1038	RADEON_WRITE(R600_VGT_GS_PER_VS, 2);
1039	RADEON_WRITE(R600_VGT_GS_VERTEX_REUSE, 16);
1040
1041	/* more default values. 2D/3D driver should adjust as needed */
1042	RADEON_WRITE(R600_PA_SC_LINE_STIPPLE_STATE, 0);
1043	RADEON_WRITE(R600_VGT_STRMOUT_EN, 0);
1044	RADEON_WRITE(R600_SX_MISC, 0);
1045	RADEON_WRITE(R600_PA_SC_MODE_CNTL, 0);
1046	RADEON_WRITE(R600_PA_SC_AA_CONFIG, 0);
1047	RADEON_WRITE(R600_PA_SC_LINE_STIPPLE, 0);
1048	RADEON_WRITE(R600_SPI_INPUT_Z, 0);
1049	RADEON_WRITE(R600_SPI_PS_IN_CONTROL_0, R600_NUM_INTERP(2));
1050	RADEON_WRITE(R600_CB_COLOR7_FRAG, 0);
1051
1052	/* clear render buffer base addresses */
1053	RADEON_WRITE(R600_CB_COLOR0_BASE, 0);
1054	RADEON_WRITE(R600_CB_COLOR1_BASE, 0);
1055	RADEON_WRITE(R600_CB_COLOR2_BASE, 0);
1056	RADEON_WRITE(R600_CB_COLOR3_BASE, 0);
1057	RADEON_WRITE(R600_CB_COLOR4_BASE, 0);
1058	RADEON_WRITE(R600_CB_COLOR5_BASE, 0);
1059	RADEON_WRITE(R600_CB_COLOR6_BASE, 0);
1060	RADEON_WRITE(R600_CB_COLOR7_BASE, 0);
1061
1062	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
1063	case CHIP_RV610:
1064	case CHIP_RS780:
1065	case CHIP_RV620:
1066		tc_cntl = R600_TC_L2_SIZE(8);
1067		break;
1068	case CHIP_RV630:
1069	case CHIP_RV635:
1070		tc_cntl = R600_TC_L2_SIZE(4);
1071		break;
1072	case CHIP_R600:
1073		tc_cntl = R600_TC_L2_SIZE(0) | R600_L2_DISABLE_LATE_HIT;
1074		break;
1075	default:
1076		tc_cntl = R600_TC_L2_SIZE(0);
1077		break;
1078	}
1079
1080	RADEON_WRITE(R600_TC_CNTL, tc_cntl);
1081
1082	hdp_host_path_cntl = RADEON_READ(R600_HDP_HOST_PATH_CNTL);
1083	RADEON_WRITE(R600_HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
1084
1085	arb_pop = RADEON_READ(R600_ARB_POP);
1086	arb_pop |= R600_ENABLE_TC128;
1087	RADEON_WRITE(R600_ARB_POP, arb_pop);
1088
1089	RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0);
1090	RADEON_WRITE(R600_PA_CL_ENHANCE, (R600_CLIP_VTX_REORDER_ENA |
1091					  R600_NUM_CLIP_SEQ(3)));
1092	RADEON_WRITE(R600_PA_SC_ENHANCE, R600_FORCE_EOV_MAX_CLK_CNT(4095));
1093
1094}
1095
1096static u32 r700_get_tile_pipe_to_backend_map(u32 num_tile_pipes,
1097					     u32 num_backends,
1098					     u32 backend_disable_mask)
1099{
1100	u32 backend_map = 0;
1101	u32 enabled_backends_mask;
1102	u32 enabled_backends_count;
1103	u32 cur_pipe;
1104	u32 swizzle_pipe[R7XX_MAX_PIPES];
1105	u32 cur_backend;
1106	u32 i;
1107
1108	if (num_tile_pipes > R7XX_MAX_PIPES)
1109		num_tile_pipes = R7XX_MAX_PIPES;
1110	if (num_tile_pipes < 1)
1111		num_tile_pipes = 1;
1112	if (num_backends > R7XX_MAX_BACKENDS)
1113		num_backends = R7XX_MAX_BACKENDS;
1114	if (num_backends < 1)
1115		num_backends = 1;
1116
1117	enabled_backends_mask = 0;
1118	enabled_backends_count = 0;
1119	for (i = 0; i < R7XX_MAX_BACKENDS; ++i) {
1120		if (((backend_disable_mask >> i) & 1) == 0) {
1121			enabled_backends_mask |= (1 << i);
1122			++enabled_backends_count;
1123		}
1124		if (enabled_backends_count == num_backends)
1125			break;
1126	}
1127
1128	if (enabled_backends_count == 0) {
1129		enabled_backends_mask = 1;
1130		enabled_backends_count = 1;
1131	}
1132
1133	if (enabled_backends_count != num_backends)
1134		num_backends = enabled_backends_count;
1135
1136	memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R7XX_MAX_PIPES);
1137	switch (num_tile_pipes) {
1138	case 1:
1139		swizzle_pipe[0] = 0;
1140		break;
1141	case 2:
1142		swizzle_pipe[0] = 0;
1143		swizzle_pipe[1] = 1;
1144		break;
1145	case 3:
1146		swizzle_pipe[0] = 0;
1147		swizzle_pipe[1] = 2;
1148		swizzle_pipe[2] = 1;
1149		break;
1150	case 4:
1151		swizzle_pipe[0] = 0;
1152		swizzle_pipe[1] = 2;
1153		swizzle_pipe[2] = 3;
1154		swizzle_pipe[3] = 1;
1155		break;
1156	case 5:
1157		swizzle_pipe[0] = 0;
1158		swizzle_pipe[1] = 2;
1159		swizzle_pipe[2] = 4;
1160		swizzle_pipe[3] = 1;
1161		swizzle_pipe[4] = 3;
1162		break;
1163	case 6:
1164		swizzle_pipe[0] = 0;
1165		swizzle_pipe[1] = 2;
1166		swizzle_pipe[2] = 4;
1167		swizzle_pipe[3] = 5;
1168		swizzle_pipe[4] = 3;
1169		swizzle_pipe[5] = 1;
1170		break;
1171	case 7:
1172		swizzle_pipe[0] = 0;
1173		swizzle_pipe[1] = 2;
1174		swizzle_pipe[2] = 4;
1175		swizzle_pipe[3] = 6;
1176		swizzle_pipe[4] = 3;
1177		swizzle_pipe[5] = 1;
1178		swizzle_pipe[6] = 5;
1179		break;
1180	case 8:
1181		swizzle_pipe[0] = 0;
1182		swizzle_pipe[1] = 2;
1183		swizzle_pipe[2] = 4;
1184		swizzle_pipe[3] = 6;
1185		swizzle_pipe[4] = 3;
1186		swizzle_pipe[5] = 1;
1187		swizzle_pipe[6] = 7;
1188		swizzle_pipe[7] = 5;
1189		break;
1190	}
1191
1192	cur_backend = 0;
1193	for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) {
1194		while (((1 << cur_backend) & enabled_backends_mask) == 0)
1195			cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS;
1196
1197		backend_map |= (u32)(((cur_backend & 3) << (swizzle_pipe[cur_pipe] * 2)));
1198
1199		cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS;
1200	}
1201
1202	return backend_map;
1203}
1204
/*
 * r700_gfx_init - one-time 3D engine bring-up for the R700 (RV7xx) family.
 *
 * Fills in per-chip capability limits in dev_priv, then programs tiling,
 * pipe/SIMD/backend enables, FIFO sizes, and the SQ (shader queue) GPR,
 * thread and stack resource split registers with sane defaults.  The 2D/3D
 * userland drivers are expected to re-tune the SQ_* values as needed.
 *
 * The register write order below follows the hardware bring-up sequence;
 * do not reorder the writes.
 */
static void r700_gfx_init(struct drm_device *dev,
			  drm_radeon_private_t *dev_priv)
{
	int i, j, num_qd_pipes;
	u32 sx_debug_1;
	u32 smx_dc_ctl0;
	u32 num_gs_verts_per_thread;
	u32 vgt_gs_per_es;
	u32 gs_prim_buffer_depth = 0;
	u32 sq_ms_fifo_sizes;
	u32 sq_config;
	u32 sq_thread_resource_mgmt;
	u32 hdp_host_path_cntl;
	u32 sq_dyn_gpr_size_simd_ab_0;
	u32 backend_map;
	u32 gb_tiling_config = 0;
	u32 cc_rb_backend_disable = 0;
	u32 cc_gc_shader_pipe_config = 0;
	u32 mc_arb_ramcfg;
	u32 db_debug4;

	/* setup chip specs
	 * Per-chip pipe/SIMD/backend counts and FIFO depths.
	 * NOTE(review): the struct field is spelled "..._fifo_fize"; that
	 * spelling comes from the header (looks like a typo for "size"),
	 * so it is used as-is here. */
	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
	case CHIP_RV770:
		dev_priv->r600_max_pipes = 4;
		dev_priv->r600_max_tile_pipes = 8;
		dev_priv->r600_max_simds = 10;
		dev_priv->r600_max_backends = 4;
		dev_priv->r600_max_gprs = 256;
		dev_priv->r600_max_threads = 248;
		dev_priv->r600_max_stack_entries = 512;
		dev_priv->r600_max_hw_contexts = 8;
		dev_priv->r600_max_gs_threads = 16 * 2;
		dev_priv->r600_sx_max_export_size = 128;
		dev_priv->r600_sx_max_export_pos_size = 16;
		dev_priv->r600_sx_max_export_smx_size = 112;
		dev_priv->r600_sq_num_cf_insts = 2;

		dev_priv->r700_sx_num_of_sets = 7;
		dev_priv->r700_sc_prim_fifo_size = 0xF9;
		dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;
		dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;
		break;
	case CHIP_RV740:
		dev_priv->r600_max_pipes = 4;
		dev_priv->r600_max_tile_pipes = 4;
		dev_priv->r600_max_simds = 8;
		dev_priv->r600_max_backends = 4;
		dev_priv->r600_max_gprs = 256;
		dev_priv->r600_max_threads = 248;
		dev_priv->r600_max_stack_entries = 512;
		dev_priv->r600_max_hw_contexts = 8;
		dev_priv->r600_max_gs_threads = 16 * 2;
		dev_priv->r600_sx_max_export_size = 256;
		dev_priv->r600_sx_max_export_pos_size = 32;
		dev_priv->r600_sx_max_export_smx_size = 224;
		dev_priv->r600_sq_num_cf_insts = 2;

		dev_priv->r700_sx_num_of_sets = 7;
		dev_priv->r700_sc_prim_fifo_size = 0x100;
		dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;
		dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;

		/* Rebalance SX export space: shift 16 entries from the
		 * position buffer to the SMX buffer. */
		if (dev_priv->r600_sx_max_export_pos_size > 16) {
			dev_priv->r600_sx_max_export_pos_size -= 16;
			dev_priv->r600_sx_max_export_smx_size += 16;
		}
		break;
	case CHIP_RV730:
		dev_priv->r600_max_pipes = 2;
		dev_priv->r600_max_tile_pipes = 4;
		dev_priv->r600_max_simds = 8;
		dev_priv->r600_max_backends = 2;
		dev_priv->r600_max_gprs = 128;
		dev_priv->r600_max_threads = 248;
		dev_priv->r600_max_stack_entries = 256;
		dev_priv->r600_max_hw_contexts = 8;
		dev_priv->r600_max_gs_threads = 16 * 2;
		dev_priv->r600_sx_max_export_size = 256;
		dev_priv->r600_sx_max_export_pos_size = 32;
		dev_priv->r600_sx_max_export_smx_size = 224;
		dev_priv->r600_sq_num_cf_insts = 2;

		dev_priv->r700_sx_num_of_sets = 7;
		dev_priv->r700_sc_prim_fifo_size = 0xf9;
		dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;
		dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;

		/* Same SX export rebalance as RV740 above. */
		if (dev_priv->r600_sx_max_export_pos_size > 16) {
			dev_priv->r600_sx_max_export_pos_size -= 16;
			dev_priv->r600_sx_max_export_smx_size += 16;
		}
		break;
	case CHIP_RV710:
		dev_priv->r600_max_pipes = 2;
		dev_priv->r600_max_tile_pipes = 2;
		dev_priv->r600_max_simds = 2;
		dev_priv->r600_max_backends = 1;
		dev_priv->r600_max_gprs = 256;
		dev_priv->r600_max_threads = 192;
		dev_priv->r600_max_stack_entries = 256;
		dev_priv->r600_max_hw_contexts = 4;
		dev_priv->r600_max_gs_threads = 8 * 2;
		dev_priv->r600_sx_max_export_size = 128;
		dev_priv->r600_sx_max_export_pos_size = 16;
		dev_priv->r600_sx_max_export_smx_size = 112;
		dev_priv->r600_sq_num_cf_insts = 1;

		dev_priv->r700_sx_num_of_sets = 7;
		dev_priv->r700_sc_prim_fifo_size = 0x40;
		dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;
		dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;
		break;
	default:
		/* Unknown family: leave whatever limits were already set. */
		break;
	}

	/* Initialize HDP: zero 32 banks of 5 registers, 0x18 bytes apart. */
	j = 0;
	for (i = 0; i < 32; i++) {
		RADEON_WRITE((0x2c14 + j), 0x00000000);
		RADEON_WRITE((0x2c18 + j), 0x00000000);
		RADEON_WRITE((0x2c1c + j), 0x00000000);
		RADEON_WRITE((0x2c20 + j), 0x00000000);
		RADEON_WRITE((0x2c24 + j), 0x00000000);
		j += 0x18;
	}

	/* Give GRBM register reads a generous timeout. */
	RADEON_WRITE(R600_GRBM_CNTL, R600_GRBM_READ_TIMEOUT(0xff));

	/* setup tiling, simd, pipe config */
	mc_arb_ramcfg = RADEON_READ(R700_MC_ARB_RAMCFG);

	/* Tiling pipe count is encoded as log2. */
	switch (dev_priv->r600_max_tile_pipes) {
	case 1:
		gb_tiling_config |= R600_PIPE_TILING(0);
		break;
	case 2:
		gb_tiling_config |= R600_PIPE_TILING(1);
		break;
	case 4:
		gb_tiling_config |= R600_PIPE_TILING(2);
		break;
	case 8:
		gb_tiling_config |= R600_PIPE_TILING(3);
		break;
	default:
		break;
	}

	/* RV770 forces a fixed bank tiling; other R7xx derive it from the
	 * memory controller's bank configuration. */
	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV770)
		gb_tiling_config |= R600_BANK_TILING(1);
	else
		gb_tiling_config |= R600_BANK_TILING((mc_arb_ramcfg >> R700_NOOFBANK_SHIFT) & R700_NOOFBANK_MASK);

	gb_tiling_config |= R600_GROUP_SIZE(0);

	/* Row tiling / sample split follow the DRAM row count, capped at 3. */
	if (((mc_arb_ramcfg >> R700_NOOFROWS_SHIFT) & R700_NOOFROWS_MASK) > 3) {
		gb_tiling_config |= R600_ROW_TILING(3);
		gb_tiling_config |= R600_SAMPLE_SPLIT(3);
	} else {
		gb_tiling_config |=
			R600_ROW_TILING(((mc_arb_ramcfg >> R700_NOOFROWS_SHIFT) & R700_NOOFROWS_MASK));
		gb_tiling_config |=
			R600_SAMPLE_SPLIT(((mc_arb_ramcfg >> R700_NOOFROWS_SHIFT) & R700_NOOFROWS_MASK));
	}

	gb_tiling_config |= R600_BANK_SWAPS(1);

	/* Map tile pipes to the enabled render backends; the disable mask
	 * turns off all backends above r600_max_backends. */
	backend_map = r700_get_tile_pipe_to_backend_map(dev_priv->r600_max_tile_pipes,
							dev_priv->r600_max_backends,
							(0xff << dev_priv->r600_max_backends) & 0xff);
	gb_tiling_config |= R600_BACKEND_MAP(backend_map);

	/* Mark pipes/SIMDs/backends above the chip's limits inactive. */
	cc_gc_shader_pipe_config =
		R600_INACTIVE_QD_PIPES((R7XX_MAX_PIPES_MASK << dev_priv->r600_max_pipes) & R7XX_MAX_PIPES_MASK);
	cc_gc_shader_pipe_config |=
		R600_INACTIVE_SIMDS((R7XX_MAX_SIMDS_MASK << dev_priv->r600_max_simds) & R7XX_MAX_SIMDS_MASK);

	cc_rb_backend_disable =
		R600_BACKEND_DISABLE((R7XX_MAX_BACKENDS_MASK << dev_priv->r600_max_backends) & R7XX_MAX_BACKENDS_MASK);

	/* DCP/HDP only take the low 16 bits of the tiling config. */
	RADEON_WRITE(R600_GB_TILING_CONFIG,      gb_tiling_config);
	RADEON_WRITE(R600_DCP_TILING_CONFIG,    (gb_tiling_config & 0xffff));
	RADEON_WRITE(R600_HDP_TILING_CONFIG,    (gb_tiling_config & 0xffff));

	RADEON_WRITE(R600_CC_RB_BACKEND_DISABLE,      cc_rb_backend_disable);
	RADEON_WRITE(R600_CC_GC_SHADER_PIPE_CONFIG,   cc_gc_shader_pipe_config);
	RADEON_WRITE(R600_GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);

	/* Enable all texture cache channels. */
	RADEON_WRITE(R700_CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
	RADEON_WRITE(R700_CGTS_SYS_TCC_DISABLE, 0);
	RADEON_WRITE(R700_CGTS_TCC_DISABLE, 0);
	RADEON_WRITE(R700_CGTS_USER_SYS_TCC_DISABLE, 0);
	RADEON_WRITE(R700_CGTS_USER_TCC_DISABLE, 0);

	/* Derive vertex dealloc/reuse depths from the active pipe count. */
	num_qd_pipes =
		R7XX_MAX_BACKENDS - r600_count_pipe_bits(cc_gc_shader_pipe_config & R600_INACTIVE_QD_PIPES_MASK);
	RADEON_WRITE(R600_VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & R600_DEALLOC_DIST_MASK);
	RADEON_WRITE(R600_VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & R600_VTX_REUSE_DEPTH_MASK);

	/* set HW defaults for 3D engine */
	RADEON_WRITE(R600_CP_QUEUE_THRESHOLDS, (R600_ROQ_IB1_START(0x16) |
						R600_ROQ_IB2_START(0x2b)));

	RADEON_WRITE(R600_CP_MEQ_THRESHOLDS, R700_STQ_SPLIT(0x30));

	RADEON_WRITE(R600_TA_CNTL_AUX, (R600_DISABLE_CUBE_ANISO |
					R600_SYNC_GRADIENT |
					R600_SYNC_WALKER |
					R600_SYNC_ALIGNER));

	sx_debug_1 = RADEON_READ(R700_SX_DEBUG_1);
	sx_debug_1 |= R700_ENABLE_NEW_SMX_ADDRESS;
	RADEON_WRITE(R700_SX_DEBUG_1, sx_debug_1);

	/* Size the SMX data cache: 64 entries per SX set. */
	smx_dc_ctl0 = RADEON_READ(R600_SMX_DC_CTL0);
	smx_dc_ctl0 &= ~R700_CACHE_DEPTH(0x1ff);
	smx_dc_ctl0 |= R700_CACHE_DEPTH((dev_priv->r700_sx_num_of_sets * 64) - 1);
	RADEON_WRITE(R600_SMX_DC_CTL0, smx_dc_ctl0);

	RADEON_WRITE(R700_SMX_EVENT_CTL, (R700_ES_FLUSH_CTL(4) |
					  R700_GS_FLUSH_CTL(4) |
					  R700_ACK_FLUSH_CTL(3) |
					  R700_SYNC_FLUSH_CTL));

	/* DB debug workarounds differ between RV770 and its derivatives. */
	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV770)
		RADEON_WRITE(R700_DB_DEBUG3, R700_DB_CLK_OFF_DELAY(0x1f));
	else {
		db_debug4 = RADEON_READ(RV700_DB_DEBUG4);
		db_debug4 |= RV700_DISABLE_TILE_COVERED_FOR_PS_ITER;
		RADEON_WRITE(RV700_DB_DEBUG4, db_debug4);
	}

	/* Export buffer sizes are programmed in units of 4, minus one. */
	RADEON_WRITE(R600_SX_EXPORT_BUFFER_SIZES, (R600_COLOR_BUFFER_SIZE((dev_priv->r600_sx_max_export_size / 4) - 1) |
						   R600_POSITION_BUFFER_SIZE((dev_priv->r600_sx_max_export_pos_size / 4) - 1) |
						   R600_SMX_BUFFER_SIZE((dev_priv->r600_sx_max_export_smx_size / 4) - 1)));

	RADEON_WRITE(R700_PA_SC_FIFO_SIZE_R7XX, (R700_SC_PRIM_FIFO_SIZE(dev_priv->r700_sc_prim_fifo_size) |
						 R700_SC_HIZ_TILE_FIFO_SIZE(dev_priv->r700_sc_hiz_tile_fifo_size) |
						 R700_SC_EARLYZ_TILE_FIFO_SIZE(dev_priv->r700_sc_earlyz_tile_fifo_fize)));

	RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0);

	RADEON_WRITE(R600_VGT_NUM_INSTANCES, 1);

	RADEON_WRITE(R600_SPI_CONFIG_CNTL, R600_GPR_WRITE_PRIORITY(0));

	RADEON_WRITE(R600_SPI_CONFIG_CNTL_1, R600_VTX_DONE_DELAY(4));

	RADEON_WRITE(R600_CP_PERFMON_CNTL, 0);

	/* Fetch FIFO high-water mark varies by chip; the rest is common. */
	sq_ms_fifo_sizes = (R600_CACHE_FIFO_SIZE(16 * dev_priv->r600_sq_num_cf_insts) |
			    R600_DONE_FIFO_HIWATER(0xe0) |
			    R600_ALU_UPDATE_FIFO_HIWATER(0x8));
	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
	case CHIP_RV770:
		sq_ms_fifo_sizes |= R600_FETCH_FIFO_HIWATER(0x1);
		break;
	case CHIP_RV740:
	case CHIP_RV730:
	case CHIP_RV710:
	default:
		sq_ms_fifo_sizes |= R600_FETCH_FIFO_HIWATER(0x4);
		break;
	}
	RADEON_WRITE(R600_SQ_MS_FIFO_SIZES, sq_ms_fifo_sizes);

	/* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT
	 * should be adjusted as needed by the 2D/3D drivers.  This just sets default values
	 */
	sq_config = RADEON_READ(R600_SQ_CONFIG);
	sq_config &= ~(R600_PS_PRIO(3) |
		       R600_VS_PRIO(3) |
		       R600_GS_PRIO(3) |
		       R600_ES_PRIO(3));
	sq_config |= (R600_DX9_CONSTS |
		      R600_VC_ENABLE |
		      R600_EXPORT_SRC_C |
		      R600_PS_PRIO(0) |
		      R600_VS_PRIO(1) |
		      R600_GS_PRIO(2) |
		      R600_ES_PRIO(3));
	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710)
		/* no vertex cache */
		sq_config &= ~R600_VC_ENABLE;

	RADEON_WRITE(R600_SQ_CONFIG, sq_config);

	/* Split the GPR pool: 24/64 each for PS and VS (half of that as
	 * clause temporaries), 7/64 each for GS and ES. */
	RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_1,  (R600_NUM_PS_GPRS((dev_priv->r600_max_gprs * 24)/64) |
						    R600_NUM_VS_GPRS((dev_priv->r600_max_gprs * 24)/64) |
						    R600_NUM_CLAUSE_TEMP_GPRS(((dev_priv->r600_max_gprs * 24)/64)/2)));

	RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_2,  (R600_NUM_GS_GPRS((dev_priv->r600_max_gprs * 7)/64) |
						    R600_NUM_ES_GPRS((dev_priv->r600_max_gprs * 7)/64)));

	/* Thread split: 4/8 PS, 2/8 VS, 1/8 ES; GS gets 1/8 but never more
	 * than the chip's GS thread limit. */
	sq_thread_resource_mgmt = (R600_NUM_PS_THREADS((dev_priv->r600_max_threads * 4)/8) |
				   R600_NUM_VS_THREADS((dev_priv->r600_max_threads * 2)/8) |
				   R600_NUM_ES_THREADS((dev_priv->r600_max_threads * 1)/8));
	if (((dev_priv->r600_max_threads * 1) / 8) > dev_priv->r600_max_gs_threads)
		sq_thread_resource_mgmt |= R600_NUM_GS_THREADS(dev_priv->r600_max_gs_threads);
	else
		sq_thread_resource_mgmt |= R600_NUM_GS_THREADS((dev_priv->r600_max_gs_threads * 1)/8);
	RADEON_WRITE(R600_SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt);

	/* Stack entries: a quarter of the pool to each shader stage. */
	RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_1, (R600_NUM_PS_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4) |
						     R600_NUM_VS_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4)));

	RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_2, (R600_NUM_GS_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4) |
						     R600_NUM_ES_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4)));

	/* Dynamic GPR ring sizes: the same value is replicated to all
	 * eight SIMD A/B pair registers below. */
	sq_dyn_gpr_size_simd_ab_0 = (R700_SIMDA_RING0((dev_priv->r600_max_gprs * 38)/64) |
				     R700_SIMDA_RING1((dev_priv->r600_max_gprs * 38)/64) |
				     R700_SIMDB_RING0((dev_priv->r600_max_gprs * 38)/64) |
				     R700_SIMDB_RING1((dev_priv->r600_max_gprs * 38)/64));

	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_0, sq_dyn_gpr_size_simd_ab_0);
	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_1, sq_dyn_gpr_size_simd_ab_0);
	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_2, sq_dyn_gpr_size_simd_ab_0);
	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_3, sq_dyn_gpr_size_simd_ab_0);
	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_4, sq_dyn_gpr_size_simd_ab_0);
	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_5, sq_dyn_gpr_size_simd_ab_0);
	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_6, sq_dyn_gpr_size_simd_ab_0);
	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_7, sq_dyn_gpr_size_simd_ab_0);

	RADEON_WRITE(R700_PA_SC_FORCE_EOV_MAX_CNTS, (R700_FORCE_EOV_MAX_CLK_CNT(4095) |
						     R700_FORCE_EOV_MAX_REZ_CNT(255)));

	/* RV710 has no vertex cache, so only invalidate the texture cache. */
	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710)
		RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, (R600_CACHE_INVALIDATION(R600_TC_ONLY) |
							   R700_AUTO_INVLD_EN(R700_ES_AND_GS_AUTO)));
	else
		RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, (R600_CACHE_INVALIDATION(R600_VC_AND_TC) |
							   R700_AUTO_INVLD_EN(R700_ES_AND_GS_AUTO)));

	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
	case CHIP_RV770:
	case CHIP_RV740:
	case CHIP_RV730:
		gs_prim_buffer_depth = 384;
		break;
	case CHIP_RV710:
		gs_prim_buffer_depth = 128;
		break;
	default:
		break;
	}

	num_gs_verts_per_thread = dev_priv->r600_max_pipes * 16;
	vgt_gs_per_es = gs_prim_buffer_depth + num_gs_verts_per_thread;
	/* Max value for this is 256 */
	if (vgt_gs_per_es > 256)
		vgt_gs_per_es = 256;

	RADEON_WRITE(R600_VGT_ES_PER_GS, 128);
	RADEON_WRITE(R600_VGT_GS_PER_ES, vgt_gs_per_es);
	RADEON_WRITE(R600_VGT_GS_PER_VS, 2);

	/* more default values. 2D/3D driver should adjust as needed */
	RADEON_WRITE(R600_VGT_GS_VERTEX_REUSE, 16);
	RADEON_WRITE(R600_PA_SC_LINE_STIPPLE_STATE, 0);
	RADEON_WRITE(R600_VGT_STRMOUT_EN, 0);
	RADEON_WRITE(R600_SX_MISC, 0);
	RADEON_WRITE(R600_PA_SC_MODE_CNTL, 0);
	RADEON_WRITE(R700_PA_SC_EDGERULE, 0xaaaaaaaa);
	RADEON_WRITE(R600_PA_SC_AA_CONFIG, 0);
	RADEON_WRITE(R600_PA_SC_CLIPRECT_RULE, 0xffff);
	RADEON_WRITE(R600_PA_SC_LINE_STIPPLE, 0);
	RADEON_WRITE(R600_SPI_INPUT_Z, 0);
	RADEON_WRITE(R600_SPI_PS_IN_CONTROL_0, R600_NUM_INTERP(2));
	RADEON_WRITE(R600_CB_COLOR7_FRAG, 0);

	/* clear render buffer base addresses */
	RADEON_WRITE(R600_CB_COLOR0_BASE, 0);
	RADEON_WRITE(R600_CB_COLOR1_BASE, 0);
	RADEON_WRITE(R600_CB_COLOR2_BASE, 0);
	RADEON_WRITE(R600_CB_COLOR3_BASE, 0);
	RADEON_WRITE(R600_CB_COLOR4_BASE, 0);
	RADEON_WRITE(R600_CB_COLOR5_BASE, 0);
	RADEON_WRITE(R600_CB_COLOR6_BASE, 0);
	RADEON_WRITE(R600_CB_COLOR7_BASE, 0);

	RADEON_WRITE(R700_TCP_CNTL, 0);

	/* Read-modify-write flush of the HDP host path control register. */
	hdp_host_path_cntl = RADEON_READ(R600_HDP_HOST_PATH_CNTL);
	RADEON_WRITE(R600_HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0);

	RADEON_WRITE(R600_PA_CL_ENHANCE, (R600_CLIP_VTX_REORDER_ENA |
					  R600_NUM_CLIP_SEQ(3)));

}
1598
/*
 * r600_cp_init_ring_buffer - bring up the Command Processor ring buffer.
 *
 * Runs the family-appropriate gfx init, soft-resets the CP, then programs
 * the ring size, read/write pointers, the read-pointer writeback address
 * (AGP or SG depending on configuration), the ring base, and the scratch
 * register writeback area, finally enabling bus mastering and waiting for
 * the engine to idle.
 *
 * The three-stage CP_RB_CNTL write sequence (NO_UPDATE, then RPTR_WR_ENA,
 * then final) is intentional; do not collapse it.
 */
static void r600_cp_init_ring_buffer(struct drm_device *dev,
				       drm_radeon_private_t *dev_priv,
				       struct drm_file *file_priv)
{
	u32 ring_start;
	u64 rptr_addr;

	/* RV770 and newer use the R700 init path. */
	if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770))
		r700_gfx_init(dev, dev_priv);
	else
		r600_gfx_init(dev, dev_priv);

	/* Pulse the CP soft reset; the read flushes the write before the
	 * delay, then the reset is released. */
	RADEON_WRITE(R600_GRBM_SOFT_RESET, R600_SOFT_RESET_CP);
	RADEON_READ(R600_GRBM_SOFT_RESET);
	DRM_UDELAY(15000);
	RADEON_WRITE(R600_GRBM_SOFT_RESET, 0);


	/* Set ring buffer size */
#ifdef __BIG_ENDIAN
	RADEON_WRITE(R600_CP_RB_CNTL,
		     RADEON_BUF_SWAP_32BIT |
		     RADEON_RB_NO_UPDATE |
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#else
	RADEON_WRITE(R600_CP_RB_CNTL,
		     RADEON_RB_NO_UPDATE |
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#endif

	RADEON_WRITE(R600_CP_SEM_WAIT_TIMER, 0x4);

	/* Set the write pointer delay */
	RADEON_WRITE(R600_CP_RB_WPTR_DELAY, 0);

	/* Enable direct writes to the read pointer so it can be zeroed. */
#ifdef __BIG_ENDIAN
	RADEON_WRITE(R600_CP_RB_CNTL,
		     RADEON_BUF_SWAP_32BIT |
		     RADEON_RB_NO_UPDATE |
		     RADEON_RB_RPTR_WR_ENA |
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#else
	RADEON_WRITE(R600_CP_RB_CNTL,
		     RADEON_RB_NO_UPDATE |
		     RADEON_RB_RPTR_WR_ENA |
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#endif

	/* Initialize the ring buffer's read and write pointers */
	RADEON_WRITE(R600_CP_RB_RPTR_WR, 0);
	RADEON_WRITE(R600_CP_RB_WPTR, 0);
	SET_RING_HEAD(dev_priv, 0);
	dev_priv->ring.tail = 0;

	/* Compute the GART bus address of the rptr writeback location:
	 * relative to the AGP aperture or the scatter/gather mapping. */
#if __OS_HAS_AGP
	if (dev_priv->flags & RADEON_IS_AGP) {
		rptr_addr = dev_priv->ring_rptr->offset
			- dev->agp->base +
			dev_priv->gart_vm_start;
	} else
#endif
	{
		rptr_addr = dev_priv->ring_rptr->offset
			- ((unsigned long) dev->sg->virtual)
			+ dev_priv->gart_vm_start;
	}
	RADEON_WRITE(R600_CP_RB_RPTR_ADDR,
		     rptr_addr & 0xffffffff);
	RADEON_WRITE(R600_CP_RB_RPTR_ADDR_HI,
		     upper_32_bits(rptr_addr));

	/* Final CP_RB_CNTL: drop NO_UPDATE and RPTR_WR_ENA so normal
	 * writeback operation resumes. */
#ifdef __BIG_ENDIAN
	RADEON_WRITE(R600_CP_RB_CNTL,
		     RADEON_BUF_SWAP_32BIT |
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#else
	RADEON_WRITE(R600_CP_RB_CNTL,
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#endif

#if __OS_HAS_AGP
	if (dev_priv->flags & RADEON_IS_AGP) {
		/* XXX */
		radeon_write_agp_base(dev_priv, dev->agp->base);

		/* XXX */
		radeon_write_agp_location(dev_priv,
			     (((dev_priv->gart_vm_start - 1 +
				dev_priv->gart_size) & 0xffff0000) |
			      (dev_priv->gart_vm_start >> 16)));

		ring_start = (dev_priv->cp_ring->offset
			      - dev->agp->base
			      + dev_priv->gart_vm_start);
	} else
#endif
		ring_start = (dev_priv->cp_ring->offset
			      - (unsigned long)dev->sg->virtual
			      + dev_priv->gart_vm_start);

	/* Ring base is programmed in 256-byte units. */
	RADEON_WRITE(R600_CP_RB_BASE, ring_start >> 8);

	RADEON_WRITE(R600_CP_ME_CNTL, 0xff);

	RADEON_WRITE(R600_CP_DEBUG, (1 << 27) | (1 << 28));

	/* Initialize the scratch register pointer.  This will cause
	 * the scratch register values to be written out to memory
	 * whenever they are updated.
	 *
	 * We simply put this behind the ring read pointer, this works
	 * with PCI GART as well as (whatever kind of) AGP GART
	 */
	{
		u64 scratch_addr;

		/* Reassemble the 64-bit rptr address programmed above,
		 * offset past it, and program in 256-byte units. */
		scratch_addr = RADEON_READ(R600_CP_RB_RPTR_ADDR);
		scratch_addr |= ((u64)RADEON_READ(R600_CP_RB_RPTR_ADDR_HI)) << 32;
		scratch_addr += R600_SCRATCH_REG_OFFSET;
		scratch_addr >>= 8;
		scratch_addr &= 0xffffffff;

		RADEON_WRITE(R600_SCRATCH_ADDR, (uint32_t)scratch_addr);
	}

	/* Enable writeback for scratch registers 0-2. */
	RADEON_WRITE(R600_SCRATCH_UMSK, 0x7);

	/* Turn on bus mastering */
	radeon_enable_bm(dev_priv);

	/* Zero the scratch writeback slots and their shadow registers. */
	radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(0), 0);
	RADEON_WRITE(R600_LAST_FRAME_REG, 0);

	radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(1), 0);
	RADEON_WRITE(R600_LAST_DISPATCH_REG, 0);

	radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(2), 0);
	RADEON_WRITE(R600_LAST_CLEAR_REG, 0);

	/* reset sarea copies of these */
	if (dev_priv->sarea_priv) {
		dev_priv->sarea_priv->last_frame = 0;
		dev_priv->sarea_priv->last_dispatch = 0;
		dev_priv->sarea_priv->last_clear = 0;
	}

	r600_do_wait_for_idle(dev_priv);

}
1754
/*
 * r600_do_cleanup_cp - tear down CP state set up by r600_do_init_cp().
 *
 * Disables interrupts, releases the ring / read-pointer / buffer mappings
 * (AGP path) or the PCI GART page table and FB mapping (PCI path), then
 * wipes dev_priv up to (but not including) its 'flags' member so the flags
 * survive for a later re-init.  Always returns 0.
 */
int r600_do_cleanup_cp(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	DRM_DEBUG("\n");

	/* Make sure interrupts are disabled here because the uninstall ioctl
	 * may not have been called from userspace and after dev_private
	 * is freed, it's too late.
	 */
	if (dev->irq_enabled)
		drm_irq_uninstall(dev);

#if __OS_HAS_AGP
	if (dev_priv->flags & RADEON_IS_AGP) {
		/* AGP path: unmap each region and NULL the pointer so a
		 * repeated cleanup is harmless. */
		if (dev_priv->cp_ring != NULL) {
			drm_core_ioremapfree(dev_priv->cp_ring, dev);
			dev_priv->cp_ring = NULL;
		}
		if (dev_priv->ring_rptr != NULL) {
			drm_core_ioremapfree(dev_priv->ring_rptr, dev);
			dev_priv->ring_rptr = NULL;
		}
		if (dev->agp_buffer_map != NULL) {
			drm_core_ioremapfree(dev->agp_buffer_map, dev);
			dev->agp_buffer_map = NULL;
		}
	} else
#endif
	{

		/* PCI path: free the GART page table before unmapping the
		 * framebuffer-resident table mapping it may live in. */
		if (dev_priv->gart_info.bus_addr)
			r600_page_table_cleanup(dev, &dev_priv->gart_info);

		if (dev_priv->gart_info.gart_table_location == DRM_ATI_GART_FB) {
			drm_core_ioremapfree(&dev_priv->gart_info.mapping, dev);
			dev_priv->gart_info.addr = 0;
		}
	}
	/* only clear to the start of flags */
	memset(dev_priv, 0, offsetof(drm_radeon_private_t, flags));

	return 0;
}
1798
/*
 * One-time CP initialization for R600-class hardware (reached from the
 * RADEON_CP_INIT ioctl): validate the userspace-supplied parameters,
 * resolve the shared maps created by the DDX, program the card's
 * memory/GART layout, set up ring bookkeeping, build the GART page
 * table and VM, load the CP microcode and start the ring.
 *
 * Returns 0 on success.  Every failure path tears down partially-built
 * state via r600_do_cleanup_cp() and returns -EINVAL.
 */
int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init,
		    struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;

	DRM_DEBUG("\n");

	/* if we require new memory map but we don't have it fail */
	if ((dev_priv->flags & RADEON_NEW_MEMMAP) && !dev_priv->new_memmap) {
		DRM_ERROR("Cannot initialise DRM on this card\nThis card requires a new X.org DDX for 3D\n");
		r600_do_cleanup_cp(dev);
		return -EINVAL;
	}

	if (init->is_pci && (dev_priv->flags & RADEON_IS_AGP)) {
		DRM_DEBUG("Forcing AGP card to PCI mode\n");
		dev_priv->flags &= ~RADEON_IS_AGP;
		/* The writeback test succeeds, but when writeback is enabled,
		 * the ring buffer read ptr update fails after first 128 bytes.
		 */
		radeon_no_wb = 1;
	} else if (!(dev_priv->flags & (RADEON_IS_AGP | RADEON_IS_PCI | RADEON_IS_PCIE))
		 && !init->is_pci) {
		DRM_DEBUG("Restoring AGP flag\n");
		dev_priv->flags |= RADEON_IS_AGP;
	}

	/* Bound the busy-wait timeout used when polling for engine idle. */
	dev_priv->usec_timeout = init->usec_timeout;
	if (dev_priv->usec_timeout < 1 ||
	    dev_priv->usec_timeout > RADEON_MAX_USEC_TIMEOUT) {
		DRM_DEBUG("TIMEOUT problem!\n");
		r600_do_cleanup_cp(dev);
		return -EINVAL;
	}

	/* Enable vblank on CRTC1 for older X servers
	 */
	dev_priv->vblank_crtc = DRM_RADEON_VBLANK_CRTC1;

	dev_priv->cp_mode = init->cp_mode;

	/* We don't support anything other than bus-mastering ring mode,
	 * but the ring can be in either AGP or PCI space for the ring
	 * read pointer.
	 */
	if ((init->cp_mode != RADEON_CSQ_PRIBM_INDDIS) &&
	    (init->cp_mode != RADEON_CSQ_PRIBM_INDBM)) {
		DRM_DEBUG("BAD cp_mode (%x)!\n", init->cp_mode);
		r600_do_cleanup_cp(dev);
		return -EINVAL;
	}

	/* Only 16bpp (565) and 32bpp (8888) color formats are supported;
	 * anything else falls back to ARGB8888.
	 */
	switch (init->fb_bpp) {
	case 16:
		dev_priv->color_fmt = RADEON_COLOR_FORMAT_RGB565;
		break;
	case 32:
	default:
		dev_priv->color_fmt = RADEON_COLOR_FORMAT_ARGB8888;
		break;
	}
	dev_priv->front_offset = init->front_offset;
	dev_priv->front_pitch = init->front_pitch;
	dev_priv->back_offset = init->back_offset;
	dev_priv->back_pitch = init->back_pitch;

	dev_priv->ring_offset = init->ring_offset;
	dev_priv->ring_rptr_offset = init->ring_rptr_offset;
	dev_priv->buffers_offset = init->buffers_offset;
	dev_priv->gart_textures_offset = init->gart_textures_offset;

	/* Resolve the shared memory regions that userspace registered
	 * (SAREA, CP ring, ring read pointer, DMA buffers, and the
	 * optional GART texture region).
	 */
	dev_priv->sarea = drm_getsarea(dev);
	if (!dev_priv->sarea) {
		DRM_ERROR("could not find sarea!\n");
		r600_do_cleanup_cp(dev);
		return -EINVAL;
	}

	dev_priv->cp_ring = drm_core_findmap(dev, init->ring_offset);
	if (!dev_priv->cp_ring) {
		DRM_ERROR("could not find cp ring region!\n");
		r600_do_cleanup_cp(dev);
		return -EINVAL;
	}
	dev_priv->ring_rptr = drm_core_findmap(dev, init->ring_rptr_offset);
	if (!dev_priv->ring_rptr) {
		DRM_ERROR("could not find ring read pointer!\n");
		r600_do_cleanup_cp(dev);
		return -EINVAL;
	}
	dev->agp_buffer_token = init->buffers_offset;
	dev->agp_buffer_map = drm_core_findmap(dev, init->buffers_offset);
	if (!dev->agp_buffer_map) {
		DRM_ERROR("could not find dma buffer region!\n");
		r600_do_cleanup_cp(dev);
		return -EINVAL;
	}

	if (init->gart_textures_offset) {
		dev_priv->gart_textures =
		    drm_core_findmap(dev, init->gart_textures_offset);
		if (!dev_priv->gart_textures) {
			DRM_ERROR("could not find GART texture region!\n");
			r600_do_cleanup_cp(dev);
			return -EINVAL;
		}
	}

	/* The driver-private SAREA lives at a userspace-chosen offset
	 * inside the SAREA mapping.
	 */
	dev_priv->sarea_priv =
	    (drm_radeon_sarea_t *) ((u8 *) dev_priv->sarea->handle +
				    init->sarea_priv_offset);

#if __OS_HAS_AGP
	/* XXX */
	/* AGP: the ring/rptr/buffer maps are in the AGP aperture and
	 * must be ioremapped (write-combined) before the CPU can
	 * touch them.
	 */
	if (dev_priv->flags & RADEON_IS_AGP) {
		drm_core_ioremap_wc(dev_priv->cp_ring, dev);
		drm_core_ioremap_wc(dev_priv->ring_rptr, dev);
		drm_core_ioremap_wc(dev->agp_buffer_map, dev);
		if (!dev_priv->cp_ring->handle ||
		    !dev_priv->ring_rptr->handle ||
		    !dev->agp_buffer_map->handle) {
			DRM_ERROR("could not find ioremap agp regions!\n");
			r600_do_cleanup_cp(dev);
			return -EINVAL;
		}
	} else
#endif
	{
		/* PCI(E): the map offsets are already CPU-addressable. */
		dev_priv->cp_ring->handle = (void *)dev_priv->cp_ring->offset;
		dev_priv->ring_rptr->handle =
		    (void *)dev_priv->ring_rptr->offset;
		dev->agp_buffer_map->handle =
		    (void *)dev->agp_buffer_map->offset;

		DRM_DEBUG("dev_priv->cp_ring->handle %p\n",
			  dev_priv->cp_ring->handle);
		DRM_DEBUG("dev_priv->ring_rptr->handle %p\n",
			  dev_priv->ring_rptr->handle);
		DRM_DEBUG("dev->agp_buffer_map->handle %p\n",
			  dev->agp_buffer_map->handle);
	}

	/* The R600 MC packs the VRAM base (low 16 bits) and top (high
	 * 16 bits) in 16MB units, hence the << 24 / << 8 shifts; the
	 * top field is inclusive, so add one 16MB unit (0x1000000)
	 * before subtracting the base to get the size.
	 */
	dev_priv->fb_location = (radeon_read_fb_location(dev_priv) & 0xffff) << 24;
	dev_priv->fb_size =
		(((radeon_read_fb_location(dev_priv) & 0xffff0000u) << 8) + 0x1000000)
		- dev_priv->fb_location;

	/* Pack pitch (64-unit granularity, bits 22+) and the 1KB-aligned
	 * GPU address (>> 10) into the *_PITCH_OFFSET register format.
	 */
	dev_priv->front_pitch_offset = (((dev_priv->front_pitch / 64) << 22) |
					((dev_priv->front_offset
					  + dev_priv->fb_location) >> 10));

	dev_priv->back_pitch_offset = (((dev_priv->back_pitch / 64) << 22) |
				       ((dev_priv->back_offset
					 + dev_priv->fb_location) >> 10));

	dev_priv->depth_pitch_offset = (((dev_priv->depth_pitch / 64) << 22) |
					((dev_priv->depth_offset
					  + dev_priv->fb_location) >> 10));

	dev_priv->gart_size = init->gart_size;

	/* Now let's set the memory map ... */
	if (dev_priv->new_memmap) {
		u32 base = 0;

		DRM_INFO("Setting GART location based on new memory map\n");

		/* If using AGP, try to locate the AGP aperture at the same
		 * location in the card and on the bus, though we have to
		 * align it down.
		 */
#if __OS_HAS_AGP
		/* XXX */
		if (dev_priv->flags & RADEON_IS_AGP) {
			base = dev->agp->base;
			/* Check if valid */
			if ((base + dev_priv->gart_size - 1) >= dev_priv->fb_location &&
			    base < (dev_priv->fb_location + dev_priv->fb_size - 1)) {
				DRM_INFO("Can't use AGP base @0x%08lx, won't fit\n",
					 dev->agp->base);
				base = 0;
			}
		}
#endif
		/* If not or if AGP is at 0 (Macs), try to put it elsewhere */
		if (base == 0) {
			base = dev_priv->fb_location + dev_priv->fb_size;
			if (base < dev_priv->fb_location ||
			    ((base + dev_priv->gart_size) & 0xfffffffful) < base)
				base = dev_priv->fb_location
					- dev_priv->gart_size;
		}
		/* Align the GART base down to a 4MB boundary. */
		dev_priv->gart_vm_start = base & 0xffc00000u;
		if (dev_priv->gart_vm_start != base)
			DRM_INFO("GART aligned down from 0x%08x to 0x%08x\n",
				 base, dev_priv->gart_vm_start);
	}

#if __OS_HAS_AGP
	/* XXX */
	if (dev_priv->flags & RADEON_IS_AGP)
		dev_priv->gart_buffers_offset = (dev->agp_buffer_map->offset
						 - dev->agp->base
						 + dev_priv->gart_vm_start);
	else
#endif
		dev_priv->gart_buffers_offset = (dev->agp_buffer_map->offset
						 - (unsigned long)dev->sg->virtual
						 + dev_priv->gart_vm_start);

	DRM_DEBUG("fb 0x%08x size %d\n",
		  (unsigned int) dev_priv->fb_location,
		  (unsigned int) dev_priv->fb_size);
	DRM_DEBUG("dev_priv->gart_size %d\n", dev_priv->gart_size);
	DRM_DEBUG("dev_priv->gart_vm_start 0x%08x\n",
		  (unsigned int) dev_priv->gart_vm_start);
	DRM_DEBUG("dev_priv->gart_buffers_offset 0x%08lx\n",
		  dev_priv->gart_buffers_offset);

	/* Ring bookkeeping.  The hardware wants log2 sizes in
	 * quad-words (8 bytes), hence the / 8 before drm_order().
	 * rptr_update and fetch_size are fixed here rather than taken
	 * from init (see the inline comments below).
	 */
	dev_priv->ring.start = (u32 *) dev_priv->cp_ring->handle;
	dev_priv->ring.end = ((u32 *) dev_priv->cp_ring->handle
			      + init->ring_size / sizeof(u32));
	dev_priv->ring.size = init->ring_size;
	dev_priv->ring.size_l2qw = drm_order(init->ring_size / 8);

	dev_priv->ring.rptr_update = /* init->rptr_update */ 4096;
	dev_priv->ring.rptr_update_l2qw = drm_order(/* init->rptr_update */ 4096 / 8);

	/* fetch_size is log2 in 16-byte units ("l2ow" is the field's
	 * historical name as declared in the private struct).
	 */
	dev_priv->ring.fetch_size = /* init->fetch_size */ 32;
	dev_priv->ring.fetch_size_l2ow = drm_order(/* init->fetch_size */ 32 / 16);

	dev_priv->ring.tail_mask = (dev_priv->ring.size / sizeof(u32)) - 1;

	dev_priv->ring.high_mark = RADEON_RING_HIGH_MARK;

#if __OS_HAS_AGP
	if (dev_priv->flags & RADEON_IS_AGP) {
		/* XXX turn off pcie gart */
	} else
#endif
	{
		dev_priv->gart_info.table_mask = DMA_BIT_MASK(32);
		/* userspace must have supplied the GART table offset */
		if (!dev_priv->pcigart_offset_set) {
			DRM_ERROR("Need gart offset from userspace\n");
			r600_do_cleanup_cp(dev);
			return -EINVAL;
		}

		DRM_DEBUG("Using gart offset 0x%08lx\n", dev_priv->pcigart_offset);

		/* The GART table lives in VRAM; map it through the FB
		 * aperture so the CPU can fill in the page entries.
		 */
		dev_priv->gart_info.bus_addr =
			dev_priv->pcigart_offset + dev_priv->fb_location;
		dev_priv->gart_info.mapping.offset =
			dev_priv->pcigart_offset + dev_priv->fb_aper_offset;
		dev_priv->gart_info.mapping.size =
			dev_priv->gart_info.table_size;

		drm_core_ioremap_wc(&dev_priv->gart_info.mapping, dev);
		if (!dev_priv->gart_info.mapping.handle) {
			DRM_ERROR("ioremap failed.\n");
			r600_do_cleanup_cp(dev);
			return -EINVAL;
		}

		dev_priv->gart_info.addr =
			dev_priv->gart_info.mapping.handle;

		DRM_DEBUG("Setting phys_pci_gart to %p %08lX\n",
			  dev_priv->gart_info.addr,
			  dev_priv->pcigart_offset);

		if (!r600_page_table_init(dev)) {
			DRM_ERROR("Failed to init GART table\n");
			r600_do_cleanup_cp(dev);
			return -EINVAL;
		}

		if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770))
			r700_vm_init(dev);
		else
			r600_vm_init(dev);
	}

	/* Load the family-specific CP microcode and bring the ring up. */
	if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770))
		r700_cp_load_microcode(dev_priv);
	else
		r600_cp_load_microcode(dev_priv);

	r600_cp_init_ring_buffer(dev, dev_priv, file_priv);

	dev_priv->last_buf = 0;

	r600_do_engine_reset(dev);
	r600_test_writeback(dev_priv);

	return 0;
}
2097
2098int r600_do_resume_cp(struct drm_device *dev, struct drm_file *file_priv)
2099{
2100	drm_radeon_private_t *dev_priv = dev->dev_private;
2101
2102	DRM_DEBUG("\n");
2103	if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770)) {
2104		r700_vm_init(dev);
2105		r700_cp_load_microcode(dev_priv);
2106	} else {
2107		r600_vm_init(dev);
2108		r600_cp_load_microcode(dev_priv);
2109	}
2110	r600_cp_init_ring_buffer(dev, dev_priv, file_priv);
2111	r600_do_engine_reset(dev);
2112
2113	return 0;
2114}
2115
/* Wait for the CP to go idle.
 */
int r600_do_cp_idle(drm_radeon_private_t *dev_priv)
{
	RING_LOCALS;
	DRM_DEBUG("\n");

	/* Queue a cache flush/invalidate event, then have the CP stall
	 * until the 3D engine is idle and clean, before polling for
	 * idle on the host side below.
	 */
	BEGIN_RING(5);
	OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
	OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
	/* wait for 3D idle clean */
	OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
	/* register index is the dword offset from the config-reg base */
	OUT_RING((R600_WAIT_UNTIL - R600_SET_CONFIG_REG_OFFSET) >> 2);
	OUT_RING(RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN);

	ADVANCE_RING();
	COMMIT_RING();

	return r600_do_wait_for_idle(dev_priv);
}
2136
/* Start the Command Processor.
 */
void r600_do_cp_start(drm_radeon_private_t *dev_priv)
{
	u32 cp_me;
	RING_LOCALS;
	DRM_DEBUG("\n");

	/* Submit the ME_INITIALIZE packet.  The second dword differs
	 * between pre-RV770 (0x3) and RV770+ (0x0) chips.
	 */
	BEGIN_RING(7);
	OUT_RING(CP_PACKET3(R600_IT_ME_INITIALIZE, 5));
	OUT_RING(0x00000001);
	if (((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV770))
		OUT_RING(0x00000003);
	else
		OUT_RING(0x00000000);
	OUT_RING((dev_priv->r600_max_hw_contexts - 1));
	OUT_RING(R600_ME_INITIALIZE_DEVICE_ID(1));
	OUT_RING(0x00000000);
	OUT_RING(0x00000000);
	ADVANCE_RING();
	COMMIT_RING();

	/* set the mux and reset the halt bit */
	cp_me = 0xff;
	RADEON_WRITE(R600_CP_ME_CNTL, cp_me);

	dev_priv->cp_running = 1;

}
2166
2167void r600_do_cp_reset(drm_radeon_private_t *dev_priv)
2168{
2169	u32 cur_read_ptr;
2170	DRM_DEBUG("\n");
2171
2172	cur_read_ptr = RADEON_READ(R600_CP_RB_RPTR);
2173	RADEON_WRITE(R600_CP_RB_WPTR, cur_read_ptr);
2174	SET_RING_HEAD(dev_priv, cur_read_ptr);
2175	dev_priv->ring.tail = cur_read_ptr;
2176}
2177
2178void r600_do_cp_stop(drm_radeon_private_t *dev_priv)
2179{
2180	uint32_t cp_me;
2181
2182	DRM_DEBUG("\n");
2183
2184	cp_me = 0xff | R600_CP_ME_HALT;
2185
2186	RADEON_WRITE(R600_CP_ME_CNTL, cp_me);
2187
2188	dev_priv->cp_running = 0;
2189}
2190
2191int r600_cp_dispatch_indirect(struct drm_device *dev,
2192			      struct drm_buf *buf, int start, int end)
2193{
2194	drm_radeon_private_t *dev_priv = dev->dev_private;
2195	RING_LOCALS;
2196
2197	if (start != end) {
2198		unsigned long offset = (dev_priv->gart_buffers_offset
2199					+ buf->offset + start);
2200		int dwords = (end - start + 3) / sizeof(u32);
2201
2202		DRM_DEBUG("dwords:%d\n", dwords);
2203		DRM_DEBUG("offset 0x%lx\n", offset);
2204
2205
2206		/* Indirect buffer data must be a multiple of 16 dwords.
2207		 * pad the data with a Type-2 CP packet.
2208		 */
2209		while (dwords & 0xf) {
2210			u32 *data = (u32 *)
2211			    ((char *)dev->agp_buffer_map->handle
2212			     + buf->offset + start);
2213			data[dwords++] = RADEON_CP_PACKET2;
2214		}
2215
2216		/* Fire off the indirect buffer */
2217		BEGIN_RING(4);
2218		OUT_RING(CP_PACKET3(R600_IT_INDIRECT_BUFFER, 2));
2219		OUT_RING((offset & 0xfffffffc));
2220		OUT_RING((upper_32_bits(offset) & 0xff));
2221		OUT_RING(dwords);
2222		ADVANCE_RING();
2223	}
2224
2225	return 0;
2226}
2227