r600_cp.c revision 190674
1/*-
2 * Copyright 2008-2009 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 *     Dave Airlie <airlied@redhat.com>
26 *     Alex Deucher <alexander.deucher@amd.com>
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/drm/r600_cp.c 190674 2009-04-03 19:21:39Z rnoland $");
31
32#include "dev/drm/drmP.h"
33#include "dev/drm/drm.h"
34#include "dev/drm/radeon_drm.h"
35#include "dev/drm/radeon_drv.h"
36
37#include "dev/drm/r600_microcode.h"
38
39# define ATI_PCIGART_PAGE_SIZE		4096	/**< PCI GART page size */
40# define ATI_PCIGART_PAGE_MASK		(~(ATI_PCIGART_PAGE_SIZE-1))
41
42#define R600_PTE_VALID     (1 << 0)
43#define R600_PTE_SYSTEM    (1 << 1)
44#define R600_PTE_SNOOPED   (1 << 2)
45#define R600_PTE_READABLE  (1 << 5)
46#define R600_PTE_WRITEABLE (1 << 6)
47
48/* MAX values used for gfx init */
49#define R6XX_MAX_SH_GPRS           256
50#define R6XX_MAX_TEMP_GPRS         16
51#define R6XX_MAX_SH_THREADS        256
52#define R6XX_MAX_SH_STACK_ENTRIES  4096
53#define R6XX_MAX_BACKENDS          8
54#define R6XX_MAX_BACKENDS_MASK     0xff
55#define R6XX_MAX_SIMDS             8
56#define R6XX_MAX_SIMDS_MASK        0xff
57#define R6XX_MAX_PIPES             8
58#define R6XX_MAX_PIPES_MASK        0xff
59
60#define R7XX_MAX_SH_GPRS           256
61#define R7XX_MAX_TEMP_GPRS         16
62#define R7XX_MAX_SH_THREADS        256
63#define R7XX_MAX_SH_STACK_ENTRIES  4096
64#define R7XX_MAX_BACKENDS          8
65#define R7XX_MAX_BACKENDS_MASK     0xff
66#define R7XX_MAX_SIMDS             16
67#define R7XX_MAX_SIMDS_MASK        0xffff
68#define R7XX_MAX_PIPES             8
69#define R7XX_MAX_PIPES_MASK        0xff
70
71static int r600_do_wait_for_fifo(drm_radeon_private_t *dev_priv, int entries)
72{
73	int i;
74
75	dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
76
77	for (i = 0; i < dev_priv->usec_timeout; i++) {
78		int slots;
79		if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770)
80			slots = (RADEON_READ(R600_GRBM_STATUS)
81				 & R700_CMDFIFO_AVAIL_MASK);
82		else
83			slots = (RADEON_READ(R600_GRBM_STATUS)
84				 & R600_CMDFIFO_AVAIL_MASK);
85		if (slots >= entries)
86			return 0;
87		DRM_UDELAY(1);
88	}
89	DRM_INFO("wait for fifo failed status : 0x%08X 0x%08X\n",
90		 RADEON_READ(R600_GRBM_STATUS),
91		 RADEON_READ(R600_GRBM_STATUS2));
92
93	return -EBUSY;
94}
95
96static int r600_do_wait_for_idle(drm_radeon_private_t *dev_priv)
97{
98	int i, ret;
99
100	dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
101
102	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770)
103		ret = r600_do_wait_for_fifo(dev_priv, 8);
104	else
105		ret = r600_do_wait_for_fifo(dev_priv, 16);
106	if (ret)
107		return ret;
108	for (i = 0; i < dev_priv->usec_timeout; i++) {
109		if (!(RADEON_READ(R600_GRBM_STATUS) & R600_GUI_ACTIVE))
110			return 0;
111		DRM_UDELAY(1);
112	}
113	DRM_INFO("wait idle failed status : 0x%08X 0x%08X\n",
114		 RADEON_READ(R600_GRBM_STATUS),
115		 RADEON_READ(R600_GRBM_STATUS2));
116
117	return -EBUSY;
118}
119
/*
 * Tear down the R600 GART page table mapping.  On Linux this also
 * unmaps each scatter/gather page previously mapped with
 * pci_map_single(); on other platforms only the table bus address is
 * cleared (and only when the table lives in main memory).
 */
void r600_page_table_cleanup(struct drm_device *dev, struct drm_ati_pcigart_info *gart_info)
{
#ifdef __linux__
	struct drm_sg_mem *entry = dev->sg;
	int max_pages;
	int pages;
	int i;
#endif
	if (gart_info->bus_addr) {
#ifdef __linux__
		/* NOTE(review): this sizes the table in u32 slots while
		 * r600_page_table_init() fills it with u64 PTEs — confirm
		 * whether sizeof(u32) here is intentional. */
		max_pages = (gart_info->table_size / sizeof(u32));
		pages = (entry->pages <= max_pages)
		  ? entry->pages : max_pages;

		for (i = 0; i < pages; i++) {
			/* Stop at the first slot that was never mapped. */
			if (!entry->busaddr[i])
				break;
			pci_unmap_single(dev->pdev, entry->busaddr[i],
					 PAGE_SIZE, PCI_DMA_TODEVICE);
		}
#endif
		if (gart_info->gart_table_location == DRM_ATI_GART_MAIN)
			gart_info->bus_addr = 0;
	}
}
145
/*
 * Build the R600 GART page table.  Unlike older ATI PCIGART, R600 uses
 * 64-bit PTEs.  Each system page is split into 4KB GPU pages
 * (ATI_PCIGART_PAGE_SIZE), and every PTE is flagged valid, system,
 * snooped, readable and writeable.  On Linux the scatter/gather pages
 * are DMA-mapped here first.
 *
 * Returns 1 on success, 0 if a page could not be mapped (Linux only).
 */
int r600_page_table_init(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_ati_pcigart_info *gart_info = &dev_priv->gart_info;
	struct drm_sg_mem *entry = dev->sg;
	int ret = 0;
	int i, j;
	int max_pages, pages;
	u64 *pci_gart, page_base;
	dma_addr_t entry_addr;

	/* okay page table is available - lets rock */

	/* PTEs are 64-bits */
	pci_gart = (u64 *)gart_info->addr;

	/* Never write more PTEs than the table has room for. */
	max_pages = (gart_info->table_size / sizeof(u64));
	pages = (entry->pages <= max_pages) ? entry->pages : max_pages;

	memset(pci_gart, 0, max_pages * sizeof(u64));

	for (i = 0; i < pages; i++) {
#ifdef __linux__
		entry->busaddr[i] = pci_map_single(dev->pdev,
						   page_address(entry->
								pagelist[i]),
						   PAGE_SIZE, PCI_DMA_TODEVICE);
		if (entry->busaddr[i] == 0) {
			DRM_ERROR("unable to map PCIGART pages!\n");
			r600_page_table_cleanup(dev, gart_info);
			goto done;
		}
#endif
		entry_addr = entry->busaddr[i];
		/* One system page covers PAGE_SIZE/4KB consecutive GPU pages. */
		for (j = 0; j < (PAGE_SIZE / ATI_PCIGART_PAGE_SIZE); j++) {
			page_base = (u64) entry_addr & ATI_PCIGART_PAGE_MASK;
			page_base |= R600_PTE_VALID | R600_PTE_SYSTEM | R600_PTE_SNOOPED;
			page_base |= R600_PTE_READABLE | R600_PTE_WRITEABLE;

			*pci_gart = page_base;

			/* Log one entry per 128 pages to keep debug output sane. */
			if ((i % 128) == 0)
				DRM_DEBUG("page entry %d: 0x%016llx\n",
				    i, (unsigned long long)page_base);
			pci_gart++;
			entry_addr += ATI_PCIGART_PAGE_SIZE;
		}
	}
	ret = 1;
#ifdef __linux__
done:
#endif
	return ret;
}
201
/*
 * Ask VM context 0 to invalidate its TLB entries over the GART
 * aperture, then poll the request/response register until the hardware
 * acknowledges (any of bits 7:4 set) or a 1000-iteration, 1us-per-step
 * countdown expires.
 */
static void r600_vm_flush_gart_range(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	u32 resp, countdown = 1000;
	/* Invalidation range is given in 4KB pages (addresses >> 12). */
	RADEON_WRITE(R600_VM_CONTEXT0_INVALIDATION_LOW_ADDR, dev_priv->gart_vm_start >> 12);
	RADEON_WRITE(R600_VM_CONTEXT0_INVALIDATION_HIGH_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);
	RADEON_WRITE(R600_VM_CONTEXT0_REQUEST_RESPONSE, 2);	/* kick the request */

	do {
		resp = RADEON_READ(R600_VM_CONTEXT0_REQUEST_RESPONSE);
		countdown--;
		DRM_UDELAY(1);
	} while (((resp & 0xf0) == 0) && countdown);
}
216
/*
 * Program the R6xx memory controller and VM so that GPU context 0
 * translates the GART aperture through the flat page table built by
 * r600_page_table_init().  Contexts 1-7 are left disabled.  Ends with
 * a TLB flush of the GART range.  Register write order follows the
 * hardware bring-up sequence and must not be rearranged.
 */
static void r600_vm_init(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	/* initialise the VM to use the page table we constructed up there */
	u32 vm_c0, i;
	u32 mc_rd_a;
	u32 vm_l2_cntl, vm_l2_cntl3;
	/* okay set up the PCIE aperture type thingo */
	RADEON_WRITE(R600_MC_VM_SYSTEM_APERTURE_LOW_ADDR, dev_priv->gart_vm_start >> 12);
	RADEON_WRITE(R600_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);
	RADEON_WRITE(R600_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0);

	/* setup MC RD a */
	mc_rd_a = R600_MCD_L1_TLB | R600_MCD_L1_FRAG_PROC | R600_MCD_SYSTEM_ACCESS_MODE_IN_SYS |
		R600_MCD_SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU | R600_MCD_EFFECTIVE_L1_TLB_SIZE(5) |
		R600_MCD_EFFECTIVE_L1_QUEUE_SIZE(5) | R600_MCD_WAIT_L2_QUERY;

	/* The same base L1 TLB setup is written to every MC client pair
	 * (read/write A/B, GFX, SYS, HDP, PDMA, SEM), with per-client
	 * extra flags OR'd in below where needed. */
	RADEON_WRITE(R600_MCD_RD_A_CNTL, mc_rd_a);
	RADEON_WRITE(R600_MCD_RD_B_CNTL, mc_rd_a);

	RADEON_WRITE(R600_MCD_WR_A_CNTL, mc_rd_a);
	RADEON_WRITE(R600_MCD_WR_B_CNTL, mc_rd_a);

	RADEON_WRITE(R600_MCD_RD_GFX_CNTL, mc_rd_a);
	RADEON_WRITE(R600_MCD_WR_GFX_CNTL, mc_rd_a);

	RADEON_WRITE(R600_MCD_RD_SYS_CNTL, mc_rd_a);
	RADEON_WRITE(R600_MCD_WR_SYS_CNTL, mc_rd_a);

	RADEON_WRITE(R600_MCD_RD_HDP_CNTL, mc_rd_a | R600_MCD_L1_STRICT_ORDERING);
	RADEON_WRITE(R600_MCD_WR_HDP_CNTL, mc_rd_a /*| R600_MCD_L1_STRICT_ORDERING*/);

	RADEON_WRITE(R600_MCD_RD_PDMA_CNTL, mc_rd_a);
	RADEON_WRITE(R600_MCD_WR_PDMA_CNTL, mc_rd_a);

	RADEON_WRITE(R600_MCD_RD_SEM_CNTL, mc_rd_a | R600_MCD_SEMAPHORE_MODE);
	RADEON_WRITE(R600_MCD_WR_SEM_CNTL, mc_rd_a);

	/* Enable the VM L2 cache with fragment processing and LRU writes. */
	vm_l2_cntl = R600_VM_L2_CACHE_EN | R600_VM_L2_FRAG_PROC | R600_VM_ENABLE_PTE_CACHE_LRU_W;
	vm_l2_cntl |= R600_VM_L2_CNTL_QUEUE_SIZE(7);
	RADEON_WRITE(R600_VM_L2_CNTL, vm_l2_cntl);

	RADEON_WRITE(R600_VM_L2_CNTL2, 0);
	vm_l2_cntl3 = (R600_VM_L2_CNTL3_BANK_SELECT_0(0) |
		       R600_VM_L2_CNTL3_BANK_SELECT_1(1) |
		       R600_VM_L2_CNTL3_CACHE_UPDATE_MODE(2));
	RADEON_WRITE(R600_VM_L2_CNTL3, vm_l2_cntl3);

	/* Context 0 gets a flat (single-level) page table and is enabled. */
	vm_c0 = R600_VM_ENABLE_CONTEXT | R600_VM_PAGE_TABLE_DEPTH_FLAT;

	RADEON_WRITE(R600_VM_CONTEXT0_CNTL, vm_c0);

	vm_c0 &= ~R600_VM_ENABLE_CONTEXT;

	/* disable all other contexts */
	for (i = 1; i < 8; i++)
		RADEON_WRITE(R600_VM_CONTEXT0_CNTL + (i * 4), vm_c0);

	/* Page table base/start/end are programmed in 4KB pages. */
	RADEON_WRITE(R600_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, dev_priv->gart_info.bus_addr >> 12);
	RADEON_WRITE(R600_VM_CONTEXT0_PAGE_TABLE_START_ADDR, dev_priv->gart_vm_start >> 12);
	RADEON_WRITE(R600_VM_CONTEXT0_PAGE_TABLE_END_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);

	r600_vm_flush_gart_range(dev);
}
281
/*
 * Load the CP (ME) and PFP microcode for R6xx asics.  Selects the
 * per-chip images compiled into r600_microcode.h, stops and soft-resets
 * the CP, then streams the ucode into the ME RAM (three dwords per
 * entry) and PFP ucode RAM.  Unknown chip families are silently
 * skipped.
 */
static void r600_cp_load_microcode(drm_radeon_private_t *dev_priv)
{
	const u32 (*cp)[3];	/* ME ucode: 3 dwords per entry */
	const u32 *pfp;
	int i;

	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
	case CHIP_R600:
		DRM_INFO("Loading R600 Microcode\n");
		cp  = R600_cp_microcode;
		pfp = R600_pfp_microcode;
		break;
	case CHIP_RV610:
		DRM_INFO("Loading RV610 Microcode\n");
		cp  = RV610_cp_microcode;
		pfp = RV610_pfp_microcode;
		break;
	case CHIP_RV630:
		DRM_INFO("Loading RV630 Microcode\n");
		cp  = RV630_cp_microcode;
		pfp = RV630_pfp_microcode;
		break;
	case CHIP_RV620:
		DRM_INFO("Loading RV620 Microcode\n");
		cp  = RV620_cp_microcode;
		pfp = RV620_pfp_microcode;
		break;
	case CHIP_RV635:
		DRM_INFO("Loading RV635 Microcode\n");
		cp  = RV635_cp_microcode;
		pfp = RV635_pfp_microcode;
		break;
	case CHIP_RV670:
		DRM_INFO("Loading RV670 Microcode\n");
		cp  = RV670_cp_microcode;
		pfp = RV670_pfp_microcode;
		break;
	case CHIP_RS780:
		DRM_INFO("Loading RS780 Microcode\n");
		cp  = RS780_cp_microcode;
		pfp = RS780_pfp_microcode;
		break;
	default:
		/* No microcode for this family. */
		return;
	}

	r600_do_cp_stop(dev_priv);

	/* Put the ring control into a known state before the reset. */
	RADEON_WRITE(R600_CP_RB_CNTL,
		     R600_RB_NO_UPDATE |
		     R600_RB_BLKSZ(15) |
		     R600_RB_BUFSZ(3));

	/* Soft-reset the CP; the readback posts the write before the delay. */
	RADEON_WRITE(R600_GRBM_SOFT_RESET, R600_SOFT_RESET_CP);
	RADEON_READ(R600_GRBM_SOFT_RESET);
	DRM_UDELAY(15000);
	RADEON_WRITE(R600_GRBM_SOFT_RESET, 0);

	/* Stream the ME ucode; WADDR is set once and the data register is
	 * written repeatedly (the RAM address advances per write). */
	RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);

	for (i = 0; i < PM4_UCODE_SIZE; i++) {
		RADEON_WRITE(R600_CP_ME_RAM_DATA, cp[i][0]);
		RADEON_WRITE(R600_CP_ME_RAM_DATA, cp[i][1]);
		RADEON_WRITE(R600_CP_ME_RAM_DATA, cp[i][2]);
	}

	/* Stream the PFP ucode the same way. */
	RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < PFP_UCODE_SIZE; i++)
		RADEON_WRITE(R600_CP_PFP_UCODE_DATA, pfp[i]);

	/* Leave all ucode RAM pointers at zero. */
	RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
	RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
	RADEON_WRITE(R600_CP_ME_RAM_RADDR, 0);
}
357
/*
 * R7xx variant of r600_vm_init(): point VM context 0 at the flat GART
 * page table and disable contexts 1-7.  The register layout differs
 * from R6xx (per-TLB MD/MB L1 controls, R700_* aperture and page-table
 * offsets) but the sequence is otherwise parallel, ending with a GART
 * TLB flush.  Write order must not be rearranged.
 */
static void r700_vm_init(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	/* initialise the VM to use the page table we constructed up there */
	u32 vm_c0, i;
	u32 mc_vm_md_l1;
	u32 vm_l2_cntl, vm_l2_cntl3;
	/* okay set up the PCIE aperture type thingo */
	RADEON_WRITE(R700_MC_VM_SYSTEM_APERTURE_LOW_ADDR, dev_priv->gart_vm_start >> 12);
	RADEON_WRITE(R700_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);
	RADEON_WRITE(R700_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0);

	mc_vm_md_l1 = R700_ENABLE_L1_TLB |
	    R700_ENABLE_L1_FRAGMENT_PROCESSING |
	    R700_SYSTEM_ACCESS_MODE_IN_SYS |
	    R700_SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU |
	    R700_EFFECTIVE_L1_TLB_SIZE(5) |
	    R700_EFFECTIVE_L1_QUEUE_SIZE(5);

	/* The same L1 TLB setup is applied to all MD and MB TLB units. */
	RADEON_WRITE(R700_MC_VM_MD_L1_TLB0_CNTL, mc_vm_md_l1);
	RADEON_WRITE(R700_MC_VM_MD_L1_TLB1_CNTL, mc_vm_md_l1);
	RADEON_WRITE(R700_MC_VM_MD_L1_TLB2_CNTL, mc_vm_md_l1);
	RADEON_WRITE(R700_MC_VM_MB_L1_TLB0_CNTL, mc_vm_md_l1);
	RADEON_WRITE(R700_MC_VM_MB_L1_TLB1_CNTL, mc_vm_md_l1);
	RADEON_WRITE(R700_MC_VM_MB_L1_TLB2_CNTL, mc_vm_md_l1);
	RADEON_WRITE(R700_MC_VM_MB_L1_TLB3_CNTL, mc_vm_md_l1);

	/* Enable the VM L2 cache with fragment processing and LRU writes. */
	vm_l2_cntl = R600_VM_L2_CACHE_EN | R600_VM_L2_FRAG_PROC | R600_VM_ENABLE_PTE_CACHE_LRU_W;
	vm_l2_cntl |= R700_VM_L2_CNTL_QUEUE_SIZE(7);
	RADEON_WRITE(R600_VM_L2_CNTL, vm_l2_cntl);

	RADEON_WRITE(R600_VM_L2_CNTL2, 0);
	vm_l2_cntl3 = R700_VM_L2_CNTL3_BANK_SELECT(0) | R700_VM_L2_CNTL3_CACHE_UPDATE_MODE(2);
	RADEON_WRITE(R600_VM_L2_CNTL3, vm_l2_cntl3);

	/* Context 0 gets a flat (single-level) page table and is enabled. */
	vm_c0 = R600_VM_ENABLE_CONTEXT | R600_VM_PAGE_TABLE_DEPTH_FLAT;

	RADEON_WRITE(R600_VM_CONTEXT0_CNTL, vm_c0);

	vm_c0 &= ~R600_VM_ENABLE_CONTEXT;

	/* disable all other contexts */
	for (i = 1; i < 8; i++)
		RADEON_WRITE(R600_VM_CONTEXT0_CNTL + (i * 4), vm_c0);

	/* Page table base/start/end are programmed in 4KB pages. */
	RADEON_WRITE(R700_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, dev_priv->gart_info.bus_addr >> 12);
	RADEON_WRITE(R700_VM_CONTEXT0_PAGE_TABLE_START_ADDR, dev_priv->gart_vm_start >> 12);
	RADEON_WRITE(R700_VM_CONTEXT0_PAGE_TABLE_END_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);

	r600_vm_flush_gart_range(dev);
}
409
410/* load r600 microcode */
411static void r700_cp_load_microcode(drm_radeon_private_t *dev_priv)
412{
413	const u32 *pfp;
414	const u32 *cp;
415	int i;
416
417	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
418	case CHIP_RV770:
419		DRM_INFO("Loading RV770/RV790 Microcode\n");
420		pfp = RV770_pfp_microcode;
421		cp  = RV770_cp_microcode;
422		break;
423	case CHIP_RV730:
424		DRM_INFO("Loading RV730 Microcode\n");
425		pfp = RV730_pfp_microcode;
426		cp  = RV730_cp_microcode;
427		break;
428	case CHIP_RV710:
429		DRM_INFO("Loading RV710 Microcode\n");
430		pfp = RV710_pfp_microcode;
431		cp  = RV710_cp_microcode;
432		break;
433	default:
434		return;
435	}
436
437	r600_do_cp_stop(dev_priv);
438
439	RADEON_WRITE(R600_CP_RB_CNTL,
440		     R600_RB_NO_UPDATE |
441		     (15 << 8) |
442		     (3 << 0));
443
444	RADEON_WRITE(R600_GRBM_SOFT_RESET, R600_SOFT_RESET_CP);
445	RADEON_READ(R600_GRBM_SOFT_RESET);
446	DRM_UDELAY(15000);
447	RADEON_WRITE(R600_GRBM_SOFT_RESET, 0);
448
449	RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
450	for (i = 0; i < R700_PFP_UCODE_SIZE; i++)
451		RADEON_WRITE(R600_CP_PFP_UCODE_DATA, pfp[i]);
452	RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
453
454	RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
455	for (i = 0; i < R700_PM4_UCODE_SIZE; i++)
456		RADEON_WRITE(R600_CP_ME_RAM_DATA, cp[i]);
457	RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
458
459	RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
460	RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
461	RADEON_WRITE(R600_CP_ME_RAM_RADDR, 0);
462}
463
/*
 * Probe whether scratch-register writeback into the ring read-pointer
 * area works on this board, recording the result in
 * dev_priv->writeback_works.  Writeback is force-disabled when the
 * radeon_no_wb tunable is set; on failure the hardware is told not to
 * write back at all.
 */
static void r600_test_writeback(drm_radeon_private_t *dev_priv)
{
	u32 tmp;

	/* Start with assuming that writeback doesn't work */
	dev_priv->writeback_works = 0;

	/* Writeback doesn't seem to work everywhere, test it here and possibly
	 * enable it if it appears to work
	 */
	radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(1), 0);

	/* Writing the scratch register should cause the GPU to write the
	 * value back to the scratch area in the ring rptr memory. */
	RADEON_WRITE(R600_SCRATCH_REG1, 0xdeadbeef);

	for (tmp = 0; tmp < dev_priv->usec_timeout; tmp++) {
		u32 val;

		val = radeon_read_ring_rptr(dev_priv, R600_SCRATCHOFF(1));
		if (val == 0xdeadbeef)
			break;
		DRM_UDELAY(1);
	}

	/* tmp doubles as the elapsed-usec count here. */
	if (tmp < dev_priv->usec_timeout) {
		dev_priv->writeback_works = 1;
		DRM_INFO("writeback test succeeded in %d usecs\n", tmp);
	} else {
		dev_priv->writeback_works = 0;
		DRM_INFO("writeback test failed\n");
	}
	if (radeon_no_wb == 1) {
		dev_priv->writeback_works = 0;
		DRM_INFO("writeback forced off\n");
	}

	if (!dev_priv->writeback_works) {
		/* Disable writeback to avoid unnecessary bus master transfer */
		/* NOTE(review): this uses RADEON_RB_NO_UPDATE while other r6xx
		 * paths use R600_RB_NO_UPDATE — confirm both name the same bit. */
		RADEON_WRITE(R600_CP_RB_CNTL, RADEON_READ(R600_CP_RB_CNTL) |
			     RADEON_RB_NO_UPDATE);
		RADEON_WRITE(R600_SCRATCH_UMSK, 0);
	}
}
506
/*
 * Full engine reset for R6xx/R7xx.  Halts the ME, soft-resets all GRBM
 * blocks, restores the ring pointer and control registers the reset
 * clobbers, then resets the CP ring state and the DRM buffer freelist.
 * Always returns 0.
 */
int r600_do_engine_reset(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	u32 cp_ptr, cp_me_cntl, cp_rb_cntl;

	DRM_INFO("Resetting GPU\n");

	/* Save state that the soft reset will clobber, then halt the ME. */
	cp_ptr = RADEON_READ(R600_CP_RB_WPTR);
	cp_me_cntl = RADEON_READ(R600_CP_ME_CNTL);
	RADEON_WRITE(R600_CP_ME_CNTL, R600_CP_ME_HALT);

	/* Soft-reset all GRBM blocks (0x7fff = every reset bit). */
	RADEON_WRITE(R600_GRBM_SOFT_RESET, 0x7fff);
	RADEON_READ(R600_GRBM_SOFT_RESET);
	DRM_UDELAY(50);
	RADEON_WRITE(R600_GRBM_SOFT_RESET, 0);
	RADEON_READ(R600_GRBM_SOFT_RESET);

	/* Re-seed the read pointer from the saved write pointer: enable
	 * RPTR writes, set both pointers, then restore control regs. */
	RADEON_WRITE(R600_CP_RB_WPTR_DELAY, 0);
	cp_rb_cntl = RADEON_READ(R600_CP_RB_CNTL);
	RADEON_WRITE(R600_CP_RB_CNTL, R600_RB_RPTR_WR_ENA);

	RADEON_WRITE(R600_CP_RB_RPTR_WR, cp_ptr);
	RADEON_WRITE(R600_CP_RB_WPTR, cp_ptr);
	RADEON_WRITE(R600_CP_RB_CNTL, cp_rb_cntl);
	RADEON_WRITE(R600_CP_ME_CNTL, cp_me_cntl);

	/* Reset the CP ring */
	r600_do_cp_reset(dev_priv);

	/* The CP is no longer running after an engine reset */
	dev_priv->cp_running = 0;

	/* Reset any pending vertex, indirect buffers */
	radeon_freelist_reset(dev);

	return 0;

}
545
546static u32 r600_get_tile_pipe_to_backend_map(u32 num_tile_pipes,
547					     u32 num_backends,
548					     u32 backend_disable_mask)
549{
550	u32 backend_map = 0;
551	u32 enabled_backends_mask;
552	u32 enabled_backends_count;
553	u32 cur_pipe;
554	u32 swizzle_pipe[R6XX_MAX_PIPES];
555	u32 cur_backend;
556	u32 i;
557
558	if (num_tile_pipes > R6XX_MAX_PIPES)
559		num_tile_pipes = R6XX_MAX_PIPES;
560	if (num_tile_pipes < 1)
561		num_tile_pipes = 1;
562	if (num_backends > R6XX_MAX_BACKENDS)
563		num_backends = R6XX_MAX_BACKENDS;
564	if (num_backends < 1)
565		num_backends = 1;
566
567	enabled_backends_mask = 0;
568	enabled_backends_count = 0;
569	for (i = 0; i < R6XX_MAX_BACKENDS; ++i) {
570		if (((backend_disable_mask >> i) & 1) == 0) {
571			enabled_backends_mask |= (1 << i);
572			++enabled_backends_count;
573		}
574		if (enabled_backends_count == num_backends)
575			break;
576	}
577
578	if (enabled_backends_count == 0) {
579		enabled_backends_mask = 1;
580		enabled_backends_count = 1;
581	}
582
583	if (enabled_backends_count != num_backends)
584		num_backends = enabled_backends_count;
585
586	memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R6XX_MAX_PIPES);
587	switch (num_tile_pipes) {
588	case 1:
589		swizzle_pipe[0] = 0;
590		break;
591	case 2:
592		swizzle_pipe[0] = 0;
593		swizzle_pipe[1] = 1;
594		break;
595	case 3:
596		swizzle_pipe[0] = 0;
597		swizzle_pipe[1] = 1;
598		swizzle_pipe[2] = 2;
599		break;
600	case 4:
601		swizzle_pipe[0] = 0;
602		swizzle_pipe[1] = 1;
603		swizzle_pipe[2] = 2;
604		swizzle_pipe[3] = 3;
605		break;
606	case 5:
607		swizzle_pipe[0] = 0;
608		swizzle_pipe[1] = 1;
609		swizzle_pipe[2] = 2;
610		swizzle_pipe[3] = 3;
611		swizzle_pipe[4] = 4;
612		break;
613	case 6:
614		swizzle_pipe[0] = 0;
615		swizzle_pipe[1] = 2;
616		swizzle_pipe[2] = 4;
617		swizzle_pipe[3] = 5;
618		swizzle_pipe[4] = 1;
619		swizzle_pipe[5] = 3;
620		break;
621	case 7:
622		swizzle_pipe[0] = 0;
623		swizzle_pipe[1] = 2;
624		swizzle_pipe[2] = 4;
625		swizzle_pipe[3] = 6;
626		swizzle_pipe[4] = 1;
627		swizzle_pipe[5] = 3;
628		swizzle_pipe[6] = 5;
629		break;
630	case 8:
631		swizzle_pipe[0] = 0;
632		swizzle_pipe[1] = 2;
633		swizzle_pipe[2] = 4;
634		swizzle_pipe[3] = 6;
635		swizzle_pipe[4] = 1;
636		swizzle_pipe[5] = 3;
637		swizzle_pipe[6] = 5;
638		swizzle_pipe[7] = 7;
639		break;
640	}
641
642	cur_backend = 0;
643	for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) {
644		while (((1 << cur_backend) & enabled_backends_mask) == 0)
645			cur_backend = (cur_backend + 1) % R6XX_MAX_BACKENDS;
646
647		backend_map |= (u32)(((cur_backend & 3) << (swizzle_pipe[cur_pipe] * 2)));
648
649		cur_backend = (cur_backend + 1) % R6XX_MAX_BACKENDS;
650	}
651
652	return backend_map;
653}
654
/*
 * Population count: return the number of bits set in val.  Uses
 * Kernighan's method (val &= val - 1 clears the lowest set bit), so the
 * loop runs once per set bit instead of once per bit position.
 */
static int r600_count_pipe_bits(uint32_t val)
{
	int ret = 0;

	while (val != 0) {
		val &= val - 1;
		ret++;
	}
	return ret;
}
664
665static void r600_gfx_init(struct drm_device *dev,
666			  drm_radeon_private_t *dev_priv)
667{
668	int i, j, num_qd_pipes;
669	u32 sx_debug_1;
670	u32 tc_cntl;
671	u32 arb_pop;
672	u32 num_gs_verts_per_thread;
673	u32 vgt_gs_per_es;
674	u32 gs_prim_buffer_depth = 0;
675	u32 sq_ms_fifo_sizes;
676	u32 sq_config;
677	u32 sq_gpr_resource_mgmt_1 = 0;
678	u32 sq_gpr_resource_mgmt_2 = 0;
679	u32 sq_thread_resource_mgmt = 0;
680	u32 sq_stack_resource_mgmt_1 = 0;
681	u32 sq_stack_resource_mgmt_2 = 0;
682	u32 hdp_host_path_cntl;
683	u32 backend_map;
684	u32 gb_tiling_config = 0;
685	u32 cc_rb_backend_disable = 0;
686	u32 cc_gc_shader_pipe_config = 0;
687	u32 ramcfg;
688
689	/* setup chip specs */
690	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
691	case CHIP_R600:
692		dev_priv->r600_max_pipes = 4;
693		dev_priv->r600_max_tile_pipes = 8;
694		dev_priv->r600_max_simds = 4;
695		dev_priv->r600_max_backends = 4;
696		dev_priv->r600_max_gprs = 256;
697		dev_priv->r600_max_threads = 192;
698		dev_priv->r600_max_stack_entries = 256;
699		dev_priv->r600_max_hw_contexts = 8;
700		dev_priv->r600_max_gs_threads = 16;
701		dev_priv->r600_sx_max_export_size = 128;
702		dev_priv->r600_sx_max_export_pos_size = 16;
703		dev_priv->r600_sx_max_export_smx_size = 128;
704		dev_priv->r600_sq_num_cf_insts = 2;
705		break;
706	case CHIP_RV630:
707	case CHIP_RV635:
708		dev_priv->r600_max_pipes = 2;
709		dev_priv->r600_max_tile_pipes = 2;
710		dev_priv->r600_max_simds = 3;
711		dev_priv->r600_max_backends = 1;
712		dev_priv->r600_max_gprs = 128;
713		dev_priv->r600_max_threads = 192;
714		dev_priv->r600_max_stack_entries = 128;
715		dev_priv->r600_max_hw_contexts = 8;
716		dev_priv->r600_max_gs_threads = 4;
717		dev_priv->r600_sx_max_export_size = 128;
718		dev_priv->r600_sx_max_export_pos_size = 16;
719		dev_priv->r600_sx_max_export_smx_size = 128;
720		dev_priv->r600_sq_num_cf_insts = 2;
721		break;
722	case CHIP_RV610:
723	case CHIP_RS780:
724	case CHIP_RV620:
725		dev_priv->r600_max_pipes = 1;
726		dev_priv->r600_max_tile_pipes = 1;
727		dev_priv->r600_max_simds = 2;
728		dev_priv->r600_max_backends = 1;
729		dev_priv->r600_max_gprs = 128;
730		dev_priv->r600_max_threads = 192;
731		dev_priv->r600_max_stack_entries = 128;
732		dev_priv->r600_max_hw_contexts = 4;
733		dev_priv->r600_max_gs_threads = 4;
734		dev_priv->r600_sx_max_export_size = 128;
735		dev_priv->r600_sx_max_export_pos_size = 16;
736		dev_priv->r600_sx_max_export_smx_size = 128;
737		dev_priv->r600_sq_num_cf_insts = 1;
738		break;
739	case CHIP_RV670:
740		dev_priv->r600_max_pipes = 4;
741		dev_priv->r600_max_tile_pipes = 4;
742		dev_priv->r600_max_simds = 4;
743		dev_priv->r600_max_backends = 4;
744		dev_priv->r600_max_gprs = 192;
745		dev_priv->r600_max_threads = 192;
746		dev_priv->r600_max_stack_entries = 256;
747		dev_priv->r600_max_hw_contexts = 8;
748		dev_priv->r600_max_gs_threads = 16;
749		dev_priv->r600_sx_max_export_size = 128;
750		dev_priv->r600_sx_max_export_pos_size = 16;
751		dev_priv->r600_sx_max_export_smx_size = 128;
752		dev_priv->r600_sq_num_cf_insts = 2;
753		break;
754	default:
755		break;
756	}
757
758	/* Initialize HDP */
759	j = 0;
760	for (i = 0; i < 32; i++) {
761		RADEON_WRITE((0x2c14 + j), 0x00000000);
762		RADEON_WRITE((0x2c18 + j), 0x00000000);
763		RADEON_WRITE((0x2c1c + j), 0x00000000);
764		RADEON_WRITE((0x2c20 + j), 0x00000000);
765		RADEON_WRITE((0x2c24 + j), 0x00000000);
766		j += 0x18;
767	}
768
769	RADEON_WRITE(R600_GRBM_CNTL, R600_GRBM_READ_TIMEOUT(0xff));
770
771	/* setup tiling, simd, pipe config */
772	ramcfg = RADEON_READ(R600_RAMCFG);
773
774	switch (dev_priv->r600_max_tile_pipes) {
775	case 1:
776		gb_tiling_config |= R600_PIPE_TILING(0);
777		break;
778	case 2:
779		gb_tiling_config |= R600_PIPE_TILING(1);
780		break;
781	case 4:
782		gb_tiling_config |= R600_PIPE_TILING(2);
783		break;
784	case 8:
785		gb_tiling_config |= R600_PIPE_TILING(3);
786		break;
787	default:
788		break;
789	}
790
791	gb_tiling_config |= R600_BANK_TILING((ramcfg >> R600_NOOFBANK_SHIFT) & R600_NOOFBANK_MASK);
792
793	gb_tiling_config |= R600_GROUP_SIZE(0);
794
795	if (((ramcfg >> R600_NOOFROWS_SHIFT) & R600_NOOFROWS_MASK) > 3) {
796		gb_tiling_config |= R600_ROW_TILING(3);
797		gb_tiling_config |= R600_SAMPLE_SPLIT(3);
798	} else {
799		gb_tiling_config |=
800			R600_ROW_TILING(((ramcfg >> R600_NOOFROWS_SHIFT) & R600_NOOFROWS_MASK));
801		gb_tiling_config |=
802			R600_SAMPLE_SPLIT(((ramcfg >> R600_NOOFROWS_SHIFT) & R600_NOOFROWS_MASK));
803	}
804
805	gb_tiling_config |= R600_BANK_SWAPS(1);
806
807	backend_map = r600_get_tile_pipe_to_backend_map(dev_priv->r600_max_tile_pipes,
808							dev_priv->r600_max_backends,
809							(0xff << dev_priv->r600_max_backends) & 0xff);
810	gb_tiling_config |= R600_BACKEND_MAP(backend_map);
811
812	cc_gc_shader_pipe_config =
813		R600_INACTIVE_QD_PIPES((R6XX_MAX_PIPES_MASK << dev_priv->r600_max_pipes) & R6XX_MAX_PIPES_MASK);
814	cc_gc_shader_pipe_config |=
815		R600_INACTIVE_SIMDS((R6XX_MAX_SIMDS_MASK << dev_priv->r600_max_simds) & R6XX_MAX_SIMDS_MASK);
816
817	cc_rb_backend_disable =
818		R600_BACKEND_DISABLE((R6XX_MAX_BACKENDS_MASK << dev_priv->r600_max_backends) & R6XX_MAX_BACKENDS_MASK);
819
820	RADEON_WRITE(R600_GB_TILING_CONFIG,      gb_tiling_config);
821	RADEON_WRITE(R600_DCP_TILING_CONFIG,    (gb_tiling_config & 0xffff));
822	RADEON_WRITE(R600_HDP_TILING_CONFIG,    (gb_tiling_config & 0xffff));
823
824	RADEON_WRITE(R600_CC_RB_BACKEND_DISABLE,      cc_rb_backend_disable);
825	RADEON_WRITE(R600_CC_GC_SHADER_PIPE_CONFIG,   cc_gc_shader_pipe_config);
826	RADEON_WRITE(R600_GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);
827
828	num_qd_pipes =
829		R6XX_MAX_BACKENDS - r600_count_pipe_bits(cc_gc_shader_pipe_config & R600_INACTIVE_QD_PIPES_MASK);
830	RADEON_WRITE(R600_VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & R600_DEALLOC_DIST_MASK);
831	RADEON_WRITE(R600_VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & R600_VTX_REUSE_DEPTH_MASK);
832
833	/* set HW defaults for 3D engine */
834	RADEON_WRITE(R600_CP_QUEUE_THRESHOLDS, (R600_ROQ_IB1_START(0x16) |
835						R600_ROQ_IB2_START(0x2b)));
836
837	RADEON_WRITE(R600_CP_MEQ_THRESHOLDS, (R600_MEQ_END(0x40) |
838					      R600_ROQ_END(0x40)));
839
840	RADEON_WRITE(R600_TA_CNTL_AUX, (R600_DISABLE_CUBE_ANISO |
841					R600_SYNC_GRADIENT |
842					R600_SYNC_WALKER |
843					R600_SYNC_ALIGNER));
844
845	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV670)
846		RADEON_WRITE(R600_ARB_GDEC_RD_CNTL, 0x00000021);
847
848	sx_debug_1 = RADEON_READ(R600_SX_DEBUG_1);
849	sx_debug_1 |= R600_SMX_EVENT_RELEASE;
850	if (((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_R600))
851		sx_debug_1 |= R600_ENABLE_NEW_SMX_ADDRESS;
852	RADEON_WRITE(R600_SX_DEBUG_1, sx_debug_1);
853
854	if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R600) ||
855	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630) ||
856	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
857	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
858	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780))
859		RADEON_WRITE(R600_DB_DEBUG, R600_PREZ_MUST_WAIT_FOR_POSTZ_DONE);
860	else
861		RADEON_WRITE(R600_DB_DEBUG, 0);
862
863	RADEON_WRITE(R600_DB_WATERMARKS, (R600_DEPTH_FREE(4) |
864					  R600_DEPTH_FLUSH(16) |
865					  R600_DEPTH_PENDING_FREE(4) |
866					  R600_DEPTH_CACHELINE_FREE(16)));
867	RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0);
868	RADEON_WRITE(R600_VGT_NUM_INSTANCES, 0);
869
870	RADEON_WRITE(R600_SPI_CONFIG_CNTL, R600_GPR_WRITE_PRIORITY(0));
871	RADEON_WRITE(R600_SPI_CONFIG_CNTL_1, R600_VTX_DONE_DELAY(0));
872
873	sq_ms_fifo_sizes = RADEON_READ(R600_SQ_MS_FIFO_SIZES);
874	if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
875	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
876	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780)) {
877		sq_ms_fifo_sizes = (R600_CACHE_FIFO_SIZE(0xa) |
878				    R600_FETCH_FIFO_HIWATER(0xa) |
879				    R600_DONE_FIFO_HIWATER(0xe0) |
880				    R600_ALU_UPDATE_FIFO_HIWATER(0x8));
881	} else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R600) ||
882		   ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630)) {
883		sq_ms_fifo_sizes &= ~R600_DONE_FIFO_HIWATER(0xff);
884		sq_ms_fifo_sizes |= R600_DONE_FIFO_HIWATER(0x4);
885	}
886	RADEON_WRITE(R600_SQ_MS_FIFO_SIZES, sq_ms_fifo_sizes);
887
888	/* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT
889	 * should be adjusted as needed by the 2D/3D drivers.  This just sets default values
890	 */
891	sq_config = RADEON_READ(R600_SQ_CONFIG);
892	sq_config &= ~(R600_PS_PRIO(3) |
893		       R600_VS_PRIO(3) |
894		       R600_GS_PRIO(3) |
895		       R600_ES_PRIO(3));
896	sq_config |= (R600_DX9_CONSTS |
897		      R600_VC_ENABLE |
898		      R600_PS_PRIO(0) |
899		      R600_VS_PRIO(1) |
900		      R600_GS_PRIO(2) |
901		      R600_ES_PRIO(3));
902
903	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R600) {
904		sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(124) |
905					  R600_NUM_VS_GPRS(124) |
906					  R600_NUM_CLAUSE_TEMP_GPRS(4));
907		sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(0) |
908					  R600_NUM_ES_GPRS(0));
909		sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(136) |
910					   R600_NUM_VS_THREADS(48) |
911					   R600_NUM_GS_THREADS(4) |
912					   R600_NUM_ES_THREADS(4));
913		sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(128) |
914					    R600_NUM_VS_STACK_ENTRIES(128));
915		sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(0) |
916					    R600_NUM_ES_STACK_ENTRIES(0));
917	} else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
918		   ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
919		   ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780)) {
920		/* no vertex cache */
921		sq_config &= ~R600_VC_ENABLE;
922
923		sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(44) |
924					  R600_NUM_VS_GPRS(44) |
925					  R600_NUM_CLAUSE_TEMP_GPRS(2));
926		sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(17) |
927					  R600_NUM_ES_GPRS(17));
928		sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(79) |
929					   R600_NUM_VS_THREADS(78) |
930					   R600_NUM_GS_THREADS(4) |
931					   R600_NUM_ES_THREADS(31));
932		sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(40) |
933					    R600_NUM_VS_STACK_ENTRIES(40));
934		sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(32) |
935					    R600_NUM_ES_STACK_ENTRIES(16));
936	} else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630) ||
937		   ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV635)) {
938		sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(44) |
939					  R600_NUM_VS_GPRS(44) |
940					  R600_NUM_CLAUSE_TEMP_GPRS(2));
941		sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(18) |
942					  R600_NUM_ES_GPRS(18));
943		sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(79) |
944					   R600_NUM_VS_THREADS(78) |
945					   R600_NUM_GS_THREADS(4) |
946					   R600_NUM_ES_THREADS(31));
947		sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(40) |
948					    R600_NUM_VS_STACK_ENTRIES(40));
949		sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(32) |
950					    R600_NUM_ES_STACK_ENTRIES(16));
951	} else if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV670) {
952		sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(44) |
953					  R600_NUM_VS_GPRS(44) |
954					  R600_NUM_CLAUSE_TEMP_GPRS(2));
955		sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(17) |
956					  R600_NUM_ES_GPRS(17));
957		sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(79) |
958					   R600_NUM_VS_THREADS(78) |
959					   R600_NUM_GS_THREADS(4) |
960					   R600_NUM_ES_THREADS(31));
961		sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(64) |
962					    R600_NUM_VS_STACK_ENTRIES(64));
963		sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(64) |
964					    R600_NUM_ES_STACK_ENTRIES(64));
965	}
966
967	RADEON_WRITE(R600_SQ_CONFIG, sq_config);
968	RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_1,  sq_gpr_resource_mgmt_1);
969	RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_2,  sq_gpr_resource_mgmt_2);
970	RADEON_WRITE(R600_SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt);
971	RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_1, sq_stack_resource_mgmt_1);
972	RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_2, sq_stack_resource_mgmt_2);
973
974	if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
975	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
976	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780))
977		RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, R600_CACHE_INVALIDATION(R600_TC_ONLY));
978	else
979		RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, R600_CACHE_INVALIDATION(R600_VC_AND_TC));
980
981	RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_2S, (R600_S0_X(0xc) |
982						    R600_S0_Y(0x4) |
983						    R600_S1_X(0x4) |
984						    R600_S1_Y(0xc)));
985	RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_4S, (R600_S0_X(0xe) |
986						    R600_S0_Y(0xe) |
987						    R600_S1_X(0x2) |
988						    R600_S1_Y(0x2) |
989						    R600_S2_X(0xa) |
990						    R600_S2_Y(0x6) |
991						    R600_S3_X(0x6) |
992						    R600_S3_Y(0xa)));
993	RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_8S_WD0, (R600_S0_X(0xe) |
994							R600_S0_Y(0xb) |
995							R600_S1_X(0x4) |
996							R600_S1_Y(0xc) |
997							R600_S2_X(0x1) |
998							R600_S2_Y(0x6) |
999							R600_S3_X(0xa) |
1000							R600_S3_Y(0xe)));
1001	RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_8S_WD1, (R600_S4_X(0x6) |
1002							R600_S4_Y(0x1) |
1003							R600_S5_X(0x0) |
1004							R600_S5_Y(0x0) |
1005							R600_S6_X(0xb) |
1006							R600_S6_Y(0x4) |
1007							R600_S7_X(0x7) |
1008							R600_S7_Y(0x8)));
1009
1010
1011	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
1012	case CHIP_R600:
1013	case CHIP_RV630:
1014	case CHIP_RV635:
1015		gs_prim_buffer_depth = 0;
1016		break;
1017	case CHIP_RV610:
1018	case CHIP_RS780:
1019	case CHIP_RV620:
1020		gs_prim_buffer_depth = 32;
1021		break;
1022	case CHIP_RV670:
1023		gs_prim_buffer_depth = 128;
1024		break;
1025	default:
1026		break;
1027	}
1028
1029	num_gs_verts_per_thread = dev_priv->r600_max_pipes * 16;
1030	vgt_gs_per_es = gs_prim_buffer_depth + num_gs_verts_per_thread;
1031	/* Max value for this is 256 */
1032	if (vgt_gs_per_es > 256)
1033		vgt_gs_per_es = 256;
1034
1035	RADEON_WRITE(R600_VGT_ES_PER_GS, 128);
1036	RADEON_WRITE(R600_VGT_GS_PER_ES, vgt_gs_per_es);
1037	RADEON_WRITE(R600_VGT_GS_PER_VS, 2);
1038	RADEON_WRITE(R600_VGT_GS_VERTEX_REUSE, 16);
1039
1040	/* more default values. 2D/3D driver should adjust as needed */
1041	RADEON_WRITE(R600_PA_SC_LINE_STIPPLE_STATE, 0);
1042	RADEON_WRITE(R600_VGT_STRMOUT_EN, 0);
1043	RADEON_WRITE(R600_SX_MISC, 0);
1044	RADEON_WRITE(R600_PA_SC_MODE_CNTL, 0);
1045	RADEON_WRITE(R600_PA_SC_AA_CONFIG, 0);
1046	RADEON_WRITE(R600_PA_SC_LINE_STIPPLE, 0);
1047	RADEON_WRITE(R600_SPI_INPUT_Z, 0);
1048	RADEON_WRITE(R600_SPI_PS_IN_CONTROL_0, R600_NUM_INTERP(2));
1049	RADEON_WRITE(R600_CB_COLOR7_FRAG, 0);
1050
1051	/* clear render buffer base addresses */
1052	RADEON_WRITE(R600_CB_COLOR0_BASE, 0);
1053	RADEON_WRITE(R600_CB_COLOR1_BASE, 0);
1054	RADEON_WRITE(R600_CB_COLOR2_BASE, 0);
1055	RADEON_WRITE(R600_CB_COLOR3_BASE, 0);
1056	RADEON_WRITE(R600_CB_COLOR4_BASE, 0);
1057	RADEON_WRITE(R600_CB_COLOR5_BASE, 0);
1058	RADEON_WRITE(R600_CB_COLOR6_BASE, 0);
1059	RADEON_WRITE(R600_CB_COLOR7_BASE, 0);
1060
1061	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
1062	case CHIP_RV610:
1063	case CHIP_RS780:
1064	case CHIP_RV620:
1065		tc_cntl = R600_TC_L2_SIZE(8);
1066		break;
1067	case CHIP_RV630:
1068	case CHIP_RV635:
1069		tc_cntl = R600_TC_L2_SIZE(4);
1070		break;
1071	case CHIP_R600:
1072		tc_cntl = R600_TC_L2_SIZE(0) | R600_L2_DISABLE_LATE_HIT;
1073		break;
1074	default:
1075		tc_cntl = R600_TC_L2_SIZE(0);
1076		break;
1077	}
1078
1079	RADEON_WRITE(R600_TC_CNTL, tc_cntl);
1080
1081	hdp_host_path_cntl = RADEON_READ(R600_HDP_HOST_PATH_CNTL);
1082	RADEON_WRITE(R600_HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
1083
1084	arb_pop = RADEON_READ(R600_ARB_POP);
1085	arb_pop |= R600_ENABLE_TC128;
1086	RADEON_WRITE(R600_ARB_POP, arb_pop);
1087
1088	RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0);
1089	RADEON_WRITE(R600_PA_CL_ENHANCE, (R600_CLIP_VTX_REORDER_ENA |
1090					  R600_NUM_CLIP_SEQ(3)));
1091	RADEON_WRITE(R600_PA_SC_ENHANCE, R600_FORCE_EOV_MAX_CLK_CNT(4095));
1092
1093}
1094
1095static u32 r700_get_tile_pipe_to_backend_map(u32 num_tile_pipes,
1096					     u32 num_backends,
1097					     u32 backend_disable_mask)
1098{
1099	u32 backend_map = 0;
1100	u32 enabled_backends_mask;
1101	u32 enabled_backends_count;
1102	u32 cur_pipe;
1103	u32 swizzle_pipe[R7XX_MAX_PIPES];
1104	u32 cur_backend;
1105	u32 i;
1106
1107	if (num_tile_pipes > R7XX_MAX_PIPES)
1108		num_tile_pipes = R7XX_MAX_PIPES;
1109	if (num_tile_pipes < 1)
1110		num_tile_pipes = 1;
1111	if (num_backends > R7XX_MAX_BACKENDS)
1112		num_backends = R7XX_MAX_BACKENDS;
1113	if (num_backends < 1)
1114		num_backends = 1;
1115
1116	enabled_backends_mask = 0;
1117	enabled_backends_count = 0;
1118	for (i = 0; i < R7XX_MAX_BACKENDS; ++i) {
1119		if (((backend_disable_mask >> i) & 1) == 0) {
1120			enabled_backends_mask |= (1 << i);
1121			++enabled_backends_count;
1122		}
1123		if (enabled_backends_count == num_backends)
1124			break;
1125	}
1126
1127	if (enabled_backends_count == 0) {
1128		enabled_backends_mask = 1;
1129		enabled_backends_count = 1;
1130	}
1131
1132	if (enabled_backends_count != num_backends)
1133		num_backends = enabled_backends_count;
1134
1135	memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R7XX_MAX_PIPES);
1136	switch (num_tile_pipes) {
1137	case 1:
1138		swizzle_pipe[0] = 0;
1139		break;
1140	case 2:
1141		swizzle_pipe[0] = 0;
1142		swizzle_pipe[1] = 1;
1143		break;
1144	case 3:
1145		swizzle_pipe[0] = 0;
1146		swizzle_pipe[1] = 2;
1147		swizzle_pipe[2] = 1;
1148		break;
1149	case 4:
1150		swizzle_pipe[0] = 0;
1151		swizzle_pipe[1] = 2;
1152		swizzle_pipe[2] = 3;
1153		swizzle_pipe[3] = 1;
1154		break;
1155	case 5:
1156		swizzle_pipe[0] = 0;
1157		swizzle_pipe[1] = 2;
1158		swizzle_pipe[2] = 4;
1159		swizzle_pipe[3] = 1;
1160		swizzle_pipe[4] = 3;
1161		break;
1162	case 6:
1163		swizzle_pipe[0] = 0;
1164		swizzle_pipe[1] = 2;
1165		swizzle_pipe[2] = 4;
1166		swizzle_pipe[3] = 5;
1167		swizzle_pipe[4] = 3;
1168		swizzle_pipe[5] = 1;
1169		break;
1170	case 7:
1171		swizzle_pipe[0] = 0;
1172		swizzle_pipe[1] = 2;
1173		swizzle_pipe[2] = 4;
1174		swizzle_pipe[3] = 6;
1175		swizzle_pipe[4] = 3;
1176		swizzle_pipe[5] = 1;
1177		swizzle_pipe[6] = 5;
1178		break;
1179	case 8:
1180		swizzle_pipe[0] = 0;
1181		swizzle_pipe[1] = 2;
1182		swizzle_pipe[2] = 4;
1183		swizzle_pipe[3] = 6;
1184		swizzle_pipe[4] = 3;
1185		swizzle_pipe[5] = 1;
1186		swizzle_pipe[6] = 7;
1187		swizzle_pipe[7] = 5;
1188		break;
1189	}
1190
1191	cur_backend = 0;
1192	for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) {
1193		while (((1 << cur_backend) & enabled_backends_mask) == 0)
1194			cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS;
1195
1196		backend_map |= (u32)(((cur_backend & 3) << (swizzle_pipe[cur_pipe] * 2)));
1197
1198		cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS;
1199	}
1200
1201	return backend_map;
1202}
1203
/*
 * One-time graphics-engine setup for R7xx-family chips (RV770/RV730/RV710).
 * Records per-chip resource limits in dev_priv, then programs tiling,
 * shader-pipe, SQ resource and assorted default state registers.
 * Register write ordering follows the hardware bring-up sequence and
 * should not be rearranged.
 */
static void r700_gfx_init(struct drm_device *dev,
			  drm_radeon_private_t *dev_priv)
{
	int i, j, num_qd_pipes;
	u32 sx_debug_1;
	u32 smx_dc_ctl0;
	u32 num_gs_verts_per_thread;
	u32 vgt_gs_per_es;
	u32 gs_prim_buffer_depth = 0;
	u32 sq_ms_fifo_sizes;
	u32 sq_config;
	u32 sq_thread_resource_mgmt;
	u32 hdp_host_path_cntl;
	u32 sq_dyn_gpr_size_simd_ab_0;
	u32 backend_map;
	u32 gb_tiling_config = 0;
	u32 cc_rb_backend_disable = 0;
	u32 cc_gc_shader_pipe_config = 0;
	u32 mc_arb_ramcfg;
	u32 db_debug4;

	/* setup chip specs: fixed per-ASIC resource limits used below.
	 * An unknown family falls through with everything left at the
	 * values already in dev_priv. */
	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
	case CHIP_RV770:
		dev_priv->r600_max_pipes = 4;
		dev_priv->r600_max_tile_pipes = 8;
		dev_priv->r600_max_simds = 10;
		dev_priv->r600_max_backends = 4;
		dev_priv->r600_max_gprs = 256;
		dev_priv->r600_max_threads = 248;
		dev_priv->r600_max_stack_entries = 512;
		dev_priv->r600_max_hw_contexts = 8;
		dev_priv->r600_max_gs_threads = 16 * 2;
		dev_priv->r600_sx_max_export_size = 128;
		dev_priv->r600_sx_max_export_pos_size = 16;
		dev_priv->r600_sx_max_export_smx_size = 112;
		dev_priv->r600_sq_num_cf_insts = 2;

		dev_priv->r700_sx_num_of_sets = 7;
		dev_priv->r700_sc_prim_fifo_size = 0xF9;
		dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;
		dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;
		break;
	case CHIP_RV730:
		dev_priv->r600_max_pipes = 2;
		dev_priv->r600_max_tile_pipes = 4;
		dev_priv->r600_max_simds = 8;
		dev_priv->r600_max_backends = 2;
		dev_priv->r600_max_gprs = 128;
		dev_priv->r600_max_threads = 248;
		dev_priv->r600_max_stack_entries = 256;
		dev_priv->r600_max_hw_contexts = 8;
		dev_priv->r600_max_gs_threads = 16 * 2;
		dev_priv->r600_sx_max_export_size = 256;
		dev_priv->r600_sx_max_export_pos_size = 32;
		dev_priv->r600_sx_max_export_smx_size = 224;
		dev_priv->r600_sq_num_cf_insts = 2;

		dev_priv->r700_sx_num_of_sets = 7;
		dev_priv->r700_sc_prim_fifo_size = 0xf9;
		dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;
		dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;
		break;
	case CHIP_RV710:
		dev_priv->r600_max_pipes = 2;
		dev_priv->r600_max_tile_pipes = 2;
		dev_priv->r600_max_simds = 2;
		dev_priv->r600_max_backends = 1;
		dev_priv->r600_max_gprs = 256;
		dev_priv->r600_max_threads = 192;
		dev_priv->r600_max_stack_entries = 256;
		dev_priv->r600_max_hw_contexts = 4;
		dev_priv->r600_max_gs_threads = 8 * 2;
		dev_priv->r600_sx_max_export_size = 128;
		dev_priv->r600_sx_max_export_pos_size = 16;
		dev_priv->r600_sx_max_export_smx_size = 112;
		dev_priv->r600_sq_num_cf_insts = 1;

		dev_priv->r700_sx_num_of_sets = 7;
		dev_priv->r700_sc_prim_fifo_size = 0x40;
		dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;
		dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;
		break;
	default:
		break;
	}

	/* Initialize HDP: zero 32 register groups of 5 dwords each,
	 * stepping 0x18 bytes per group. */
	j = 0;
	for (i = 0; i < 32; i++) {
		RADEON_WRITE((0x2c14 + j), 0x00000000);
		RADEON_WRITE((0x2c18 + j), 0x00000000);
		RADEON_WRITE((0x2c1c + j), 0x00000000);
		RADEON_WRITE((0x2c20 + j), 0x00000000);
		RADEON_WRITE((0x2c24 + j), 0x00000000);
		j += 0x18;
	}

	RADEON_WRITE(R600_GRBM_CNTL, R600_GRBM_READ_TIMEOUT(0xff));

	/* setup tiling, simd, pipe config */
	mc_arb_ramcfg = RADEON_READ(R700_MC_ARB_RAMCFG);

	/* PIPE_TILING field encodes log2(tile pipes). */
	switch (dev_priv->r600_max_tile_pipes) {
	case 1:
		gb_tiling_config |= R600_PIPE_TILING(0);
		break;
	case 2:
		gb_tiling_config |= R600_PIPE_TILING(1);
		break;
	case 4:
		gb_tiling_config |= R600_PIPE_TILING(2);
		break;
	case 8:
		gb_tiling_config |= R600_PIPE_TILING(3);
		break;
	default:
		break;
	}

	/* RV770 uses a fixed bank-tiling value; others derive it from
	 * the memory controller's bank count. */
	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV770)
		gb_tiling_config |= R600_BANK_TILING(1);
	else
		gb_tiling_config |= R600_BANK_TILING((mc_arb_ramcfg >> R700_NOOFBANK_SHIFT) & R700_NOOFBANK_MASK);

	gb_tiling_config |= R600_GROUP_SIZE(0);

	/* Row tiling / sample split come from the RAM row count,
	 * capped at 3. */
	if (((mc_arb_ramcfg >> R700_NOOFROWS_SHIFT) & R700_NOOFROWS_MASK) > 3) {
		gb_tiling_config |= R600_ROW_TILING(3);
		gb_tiling_config |= R600_SAMPLE_SPLIT(3);
	} else {
		gb_tiling_config |=
			R600_ROW_TILING(((mc_arb_ramcfg >> R700_NOOFROWS_SHIFT) & R700_NOOFROWS_MASK));
		gb_tiling_config |=
			R600_SAMPLE_SPLIT(((mc_arb_ramcfg >> R700_NOOFROWS_SHIFT) & R700_NOOFROWS_MASK));
	}

	gb_tiling_config |= R600_BANK_SWAPS(1);

	/* Disable-mask keeps only backends above r600_max_backends. */
	backend_map = r700_get_tile_pipe_to_backend_map(dev_priv->r600_max_tile_pipes,
							dev_priv->r600_max_backends,
							(0xff << dev_priv->r600_max_backends) & 0xff);
	gb_tiling_config |= R600_BACKEND_MAP(backend_map);

	cc_gc_shader_pipe_config =
		R600_INACTIVE_QD_PIPES((R7XX_MAX_PIPES_MASK << dev_priv->r600_max_pipes) & R7XX_MAX_PIPES_MASK);
	cc_gc_shader_pipe_config |=
		R600_INACTIVE_SIMDS((R7XX_MAX_SIMDS_MASK << dev_priv->r600_max_simds) & R7XX_MAX_SIMDS_MASK);

	cc_rb_backend_disable =
		R600_BACKEND_DISABLE((R7XX_MAX_BACKENDS_MASK << dev_priv->r600_max_backends) & R7XX_MAX_BACKENDS_MASK);

	/* Tiling config is mirrored into DCP/HDP (low 16 bits only). */
	RADEON_WRITE(R600_GB_TILING_CONFIG,      gb_tiling_config);
	RADEON_WRITE(R600_DCP_TILING_CONFIG,    (gb_tiling_config & 0xffff));
	RADEON_WRITE(R600_HDP_TILING_CONFIG,    (gb_tiling_config & 0xffff));

	RADEON_WRITE(R600_CC_RB_BACKEND_DISABLE,      cc_rb_backend_disable);
	RADEON_WRITE(R600_CC_GC_SHADER_PIPE_CONFIG,   cc_gc_shader_pipe_config);
	RADEON_WRITE(R600_GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);

	RADEON_WRITE(R700_CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
	RADEON_WRITE(R700_CGTS_SYS_TCC_DISABLE, 0);
	RADEON_WRITE(R700_CGTS_TCC_DISABLE, 0);
	RADEON_WRITE(R700_CGTS_USER_SYS_TCC_DISABLE, 0);
	RADEON_WRITE(R700_CGTS_USER_TCC_DISABLE, 0);

	/* Active pipes = max minus the bits set inactive above. */
	num_qd_pipes =
		R7XX_MAX_BACKENDS - r600_count_pipe_bits(cc_gc_shader_pipe_config & R600_INACTIVE_QD_PIPES_MASK);
	RADEON_WRITE(R600_VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & R600_DEALLOC_DIST_MASK);
	RADEON_WRITE(R600_VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & R600_VTX_REUSE_DEPTH_MASK);

	/* set HW defaults for 3D engine */
	RADEON_WRITE(R600_CP_QUEUE_THRESHOLDS, (R600_ROQ_IB1_START(0x16) |
						R600_ROQ_IB2_START(0x2b)));

	RADEON_WRITE(R600_CP_MEQ_THRESHOLDS, R700_STQ_SPLIT(0x30));

	RADEON_WRITE(R600_TA_CNTL_AUX, (R600_DISABLE_CUBE_ANISO |
					R600_SYNC_GRADIENT |
					R600_SYNC_WALKER |
					R600_SYNC_ALIGNER));

	/* Read-modify-write: only flip the new-SMX-address bit. */
	sx_debug_1 = RADEON_READ(R700_SX_DEBUG_1);
	sx_debug_1 |= R700_ENABLE_NEW_SMX_ADDRESS;
	RADEON_WRITE(R700_SX_DEBUG_1, sx_debug_1);

	/* Cache depth derived from the per-chip SX set count. */
	smx_dc_ctl0 = RADEON_READ(R600_SMX_DC_CTL0);
	smx_dc_ctl0 &= ~R700_CACHE_DEPTH(0x1ff);
	smx_dc_ctl0 |= R700_CACHE_DEPTH((dev_priv->r700_sx_num_of_sets * 64) - 1);
	RADEON_WRITE(R600_SMX_DC_CTL0, smx_dc_ctl0);

	RADEON_WRITE(R700_SMX_EVENT_CTL, (R700_ES_FLUSH_CTL(4) |
					  R700_GS_FLUSH_CTL(4) |
					  R700_ACK_FLUSH_CTL(3) |
					  R700_SYNC_FLUSH_CTL));

	/* DB debug knobs differ between RV770 and the smaller parts. */
	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV770)
		RADEON_WRITE(R700_DB_DEBUG3, R700_DB_CLK_OFF_DELAY(0x1f));
	else {
		db_debug4 = RADEON_READ(RV700_DB_DEBUG4);
		db_debug4 |= RV700_DISABLE_TILE_COVERED_FOR_PS_ITER;
		RADEON_WRITE(RV700_DB_DEBUG4, db_debug4);
	}

	RADEON_WRITE(R600_SX_EXPORT_BUFFER_SIZES, (R600_COLOR_BUFFER_SIZE((dev_priv->r600_sx_max_export_size / 4) - 1) |
						   R600_POSITION_BUFFER_SIZE((dev_priv->r600_sx_max_export_pos_size / 4) - 1) |
						   R600_SMX_BUFFER_SIZE((dev_priv->r600_sx_max_export_smx_size / 4) - 1)));

	RADEON_WRITE(R700_PA_SC_FIFO_SIZE_R7XX, (R700_SC_PRIM_FIFO_SIZE(dev_priv->r700_sc_prim_fifo_size) |
						 R700_SC_HIZ_TILE_FIFO_SIZE(dev_priv->r700_sc_hiz_tile_fifo_size) |
						 R700_SC_EARLYZ_TILE_FIFO_SIZE(dev_priv->r700_sc_earlyz_tile_fifo_fize)));

	RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0);

	RADEON_WRITE(R600_VGT_NUM_INSTANCES, 1);

	RADEON_WRITE(R600_SPI_CONFIG_CNTL, R600_GPR_WRITE_PRIORITY(0));

	RADEON_WRITE(R600_SPI_CONFIG_CNTL_1, R600_VTX_DONE_DELAY(4));

	RADEON_WRITE(R600_CP_PERFMON_CNTL, 0);

	/* FIFO sizes scale with the chip's CF instruction count;
	 * fetch high-water mark differs on RV770. */
	sq_ms_fifo_sizes = (R600_CACHE_FIFO_SIZE(16 * dev_priv->r600_sq_num_cf_insts) |
			    R600_DONE_FIFO_HIWATER(0xe0) |
			    R600_ALU_UPDATE_FIFO_HIWATER(0x8));
	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
	case CHIP_RV770:
		sq_ms_fifo_sizes |= R600_FETCH_FIFO_HIWATER(0x1);
		break;
	case CHIP_RV730:
	case CHIP_RV710:
	default:
		sq_ms_fifo_sizes |= R600_FETCH_FIFO_HIWATER(0x4);
		break;
	}
	RADEON_WRITE(R600_SQ_MS_FIFO_SIZES, sq_ms_fifo_sizes);

	/* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT
	 * should be adjusted as needed by the 2D/3D drivers.  This just sets default values
	 */
	sq_config = RADEON_READ(R600_SQ_CONFIG);
	sq_config &= ~(R600_PS_PRIO(3) |
		       R600_VS_PRIO(3) |
		       R600_GS_PRIO(3) |
		       R600_ES_PRIO(3));
	sq_config |= (R600_DX9_CONSTS |
		      R600_VC_ENABLE |
		      R600_EXPORT_SRC_C |
		      R600_PS_PRIO(0) |
		      R600_VS_PRIO(1) |
		      R600_GS_PRIO(2) |
		      R600_ES_PRIO(3));
	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710)
		/* no vertex cache */
		sq_config &= ~R600_VC_ENABLE;

	RADEON_WRITE(R600_SQ_CONFIG, sq_config);

	/* GPRs split 24/64 to PS and VS, 7/64 to GS and ES. */
	RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_1,  (R600_NUM_PS_GPRS((dev_priv->r600_max_gprs * 24)/64) |
						    R600_NUM_VS_GPRS((dev_priv->r600_max_gprs * 24)/64) |
						    R600_NUM_CLAUSE_TEMP_GPRS(((dev_priv->r600_max_gprs * 24)/64)/2)));

	RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_2,  (R600_NUM_GS_GPRS((dev_priv->r600_max_gprs * 7)/64) |
						    R600_NUM_ES_GPRS((dev_priv->r600_max_gprs * 7)/64)));

	/* Threads split 4:2:1 eighths to PS/VS/ES; GS gets an eighth
	 * clamped to the chip's GS-thread limit. */
	sq_thread_resource_mgmt = (R600_NUM_PS_THREADS((dev_priv->r600_max_threads * 4)/8) |
				   R600_NUM_VS_THREADS((dev_priv->r600_max_threads * 2)/8) |
				   R600_NUM_ES_THREADS((dev_priv->r600_max_threads * 1)/8));
	if (((dev_priv->r600_max_threads * 1) / 8) > dev_priv->r600_max_gs_threads)
		sq_thread_resource_mgmt |= R600_NUM_GS_THREADS(dev_priv->r600_max_gs_threads);
	else
		sq_thread_resource_mgmt |= R600_NUM_GS_THREADS((dev_priv->r600_max_gs_threads * 1)/8);
	RADEON_WRITE(R600_SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt);

	/* Stack entries split evenly (1/4 each) across the four stages. */
	RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_1, (R600_NUM_PS_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4) |
						     R600_NUM_VS_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4)));

	RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_2, (R600_NUM_GS_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4) |
						     R600_NUM_ES_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4)));

	/* Same dynamic GPR sizing replicated across all 8 SIMD pairs. */
	sq_dyn_gpr_size_simd_ab_0 = (R700_SIMDA_RING0((dev_priv->r600_max_gprs * 38)/64) |
				     R700_SIMDA_RING1((dev_priv->r600_max_gprs * 38)/64) |
				     R700_SIMDB_RING0((dev_priv->r600_max_gprs * 38)/64) |
				     R700_SIMDB_RING1((dev_priv->r600_max_gprs * 38)/64));

	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_0, sq_dyn_gpr_size_simd_ab_0);
	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_1, sq_dyn_gpr_size_simd_ab_0);
	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_2, sq_dyn_gpr_size_simd_ab_0);
	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_3, sq_dyn_gpr_size_simd_ab_0);
	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_4, sq_dyn_gpr_size_simd_ab_0);
	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_5, sq_dyn_gpr_size_simd_ab_0);
	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_6, sq_dyn_gpr_size_simd_ab_0);
	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_7, sq_dyn_gpr_size_simd_ab_0);

	RADEON_WRITE(R700_PA_SC_FORCE_EOV_MAX_CNTS, (R700_FORCE_EOV_MAX_CLK_CNT(4095) |
						     R700_FORCE_EOV_MAX_REZ_CNT(255)));

	/* RV710 has no vertex cache, so invalidate TC only. */
	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710)
		RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, (R600_CACHE_INVALIDATION(R600_TC_ONLY) |
							   R700_AUTO_INVLD_EN(R700_ES_AND_GS_AUTO)));
	else
		RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, (R600_CACHE_INVALIDATION(R600_VC_AND_TC) |
							   R700_AUTO_INVLD_EN(R700_ES_AND_GS_AUTO)));

	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
	case CHIP_RV770:
	case CHIP_RV730:
		gs_prim_buffer_depth = 384;
		break;
	case CHIP_RV710:
		gs_prim_buffer_depth = 128;
		break;
	default:
		break;
	}

	num_gs_verts_per_thread = dev_priv->r600_max_pipes * 16;
	vgt_gs_per_es = gs_prim_buffer_depth + num_gs_verts_per_thread;
	/* Max value for this is 256 */
	if (vgt_gs_per_es > 256)
		vgt_gs_per_es = 256;

	RADEON_WRITE(R600_VGT_ES_PER_GS, 128);
	RADEON_WRITE(R600_VGT_GS_PER_ES, vgt_gs_per_es);
	RADEON_WRITE(R600_VGT_GS_PER_VS, 2);

	/* more default values. 2D/3D driver should adjust as needed */
	RADEON_WRITE(R600_VGT_GS_VERTEX_REUSE, 16);
	RADEON_WRITE(R600_PA_SC_LINE_STIPPLE_STATE, 0);
	RADEON_WRITE(R600_VGT_STRMOUT_EN, 0);
	RADEON_WRITE(R600_SX_MISC, 0);
	RADEON_WRITE(R600_PA_SC_MODE_CNTL, 0);
	RADEON_WRITE(R700_PA_SC_EDGERULE, 0xaaaaaaaa);
	RADEON_WRITE(R600_PA_SC_AA_CONFIG, 0);
	RADEON_WRITE(R600_PA_SC_CLIPRECT_RULE, 0xffff);
	RADEON_WRITE(R600_PA_SC_LINE_STIPPLE, 0);
	RADEON_WRITE(R600_SPI_INPUT_Z, 0);
	RADEON_WRITE(R600_SPI_PS_IN_CONTROL_0, R600_NUM_INTERP(2));
	RADEON_WRITE(R600_CB_COLOR7_FRAG, 0);

	/* clear render buffer base addresses */
	RADEON_WRITE(R600_CB_COLOR0_BASE, 0);
	RADEON_WRITE(R600_CB_COLOR1_BASE, 0);
	RADEON_WRITE(R600_CB_COLOR2_BASE, 0);
	RADEON_WRITE(R600_CB_COLOR3_BASE, 0);
	RADEON_WRITE(R600_CB_COLOR4_BASE, 0);
	RADEON_WRITE(R600_CB_COLOR5_BASE, 0);
	RADEON_WRITE(R600_CB_COLOR6_BASE, 0);
	RADEON_WRITE(R600_CB_COLOR7_BASE, 0);

	RADEON_WRITE(R700_TCP_CNTL, 0);

	/* Read-back then write-back; presumably flushes/latches the HDP
	 * host path setting — NOTE(review): confirm against register docs. */
	hdp_host_path_cntl = RADEON_READ(R600_HDP_HOST_PATH_CNTL);
	RADEON_WRITE(R600_HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0);

	RADEON_WRITE(R600_PA_CL_ENHANCE, (R600_CLIP_VTX_REORDER_ENA |
					  R600_NUM_CLIP_SEQ(3)));

}
1565
/*
 * Bring up the CP (command processor) ring buffer: run the family-specific
 * gfx init, soft-reset the CP, program ring size/pointers and the read
 * pointer writeback address, set the ring base, and initialize the
 * scratch-register writeback area.  The ring may live in AGP or PCI(E)
 * GART space; addresses are translated into the GART aperture accordingly.
 */
static void r600_cp_init_ring_buffer(struct drm_device *dev,
				       drm_radeon_private_t *dev_priv,
				       struct drm_file *file_priv)
{
	u32 ring_start;
	u64 rptr_addr;

	/* R7xx and newer families take the r700 init path. */
	if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770))
		r700_gfx_init(dev, dev_priv);
	else
		r600_gfx_init(dev, dev_priv);

	/* Pulse the CP soft reset; the read flushes the write before
	 * the delay. */
	RADEON_WRITE(R600_GRBM_SOFT_RESET, R600_SOFT_RESET_CP);
	RADEON_READ(R600_GRBM_SOFT_RESET);
	DRM_UDELAY(15000);
	RADEON_WRITE(R600_GRBM_SOFT_RESET, 0);


	/* Set ring buffer size */
#ifdef __BIG_ENDIAN
	RADEON_WRITE(R600_CP_RB_CNTL,
		     RADEON_BUF_SWAP_32BIT |
		     RADEON_RB_NO_UPDATE |
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#else
	RADEON_WRITE(R600_CP_RB_CNTL,
		     RADEON_RB_NO_UPDATE |
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#endif

	RADEON_WRITE(R600_CP_SEM_WAIT_TIMER, 0x4);

	/* Set the write pointer delay */
	RADEON_WRITE(R600_CP_RB_WPTR_DELAY, 0);

	/* Re-write RB_CNTL with RPTR_WR enabled so the pointers below
	 * can be reset. */
#ifdef __BIG_ENDIAN
	RADEON_WRITE(R600_CP_RB_CNTL,
		     RADEON_BUF_SWAP_32BIT |
		     RADEON_RB_NO_UPDATE |
		     RADEON_RB_RPTR_WR_ENA |
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#else
	RADEON_WRITE(R600_CP_RB_CNTL,
		     RADEON_RB_NO_UPDATE |
		     RADEON_RB_RPTR_WR_ENA |
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#endif

	/* Initialize the ring buffer's read and write pointers */
	RADEON_WRITE(R600_CP_RB_RPTR_WR, 0);
	RADEON_WRITE(R600_CP_RB_WPTR, 0);
	SET_RING_HEAD(dev_priv, 0);
	dev_priv->ring.tail = 0;

	/* Translate the ring_rptr map offset into a GART-aperture
	 * address (AGP vs scatter/gather backing). */
#if __OS_HAS_AGP
	if (dev_priv->flags & RADEON_IS_AGP) {
		rptr_addr = dev_priv->ring_rptr->offset
			- dev->agp->base +
			dev_priv->gart_vm_start;
	} else
#endif
	{
		rptr_addr = dev_priv->ring_rptr->offset
			- ((unsigned long) dev->sg->virtual)
			+ dev_priv->gart_vm_start;
	}
	RADEON_WRITE(R600_CP_RB_RPTR_ADDR,
		     rptr_addr & 0xffffffff);
	RADEON_WRITE(R600_CP_RB_RPTR_ADDR_HI,
		     upper_32_bits(rptr_addr));

	/* Final RB_CNTL: drop NO_UPDATE/RPTR_WR_ENA, keep size fields. */
#ifdef __BIG_ENDIAN
	RADEON_WRITE(R600_CP_RB_CNTL,
		     RADEON_BUF_SWAP_32BIT |
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#else
	RADEON_WRITE(R600_CP_RB_CNTL,
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#endif

#if __OS_HAS_AGP
	if (dev_priv->flags & RADEON_IS_AGP) {
		/* XXX */
		radeon_write_agp_base(dev_priv, dev->agp->base);

		/* XXX */
		radeon_write_agp_location(dev_priv,
			     (((dev_priv->gart_vm_start - 1 +
				dev_priv->gart_size) & 0xffff0000) |
			      (dev_priv->gart_vm_start >> 16)));

		ring_start = (dev_priv->cp_ring->offset
			      - dev->agp->base
			      + dev_priv->gart_vm_start);
	} else
#endif
		ring_start = (dev_priv->cp_ring->offset
			      - (unsigned long)dev->sg->virtual
			      + dev_priv->gart_vm_start);

	/* Ring base is programmed in 256-byte units. */
	RADEON_WRITE(R600_CP_RB_BASE, ring_start >> 8);

	RADEON_WRITE(R600_CP_ME_CNTL, 0xff);

	RADEON_WRITE(R600_CP_DEBUG, (1 << 27) | (1 << 28));

	/* Initialize the scratch register pointer.  This will cause
	 * the scratch register values to be written out to memory
	 * whenever they are updated.
	 *
	 * We simply put this behind the ring read pointer, this works
	 * with PCI GART as well as (whatever kind of) AGP GART
	 */
	{
		u64 scratch_addr;

		scratch_addr = RADEON_READ(R600_CP_RB_RPTR_ADDR);
		scratch_addr |= ((u64)RADEON_READ(R600_CP_RB_RPTR_ADDR_HI)) << 32;
		scratch_addr += R600_SCRATCH_REG_OFFSET;
		/* SCRATCH_ADDR takes the address in 256-byte units. */
		scratch_addr >>= 8;
		scratch_addr &= 0xffffffff;

		RADEON_WRITE(R600_SCRATCH_ADDR, (uint32_t)scratch_addr);
	}

	/* Enable writeback for scratch registers 0-2. */
	RADEON_WRITE(R600_SCRATCH_UMSK, 0x7);

	/* Turn on bus mastering */
	radeon_enable_bm(dev_priv);

	/* Zero both the writeback copies and the hardware registers for
	 * the frame/dispatch/clear age counters. */
	radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(0), 0);
	RADEON_WRITE(R600_LAST_FRAME_REG, 0);

	radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(1), 0);
	RADEON_WRITE(R600_LAST_DISPATCH_REG, 0);

	radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(2), 0);
	RADEON_WRITE(R600_LAST_CLEAR_REG, 0);

	/* reset sarea copies of these */
	if (dev_priv->sarea_priv) {
		dev_priv->sarea_priv->last_frame = 0;
		dev_priv->sarea_priv->last_dispatch = 0;
		dev_priv->sarea_priv->last_clear = 0;
	}

	r600_do_wait_for_idle(dev_priv);

}
1721
/*
 * Tear down R600 CP state: disable interrupts, release the AGP ring
 * mappings or the PCI GART page table, and wipe the volatile portion of
 * dev_private (everything before the 'flags' member is reset; flags and
 * later fields survive so re-init can reuse them).  Always returns 0.
 */
int r600_do_cleanup_cp(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	DRM_DEBUG("\n");

	/* Make sure interrupts are disabled here because the uninstall ioctl
	 * may not have been called from userspace and after dev_private
	 * is freed, it's too late.
	 */
	if (dev->irq_enabled)
		drm_irq_uninstall(dev);

#if __OS_HAS_AGP
	if (dev_priv->flags & RADEON_IS_AGP) {
		/* Unmap each AGP region and clear the pointer so a
		 * second cleanup is harmless. */
		if (dev_priv->cp_ring != NULL) {
			drm_core_ioremapfree(dev_priv->cp_ring, dev);
			dev_priv->cp_ring = NULL;
		}
		if (dev_priv->ring_rptr != NULL) {
			drm_core_ioremapfree(dev_priv->ring_rptr, dev);
			dev_priv->ring_rptr = NULL;
		}
		if (dev->agp_buffer_map != NULL) {
			drm_core_ioremapfree(dev->agp_buffer_map, dev);
			dev->agp_buffer_map = NULL;
		}
	} else
#endif
	{

		/* PCI(E) path: free the GART page table, then unmap the
		 * table's framebuffer window if it lived in FB memory. */
		if (dev_priv->gart_info.bus_addr)
			r600_page_table_cleanup(dev, &dev_priv->gart_info);

		if (dev_priv->gart_info.gart_table_location == DRM_ATI_GART_FB) {
			drm_core_ioremapfree(&dev_priv->gart_info.mapping, dev);
			dev_priv->gart_info.addr = 0;
		}
	}
	/* only clear to the start of flags */
	memset(dev_priv, 0, offsetof(drm_radeon_private_t, flags));

	return 0;
}
1765
1766int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init,
1767		    struct drm_file *file_priv)
1768{
1769	drm_radeon_private_t *dev_priv = dev->dev_private;
1770
1771	DRM_DEBUG("\n");
1772
1773	/* if we require new memory map but we don't have it fail */
1774	if ((dev_priv->flags & RADEON_NEW_MEMMAP) && !dev_priv->new_memmap) {
1775		DRM_ERROR("Cannot initialise DRM on this card\nThis card requires a new X.org DDX for 3D\n");
1776		r600_do_cleanup_cp(dev);
1777		return -EINVAL;
1778	}
1779
1780	if (init->is_pci && (dev_priv->flags & RADEON_IS_AGP)) {
1781		DRM_DEBUG("Forcing AGP card to PCI mode\n");
1782		dev_priv->flags &= ~RADEON_IS_AGP;
1783		/* The writeback test succeeds, but when writeback is enabled,
1784		 * the ring buffer read ptr update fails after first 128 bytes.
1785		 */
1786		radeon_no_wb = 1;
1787	} else if (!(dev_priv->flags & (RADEON_IS_AGP | RADEON_IS_PCI | RADEON_IS_PCIE))
1788		 && !init->is_pci) {
1789		DRM_DEBUG("Restoring AGP flag\n");
1790		dev_priv->flags |= RADEON_IS_AGP;
1791	}
1792
1793	dev_priv->usec_timeout = init->usec_timeout;
1794	if (dev_priv->usec_timeout < 1 ||
1795	    dev_priv->usec_timeout > RADEON_MAX_USEC_TIMEOUT) {
1796		DRM_DEBUG("TIMEOUT problem!\n");
1797		r600_do_cleanup_cp(dev);
1798		return -EINVAL;
1799	}
1800
1801	/* Enable vblank on CRTC1 for older X servers
1802	 */
1803	dev_priv->vblank_crtc = DRM_RADEON_VBLANK_CRTC1;
1804
1805	dev_priv->cp_mode = init->cp_mode;
1806
1807	/* We don't support anything other than bus-mastering ring mode,
1808	 * but the ring can be in either AGP or PCI space for the ring
1809	 * read pointer.
1810	 */
1811	if ((init->cp_mode != RADEON_CSQ_PRIBM_INDDIS) &&
1812	    (init->cp_mode != RADEON_CSQ_PRIBM_INDBM)) {
1813		DRM_DEBUG("BAD cp_mode (%x)!\n", init->cp_mode);
1814		r600_do_cleanup_cp(dev);
1815		return -EINVAL;
1816	}
1817
1818	switch (init->fb_bpp) {
1819	case 16:
1820		dev_priv->color_fmt = RADEON_COLOR_FORMAT_RGB565;
1821		break;
1822	case 32:
1823	default:
1824		dev_priv->color_fmt = RADEON_COLOR_FORMAT_ARGB8888;
1825		break;
1826	}
1827	dev_priv->front_offset = init->front_offset;
1828	dev_priv->front_pitch = init->front_pitch;
1829	dev_priv->back_offset = init->back_offset;
1830	dev_priv->back_pitch = init->back_pitch;
1831
1832	dev_priv->ring_offset = init->ring_offset;
1833	dev_priv->ring_rptr_offset = init->ring_rptr_offset;
1834	dev_priv->buffers_offset = init->buffers_offset;
1835	dev_priv->gart_textures_offset = init->gart_textures_offset;
1836
1837	dev_priv->sarea = drm_getsarea(dev);
1838	if (!dev_priv->sarea) {
1839		DRM_ERROR("could not find sarea!\n");
1840		r600_do_cleanup_cp(dev);
1841		return -EINVAL;
1842	}
1843
1844	dev_priv->cp_ring = drm_core_findmap(dev, init->ring_offset);
1845	if (!dev_priv->cp_ring) {
1846		DRM_ERROR("could not find cp ring region!\n");
1847		r600_do_cleanup_cp(dev);
1848		return -EINVAL;
1849	}
1850	dev_priv->ring_rptr = drm_core_findmap(dev, init->ring_rptr_offset);
1851	if (!dev_priv->ring_rptr) {
1852		DRM_ERROR("could not find ring read pointer!\n");
1853		r600_do_cleanup_cp(dev);
1854		return -EINVAL;
1855	}
1856	dev->agp_buffer_token = init->buffers_offset;
1857	dev->agp_buffer_map = drm_core_findmap(dev, init->buffers_offset);
1858	if (!dev->agp_buffer_map) {
1859		DRM_ERROR("could not find dma buffer region!\n");
1860		r600_do_cleanup_cp(dev);
1861		return -EINVAL;
1862	}
1863
1864	if (init->gart_textures_offset) {
1865		dev_priv->gart_textures =
1866		    drm_core_findmap(dev, init->gart_textures_offset);
1867		if (!dev_priv->gart_textures) {
1868			DRM_ERROR("could not find GART texture region!\n");
1869			r600_do_cleanup_cp(dev);
1870			return -EINVAL;
1871		}
1872	}
1873
1874	dev_priv->sarea_priv =
1875	    (drm_radeon_sarea_t *) ((u8 *) dev_priv->sarea->handle +
1876				    init->sarea_priv_offset);
1877
1878#if __OS_HAS_AGP
1879	/* XXX */
1880	if (dev_priv->flags & RADEON_IS_AGP) {
1881		drm_core_ioremap_wc(dev_priv->cp_ring, dev);
1882		drm_core_ioremap_wc(dev_priv->ring_rptr, dev);
1883		drm_core_ioremap_wc(dev->agp_buffer_map, dev);
1884		if (!dev_priv->cp_ring->handle ||
1885		    !dev_priv->ring_rptr->handle ||
1886		    !dev->agp_buffer_map->handle) {
1887			DRM_ERROR("could not find ioremap agp regions!\n");
1888			r600_do_cleanup_cp(dev);
1889			return -EINVAL;
1890		}
1891	} else
1892#endif
1893	{
1894		dev_priv->cp_ring->handle = (void *)dev_priv->cp_ring->offset;
1895		dev_priv->ring_rptr->handle =
1896		    (void *)dev_priv->ring_rptr->offset;
1897		dev->agp_buffer_map->handle =
1898		    (void *)dev->agp_buffer_map->offset;
1899
1900		DRM_DEBUG("dev_priv->cp_ring->handle %p\n",
1901			  dev_priv->cp_ring->handle);
1902		DRM_DEBUG("dev_priv->ring_rptr->handle %p\n",
1903			  dev_priv->ring_rptr->handle);
1904		DRM_DEBUG("dev->agp_buffer_map->handle %p\n",
1905			  dev->agp_buffer_map->handle);
1906	}
1907
1908	dev_priv->fb_location = (radeon_read_fb_location(dev_priv) & 0xffff) << 24;
1909	dev_priv->fb_size =
1910		(((radeon_read_fb_location(dev_priv) & 0xffff0000u) << 8) + 0x1000000)
1911		- dev_priv->fb_location;
1912
1913	dev_priv->front_pitch_offset = (((dev_priv->front_pitch / 64) << 22) |
1914					((dev_priv->front_offset
1915					  + dev_priv->fb_location) >> 10));
1916
1917	dev_priv->back_pitch_offset = (((dev_priv->back_pitch / 64) << 22) |
1918				       ((dev_priv->back_offset
1919					 + dev_priv->fb_location) >> 10));
1920
1921	dev_priv->depth_pitch_offset = (((dev_priv->depth_pitch / 64) << 22) |
1922					((dev_priv->depth_offset
1923					  + dev_priv->fb_location) >> 10));
1924
1925	dev_priv->gart_size = init->gart_size;
1926
1927	/* New let's set the memory map ... */
1928	if (dev_priv->new_memmap) {
1929		u32 base = 0;
1930
1931		DRM_INFO("Setting GART location based on new memory map\n");
1932
1933		/* If using AGP, try to locate the AGP aperture at the same
1934		 * location in the card and on the bus, though we have to
1935		 * align it down.
1936		 */
1937#if __OS_HAS_AGP
1938		/* XXX */
1939		if (dev_priv->flags & RADEON_IS_AGP) {
1940			base = dev->agp->base;
1941			/* Check if valid */
1942			if ((base + dev_priv->gart_size - 1) >= dev_priv->fb_location &&
1943			    base < (dev_priv->fb_location + dev_priv->fb_size - 1)) {
1944				DRM_INFO("Can't use AGP base @0x%08lx, won't fit\n",
1945					 dev->agp->base);
1946				base = 0;
1947			}
1948		}
1949#endif
1950		/* If not or if AGP is at 0 (Macs), try to put it elsewhere */
1951		if (base == 0) {
1952			base = dev_priv->fb_location + dev_priv->fb_size;
1953			if (base < dev_priv->fb_location ||
1954			    ((base + dev_priv->gart_size) & 0xfffffffful) < base)
1955				base = dev_priv->fb_location
1956					- dev_priv->gart_size;
1957		}
1958		dev_priv->gart_vm_start = base & 0xffc00000u;
1959		if (dev_priv->gart_vm_start != base)
1960			DRM_INFO("GART aligned down from 0x%08x to 0x%08x\n",
1961				 base, dev_priv->gart_vm_start);
1962	}
1963
1964#if __OS_HAS_AGP
1965	/* XXX */
1966	if (dev_priv->flags & RADEON_IS_AGP)
1967		dev_priv->gart_buffers_offset = (dev->agp_buffer_map->offset
1968						 - dev->agp->base
1969						 + dev_priv->gart_vm_start);
1970	else
1971#endif
1972		dev_priv->gart_buffers_offset = (dev->agp_buffer_map->offset
1973						 - (unsigned long)dev->sg->virtual
1974						 + dev_priv->gart_vm_start);
1975
1976	DRM_DEBUG("fb 0x%08x size %d\n",
1977		  (unsigned int) dev_priv->fb_location,
1978		  (unsigned int) dev_priv->fb_size);
1979	DRM_DEBUG("dev_priv->gart_size %d\n", dev_priv->gart_size);
1980	DRM_DEBUG("dev_priv->gart_vm_start 0x%08x\n",
1981		  (unsigned int) dev_priv->gart_vm_start);
1982	DRM_DEBUG("dev_priv->gart_buffers_offset 0x%08lx\n",
1983		  dev_priv->gart_buffers_offset);
1984
1985	dev_priv->ring.start = (u32 *) dev_priv->cp_ring->handle;
1986	dev_priv->ring.end = ((u32 *) dev_priv->cp_ring->handle
1987			      + init->ring_size / sizeof(u32));
1988	dev_priv->ring.size = init->ring_size;
1989	dev_priv->ring.size_l2qw = drm_order(init->ring_size / 8);
1990
1991	dev_priv->ring.rptr_update = /* init->rptr_update */ 4096;
1992	dev_priv->ring.rptr_update_l2qw = drm_order(/* init->rptr_update */ 4096 / 8);
1993
1994	dev_priv->ring.fetch_size = /* init->fetch_size */ 32;
1995	dev_priv->ring.fetch_size_l2ow = drm_order(/* init->fetch_size */ 32 / 16);
1996
1997	dev_priv->ring.tail_mask = (dev_priv->ring.size / sizeof(u32)) - 1;
1998
1999	dev_priv->ring.high_mark = RADEON_RING_HIGH_MARK;
2000
2001#if __OS_HAS_AGP
2002	if (dev_priv->flags & RADEON_IS_AGP) {
2003		/* XXX turn off pcie gart */
2004	} else
2005#endif
2006	{
2007		dev_priv->gart_info.table_mask = DMA_BIT_MASK(32);
2008		/* if we have an offset set from userspace */
2009		if (!dev_priv->pcigart_offset_set) {
2010			DRM_ERROR("Need gart offset from userspace\n");
2011			r600_do_cleanup_cp(dev);
2012			return -EINVAL;
2013		}
2014
2015		DRM_DEBUG("Using gart offset 0x%08lx\n", dev_priv->pcigart_offset);
2016
2017		dev_priv->gart_info.bus_addr =
2018			dev_priv->pcigart_offset + dev_priv->fb_location;
2019		dev_priv->gart_info.mapping.offset =
2020			dev_priv->pcigart_offset + dev_priv->fb_aper_offset;
2021		dev_priv->gart_info.mapping.size =
2022			dev_priv->gart_info.table_size;
2023
2024		drm_core_ioremap_wc(&dev_priv->gart_info.mapping, dev);
2025		if (!dev_priv->gart_info.mapping.handle) {
2026			DRM_ERROR("ioremap failed.\n");
2027			r600_do_cleanup_cp(dev);
2028			return -EINVAL;
2029		}
2030
2031		dev_priv->gart_info.addr =
2032			dev_priv->gart_info.mapping.handle;
2033
2034		DRM_DEBUG("Setting phys_pci_gart to %p %08lX\n",
2035			  dev_priv->gart_info.addr,
2036			  dev_priv->pcigart_offset);
2037
2038		if (!r600_page_table_init(dev)) {
2039			DRM_ERROR("Failed to init GART table\n");
2040			r600_do_cleanup_cp(dev);
2041			return -EINVAL;
2042		}
2043
2044		if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770))
2045			r700_vm_init(dev);
2046		else
2047			r600_vm_init(dev);
2048	}
2049
2050	if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770))
2051		r700_cp_load_microcode(dev_priv);
2052	else
2053		r600_cp_load_microcode(dev_priv);
2054
2055	r600_cp_init_ring_buffer(dev, dev_priv, file_priv);
2056
2057	dev_priv->last_buf = 0;
2058
2059	r600_do_engine_reset(dev);
2060	r600_test_writeback(dev_priv);
2061
2062	return 0;
2063}
2064
2065int r600_do_resume_cp(struct drm_device *dev, struct drm_file *file_priv)
2066{
2067	drm_radeon_private_t *dev_priv = dev->dev_private;
2068
2069	DRM_DEBUG("\n");
2070	if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770)) {
2071		r700_vm_init(dev);
2072		r700_cp_load_microcode(dev_priv);
2073	} else {
2074		r600_vm_init(dev);
2075		r600_cp_load_microcode(dev_priv);
2076	}
2077	r600_cp_init_ring_buffer(dev, dev_priv, file_priv);
2078	r600_do_engine_reset(dev);
2079
2080	return 0;
2081}
2082
/* Wait for the CP to go idle.
 *
 * Emits a cache flush/invalidate event followed by a WAIT_UNTIL config
 * write asking for 3D-idle-clean, then waits for the CP to drain the
 * ring.  Returns the result of r600_do_wait_for_idle() (0 on success).
 */
int r600_do_cp_idle(drm_radeon_private_t *dev_priv)
{
	RING_LOCALS;
	DRM_DEBUG("\n");

	/* 5 dwords total: 2 for the event write, 3 for the config write. */
	BEGIN_RING(5);
	OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
	OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
	/* wait for 3D idle clean */
	OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
	/* config-reg packets take a dword offset relative to the config base */
	OUT_RING((R600_WAIT_UNTIL - R600_SET_CONFIG_REG_OFFSET) >> 2);
	OUT_RING(RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN);

	ADVANCE_RING();
	COMMIT_RING();

	return r600_do_wait_for_idle(dev_priv);
}
2103
/* Start the Command Processor.
 *
 * Queues a ME_INITIALIZE packet (header + 6 payload dwords) describing
 * the hardware context count, then releases the micro engine from halt
 * by writing R600_CP_ME_CNTL.  Marks the CP as running on completion.
 */
void r600_do_cp_start(drm_radeon_private_t *dev_priv)
{
	u32 cp_me;
	RING_LOCALS;
	DRM_DEBUG("\n");

	BEGIN_RING(7);
	OUT_RING(CP_PACKET3(R600_IT_ME_INITIALIZE, 5));
	OUT_RING(0x00000001);
	/* second init dword differs between r6xx and r7xx parts */
	if (((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV770))
		OUT_RING(0x00000003);
	else
		OUT_RING(0x00000000);
	OUT_RING((dev_priv->r600_max_hw_contexts - 1));
	OUT_RING(R600_ME_INITIALIZE_DEVICE_ID(1));
	OUT_RING(0x00000000);
	OUT_RING(0x00000000);
	ADVANCE_RING();
	COMMIT_RING();

	/* set the mux and reset the halt bit */
	cp_me = 0xff;
	RADEON_WRITE(R600_CP_ME_CNTL, cp_me);

	dev_priv->cp_running = 1;

}
2133
2134void r600_do_cp_reset(drm_radeon_private_t *dev_priv)
2135{
2136	u32 cur_read_ptr;
2137	DRM_DEBUG("\n");
2138
2139	cur_read_ptr = RADEON_READ(R600_CP_RB_RPTR);
2140	RADEON_WRITE(R600_CP_RB_WPTR, cur_read_ptr);
2141	SET_RING_HEAD(dev_priv, cur_read_ptr);
2142	dev_priv->ring.tail = cur_read_ptr;
2143}
2144
2145void r600_do_cp_stop(drm_radeon_private_t *dev_priv)
2146{
2147	uint32_t cp_me;
2148
2149	DRM_DEBUG("\n");
2150
2151	cp_me = 0xff | R600_CP_ME_HALT;
2152
2153	RADEON_WRITE(R600_CP_ME_CNTL, cp_me);
2154
2155	dev_priv->cp_running = 0;
2156}
2157
2158int r600_cp_dispatch_indirect(struct drm_device *dev,
2159			      struct drm_buf *buf, int start, int end)
2160{
2161	drm_radeon_private_t *dev_priv = dev->dev_private;
2162	RING_LOCALS;
2163
2164	if (start != end) {
2165		unsigned long offset = (dev_priv->gart_buffers_offset
2166					+ buf->offset + start);
2167		int dwords = (end - start + 3) / sizeof(u32);
2168
2169		DRM_DEBUG("dwords:%d\n", dwords);
2170		DRM_DEBUG("offset 0x%lx\n", offset);
2171
2172
2173		/* Indirect buffer data must be a multiple of 16 dwords.
2174		 * pad the data with a Type-2 CP packet.
2175		 */
2176		while (dwords & 0xf) {
2177			u32 *data = (u32 *)
2178			    ((char *)dev->agp_buffer_map->handle
2179			     + buf->offset + start);
2180			data[dwords++] = RADEON_CP_PACKET2;
2181		}
2182
2183		/* Fire off the indirect buffer */
2184		BEGIN_RING(4);
2185		OUT_RING(CP_PACKET3(R600_IT_INDIRECT_BUFFER, 2));
2186		OUT_RING((offset & 0xfffffffc));
2187		OUT_RING((upper_32_bits(offset) & 0xff));
2188		OUT_RING(dwords);
2189		ADVANCE_RING();
2190	}
2191
2192	return 0;
2193}
2194