r600_cp.c revision 190674
1189499Srnoland/*-
2189499Srnoland * Copyright 2008-2009 Advanced Micro Devices, Inc.
3189499Srnoland * Copyright 2008 Red Hat Inc.
4189499Srnoland *
5189499Srnoland * Permission is hereby granted, free of charge, to any person obtaining a
6189499Srnoland * copy of this software and associated documentation files (the "Software"),
7189499Srnoland * to deal in the Software without restriction, including without limitation
8189499Srnoland * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9189499Srnoland * and/or sell copies of the Software, and to permit persons to whom the
10189499Srnoland * Software is furnished to do so, subject to the following conditions:
11189499Srnoland *
12189499Srnoland * The above copyright notice and this permission notice (including the next
13189499Srnoland * paragraph) shall be included in all copies or substantial portions of the
14189499Srnoland * Software.
15189499Srnoland *
16189499Srnoland * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17189499Srnoland * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18189499Srnoland * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19189499Srnoland * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20189499Srnoland * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21189499Srnoland * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22189499Srnoland * DEALINGS IN THE SOFTWARE.
23189499Srnoland *
24189499Srnoland * Authors:
25189499Srnoland *     Dave Airlie <airlied@redhat.com>
26189499Srnoland *     Alex Deucher <alexander.deucher@amd.com>
27189499Srnoland */
28189499Srnoland
29189499Srnoland#include <sys/cdefs.h>
30189499Srnoland__FBSDID("$FreeBSD: head/sys/dev/drm/r600_cp.c 190674 2009-04-03 19:21:39Z rnoland $");
31189499Srnoland
32189499Srnoland#include "dev/drm/drmP.h"
33189499Srnoland#include "dev/drm/drm.h"
34189499Srnoland#include "dev/drm/radeon_drm.h"
35189499Srnoland#include "dev/drm/radeon_drv.h"
36189499Srnoland
37189499Srnoland#include "dev/drm/r600_microcode.h"
38189499Srnoland
39189499Srnoland# define ATI_PCIGART_PAGE_SIZE		4096	/**< PCI GART page size */
40189499Srnoland# define ATI_PCIGART_PAGE_MASK		(~(ATI_PCIGART_PAGE_SIZE-1))
41189499Srnoland
42189499Srnoland#define R600_PTE_VALID     (1 << 0)
43189499Srnoland#define R600_PTE_SYSTEM    (1 << 1)
44189499Srnoland#define R600_PTE_SNOOPED   (1 << 2)
45189499Srnoland#define R600_PTE_READABLE  (1 << 5)
46189499Srnoland#define R600_PTE_WRITEABLE (1 << 6)
47189499Srnoland
48189499Srnoland/* MAX values used for gfx init */
49189499Srnoland#define R6XX_MAX_SH_GPRS           256
50189499Srnoland#define R6XX_MAX_TEMP_GPRS         16
51189499Srnoland#define R6XX_MAX_SH_THREADS        256
52189499Srnoland#define R6XX_MAX_SH_STACK_ENTRIES  4096
53189499Srnoland#define R6XX_MAX_BACKENDS          8
54189499Srnoland#define R6XX_MAX_BACKENDS_MASK     0xff
55189499Srnoland#define R6XX_MAX_SIMDS             8
56189499Srnoland#define R6XX_MAX_SIMDS_MASK        0xff
57189499Srnoland#define R6XX_MAX_PIPES             8
58189499Srnoland#define R6XX_MAX_PIPES_MASK        0xff
59189499Srnoland
60189499Srnoland#define R7XX_MAX_SH_GPRS           256
61189499Srnoland#define R7XX_MAX_TEMP_GPRS         16
62189499Srnoland#define R7XX_MAX_SH_THREADS        256
63189499Srnoland#define R7XX_MAX_SH_STACK_ENTRIES  4096
64189499Srnoland#define R7XX_MAX_BACKENDS          8
65189499Srnoland#define R7XX_MAX_BACKENDS_MASK     0xff
66189499Srnoland#define R7XX_MAX_SIMDS             16
67189499Srnoland#define R7XX_MAX_SIMDS_MASK        0xffff
68189499Srnoland#define R7XX_MAX_PIPES             8
69189499Srnoland#define R7XX_MAX_PIPES_MASK        0xff
70189499Srnoland
71189499Srnolandstatic int r600_do_wait_for_fifo(drm_radeon_private_t *dev_priv, int entries)
72189499Srnoland{
73189499Srnoland	int i;
74189499Srnoland
75189499Srnoland	dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
76189499Srnoland
77189499Srnoland	for (i = 0; i < dev_priv->usec_timeout; i++) {
78189499Srnoland		int slots;
79189499Srnoland		if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770)
80189499Srnoland			slots = (RADEON_READ(R600_GRBM_STATUS)
81189499Srnoland				 & R700_CMDFIFO_AVAIL_MASK);
82189499Srnoland		else
83189499Srnoland			slots = (RADEON_READ(R600_GRBM_STATUS)
84189499Srnoland				 & R600_CMDFIFO_AVAIL_MASK);
85189499Srnoland		if (slots >= entries)
86189499Srnoland			return 0;
87189499Srnoland		DRM_UDELAY(1);
88189499Srnoland	}
89189499Srnoland	DRM_INFO("wait for fifo failed status : 0x%08X 0x%08X\n",
90189499Srnoland		 RADEON_READ(R600_GRBM_STATUS),
91189499Srnoland		 RADEON_READ(R600_GRBM_STATUS2));
92189499Srnoland
93189499Srnoland	return -EBUSY;
94189499Srnoland}
95189499Srnoland
96189499Srnolandstatic int r600_do_wait_for_idle(drm_radeon_private_t *dev_priv)
97189499Srnoland{
98189499Srnoland	int i, ret;
99189499Srnoland
100189499Srnoland	dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
101189499Srnoland
102189499Srnoland	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770)
103189499Srnoland		ret = r600_do_wait_for_fifo(dev_priv, 8);
104189499Srnoland	else
105189499Srnoland		ret = r600_do_wait_for_fifo(dev_priv, 16);
106189499Srnoland	if (ret)
107189499Srnoland		return ret;
108189499Srnoland	for (i = 0; i < dev_priv->usec_timeout; i++) {
109189499Srnoland		if (!(RADEON_READ(R600_GRBM_STATUS) & R600_GUI_ACTIVE))
110189499Srnoland			return 0;
111189499Srnoland		DRM_UDELAY(1);
112189499Srnoland	}
113189499Srnoland	DRM_INFO("wait idle failed status : 0x%08X 0x%08X\n",
114189499Srnoland		 RADEON_READ(R600_GRBM_STATUS),
115189499Srnoland		 RADEON_READ(R600_GRBM_STATUS2));
116189499Srnoland
117189499Srnoland	return -EBUSY;
118189499Srnoland}
119189499Srnoland
120189499Srnolandvoid r600_page_table_cleanup(struct drm_device *dev, struct drm_ati_pcigart_info *gart_info)
121189499Srnoland{
122189499Srnoland#ifdef __linux__
123189499Srnoland	struct drm_sg_mem *entry = dev->sg;
124189499Srnoland	int max_pages;
125189499Srnoland	int pages;
126189499Srnoland	int i;
127189499Srnoland#endif
128189499Srnoland	if (gart_info->bus_addr) {
129189499Srnoland#ifdef __linux__
130189499Srnoland		max_pages = (gart_info->table_size / sizeof(u32));
131189499Srnoland		pages = (entry->pages <= max_pages)
132189499Srnoland		  ? entry->pages : max_pages;
133189499Srnoland
134189499Srnoland		for (i = 0; i < pages; i++) {
135189499Srnoland			if (!entry->busaddr[i])
136189499Srnoland				break;
137189499Srnoland			pci_unmap_single(dev->pdev, entry->busaddr[i],
138189499Srnoland					 PAGE_SIZE, PCI_DMA_TODEVICE);
139189499Srnoland		}
140189499Srnoland#endif
141189499Srnoland		if (gart_info->gart_table_location == DRM_ATI_GART_MAIN)
142189499Srnoland			gart_info->bus_addr = 0;
143189499Srnoland	}
144189499Srnoland}
145189499Srnoland
146189499Srnoland/* R600 has page table setup */
147189499Srnolandint r600_page_table_init(struct drm_device *dev)
148189499Srnoland{
149189499Srnoland	drm_radeon_private_t *dev_priv = dev->dev_private;
150189499Srnoland	struct drm_ati_pcigart_info *gart_info = &dev_priv->gart_info;
151189499Srnoland	struct drm_sg_mem *entry = dev->sg;
152189499Srnoland	int ret = 0;
153189499Srnoland	int i, j;
154189499Srnoland	int max_pages, pages;
155189499Srnoland	u64 *pci_gart, page_base;
156189499Srnoland	dma_addr_t entry_addr;
157189499Srnoland
158189499Srnoland	/* okay page table is available - lets rock */
159189499Srnoland
160189499Srnoland	/* PTEs are 64-bits */
161189499Srnoland	pci_gart = (u64 *)gart_info->addr;
162189499Srnoland
163189499Srnoland	max_pages = (gart_info->table_size / sizeof(u64));
164189499Srnoland	pages = (entry->pages <= max_pages) ? entry->pages : max_pages;
165189499Srnoland
166189499Srnoland	memset(pci_gart, 0, max_pages * sizeof(u64));
167189499Srnoland
168189499Srnoland	for (i = 0; i < pages; i++) {
169189499Srnoland#ifdef __linux__
170189499Srnoland		entry->busaddr[i] = pci_map_single(dev->pdev,
171189499Srnoland						   page_address(entry->
172189499Srnoland								pagelist[i]),
173189499Srnoland						   PAGE_SIZE, PCI_DMA_TODEVICE);
174189499Srnoland		if (entry->busaddr[i] == 0) {
175189499Srnoland			DRM_ERROR("unable to map PCIGART pages!\n");
176189499Srnoland			r600_page_table_cleanup(dev, gart_info);
177189499Srnoland			goto done;
178189499Srnoland		}
179189499Srnoland#endif
180189499Srnoland		entry_addr = entry->busaddr[i];
181189499Srnoland		for (j = 0; j < (PAGE_SIZE / ATI_PCIGART_PAGE_SIZE); j++) {
182189499Srnoland			page_base = (u64) entry_addr & ATI_PCIGART_PAGE_MASK;
183189499Srnoland			page_base |= R600_PTE_VALID | R600_PTE_SYSTEM | R600_PTE_SNOOPED;
184189499Srnoland			page_base |= R600_PTE_READABLE | R600_PTE_WRITEABLE;
185189499Srnoland
186189499Srnoland			*pci_gart = page_base;
187189499Srnoland
188189499Srnoland			if ((i % 128) == 0)
189189499Srnoland				DRM_DEBUG("page entry %d: 0x%016llx\n",
190189499Srnoland				    i, (unsigned long long)page_base);
191189499Srnoland			pci_gart++;
192189499Srnoland			entry_addr += ATI_PCIGART_PAGE_SIZE;
193189499Srnoland		}
194189499Srnoland	}
195189909Srnoland	ret = 1;
196189499Srnoland#ifdef __linux__
197189499Srnolanddone:
198189499Srnoland#endif
199189499Srnoland	return ret;
200189499Srnoland}
201189499Srnoland
202189499Srnolandstatic void r600_vm_flush_gart_range(struct drm_device *dev)
203189499Srnoland{
204189499Srnoland	drm_radeon_private_t *dev_priv = dev->dev_private;
205189499Srnoland	u32 resp, countdown = 1000;
206189499Srnoland	RADEON_WRITE(R600_VM_CONTEXT0_INVALIDATION_LOW_ADDR, dev_priv->gart_vm_start >> 12);
207189499Srnoland	RADEON_WRITE(R600_VM_CONTEXT0_INVALIDATION_HIGH_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);
208189499Srnoland	RADEON_WRITE(R600_VM_CONTEXT0_REQUEST_RESPONSE, 2);
209189499Srnoland
210189499Srnoland	do {
211189499Srnoland		resp = RADEON_READ(R600_VM_CONTEXT0_REQUEST_RESPONSE);
212189499Srnoland		countdown--;
213189499Srnoland		DRM_UDELAY(1);
214189499Srnoland	} while (((resp & 0xf0) == 0) && countdown);
215189499Srnoland}
216189499Srnoland
217189499Srnolandstatic void r600_vm_init(struct drm_device *dev)
218189499Srnoland{
219189499Srnoland	drm_radeon_private_t *dev_priv = dev->dev_private;
220189499Srnoland	/* initialise the VM to use the page table we constructed up there */
221189499Srnoland	u32 vm_c0, i;
222189499Srnoland	u32 mc_rd_a;
223189499Srnoland	u32 vm_l2_cntl, vm_l2_cntl3;
224189499Srnoland	/* okay set up the PCIE aperture type thingo */
225189499Srnoland	RADEON_WRITE(R600_MC_VM_SYSTEM_APERTURE_LOW_ADDR, dev_priv->gart_vm_start >> 12);
226189499Srnoland	RADEON_WRITE(R600_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);
227189499Srnoland	RADEON_WRITE(R600_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0);
228189499Srnoland
229189499Srnoland	/* setup MC RD a */
230189499Srnoland	mc_rd_a = R600_MCD_L1_TLB | R600_MCD_L1_FRAG_PROC | R600_MCD_SYSTEM_ACCESS_MODE_IN_SYS |
231189499Srnoland		R600_MCD_SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU | R600_MCD_EFFECTIVE_L1_TLB_SIZE(5) |
232189499Srnoland		R600_MCD_EFFECTIVE_L1_QUEUE_SIZE(5) | R600_MCD_WAIT_L2_QUERY;
233189499Srnoland
234189499Srnoland	RADEON_WRITE(R600_MCD_RD_A_CNTL, mc_rd_a);
235189499Srnoland	RADEON_WRITE(R600_MCD_RD_B_CNTL, mc_rd_a);
236189499Srnoland
237189499Srnoland	RADEON_WRITE(R600_MCD_WR_A_CNTL, mc_rd_a);
238189499Srnoland	RADEON_WRITE(R600_MCD_WR_B_CNTL, mc_rd_a);
239189499Srnoland
240189499Srnoland	RADEON_WRITE(R600_MCD_RD_GFX_CNTL, mc_rd_a);
241189499Srnoland	RADEON_WRITE(R600_MCD_WR_GFX_CNTL, mc_rd_a);
242189499Srnoland
243189499Srnoland	RADEON_WRITE(R600_MCD_RD_SYS_CNTL, mc_rd_a);
244189499Srnoland	RADEON_WRITE(R600_MCD_WR_SYS_CNTL, mc_rd_a);
245189499Srnoland
246189499Srnoland	RADEON_WRITE(R600_MCD_RD_HDP_CNTL, mc_rd_a | R600_MCD_L1_STRICT_ORDERING);
247189499Srnoland	RADEON_WRITE(R600_MCD_WR_HDP_CNTL, mc_rd_a /*| R600_MCD_L1_STRICT_ORDERING*/);
248189499Srnoland
249189499Srnoland	RADEON_WRITE(R600_MCD_RD_PDMA_CNTL, mc_rd_a);
250189499Srnoland	RADEON_WRITE(R600_MCD_WR_PDMA_CNTL, mc_rd_a);
251189499Srnoland
252189499Srnoland	RADEON_WRITE(R600_MCD_RD_SEM_CNTL, mc_rd_a | R600_MCD_SEMAPHORE_MODE);
253189499Srnoland	RADEON_WRITE(R600_MCD_WR_SEM_CNTL, mc_rd_a);
254189499Srnoland
255189499Srnoland	vm_l2_cntl = R600_VM_L2_CACHE_EN | R600_VM_L2_FRAG_PROC | R600_VM_ENABLE_PTE_CACHE_LRU_W;
256189499Srnoland	vm_l2_cntl |= R600_VM_L2_CNTL_QUEUE_SIZE(7);
257189499Srnoland	RADEON_WRITE(R600_VM_L2_CNTL, vm_l2_cntl);
258189499Srnoland
259189499Srnoland	RADEON_WRITE(R600_VM_L2_CNTL2, 0);
260189499Srnoland	vm_l2_cntl3 = (R600_VM_L2_CNTL3_BANK_SELECT_0(0) |
261189499Srnoland		       R600_VM_L2_CNTL3_BANK_SELECT_1(1) |
262189499Srnoland		       R600_VM_L2_CNTL3_CACHE_UPDATE_MODE(2));
263189499Srnoland	RADEON_WRITE(R600_VM_L2_CNTL3, vm_l2_cntl3);
264189499Srnoland
265189499Srnoland	vm_c0 = R600_VM_ENABLE_CONTEXT | R600_VM_PAGE_TABLE_DEPTH_FLAT;
266189499Srnoland
267189499Srnoland	RADEON_WRITE(R600_VM_CONTEXT0_CNTL, vm_c0);
268189499Srnoland
269189499Srnoland	vm_c0 &= ~R600_VM_ENABLE_CONTEXT;
270189499Srnoland
271189499Srnoland	/* disable all other contexts */
272189499Srnoland	for (i = 1; i < 8; i++)
273189499Srnoland		RADEON_WRITE(R600_VM_CONTEXT0_CNTL + (i * 4), vm_c0);
274189499Srnoland
275189499Srnoland	RADEON_WRITE(R600_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, dev_priv->gart_info.bus_addr >> 12);
276189499Srnoland	RADEON_WRITE(R600_VM_CONTEXT0_PAGE_TABLE_START_ADDR, dev_priv->gart_vm_start >> 12);
277189499Srnoland	RADEON_WRITE(R600_VM_CONTEXT0_PAGE_TABLE_END_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);
278189499Srnoland
279189499Srnoland	r600_vm_flush_gart_range(dev);
280189499Srnoland}
281189499Srnoland
282189499Srnoland/* load r600 microcode */
283189499Srnolandstatic void r600_cp_load_microcode(drm_radeon_private_t *dev_priv)
284189499Srnoland{
285190595Srnoland	const u32 (*cp)[3];
286190595Srnoland	const u32 *pfp;
287189499Srnoland	int i;
288189499Srnoland
289190595Srnoland	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
290190595Srnoland	case CHIP_R600:
291190595Srnoland		DRM_INFO("Loading R600 Microcode\n");
292190595Srnoland		cp  = R600_cp_microcode;
293190595Srnoland		pfp = R600_pfp_microcode;
294190595Srnoland		break;
295190595Srnoland	case CHIP_RV610:
296190595Srnoland		DRM_INFO("Loading RV610 Microcode\n");
297190595Srnoland		cp  = RV610_cp_microcode;
298190595Srnoland		pfp = RV610_pfp_microcode;
299190595Srnoland		break;
300190595Srnoland	case CHIP_RV630:
301190595Srnoland		DRM_INFO("Loading RV630 Microcode\n");
302190595Srnoland		cp  = RV630_cp_microcode;
303190595Srnoland		pfp = RV630_pfp_microcode;
304190595Srnoland		break;
305190595Srnoland	case CHIP_RV620:
306190595Srnoland		DRM_INFO("Loading RV620 Microcode\n");
307190595Srnoland		cp  = RV620_cp_microcode;
308190595Srnoland		pfp = RV620_pfp_microcode;
309190595Srnoland		break;
310190595Srnoland	case CHIP_RV635:
311190595Srnoland		DRM_INFO("Loading RV635 Microcode\n");
312190595Srnoland		cp  = RV635_cp_microcode;
313190595Srnoland		pfp = RV635_pfp_microcode;
314190595Srnoland		break;
315190595Srnoland	case CHIP_RV670:
316190595Srnoland		DRM_INFO("Loading RV670 Microcode\n");
317190595Srnoland		cp  = RV670_cp_microcode;
318190595Srnoland		pfp = RV670_pfp_microcode;
319190595Srnoland		break;
320190595Srnoland	case CHIP_RS780:
321190595Srnoland		DRM_INFO("Loading RS780 Microcode\n");
322190595Srnoland		cp  = RS780_cp_microcode;
323190595Srnoland		pfp = RS780_pfp_microcode;
324190595Srnoland		break;
325190595Srnoland	default:
326190674Srnoland		return;
327190595Srnoland	}
328189499Srnoland
329190674Srnoland	r600_do_cp_stop(dev_priv);
330190674Srnoland
331190674Srnoland	RADEON_WRITE(R600_CP_RB_CNTL,
332190674Srnoland		     R600_RB_NO_UPDATE |
333190674Srnoland		     R600_RB_BLKSZ(15) |
334190674Srnoland		     R600_RB_BUFSZ(3));
335190674Srnoland
336190674Srnoland	RADEON_WRITE(R600_GRBM_SOFT_RESET, R600_SOFT_RESET_CP);
337190674Srnoland	RADEON_READ(R600_GRBM_SOFT_RESET);
338190674Srnoland	DRM_UDELAY(15000);
339190674Srnoland	RADEON_WRITE(R600_GRBM_SOFT_RESET, 0);
340190674Srnoland
341190674Srnoland	RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
342190674Srnoland
343190674Srnoland	for (i = 0; i < PM4_UCODE_SIZE; i++) {
344190595Srnoland		RADEON_WRITE(R600_CP_ME_RAM_DATA, cp[i][0]);
345190595Srnoland		RADEON_WRITE(R600_CP_ME_RAM_DATA, cp[i][1]);
346190595Srnoland		RADEON_WRITE(R600_CP_ME_RAM_DATA, cp[i][2]);
347190595Srnoland	}
348189499Srnoland
349190595Srnoland	RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
350190674Srnoland	for (i = 0; i < PFP_UCODE_SIZE; i++)
351190595Srnoland		RADEON_WRITE(R600_CP_PFP_UCODE_DATA, pfp[i]);
352189499Srnoland
353189499Srnoland	RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
354189499Srnoland	RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
355189499Srnoland	RADEON_WRITE(R600_CP_ME_RAM_RADDR, 0);
356189499Srnoland}
357189499Srnoland
358189499Srnolandstatic void r700_vm_init(struct drm_device *dev)
359189499Srnoland{
360189499Srnoland	drm_radeon_private_t *dev_priv = dev->dev_private;
361189499Srnoland	/* initialise the VM to use the page table we constructed up there */
362189499Srnoland	u32 vm_c0, i;
363189499Srnoland	u32 mc_vm_md_l1;
364189499Srnoland	u32 vm_l2_cntl, vm_l2_cntl3;
365189499Srnoland	/* okay set up the PCIE aperture type thingo */
366189499Srnoland	RADEON_WRITE(R700_MC_VM_SYSTEM_APERTURE_LOW_ADDR, dev_priv->gart_vm_start >> 12);
367189499Srnoland	RADEON_WRITE(R700_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);
368189499Srnoland	RADEON_WRITE(R700_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0);
369189499Srnoland
370189499Srnoland	mc_vm_md_l1 = R700_ENABLE_L1_TLB |
371189499Srnoland	    R700_ENABLE_L1_FRAGMENT_PROCESSING |
372189499Srnoland	    R700_SYSTEM_ACCESS_MODE_IN_SYS |
373189499Srnoland	    R700_SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU |
374189499Srnoland	    R700_EFFECTIVE_L1_TLB_SIZE(5) |
375189499Srnoland	    R700_EFFECTIVE_L1_QUEUE_SIZE(5);
376189499Srnoland
377189499Srnoland	RADEON_WRITE(R700_MC_VM_MD_L1_TLB0_CNTL, mc_vm_md_l1);
378189499Srnoland	RADEON_WRITE(R700_MC_VM_MD_L1_TLB1_CNTL, mc_vm_md_l1);
379189499Srnoland	RADEON_WRITE(R700_MC_VM_MD_L1_TLB2_CNTL, mc_vm_md_l1);
380189499Srnoland	RADEON_WRITE(R700_MC_VM_MB_L1_TLB0_CNTL, mc_vm_md_l1);
381189499Srnoland	RADEON_WRITE(R700_MC_VM_MB_L1_TLB1_CNTL, mc_vm_md_l1);
382189499Srnoland	RADEON_WRITE(R700_MC_VM_MB_L1_TLB2_CNTL, mc_vm_md_l1);
383189499Srnoland	RADEON_WRITE(R700_MC_VM_MB_L1_TLB3_CNTL, mc_vm_md_l1);
384189499Srnoland
385189499Srnoland	vm_l2_cntl = R600_VM_L2_CACHE_EN | R600_VM_L2_FRAG_PROC | R600_VM_ENABLE_PTE_CACHE_LRU_W;
386189499Srnoland	vm_l2_cntl |= R700_VM_L2_CNTL_QUEUE_SIZE(7);
387189499Srnoland	RADEON_WRITE(R600_VM_L2_CNTL, vm_l2_cntl);
388189499Srnoland
389189499Srnoland	RADEON_WRITE(R600_VM_L2_CNTL2, 0);
390189499Srnoland	vm_l2_cntl3 = R700_VM_L2_CNTL3_BANK_SELECT(0) | R700_VM_L2_CNTL3_CACHE_UPDATE_MODE(2);
391189499Srnoland	RADEON_WRITE(R600_VM_L2_CNTL3, vm_l2_cntl3);
392189499Srnoland
393189499Srnoland	vm_c0 = R600_VM_ENABLE_CONTEXT | R600_VM_PAGE_TABLE_DEPTH_FLAT;
394189499Srnoland
395189499Srnoland	RADEON_WRITE(R600_VM_CONTEXT0_CNTL, vm_c0);
396189499Srnoland
397189499Srnoland	vm_c0 &= ~R600_VM_ENABLE_CONTEXT;
398189499Srnoland
399189499Srnoland	/* disable all other contexts */
400189499Srnoland	for (i = 1; i < 8; i++)
401189499Srnoland		RADEON_WRITE(R600_VM_CONTEXT0_CNTL + (i * 4), vm_c0);
402189499Srnoland
403189499Srnoland	RADEON_WRITE(R700_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, dev_priv->gart_info.bus_addr >> 12);
404189499Srnoland	RADEON_WRITE(R700_VM_CONTEXT0_PAGE_TABLE_START_ADDR, dev_priv->gart_vm_start >> 12);
405189499Srnoland	RADEON_WRITE(R700_VM_CONTEXT0_PAGE_TABLE_END_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);
406189499Srnoland
407189499Srnoland	r600_vm_flush_gart_range(dev);
408189499Srnoland}
409189499Srnoland
410189499Srnoland/* load r600 microcode */
411189499Srnolandstatic void r700_cp_load_microcode(drm_radeon_private_t *dev_priv)
412189499Srnoland{
413190595Srnoland	const u32 *pfp;
414190595Srnoland	const u32 *cp;
415189499Srnoland	int i;
416189499Srnoland
417190595Srnoland	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
418190595Srnoland	case CHIP_RV770:
419190674Srnoland		DRM_INFO("Loading RV770/RV790 Microcode\n");
420190595Srnoland		pfp = RV770_pfp_microcode;
421190595Srnoland		cp  = RV770_cp_microcode;
422190595Srnoland		break;
423190595Srnoland	case CHIP_RV730:
424190595Srnoland		DRM_INFO("Loading RV730 Microcode\n");
425190595Srnoland		pfp = RV730_pfp_microcode;
426190595Srnoland		cp  = RV730_cp_microcode;
427190595Srnoland		break;
428190595Srnoland	case CHIP_RV710:
429190595Srnoland		DRM_INFO("Loading RV710 Microcode\n");
430190595Srnoland		pfp = RV710_pfp_microcode;
431190595Srnoland		cp  = RV710_cp_microcode;
432190595Srnoland		break;
433190595Srnoland	default:
434190674Srnoland		return;
435190595Srnoland	}
436189499Srnoland
437190674Srnoland	r600_do_cp_stop(dev_priv);
438190674Srnoland
439190674Srnoland	RADEON_WRITE(R600_CP_RB_CNTL,
440190674Srnoland		     R600_RB_NO_UPDATE |
441190674Srnoland		     (15 << 8) |
442190674Srnoland		     (3 << 0));
443190674Srnoland
444190674Srnoland	RADEON_WRITE(R600_GRBM_SOFT_RESET, R600_SOFT_RESET_CP);
445190674Srnoland	RADEON_READ(R600_GRBM_SOFT_RESET);
446190674Srnoland	DRM_UDELAY(15000);
447190674Srnoland	RADEON_WRITE(R600_GRBM_SOFT_RESET, 0);
448190674Srnoland
449190595Srnoland	RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
450190674Srnoland	for (i = 0; i < R700_PFP_UCODE_SIZE; i++)
451190595Srnoland		RADEON_WRITE(R600_CP_PFP_UCODE_DATA, pfp[i]);
452190595Srnoland	RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
453189499Srnoland
454190595Srnoland	RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
455190674Srnoland	for (i = 0; i < R700_PM4_UCODE_SIZE; i++)
456190595Srnoland		RADEON_WRITE(R600_CP_ME_RAM_DATA, cp[i]);
457190595Srnoland	RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
458189499Srnoland
459189499Srnoland	RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
460189499Srnoland	RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
461189499Srnoland	RADEON_WRITE(R600_CP_ME_RAM_RADDR, 0);
462189499Srnoland}
463189499Srnoland
464189499Srnolandstatic void r600_test_writeback(drm_radeon_private_t *dev_priv)
465189499Srnoland{
466189499Srnoland	u32 tmp;
467189499Srnoland
468189499Srnoland	/* Start with assuming that writeback doesn't work */
469189499Srnoland	dev_priv->writeback_works = 0;
470189499Srnoland
471189499Srnoland	/* Writeback doesn't seem to work everywhere, test it here and possibly
472189499Srnoland	 * enable it if it appears to work
473189499Srnoland	 */
474189499Srnoland	radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(1), 0);
475189499Srnoland
476189499Srnoland	RADEON_WRITE(R600_SCRATCH_REG1, 0xdeadbeef);
477189499Srnoland
478189499Srnoland	for (tmp = 0; tmp < dev_priv->usec_timeout; tmp++) {
479189499Srnoland		u32 val;
480189499Srnoland
481189499Srnoland		val = radeon_read_ring_rptr(dev_priv, R600_SCRATCHOFF(1));
482189499Srnoland		if (val == 0xdeadbeef)
483189499Srnoland			break;
484189499Srnoland		DRM_UDELAY(1);
485189499Srnoland	}
486189499Srnoland
487189499Srnoland	if (tmp < dev_priv->usec_timeout) {
488189499Srnoland		dev_priv->writeback_works = 1;
489189499Srnoland		DRM_INFO("writeback test succeeded in %d usecs\n", tmp);
490189499Srnoland	} else {
491189499Srnoland		dev_priv->writeback_works = 0;
492189499Srnoland		DRM_INFO("writeback test failed\n");
493189499Srnoland	}
494189499Srnoland	if (radeon_no_wb == 1) {
495189499Srnoland		dev_priv->writeback_works = 0;
496189499Srnoland		DRM_INFO("writeback forced off\n");
497189499Srnoland	}
498189499Srnoland
499189499Srnoland	if (!dev_priv->writeback_works) {
500189499Srnoland		/* Disable writeback to avoid unnecessary bus master transfer */
501189499Srnoland		RADEON_WRITE(R600_CP_RB_CNTL, RADEON_READ(R600_CP_RB_CNTL) |
502189499Srnoland			     RADEON_RB_NO_UPDATE);
503189499Srnoland		RADEON_WRITE(R600_SCRATCH_UMSK, 0);
504189499Srnoland	}
505189499Srnoland}
506189499Srnoland
507189499Srnolandint r600_do_engine_reset(struct drm_device *dev)
508189499Srnoland{
509189499Srnoland	drm_radeon_private_t *dev_priv = dev->dev_private;
510189499Srnoland	u32 cp_ptr, cp_me_cntl, cp_rb_cntl;
511189499Srnoland
512189499Srnoland	DRM_INFO("Resetting GPU\n");
513189499Srnoland
514189499Srnoland	cp_ptr = RADEON_READ(R600_CP_RB_WPTR);
515189499Srnoland	cp_me_cntl = RADEON_READ(R600_CP_ME_CNTL);
516189499Srnoland	RADEON_WRITE(R600_CP_ME_CNTL, R600_CP_ME_HALT);
517189499Srnoland
518189499Srnoland	RADEON_WRITE(R600_GRBM_SOFT_RESET, 0x7fff);
519189499Srnoland	RADEON_READ(R600_GRBM_SOFT_RESET);
520189499Srnoland	DRM_UDELAY(50);
521189499Srnoland	RADEON_WRITE(R600_GRBM_SOFT_RESET, 0);
522189499Srnoland	RADEON_READ(R600_GRBM_SOFT_RESET);
523189499Srnoland
524189499Srnoland	RADEON_WRITE(R600_CP_RB_WPTR_DELAY, 0);
525189499Srnoland	cp_rb_cntl = RADEON_READ(R600_CP_RB_CNTL);
526189499Srnoland	RADEON_WRITE(R600_CP_RB_CNTL, R600_RB_RPTR_WR_ENA);
527189499Srnoland
528189499Srnoland	RADEON_WRITE(R600_CP_RB_RPTR_WR, cp_ptr);
529189499Srnoland	RADEON_WRITE(R600_CP_RB_WPTR, cp_ptr);
530189499Srnoland	RADEON_WRITE(R600_CP_RB_CNTL, cp_rb_cntl);
531189499Srnoland	RADEON_WRITE(R600_CP_ME_CNTL, cp_me_cntl);
532189499Srnoland
533189499Srnoland	/* Reset the CP ring */
534189499Srnoland	r600_do_cp_reset(dev_priv);
535189499Srnoland
536189499Srnoland	/* The CP is no longer running after an engine reset */
537189499Srnoland	dev_priv->cp_running = 0;
538189499Srnoland
539189499Srnoland	/* Reset any pending vertex, indirect buffers */
540189499Srnoland	radeon_freelist_reset(dev);
541189499Srnoland
542189499Srnoland	return 0;
543189499Srnoland
544189499Srnoland}
545189499Srnoland
546189499Srnolandstatic u32 r600_get_tile_pipe_to_backend_map(u32 num_tile_pipes,
547189499Srnoland					     u32 num_backends,
548189499Srnoland					     u32 backend_disable_mask)
549189499Srnoland{
550189499Srnoland	u32 backend_map = 0;
551189499Srnoland	u32 enabled_backends_mask;
552189499Srnoland	u32 enabled_backends_count;
553189499Srnoland	u32 cur_pipe;
554189499Srnoland	u32 swizzle_pipe[R6XX_MAX_PIPES];
555189499Srnoland	u32 cur_backend;
556189499Srnoland	u32 i;
557189499Srnoland
558189499Srnoland	if (num_tile_pipes > R6XX_MAX_PIPES)
559189499Srnoland		num_tile_pipes = R6XX_MAX_PIPES;
560189499Srnoland	if (num_tile_pipes < 1)
561189499Srnoland		num_tile_pipes = 1;
562189499Srnoland	if (num_backends > R6XX_MAX_BACKENDS)
563189499Srnoland		num_backends = R6XX_MAX_BACKENDS;
564189499Srnoland	if (num_backends < 1)
565189499Srnoland		num_backends = 1;
566189499Srnoland
567189499Srnoland	enabled_backends_mask = 0;
568189499Srnoland	enabled_backends_count = 0;
569189499Srnoland	for (i = 0; i < R6XX_MAX_BACKENDS; ++i) {
570189499Srnoland		if (((backend_disable_mask >> i) & 1) == 0) {
571189499Srnoland			enabled_backends_mask |= (1 << i);
572189499Srnoland			++enabled_backends_count;
573189499Srnoland		}
574189499Srnoland		if (enabled_backends_count == num_backends)
575189499Srnoland			break;
576189499Srnoland	}
577189499Srnoland
578189499Srnoland	if (enabled_backends_count == 0) {
579189499Srnoland		enabled_backends_mask = 1;
580189499Srnoland		enabled_backends_count = 1;
581189499Srnoland	}
582189499Srnoland
583189499Srnoland	if (enabled_backends_count != num_backends)
584189499Srnoland		num_backends = enabled_backends_count;
585189499Srnoland
586189499Srnoland	memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R6XX_MAX_PIPES);
587189499Srnoland	switch (num_tile_pipes) {
588189499Srnoland	case 1:
589189499Srnoland		swizzle_pipe[0] = 0;
590189499Srnoland		break;
591189499Srnoland	case 2:
592189499Srnoland		swizzle_pipe[0] = 0;
593189499Srnoland		swizzle_pipe[1] = 1;
594189499Srnoland		break;
595189499Srnoland	case 3:
596189499Srnoland		swizzle_pipe[0] = 0;
597189499Srnoland		swizzle_pipe[1] = 1;
598189499Srnoland		swizzle_pipe[2] = 2;
599189499Srnoland		break;
600189499Srnoland	case 4:
601189499Srnoland		swizzle_pipe[0] = 0;
602189499Srnoland		swizzle_pipe[1] = 1;
603189499Srnoland		swizzle_pipe[2] = 2;
604189499Srnoland		swizzle_pipe[3] = 3;
605189499Srnoland		break;
606189499Srnoland	case 5:
607189499Srnoland		swizzle_pipe[0] = 0;
608189499Srnoland		swizzle_pipe[1] = 1;
609189499Srnoland		swizzle_pipe[2] = 2;
610189499Srnoland		swizzle_pipe[3] = 3;
611189499Srnoland		swizzle_pipe[4] = 4;
612189499Srnoland		break;
613189499Srnoland	case 6:
614189499Srnoland		swizzle_pipe[0] = 0;
615189499Srnoland		swizzle_pipe[1] = 2;
616189499Srnoland		swizzle_pipe[2] = 4;
617189499Srnoland		swizzle_pipe[3] = 5;
618189499Srnoland		swizzle_pipe[4] = 1;
619189499Srnoland		swizzle_pipe[5] = 3;
620189499Srnoland		break;
621189499Srnoland	case 7:
622189499Srnoland		swizzle_pipe[0] = 0;
623189499Srnoland		swizzle_pipe[1] = 2;
624189499Srnoland		swizzle_pipe[2] = 4;
625189499Srnoland		swizzle_pipe[3] = 6;
626189499Srnoland		swizzle_pipe[4] = 1;
627189499Srnoland		swizzle_pipe[5] = 3;
628189499Srnoland		swizzle_pipe[6] = 5;
629189499Srnoland		break;
630189499Srnoland	case 8:
631189499Srnoland		swizzle_pipe[0] = 0;
632189499Srnoland		swizzle_pipe[1] = 2;
633189499Srnoland		swizzle_pipe[2] = 4;
634189499Srnoland		swizzle_pipe[3] = 6;
635189499Srnoland		swizzle_pipe[4] = 1;
636189499Srnoland		swizzle_pipe[5] = 3;
637189499Srnoland		swizzle_pipe[6] = 5;
638189499Srnoland		swizzle_pipe[7] = 7;
639189499Srnoland		break;
640189499Srnoland	}
641189499Srnoland
642189499Srnoland	cur_backend = 0;
643189499Srnoland	for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) {
644189499Srnoland		while (((1 << cur_backend) & enabled_backends_mask) == 0)
645189499Srnoland			cur_backend = (cur_backend + 1) % R6XX_MAX_BACKENDS;
646189499Srnoland
647189499Srnoland		backend_map |= (u32)(((cur_backend & 3) << (swizzle_pipe[cur_pipe] * 2)));
648189499Srnoland
649189499Srnoland		cur_backend = (cur_backend + 1) % R6XX_MAX_BACKENDS;
650189499Srnoland	}
651189499Srnoland
652189499Srnoland	return backend_map;
653189499Srnoland}
654189499Srnoland
655189499Srnolandstatic int r600_count_pipe_bits(uint32_t val)
656189499Srnoland{
657189499Srnoland	int i, ret = 0;
658189499Srnoland	for (i = 0; i < 32; i++) {
659189499Srnoland		ret += val & 1;
660189499Srnoland		val >>= 1;
661189499Srnoland	}
662189499Srnoland	return ret;
663189499Srnoland}
664189499Srnoland
665189499Srnolandstatic void r600_gfx_init(struct drm_device *dev,
666189499Srnoland			  drm_radeon_private_t *dev_priv)
667189499Srnoland{
668189499Srnoland	int i, j, num_qd_pipes;
669189499Srnoland	u32 sx_debug_1;
670189499Srnoland	u32 tc_cntl;
671189499Srnoland	u32 arb_pop;
672189499Srnoland	u32 num_gs_verts_per_thread;
673189499Srnoland	u32 vgt_gs_per_es;
674189499Srnoland	u32 gs_prim_buffer_depth = 0;
675189499Srnoland	u32 sq_ms_fifo_sizes;
676189499Srnoland	u32 sq_config;
677189499Srnoland	u32 sq_gpr_resource_mgmt_1 = 0;
678189499Srnoland	u32 sq_gpr_resource_mgmt_2 = 0;
679189499Srnoland	u32 sq_thread_resource_mgmt = 0;
680189499Srnoland	u32 sq_stack_resource_mgmt_1 = 0;
681189499Srnoland	u32 sq_stack_resource_mgmt_2 = 0;
682189499Srnoland	u32 hdp_host_path_cntl;
683189499Srnoland	u32 backend_map;
684189499Srnoland	u32 gb_tiling_config = 0;
685189499Srnoland	u32 cc_rb_backend_disable = 0;
686189499Srnoland	u32 cc_gc_shader_pipe_config = 0;
687189499Srnoland	u32 ramcfg;
688189499Srnoland
689189499Srnoland	/* setup chip specs */
690189499Srnoland	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
691189499Srnoland	case CHIP_R600:
692189499Srnoland		dev_priv->r600_max_pipes = 4;
693189499Srnoland		dev_priv->r600_max_tile_pipes = 8;
694189499Srnoland		dev_priv->r600_max_simds = 4;
695189499Srnoland		dev_priv->r600_max_backends = 4;
696189499Srnoland		dev_priv->r600_max_gprs = 256;
697189499Srnoland		dev_priv->r600_max_threads = 192;
698189499Srnoland		dev_priv->r600_max_stack_entries = 256;
699189499Srnoland		dev_priv->r600_max_hw_contexts = 8;
700189499Srnoland		dev_priv->r600_max_gs_threads = 16;
701189499Srnoland		dev_priv->r600_sx_max_export_size = 128;
702189499Srnoland		dev_priv->r600_sx_max_export_pos_size = 16;
703189499Srnoland		dev_priv->r600_sx_max_export_smx_size = 128;
704189499Srnoland		dev_priv->r600_sq_num_cf_insts = 2;
705189499Srnoland		break;
706189499Srnoland	case CHIP_RV630:
707189499Srnoland	case CHIP_RV635:
708189499Srnoland		dev_priv->r600_max_pipes = 2;
709189499Srnoland		dev_priv->r600_max_tile_pipes = 2;
710189499Srnoland		dev_priv->r600_max_simds = 3;
711189499Srnoland		dev_priv->r600_max_backends = 1;
712189499Srnoland		dev_priv->r600_max_gprs = 128;
713189499Srnoland		dev_priv->r600_max_threads = 192;
714189499Srnoland		dev_priv->r600_max_stack_entries = 128;
715189499Srnoland		dev_priv->r600_max_hw_contexts = 8;
716189499Srnoland		dev_priv->r600_max_gs_threads = 4;
717189499Srnoland		dev_priv->r600_sx_max_export_size = 128;
718189499Srnoland		dev_priv->r600_sx_max_export_pos_size = 16;
719189499Srnoland		dev_priv->r600_sx_max_export_smx_size = 128;
720189499Srnoland		dev_priv->r600_sq_num_cf_insts = 2;
721189499Srnoland		break;
722189499Srnoland	case CHIP_RV610:
723189499Srnoland	case CHIP_RS780:
724189499Srnoland	case CHIP_RV620:
725189499Srnoland		dev_priv->r600_max_pipes = 1;
726189499Srnoland		dev_priv->r600_max_tile_pipes = 1;
727189499Srnoland		dev_priv->r600_max_simds = 2;
728189499Srnoland		dev_priv->r600_max_backends = 1;
729189499Srnoland		dev_priv->r600_max_gprs = 128;
730189499Srnoland		dev_priv->r600_max_threads = 192;
731189499Srnoland		dev_priv->r600_max_stack_entries = 128;
732189499Srnoland		dev_priv->r600_max_hw_contexts = 4;
733189499Srnoland		dev_priv->r600_max_gs_threads = 4;
734189499Srnoland		dev_priv->r600_sx_max_export_size = 128;
735189499Srnoland		dev_priv->r600_sx_max_export_pos_size = 16;
736189499Srnoland		dev_priv->r600_sx_max_export_smx_size = 128;
737189499Srnoland		dev_priv->r600_sq_num_cf_insts = 1;
738189499Srnoland		break;
739189499Srnoland	case CHIP_RV670:
740189499Srnoland		dev_priv->r600_max_pipes = 4;
741189499Srnoland		dev_priv->r600_max_tile_pipes = 4;
742189499Srnoland		dev_priv->r600_max_simds = 4;
743189499Srnoland		dev_priv->r600_max_backends = 4;
744189499Srnoland		dev_priv->r600_max_gprs = 192;
745189499Srnoland		dev_priv->r600_max_threads = 192;
746189499Srnoland		dev_priv->r600_max_stack_entries = 256;
747189499Srnoland		dev_priv->r600_max_hw_contexts = 8;
748189499Srnoland		dev_priv->r600_max_gs_threads = 16;
749189499Srnoland		dev_priv->r600_sx_max_export_size = 128;
750189499Srnoland		dev_priv->r600_sx_max_export_pos_size = 16;
751189499Srnoland		dev_priv->r600_sx_max_export_smx_size = 128;
752189499Srnoland		dev_priv->r600_sq_num_cf_insts = 2;
753189499Srnoland		break;
754189499Srnoland	default:
755189499Srnoland		break;
756189499Srnoland	}
757189499Srnoland
758189499Srnoland	/* Initialize HDP */
759189499Srnoland	j = 0;
760189499Srnoland	for (i = 0; i < 32; i++) {
761189499Srnoland		RADEON_WRITE((0x2c14 + j), 0x00000000);
762189499Srnoland		RADEON_WRITE((0x2c18 + j), 0x00000000);
763189499Srnoland		RADEON_WRITE((0x2c1c + j), 0x00000000);
764189499Srnoland		RADEON_WRITE((0x2c20 + j), 0x00000000);
765189499Srnoland		RADEON_WRITE((0x2c24 + j), 0x00000000);
766189499Srnoland		j += 0x18;
767189499Srnoland	}
768189499Srnoland
769189499Srnoland	RADEON_WRITE(R600_GRBM_CNTL, R600_GRBM_READ_TIMEOUT(0xff));
770189499Srnoland
771189499Srnoland	/* setup tiling, simd, pipe config */
772189499Srnoland	ramcfg = RADEON_READ(R600_RAMCFG);
773189499Srnoland
774189499Srnoland	switch (dev_priv->r600_max_tile_pipes) {
775189499Srnoland	case 1:
776189499Srnoland		gb_tiling_config |= R600_PIPE_TILING(0);
777189499Srnoland		break;
778189499Srnoland	case 2:
779189499Srnoland		gb_tiling_config |= R600_PIPE_TILING(1);
780189499Srnoland		break;
781189499Srnoland	case 4:
782189499Srnoland		gb_tiling_config |= R600_PIPE_TILING(2);
783189499Srnoland		break;
784189499Srnoland	case 8:
785189499Srnoland		gb_tiling_config |= R600_PIPE_TILING(3);
786189499Srnoland		break;
787189499Srnoland	default:
788189499Srnoland		break;
789189499Srnoland	}
790189499Srnoland
791189499Srnoland	gb_tiling_config |= R600_BANK_TILING((ramcfg >> R600_NOOFBANK_SHIFT) & R600_NOOFBANK_MASK);
792189499Srnoland
793189499Srnoland	gb_tiling_config |= R600_GROUP_SIZE(0);
794189499Srnoland
795189499Srnoland	if (((ramcfg >> R600_NOOFROWS_SHIFT) & R600_NOOFROWS_MASK) > 3) {
796189499Srnoland		gb_tiling_config |= R600_ROW_TILING(3);
797189499Srnoland		gb_tiling_config |= R600_SAMPLE_SPLIT(3);
798189499Srnoland	} else {
799189499Srnoland		gb_tiling_config |=
800189499Srnoland			R600_ROW_TILING(((ramcfg >> R600_NOOFROWS_SHIFT) & R600_NOOFROWS_MASK));
801189499Srnoland		gb_tiling_config |=
802189499Srnoland			R600_SAMPLE_SPLIT(((ramcfg >> R600_NOOFROWS_SHIFT) & R600_NOOFROWS_MASK));
803189499Srnoland	}
804189499Srnoland
805189499Srnoland	gb_tiling_config |= R600_BANK_SWAPS(1);
806189499Srnoland
807189499Srnoland	backend_map = r600_get_tile_pipe_to_backend_map(dev_priv->r600_max_tile_pipes,
808189499Srnoland							dev_priv->r600_max_backends,
809189499Srnoland							(0xff << dev_priv->r600_max_backends) & 0xff);
810189499Srnoland	gb_tiling_config |= R600_BACKEND_MAP(backend_map);
811189499Srnoland
812189499Srnoland	cc_gc_shader_pipe_config =
813189499Srnoland		R600_INACTIVE_QD_PIPES((R6XX_MAX_PIPES_MASK << dev_priv->r600_max_pipes) & R6XX_MAX_PIPES_MASK);
814189499Srnoland	cc_gc_shader_pipe_config |=
815189499Srnoland		R600_INACTIVE_SIMDS((R6XX_MAX_SIMDS_MASK << dev_priv->r600_max_simds) & R6XX_MAX_SIMDS_MASK);
816189499Srnoland
817189499Srnoland	cc_rb_backend_disable =
818189499Srnoland		R600_BACKEND_DISABLE((R6XX_MAX_BACKENDS_MASK << dev_priv->r600_max_backends) & R6XX_MAX_BACKENDS_MASK);
819189499Srnoland
820189499Srnoland	RADEON_WRITE(R600_GB_TILING_CONFIG,      gb_tiling_config);
821189499Srnoland	RADEON_WRITE(R600_DCP_TILING_CONFIG,    (gb_tiling_config & 0xffff));
822189499Srnoland	RADEON_WRITE(R600_HDP_TILING_CONFIG,    (gb_tiling_config & 0xffff));
823189499Srnoland
824189499Srnoland	RADEON_WRITE(R600_CC_RB_BACKEND_DISABLE,      cc_rb_backend_disable);
825189499Srnoland	RADEON_WRITE(R600_CC_GC_SHADER_PIPE_CONFIG,   cc_gc_shader_pipe_config);
826189499Srnoland	RADEON_WRITE(R600_GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);
827189499Srnoland
828189499Srnoland	num_qd_pipes =
829189499Srnoland		R6XX_MAX_BACKENDS - r600_count_pipe_bits(cc_gc_shader_pipe_config & R600_INACTIVE_QD_PIPES_MASK);
830189499Srnoland	RADEON_WRITE(R600_VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & R600_DEALLOC_DIST_MASK);
831189499Srnoland	RADEON_WRITE(R600_VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & R600_VTX_REUSE_DEPTH_MASK);
832189499Srnoland
833189499Srnoland	/* set HW defaults for 3D engine */
834189499Srnoland	RADEON_WRITE(R600_CP_QUEUE_THRESHOLDS, (R600_ROQ_IB1_START(0x16) |
835189499Srnoland						R600_ROQ_IB2_START(0x2b)));
836189499Srnoland
837189499Srnoland	RADEON_WRITE(R600_CP_MEQ_THRESHOLDS, (R600_MEQ_END(0x40) |
838189499Srnoland					      R600_ROQ_END(0x40)));
839189499Srnoland
840189499Srnoland	RADEON_WRITE(R600_TA_CNTL_AUX, (R600_DISABLE_CUBE_ANISO |
841189499Srnoland					R600_SYNC_GRADIENT |
842189499Srnoland					R600_SYNC_WALKER |
843189499Srnoland					R600_SYNC_ALIGNER));
844189499Srnoland
845189499Srnoland	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV670)
846189499Srnoland		RADEON_WRITE(R600_ARB_GDEC_RD_CNTL, 0x00000021);
847189499Srnoland
848189499Srnoland	sx_debug_1 = RADEON_READ(R600_SX_DEBUG_1);
849189499Srnoland	sx_debug_1 |= R600_SMX_EVENT_RELEASE;
850189499Srnoland	if (((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_R600))
851189499Srnoland		sx_debug_1 |= R600_ENABLE_NEW_SMX_ADDRESS;
852189499Srnoland	RADEON_WRITE(R600_SX_DEBUG_1, sx_debug_1);
853189499Srnoland
854189499Srnoland	if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R600) ||
855189499Srnoland	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630) ||
856189499Srnoland	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
857189499Srnoland	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
858189499Srnoland	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780))
859189499Srnoland		RADEON_WRITE(R600_DB_DEBUG, R600_PREZ_MUST_WAIT_FOR_POSTZ_DONE);
860189499Srnoland	else
861189499Srnoland		RADEON_WRITE(R600_DB_DEBUG, 0);
862189499Srnoland
863189499Srnoland	RADEON_WRITE(R600_DB_WATERMARKS, (R600_DEPTH_FREE(4) |
864189499Srnoland					  R600_DEPTH_FLUSH(16) |
865189499Srnoland					  R600_DEPTH_PENDING_FREE(4) |
866189499Srnoland					  R600_DEPTH_CACHELINE_FREE(16)));
867189499Srnoland	RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0);
868189499Srnoland	RADEON_WRITE(R600_VGT_NUM_INSTANCES, 0);
869189499Srnoland
870189499Srnoland	RADEON_WRITE(R600_SPI_CONFIG_CNTL, R600_GPR_WRITE_PRIORITY(0));
871189499Srnoland	RADEON_WRITE(R600_SPI_CONFIG_CNTL_1, R600_VTX_DONE_DELAY(0));
872189499Srnoland
873189499Srnoland	sq_ms_fifo_sizes = RADEON_READ(R600_SQ_MS_FIFO_SIZES);
874189499Srnoland	if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
875189499Srnoland	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
876189499Srnoland	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780)) {
877189499Srnoland		sq_ms_fifo_sizes = (R600_CACHE_FIFO_SIZE(0xa) |
878189499Srnoland				    R600_FETCH_FIFO_HIWATER(0xa) |
879189499Srnoland				    R600_DONE_FIFO_HIWATER(0xe0) |
880189499Srnoland				    R600_ALU_UPDATE_FIFO_HIWATER(0x8));
881189499Srnoland	} else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R600) ||
882189499Srnoland		   ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630)) {
883189499Srnoland		sq_ms_fifo_sizes &= ~R600_DONE_FIFO_HIWATER(0xff);
884189499Srnoland		sq_ms_fifo_sizes |= R600_DONE_FIFO_HIWATER(0x4);
885189499Srnoland	}
886189499Srnoland	RADEON_WRITE(R600_SQ_MS_FIFO_SIZES, sq_ms_fifo_sizes);
887189499Srnoland
888189499Srnoland	/* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT
889189499Srnoland	 * should be adjusted as needed by the 2D/3D drivers.  This just sets default values
890189499Srnoland	 */
891189499Srnoland	sq_config = RADEON_READ(R600_SQ_CONFIG);
892189499Srnoland	sq_config &= ~(R600_PS_PRIO(3) |
893189499Srnoland		       R600_VS_PRIO(3) |
894189499Srnoland		       R600_GS_PRIO(3) |
895189499Srnoland		       R600_ES_PRIO(3));
896189499Srnoland	sq_config |= (R600_DX9_CONSTS |
897189499Srnoland		      R600_VC_ENABLE |
898189499Srnoland		      R600_PS_PRIO(0) |
899189499Srnoland		      R600_VS_PRIO(1) |
900189499Srnoland		      R600_GS_PRIO(2) |
901189499Srnoland		      R600_ES_PRIO(3));
902189499Srnoland
903189499Srnoland	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R600) {
904189499Srnoland		sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(124) |
905189499Srnoland					  R600_NUM_VS_GPRS(124) |
906189499Srnoland					  R600_NUM_CLAUSE_TEMP_GPRS(4));
907189499Srnoland		sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(0) |
908189499Srnoland					  R600_NUM_ES_GPRS(0));
909189499Srnoland		sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(136) |
910189499Srnoland					   R600_NUM_VS_THREADS(48) |
911189499Srnoland					   R600_NUM_GS_THREADS(4) |
912189499Srnoland					   R600_NUM_ES_THREADS(4));
913189499Srnoland		sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(128) |
914189499Srnoland					    R600_NUM_VS_STACK_ENTRIES(128));
915189499Srnoland		sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(0) |
916189499Srnoland					    R600_NUM_ES_STACK_ENTRIES(0));
917189499Srnoland	} else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
918189499Srnoland		   ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
919189499Srnoland		   ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780)) {
920189499Srnoland		/* no vertex cache */
921189499Srnoland		sq_config &= ~R600_VC_ENABLE;
922189499Srnoland
923189499Srnoland		sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(44) |
924189499Srnoland					  R600_NUM_VS_GPRS(44) |
925189499Srnoland					  R600_NUM_CLAUSE_TEMP_GPRS(2));
926189499Srnoland		sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(17) |
927189499Srnoland					  R600_NUM_ES_GPRS(17));
928189499Srnoland		sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(79) |
929189499Srnoland					   R600_NUM_VS_THREADS(78) |
930189499Srnoland					   R600_NUM_GS_THREADS(4) |
931189499Srnoland					   R600_NUM_ES_THREADS(31));
932189499Srnoland		sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(40) |
933189499Srnoland					    R600_NUM_VS_STACK_ENTRIES(40));
934189499Srnoland		sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(32) |
935189499Srnoland					    R600_NUM_ES_STACK_ENTRIES(16));
936189499Srnoland	} else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630) ||
937189499Srnoland		   ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV635)) {
938189499Srnoland		sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(44) |
939189499Srnoland					  R600_NUM_VS_GPRS(44) |
940189499Srnoland					  R600_NUM_CLAUSE_TEMP_GPRS(2));
941189499Srnoland		sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(18) |
942189499Srnoland					  R600_NUM_ES_GPRS(18));
943189499Srnoland		sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(79) |
944189499Srnoland					   R600_NUM_VS_THREADS(78) |
945189499Srnoland					   R600_NUM_GS_THREADS(4) |
946189499Srnoland					   R600_NUM_ES_THREADS(31));
947189499Srnoland		sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(40) |
948189499Srnoland					    R600_NUM_VS_STACK_ENTRIES(40));
949189499Srnoland		sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(32) |
950189499Srnoland					    R600_NUM_ES_STACK_ENTRIES(16));
951189499Srnoland	} else if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV670) {
952189499Srnoland		sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(44) |
953189499Srnoland					  R600_NUM_VS_GPRS(44) |
954189499Srnoland					  R600_NUM_CLAUSE_TEMP_GPRS(2));
955189499Srnoland		sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(17) |
956189499Srnoland					  R600_NUM_ES_GPRS(17));
957189499Srnoland		sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(79) |
958189499Srnoland					   R600_NUM_VS_THREADS(78) |
959189499Srnoland					   R600_NUM_GS_THREADS(4) |
960189499Srnoland					   R600_NUM_ES_THREADS(31));
961189499Srnoland		sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(64) |
962189499Srnoland					    R600_NUM_VS_STACK_ENTRIES(64));
963189499Srnoland		sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(64) |
964189499Srnoland					    R600_NUM_ES_STACK_ENTRIES(64));
965189499Srnoland	}
966189499Srnoland
967189499Srnoland	RADEON_WRITE(R600_SQ_CONFIG, sq_config);
968189499Srnoland	RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_1,  sq_gpr_resource_mgmt_1);
969189499Srnoland	RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_2,  sq_gpr_resource_mgmt_2);
970189499Srnoland	RADEON_WRITE(R600_SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt);
971189499Srnoland	RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_1, sq_stack_resource_mgmt_1);
972189499Srnoland	RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_2, sq_stack_resource_mgmt_2);
973189499Srnoland
974189499Srnoland	if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
975189499Srnoland	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
976189499Srnoland	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780))
977189499Srnoland		RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, R600_CACHE_INVALIDATION(R600_TC_ONLY));
978189499Srnoland	else
979189499Srnoland		RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, R600_CACHE_INVALIDATION(R600_VC_AND_TC));
980189499Srnoland
981189499Srnoland	RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_2S, (R600_S0_X(0xc) |
982189499Srnoland						    R600_S0_Y(0x4) |
983189499Srnoland						    R600_S1_X(0x4) |
984189499Srnoland						    R600_S1_Y(0xc)));
985189499Srnoland	RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_4S, (R600_S0_X(0xe) |
986189499Srnoland						    R600_S0_Y(0xe) |
987189499Srnoland						    R600_S1_X(0x2) |
988189499Srnoland						    R600_S1_Y(0x2) |
989189499Srnoland						    R600_S2_X(0xa) |
990189499Srnoland						    R600_S2_Y(0x6) |
991189499Srnoland						    R600_S3_X(0x6) |
992189499Srnoland						    R600_S3_Y(0xa)));
993189499Srnoland	RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_8S_WD0, (R600_S0_X(0xe) |
994189499Srnoland							R600_S0_Y(0xb) |
995189499Srnoland							R600_S1_X(0x4) |
996189499Srnoland							R600_S1_Y(0xc) |
997189499Srnoland							R600_S2_X(0x1) |
998189499Srnoland							R600_S2_Y(0x6) |
999189499Srnoland							R600_S3_X(0xa) |
1000189499Srnoland							R600_S3_Y(0xe)));
1001189499Srnoland	RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_8S_WD1, (R600_S4_X(0x6) |
1002189499Srnoland							R600_S4_Y(0x1) |
1003189499Srnoland							R600_S5_X(0x0) |
1004189499Srnoland							R600_S5_Y(0x0) |
1005189499Srnoland							R600_S6_X(0xb) |
1006189499Srnoland							R600_S6_Y(0x4) |
1007189499Srnoland							R600_S7_X(0x7) |
1008189499Srnoland							R600_S7_Y(0x8)));
1009189499Srnoland
1010189499Srnoland
1011189499Srnoland	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
1012189499Srnoland	case CHIP_R600:
1013189499Srnoland	case CHIP_RV630:
1014189499Srnoland	case CHIP_RV635:
1015189499Srnoland		gs_prim_buffer_depth = 0;
1016189499Srnoland		break;
1017189499Srnoland	case CHIP_RV610:
1018189499Srnoland	case CHIP_RS780:
1019189499Srnoland	case CHIP_RV620:
1020189499Srnoland		gs_prim_buffer_depth = 32;
1021189499Srnoland		break;
1022189499Srnoland	case CHIP_RV670:
1023189499Srnoland		gs_prim_buffer_depth = 128;
1024189499Srnoland		break;
1025189499Srnoland	default:
1026189499Srnoland		break;
1027189499Srnoland	}
1028189499Srnoland
1029189499Srnoland	num_gs_verts_per_thread = dev_priv->r600_max_pipes * 16;
1030189499Srnoland	vgt_gs_per_es = gs_prim_buffer_depth + num_gs_verts_per_thread;
1031189499Srnoland	/* Max value for this is 256 */
1032189499Srnoland	if (vgt_gs_per_es > 256)
1033189499Srnoland		vgt_gs_per_es = 256;
1034189499Srnoland
1035189499Srnoland	RADEON_WRITE(R600_VGT_ES_PER_GS, 128);
1036189499Srnoland	RADEON_WRITE(R600_VGT_GS_PER_ES, vgt_gs_per_es);
1037189499Srnoland	RADEON_WRITE(R600_VGT_GS_PER_VS, 2);
1038189499Srnoland	RADEON_WRITE(R600_VGT_GS_VERTEX_REUSE, 16);
1039189499Srnoland
1040189499Srnoland	/* more default values. 2D/3D driver should adjust as needed */
1041189499Srnoland	RADEON_WRITE(R600_PA_SC_LINE_STIPPLE_STATE, 0);
1042189499Srnoland	RADEON_WRITE(R600_VGT_STRMOUT_EN, 0);
1043189499Srnoland	RADEON_WRITE(R600_SX_MISC, 0);
1044189499Srnoland	RADEON_WRITE(R600_PA_SC_MODE_CNTL, 0);
1045189499Srnoland	RADEON_WRITE(R600_PA_SC_AA_CONFIG, 0);
1046189499Srnoland	RADEON_WRITE(R600_PA_SC_LINE_STIPPLE, 0);
1047189499Srnoland	RADEON_WRITE(R600_SPI_INPUT_Z, 0);
1048189499Srnoland	RADEON_WRITE(R600_SPI_PS_IN_CONTROL_0, R600_NUM_INTERP(2));
1049189499Srnoland	RADEON_WRITE(R600_CB_COLOR7_FRAG, 0);
1050189499Srnoland
1051189499Srnoland	/* clear render buffer base addresses */
1052189499Srnoland	RADEON_WRITE(R600_CB_COLOR0_BASE, 0);
1053189499Srnoland	RADEON_WRITE(R600_CB_COLOR1_BASE, 0);
1054189499Srnoland	RADEON_WRITE(R600_CB_COLOR2_BASE, 0);
1055189499Srnoland	RADEON_WRITE(R600_CB_COLOR3_BASE, 0);
1056189499Srnoland	RADEON_WRITE(R600_CB_COLOR4_BASE, 0);
1057189499Srnoland	RADEON_WRITE(R600_CB_COLOR5_BASE, 0);
1058189499Srnoland	RADEON_WRITE(R600_CB_COLOR6_BASE, 0);
1059189499Srnoland	RADEON_WRITE(R600_CB_COLOR7_BASE, 0);
1060189499Srnoland
1061189499Srnoland	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
1062189499Srnoland	case CHIP_RV610:
1063189499Srnoland	case CHIP_RS780:
1064189499Srnoland	case CHIP_RV620:
1065189499Srnoland		tc_cntl = R600_TC_L2_SIZE(8);
1066189499Srnoland		break;
1067189499Srnoland	case CHIP_RV630:
1068189499Srnoland	case CHIP_RV635:
1069189499Srnoland		tc_cntl = R600_TC_L2_SIZE(4);
1070189499Srnoland		break;
1071189499Srnoland	case CHIP_R600:
1072189499Srnoland		tc_cntl = R600_TC_L2_SIZE(0) | R600_L2_DISABLE_LATE_HIT;
1073189499Srnoland		break;
1074189499Srnoland	default:
1075189499Srnoland		tc_cntl = R600_TC_L2_SIZE(0);
1076189499Srnoland		break;
1077189499Srnoland	}
1078189499Srnoland
1079189499Srnoland	RADEON_WRITE(R600_TC_CNTL, tc_cntl);
1080189499Srnoland
1081189499Srnoland	hdp_host_path_cntl = RADEON_READ(R600_HDP_HOST_PATH_CNTL);
1082189499Srnoland	RADEON_WRITE(R600_HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
1083189499Srnoland
1084189499Srnoland	arb_pop = RADEON_READ(R600_ARB_POP);
1085189499Srnoland	arb_pop |= R600_ENABLE_TC128;
1086189499Srnoland	RADEON_WRITE(R600_ARB_POP, arb_pop);
1087189499Srnoland
1088189499Srnoland	RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0);
1089189499Srnoland	RADEON_WRITE(R600_PA_CL_ENHANCE, (R600_CLIP_VTX_REORDER_ENA |
1090189499Srnoland					  R600_NUM_CLIP_SEQ(3)));
1091189499Srnoland	RADEON_WRITE(R600_PA_SC_ENHANCE, R600_FORCE_EOV_MAX_CLK_CNT(4095));
1092189499Srnoland
1093189499Srnoland}
1094189499Srnoland
1095189499Srnolandstatic u32 r700_get_tile_pipe_to_backend_map(u32 num_tile_pipes,
1096189499Srnoland					     u32 num_backends,
1097189499Srnoland					     u32 backend_disable_mask)
1098189499Srnoland{
1099189499Srnoland	u32 backend_map = 0;
1100189499Srnoland	u32 enabled_backends_mask;
1101189499Srnoland	u32 enabled_backends_count;
1102189499Srnoland	u32 cur_pipe;
1103189499Srnoland	u32 swizzle_pipe[R7XX_MAX_PIPES];
1104189499Srnoland	u32 cur_backend;
1105189499Srnoland	u32 i;
1106189499Srnoland
1107189499Srnoland	if (num_tile_pipes > R7XX_MAX_PIPES)
1108189499Srnoland		num_tile_pipes = R7XX_MAX_PIPES;
1109189499Srnoland	if (num_tile_pipes < 1)
1110189499Srnoland		num_tile_pipes = 1;
1111189499Srnoland	if (num_backends > R7XX_MAX_BACKENDS)
1112189499Srnoland		num_backends = R7XX_MAX_BACKENDS;
1113189499Srnoland	if (num_backends < 1)
1114189499Srnoland		num_backends = 1;
1115189499Srnoland
1116189499Srnoland	enabled_backends_mask = 0;
1117189499Srnoland	enabled_backends_count = 0;
1118189499Srnoland	for (i = 0; i < R7XX_MAX_BACKENDS; ++i) {
1119189499Srnoland		if (((backend_disable_mask >> i) & 1) == 0) {
1120189499Srnoland			enabled_backends_mask |= (1 << i);
1121189499Srnoland			++enabled_backends_count;
1122189499Srnoland		}
1123189499Srnoland		if (enabled_backends_count == num_backends)
1124189499Srnoland			break;
1125189499Srnoland	}
1126189499Srnoland
1127189499Srnoland	if (enabled_backends_count == 0) {
1128189499Srnoland		enabled_backends_mask = 1;
1129189499Srnoland		enabled_backends_count = 1;
1130189499Srnoland	}
1131189499Srnoland
1132189499Srnoland	if (enabled_backends_count != num_backends)
1133189499Srnoland		num_backends = enabled_backends_count;
1134189499Srnoland
1135189499Srnoland	memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R7XX_MAX_PIPES);
1136189499Srnoland	switch (num_tile_pipes) {
1137189499Srnoland	case 1:
1138189499Srnoland		swizzle_pipe[0] = 0;
1139189499Srnoland		break;
1140189499Srnoland	case 2:
1141189499Srnoland		swizzle_pipe[0] = 0;
1142189499Srnoland		swizzle_pipe[1] = 1;
1143189499Srnoland		break;
1144189499Srnoland	case 3:
1145189499Srnoland		swizzle_pipe[0] = 0;
1146189499Srnoland		swizzle_pipe[1] = 2;
1147189499Srnoland		swizzle_pipe[2] = 1;
1148189499Srnoland		break;
1149189499Srnoland	case 4:
1150189499Srnoland		swizzle_pipe[0] = 0;
1151189499Srnoland		swizzle_pipe[1] = 2;
1152189499Srnoland		swizzle_pipe[2] = 3;
1153189499Srnoland		swizzle_pipe[3] = 1;
1154189499Srnoland		break;
1155189499Srnoland	case 5:
1156189499Srnoland		swizzle_pipe[0] = 0;
1157189499Srnoland		swizzle_pipe[1] = 2;
1158189499Srnoland		swizzle_pipe[2] = 4;
1159189499Srnoland		swizzle_pipe[3] = 1;
1160189499Srnoland		swizzle_pipe[4] = 3;
1161189499Srnoland		break;
1162189499Srnoland	case 6:
1163189499Srnoland		swizzle_pipe[0] = 0;
1164189499Srnoland		swizzle_pipe[1] = 2;
1165189499Srnoland		swizzle_pipe[2] = 4;
1166189499Srnoland		swizzle_pipe[3] = 5;
1167189499Srnoland		swizzle_pipe[4] = 3;
1168189499Srnoland		swizzle_pipe[5] = 1;
1169189499Srnoland		break;
1170189499Srnoland	case 7:
1171189499Srnoland		swizzle_pipe[0] = 0;
1172189499Srnoland		swizzle_pipe[1] = 2;
1173189499Srnoland		swizzle_pipe[2] = 4;
1174189499Srnoland		swizzle_pipe[3] = 6;
1175189499Srnoland		swizzle_pipe[4] = 3;
1176189499Srnoland		swizzle_pipe[5] = 1;
1177189499Srnoland		swizzle_pipe[6] = 5;
1178189499Srnoland		break;
1179189499Srnoland	case 8:
1180189499Srnoland		swizzle_pipe[0] = 0;
1181189499Srnoland		swizzle_pipe[1] = 2;
1182189499Srnoland		swizzle_pipe[2] = 4;
1183189499Srnoland		swizzle_pipe[3] = 6;
1184189499Srnoland		swizzle_pipe[4] = 3;
1185189499Srnoland		swizzle_pipe[5] = 1;
1186189499Srnoland		swizzle_pipe[6] = 7;
1187189499Srnoland		swizzle_pipe[7] = 5;
1188189499Srnoland		break;
1189189499Srnoland	}
1190189499Srnoland
1191189499Srnoland	cur_backend = 0;
1192189499Srnoland	for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) {
1193189499Srnoland		while (((1 << cur_backend) & enabled_backends_mask) == 0)
1194189499Srnoland			cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS;
1195189499Srnoland
1196189499Srnoland		backend_map |= (u32)(((cur_backend & 3) << (swizzle_pipe[cur_pipe] * 2)));
1197189499Srnoland
1198189499Srnoland		cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS;
1199189499Srnoland	}
1200189499Srnoland
1201189499Srnoland	return backend_map;
1202189499Srnoland}
1203189499Srnoland
1204189499Srnolandstatic void r700_gfx_init(struct drm_device *dev,
1205189499Srnoland			  drm_radeon_private_t *dev_priv)
1206189499Srnoland{
1207189499Srnoland	int i, j, num_qd_pipes;
1208189499Srnoland	u32 sx_debug_1;
1209189499Srnoland	u32 smx_dc_ctl0;
1210189499Srnoland	u32 num_gs_verts_per_thread;
1211189499Srnoland	u32 vgt_gs_per_es;
1212189499Srnoland	u32 gs_prim_buffer_depth = 0;
1213189499Srnoland	u32 sq_ms_fifo_sizes;
1214189499Srnoland	u32 sq_config;
1215189499Srnoland	u32 sq_thread_resource_mgmt;
1216189499Srnoland	u32 hdp_host_path_cntl;
1217189499Srnoland	u32 sq_dyn_gpr_size_simd_ab_0;
1218189499Srnoland	u32 backend_map;
1219189499Srnoland	u32 gb_tiling_config = 0;
1220189499Srnoland	u32 cc_rb_backend_disable = 0;
1221189499Srnoland	u32 cc_gc_shader_pipe_config = 0;
1222189499Srnoland	u32 mc_arb_ramcfg;
1223189499Srnoland	u32 db_debug4;
1224189499Srnoland
1225189499Srnoland	/* setup chip specs */
1226189499Srnoland	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
1227189499Srnoland	case CHIP_RV770:
1228189499Srnoland		dev_priv->r600_max_pipes = 4;
1229189499Srnoland		dev_priv->r600_max_tile_pipes = 8;
1230189499Srnoland		dev_priv->r600_max_simds = 10;
1231189499Srnoland		dev_priv->r600_max_backends = 4;
1232189499Srnoland		dev_priv->r600_max_gprs = 256;
1233189499Srnoland		dev_priv->r600_max_threads = 248;
1234189499Srnoland		dev_priv->r600_max_stack_entries = 512;
1235189499Srnoland		dev_priv->r600_max_hw_contexts = 8;
1236189499Srnoland		dev_priv->r600_max_gs_threads = 16 * 2;
1237189499Srnoland		dev_priv->r600_sx_max_export_size = 128;
1238189499Srnoland		dev_priv->r600_sx_max_export_pos_size = 16;
1239189499Srnoland		dev_priv->r600_sx_max_export_smx_size = 112;
1240189499Srnoland		dev_priv->r600_sq_num_cf_insts = 2;
1241189499Srnoland
1242189499Srnoland		dev_priv->r700_sx_num_of_sets = 7;
1243189499Srnoland		dev_priv->r700_sc_prim_fifo_size = 0xF9;
1244189499Srnoland		dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;
1245189499Srnoland		dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;
1246189499Srnoland		break;
1247189499Srnoland	case CHIP_RV730:
1248189499Srnoland		dev_priv->r600_max_pipes = 2;
1249189499Srnoland		dev_priv->r600_max_tile_pipes = 4;
1250189499Srnoland		dev_priv->r600_max_simds = 8;
1251189499Srnoland		dev_priv->r600_max_backends = 2;
1252189499Srnoland		dev_priv->r600_max_gprs = 128;
1253189499Srnoland		dev_priv->r600_max_threads = 248;
1254189499Srnoland		dev_priv->r600_max_stack_entries = 256;
1255189499Srnoland		dev_priv->r600_max_hw_contexts = 8;
1256189499Srnoland		dev_priv->r600_max_gs_threads = 16 * 2;
1257189499Srnoland		dev_priv->r600_sx_max_export_size = 256;
1258189499Srnoland		dev_priv->r600_sx_max_export_pos_size = 32;
1259189499Srnoland		dev_priv->r600_sx_max_export_smx_size = 224;
1260189499Srnoland		dev_priv->r600_sq_num_cf_insts = 2;
1261189499Srnoland
1262189499Srnoland		dev_priv->r700_sx_num_of_sets = 7;
1263189499Srnoland		dev_priv->r700_sc_prim_fifo_size = 0xf9;
1264189499Srnoland		dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;
1265189499Srnoland		dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;
1266189499Srnoland		break;
1267189499Srnoland	case CHIP_RV710:
1268189499Srnoland		dev_priv->r600_max_pipes = 2;
1269189499Srnoland		dev_priv->r600_max_tile_pipes = 2;
1270189499Srnoland		dev_priv->r600_max_simds = 2;
1271189499Srnoland		dev_priv->r600_max_backends = 1;
1272189499Srnoland		dev_priv->r600_max_gprs = 256;
1273189499Srnoland		dev_priv->r600_max_threads = 192;
1274189499Srnoland		dev_priv->r600_max_stack_entries = 256;
1275189499Srnoland		dev_priv->r600_max_hw_contexts = 4;
1276189499Srnoland		dev_priv->r600_max_gs_threads = 8 * 2;
1277189499Srnoland		dev_priv->r600_sx_max_export_size = 128;
1278189499Srnoland		dev_priv->r600_sx_max_export_pos_size = 16;
1279189499Srnoland		dev_priv->r600_sx_max_export_smx_size = 112;
1280189499Srnoland		dev_priv->r600_sq_num_cf_insts = 1;
1281189499Srnoland
1282189499Srnoland		dev_priv->r700_sx_num_of_sets = 7;
1283189499Srnoland		dev_priv->r700_sc_prim_fifo_size = 0x40;
1284189499Srnoland		dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;
1285189499Srnoland		dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;
1286189499Srnoland		break;
1287189499Srnoland	default:
1288189499Srnoland		break;
1289189499Srnoland	}
1290189499Srnoland
1291189499Srnoland	/* Initialize HDP */
1292189499Srnoland	j = 0;
1293189499Srnoland	for (i = 0; i < 32; i++) {
1294189499Srnoland		RADEON_WRITE((0x2c14 + j), 0x00000000);
1295189499Srnoland		RADEON_WRITE((0x2c18 + j), 0x00000000);
1296189499Srnoland		RADEON_WRITE((0x2c1c + j), 0x00000000);
1297189499Srnoland		RADEON_WRITE((0x2c20 + j), 0x00000000);
1298189499Srnoland		RADEON_WRITE((0x2c24 + j), 0x00000000);
1299189499Srnoland		j += 0x18;
1300189499Srnoland	}
1301189499Srnoland
1302189499Srnoland	RADEON_WRITE(R600_GRBM_CNTL, R600_GRBM_READ_TIMEOUT(0xff));
1303189499Srnoland
1304189499Srnoland	/* setup tiling, simd, pipe config */
1305189499Srnoland	mc_arb_ramcfg = RADEON_READ(R700_MC_ARB_RAMCFG);
1306189499Srnoland
1307189499Srnoland	switch (dev_priv->r600_max_tile_pipes) {
1308189499Srnoland	case 1:
1309189499Srnoland		gb_tiling_config |= R600_PIPE_TILING(0);
1310189499Srnoland		break;
1311189499Srnoland	case 2:
1312189499Srnoland		gb_tiling_config |= R600_PIPE_TILING(1);
1313189499Srnoland		break;
1314189499Srnoland	case 4:
1315189499Srnoland		gb_tiling_config |= R600_PIPE_TILING(2);
1316189499Srnoland		break;
1317189499Srnoland	case 8:
1318189499Srnoland		gb_tiling_config |= R600_PIPE_TILING(3);
1319189499Srnoland		break;
1320189499Srnoland	default:
1321189499Srnoland		break;
1322189499Srnoland	}
1323189499Srnoland
1324189499Srnoland	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV770)
1325189499Srnoland		gb_tiling_config |= R600_BANK_TILING(1);
1326189499Srnoland	else
1327189499Srnoland		gb_tiling_config |= R600_BANK_TILING((mc_arb_ramcfg >> R700_NOOFBANK_SHIFT) & R700_NOOFBANK_MASK);
1328189499Srnoland
1329189499Srnoland	gb_tiling_config |= R600_GROUP_SIZE(0);
1330189499Srnoland
1331189499Srnoland	if (((mc_arb_ramcfg >> R700_NOOFROWS_SHIFT) & R700_NOOFROWS_MASK) > 3) {
1332189499Srnoland		gb_tiling_config |= R600_ROW_TILING(3);
1333189499Srnoland		gb_tiling_config |= R600_SAMPLE_SPLIT(3);
1334189499Srnoland	} else {
1335189499Srnoland		gb_tiling_config |=
1336189499Srnoland			R600_ROW_TILING(((mc_arb_ramcfg >> R700_NOOFROWS_SHIFT) & R700_NOOFROWS_MASK));
1337189499Srnoland		gb_tiling_config |=
1338189499Srnoland			R600_SAMPLE_SPLIT(((mc_arb_ramcfg >> R700_NOOFROWS_SHIFT) & R700_NOOFROWS_MASK));
1339189499Srnoland	}
1340189499Srnoland
1341189499Srnoland	gb_tiling_config |= R600_BANK_SWAPS(1);
1342189499Srnoland
1343189499Srnoland	backend_map = r700_get_tile_pipe_to_backend_map(dev_priv->r600_max_tile_pipes,
1344189499Srnoland							dev_priv->r600_max_backends,
1345189499Srnoland							(0xff << dev_priv->r600_max_backends) & 0xff);
1346189499Srnoland	gb_tiling_config |= R600_BACKEND_MAP(backend_map);
1347189499Srnoland
1348189499Srnoland	cc_gc_shader_pipe_config =
1349189499Srnoland		R600_INACTIVE_QD_PIPES((R7XX_MAX_PIPES_MASK << dev_priv->r600_max_pipes) & R7XX_MAX_PIPES_MASK);
1350189499Srnoland	cc_gc_shader_pipe_config |=
1351189499Srnoland		R600_INACTIVE_SIMDS((R7XX_MAX_SIMDS_MASK << dev_priv->r600_max_simds) & R7XX_MAX_SIMDS_MASK);
1352189499Srnoland
1353189499Srnoland	cc_rb_backend_disable =
1354189499Srnoland		R600_BACKEND_DISABLE((R7XX_MAX_BACKENDS_MASK << dev_priv->r600_max_backends) & R7XX_MAX_BACKENDS_MASK);
1355189499Srnoland
1356189499Srnoland	RADEON_WRITE(R600_GB_TILING_CONFIG,      gb_tiling_config);
1357189499Srnoland	RADEON_WRITE(R600_DCP_TILING_CONFIG,    (gb_tiling_config & 0xffff));
1358189499Srnoland	RADEON_WRITE(R600_HDP_TILING_CONFIG,    (gb_tiling_config & 0xffff));
1359189499Srnoland
1360189499Srnoland	RADEON_WRITE(R600_CC_RB_BACKEND_DISABLE,      cc_rb_backend_disable);
1361189499Srnoland	RADEON_WRITE(R600_CC_GC_SHADER_PIPE_CONFIG,   cc_gc_shader_pipe_config);
1362189499Srnoland	RADEON_WRITE(R600_GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);
1363189499Srnoland
1364189499Srnoland	RADEON_WRITE(R700_CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
1365189499Srnoland	RADEON_WRITE(R700_CGTS_SYS_TCC_DISABLE, 0);
1366189499Srnoland	RADEON_WRITE(R700_CGTS_TCC_DISABLE, 0);
1367189499Srnoland	RADEON_WRITE(R700_CGTS_USER_SYS_TCC_DISABLE, 0);
1368189499Srnoland	RADEON_WRITE(R700_CGTS_USER_TCC_DISABLE, 0);
1369189499Srnoland
1370189499Srnoland	num_qd_pipes =
1371189499Srnoland		R7XX_MAX_BACKENDS - r600_count_pipe_bits(cc_gc_shader_pipe_config & R600_INACTIVE_QD_PIPES_MASK);
1372189499Srnoland	RADEON_WRITE(R600_VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & R600_DEALLOC_DIST_MASK);
1373189499Srnoland	RADEON_WRITE(R600_VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & R600_VTX_REUSE_DEPTH_MASK);
1374189499Srnoland
1375189499Srnoland	/* set HW defaults for 3D engine */
1376189499Srnoland	RADEON_WRITE(R600_CP_QUEUE_THRESHOLDS, (R600_ROQ_IB1_START(0x16) |
1377189499Srnoland						R600_ROQ_IB2_START(0x2b)));
1378189499Srnoland
1379189499Srnoland	RADEON_WRITE(R600_CP_MEQ_THRESHOLDS, R700_STQ_SPLIT(0x30));
1380189499Srnoland
1381189499Srnoland	RADEON_WRITE(R600_TA_CNTL_AUX, (R600_DISABLE_CUBE_ANISO |
1382189499Srnoland					R600_SYNC_GRADIENT |
1383189499Srnoland					R600_SYNC_WALKER |
1384189499Srnoland					R600_SYNC_ALIGNER));
1385189499Srnoland
1386189499Srnoland	sx_debug_1 = RADEON_READ(R700_SX_DEBUG_1);
1387189499Srnoland	sx_debug_1 |= R700_ENABLE_NEW_SMX_ADDRESS;
1388189499Srnoland	RADEON_WRITE(R700_SX_DEBUG_1, sx_debug_1);
1389189499Srnoland
1390189499Srnoland	smx_dc_ctl0 = RADEON_READ(R600_SMX_DC_CTL0);
1391189499Srnoland	smx_dc_ctl0 &= ~R700_CACHE_DEPTH(0x1ff);
1392189499Srnoland	smx_dc_ctl0 |= R700_CACHE_DEPTH((dev_priv->r700_sx_num_of_sets * 64) - 1);
1393189499Srnoland	RADEON_WRITE(R600_SMX_DC_CTL0, smx_dc_ctl0);
1394189499Srnoland
1395189499Srnoland	RADEON_WRITE(R700_SMX_EVENT_CTL, (R700_ES_FLUSH_CTL(4) |
1396189499Srnoland					  R700_GS_FLUSH_CTL(4) |
1397189499Srnoland					  R700_ACK_FLUSH_CTL(3) |
1398189499Srnoland					  R700_SYNC_FLUSH_CTL));
1399189499Srnoland
1400189499Srnoland	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV770)
1401189499Srnoland		RADEON_WRITE(R700_DB_DEBUG3, R700_DB_CLK_OFF_DELAY(0x1f));
1402189499Srnoland	else {
1403189499Srnoland		db_debug4 = RADEON_READ(RV700_DB_DEBUG4);
1404189499Srnoland		db_debug4 |= RV700_DISABLE_TILE_COVERED_FOR_PS_ITER;
1405189499Srnoland		RADEON_WRITE(RV700_DB_DEBUG4, db_debug4);
1406189499Srnoland	}
1407189499Srnoland
1408189499Srnoland	RADEON_WRITE(R600_SX_EXPORT_BUFFER_SIZES, (R600_COLOR_BUFFER_SIZE((dev_priv->r600_sx_max_export_size / 4) - 1) |
1409189499Srnoland						   R600_POSITION_BUFFER_SIZE((dev_priv->r600_sx_max_export_pos_size / 4) - 1) |
1410189499Srnoland						   R600_SMX_BUFFER_SIZE((dev_priv->r600_sx_max_export_smx_size / 4) - 1)));
1411189499Srnoland
1412189499Srnoland	RADEON_WRITE(R700_PA_SC_FIFO_SIZE_R7XX, (R700_SC_PRIM_FIFO_SIZE(dev_priv->r700_sc_prim_fifo_size) |
1413189499Srnoland						 R700_SC_HIZ_TILE_FIFO_SIZE(dev_priv->r700_sc_hiz_tile_fifo_size) |
1414189499Srnoland						 R700_SC_EARLYZ_TILE_FIFO_SIZE(dev_priv->r700_sc_earlyz_tile_fifo_fize)));
1415189499Srnoland
1416189499Srnoland	RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0);
1417189499Srnoland
1418189499Srnoland	RADEON_WRITE(R600_VGT_NUM_INSTANCES, 1);
1419189499Srnoland
1420189499Srnoland	RADEON_WRITE(R600_SPI_CONFIG_CNTL, R600_GPR_WRITE_PRIORITY(0));
1421189499Srnoland
1422189499Srnoland	RADEON_WRITE(R600_SPI_CONFIG_CNTL_1, R600_VTX_DONE_DELAY(4));
1423189499Srnoland
1424189499Srnoland	RADEON_WRITE(R600_CP_PERFMON_CNTL, 0);
1425189499Srnoland
1426189499Srnoland	sq_ms_fifo_sizes = (R600_CACHE_FIFO_SIZE(16 * dev_priv->r600_sq_num_cf_insts) |
1427189499Srnoland			    R600_DONE_FIFO_HIWATER(0xe0) |
1428189499Srnoland			    R600_ALU_UPDATE_FIFO_HIWATER(0x8));
1429189499Srnoland	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
1430189499Srnoland	case CHIP_RV770:
1431189499Srnoland		sq_ms_fifo_sizes |= R600_FETCH_FIFO_HIWATER(0x1);
1432189499Srnoland		break;
1433189499Srnoland	case CHIP_RV730:
1434189499Srnoland	case CHIP_RV710:
1435189499Srnoland	default:
1436189499Srnoland		sq_ms_fifo_sizes |= R600_FETCH_FIFO_HIWATER(0x4);
1437189499Srnoland		break;
1438189499Srnoland	}
1439189499Srnoland	RADEON_WRITE(R600_SQ_MS_FIFO_SIZES, sq_ms_fifo_sizes);
1440189499Srnoland
1441189499Srnoland	/* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT
1442189499Srnoland	 * should be adjusted as needed by the 2D/3D drivers.  This just sets default values
1443189499Srnoland	 */
1444189499Srnoland	sq_config = RADEON_READ(R600_SQ_CONFIG);
1445189499Srnoland	sq_config &= ~(R600_PS_PRIO(3) |
1446189499Srnoland		       R600_VS_PRIO(3) |
1447189499Srnoland		       R600_GS_PRIO(3) |
1448189499Srnoland		       R600_ES_PRIO(3));
1449189499Srnoland	sq_config |= (R600_DX9_CONSTS |
1450189499Srnoland		      R600_VC_ENABLE |
1451189499Srnoland		      R600_EXPORT_SRC_C |
1452189499Srnoland		      R600_PS_PRIO(0) |
1453189499Srnoland		      R600_VS_PRIO(1) |
1454189499Srnoland		      R600_GS_PRIO(2) |
1455189499Srnoland		      R600_ES_PRIO(3));
1456189499Srnoland	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710)
1457189499Srnoland		/* no vertex cache */
1458189499Srnoland		sq_config &= ~R600_VC_ENABLE;
1459189499Srnoland
1460189499Srnoland	RADEON_WRITE(R600_SQ_CONFIG, sq_config);
1461189499Srnoland
1462189499Srnoland	RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_1,  (R600_NUM_PS_GPRS((dev_priv->r600_max_gprs * 24)/64) |
1463189499Srnoland						    R600_NUM_VS_GPRS((dev_priv->r600_max_gprs * 24)/64) |
1464189499Srnoland						    R600_NUM_CLAUSE_TEMP_GPRS(((dev_priv->r600_max_gprs * 24)/64)/2)));
1465189499Srnoland
1466189499Srnoland	RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_2,  (R600_NUM_GS_GPRS((dev_priv->r600_max_gprs * 7)/64) |
1467189499Srnoland						    R600_NUM_ES_GPRS((dev_priv->r600_max_gprs * 7)/64)));
1468189499Srnoland
1469189499Srnoland	sq_thread_resource_mgmt = (R600_NUM_PS_THREADS((dev_priv->r600_max_threads * 4)/8) |
1470189499Srnoland				   R600_NUM_VS_THREADS((dev_priv->r600_max_threads * 2)/8) |
1471189499Srnoland				   R600_NUM_ES_THREADS((dev_priv->r600_max_threads * 1)/8));
1472189499Srnoland	if (((dev_priv->r600_max_threads * 1) / 8) > dev_priv->r600_max_gs_threads)
1473189499Srnoland		sq_thread_resource_mgmt |= R600_NUM_GS_THREADS(dev_priv->r600_max_gs_threads);
1474189499Srnoland	else
1475189499Srnoland		sq_thread_resource_mgmt |= R600_NUM_GS_THREADS((dev_priv->r600_max_gs_threads * 1)/8);
1476189499Srnoland	RADEON_WRITE(R600_SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt);
1477189499Srnoland
1478189499Srnoland	RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_1, (R600_NUM_PS_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4) |
1479189499Srnoland						     R600_NUM_VS_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4)));
1480189499Srnoland
1481189499Srnoland	RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_2, (R600_NUM_GS_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4) |
1482189499Srnoland						     R600_NUM_ES_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4)));
1483189499Srnoland
1484189499Srnoland	sq_dyn_gpr_size_simd_ab_0 = (R700_SIMDA_RING0((dev_priv->r600_max_gprs * 38)/64) |
1485189499Srnoland				     R700_SIMDA_RING1((dev_priv->r600_max_gprs * 38)/64) |
1486189499Srnoland				     R700_SIMDB_RING0((dev_priv->r600_max_gprs * 38)/64) |
1487189499Srnoland				     R700_SIMDB_RING1((dev_priv->r600_max_gprs * 38)/64));
1488189499Srnoland
1489189499Srnoland	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_0, sq_dyn_gpr_size_simd_ab_0);
1490189499Srnoland	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_1, sq_dyn_gpr_size_simd_ab_0);
1491189499Srnoland	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_2, sq_dyn_gpr_size_simd_ab_0);
1492189499Srnoland	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_3, sq_dyn_gpr_size_simd_ab_0);
1493189499Srnoland	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_4, sq_dyn_gpr_size_simd_ab_0);
1494189499Srnoland	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_5, sq_dyn_gpr_size_simd_ab_0);
1495189499Srnoland	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_6, sq_dyn_gpr_size_simd_ab_0);
1496189499Srnoland	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_7, sq_dyn_gpr_size_simd_ab_0);
1497189499Srnoland
1498189499Srnoland	RADEON_WRITE(R700_PA_SC_FORCE_EOV_MAX_CNTS, (R700_FORCE_EOV_MAX_CLK_CNT(4095) |
1499189499Srnoland						     R700_FORCE_EOV_MAX_REZ_CNT(255)));
1500189499Srnoland
1501189499Srnoland	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710)
1502189499Srnoland		RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, (R600_CACHE_INVALIDATION(R600_TC_ONLY) |
1503189499Srnoland							   R700_AUTO_INVLD_EN(R700_ES_AND_GS_AUTO)));
1504189499Srnoland	else
1505189499Srnoland		RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, (R600_CACHE_INVALIDATION(R600_VC_AND_TC) |
1506189499Srnoland							   R700_AUTO_INVLD_EN(R700_ES_AND_GS_AUTO)));
1507189499Srnoland
1508189499Srnoland	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
1509189499Srnoland	case CHIP_RV770:
1510189499Srnoland	case CHIP_RV730:
1511189499Srnoland		gs_prim_buffer_depth = 384;
1512189499Srnoland		break;
1513189499Srnoland	case CHIP_RV710:
1514189499Srnoland		gs_prim_buffer_depth = 128;
1515189499Srnoland		break;
1516189499Srnoland	default:
1517189499Srnoland		break;
1518189499Srnoland	}
1519189499Srnoland
1520189499Srnoland	num_gs_verts_per_thread = dev_priv->r600_max_pipes * 16;
1521189499Srnoland	vgt_gs_per_es = gs_prim_buffer_depth + num_gs_verts_per_thread;
1522189499Srnoland	/* Max value for this is 256 */
1523189499Srnoland	if (vgt_gs_per_es > 256)
1524189499Srnoland		vgt_gs_per_es = 256;
1525189499Srnoland
1526189499Srnoland	RADEON_WRITE(R600_VGT_ES_PER_GS, 128);
1527189499Srnoland	RADEON_WRITE(R600_VGT_GS_PER_ES, vgt_gs_per_es);
1528189499Srnoland	RADEON_WRITE(R600_VGT_GS_PER_VS, 2);
1529189499Srnoland
1530189499Srnoland	/* more default values. 2D/3D driver should adjust as needed */
1531189499Srnoland	RADEON_WRITE(R600_VGT_GS_VERTEX_REUSE, 16);
1532189499Srnoland	RADEON_WRITE(R600_PA_SC_LINE_STIPPLE_STATE, 0);
1533189499Srnoland	RADEON_WRITE(R600_VGT_STRMOUT_EN, 0);
1534189499Srnoland	RADEON_WRITE(R600_SX_MISC, 0);
1535189499Srnoland	RADEON_WRITE(R600_PA_SC_MODE_CNTL, 0);
1536189499Srnoland	RADEON_WRITE(R700_PA_SC_EDGERULE, 0xaaaaaaaa);
1537189499Srnoland	RADEON_WRITE(R600_PA_SC_AA_CONFIG, 0);
1538189499Srnoland	RADEON_WRITE(R600_PA_SC_CLIPRECT_RULE, 0xffff);
1539189499Srnoland	RADEON_WRITE(R600_PA_SC_LINE_STIPPLE, 0);
1540189499Srnoland	RADEON_WRITE(R600_SPI_INPUT_Z, 0);
1541189499Srnoland	RADEON_WRITE(R600_SPI_PS_IN_CONTROL_0, R600_NUM_INTERP(2));
1542189499Srnoland	RADEON_WRITE(R600_CB_COLOR7_FRAG, 0);
1543189499Srnoland
1544189499Srnoland	/* clear render buffer base addresses */
1545189499Srnoland	RADEON_WRITE(R600_CB_COLOR0_BASE, 0);
1546189499Srnoland	RADEON_WRITE(R600_CB_COLOR1_BASE, 0);
1547189499Srnoland	RADEON_WRITE(R600_CB_COLOR2_BASE, 0);
1548189499Srnoland	RADEON_WRITE(R600_CB_COLOR3_BASE, 0);
1549189499Srnoland	RADEON_WRITE(R600_CB_COLOR4_BASE, 0);
1550189499Srnoland	RADEON_WRITE(R600_CB_COLOR5_BASE, 0);
1551189499Srnoland	RADEON_WRITE(R600_CB_COLOR6_BASE, 0);
1552189499Srnoland	RADEON_WRITE(R600_CB_COLOR7_BASE, 0);
1553189499Srnoland
1554189499Srnoland	RADEON_WRITE(R700_TCP_CNTL, 0);
1555189499Srnoland
1556189499Srnoland	hdp_host_path_cntl = RADEON_READ(R600_HDP_HOST_PATH_CNTL);
1557189499Srnoland	RADEON_WRITE(R600_HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
1558189499Srnoland
1559189499Srnoland	RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0);
1560189499Srnoland
1561189499Srnoland	RADEON_WRITE(R600_PA_CL_ENHANCE, (R600_CLIP_VTX_REORDER_ENA |
1562189499Srnoland					  R600_NUM_CLIP_SEQ(3)));
1563189499Srnoland
1564189499Srnoland}
1565189499Srnoland
1566189499Srnolandstatic void r600_cp_init_ring_buffer(struct drm_device *dev,
1567189499Srnoland				       drm_radeon_private_t *dev_priv,
1568189499Srnoland				       struct drm_file *file_priv)
1569189499Srnoland{
1570189499Srnoland	u32 ring_start;
1571189558Srnoland	u64 rptr_addr;
1572189499Srnoland
1573189499Srnoland	if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770))
1574189499Srnoland		r700_gfx_init(dev, dev_priv);
1575189499Srnoland	else
1576189499Srnoland		r600_gfx_init(dev, dev_priv);
1577189499Srnoland
1578189499Srnoland	RADEON_WRITE(R600_GRBM_SOFT_RESET, R600_SOFT_RESET_CP);
1579189499Srnoland	RADEON_READ(R600_GRBM_SOFT_RESET);
1580189499Srnoland	DRM_UDELAY(15000);
1581189499Srnoland	RADEON_WRITE(R600_GRBM_SOFT_RESET, 0);
1582189499Srnoland
1583189499Srnoland
1584189499Srnoland	/* Set ring buffer size */
1585189499Srnoland#ifdef __BIG_ENDIAN
1586189499Srnoland	RADEON_WRITE(R600_CP_RB_CNTL,
1587189499Srnoland		     RADEON_BUF_SWAP_32BIT |
1588189499Srnoland		     RADEON_RB_NO_UPDATE |
1589189499Srnoland		     (dev_priv->ring.rptr_update_l2qw << 8) |
1590189499Srnoland		     dev_priv->ring.size_l2qw);
1591189499Srnoland#else
1592189499Srnoland	RADEON_WRITE(R600_CP_RB_CNTL,
1593189499Srnoland		     RADEON_RB_NO_UPDATE |
1594189499Srnoland		     (dev_priv->ring.rptr_update_l2qw << 8) |
1595189499Srnoland		     dev_priv->ring.size_l2qw);
1596189499Srnoland#endif
1597189499Srnoland
1598189499Srnoland	RADEON_WRITE(R600_CP_SEM_WAIT_TIMER, 0x4);
1599189499Srnoland
1600189499Srnoland	/* Set the write pointer delay */
1601189499Srnoland	RADEON_WRITE(R600_CP_RB_WPTR_DELAY, 0);
1602189499Srnoland
1603189499Srnoland#ifdef __BIG_ENDIAN
1604189499Srnoland	RADEON_WRITE(R600_CP_RB_CNTL,
1605189499Srnoland		     RADEON_BUF_SWAP_32BIT |
1606189499Srnoland		     RADEON_RB_NO_UPDATE |
1607189499Srnoland		     RADEON_RB_RPTR_WR_ENA |
1608189499Srnoland		     (dev_priv->ring.rptr_update_l2qw << 8) |
1609189499Srnoland		     dev_priv->ring.size_l2qw);
1610189499Srnoland#else
1611189499Srnoland	RADEON_WRITE(R600_CP_RB_CNTL,
1612189499Srnoland		     RADEON_RB_NO_UPDATE |
1613189499Srnoland		     RADEON_RB_RPTR_WR_ENA |
1614189499Srnoland		     (dev_priv->ring.rptr_update_l2qw << 8) |
1615189499Srnoland		     dev_priv->ring.size_l2qw);
1616189499Srnoland#endif
1617189499Srnoland
1618189499Srnoland	/* Initialize the ring buffer's read and write pointers */
1619189499Srnoland	RADEON_WRITE(R600_CP_RB_RPTR_WR, 0);
1620189499Srnoland	RADEON_WRITE(R600_CP_RB_WPTR, 0);
1621189499Srnoland	SET_RING_HEAD(dev_priv, 0);
1622189499Srnoland	dev_priv->ring.tail = 0;
1623189499Srnoland
1624189499Srnoland#if __OS_HAS_AGP
1625189499Srnoland	if (dev_priv->flags & RADEON_IS_AGP) {
1626189558Srnoland		rptr_addr = dev_priv->ring_rptr->offset
1627189558Srnoland			- dev->agp->base +
1628189558Srnoland			dev_priv->gart_vm_start;
1629189499Srnoland	} else
1630189499Srnoland#endif
1631189499Srnoland	{
1632189558Srnoland		rptr_addr = dev_priv->ring_rptr->offset
1633189558Srnoland			- ((unsigned long) dev->sg->virtual)
1634189558Srnoland			+ dev_priv->gart_vm_start;
1635189499Srnoland	}
1636189558Srnoland	RADEON_WRITE(R600_CP_RB_RPTR_ADDR,
1637189558Srnoland		     rptr_addr & 0xffffffff);
1638189558Srnoland	RADEON_WRITE(R600_CP_RB_RPTR_ADDR_HI,
1639189558Srnoland		     upper_32_bits(rptr_addr));
1640189499Srnoland
1641189499Srnoland#ifdef __BIG_ENDIAN
1642189499Srnoland	RADEON_WRITE(R600_CP_RB_CNTL,
1643189499Srnoland		     RADEON_BUF_SWAP_32BIT |
1644189499Srnoland		     (dev_priv->ring.rptr_update_l2qw << 8) |
1645189499Srnoland		     dev_priv->ring.size_l2qw);
1646189499Srnoland#else
1647189499Srnoland	RADEON_WRITE(R600_CP_RB_CNTL,
1648189499Srnoland		     (dev_priv->ring.rptr_update_l2qw << 8) |
1649189499Srnoland		     dev_priv->ring.size_l2qw);
1650189499Srnoland#endif
1651189499Srnoland
1652189499Srnoland#if __OS_HAS_AGP
1653189499Srnoland	if (dev_priv->flags & RADEON_IS_AGP) {
1654189499Srnoland		/* XXX */
1655189499Srnoland		radeon_write_agp_base(dev_priv, dev->agp->base);
1656189499Srnoland
1657189499Srnoland		/* XXX */
1658189499Srnoland		radeon_write_agp_location(dev_priv,
1659189499Srnoland			     (((dev_priv->gart_vm_start - 1 +
1660189499Srnoland				dev_priv->gart_size) & 0xffff0000) |
1661189499Srnoland			      (dev_priv->gart_vm_start >> 16)));
1662189499Srnoland
1663189499Srnoland		ring_start = (dev_priv->cp_ring->offset
1664189499Srnoland			      - dev->agp->base
1665189499Srnoland			      + dev_priv->gart_vm_start);
1666189499Srnoland	} else
1667189499Srnoland#endif
1668189499Srnoland		ring_start = (dev_priv->cp_ring->offset
1669189499Srnoland			      - (unsigned long)dev->sg->virtual
1670189499Srnoland			      + dev_priv->gart_vm_start);
1671189499Srnoland
1672189499Srnoland	RADEON_WRITE(R600_CP_RB_BASE, ring_start >> 8);
1673189499Srnoland
1674189499Srnoland	RADEON_WRITE(R600_CP_ME_CNTL, 0xff);
1675189499Srnoland
1676189499Srnoland	RADEON_WRITE(R600_CP_DEBUG, (1 << 27) | (1 << 28));
1677189499Srnoland
1678189499Srnoland	/* Initialize the scratch register pointer.  This will cause
1679189499Srnoland	 * the scratch register values to be written out to memory
1680189499Srnoland	 * whenever they are updated.
1681189499Srnoland	 *
1682189499Srnoland	 * We simply put this behind the ring read pointer, this works
1683189499Srnoland	 * with PCI GART as well as (whatever kind of) AGP GART
1684189499Srnoland	 */
1685189558Srnoland	{
1686189558Srnoland		u64 scratch_addr;
1687189499Srnoland
1688189558Srnoland		scratch_addr = RADEON_READ(R600_CP_RB_RPTR_ADDR);
1689189558Srnoland		scratch_addr |= ((u64)RADEON_READ(R600_CP_RB_RPTR_ADDR_HI)) << 32;
1690189558Srnoland		scratch_addr += R600_SCRATCH_REG_OFFSET;
1691189558Srnoland		scratch_addr >>= 8;
1692189558Srnoland		scratch_addr &= 0xffffffff;
1693189558Srnoland
1694189558Srnoland		RADEON_WRITE(R600_SCRATCH_ADDR, (uint32_t)scratch_addr);
1695189558Srnoland	}
1696189558Srnoland
1697189499Srnoland	RADEON_WRITE(R600_SCRATCH_UMSK, 0x7);
1698189499Srnoland
1699189499Srnoland	/* Turn on bus mastering */
1700189499Srnoland	radeon_enable_bm(dev_priv);
1701189499Srnoland
1702189499Srnoland	radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(0), 0);
1703189499Srnoland	RADEON_WRITE(R600_LAST_FRAME_REG, 0);
1704189499Srnoland
1705189499Srnoland	radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(1), 0);
1706189499Srnoland	RADEON_WRITE(R600_LAST_DISPATCH_REG, 0);
1707189499Srnoland
1708189499Srnoland	radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(2), 0);
1709189499Srnoland	RADEON_WRITE(R600_LAST_CLEAR_REG, 0);
1710189499Srnoland
1711189499Srnoland	/* reset sarea copies of these */
1712189499Srnoland	if (dev_priv->sarea_priv) {
1713189499Srnoland		dev_priv->sarea_priv->last_frame = 0;
1714189499Srnoland		dev_priv->sarea_priv->last_dispatch = 0;
1715189499Srnoland		dev_priv->sarea_priv->last_clear = 0;
1716189499Srnoland	}
1717189499Srnoland
1718189499Srnoland	r600_do_wait_for_idle(dev_priv);
1719189499Srnoland
1720189499Srnoland}
1721189499Srnoland
1722189499Srnolandint r600_do_cleanup_cp(struct drm_device *dev)
1723189499Srnoland{
1724189499Srnoland	drm_radeon_private_t *dev_priv = dev->dev_private;
1725189499Srnoland	DRM_DEBUG("\n");
1726189499Srnoland
1727189499Srnoland	/* Make sure interrupts are disabled here because the uninstall ioctl
1728189499Srnoland	 * may not have been called from userspace and after dev_private
1729189499Srnoland	 * is freed, it's too late.
1730189499Srnoland	 */
1731189499Srnoland	if (dev->irq_enabled)
1732189499Srnoland		drm_irq_uninstall(dev);
1733189499Srnoland
1734189499Srnoland#if __OS_HAS_AGP
1735189499Srnoland	if (dev_priv->flags & RADEON_IS_AGP) {
1736189499Srnoland		if (dev_priv->cp_ring != NULL) {
1737189499Srnoland			drm_core_ioremapfree(dev_priv->cp_ring, dev);
1738189499Srnoland			dev_priv->cp_ring = NULL;
1739189499Srnoland		}
1740189499Srnoland		if (dev_priv->ring_rptr != NULL) {
1741189499Srnoland			drm_core_ioremapfree(dev_priv->ring_rptr, dev);
1742189499Srnoland			dev_priv->ring_rptr = NULL;
1743189499Srnoland		}
1744189499Srnoland		if (dev->agp_buffer_map != NULL) {
1745189499Srnoland			drm_core_ioremapfree(dev->agp_buffer_map, dev);
1746189499Srnoland			dev->agp_buffer_map = NULL;
1747189499Srnoland		}
1748189499Srnoland	} else
1749189499Srnoland#endif
1750189499Srnoland	{
1751189499Srnoland
1752189499Srnoland		if (dev_priv->gart_info.bus_addr)
1753189499Srnoland			r600_page_table_cleanup(dev, &dev_priv->gart_info);
1754189499Srnoland
1755189499Srnoland		if (dev_priv->gart_info.gart_table_location == DRM_ATI_GART_FB) {
1756189499Srnoland			drm_core_ioremapfree(&dev_priv->gart_info.mapping, dev);
1757189499Srnoland			dev_priv->gart_info.addr = 0;
1758189499Srnoland		}
1759189499Srnoland	}
1760189499Srnoland	/* only clear to the start of flags */
1761189499Srnoland	memset(dev_priv, 0, offsetof(drm_radeon_private_t, flags));
1762189499Srnoland
1763189499Srnoland	return 0;
1764189499Srnoland}
1765189499Srnoland
1766189499Srnolandint r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init,
1767189499Srnoland		    struct drm_file *file_priv)
1768189499Srnoland{
1769189499Srnoland	drm_radeon_private_t *dev_priv = dev->dev_private;
1770189499Srnoland
1771189499Srnoland	DRM_DEBUG("\n");
1772189499Srnoland
1773189499Srnoland	/* if we require new memory map but we don't have it fail */
1774189499Srnoland	if ((dev_priv->flags & RADEON_NEW_MEMMAP) && !dev_priv->new_memmap) {
1775189499Srnoland		DRM_ERROR("Cannot initialise DRM on this card\nThis card requires a new X.org DDX for 3D\n");
1776189499Srnoland		r600_do_cleanup_cp(dev);
1777189499Srnoland		return -EINVAL;
1778189499Srnoland	}
1779189499Srnoland
1780189499Srnoland	if (init->is_pci && (dev_priv->flags & RADEON_IS_AGP)) {
1781189499Srnoland		DRM_DEBUG("Forcing AGP card to PCI mode\n");
1782189499Srnoland		dev_priv->flags &= ~RADEON_IS_AGP;
1783189499Srnoland		/* The writeback test succeeds, but when writeback is enabled,
1784189499Srnoland		 * the ring buffer read ptr update fails after first 128 bytes.
1785189499Srnoland		 */
1786189499Srnoland		radeon_no_wb = 1;
1787189499Srnoland	} else if (!(dev_priv->flags & (RADEON_IS_AGP | RADEON_IS_PCI | RADEON_IS_PCIE))
1788189499Srnoland		 && !init->is_pci) {
1789189499Srnoland		DRM_DEBUG("Restoring AGP flag\n");
1790189499Srnoland		dev_priv->flags |= RADEON_IS_AGP;
1791189499Srnoland	}
1792189499Srnoland
1793189499Srnoland	dev_priv->usec_timeout = init->usec_timeout;
1794189499Srnoland	if (dev_priv->usec_timeout < 1 ||
1795189499Srnoland	    dev_priv->usec_timeout > RADEON_MAX_USEC_TIMEOUT) {
1796189499Srnoland		DRM_DEBUG("TIMEOUT problem!\n");
1797189499Srnoland		r600_do_cleanup_cp(dev);
1798189499Srnoland		return -EINVAL;
1799189499Srnoland	}
1800189499Srnoland
1801189499Srnoland	/* Enable vblank on CRTC1 for older X servers
1802189499Srnoland	 */
1803189499Srnoland	dev_priv->vblank_crtc = DRM_RADEON_VBLANK_CRTC1;
1804189499Srnoland
1805189499Srnoland	dev_priv->cp_mode = init->cp_mode;
1806189499Srnoland
1807189499Srnoland	/* We don't support anything other than bus-mastering ring mode,
1808189499Srnoland	 * but the ring can be in either AGP or PCI space for the ring
1809189499Srnoland	 * read pointer.
1810189499Srnoland	 */
1811189499Srnoland	if ((init->cp_mode != RADEON_CSQ_PRIBM_INDDIS) &&
1812189499Srnoland	    (init->cp_mode != RADEON_CSQ_PRIBM_INDBM)) {
1813189499Srnoland		DRM_DEBUG("BAD cp_mode (%x)!\n", init->cp_mode);
1814189499Srnoland		r600_do_cleanup_cp(dev);
1815189499Srnoland		return -EINVAL;
1816189499Srnoland	}
1817189499Srnoland
1818189499Srnoland	switch (init->fb_bpp) {
1819189499Srnoland	case 16:
1820189499Srnoland		dev_priv->color_fmt = RADEON_COLOR_FORMAT_RGB565;
1821189499Srnoland		break;
1822189499Srnoland	case 32:
1823189499Srnoland	default:
1824189499Srnoland		dev_priv->color_fmt = RADEON_COLOR_FORMAT_ARGB8888;
1825189499Srnoland		break;
1826189499Srnoland	}
1827189499Srnoland	dev_priv->front_offset = init->front_offset;
1828189499Srnoland	dev_priv->front_pitch = init->front_pitch;
1829189499Srnoland	dev_priv->back_offset = init->back_offset;
1830189499Srnoland	dev_priv->back_pitch = init->back_pitch;
1831189499Srnoland
1832189499Srnoland	dev_priv->ring_offset = init->ring_offset;
1833189499Srnoland	dev_priv->ring_rptr_offset = init->ring_rptr_offset;
1834189499Srnoland	dev_priv->buffers_offset = init->buffers_offset;
1835189499Srnoland	dev_priv->gart_textures_offset = init->gart_textures_offset;
1836189499Srnoland
1837189499Srnoland	dev_priv->sarea = drm_getsarea(dev);
1838189499Srnoland	if (!dev_priv->sarea) {
1839189499Srnoland		DRM_ERROR("could not find sarea!\n");
1840189499Srnoland		r600_do_cleanup_cp(dev);
1841189499Srnoland		return -EINVAL;
1842189499Srnoland	}
1843189499Srnoland
1844189499Srnoland	dev_priv->cp_ring = drm_core_findmap(dev, init->ring_offset);
1845189499Srnoland	if (!dev_priv->cp_ring) {
1846189499Srnoland		DRM_ERROR("could not find cp ring region!\n");
1847189499Srnoland		r600_do_cleanup_cp(dev);
1848189499Srnoland		return -EINVAL;
1849189499Srnoland	}
1850189499Srnoland	dev_priv->ring_rptr = drm_core_findmap(dev, init->ring_rptr_offset);
1851189499Srnoland	if (!dev_priv->ring_rptr) {
1852189499Srnoland		DRM_ERROR("could not find ring read pointer!\n");
1853189499Srnoland		r600_do_cleanup_cp(dev);
1854189499Srnoland		return -EINVAL;
1855189499Srnoland	}
1856189499Srnoland	dev->agp_buffer_token = init->buffers_offset;
1857189499Srnoland	dev->agp_buffer_map = drm_core_findmap(dev, init->buffers_offset);
1858189499Srnoland	if (!dev->agp_buffer_map) {
1859189499Srnoland		DRM_ERROR("could not find dma buffer region!\n");
1860189499Srnoland		r600_do_cleanup_cp(dev);
1861189499Srnoland		return -EINVAL;
1862189499Srnoland	}
1863189499Srnoland
1864189499Srnoland	if (init->gart_textures_offset) {
1865189499Srnoland		dev_priv->gart_textures =
1866189499Srnoland		    drm_core_findmap(dev, init->gart_textures_offset);
1867189499Srnoland		if (!dev_priv->gart_textures) {
1868189499Srnoland			DRM_ERROR("could not find GART texture region!\n");
1869189499Srnoland			r600_do_cleanup_cp(dev);
1870189499Srnoland			return -EINVAL;
1871189499Srnoland		}
1872189499Srnoland	}
1873189499Srnoland
1874189499Srnoland	dev_priv->sarea_priv =
1875189499Srnoland	    (drm_radeon_sarea_t *) ((u8 *) dev_priv->sarea->handle +
1876189499Srnoland				    init->sarea_priv_offset);
1877189499Srnoland
1878189499Srnoland#if __OS_HAS_AGP
1879189499Srnoland	/* XXX */
1880189499Srnoland	if (dev_priv->flags & RADEON_IS_AGP) {
1881189499Srnoland		drm_core_ioremap_wc(dev_priv->cp_ring, dev);
1882189499Srnoland		drm_core_ioremap_wc(dev_priv->ring_rptr, dev);
1883189499Srnoland		drm_core_ioremap_wc(dev->agp_buffer_map, dev);
1884189499Srnoland		if (!dev_priv->cp_ring->handle ||
1885189499Srnoland		    !dev_priv->ring_rptr->handle ||
1886189499Srnoland		    !dev->agp_buffer_map->handle) {
1887189499Srnoland			DRM_ERROR("could not find ioremap agp regions!\n");
1888189499Srnoland			r600_do_cleanup_cp(dev);
1889189499Srnoland			return -EINVAL;
1890189499Srnoland		}
1891189499Srnoland	} else
1892189499Srnoland#endif
1893189499Srnoland	{
1894189499Srnoland		dev_priv->cp_ring->handle = (void *)dev_priv->cp_ring->offset;
1895189499Srnoland		dev_priv->ring_rptr->handle =
1896189499Srnoland		    (void *)dev_priv->ring_rptr->offset;
1897189499Srnoland		dev->agp_buffer_map->handle =
1898189499Srnoland		    (void *)dev->agp_buffer_map->offset;
1899189499Srnoland
1900189499Srnoland		DRM_DEBUG("dev_priv->cp_ring->handle %p\n",
1901189499Srnoland			  dev_priv->cp_ring->handle);
1902189499Srnoland		DRM_DEBUG("dev_priv->ring_rptr->handle %p\n",
1903189499Srnoland			  dev_priv->ring_rptr->handle);
1904189499Srnoland		DRM_DEBUG("dev->agp_buffer_map->handle %p\n",
1905189499Srnoland			  dev->agp_buffer_map->handle);
1906189499Srnoland	}
1907189499Srnoland
1908189499Srnoland	dev_priv->fb_location = (radeon_read_fb_location(dev_priv) & 0xffff) << 24;
1909189499Srnoland	dev_priv->fb_size =
1910189499Srnoland		(((radeon_read_fb_location(dev_priv) & 0xffff0000u) << 8) + 0x1000000)
1911189499Srnoland		- dev_priv->fb_location;
1912189499Srnoland
1913189499Srnoland	dev_priv->front_pitch_offset = (((dev_priv->front_pitch / 64) << 22) |
1914189499Srnoland					((dev_priv->front_offset
1915189499Srnoland					  + dev_priv->fb_location) >> 10));
1916189499Srnoland
1917189499Srnoland	dev_priv->back_pitch_offset = (((dev_priv->back_pitch / 64) << 22) |
1918189499Srnoland				       ((dev_priv->back_offset
1919189499Srnoland					 + dev_priv->fb_location) >> 10));
1920189499Srnoland
1921189499Srnoland	dev_priv->depth_pitch_offset = (((dev_priv->depth_pitch / 64) << 22) |
1922189499Srnoland					((dev_priv->depth_offset
1923189499Srnoland					  + dev_priv->fb_location) >> 10));
1924189499Srnoland
1925189499Srnoland	dev_priv->gart_size = init->gart_size;
1926189499Srnoland
1927189499Srnoland	/* New let's set the memory map ... */
1928189499Srnoland	if (dev_priv->new_memmap) {
1929189499Srnoland		u32 base = 0;
1930189499Srnoland
1931189499Srnoland		DRM_INFO("Setting GART location based on new memory map\n");
1932189499Srnoland
1933189499Srnoland		/* If using AGP, try to locate the AGP aperture at the same
1934189499Srnoland		 * location in the card and on the bus, though we have to
1935189499Srnoland		 * align it down.
1936189499Srnoland		 */
1937189499Srnoland#if __OS_HAS_AGP
1938189499Srnoland		/* XXX */
1939189499Srnoland		if (dev_priv->flags & RADEON_IS_AGP) {
1940189499Srnoland			base = dev->agp->base;
1941189499Srnoland			/* Check if valid */
1942189499Srnoland			if ((base + dev_priv->gart_size - 1) >= dev_priv->fb_location &&
1943189499Srnoland			    base < (dev_priv->fb_location + dev_priv->fb_size - 1)) {
1944189499Srnoland				DRM_INFO("Can't use AGP base @0x%08lx, won't fit\n",
1945189499Srnoland					 dev->agp->base);
1946189499Srnoland				base = 0;
1947189499Srnoland			}
1948189499Srnoland		}
1949189499Srnoland#endif
1950189499Srnoland		/* If not or if AGP is at 0 (Macs), try to put it elsewhere */
1951189499Srnoland		if (base == 0) {
1952189499Srnoland			base = dev_priv->fb_location + dev_priv->fb_size;
1953189499Srnoland			if (base < dev_priv->fb_location ||
1954189499Srnoland			    ((base + dev_priv->gart_size) & 0xfffffffful) < base)
1955189499Srnoland				base = dev_priv->fb_location
1956189499Srnoland					- dev_priv->gart_size;
1957189499Srnoland		}
1958189499Srnoland		dev_priv->gart_vm_start = base & 0xffc00000u;
1959189499Srnoland		if (dev_priv->gart_vm_start != base)
1960189499Srnoland			DRM_INFO("GART aligned down from 0x%08x to 0x%08x\n",
1961189499Srnoland				 base, dev_priv->gart_vm_start);
1962189499Srnoland	}
1963189499Srnoland
1964189499Srnoland#if __OS_HAS_AGP
1965189499Srnoland	/* XXX */
1966189499Srnoland	if (dev_priv->flags & RADEON_IS_AGP)
1967189499Srnoland		dev_priv->gart_buffers_offset = (dev->agp_buffer_map->offset
1968189499Srnoland						 - dev->agp->base
1969189499Srnoland						 + dev_priv->gart_vm_start);
1970189499Srnoland	else
1971189499Srnoland#endif
1972189499Srnoland		dev_priv->gart_buffers_offset = (dev->agp_buffer_map->offset
1973189499Srnoland						 - (unsigned long)dev->sg->virtual
1974189499Srnoland						 + dev_priv->gart_vm_start);
1975189499Srnoland
1976189499Srnoland	DRM_DEBUG("fb 0x%08x size %d\n",
1977189499Srnoland		  (unsigned int) dev_priv->fb_location,
1978189499Srnoland		  (unsigned int) dev_priv->fb_size);
1979189499Srnoland	DRM_DEBUG("dev_priv->gart_size %d\n", dev_priv->gart_size);
1980189499Srnoland	DRM_DEBUG("dev_priv->gart_vm_start 0x%08x\n",
1981189499Srnoland		  (unsigned int) dev_priv->gart_vm_start);
1982189499Srnoland	DRM_DEBUG("dev_priv->gart_buffers_offset 0x%08lx\n",
1983189499Srnoland		  dev_priv->gart_buffers_offset);
1984189499Srnoland
1985189499Srnoland	dev_priv->ring.start = (u32 *) dev_priv->cp_ring->handle;
1986189499Srnoland	dev_priv->ring.end = ((u32 *) dev_priv->cp_ring->handle
1987189499Srnoland			      + init->ring_size / sizeof(u32));
1988189499Srnoland	dev_priv->ring.size = init->ring_size;
1989189499Srnoland	dev_priv->ring.size_l2qw = drm_order(init->ring_size / 8);
1990189499Srnoland
1991189499Srnoland	dev_priv->ring.rptr_update = /* init->rptr_update */ 4096;
1992189499Srnoland	dev_priv->ring.rptr_update_l2qw = drm_order(/* init->rptr_update */ 4096 / 8);
1993189499Srnoland
1994189499Srnoland	dev_priv->ring.fetch_size = /* init->fetch_size */ 32;
1995189499Srnoland	dev_priv->ring.fetch_size_l2ow = drm_order(/* init->fetch_size */ 32 / 16);
1996189499Srnoland
1997189499Srnoland	dev_priv->ring.tail_mask = (dev_priv->ring.size / sizeof(u32)) - 1;
1998189499Srnoland
1999189499Srnoland	dev_priv->ring.high_mark = RADEON_RING_HIGH_MARK;
2000189499Srnoland
2001189499Srnoland#if __OS_HAS_AGP
2002189499Srnoland	if (dev_priv->flags & RADEON_IS_AGP) {
2003189499Srnoland		/* XXX turn off pcie gart */
2004189499Srnoland	} else
2005189499Srnoland#endif
2006189499Srnoland	{
2007189499Srnoland		dev_priv->gart_info.table_mask = DMA_BIT_MASK(32);
2008189499Srnoland		/* if we have an offset set from userspace */
2009189499Srnoland		if (!dev_priv->pcigart_offset_set) {
2010189499Srnoland			DRM_ERROR("Need gart offset from userspace\n");
2011189499Srnoland			r600_do_cleanup_cp(dev);
2012189499Srnoland			return -EINVAL;
2013189499Srnoland		}
2014189499Srnoland
2015189499Srnoland		DRM_DEBUG("Using gart offset 0x%08lx\n", dev_priv->pcigart_offset);
2016189499Srnoland
2017189499Srnoland		dev_priv->gart_info.bus_addr =
2018189499Srnoland			dev_priv->pcigart_offset + dev_priv->fb_location;
2019189499Srnoland		dev_priv->gart_info.mapping.offset =
2020189499Srnoland			dev_priv->pcigart_offset + dev_priv->fb_aper_offset;
2021189499Srnoland		dev_priv->gart_info.mapping.size =
2022189499Srnoland			dev_priv->gart_info.table_size;
2023189499Srnoland
2024189499Srnoland		drm_core_ioremap_wc(&dev_priv->gart_info.mapping, dev);
2025189499Srnoland		if (!dev_priv->gart_info.mapping.handle) {
2026189499Srnoland			DRM_ERROR("ioremap failed.\n");
2027189499Srnoland			r600_do_cleanup_cp(dev);
2028189499Srnoland			return -EINVAL;
2029189499Srnoland		}
2030189499Srnoland
2031189499Srnoland		dev_priv->gart_info.addr =
2032189499Srnoland			dev_priv->gart_info.mapping.handle;
2033189499Srnoland
2034189499Srnoland		DRM_DEBUG("Setting phys_pci_gart to %p %08lX\n",
2035189499Srnoland			  dev_priv->gart_info.addr,
2036189499Srnoland			  dev_priv->pcigart_offset);
2037189499Srnoland
2038189909Srnoland		if (!r600_page_table_init(dev)) {
2039189499Srnoland			DRM_ERROR("Failed to init GART table\n");
2040189499Srnoland			r600_do_cleanup_cp(dev);
2041189499Srnoland			return -EINVAL;
2042189499Srnoland		}
2043189499Srnoland
2044189499Srnoland		if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770))
2045189499Srnoland			r700_vm_init(dev);
2046189499Srnoland		else
2047189499Srnoland			r600_vm_init(dev);
2048189499Srnoland	}
2049189499Srnoland
2050189499Srnoland	if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770))
2051189499Srnoland		r700_cp_load_microcode(dev_priv);
2052189499Srnoland	else
2053189499Srnoland		r600_cp_load_microcode(dev_priv);
2054189499Srnoland
2055189499Srnoland	r600_cp_init_ring_buffer(dev, dev_priv, file_priv);
2056189499Srnoland
2057189499Srnoland	dev_priv->last_buf = 0;
2058189499Srnoland
2059189499Srnoland	r600_do_engine_reset(dev);
2060189499Srnoland	r600_test_writeback(dev_priv);
2061189499Srnoland
2062189499Srnoland	return 0;
2063189499Srnoland}
2064189499Srnoland
2065189499Srnolandint r600_do_resume_cp(struct drm_device *dev, struct drm_file *file_priv)
2066189499Srnoland{
2067189499Srnoland	drm_radeon_private_t *dev_priv = dev->dev_private;
2068189499Srnoland
2069189499Srnoland	DRM_DEBUG("\n");
2070189499Srnoland	if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770)) {
2071189499Srnoland		r700_vm_init(dev);
2072189499Srnoland		r700_cp_load_microcode(dev_priv);
2073189499Srnoland	} else {
2074189499Srnoland		r600_vm_init(dev);
2075189499Srnoland		r600_cp_load_microcode(dev_priv);
2076189499Srnoland	}
2077189499Srnoland	r600_cp_init_ring_buffer(dev, dev_priv, file_priv);
2078189499Srnoland	r600_do_engine_reset(dev);
2079189499Srnoland
2080189499Srnoland	return 0;
2081189499Srnoland}
2082189499Srnoland
2083189499Srnoland/* Wait for the CP to go idle.
2084189499Srnoland */
2085189499Srnolandint r600_do_cp_idle(drm_radeon_private_t *dev_priv)
2086189499Srnoland{
2087189499Srnoland	RING_LOCALS;
2088189499Srnoland	DRM_DEBUG("\n");
2089189499Srnoland
2090189499Srnoland	BEGIN_RING(5);
2091189499Srnoland	OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
2092189499Srnoland	OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
2093189499Srnoland	/* wait for 3D idle clean */
2094189499Srnoland	OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
2095189499Srnoland	OUT_RING((R600_WAIT_UNTIL - R600_SET_CONFIG_REG_OFFSET) >> 2);
2096189499Srnoland	OUT_RING(RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN);
2097189499Srnoland
2098189499Srnoland	ADVANCE_RING();
2099189499Srnoland	COMMIT_RING();
2100189499Srnoland
2101189499Srnoland	return r600_do_wait_for_idle(dev_priv);
2102189499Srnoland}
2103189499Srnoland
2104189499Srnoland/* Start the Command Processor.
2105189499Srnoland */
2106189499Srnolandvoid r600_do_cp_start(drm_radeon_private_t *dev_priv)
2107189499Srnoland{
2108189499Srnoland	u32 cp_me;
2109189499Srnoland	RING_LOCALS;
2110189499Srnoland	DRM_DEBUG("\n");
2111189499Srnoland
2112189499Srnoland	BEGIN_RING(7);
2113189499Srnoland	OUT_RING(CP_PACKET3(R600_IT_ME_INITIALIZE, 5));
2114189499Srnoland	OUT_RING(0x00000001);
2115189499Srnoland	if (((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV770))
2116189499Srnoland		OUT_RING(0x00000003);
2117189499Srnoland	else
2118189499Srnoland		OUT_RING(0x00000000);
2119189499Srnoland	OUT_RING((dev_priv->r600_max_hw_contexts - 1));
2120189499Srnoland	OUT_RING(R600_ME_INITIALIZE_DEVICE_ID(1));
2121189499Srnoland	OUT_RING(0x00000000);
2122189499Srnoland	OUT_RING(0x00000000);
2123189499Srnoland	ADVANCE_RING();
2124189499Srnoland	COMMIT_RING();
2125189499Srnoland
2126189499Srnoland	/* set the mux and reset the halt bit */
2127189499Srnoland	cp_me = 0xff;
2128189499Srnoland	RADEON_WRITE(R600_CP_ME_CNTL, cp_me);
2129189499Srnoland
2130189499Srnoland	dev_priv->cp_running = 1;
2131189499Srnoland
2132189499Srnoland}
2133189499Srnoland
2134189499Srnolandvoid r600_do_cp_reset(drm_radeon_private_t *dev_priv)
2135189499Srnoland{
2136189499Srnoland	u32 cur_read_ptr;
2137189499Srnoland	DRM_DEBUG("\n");
2138189499Srnoland
2139189499Srnoland	cur_read_ptr = RADEON_READ(R600_CP_RB_RPTR);
2140189499Srnoland	RADEON_WRITE(R600_CP_RB_WPTR, cur_read_ptr);
2141189499Srnoland	SET_RING_HEAD(dev_priv, cur_read_ptr);
2142189499Srnoland	dev_priv->ring.tail = cur_read_ptr;
2143189499Srnoland}
2144189499Srnoland
2145189499Srnolandvoid r600_do_cp_stop(drm_radeon_private_t *dev_priv)
2146189499Srnoland{
2147189499Srnoland	uint32_t cp_me;
2148189499Srnoland
2149189499Srnoland	DRM_DEBUG("\n");
2150189499Srnoland
2151189499Srnoland	cp_me = 0xff | R600_CP_ME_HALT;
2152189499Srnoland
2153189499Srnoland	RADEON_WRITE(R600_CP_ME_CNTL, cp_me);
2154189499Srnoland
2155189499Srnoland	dev_priv->cp_running = 0;
2156189499Srnoland}
2157189499Srnoland
2158189499Srnolandint r600_cp_dispatch_indirect(struct drm_device *dev,
2159189499Srnoland			      struct drm_buf *buf, int start, int end)
2160189499Srnoland{
2161189499Srnoland	drm_radeon_private_t *dev_priv = dev->dev_private;
2162189499Srnoland	RING_LOCALS;
2163189499Srnoland
2164189499Srnoland	if (start != end) {
2165189499Srnoland		unsigned long offset = (dev_priv->gart_buffers_offset
2166189499Srnoland					+ buf->offset + start);
2167189499Srnoland		int dwords = (end - start + 3) / sizeof(u32);
2168189499Srnoland
2169189499Srnoland		DRM_DEBUG("dwords:%d\n", dwords);
2170189499Srnoland		DRM_DEBUG("offset 0x%lx\n", offset);
2171189499Srnoland
2172189499Srnoland
2173189499Srnoland		/* Indirect buffer data must be a multiple of 16 dwords.
2174189499Srnoland		 * pad the data with a Type-2 CP packet.
2175189499Srnoland		 */
2176189499Srnoland		while (dwords & 0xf) {
2177189499Srnoland			u32 *data = (u32 *)
2178189499Srnoland			    ((char *)dev->agp_buffer_map->handle
2179189499Srnoland			     + buf->offset + start);
2180189499Srnoland			data[dwords++] = RADEON_CP_PACKET2;
2181189499Srnoland		}
2182189499Srnoland
2183189499Srnoland		/* Fire off the indirect buffer */
2184189499Srnoland		BEGIN_RING(4);
2185189499Srnoland		OUT_RING(CP_PACKET3(R600_IT_INDIRECT_BUFFER, 2));
2186189499Srnoland		OUT_RING((offset & 0xfffffffc));
2187189499Srnoland		OUT_RING((upper_32_bits(offset) & 0xff));
2188189499Srnoland		OUT_RING(dwords);
2189189499Srnoland		ADVANCE_RING();
2190189499Srnoland	}
2191189499Srnoland
2192189499Srnoland	return 0;
2193189499Srnoland}
2194