1254885Sdumbbell/*
2254885Sdumbbell * Copyright 2009 Advanced Micro Devices, Inc.
3254885Sdumbbell *
4254885Sdumbbell * Permission is hereby granted, free of charge, to any person obtaining a
5254885Sdumbbell * copy of this software and associated documentation files (the "Software"),
6254885Sdumbbell * to deal in the Software without restriction, including without limitation
7254885Sdumbbell * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8254885Sdumbbell * and/or sell copies of the Software, and to permit persons to whom the
9254885Sdumbbell * Software is furnished to do so, subject to the following conditions:
10254885Sdumbbell *
11254885Sdumbbell * The above copyright notice and this permission notice (including the next
12254885Sdumbbell * paragraph) shall be included in all copies or substantial portions of the
13254885Sdumbbell * Software.
14254885Sdumbbell *
15254885Sdumbbell * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16254885Sdumbbell * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17254885Sdumbbell * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18254885Sdumbbell * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19254885Sdumbbell * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20254885Sdumbbell * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21254885Sdumbbell * DEALINGS IN THE SOFTWARE.
22254885Sdumbbell *
23254885Sdumbbell * Authors:
24254885Sdumbbell *     Alex Deucher <alexander.deucher@amd.com>
25254885Sdumbbell */
26254885Sdumbbell
27254885Sdumbbell#include <sys/cdefs.h>
28254885Sdumbbell__FBSDID("$FreeBSD$");
29254885Sdumbbell
30254885Sdumbbell#include <dev/drm2/drmP.h>
31254885Sdumbbell#include <dev/drm2/radeon/radeon_drm.h>
32254885Sdumbbell#include "radeon_drv.h"
33254885Sdumbbell
34254885Sdumbbell#include "r600_blit_shaders.h"
35254885Sdumbbell
/* Values emitted into the ring by draw_auto(). */
#define DI_PT_RECTLIST		0x11	/* written to R600_VGT_PRIMITIVE_TYPE */
#define DI_INDEX_SIZE_16_BIT	0x0	/* payload of the INDEX_TYPE packet */
#define DI_SRC_SEL_AUTO_INDEX	0x2	/* last dword of DRAW_INDEX_AUTO */

/* Format codes handed to set_tex_resource() as its "format" argument. */
#define FMT_8			0x1
#define FMT_5_6_5		0x8
#define FMT_8_8_8_8		0x1a

/* Format codes handed to set_render_target() as its "format" argument. */
#define COLOR_8			0x1
#define COLOR_5_6_5		0x8
#define COLOR_8_8_8_8		0x1a
47254885Sdumbbellstatic void
48254885Sdumbbellset_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 gpu_addr)
49254885Sdumbbell{
50254885Sdumbbell	u32 cb_color_info;
51254885Sdumbbell	int pitch, slice;
52254885Sdumbbell	RING_LOCALS;
53254885Sdumbbell	DRM_DEBUG("\n");
54254885Sdumbbell
55254885Sdumbbell	h = roundup2(h, 8);
56254885Sdumbbell	if (h < 8)
57254885Sdumbbell		h = 8;
58254885Sdumbbell
59254885Sdumbbell	cb_color_info = ((format << 2) | (1 << 27));
60254885Sdumbbell	pitch = (w / 8) - 1;
61254885Sdumbbell	slice = ((w * h) / 64) - 1;
62254885Sdumbbell
63254885Sdumbbell	if (((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_R600) &&
64254885Sdumbbell	    ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV770)) {
65254885Sdumbbell		BEGIN_RING(21 + 2);
66254885Sdumbbell		OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
67254885Sdumbbell		OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
68254885Sdumbbell		OUT_RING(gpu_addr >> 8);
69254885Sdumbbell		OUT_RING(CP_PACKET3(R600_IT_SURFACE_BASE_UPDATE, 0));
70254885Sdumbbell		OUT_RING(2 << 0);
71254885Sdumbbell	} else {
72254885Sdumbbell		BEGIN_RING(21);
73254885Sdumbbell		OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
74254885Sdumbbell		OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
75254885Sdumbbell		OUT_RING(gpu_addr >> 8);
76254885Sdumbbell	}
77254885Sdumbbell
78254885Sdumbbell	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
79254885Sdumbbell	OUT_RING((R600_CB_COLOR0_SIZE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
80254885Sdumbbell	OUT_RING((pitch << 0) | (slice << 10));
81254885Sdumbbell
82254885Sdumbbell	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
83254885Sdumbbell	OUT_RING((R600_CB_COLOR0_VIEW - R600_SET_CONTEXT_REG_OFFSET) >> 2);
84254885Sdumbbell	OUT_RING(0);
85254885Sdumbbell
86254885Sdumbbell	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
87254885Sdumbbell	OUT_RING((R600_CB_COLOR0_INFO - R600_SET_CONTEXT_REG_OFFSET) >> 2);
88254885Sdumbbell	OUT_RING(cb_color_info);
89254885Sdumbbell
90254885Sdumbbell	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
91254885Sdumbbell	OUT_RING((R600_CB_COLOR0_TILE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
92254885Sdumbbell	OUT_RING(0);
93254885Sdumbbell
94254885Sdumbbell	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
95254885Sdumbbell	OUT_RING((R600_CB_COLOR0_FRAG - R600_SET_CONTEXT_REG_OFFSET) >> 2);
96254885Sdumbbell	OUT_RING(0);
97254885Sdumbbell
98254885Sdumbbell	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
99254885Sdumbbell	OUT_RING((R600_CB_COLOR0_MASK - R600_SET_CONTEXT_REG_OFFSET) >> 2);
100254885Sdumbbell	OUT_RING(0);
101254885Sdumbbell
102254885Sdumbbell	ADVANCE_RING();
103254885Sdumbbell}
104254885Sdumbbell
105254885Sdumbbellstatic void
106254885Sdumbbellcp_set_surface_sync(drm_radeon_private_t *dev_priv,
107254885Sdumbbell		    u32 sync_type, u32 size, u64 mc_addr)
108254885Sdumbbell{
109254885Sdumbbell	u32 cp_coher_size;
110254885Sdumbbell	RING_LOCALS;
111254885Sdumbbell	DRM_DEBUG("\n");
112254885Sdumbbell
113254885Sdumbbell	if (size == 0xffffffff)
114254885Sdumbbell		cp_coher_size = 0xffffffff;
115254885Sdumbbell	else
116254885Sdumbbell		cp_coher_size = ((size + 255) >> 8);
117254885Sdumbbell
118254885Sdumbbell	BEGIN_RING(5);
119254885Sdumbbell	OUT_RING(CP_PACKET3(R600_IT_SURFACE_SYNC, 3));
120254885Sdumbbell	OUT_RING(sync_type);
121254885Sdumbbell	OUT_RING(cp_coher_size);
122254885Sdumbbell	OUT_RING((mc_addr >> 8));
123254885Sdumbbell	OUT_RING(10); /* poll interval */
124254885Sdumbbell	ADVANCE_RING();
125254885Sdumbbell}
126254885Sdumbbell
127254885Sdumbbellstatic void
128254885Sdumbbellset_shaders(struct drm_device *dev)
129254885Sdumbbell{
130254885Sdumbbell	drm_radeon_private_t *dev_priv = dev->dev_private;
131254885Sdumbbell	u64 gpu_addr;
132254885Sdumbbell	int i;
133254885Sdumbbell	u32 *vs, *ps;
134254885Sdumbbell	uint32_t sq_pgm_resources;
135254885Sdumbbell	RING_LOCALS;
136254885Sdumbbell	DRM_DEBUG("\n");
137254885Sdumbbell
138254885Sdumbbell	/* load shaders */
139254885Sdumbbell	vs = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset);
140254885Sdumbbell	ps = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset + 256);
141254885Sdumbbell
142254885Sdumbbell	for (i = 0; i < r6xx_vs_size; i++)
143254885Sdumbbell		vs[i] = cpu_to_le32(r6xx_vs[i]);
144254885Sdumbbell	for (i = 0; i < r6xx_ps_size; i++)
145254885Sdumbbell		ps[i] = cpu_to_le32(r6xx_ps[i]);
146254885Sdumbbell
147254885Sdumbbell	dev_priv->blit_vb->used = 512;
148254885Sdumbbell
149254885Sdumbbell	gpu_addr = dev_priv->gart_buffers_offset + dev_priv->blit_vb->offset;
150254885Sdumbbell
151254885Sdumbbell	/* setup shader regs */
152254885Sdumbbell	sq_pgm_resources = (1 << 0);
153254885Sdumbbell
154254885Sdumbbell	BEGIN_RING(9 + 12);
155254885Sdumbbell	/* VS */
156254885Sdumbbell	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
157254885Sdumbbell	OUT_RING((R600_SQ_PGM_START_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
158254885Sdumbbell	OUT_RING(gpu_addr >> 8);
159254885Sdumbbell
160254885Sdumbbell	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
161254885Sdumbbell	OUT_RING((R600_SQ_PGM_RESOURCES_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
162254885Sdumbbell	OUT_RING(sq_pgm_resources);
163254885Sdumbbell
164254885Sdumbbell	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
165254885Sdumbbell	OUT_RING((R600_SQ_PGM_CF_OFFSET_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
166254885Sdumbbell	OUT_RING(0);
167254885Sdumbbell
168254885Sdumbbell	/* PS */
169254885Sdumbbell	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
170254885Sdumbbell	OUT_RING((R600_SQ_PGM_START_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
171254885Sdumbbell	OUT_RING((gpu_addr + 256) >> 8);
172254885Sdumbbell
173254885Sdumbbell	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
174254885Sdumbbell	OUT_RING((R600_SQ_PGM_RESOURCES_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
175254885Sdumbbell	OUT_RING(sq_pgm_resources | (1 << 28));
176254885Sdumbbell
177254885Sdumbbell	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
178254885Sdumbbell	OUT_RING((R600_SQ_PGM_EXPORTS_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
179254885Sdumbbell	OUT_RING(2);
180254885Sdumbbell
181254885Sdumbbell	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
182254885Sdumbbell	OUT_RING((R600_SQ_PGM_CF_OFFSET_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
183254885Sdumbbell	OUT_RING(0);
184254885Sdumbbell	ADVANCE_RING();
185254885Sdumbbell
186254885Sdumbbell	cp_set_surface_sync(dev_priv,
187254885Sdumbbell			    R600_SH_ACTION_ENA, 512, gpu_addr);
188254885Sdumbbell}
189254885Sdumbbell
190254885Sdumbbellstatic void
191254885Sdumbbellset_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr)
192254885Sdumbbell{
193254885Sdumbbell	uint32_t sq_vtx_constant_word2;
194254885Sdumbbell	RING_LOCALS;
195254885Sdumbbell	DRM_DEBUG("\n");
196254885Sdumbbell
197254885Sdumbbell	sq_vtx_constant_word2 = (((gpu_addr >> 32) & 0xff) | (16 << 8));
198254885Sdumbbell#ifdef __BIG_ENDIAN
199254885Sdumbbell	sq_vtx_constant_word2 |= (2 << 30);
200254885Sdumbbell#endif
201254885Sdumbbell
202254885Sdumbbell	BEGIN_RING(9);
203254885Sdumbbell	OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
204254885Sdumbbell	OUT_RING(0x460);
205254885Sdumbbell	OUT_RING(gpu_addr & 0xffffffff);
206254885Sdumbbell	OUT_RING(48 - 1);
207254885Sdumbbell	OUT_RING(sq_vtx_constant_word2);
208254885Sdumbbell	OUT_RING(1 << 0);
209254885Sdumbbell	OUT_RING(0);
210254885Sdumbbell	OUT_RING(0);
211254885Sdumbbell	OUT_RING(R600_SQ_TEX_VTX_VALID_BUFFER << 30);
212254885Sdumbbell	ADVANCE_RING();
213254885Sdumbbell
214254885Sdumbbell	if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
215254885Sdumbbell	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
216254885Sdumbbell	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
217254885Sdumbbell	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) ||
218254885Sdumbbell	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710))
219254885Sdumbbell		cp_set_surface_sync(dev_priv,
220254885Sdumbbell				    R600_TC_ACTION_ENA, 48, gpu_addr);
221254885Sdumbbell	else
222254885Sdumbbell		cp_set_surface_sync(dev_priv,
223254885Sdumbbell				    R600_VC_ACTION_ENA, 48, gpu_addr);
224254885Sdumbbell}
225254885Sdumbbell
226254885Sdumbbellstatic void
227254885Sdumbbellset_tex_resource(drm_radeon_private_t *dev_priv,
228254885Sdumbbell		 int format, int w, int h, int pitch, u64 gpu_addr)
229254885Sdumbbell{
230254885Sdumbbell	uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
231254885Sdumbbell	RING_LOCALS;
232254885Sdumbbell	DRM_DEBUG("\n");
233254885Sdumbbell
234254885Sdumbbell	if (h < 1)
235254885Sdumbbell		h = 1;
236254885Sdumbbell
237254885Sdumbbell	sq_tex_resource_word0 = (1 << 0);
238254885Sdumbbell	sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 8) |
239254885Sdumbbell				  ((w - 1) << 19));
240254885Sdumbbell
241254885Sdumbbell	sq_tex_resource_word1 = (format << 26);
242254885Sdumbbell	sq_tex_resource_word1 |= ((h - 1) << 0);
243254885Sdumbbell
244254885Sdumbbell	sq_tex_resource_word4 = ((1 << 14) |
245254885Sdumbbell				 (0 << 16) |
246254885Sdumbbell				 (1 << 19) |
247254885Sdumbbell				 (2 << 22) |
248254885Sdumbbell				 (3 << 25));
249254885Sdumbbell
250254885Sdumbbell	BEGIN_RING(9);
251254885Sdumbbell	OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
252254885Sdumbbell	OUT_RING(0);
253254885Sdumbbell	OUT_RING(sq_tex_resource_word0);
254254885Sdumbbell	OUT_RING(sq_tex_resource_word1);
255254885Sdumbbell	OUT_RING(gpu_addr >> 8);
256254885Sdumbbell	OUT_RING(gpu_addr >> 8);
257254885Sdumbbell	OUT_RING(sq_tex_resource_word4);
258254885Sdumbbell	OUT_RING(0);
259254885Sdumbbell	OUT_RING(R600_SQ_TEX_VTX_VALID_TEXTURE << 30);
260254885Sdumbbell	ADVANCE_RING();
261254885Sdumbbell
262254885Sdumbbell}
263254885Sdumbbell
264254885Sdumbbellstatic void
265254885Sdumbbellset_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2)
266254885Sdumbbell{
267254885Sdumbbell	RING_LOCALS;
268254885Sdumbbell	DRM_DEBUG("\n");
269254885Sdumbbell
270254885Sdumbbell	BEGIN_RING(12);
271254885Sdumbbell	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
272254885Sdumbbell	OUT_RING((R600_PA_SC_SCREEN_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
273254885Sdumbbell	OUT_RING((x1 << 0) | (y1 << 16));
274254885Sdumbbell	OUT_RING((x2 << 0) | (y2 << 16));
275254885Sdumbbell
276254885Sdumbbell	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
277254885Sdumbbell	OUT_RING((R600_PA_SC_GENERIC_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
278254885Sdumbbell	OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31));
279254885Sdumbbell	OUT_RING((x2 << 0) | (y2 << 16));
280254885Sdumbbell
281254885Sdumbbell	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
282254885Sdumbbell	OUT_RING((R600_PA_SC_WINDOW_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
283254885Sdumbbell	OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31));
284254885Sdumbbell	OUT_RING((x2 << 0) | (y2 << 16));
285254885Sdumbbell	ADVANCE_RING();
286254885Sdumbbell}
287254885Sdumbbell
288254885Sdumbbellstatic void
289254885Sdumbbelldraw_auto(drm_radeon_private_t *dev_priv)
290254885Sdumbbell{
291254885Sdumbbell	RING_LOCALS;
292254885Sdumbbell	DRM_DEBUG("\n");
293254885Sdumbbell
294254885Sdumbbell	BEGIN_RING(10);
295254885Sdumbbell	OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
296254885Sdumbbell	OUT_RING((R600_VGT_PRIMITIVE_TYPE - R600_SET_CONFIG_REG_OFFSET) >> 2);
297254885Sdumbbell	OUT_RING(DI_PT_RECTLIST);
298254885Sdumbbell
299254885Sdumbbell	OUT_RING(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
300254885Sdumbbell#ifdef __BIG_ENDIAN
301254885Sdumbbell	OUT_RING((2 << 2) | DI_INDEX_SIZE_16_BIT);
302254885Sdumbbell#else
303254885Sdumbbell	OUT_RING(DI_INDEX_SIZE_16_BIT);
304254885Sdumbbell#endif
305254885Sdumbbell
306254885Sdumbbell	OUT_RING(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
307254885Sdumbbell	OUT_RING(1);
308254885Sdumbbell
309254885Sdumbbell	OUT_RING(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1));
310254885Sdumbbell	OUT_RING(3);
311254885Sdumbbell	OUT_RING(DI_SRC_SEL_AUTO_INDEX);
312254885Sdumbbell
313254885Sdumbbell	ADVANCE_RING();
314254885Sdumbbell	COMMIT_RING();
315254885Sdumbbell}
316254885Sdumbbell
317254885Sdumbbellstatic void
318254885Sdumbbellset_default_state(drm_radeon_private_t *dev_priv)
319254885Sdumbbell{
320254885Sdumbbell	int i;
321254885Sdumbbell	u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
322254885Sdumbbell	u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
323254885Sdumbbell	int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs;
324254885Sdumbbell	int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads;
325254885Sdumbbell	int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries;
326254885Sdumbbell	RING_LOCALS;
327254885Sdumbbell
328254885Sdumbbell	switch ((dev_priv->flags & RADEON_FAMILY_MASK)) {
329254885Sdumbbell	case CHIP_R600:
330254885Sdumbbell		num_ps_gprs = 192;
331254885Sdumbbell		num_vs_gprs = 56;
332254885Sdumbbell		num_temp_gprs = 4;
333254885Sdumbbell		num_gs_gprs = 0;
334254885Sdumbbell		num_es_gprs = 0;
335254885Sdumbbell		num_ps_threads = 136;
336254885Sdumbbell		num_vs_threads = 48;
337254885Sdumbbell		num_gs_threads = 4;
338254885Sdumbbell		num_es_threads = 4;
339254885Sdumbbell		num_ps_stack_entries = 128;
340254885Sdumbbell		num_vs_stack_entries = 128;
341254885Sdumbbell		num_gs_stack_entries = 0;
342254885Sdumbbell		num_es_stack_entries = 0;
343254885Sdumbbell		break;
344254885Sdumbbell	case CHIP_RV630:
345254885Sdumbbell	case CHIP_RV635:
346254885Sdumbbell		num_ps_gprs = 84;
347254885Sdumbbell		num_vs_gprs = 36;
348254885Sdumbbell		num_temp_gprs = 4;
349254885Sdumbbell		num_gs_gprs = 0;
350254885Sdumbbell		num_es_gprs = 0;
351254885Sdumbbell		num_ps_threads = 144;
352254885Sdumbbell		num_vs_threads = 40;
353254885Sdumbbell		num_gs_threads = 4;
354254885Sdumbbell		num_es_threads = 4;
355254885Sdumbbell		num_ps_stack_entries = 40;
356254885Sdumbbell		num_vs_stack_entries = 40;
357254885Sdumbbell		num_gs_stack_entries = 32;
358254885Sdumbbell		num_es_stack_entries = 16;
359254885Sdumbbell		break;
360254885Sdumbbell	case CHIP_RV610:
361254885Sdumbbell	case CHIP_RV620:
362254885Sdumbbell	case CHIP_RS780:
363254885Sdumbbell	case CHIP_RS880:
364254885Sdumbbell	default:
365254885Sdumbbell		num_ps_gprs = 84;
366254885Sdumbbell		num_vs_gprs = 36;
367254885Sdumbbell		num_temp_gprs = 4;
368254885Sdumbbell		num_gs_gprs = 0;
369254885Sdumbbell		num_es_gprs = 0;
370254885Sdumbbell		num_ps_threads = 136;
371254885Sdumbbell		num_vs_threads = 48;
372254885Sdumbbell		num_gs_threads = 4;
373254885Sdumbbell		num_es_threads = 4;
374254885Sdumbbell		num_ps_stack_entries = 40;
375254885Sdumbbell		num_vs_stack_entries = 40;
376254885Sdumbbell		num_gs_stack_entries = 32;
377254885Sdumbbell		num_es_stack_entries = 16;
378254885Sdumbbell		break;
379254885Sdumbbell	case CHIP_RV670:
380254885Sdumbbell		num_ps_gprs = 144;
381254885Sdumbbell		num_vs_gprs = 40;
382254885Sdumbbell		num_temp_gprs = 4;
383254885Sdumbbell		num_gs_gprs = 0;
384254885Sdumbbell		num_es_gprs = 0;
385254885Sdumbbell		num_ps_threads = 136;
386254885Sdumbbell		num_vs_threads = 48;
387254885Sdumbbell		num_gs_threads = 4;
388254885Sdumbbell		num_es_threads = 4;
389254885Sdumbbell		num_ps_stack_entries = 40;
390254885Sdumbbell		num_vs_stack_entries = 40;
391254885Sdumbbell		num_gs_stack_entries = 32;
392254885Sdumbbell		num_es_stack_entries = 16;
393254885Sdumbbell		break;
394254885Sdumbbell	case CHIP_RV770:
395254885Sdumbbell		num_ps_gprs = 192;
396254885Sdumbbell		num_vs_gprs = 56;
397254885Sdumbbell		num_temp_gprs = 4;
398254885Sdumbbell		num_gs_gprs = 0;
399254885Sdumbbell		num_es_gprs = 0;
400254885Sdumbbell		num_ps_threads = 188;
401254885Sdumbbell		num_vs_threads = 60;
402254885Sdumbbell		num_gs_threads = 0;
403254885Sdumbbell		num_es_threads = 0;
404254885Sdumbbell		num_ps_stack_entries = 256;
405254885Sdumbbell		num_vs_stack_entries = 256;
406254885Sdumbbell		num_gs_stack_entries = 0;
407254885Sdumbbell		num_es_stack_entries = 0;
408254885Sdumbbell		break;
409254885Sdumbbell	case CHIP_RV730:
410254885Sdumbbell	case CHIP_RV740:
411254885Sdumbbell		num_ps_gprs = 84;
412254885Sdumbbell		num_vs_gprs = 36;
413254885Sdumbbell		num_temp_gprs = 4;
414254885Sdumbbell		num_gs_gprs = 0;
415254885Sdumbbell		num_es_gprs = 0;
416254885Sdumbbell		num_ps_threads = 188;
417254885Sdumbbell		num_vs_threads = 60;
418254885Sdumbbell		num_gs_threads = 0;
419254885Sdumbbell		num_es_threads = 0;
420254885Sdumbbell		num_ps_stack_entries = 128;
421254885Sdumbbell		num_vs_stack_entries = 128;
422254885Sdumbbell		num_gs_stack_entries = 0;
423254885Sdumbbell		num_es_stack_entries = 0;
424254885Sdumbbell		break;
425254885Sdumbbell	case CHIP_RV710:
426254885Sdumbbell		num_ps_gprs = 192;
427254885Sdumbbell		num_vs_gprs = 56;
428254885Sdumbbell		num_temp_gprs = 4;
429254885Sdumbbell		num_gs_gprs = 0;
430254885Sdumbbell		num_es_gprs = 0;
431254885Sdumbbell		num_ps_threads = 144;
432254885Sdumbbell		num_vs_threads = 48;
433254885Sdumbbell		num_gs_threads = 0;
434254885Sdumbbell		num_es_threads = 0;
435254885Sdumbbell		num_ps_stack_entries = 128;
436254885Sdumbbell		num_vs_stack_entries = 128;
437254885Sdumbbell		num_gs_stack_entries = 0;
438254885Sdumbbell		num_es_stack_entries = 0;
439254885Sdumbbell		break;
440254885Sdumbbell	}
441254885Sdumbbell
442254885Sdumbbell	if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
443254885Sdumbbell	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
444254885Sdumbbell	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
445254885Sdumbbell	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) ||
446254885Sdumbbell	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710))
447254885Sdumbbell		sq_config = 0;
448254885Sdumbbell	else
449254885Sdumbbell		sq_config = R600_VC_ENABLE;
450254885Sdumbbell
451254885Sdumbbell	sq_config |= (R600_DX9_CONSTS |
452254885Sdumbbell		      R600_ALU_INST_PREFER_VECTOR |
453254885Sdumbbell		      R600_PS_PRIO(0) |
454254885Sdumbbell		      R600_VS_PRIO(1) |
455254885Sdumbbell		      R600_GS_PRIO(2) |
456254885Sdumbbell		      R600_ES_PRIO(3));
457254885Sdumbbell
458254885Sdumbbell	sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(num_ps_gprs) |
459254885Sdumbbell				  R600_NUM_VS_GPRS(num_vs_gprs) |
460254885Sdumbbell				  R600_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
461254885Sdumbbell	sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(num_gs_gprs) |
462254885Sdumbbell				  R600_NUM_ES_GPRS(num_es_gprs));
463254885Sdumbbell	sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(num_ps_threads) |
464254885Sdumbbell				   R600_NUM_VS_THREADS(num_vs_threads) |
465254885Sdumbbell				   R600_NUM_GS_THREADS(num_gs_threads) |
466254885Sdumbbell				   R600_NUM_ES_THREADS(num_es_threads));
467254885Sdumbbell	sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
468254885Sdumbbell				    R600_NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
469254885Sdumbbell	sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
470254885Sdumbbell				    R600_NUM_ES_STACK_ENTRIES(num_es_stack_entries));
471254885Sdumbbell
472254885Sdumbbell	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770) {
473254885Sdumbbell		BEGIN_RING(r7xx_default_size + 10);
474254885Sdumbbell		for (i = 0; i < r7xx_default_size; i++)
475254885Sdumbbell			OUT_RING(r7xx_default_state[i]);
476254885Sdumbbell	} else {
477254885Sdumbbell		BEGIN_RING(r6xx_default_size + 10);
478254885Sdumbbell		for (i = 0; i < r6xx_default_size; i++)
479254885Sdumbbell			OUT_RING(r6xx_default_state[i]);
480254885Sdumbbell	}
481254885Sdumbbell	OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
482254885Sdumbbell	OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
483254885Sdumbbell	/* SQ config */
484254885Sdumbbell	OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 6));
485254885Sdumbbell	OUT_RING((R600_SQ_CONFIG - R600_SET_CONFIG_REG_OFFSET) >> 2);
486254885Sdumbbell	OUT_RING(sq_config);
487254885Sdumbbell	OUT_RING(sq_gpr_resource_mgmt_1);
488254885Sdumbbell	OUT_RING(sq_gpr_resource_mgmt_2);
489254885Sdumbbell	OUT_RING(sq_thread_resource_mgmt);
490254885Sdumbbell	OUT_RING(sq_stack_resource_mgmt_1);
491254885Sdumbbell	OUT_RING(sq_stack_resource_mgmt_2);
492254885Sdumbbell	ADVANCE_RING();
493254885Sdumbbell}
494254885Sdumbbell
/* 23 bits of float fractional data */
#define I2F_FRAC_BITS  23
#define I2F_MASK ((1 << I2F_FRAC_BITS) - 1)

/*
 * Converts unsigned integer into 32-bit IEEE floating point representation.
 * Will be exact from 0 to 2^24.  Above that, we round towards zero
 * as the fractional bits will not fit in a float.  (It would be better to
 * round towards even as the fpu does, but that is slower.)
 *
 * The return value is the raw bit pattern of the float, not a C float.
 */
__pure uint32_t int2float(uint32_t x)
{
	uint32_t msb, exponent, fraction;

	/* Zero is special */
	if (!x)
		return 0;

	/*
	 * Get the zero-based location of the most significant bit.  fls()
	 * numbers bits from 1 (fls(1) == 1), while the rotate count and the
	 * exponent below need the bit index counted from 0, so subtract one.
	 * Without the adjustment every result is off: int2float(1) would
	 * produce the bit pattern of 3.0f instead of 1.0f.
	 */
	msb = fls(x) - 1;

	/*
	 * Use a rotate instead of a shift because that works both leftwards
	 * and rightwards due to the mod(32) behaviour.  This means we don't
	 * need to check to see if we are above 2^24 or not.
	 */
	fraction = ror32(x, (msb - I2F_FRAC_BITS) & 0x1f) & I2F_MASK;
	exponent = (127 + msb) << I2F_FRAC_BITS;

	return fraction + exponent;
}
525254885Sdumbbell
526254885Sdumbbellstatic int r600_nomm_get_vb(struct drm_device *dev)
527254885Sdumbbell{
528254885Sdumbbell	drm_radeon_private_t *dev_priv = dev->dev_private;
529254885Sdumbbell	dev_priv->blit_vb = radeon_freelist_get(dev);
530254885Sdumbbell	if (!dev_priv->blit_vb) {
531254885Sdumbbell		DRM_ERROR("Unable to allocate vertex buffer for blit\n");
532254885Sdumbbell		return -EAGAIN;
533254885Sdumbbell	}
534254885Sdumbbell	return 0;
535254885Sdumbbell}
536254885Sdumbbell
537254885Sdumbbellstatic void r600_nomm_put_vb(struct drm_device *dev)
538254885Sdumbbell{
539254885Sdumbbell	drm_radeon_private_t *dev_priv = dev->dev_private;
540254885Sdumbbell
541254885Sdumbbell	dev_priv->blit_vb->used = 0;
542254885Sdumbbell	radeon_cp_discard_buffer(dev, dev_priv->blit_vb->file_priv->masterp, dev_priv->blit_vb);
543254885Sdumbbell}
544254885Sdumbbell
545254885Sdumbbellstatic void *r600_nomm_get_vb_ptr(struct drm_device *dev)
546254885Sdumbbell{
547254885Sdumbbell	drm_radeon_private_t *dev_priv = dev->dev_private;
548254885Sdumbbell	return (((char *)dev->agp_buffer_map->handle +
549254885Sdumbbell		 dev_priv->blit_vb->offset + dev_priv->blit_vb->used));
550254885Sdumbbell}
551254885Sdumbbell
552254885Sdumbbellint
553254885Sdumbbellr600_prepare_blit_copy(struct drm_device *dev, struct drm_file *file_priv)
554254885Sdumbbell{
555254885Sdumbbell	drm_radeon_private_t *dev_priv = dev->dev_private;
556254885Sdumbbell	int ret;
557254885Sdumbbell	DRM_DEBUG("\n");
558254885Sdumbbell
559254885Sdumbbell	ret = r600_nomm_get_vb(dev);
560254885Sdumbbell	if (ret)
561254885Sdumbbell		return ret;
562254885Sdumbbell
563254885Sdumbbell	dev_priv->blit_vb->file_priv = file_priv;
564254885Sdumbbell
565254885Sdumbbell	set_default_state(dev_priv);
566254885Sdumbbell	set_shaders(dev);
567254885Sdumbbell
568254885Sdumbbell	return 0;
569254885Sdumbbell}
570254885Sdumbbell
571254885Sdumbbell
572254885Sdumbbellvoid
573254885Sdumbbellr600_done_blit_copy(struct drm_device *dev)
574254885Sdumbbell{
575254885Sdumbbell	drm_radeon_private_t *dev_priv = dev->dev_private;
576254885Sdumbbell	RING_LOCALS;
577254885Sdumbbell	DRM_DEBUG("\n");
578254885Sdumbbell
579254885Sdumbbell	BEGIN_RING(5);
580254885Sdumbbell	OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
581254885Sdumbbell	OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
582254885Sdumbbell	/* wait for 3D idle clean */
583254885Sdumbbell	OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
584254885Sdumbbell	OUT_RING((R600_WAIT_UNTIL - R600_SET_CONFIG_REG_OFFSET) >> 2);
585254885Sdumbbell	OUT_RING(RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN);
586254885Sdumbbell
587254885Sdumbbell	ADVANCE_RING();
588254885Sdumbbell	COMMIT_RING();
589254885Sdumbbell
590254885Sdumbbell	r600_nomm_put_vb(dev);
591254885Sdumbbell}
592254885Sdumbbell
593254885Sdumbbellvoid
594254885Sdumbbellr600_blit_copy(struct drm_device *dev,
595254885Sdumbbell	       uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
596254885Sdumbbell	       int size_bytes)
597254885Sdumbbell{
598254885Sdumbbell	drm_radeon_private_t *dev_priv = dev->dev_private;
599254885Sdumbbell	int max_bytes;
600254885Sdumbbell	u64 vb_addr;
601254885Sdumbbell	u32 *vb;
602254885Sdumbbell
603254885Sdumbbell	vb = r600_nomm_get_vb_ptr(dev);
604254885Sdumbbell
605254885Sdumbbell	if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) {
606254885Sdumbbell		max_bytes = 8192;
607254885Sdumbbell
608254885Sdumbbell		while (size_bytes) {
609254885Sdumbbell			int cur_size = size_bytes;
610254885Sdumbbell			int src_x = src_gpu_addr & 255;
611254885Sdumbbell			int dst_x = dst_gpu_addr & 255;
612254885Sdumbbell			int h = 1;
613254885Sdumbbell			src_gpu_addr = src_gpu_addr & ~255;
614254885Sdumbbell			dst_gpu_addr = dst_gpu_addr & ~255;
615254885Sdumbbell
616254885Sdumbbell			if (!src_x && !dst_x) {
617254885Sdumbbell				h = (cur_size / max_bytes);
618254885Sdumbbell				if (h > 8192)
619254885Sdumbbell					h = 8192;
620254885Sdumbbell				if (h == 0)
621254885Sdumbbell					h = 1;
622254885Sdumbbell				else
623254885Sdumbbell					cur_size = max_bytes;
624254885Sdumbbell			} else {
625254885Sdumbbell				if (cur_size > max_bytes)
626254885Sdumbbell					cur_size = max_bytes;
627254885Sdumbbell				if (cur_size > (max_bytes - dst_x))
628254885Sdumbbell					cur_size = (max_bytes - dst_x);
629254885Sdumbbell				if (cur_size > (max_bytes - src_x))
630254885Sdumbbell					cur_size = (max_bytes - src_x);
631254885Sdumbbell			}
632254885Sdumbbell
633254885Sdumbbell			if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
634254885Sdumbbell
635254885Sdumbbell				r600_nomm_put_vb(dev);
636254885Sdumbbell				r600_nomm_get_vb(dev);
637254885Sdumbbell				if (!dev_priv->blit_vb)
638254885Sdumbbell					return;
639254885Sdumbbell				set_shaders(dev);
640254885Sdumbbell				vb = r600_nomm_get_vb_ptr(dev);
641254885Sdumbbell			}
642254885Sdumbbell
643254885Sdumbbell			vb[0] = int2float(dst_x);
644254885Sdumbbell			vb[1] = 0;
645254885Sdumbbell			vb[2] = int2float(src_x);
646254885Sdumbbell			vb[3] = 0;
647254885Sdumbbell
648254885Sdumbbell			vb[4] = int2float(dst_x);
649254885Sdumbbell			vb[5] = int2float(h);
650254885Sdumbbell			vb[6] = int2float(src_x);
651254885Sdumbbell			vb[7] = int2float(h);
652254885Sdumbbell
653254885Sdumbbell			vb[8] = int2float(dst_x + cur_size);
654254885Sdumbbell			vb[9] = int2float(h);
655254885Sdumbbell			vb[10] = int2float(src_x + cur_size);
656254885Sdumbbell			vb[11] = int2float(h);
657254885Sdumbbell
658254885Sdumbbell			/* src */
659254885Sdumbbell			set_tex_resource(dev_priv, FMT_8,
660254885Sdumbbell					 src_x + cur_size, h, src_x + cur_size,
661254885Sdumbbell					 src_gpu_addr);
662254885Sdumbbell
663254885Sdumbbell			cp_set_surface_sync(dev_priv,
664254885Sdumbbell					    R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
665254885Sdumbbell
666254885Sdumbbell			/* dst */
667254885Sdumbbell			set_render_target(dev_priv, COLOR_8,
668254885Sdumbbell					  dst_x + cur_size, h,
669254885Sdumbbell					  dst_gpu_addr);
670254885Sdumbbell
671254885Sdumbbell			/* scissors */
672254885Sdumbbell			set_scissors(dev_priv, dst_x, 0, dst_x + cur_size, h);
673254885Sdumbbell
674254885Sdumbbell			/* Vertex buffer setup */
675254885Sdumbbell			vb_addr = dev_priv->gart_buffers_offset +
676254885Sdumbbell				dev_priv->blit_vb->offset +
677254885Sdumbbell				dev_priv->blit_vb->used;
678254885Sdumbbell			set_vtx_resource(dev_priv, vb_addr);
679254885Sdumbbell
680254885Sdumbbell			/* draw */
681254885Sdumbbell			draw_auto(dev_priv);
682254885Sdumbbell
683254885Sdumbbell			cp_set_surface_sync(dev_priv,
684254885Sdumbbell					    R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
685254885Sdumbbell					    cur_size * h, dst_gpu_addr);
686254885Sdumbbell
687254885Sdumbbell			vb += 12;
688254885Sdumbbell			dev_priv->blit_vb->used += 12 * 4;
689254885Sdumbbell
690254885Sdumbbell			src_gpu_addr += cur_size * h;
691254885Sdumbbell			dst_gpu_addr += cur_size * h;
692254885Sdumbbell			size_bytes -= cur_size * h;
693254885Sdumbbell		}
694254885Sdumbbell	} else {
695254885Sdumbbell		max_bytes = 8192 * 4;
696254885Sdumbbell
697254885Sdumbbell		while (size_bytes) {
698254885Sdumbbell			int cur_size = size_bytes;
699254885Sdumbbell			int src_x = (src_gpu_addr & 255);
700254885Sdumbbell			int dst_x = (dst_gpu_addr & 255);
701254885Sdumbbell			int h = 1;
702254885Sdumbbell			src_gpu_addr = src_gpu_addr & ~255;
703254885Sdumbbell			dst_gpu_addr = dst_gpu_addr & ~255;
704254885Sdumbbell
705254885Sdumbbell			if (!src_x && !dst_x) {
706254885Sdumbbell				h = (cur_size / max_bytes);
707254885Sdumbbell				if (h > 8192)
708254885Sdumbbell					h = 8192;
709254885Sdumbbell				if (h == 0)
710254885Sdumbbell					h = 1;
711254885Sdumbbell				else
712254885Sdumbbell					cur_size = max_bytes;
713254885Sdumbbell			} else {
714254885Sdumbbell				if (cur_size > max_bytes)
715254885Sdumbbell					cur_size = max_bytes;
716254885Sdumbbell				if (cur_size > (max_bytes - dst_x))
717254885Sdumbbell					cur_size = (max_bytes - dst_x);
718254885Sdumbbell				if (cur_size > (max_bytes - src_x))
719254885Sdumbbell					cur_size = (max_bytes - src_x);
720254885Sdumbbell			}
721254885Sdumbbell
722254885Sdumbbell			if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
723254885Sdumbbell				r600_nomm_put_vb(dev);
724254885Sdumbbell				r600_nomm_get_vb(dev);
725254885Sdumbbell				if (!dev_priv->blit_vb)
726254885Sdumbbell					return;
727254885Sdumbbell
728254885Sdumbbell				set_shaders(dev);
729254885Sdumbbell				vb = r600_nomm_get_vb_ptr(dev);
730254885Sdumbbell			}
731254885Sdumbbell
732254885Sdumbbell			vb[0] = int2float(dst_x / 4);
733254885Sdumbbell			vb[1] = 0;
734254885Sdumbbell			vb[2] = int2float(src_x / 4);
735254885Sdumbbell			vb[3] = 0;
736254885Sdumbbell
737254885Sdumbbell			vb[4] = int2float(dst_x / 4);
738254885Sdumbbell			vb[5] = int2float(h);
739254885Sdumbbell			vb[6] = int2float(src_x / 4);
740254885Sdumbbell			vb[7] = int2float(h);
741254885Sdumbbell
742254885Sdumbbell			vb[8] = int2float((dst_x + cur_size) / 4);
743254885Sdumbbell			vb[9] = int2float(h);
744254885Sdumbbell			vb[10] = int2float((src_x + cur_size) / 4);
745254885Sdumbbell			vb[11] = int2float(h);
746254885Sdumbbell
747254885Sdumbbell			/* src */
748254885Sdumbbell			set_tex_resource(dev_priv, FMT_8_8_8_8,
749254885Sdumbbell					 (src_x + cur_size) / 4,
750254885Sdumbbell					 h, (src_x + cur_size) / 4,
751254885Sdumbbell					 src_gpu_addr);
752254885Sdumbbell
753254885Sdumbbell			cp_set_surface_sync(dev_priv,
754254885Sdumbbell					    R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
755254885Sdumbbell
756254885Sdumbbell			/* dst */
757254885Sdumbbell			set_render_target(dev_priv, COLOR_8_8_8_8,
758254885Sdumbbell					  (dst_x + cur_size) / 4, h,
759254885Sdumbbell					  dst_gpu_addr);
760254885Sdumbbell
761254885Sdumbbell			/* scissors */
762254885Sdumbbell			set_scissors(dev_priv, (dst_x / 4), 0, (dst_x + cur_size / 4), h);
763254885Sdumbbell
764254885Sdumbbell			/* Vertex buffer setup */
765254885Sdumbbell			vb_addr = dev_priv->gart_buffers_offset +
766254885Sdumbbell				dev_priv->blit_vb->offset +
767254885Sdumbbell				dev_priv->blit_vb->used;
768254885Sdumbbell			set_vtx_resource(dev_priv, vb_addr);
769254885Sdumbbell
770254885Sdumbbell			/* draw */
771254885Sdumbbell			draw_auto(dev_priv);
772254885Sdumbbell
773254885Sdumbbell			cp_set_surface_sync(dev_priv,
774254885Sdumbbell					    R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
775254885Sdumbbell					    cur_size * h, dst_gpu_addr);
776254885Sdumbbell
777254885Sdumbbell			vb += 12;
778254885Sdumbbell			dev_priv->blit_vb->used += 12 * 4;
779254885Sdumbbell
780254885Sdumbbell			src_gpu_addr += cur_size * h;
781254885Sdumbbell			dst_gpu_addr += cur_size * h;
782254885Sdumbbell			size_bytes -= cur_size * h;
783254885Sdumbbell		}
784254885Sdumbbell	}
785254885Sdumbbell}
786254885Sdumbbell
787254885Sdumbbellvoid
788254885Sdumbbellr600_blit_swap(struct drm_device *dev,
789254885Sdumbbell	       uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
790254885Sdumbbell	       int sx, int sy, int dx, int dy,
791254885Sdumbbell	       int w, int h, int src_pitch, int dst_pitch, int cpp)
792254885Sdumbbell{
793254885Sdumbbell	drm_radeon_private_t *dev_priv = dev->dev_private;
794254885Sdumbbell	int cb_format, tex_format;
795254885Sdumbbell	int sx2, sy2, dx2, dy2;
796254885Sdumbbell	u64 vb_addr;
797254885Sdumbbell	u32 *vb;
798254885Sdumbbell
799254885Sdumbbell	if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
800254885Sdumbbell
801254885Sdumbbell		r600_nomm_put_vb(dev);
802254885Sdumbbell		r600_nomm_get_vb(dev);
803254885Sdumbbell		if (!dev_priv->blit_vb)
804254885Sdumbbell			return;
805254885Sdumbbell
806254885Sdumbbell		set_shaders(dev);
807254885Sdumbbell	}
808254885Sdumbbell	vb = r600_nomm_get_vb_ptr(dev);
809254885Sdumbbell
810254885Sdumbbell	sx2 = sx + w;
811254885Sdumbbell	sy2 = sy + h;
812254885Sdumbbell	dx2 = dx + w;
813254885Sdumbbell	dy2 = dy + h;
814254885Sdumbbell
815254885Sdumbbell	vb[0] = int2float(dx);
816254885Sdumbbell	vb[1] = int2float(dy);
817254885Sdumbbell	vb[2] = int2float(sx);
818254885Sdumbbell	vb[3] = int2float(sy);
819254885Sdumbbell
820254885Sdumbbell	vb[4] = int2float(dx);
821254885Sdumbbell	vb[5] = int2float(dy2);
822254885Sdumbbell	vb[6] = int2float(sx);
823254885Sdumbbell	vb[7] = int2float(sy2);
824254885Sdumbbell
825254885Sdumbbell	vb[8] = int2float(dx2);
826254885Sdumbbell	vb[9] = int2float(dy2);
827254885Sdumbbell	vb[10] = int2float(sx2);
828254885Sdumbbell	vb[11] = int2float(sy2);
829254885Sdumbbell
830254885Sdumbbell	switch(cpp) {
831254885Sdumbbell	case 4:
832254885Sdumbbell		cb_format = COLOR_8_8_8_8;
833254885Sdumbbell		tex_format = FMT_8_8_8_8;
834254885Sdumbbell		break;
835254885Sdumbbell	case 2:
836254885Sdumbbell		cb_format = COLOR_5_6_5;
837254885Sdumbbell		tex_format = FMT_5_6_5;
838254885Sdumbbell		break;
839254885Sdumbbell	default:
840254885Sdumbbell		cb_format = COLOR_8;
841254885Sdumbbell		tex_format = FMT_8;
842254885Sdumbbell		break;
843254885Sdumbbell	}
844254885Sdumbbell
845254885Sdumbbell	/* src */
846254885Sdumbbell	set_tex_resource(dev_priv, tex_format,
847254885Sdumbbell			 src_pitch / cpp,
848254885Sdumbbell			 sy2, src_pitch / cpp,
849254885Sdumbbell			 src_gpu_addr);
850254885Sdumbbell
851254885Sdumbbell	cp_set_surface_sync(dev_priv,
852254885Sdumbbell			    R600_TC_ACTION_ENA, src_pitch * sy2, src_gpu_addr);
853254885Sdumbbell
854254885Sdumbbell	/* dst */
855254885Sdumbbell	set_render_target(dev_priv, cb_format,
856254885Sdumbbell			  dst_pitch / cpp, dy2,
857254885Sdumbbell			  dst_gpu_addr);
858254885Sdumbbell
859254885Sdumbbell	/* scissors */
860254885Sdumbbell	set_scissors(dev_priv, dx, dy, dx2, dy2);
861254885Sdumbbell
862254885Sdumbbell	/* Vertex buffer setup */
863254885Sdumbbell	vb_addr = dev_priv->gart_buffers_offset +
864254885Sdumbbell		dev_priv->blit_vb->offset +
865254885Sdumbbell		dev_priv->blit_vb->used;
866254885Sdumbbell	set_vtx_resource(dev_priv, vb_addr);
867254885Sdumbbell
868254885Sdumbbell	/* draw */
869254885Sdumbbell	draw_auto(dev_priv);
870254885Sdumbbell
871254885Sdumbbell	cp_set_surface_sync(dev_priv,
872254885Sdumbbell			    R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
873254885Sdumbbell			    dst_pitch * dy2, dst_gpu_addr);
874254885Sdumbbell
875254885Sdumbbell	dev_priv->blit_vb->used += 12 * 4;
876254885Sdumbbell}
877