1148211Sanholt/* r300_cmdbuf.c -- Command buffer emission for R300 -*- linux-c -*-
2148211Sanholt *
3148211Sanholt * Copyright (C) The Weather Channel, Inc.  2002.
4148211Sanholt * Copyright (C) 2004 Nicolai Haehnle.
5148211Sanholt * All Rights Reserved.
6148211Sanholt *
7148211Sanholt * The Weather Channel (TM) funded Tungsten Graphics to develop the
8148211Sanholt * initial release of the Radeon 8500 driver under the XFree86 license.
9148211Sanholt * This notice must be preserved.
10148211Sanholt *
11148211Sanholt * Permission is hereby granted, free of charge, to any person obtaining a
12148211Sanholt * copy of this software and associated documentation files (the "Software"),
13148211Sanholt * to deal in the Software without restriction, including without limitation
14148211Sanholt * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15148211Sanholt * and/or sell copies of the Software, and to permit persons to whom the
16148211Sanholt * Software is furnished to do so, subject to the following conditions:
17148211Sanholt *
18148211Sanholt * The above copyright notice and this permission notice (including the next
19148211Sanholt * paragraph) shall be included in all copies or substantial portions of the
20148211Sanholt * Software.
21148211Sanholt *
22148211Sanholt * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23148211Sanholt * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24148211Sanholt * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
25148211Sanholt * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
26148211Sanholt * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
27148211Sanholt * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
28148211Sanholt * DEALINGS IN THE SOFTWARE.
29148211Sanholt *
30148211Sanholt * Authors:
31148211Sanholt *    Nicolai Haehnle <prefect_@gmx.net>
32148211Sanholt */
33148211Sanholt
34152909Sanholt#include <sys/cdefs.h>
35152909Sanholt__FBSDID("$FreeBSD$");
36152909Sanholt
37148211Sanholt#include "dev/drm/drmP.h"
38148211Sanholt#include "dev/drm/drm.h"
39148211Sanholt#include "dev/drm/radeon_drm.h"
40148211Sanholt#include "dev/drm/radeon_drv.h"
41148211Sanholt#include "dev/drm/r300_reg.h"
42148211Sanholt
43148211Sanholt#define R300_SIMULTANEOUS_CLIPRECTS		4
44148211Sanholt
/*
 * R300_RE_CLIPRECT_CNTL value to program for 1, 2, 3 or 4 active cliprects.
 * The table is indexed with (number of cliprects - 1).
 */
static const int r300_cliprect_cntl[4] = {
	[0] = 0xAAAA,
	[1] = 0xEEEE,
	[2] = 0xFEFE,
	[3] = 0xFFFE,
};
53148211Sanholt
54148211Sanholt/**
55148211Sanholt * Emit up to R300_SIMULTANEOUS_CLIPRECTS cliprects from the given command
56148211Sanholt * buffer, starting with index n.
57148211Sanholt */
58157617Sanholtstatic int r300_emit_cliprects(drm_radeon_private_t *dev_priv,
59157617Sanholt			       drm_radeon_kcmd_buffer_t *cmdbuf, int n)
60148211Sanholt{
61182080Srnoland	struct drm_clip_rect box;
62148211Sanholt	int nr;
63148211Sanholt	int i;
64148211Sanholt	RING_LOCALS;
65148211Sanholt
66148211Sanholt	nr = cmdbuf->nbox - n;
67148211Sanholt	if (nr > R300_SIMULTANEOUS_CLIPRECTS)
68148211Sanholt		nr = R300_SIMULTANEOUS_CLIPRECTS;
69148211Sanholt
70148211Sanholt	DRM_DEBUG("%i cliprects\n", nr);
71148211Sanholt
72148211Sanholt	if (nr) {
73157617Sanholt		BEGIN_RING(6 + nr * 2);
74157617Sanholt		OUT_RING(CP_PACKET0(R300_RE_CLIPRECT_TL_0, nr * 2 - 1));
75148211Sanholt
76157617Sanholt		for (i = 0; i < nr; ++i) {
77157617Sanholt			if (DRM_COPY_FROM_USER_UNCHECKED
78157617Sanholt			    (&box, &cmdbuf->boxes[n + i], sizeof(box))) {
79148211Sanholt				DRM_ERROR("copy cliprect faulted\n");
80182080Srnoland				return -EFAULT;
81148211Sanholt			}
82148211Sanholt
83182080Srnoland			box.x2--; /* Hardware expects inclusive bottom-right corner */
84182080Srnoland			box.y2--;
85148211Sanholt
86182080Srnoland			if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) {
87182080Srnoland				box.x1 = (box.x1) &
88182080Srnoland					R300_CLIPRECT_MASK;
89182080Srnoland				box.y1 = (box.y1) &
90182080Srnoland					R300_CLIPRECT_MASK;
91182080Srnoland				box.x2 = (box.x2) &
92182080Srnoland					R300_CLIPRECT_MASK;
93182080Srnoland				box.y2 = (box.y2) &
94182080Srnoland					R300_CLIPRECT_MASK;
95182080Srnoland			} else {
96182080Srnoland				box.x1 = (box.x1 + R300_CLIPRECT_OFFSET) &
97182080Srnoland					R300_CLIPRECT_MASK;
98182080Srnoland				box.y1 = (box.y1 + R300_CLIPRECT_OFFSET) &
99182080Srnoland					R300_CLIPRECT_MASK;
100182080Srnoland				box.x2 = (box.x2 + R300_CLIPRECT_OFFSET) &
101182080Srnoland					R300_CLIPRECT_MASK;
102182080Srnoland				box.y2 = (box.y2 + R300_CLIPRECT_OFFSET) &
103182080Srnoland					R300_CLIPRECT_MASK;
104182080Srnoland			}
105182080Srnoland
106148211Sanholt			OUT_RING((box.x1 << R300_CLIPRECT_X_SHIFT) |
107157617Sanholt				 (box.y1 << R300_CLIPRECT_Y_SHIFT));
108148211Sanholt			OUT_RING((box.x2 << R300_CLIPRECT_X_SHIFT) |
109157617Sanholt				 (box.y2 << R300_CLIPRECT_Y_SHIFT));
110182080Srnoland
111148211Sanholt		}
112148211Sanholt
113157617Sanholt		OUT_RING_REG(R300_RE_CLIPRECT_CNTL, r300_cliprect_cntl[nr - 1]);
114148211Sanholt
115148211Sanholt		/* TODO/SECURITY: Force scissors to a safe value, otherwise the
116157617Sanholt		 * client might be able to trample over memory.
117157617Sanholt		 * The impact should be very limited, but I'd rather be safe than
118157617Sanholt		 * sorry.
119157617Sanholt		 */
120157617Sanholt		OUT_RING(CP_PACKET0(R300_RE_SCISSORS_TL, 1));
121157617Sanholt		OUT_RING(0);
122157617Sanholt		OUT_RING(R300_SCISSORS_X_MASK | R300_SCISSORS_Y_MASK);
123148211Sanholt		ADVANCE_RING();
124157617Sanholt	} else {
125148211Sanholt		/* Why we allow zero cliprect rendering:
126148211Sanholt		 * There are some commands in a command buffer that must be submitted
127148211Sanholt		 * even when there are no cliprects, e.g. DMA buffer discard
128148211Sanholt		 * or state setting (though state setting could be avoided by
129148211Sanholt		 * simulating a loss of context).
130148211Sanholt		 *
131148211Sanholt		 * Now since the cmdbuf interface is so chaotic right now (and is
132148211Sanholt		 * bound to remain that way for a bit until things settle down),
133148211Sanholt		 * it is basically impossible to filter out the commands that are
134148211Sanholt		 * necessary and those that aren't.
135148211Sanholt		 *
136148211Sanholt		 * So I choose the safe way and don't do any filtering at all;
137148211Sanholt		 * instead, I simply set up the engine so that all rendering
138148211Sanholt		 * can't produce any fragments.
139148211Sanholt		 */
140148211Sanholt		BEGIN_RING(2);
141157617Sanholt		OUT_RING_REG(R300_RE_CLIPRECT_CNTL, 0);
142148211Sanholt		ADVANCE_RING();
143157617Sanholt	}
144148211Sanholt
145182080Srnoland	/* flus cache and wait idle clean after cliprect change */
146182080Srnoland	BEGIN_RING(2);
147182080Srnoland	OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
148182080Srnoland	OUT_RING(R300_RB3D_DC_FLUSH);
149182080Srnoland	ADVANCE_RING();
150182080Srnoland	BEGIN_RING(2);
151182080Srnoland	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
152182080Srnoland	OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
153182080Srnoland	ADVANCE_RING();
154182080Srnoland	/* set flush flag */
155182080Srnoland	dev_priv->track_flush |= RADEON_FLUSH_EMITED;
156182080Srnoland
157148211Sanholt	return 0;
158148211Sanholt}
159148211Sanholt
160157617Sanholtstatic u8 r300_reg_flags[0x10000 >> 2];
161148211Sanholt
162182080Srnolandvoid r300_init_reg_flags(struct drm_device *dev)
163148211Sanholt{
164148211Sanholt	int i;
165182080Srnoland	drm_radeon_private_t *dev_priv = dev->dev_private;
166182080Srnoland
167157617Sanholt	memset(r300_reg_flags, 0, 0x10000 >> 2);
168157617Sanholt#define ADD_RANGE_MARK(reg, count,mark) \
169148211Sanholt		for(i=((reg)>>2);i<((reg)>>2)+(count);i++)\
170148211Sanholt			r300_reg_flags[i]|=(mark);
171148211Sanholt
172157617Sanholt#define MARK_SAFE		1
173157617Sanholt#define MARK_CHECK_OFFSET	2
174157617Sanholt
175157617Sanholt#define ADD_RANGE(reg, count)	ADD_RANGE_MARK(reg, count, MARK_SAFE)
176157617Sanholt
177148211Sanholt	/* these match cmducs() command in r300_driver/r300/r300_cmdbuf.c */
178148211Sanholt	ADD_RANGE(R300_SE_VPORT_XSCALE, 6);
179182080Srnoland	ADD_RANGE(R300_VAP_CNTL, 1);
180148211Sanholt	ADD_RANGE(R300_SE_VTE_CNTL, 2);
181148211Sanholt	ADD_RANGE(0x2134, 2);
182182080Srnoland	ADD_RANGE(R300_VAP_CNTL_STATUS, 1);
183148211Sanholt	ADD_RANGE(R300_VAP_INPUT_CNTL_0, 2);
184148211Sanholt	ADD_RANGE(0x21DC, 1);
185182080Srnoland	ADD_RANGE(R300_VAP_UNKNOWN_221C, 1);
186182080Srnoland	ADD_RANGE(R300_VAP_CLIP_X_0, 4);
187182080Srnoland	ADD_RANGE(R300_VAP_PVS_STATE_FLUSH_REG, 1);
188182080Srnoland	ADD_RANGE(R300_VAP_UNKNOWN_2288, 1);
189148211Sanholt	ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2);
190148211Sanholt	ADD_RANGE(R300_VAP_PVS_CNTL_1, 3);
191148211Sanholt	ADD_RANGE(R300_GB_ENABLE, 1);
192148211Sanholt	ADD_RANGE(R300_GB_MSPOS0, 5);
193182080Srnoland	ADD_RANGE(R300_TX_INVALTAGS, 1);
194148211Sanholt	ADD_RANGE(R300_TX_ENABLE, 1);
195148211Sanholt	ADD_RANGE(0x4200, 4);
196148211Sanholt	ADD_RANGE(0x4214, 1);
197148211Sanholt	ADD_RANGE(R300_RE_POINTSIZE, 1);
198148211Sanholt	ADD_RANGE(0x4230, 3);
199148211Sanholt	ADD_RANGE(R300_RE_LINE_CNT, 1);
200182080Srnoland	ADD_RANGE(R300_RE_UNK4238, 1);
201148211Sanholt	ADD_RANGE(0x4260, 3);
202182080Srnoland	ADD_RANGE(R300_RE_SHADE, 4);
203182080Srnoland	ADD_RANGE(R300_RE_POLYGON_MODE, 5);
204182080Srnoland	ADD_RANGE(R300_RE_ZBIAS_CNTL, 1);
205148211Sanholt	ADD_RANGE(R300_RE_ZBIAS_T_FACTOR, 4);
206182080Srnoland	ADD_RANGE(R300_RE_OCCLUSION_CNTL, 1);
207148211Sanholt	ADD_RANGE(R300_RE_CULL_CNTL, 1);
208148211Sanholt	ADD_RANGE(0x42C0, 2);
209148211Sanholt	ADD_RANGE(R300_RS_CNTL_0, 2);
210182080Srnoland
211190831Srnoland	ADD_RANGE(R300_SU_REG_DEST, 1);
212190831Srnoland	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV530)
213190831Srnoland		ADD_RANGE(RV530_FG_ZBREG_DEST, 1);
214190831Srnoland
215182080Srnoland	ADD_RANGE(R300_SC_HYPERZ, 2);
216148211Sanholt	ADD_RANGE(0x43E8, 1);
217182080Srnoland
218148211Sanholt	ADD_RANGE(0x46A4, 5);
219182080Srnoland
220182080Srnoland	ADD_RANGE(R300_RE_FOG_STATE, 1);
221182080Srnoland	ADD_RANGE(R300_FOG_COLOR_R, 3);
222148211Sanholt	ADD_RANGE(R300_PP_ALPHA_TEST, 2);
223148211Sanholt	ADD_RANGE(0x4BD8, 1);
224148211Sanholt	ADD_RANGE(R300_PFS_PARAM_0_X, 64);
225148211Sanholt	ADD_RANGE(0x4E00, 1);
226148211Sanholt	ADD_RANGE(R300_RB3D_CBLEND, 2);
227148211Sanholt	ADD_RANGE(R300_RB3D_COLORMASK, 1);
228182080Srnoland	ADD_RANGE(R300_RB3D_BLEND_COLOR, 3);
229157617Sanholt	ADD_RANGE_MARK(R300_RB3D_COLOROFFSET0, 1, MARK_CHECK_OFFSET);	/* check offset */
230148211Sanholt	ADD_RANGE(R300_RB3D_COLORPITCH0, 1);
231148211Sanholt	ADD_RANGE(0x4E50, 9);
232148211Sanholt	ADD_RANGE(0x4E88, 1);
233148211Sanholt	ADD_RANGE(0x4EA0, 2);
234182080Srnoland	ADD_RANGE(R300_ZB_CNTL, 3);
235182080Srnoland	ADD_RANGE(R300_ZB_FORMAT, 4);
236182080Srnoland	ADD_RANGE_MARK(R300_ZB_DEPTHOFFSET, 1, MARK_CHECK_OFFSET);	/* check offset */
237182080Srnoland	ADD_RANGE(R300_ZB_DEPTHPITCH, 1);
238182080Srnoland	ADD_RANGE(R300_ZB_DEPTHCLEARVALUE, 1);
239190833Srnoland	ADD_RANGE(R300_ZB_ZMASK_OFFSET, 5);
240190833Srnoland	ADD_RANGE(R300_ZB_HIZ_OFFSET, 5);
241190833Srnoland	ADD_RANGE(R300_ZB_ZPASS_DATA, 1);
242190833Srnoland	ADD_RANGE_MARK(R300_ZB_ZPASS_ADDR, 1, MARK_CHECK_OFFSET);       /* check offset */
243190833Srnoland	ADD_RANGE(R300_ZB_DEPTHXY_OFFSET, 1)
244148211Sanholt
245148211Sanholt	ADD_RANGE(R300_TX_FILTER_0, 16);
246157617Sanholt	ADD_RANGE(R300_TX_FILTER1_0, 16);
247148211Sanholt	ADD_RANGE(R300_TX_SIZE_0, 16);
248148211Sanholt	ADD_RANGE(R300_TX_FORMAT_0, 16);
249157617Sanholt	ADD_RANGE(R300_TX_PITCH_0, 16);
250157617Sanholt	/* Texture offset is dangerous and needs more checking */
251148211Sanholt	ADD_RANGE_MARK(R300_TX_OFFSET_0, 16, MARK_CHECK_OFFSET);
252157617Sanholt	ADD_RANGE(R300_TX_CHROMA_KEY_0, 16);
253148211Sanholt	ADD_RANGE(R300_TX_BORDER_COLOR_0, 16);
254148211Sanholt
255148211Sanholt	/* Sporadic registers used as primitives are emitted */
256182080Srnoland	ADD_RANGE(R300_ZB_ZCACHE_CTLSTAT, 1);
257148211Sanholt	ADD_RANGE(R300_RB3D_DSTCACHE_CTLSTAT, 1);
258148211Sanholt	ADD_RANGE(R300_VAP_INPUT_ROUTE_0_0, 8);
259148211Sanholt	ADD_RANGE(R300_VAP_INPUT_ROUTE_1_0, 8);
260148211Sanholt
261182080Srnoland	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) {
262182080Srnoland		ADD_RANGE(R500_VAP_INDEX_OFFSET, 1);
263182080Srnoland		ADD_RANGE(R500_US_CONFIG, 2);
264182080Srnoland		ADD_RANGE(R500_US_CODE_ADDR, 3);
265182080Srnoland		ADD_RANGE(R500_US_FC_CTRL, 1);
266182080Srnoland		ADD_RANGE(R500_RS_IP_0, 16);
267182080Srnoland		ADD_RANGE(R500_RS_INST_0, 16);
268182080Srnoland		ADD_RANGE(R500_RB3D_COLOR_CLEAR_VALUE_AR, 2);
269182080Srnoland		ADD_RANGE(R500_RB3D_CONSTANT_COLOR_AR, 2);
270182080Srnoland		ADD_RANGE(R500_ZB_FIFO_SIZE, 2);
271182080Srnoland	} else {
272182080Srnoland		ADD_RANGE(R300_PFS_CNTL_0, 3);
273182080Srnoland		ADD_RANGE(R300_PFS_NODE_0, 4);
274182080Srnoland		ADD_RANGE(R300_PFS_TEXI_0, 64);
275182080Srnoland		ADD_RANGE(R300_PFS_INSTR0_0, 64);
276182080Srnoland		ADD_RANGE(R300_PFS_INSTR1_0, 64);
277182080Srnoland		ADD_RANGE(R300_PFS_INSTR2_0, 64);
278182080Srnoland		ADD_RANGE(R300_PFS_INSTR3_0, 64);
279182080Srnoland		ADD_RANGE(R300_RS_INTERP_0, 8);
280182080Srnoland		ADD_RANGE(R300_RS_ROUTE_0, 8);
281182080Srnoland
282182080Srnoland	}
283148211Sanholt}
284148211Sanholt
285157617Sanholtstatic __inline__ int r300_check_range(unsigned reg, int count)
286148211Sanholt{
287148211Sanholt	int i;
288157617Sanholt	if (reg & ~0xffff)
289157617Sanholt		return -1;
290157617Sanholt	for (i = (reg >> 2); i < (reg >> 2) + count; i++)
291157617Sanholt		if (r300_reg_flags[i] != MARK_SAFE)
292157617Sanholt			return 1;
293148211Sanholt	return 0;
294148211Sanholt}
295148211Sanholt
296157617Sanholtstatic __inline__ int r300_emit_carefully_checked_packet0(drm_radeon_private_t *
297157617Sanholt							  dev_priv,
298157617Sanholt							  drm_radeon_kcmd_buffer_t
299157617Sanholt							  * cmdbuf,
300157617Sanholt							  drm_r300_cmd_header_t
301157617Sanholt							  header)
302148211Sanholt{
303148211Sanholt	int reg;
304148211Sanholt	int sz;
305148211Sanholt	int i;
306148211Sanholt	int values[64];
307148211Sanholt	RING_LOCALS;
308148211Sanholt
309148211Sanholt	sz = header.packet0.count;
310148211Sanholt	reg = (header.packet0.reghi << 8) | header.packet0.reglo;
311157617Sanholt
312157617Sanholt	if ((sz > 64) || (sz < 0)) {
313157617Sanholt		DRM_ERROR
314157617Sanholt		    ("Cannot emit more than 64 values at a time (reg=%04x sz=%d)\n",
315157617Sanholt		     reg, sz);
316182080Srnoland		return -EINVAL;
317157617Sanholt	}
318157617Sanholt	for (i = 0; i < sz; i++) {
319157617Sanholt		values[i] = ((int *)cmdbuf->buf)[i];
320157617Sanholt		switch (r300_reg_flags[(reg >> 2) + i]) {
321148211Sanholt		case MARK_SAFE:
322148211Sanholt			break;
323148211Sanholt		case MARK_CHECK_OFFSET:
324182080Srnoland			if (!radeon_check_offset(dev_priv, (u32) values[i])) {
325157617Sanholt				DRM_ERROR
326157617Sanholt				    ("Offset failed range check (reg=%04x sz=%d)\n",
327157617Sanholt				     reg, sz);
328182080Srnoland				return -EINVAL;
329157617Sanholt			}
330148211Sanholt			break;
331148211Sanholt		default:
332157617Sanholt			DRM_ERROR("Register %04x failed check as flag=%02x\n",
333157617Sanholt				  reg + i * 4, r300_reg_flags[(reg >> 2) + i]);
334182080Srnoland			return -EINVAL;
335148211Sanholt		}
336157617Sanholt	}
337157617Sanholt
338157617Sanholt	BEGIN_RING(1 + sz);
339157617Sanholt	OUT_RING(CP_PACKET0(reg, sz - 1));
340157617Sanholt	OUT_RING_TABLE(values, sz);
341148211Sanholt	ADVANCE_RING();
342148211Sanholt
343157617Sanholt	cmdbuf->buf += sz * 4;
344157617Sanholt	cmdbuf->bufsz -= sz * 4;
345148211Sanholt
346148211Sanholt	return 0;
347148211Sanholt}
348148211Sanholt
349148211Sanholt/**
350148211Sanholt * Emits a packet0 setting arbitrary registers.
351148211Sanholt * Called by r300_do_cp_cmdbuf.
352148211Sanholt *
353148211Sanholt * Note that checks are performed on contents and addresses of the registers
354148211Sanholt */
355157617Sanholtstatic __inline__ int r300_emit_packet0(drm_radeon_private_t *dev_priv,
356157617Sanholt					drm_radeon_kcmd_buffer_t *cmdbuf,
357157617Sanholt					drm_r300_cmd_header_t header)
358148211Sanholt{
359148211Sanholt	int reg;
360148211Sanholt	int sz;
361148211Sanholt	RING_LOCALS;
362148211Sanholt
363148211Sanholt	sz = header.packet0.count;
364148211Sanholt	reg = (header.packet0.reghi << 8) | header.packet0.reglo;
365148211Sanholt
366182080Srnoland	DRM_DEBUG("R300_CMD_PACKET0: reg %04x, sz %d\n", reg, sz);
367148211Sanholt	if (!sz)
368148211Sanholt		return 0;
369148211Sanholt
370157617Sanholt	if (sz * 4 > cmdbuf->bufsz)
371182080Srnoland		return -EINVAL;
372157617Sanholt
373157617Sanholt	if (reg + sz * 4 >= 0x10000) {
374157617Sanholt		DRM_ERROR("No such registers in hardware reg=%04x sz=%d\n", reg,
375157617Sanholt			  sz);
376182080Srnoland		return -EINVAL;
377157617Sanholt	}
378148211Sanholt
379157617Sanholt	if (r300_check_range(reg, sz)) {
380148211Sanholt		/* go and check everything */
381157617Sanholt		return r300_emit_carefully_checked_packet0(dev_priv, cmdbuf,
382157617Sanholt							   header);
383157617Sanholt	}
384148211Sanholt	/* the rest of the data is safe to emit, whatever the values the user passed */
385148211Sanholt
386157617Sanholt	BEGIN_RING(1 + sz);
387157617Sanholt	OUT_RING(CP_PACKET0(reg, sz - 1));
388157617Sanholt	OUT_RING_TABLE((int *)cmdbuf->buf, sz);
389148211Sanholt	ADVANCE_RING();
390148211Sanholt
391157617Sanholt	cmdbuf->buf += sz * 4;
392157617Sanholt	cmdbuf->bufsz -= sz * 4;
393148211Sanholt
394148211Sanholt	return 0;
395148211Sanholt}
396148211Sanholt
397148211Sanholt/**
398148211Sanholt * Uploads user-supplied vertex program instructions or parameters onto
399148211Sanholt * the graphics card.
400148211Sanholt * Called by r300_do_cp_cmdbuf.
401148211Sanholt */
402157617Sanholtstatic __inline__ int r300_emit_vpu(drm_radeon_private_t *dev_priv,
403157617Sanholt				    drm_radeon_kcmd_buffer_t *cmdbuf,
404148211Sanholt				    drm_r300_cmd_header_t header)
405148211Sanholt{
406148211Sanholt	int sz;
407148211Sanholt	int addr;
408148211Sanholt	RING_LOCALS;
409148211Sanholt
410148211Sanholt	sz = header.vpu.count;
411148211Sanholt	addr = (header.vpu.adrhi << 8) | header.vpu.adrlo;
412148211Sanholt
413148211Sanholt	if (!sz)
414148211Sanholt		return 0;
415157617Sanholt	if (sz * 16 > cmdbuf->bufsz)
416182080Srnoland		return -EINVAL;
417148211Sanholt
418182080Srnoland	/* VAP is very sensitive so we purge cache before we program it
419182080Srnoland	 * and we also flush its state before & after */
420182080Srnoland	BEGIN_RING(6);
421182080Srnoland	OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
422182080Srnoland	OUT_RING(R300_RB3D_DC_FLUSH);
423182080Srnoland	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
424182080Srnoland	OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
425182080Srnoland	OUT_RING(CP_PACKET0(R300_VAP_PVS_STATE_FLUSH_REG, 0));
426182080Srnoland	OUT_RING(0);
427182080Srnoland	ADVANCE_RING();
428182080Srnoland	/* set flush flag */
429182080Srnoland	dev_priv->track_flush |= RADEON_FLUSH_EMITED;
430182080Srnoland
431182080Srnoland	BEGIN_RING(3 + sz * 4);
432157617Sanholt	OUT_RING_REG(R300_VAP_PVS_UPLOAD_ADDRESS, addr);
433157617Sanholt	OUT_RING(CP_PACKET0_TABLE(R300_VAP_PVS_UPLOAD_DATA, sz * 4 - 1));
434157617Sanholt	OUT_RING_TABLE((int *)cmdbuf->buf, sz * 4);
435182080Srnoland	ADVANCE_RING();
436148211Sanholt
437182080Srnoland	BEGIN_RING(2);
438182080Srnoland	OUT_RING(CP_PACKET0(R300_VAP_PVS_STATE_FLUSH_REG, 0));
439182080Srnoland	OUT_RING(0);
440148211Sanholt	ADVANCE_RING();
441148211Sanholt
442157617Sanholt	cmdbuf->buf += sz * 16;
443157617Sanholt	cmdbuf->bufsz -= sz * 16;
444148211Sanholt
445148211Sanholt	return 0;
446148211Sanholt}
447148211Sanholt
448148211Sanholt/**
449148211Sanholt * Emit a clear packet from userspace.
450148211Sanholt * Called by r300_emit_packet3.
451148211Sanholt */
452157617Sanholtstatic __inline__ int r300_emit_clear(drm_radeon_private_t *dev_priv,
453157617Sanholt				      drm_radeon_kcmd_buffer_t *cmdbuf)
454148211Sanholt{
455148211Sanholt	RING_LOCALS;
456148211Sanholt
457157617Sanholt	if (8 * 4 > cmdbuf->bufsz)
458182080Srnoland		return -EINVAL;
459148211Sanholt
460148211Sanholt	BEGIN_RING(10);
461157617Sanholt	OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
462157617Sanholt	OUT_RING(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
463157617Sanholt		 (1 << R300_PRIM_NUM_VERTICES_SHIFT));
464157617Sanholt	OUT_RING_TABLE((int *)cmdbuf->buf, 8);
465148211Sanholt	ADVANCE_RING();
466148211Sanholt
467182080Srnoland	BEGIN_RING(4);
468182080Srnoland	OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
469182080Srnoland	OUT_RING(R300_RB3D_DC_FLUSH);
470182080Srnoland	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
471182080Srnoland	OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
472182080Srnoland	ADVANCE_RING();
473182080Srnoland	/* set flush flag */
474182080Srnoland	dev_priv->track_flush |= RADEON_FLUSH_EMITED;
475182080Srnoland
476157617Sanholt	cmdbuf->buf += 8 * 4;
477157617Sanholt	cmdbuf->bufsz -= 8 * 4;
478148211Sanholt
479148211Sanholt	return 0;
480148211Sanholt}
481148211Sanholt
482157617Sanholtstatic __inline__ int r300_emit_3d_load_vbpntr(drm_radeon_private_t *dev_priv,
483157617Sanholt					       drm_radeon_kcmd_buffer_t *cmdbuf,
484157617Sanholt					       u32 header)
485148211Sanholt{
486157617Sanholt	int count, i, k;
487157617Sanholt#define MAX_ARRAY_PACKET  64
488148211Sanholt	u32 payload[MAX_ARRAY_PACKET];
489148211Sanholt	u32 narrays;
490148211Sanholt	RING_LOCALS;
491148211Sanholt
492157617Sanholt	count = (header >> 16) & 0x3fff;
493157617Sanholt
494157617Sanholt	if ((count + 1) > MAX_ARRAY_PACKET) {
495157617Sanholt		DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
496157617Sanholt			  count);
497182080Srnoland		return -EINVAL;
498157617Sanholt	}
499157617Sanholt	memset(payload, 0, MAX_ARRAY_PACKET * 4);
500157617Sanholt	memcpy(payload, cmdbuf->buf + 4, (count + 1) * 4);
501157617Sanholt
502148211Sanholt	/* carefully check packet contents */
503157617Sanholt
504157617Sanholt	narrays = payload[0];
505157617Sanholt	k = 0;
506157617Sanholt	i = 1;
507157617Sanholt	while ((k < narrays) && (i < (count + 1))) {
508157617Sanholt		i++;		/* skip attribute field */
509182080Srnoland		if (!radeon_check_offset(dev_priv, payload[i])) {
510157617Sanholt			DRM_ERROR
511157617Sanholt			    ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
512157617Sanholt			     k, i);
513182080Srnoland			return -EINVAL;
514157617Sanholt		}
515148211Sanholt		k++;
516148211Sanholt		i++;
517157617Sanholt		if (k == narrays)
518157617Sanholt			break;
519148211Sanholt		/* have one more to process, they come in pairs */
520182080Srnoland		if (!radeon_check_offset(dev_priv, payload[i])) {
521157617Sanholt			DRM_ERROR
522157617Sanholt			    ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
523157617Sanholt			     k, i);
524182080Srnoland			return -EINVAL;
525157617Sanholt		}
526148211Sanholt		k++;
527157617Sanholt		i++;
528157617Sanholt	}
529148211Sanholt	/* do the counts match what we expect ? */
530157617Sanholt	if ((k != narrays) || (i != (count + 1))) {
531157617Sanholt		DRM_ERROR
532157617Sanholt		    ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
533157617Sanholt		     k, i, narrays, count + 1);
534182080Srnoland		return -EINVAL;
535157617Sanholt	}
536148211Sanholt
537148211Sanholt	/* all clear, output packet */
538148211Sanholt
539157617Sanholt	BEGIN_RING(count + 2);
540148211Sanholt	OUT_RING(header);
541157617Sanholt	OUT_RING_TABLE(payload, count + 1);
542148211Sanholt	ADVANCE_RING();
543148211Sanholt
544157617Sanholt	cmdbuf->buf += (count + 2) * 4;
545157617Sanholt	cmdbuf->bufsz -= (count + 2) * 4;
546157617Sanholt
547157617Sanholt	return 0;
548157617Sanholt}
549157617Sanholt
550157617Sanholtstatic __inline__ int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv,
551157617Sanholt					     drm_radeon_kcmd_buffer_t *cmdbuf)
552157617Sanholt{
553157617Sanholt	u32 *cmd = (u32 *) cmdbuf->buf;
554157617Sanholt	int count, ret;
555157617Sanholt	RING_LOCALS;
556157617Sanholt
557157617Sanholt	count=(cmd[0]>>16) & 0x3fff;
558157617Sanholt
559157617Sanholt	if (cmd[0] & 0x8000) {
560157617Sanholt		u32 offset;
561157617Sanholt
562182080Srnoland		if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
563157617Sanholt			      | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
564157617Sanholt			offset = cmd[2] << 10;
565182080Srnoland			ret = !radeon_check_offset(dev_priv, offset);
566157617Sanholt			if (ret) {
567157617Sanholt				DRM_ERROR("Invalid bitblt first offset is %08X\n", offset);
568182080Srnoland				return -EINVAL;
569157617Sanholt			}
570157617Sanholt		}
571157617Sanholt
572157617Sanholt		if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
573157617Sanholt		    (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
574157617Sanholt			offset = cmd[3] << 10;
575182080Srnoland			ret = !radeon_check_offset(dev_priv, offset);
576157617Sanholt			if (ret) {
577157617Sanholt				DRM_ERROR("Invalid bitblt second offset is %08X\n", offset);
578182080Srnoland				return -EINVAL;
579157617Sanholt			}
580182080Srnoland
581157617Sanholt		}
582157617Sanholt	}
583157617Sanholt
584157617Sanholt	BEGIN_RING(count+2);
585157617Sanholt	OUT_RING(cmd[0]);
586157617Sanholt	OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
587157617Sanholt	ADVANCE_RING();
588157617Sanholt
589148211Sanholt	cmdbuf->buf += (count+2)*4;
590148211Sanholt	cmdbuf->bufsz -= (count+2)*4;
591148211Sanholt
592148211Sanholt	return 0;
593148211Sanholt}
594148211Sanholt
595182080Srnolandstatic __inline__ int r300_emit_draw_indx_2(drm_radeon_private_t *dev_priv,
596182080Srnoland					    drm_radeon_kcmd_buffer_t *cmdbuf)
597182080Srnoland{
598182080Srnoland	u32 *cmd;
599182080Srnoland	int count;
600182080Srnoland	int expected_count;
601182080Srnoland	RING_LOCALS;
602182080Srnoland
603182080Srnoland	cmd = (u32 *) cmdbuf->buf;
604182080Srnoland	count = (cmd[0]>>16) & 0x3fff;
605182080Srnoland	expected_count = cmd[1] >> 16;
606182080Srnoland	if (!(cmd[1] & R300_VAP_VF_CNTL__INDEX_SIZE_32bit))
607182080Srnoland		expected_count = (expected_count+1)/2;
608182080Srnoland
609182080Srnoland	if (count && count != expected_count) {
610182080Srnoland		DRM_ERROR("3D_DRAW_INDX_2: packet size %i, expected %i\n",
611182080Srnoland			count, expected_count);
612182080Srnoland		return -EINVAL;
613182080Srnoland	}
614182080Srnoland
615182080Srnoland	BEGIN_RING(count+2);
616182080Srnoland	OUT_RING(cmd[0]);
617182080Srnoland	OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
618182080Srnoland	ADVANCE_RING();
619182080Srnoland
620182080Srnoland	cmdbuf->buf += (count+2)*4;
621182080Srnoland	cmdbuf->bufsz -= (count+2)*4;
622182080Srnoland
623182080Srnoland	if (!count) {
624182080Srnoland		drm_r300_cmd_header_t header;
625182080Srnoland
626182080Srnoland		if (cmdbuf->bufsz < 4*4 + sizeof(header)) {
627182080Srnoland			DRM_ERROR("3D_DRAW_INDX_2: expect subsequent INDX_BUFFER, but stream is too short.\n");
628182080Srnoland			return -EINVAL;
629182080Srnoland		}
630182080Srnoland
631182080Srnoland		header.u = *(unsigned int *)cmdbuf->buf;
632182080Srnoland
633182080Srnoland		cmdbuf->buf += sizeof(header);
634182080Srnoland		cmdbuf->bufsz -= sizeof(header);
635182080Srnoland		cmd = (u32 *) cmdbuf->buf;
636182080Srnoland
637182080Srnoland		if (header.header.cmd_type != R300_CMD_PACKET3 ||
638182080Srnoland		    header.packet3.packet != R300_CMD_PACKET3_RAW ||
639182080Srnoland		    cmd[0] != CP_PACKET3(RADEON_CP_INDX_BUFFER, 2)) {
640182080Srnoland			DRM_ERROR("3D_DRAW_INDX_2: expect subsequent INDX_BUFFER.\n");
641182080Srnoland			return -EINVAL;
642182080Srnoland		}
643182080Srnoland
644182080Srnoland		if ((cmd[1] & 0x8000ffff) != 0x80000810) {
645182080Srnoland			DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
646182080Srnoland			return -EINVAL;
647182080Srnoland		}
648182080Srnoland		if (!radeon_check_offset(dev_priv, cmd[2])) {
649182080Srnoland			DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
650182080Srnoland			return -EINVAL;
651182080Srnoland		}
652182080Srnoland		if (cmd[3] != expected_count) {
653182080Srnoland			DRM_ERROR("INDX_BUFFER: buffer size %i, expected %i\n",
654182080Srnoland				cmd[3], expected_count);
655182080Srnoland			return -EINVAL;
656182080Srnoland		}
657182080Srnoland
658182080Srnoland		BEGIN_RING(4);
659182080Srnoland		OUT_RING(cmd[0]);
660182080Srnoland		OUT_RING_TABLE((int *)(cmdbuf->buf + 4), 3);
661182080Srnoland		ADVANCE_RING();
662182080Srnoland
663182080Srnoland		cmdbuf->buf += 4*4;
664182080Srnoland		cmdbuf->bufsz -= 4*4;
665182080Srnoland	}
666182080Srnoland
667182080Srnoland	return 0;
668182080Srnoland}
669182080Srnoland
670157617Sanholtstatic __inline__ int r300_emit_raw_packet3(drm_radeon_private_t *dev_priv,
671157617Sanholt					    drm_radeon_kcmd_buffer_t *cmdbuf)
672148211Sanholt{
673148211Sanholt	u32 header;
674148211Sanholt	int count;
675148211Sanholt	RING_LOCALS;
676148211Sanholt
677148211Sanholt	if (4 > cmdbuf->bufsz)
678182080Srnoland		return -EINVAL;
679148211Sanholt
680157617Sanholt	/* Fixme !! This simply emits a packet without much checking.
681148211Sanholt	   We need to be smarter. */
682148211Sanholt
683148211Sanholt	/* obtain first word - actual packet3 header */
684157617Sanholt	header = *(u32 *) cmdbuf->buf;
685148211Sanholt
686148211Sanholt	/* Is it packet 3 ? */
687157617Sanholt	if ((header >> 30) != 0x3) {
688148211Sanholt		DRM_ERROR("Not a packet3 header (0x%08x)\n", header);
689182080Srnoland		return -EINVAL;
690157617Sanholt	}
691148211Sanholt
692157617Sanholt	count = (header >> 16) & 0x3fff;
693148211Sanholt
694148211Sanholt	/* Check again now that we know how much data to expect */
695157617Sanholt	if ((count + 2) * 4 > cmdbuf->bufsz) {
696157617Sanholt		DRM_ERROR
697157617Sanholt		    ("Expected packet3 of length %d but have only %d bytes left\n",
698157617Sanholt		     (count + 2) * 4, cmdbuf->bufsz);
699182080Srnoland		return -EINVAL;
700157617Sanholt	}
701148211Sanholt
702148211Sanholt	/* Is it a packet type we know about ? */
703157617Sanholt	switch (header & 0xff00) {
704157617Sanholt	case RADEON_3D_LOAD_VBPNTR:	/* load vertex array pointers */
705148211Sanholt		return r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, header);
706148211Sanholt
707157617Sanholt	case RADEON_CNTL_BITBLT_MULTI:
708157617Sanholt		return r300_emit_bitblt_multi(dev_priv, cmdbuf);
709157617Sanholt
710182080Srnoland	case RADEON_CP_INDX_BUFFER:
711182080Srnoland		DRM_ERROR("packet3 INDX_BUFFER without preceding 3D_DRAW_INDX_2 is illegal.\n");
712182080Srnoland		return -EINVAL;
713182080Srnoland	case RADEON_CP_3D_DRAW_IMMD_2:
714182080Srnoland		/* triggers drawing using in-packet vertex data */
715182080Srnoland	case RADEON_CP_3D_DRAW_VBUF_2:
716182080Srnoland		/* triggers drawing of vertex buffers setup elsewhere */
717182080Srnoland		dev_priv->track_flush &= ~(RADEON_FLUSH_EMITED |
718182080Srnoland					   RADEON_PURGE_EMITED);
719182080Srnoland		break;
720182080Srnoland	case RADEON_CP_3D_DRAW_INDX_2:
721182080Srnoland		/* triggers drawing using indices to vertex buffer */
722182080Srnoland		/* whenever we send vertex we clear flush & purge */
723182080Srnoland		dev_priv->track_flush &= ~(RADEON_FLUSH_EMITED |
724182080Srnoland					   RADEON_PURGE_EMITED);
725182080Srnoland		return r300_emit_draw_indx_2(dev_priv, cmdbuf);
726148211Sanholt	case RADEON_WAIT_FOR_IDLE:
727148211Sanholt	case RADEON_CP_NOP:
728148211Sanholt		/* these packets are safe */
729148211Sanholt		break;
730148211Sanholt	default:
731148211Sanholt		DRM_ERROR("Unknown packet3 header (0x%08x)\n", header);
732182080Srnoland		return -EINVAL;
733157617Sanholt	}
734148211Sanholt
735157617Sanholt	BEGIN_RING(count + 2);
736148211Sanholt	OUT_RING(header);
737157617Sanholt	OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
738148211Sanholt	ADVANCE_RING();
739148211Sanholt
740157617Sanholt	cmdbuf->buf += (count + 2) * 4;
741157617Sanholt	cmdbuf->bufsz -= (count + 2) * 4;
742148211Sanholt
743148211Sanholt	return 0;
744148211Sanholt}
745148211Sanholt
746148211Sanholt/**
747148211Sanholt * Emit a rendering packet3 from userspace.
748148211Sanholt * Called by r300_do_cp_cmdbuf.
749148211Sanholt */
750157617Sanholtstatic __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
751157617Sanholt					drm_radeon_kcmd_buffer_t *cmdbuf,
752148211Sanholt					drm_r300_cmd_header_t header)
753148211Sanholt{
754148211Sanholt	int n;
755148211Sanholt	int ret;
756157617Sanholt	char *orig_buf = cmdbuf->buf;
757148211Sanholt	int orig_bufsz = cmdbuf->bufsz;
758148211Sanholt
759148211Sanholt	/* This is a do-while-loop so that we run the interior at least once,
760148211Sanholt	 * even if cmdbuf->nbox is 0. Compare r300_emit_cliprects for rationale.
761148211Sanholt	 */
762148211Sanholt	n = 0;
763148211Sanholt	do {
764148211Sanholt		if (cmdbuf->nbox > R300_SIMULTANEOUS_CLIPRECTS) {
765148211Sanholt			ret = r300_emit_cliprects(dev_priv, cmdbuf, n);
766148211Sanholt			if (ret)
767148211Sanholt				return ret;
768148211Sanholt
769148211Sanholt			cmdbuf->buf = orig_buf;
770148211Sanholt			cmdbuf->bufsz = orig_bufsz;
771157617Sanholt		}
772148211Sanholt
773157617Sanholt		switch (header.packet3.packet) {
774148211Sanholt		case R300_CMD_PACKET3_CLEAR:
775148211Sanholt			DRM_DEBUG("R300_CMD_PACKET3_CLEAR\n");
776148211Sanholt			ret = r300_emit_clear(dev_priv, cmdbuf);
777148211Sanholt			if (ret) {
778148211Sanholt				DRM_ERROR("r300_emit_clear failed\n");
779148211Sanholt				return ret;
780157617Sanholt			}
781148211Sanholt			break;
782148211Sanholt
783148211Sanholt		case R300_CMD_PACKET3_RAW:
784148211Sanholt			DRM_DEBUG("R300_CMD_PACKET3_RAW\n");
785148211Sanholt			ret = r300_emit_raw_packet3(dev_priv, cmdbuf);
786148211Sanholt			if (ret) {
787148211Sanholt				DRM_ERROR("r300_emit_raw_packet3 failed\n");
788148211Sanholt				return ret;
789157617Sanholt			}
790148211Sanholt			break;
791148211Sanholt
792148211Sanholt		default:
793148211Sanholt			DRM_ERROR("bad packet3 type %i at %p\n",
794157617Sanholt				  header.packet3.packet,
795157617Sanholt				  cmdbuf->buf - sizeof(header));
796182080Srnoland			return -EINVAL;
797157617Sanholt		}
798148211Sanholt
799148211Sanholt		n += R300_SIMULTANEOUS_CLIPRECTS;
800157617Sanholt	} while (n < cmdbuf->nbox);
801148211Sanholt
802148211Sanholt	return 0;
803148211Sanholt}
804148211Sanholt
805148211Sanholt/* Some of the R300 chips seem to be extremely touchy about the two registers
806148211Sanholt * that are configured in r300_pacify.
807148211Sanholt * Among the worst offenders seems to be the R300 ND (0x4E44): When userspace
808148211Sanholt * sends a command buffer that contains only state setting commands and a
809148211Sanholt * vertex program/parameter upload sequence, this will eventually lead to a
810148211Sanholt * lockup, unless the sequence is bracketed by calls to r300_pacify.
811148211Sanholt * So we should take great care to *always* call r300_pacify before
812148211Sanholt * *anything* 3D related, and again afterwards. This is what the
813148211Sanholt * call bracket in r300_do_cp_cmdbuf is for.
814148211Sanholt */
815148211Sanholt
816148211Sanholt/**
817148211Sanholt * Emit the sequence to pacify R300.
818148211Sanholt */
819157617Sanholtstatic __inline__ void r300_pacify(drm_radeon_private_t *dev_priv)
820148211Sanholt{
821182080Srnoland	uint32_t cache_z, cache_3d, cache_2d;
822148211Sanholt	RING_LOCALS;
823148211Sanholt
824182080Srnoland	cache_z = R300_ZC_FLUSH;
825182080Srnoland	cache_2d = R300_RB2D_DC_FLUSH;
826182080Srnoland	cache_3d = R300_RB3D_DC_FLUSH;
827182080Srnoland	if (!(dev_priv->track_flush & RADEON_PURGE_EMITED)) {
828182080Srnoland		/* we can purge, primitive where draw since last purge */
829182080Srnoland		cache_z |= R300_ZC_FREE;
830182080Srnoland		cache_2d |= R300_RB2D_DC_FREE;
831182080Srnoland		cache_3d |= R300_RB3D_DC_FREE;
832182080Srnoland	}
833182080Srnoland
834182080Srnoland	/* flush & purge zbuffer */
835182080Srnoland	BEGIN_RING(2);
836182080Srnoland	OUT_RING(CP_PACKET0(R300_ZB_ZCACHE_CTLSTAT, 0));
837182080Srnoland	OUT_RING(cache_z);
838182080Srnoland	ADVANCE_RING();
839182080Srnoland	/* flush & purge 3d */
840182080Srnoland	BEGIN_RING(2);
841157617Sanholt	OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
842182080Srnoland	OUT_RING(cache_3d);
843148211Sanholt	ADVANCE_RING();
844182080Srnoland	/* flush & purge texture */
845182080Srnoland	BEGIN_RING(2);
846182080Srnoland	OUT_RING(CP_PACKET0(R300_TX_INVALTAGS, 0));
847182080Srnoland	OUT_RING(0);
848182080Srnoland	ADVANCE_RING();
849182080Srnoland	/* FIXME: is this one really needed ? */
850182080Srnoland	BEGIN_RING(2);
851182080Srnoland	OUT_RING(CP_PACKET0(R300_RB3D_AARESOLVE_CTL, 0));
852182080Srnoland	OUT_RING(0);
853182080Srnoland	ADVANCE_RING();
854182080Srnoland	BEGIN_RING(2);
855182080Srnoland	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
856182080Srnoland	OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
857182080Srnoland	ADVANCE_RING();
858182080Srnoland	/* flush & purge 2d through E2 as RB2D will trigger lockup */
859182080Srnoland	BEGIN_RING(4);
860182080Srnoland	OUT_RING(CP_PACKET0(R300_DSTCACHE_CTLSTAT, 0));
861182080Srnoland	OUT_RING(cache_2d);
862182080Srnoland	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
863182080Srnoland	OUT_RING(RADEON_WAIT_2D_IDLECLEAN |
864182080Srnoland		 RADEON_WAIT_HOST_IDLECLEAN);
865182080Srnoland	ADVANCE_RING();
866182080Srnoland	/* set flush & purge flags */
867182080Srnoland	dev_priv->track_flush |= RADEON_FLUSH_EMITED | RADEON_PURGE_EMITED;
868148211Sanholt}
869148211Sanholt
870148211Sanholt/**
871148211Sanholt * Called by r300_do_cp_cmdbuf to update the internal buffer age and state.
872148211Sanholt * The actual age emit is done by r300_do_cp_cmdbuf, which is why you must
873148211Sanholt * be careful about how this function is called.
874148211Sanholt */
875182080Srnolandstatic void r300_discard_buffer(struct drm_device * dev, struct drm_buf * buf)
876148211Sanholt{
877148211Sanholt	drm_radeon_private_t *dev_priv = dev->dev_private;
878148211Sanholt	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
879148211Sanholt
880148624Sanholt	buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
881148211Sanholt	buf->pending = 1;
882148211Sanholt	buf->used = 0;
883148211Sanholt}
884148211Sanholt
885182080Srnolandstatic void r300_cmd_wait(drm_radeon_private_t * dev_priv,
886182080Srnoland			  drm_r300_cmd_header_t header)
887182080Srnoland{
888182080Srnoland	u32 wait_until;
889182080Srnoland	RING_LOCALS;
890182080Srnoland
891182080Srnoland	if (!header.wait.flags)
892182080Srnoland		return;
893182080Srnoland
894182080Srnoland	wait_until = 0;
895182080Srnoland
896182080Srnoland	switch(header.wait.flags) {
897182080Srnoland	case R300_WAIT_2D:
898182080Srnoland		wait_until = RADEON_WAIT_2D_IDLE;
899182080Srnoland		break;
900182080Srnoland	case R300_WAIT_3D:
901182080Srnoland		wait_until = RADEON_WAIT_3D_IDLE;
902182080Srnoland		break;
903182080Srnoland	case R300_NEW_WAIT_2D_3D:
904182080Srnoland		wait_until = RADEON_WAIT_2D_IDLE|RADEON_WAIT_3D_IDLE;
905182080Srnoland		break;
906182080Srnoland	case R300_NEW_WAIT_2D_2D_CLEAN:
907182080Srnoland		wait_until = RADEON_WAIT_2D_IDLE|RADEON_WAIT_2D_IDLECLEAN;
908182080Srnoland		break;
909182080Srnoland	case R300_NEW_WAIT_3D_3D_CLEAN:
910182080Srnoland		wait_until = RADEON_WAIT_3D_IDLE|RADEON_WAIT_3D_IDLECLEAN;
911182080Srnoland		break;
912182080Srnoland	case R300_NEW_WAIT_2D_2D_CLEAN_3D_3D_CLEAN:
913182080Srnoland		wait_until = RADEON_WAIT_2D_IDLE|RADEON_WAIT_2D_IDLECLEAN;
914182080Srnoland		wait_until |= RADEON_WAIT_3D_IDLE|RADEON_WAIT_3D_IDLECLEAN;
915182080Srnoland		break;
916182080Srnoland	default:
917182080Srnoland		return;
918182080Srnoland	}
919182080Srnoland
920182080Srnoland	BEGIN_RING(2);
921182080Srnoland	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
922182080Srnoland	OUT_RING(wait_until);
923182080Srnoland	ADVANCE_RING();
924182080Srnoland}
925182080Srnoland
926157617Sanholtstatic int r300_scratch(drm_radeon_private_t *dev_priv,
927157617Sanholt			drm_radeon_kcmd_buffer_t *cmdbuf,
928157617Sanholt			drm_r300_cmd_header_t header)
929157617Sanholt{
930157617Sanholt	u32 *ref_age_base;
931157617Sanholt	u32 i, buf_idx, h_pending;
932157617Sanholt	RING_LOCALS;
933182080Srnoland
934157617Sanholt	if (cmdbuf->bufsz < sizeof(uint64_t) + header.scratch.n_bufs * sizeof(buf_idx) ) {
935182080Srnoland		return -EINVAL;
936157617Sanholt	}
937182080Srnoland
938157617Sanholt	if (header.scratch.reg >= 5) {
939182080Srnoland		return -EINVAL;
940157617Sanholt	}
941182080Srnoland
942157617Sanholt	dev_priv->scratch_ages[header.scratch.reg] ++;
943182080Srnoland
944158683Sanholt	ref_age_base = (u32 *)(unsigned long)*((uint64_t *)cmdbuf->buf);
945182080Srnoland
946157617Sanholt	cmdbuf->buf += sizeof(uint64_t);
947157617Sanholt	cmdbuf->bufsz -= sizeof(uint64_t);
948182080Srnoland
949157617Sanholt	for (i=0; i < header.scratch.n_bufs; i++) {
950157617Sanholt		buf_idx = *(u32 *)cmdbuf->buf;
951157617Sanholt		buf_idx *= 2; /* 8 bytes per buf */
952182080Srnoland
953157617Sanholt		if (DRM_COPY_TO_USER(ref_age_base + buf_idx, &dev_priv->scratch_ages[header.scratch.reg], sizeof(u32))) {
954182080Srnoland			return -EINVAL;
955157617Sanholt		}
956182080Srnoland
957157617Sanholt		if (DRM_COPY_FROM_USER(&h_pending, ref_age_base + buf_idx + 1, sizeof(u32))) {
958182080Srnoland			return -EINVAL;
959157617Sanholt		}
960182080Srnoland
961157617Sanholt		if (h_pending == 0) {
962182080Srnoland			return -EINVAL;
963157617Sanholt		}
964182080Srnoland
965157617Sanholt		h_pending--;
966182080Srnoland
967157617Sanholt		if (DRM_COPY_TO_USER(ref_age_base + buf_idx + 1, &h_pending, sizeof(u32))) {
968182080Srnoland			return -EINVAL;
969157617Sanholt		}
970182080Srnoland
971157617Sanholt		cmdbuf->buf += sizeof(buf_idx);
972157617Sanholt		cmdbuf->bufsz -= sizeof(buf_idx);
973157617Sanholt	}
974182080Srnoland
975157617Sanholt	BEGIN_RING(2);
976157617Sanholt	OUT_RING( CP_PACKET0( RADEON_SCRATCH_REG0 + header.scratch.reg * 4, 0 ) );
977157617Sanholt	OUT_RING( dev_priv->scratch_ages[header.scratch.reg] );
978157617Sanholt	ADVANCE_RING();
979182080Srnoland
980157617Sanholt	return 0;
981157617Sanholt}
982148211Sanholt
983148211Sanholt/**
984182080Srnoland * Uploads user-supplied vertex program instructions or parameters onto
985182080Srnoland * the graphics card.
986182080Srnoland * Called by r300_do_cp_cmdbuf.
987182080Srnoland */
988182080Srnolandstatic __inline__ int r300_emit_r500fp(drm_radeon_private_t *dev_priv,
989182080Srnoland				       drm_radeon_kcmd_buffer_t *cmdbuf,
990182080Srnoland				       drm_r300_cmd_header_t header)
991182080Srnoland{
992182080Srnoland	int sz;
993182080Srnoland	int addr;
994182080Srnoland	int type;
995182080Srnoland	int clamp;
996182080Srnoland	int stride;
997182080Srnoland	RING_LOCALS;
998182080Srnoland
999182080Srnoland	sz = header.r500fp.count;
1000182080Srnoland	/* address is 9 bits 0 - 8, bit 1 of flags is part of address */
1001182080Srnoland	addr = ((header.r500fp.adrhi_flags & 1) << 8) | header.r500fp.adrlo;
1002182080Srnoland
1003182080Srnoland	type = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_TYPE);
1004182080Srnoland	clamp = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_CLAMP);
1005182080Srnoland
1006182080Srnoland	addr |= (type << 16);
1007182080Srnoland	addr |= (clamp << 17);
1008182080Srnoland
1009182080Srnoland	stride = type ? 4 : 6;
1010182080Srnoland
1011182080Srnoland	DRM_DEBUG("r500fp %d %d type: %d\n", sz, addr, type);
1012182080Srnoland	if (!sz)
1013182080Srnoland		return 0;
1014182080Srnoland	if (sz * stride * 4 > cmdbuf->bufsz)
1015182080Srnoland		return -EINVAL;
1016182080Srnoland
1017182080Srnoland	BEGIN_RING(3 + sz * stride);
1018182080Srnoland	OUT_RING_REG(R500_GA_US_VECTOR_INDEX, addr);
1019182080Srnoland	OUT_RING(CP_PACKET0_TABLE(R500_GA_US_VECTOR_DATA, sz * stride - 1));
1020182080Srnoland	OUT_RING_TABLE((int *)cmdbuf->buf, sz * stride);
1021182080Srnoland
1022182080Srnoland	ADVANCE_RING();
1023182080Srnoland
1024182080Srnoland	cmdbuf->buf += sz * stride * 4;
1025182080Srnoland	cmdbuf->bufsz -= sz * stride * 4;
1026182080Srnoland
1027182080Srnoland	return 0;
1028182080Srnoland}
1029182080Srnoland
1030182080Srnoland
1031182080Srnoland/**
1032148211Sanholt * Parses and validates a user-supplied command buffer and emits appropriate
1033148211Sanholt * commands on the DMA ring buffer.
1034148211Sanholt * Called by the ioctl handler function radeon_cp_cmdbuf.
1035148211Sanholt */
1036182080Srnolandint r300_do_cp_cmdbuf(struct drm_device *dev,
1037182080Srnoland		      struct drm_file *file_priv,
1038157617Sanholt		      drm_radeon_kcmd_buffer_t *cmdbuf)
1039148211Sanholt{
1040148211Sanholt	drm_radeon_private_t *dev_priv = dev->dev_private;
1041182080Srnoland	struct drm_device_dma *dma = dev->dma;
1042182080Srnoland	struct drm_buf *buf = NULL;
1043148211Sanholt	int emit_dispatch_age = 0;
1044148211Sanholt	int ret = 0;
1045148211Sanholt
1046148211Sanholt	DRM_DEBUG("\n");
1047148211Sanholt
1048182080Srnoland	/* pacify */
1049148211Sanholt	r300_pacify(dev_priv);
1050148211Sanholt
1051148211Sanholt	if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) {
1052148211Sanholt		ret = r300_emit_cliprects(dev_priv, cmdbuf, 0);
1053148211Sanholt		if (ret)
1054148211Sanholt			goto cleanup;
1055157617Sanholt	}
1056148211Sanholt
1057157617Sanholt	while (cmdbuf->bufsz >= sizeof(drm_r300_cmd_header_t)) {
1058148211Sanholt		int idx;
1059148211Sanholt		drm_r300_cmd_header_t header;
1060148211Sanholt
1061148211Sanholt		header.u = *(unsigned int *)cmdbuf->buf;
1062148211Sanholt
1063148211Sanholt		cmdbuf->buf += sizeof(header);
1064148211Sanholt		cmdbuf->bufsz -= sizeof(header);
1065148211Sanholt
1066157617Sanholt		switch (header.header.cmd_type) {
1067157617Sanholt		case R300_CMD_PACKET0:
1068148211Sanholt			ret = r300_emit_packet0(dev_priv, cmdbuf, header);
1069148211Sanholt			if (ret) {
1070148211Sanholt				DRM_ERROR("r300_emit_packet0 failed\n");
1071148211Sanholt				goto cleanup;
1072157617Sanholt			}
1073148211Sanholt			break;
1074148211Sanholt
1075148211Sanholt		case R300_CMD_VPU:
1076148211Sanholt			DRM_DEBUG("R300_CMD_VPU\n");
1077148211Sanholt			ret = r300_emit_vpu(dev_priv, cmdbuf, header);
1078148211Sanholt			if (ret) {
1079148211Sanholt				DRM_ERROR("r300_emit_vpu failed\n");
1080148211Sanholt				goto cleanup;
1081157617Sanholt			}
1082148211Sanholt			break;
1083148211Sanholt
1084148211Sanholt		case R300_CMD_PACKET3:
1085148211Sanholt			DRM_DEBUG("R300_CMD_PACKET3\n");
1086148211Sanholt			ret = r300_emit_packet3(dev_priv, cmdbuf, header);
1087148211Sanholt			if (ret) {
1088148211Sanholt				DRM_ERROR("r300_emit_packet3 failed\n");
1089148211Sanholt				goto cleanup;
1090157617Sanholt			}
1091148211Sanholt			break;
1092148211Sanholt
1093148211Sanholt		case R300_CMD_END3D:
1094148211Sanholt			DRM_DEBUG("R300_CMD_END3D\n");
1095157617Sanholt			/* TODO:
1096157617Sanholt			   Ideally userspace driver should not need to issue this call,
1097157617Sanholt			   i.e. the drm driver should issue it automatically and prevent
1098157617Sanholt			   lockups.
1099148211Sanholt
1100157617Sanholt			   In practice, we do not understand why this call is needed and what
1101157617Sanholt			   it does (except for some vague guesses that it has to do with cache
1102157617Sanholt			   coherence) and so the user space driver does it.
1103157617Sanholt
1104157617Sanholt			   Once we are sure which uses prevent lockups the code could be moved
1105157617Sanholt			   into the kernel and the userspace driver will not
1106157617Sanholt			   need to use this command.
1107157617Sanholt
1108157617Sanholt			   Note that issuing this command does not hurt anything
1109157617Sanholt			   except, possibly, performance */
1110148211Sanholt			r300_pacify(dev_priv);
1111148211Sanholt			break;
1112148211Sanholt
1113148211Sanholt		case R300_CMD_CP_DELAY:
1114148211Sanholt			/* simple enough, we can do it here */
1115148211Sanholt			DRM_DEBUG("R300_CMD_CP_DELAY\n");
1116148211Sanholt			{
1117148211Sanholt				int i;
1118148211Sanholt				RING_LOCALS;
1119148211Sanholt
1120148211Sanholt				BEGIN_RING(header.delay.count);
1121157617Sanholt				for (i = 0; i < header.delay.count; i++)
1122148211Sanholt					OUT_RING(RADEON_CP_PACKET2);
1123148211Sanholt				ADVANCE_RING();
1124148211Sanholt			}
1125148211Sanholt			break;
1126148211Sanholt
1127148211Sanholt		case R300_CMD_DMA_DISCARD:
1128148211Sanholt			DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
1129157617Sanholt			idx = header.dma.buf_idx;
1130157617Sanholt			if (idx < 0 || idx >= dma->buf_count) {
1131157617Sanholt				DRM_ERROR("buffer index %d (of %d max)\n",
1132157617Sanholt					  idx, dma->buf_count - 1);
1133182080Srnoland				ret = -EINVAL;
1134157617Sanholt				goto cleanup;
1135157617Sanholt			}
1136148211Sanholt
1137157617Sanholt			buf = dma->buflist[idx];
1138182080Srnoland			if (buf->file_priv != file_priv || buf->pending) {
1139157617Sanholt				DRM_ERROR("bad buffer %p %p %d\n",
1140182080Srnoland					  buf->file_priv, file_priv,
1141182080Srnoland					  buf->pending);
1142182080Srnoland				ret = -EINVAL;
1143148211Sanholt				goto cleanup;
1144157617Sanholt			}
1145148211Sanholt
1146148211Sanholt			emit_dispatch_age = 1;
1147148211Sanholt			r300_discard_buffer(dev, buf);
1148157617Sanholt			break;
1149148211Sanholt
1150148211Sanholt		case R300_CMD_WAIT:
1151148211Sanholt			DRM_DEBUG("R300_CMD_WAIT\n");
1152182080Srnoland			r300_cmd_wait(dev_priv, header);
1153148211Sanholt			break;
1154148211Sanholt
1155157617Sanholt		case R300_CMD_SCRATCH:
1156157617Sanholt			DRM_DEBUG("R300_CMD_SCRATCH\n");
1157157617Sanholt			ret = r300_scratch(dev_priv, cmdbuf, header);
1158157617Sanholt			if (ret) {
1159157617Sanholt				DRM_ERROR("r300_scratch failed\n");
1160157617Sanholt				goto cleanup;
1161157617Sanholt			}
1162157617Sanholt			break;
1163182080Srnoland
1164182080Srnoland		case R300_CMD_R500FP:
1165182080Srnoland			if ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV515) {
1166182080Srnoland				DRM_ERROR("Calling r500 command on r300 card\n");
1167182080Srnoland				ret = -EINVAL;
1168182080Srnoland				goto cleanup;
1169182080Srnoland			}
1170182080Srnoland			DRM_DEBUG("R300_CMD_R500FP\n");
1171182080Srnoland			ret = r300_emit_r500fp(dev_priv, cmdbuf, header);
1172182080Srnoland			if (ret) {
1173182080Srnoland				DRM_ERROR("r300_emit_r500fp failed\n");
1174182080Srnoland				goto cleanup;
1175182080Srnoland			}
1176182080Srnoland			break;
1177148211Sanholt		default:
1178148211Sanholt			DRM_ERROR("bad cmd_type %i at %p\n",
1179157617Sanholt				  header.header.cmd_type,
1180148211Sanholt				  cmdbuf->buf - sizeof(header));
1181182080Srnoland			ret = -EINVAL;
1182148211Sanholt			goto cleanup;
1183157617Sanholt		}
1184148211Sanholt	}
1185148211Sanholt
1186148211Sanholt	DRM_DEBUG("END\n");
1187148211Sanholt
1188157617Sanholt      cleanup:
1189148211Sanholt	r300_pacify(dev_priv);
1190148211Sanholt
1191148211Sanholt	/* We emit the vertex buffer age here, outside the pacifier "brackets"
1192148211Sanholt	 * for two reasons:
1193148211Sanholt	 *  (1) This may coalesce multiple age emissions into a single one and
1194148211Sanholt	 *  (2) more importantly, some chips lock up hard when scratch registers
1195148211Sanholt	 *      are written inside the pacifier bracket.
1196148211Sanholt	 */
1197148211Sanholt	if (emit_dispatch_age) {
1198148211Sanholt		RING_LOCALS;
1199148211Sanholt
1200148211Sanholt		/* Emit the vertex buffer age */
1201148211Sanholt		BEGIN_RING(2);
1202148211Sanholt		RADEON_DISPATCH_AGE(dev_priv->sarea_priv->last_dispatch);
1203148211Sanholt		ADVANCE_RING();
1204157617Sanholt	}
1205148211Sanholt
1206148211Sanholt	COMMIT_RING();
1207148211Sanholt
1208148211Sanholt	return ret;
1209148211Sanholt}
1210