/* r300_cmdbuf.c -- Command buffer emission for R300 -*- linux-c -*-
 *
 * Copyright (C) The Weather Channel, Inc.  2002.
 * Copyright (C) 2004 Nicolai Haehnle.
 * All Rights Reserved.
 *
 * The Weather Channel (TM) funded Tungsten Graphics to develop the
 * initial release of the Radeon 8500 driver under the XFree86 license.
 * This notice must be preserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Nicolai Haehnle <prefect_@gmx.net>
 *
 * $FreeBSD: head/sys/dev/drm/r300_cmdbuf.c 148624 2005-08-01 17:50:19Z anholt $
 */

#include "dev/drm/drmP.h"
#include "dev/drm/drm.h"
#include "dev/drm/radeon_drm.h"
#include "dev/drm/radeon_drv.h"
#include "dev/drm/r300_reg.h"


#define R300_SIMULTANEOUS_CLIPRECTS		4

/* Values for R300_RE_CLIPRECT_CNTL depending on the number of cliprects
 */
static const int r300_cliprect_cntl[4] = {
	0xAAAA,
	0xEEEE,
	0xFEFE,
	0xFFFE
};


/**
 * Emit up to R300_SIMULTANEOUS_CLIPRECTS cliprects from the given command
 * buffer, starting with index n.
 */
static int r300_emit_cliprects(drm_radeon_private_t* dev_priv,
			       drm_radeon_cmd_buffer_t* cmdbuf,
			       int n)
{
	drm_clip_rect_t box;
	int nr;
	int i;
	RING_LOCALS;

	nr = cmdbuf->nbox - n;
	if (nr > R300_SIMULTANEOUS_CLIPRECTS)
		nr = R300_SIMULTANEOUS_CLIPRECTS;

	DRM_DEBUG("%i cliprects\n", nr);

	if (nr) {
		BEGIN_RING(6 + nr*2);
		OUT_RING( CP_PACKET0( R300_RE_CLIPRECT_TL_0, nr*2 - 1 ) );

		for(i = 0; i < nr; ++i) {
			if (DRM_COPY_FROM_USER_UNCHECKED(&box, &cmdbuf->boxes[n+i], sizeof(box))) {
				DRM_ERROR("copy cliprect faulted\n");
				return DRM_ERR(EFAULT);
			}

			box.x1 = (box.x1 + R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
			box.y1 = (box.y1 + R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
			box.x2 = (box.x2 + R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
			box.y2 = (box.y2 + R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;

			OUT_RING((box.x1 << R300_CLIPRECT_X_SHIFT) |
					(box.y1 << R300_CLIPRECT_Y_SHIFT));
			OUT_RING((box.x2 << R300_CLIPRECT_X_SHIFT) |
					(box.y2 << R300_CLIPRECT_Y_SHIFT));
		}

		OUT_RING_REG( R300_RE_CLIPRECT_CNTL, r300_cliprect_cntl[nr-1] );

		/* TODO/SECURITY: Force scissors to a safe value, otherwise the
		* client might be able to trample over memory.
		* The impact should be very limited, but I'd rather be safe than
		* sorry.
		*/
		OUT_RING( CP_PACKET0( R300_RE_SCISSORS_TL, 1 ) );
		OUT_RING( 0 );
		OUT_RING( R300_SCISSORS_X_MASK | R300_SCISSORS_Y_MASK );
		ADVANCE_RING();
		} else {
		/* Why we allow zero cliprect rendering:
		 * There are some commands in a command buffer that must be submitted
		 * even when there are no cliprects, e.g. DMA buffer discard
		 * or state setting (though state setting could be avoided by
		 * simulating a loss of context).
		 *
		 * Now since the cmdbuf interface is so chaotic right now (and is
		 * bound to remain that way for a bit until things settle down),
		 * it is basically impossible to tell the commands that are
		 * necessary apart from those that aren't.
		 *
		 * So I choose the safe way and don't do any filtering at all;
		 * instead, I simply set up the engine so that all rendering
		 * can't produce any fragments.
		 */
		BEGIN_RING(2);
		OUT_RING_REG( R300_RE_CLIPRECT_CNTL, 0 );
		ADVANCE_RING();
		}

	return 0;
}

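/* One flag byte per 32-bit register in the 64KB register space.  Filled in
 * by r300_init_reg_flags() below and consulted when validating packet0
 * writes submitted from userspace.
 */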
u8  r300_reg_flags[0x10000>>2];


void r300_init_reg_flags(void)
{
	int i;
	memset(r300_reg_flags, 0, 0x10000>>2);
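
	/* Each register range is tagged with how written values are treated:
	 * MARK_SAFE registers accept arbitrary values, while values written to
	 * MARK_CHECK_OFFSET registers are GPU addresses that must pass
	 * r300_check_offset().
	 */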
	#define ADD_RANGE_MARK(reg, count,mark) \
		for(i=((reg)>>2);i<((reg)>>2)+(count);i++)\
			r300_reg_flags[i]|=(mark);

	#define MARK_SAFE		1
	#define MARK_CHECK_OFFSET	2

	#define ADD_RANGE(reg, count)	ADD_RANGE_MARK(reg, count, MARK_SAFE)

	/* these match cmducs() command in r300_driver/r300/r300_cmdbuf.c */
	ADD_RANGE(R300_SE_VPORT_XSCALE, 6);
	ADD_RANGE(0x2080, 1);
	ADD_RANGE(R300_SE_VTE_CNTL, 2);
	ADD_RANGE(0x2134, 2);
	ADD_RANGE(0x2140, 1);
	ADD_RANGE(R300_VAP_INPUT_CNTL_0, 2);
	ADD_RANGE(0x21DC, 1);
	ADD_RANGE(0x221C, 1);
	ADD_RANGE(0x2220, 4);
	ADD_RANGE(0x2288, 1);
	ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2);
	ADD_RANGE(R300_VAP_PVS_CNTL_1, 3);
	ADD_RANGE(R300_GB_ENABLE, 1);
	ADD_RANGE(R300_GB_MSPOS0, 5);
	ADD_RANGE(R300_TX_ENABLE, 1);
	ADD_RANGE(0x4200, 4);
	ADD_RANGE(0x4214, 1);
	ADD_RANGE(R300_RE_POINTSIZE, 1);
	ADD_RANGE(0x4230, 3);
	ADD_RANGE(R300_RE_LINE_CNT, 1);
	ADD_RANGE(0x4238, 1);
	ADD_RANGE(0x4260, 3);
	ADD_RANGE(0x4274, 4);
	ADD_RANGE(0x4288, 5);
	ADD_RANGE(0x42A0, 1);
	ADD_RANGE(R300_RE_ZBIAS_T_FACTOR, 4);
	ADD_RANGE(0x42B4, 1);
	ADD_RANGE(R300_RE_CULL_CNTL, 1);
	ADD_RANGE(0x42C0, 2);
	ADD_RANGE(R300_RS_CNTL_0, 2);
	ADD_RANGE(R300_RS_INTERP_0, 8);
	ADD_RANGE(R300_RS_ROUTE_0, 8);
	ADD_RANGE(0x43A4, 2);
	ADD_RANGE(0x43E8, 1);
	ADD_RANGE(R300_PFS_CNTL_0, 3);
	ADD_RANGE(R300_PFS_NODE_0, 4);
	ADD_RANGE(R300_PFS_TEXI_0, 64);
	ADD_RANGE(0x46A4, 5);
	ADD_RANGE(R300_PFS_INSTR0_0, 64);
	ADD_RANGE(R300_PFS_INSTR1_0, 64);
	ADD_RANGE(R300_PFS_INSTR2_0, 64);
	ADD_RANGE(R300_PFS_INSTR3_0, 64);
	ADD_RANGE(0x4BC0, 1);
	ADD_RANGE(0x4BC8, 3);
	ADD_RANGE(R300_PP_ALPHA_TEST, 2);
	ADD_RANGE(0x4BD8, 1);
	ADD_RANGE(R300_PFS_PARAM_0_X, 64);
	ADD_RANGE(0x4E00, 1);
	ADD_RANGE(R300_RB3D_CBLEND, 2);
	ADD_RANGE(R300_RB3D_COLORMASK, 1);
	ADD_RANGE(0x4E10, 3);
	ADD_RANGE_MARK(R300_RB3D_COLOROFFSET0, 1, MARK_CHECK_OFFSET); /* check offset */
	ADD_RANGE(R300_RB3D_COLORPITCH0, 1);
	ADD_RANGE(0x4E50, 9);
	ADD_RANGE(0x4E88, 1);
	ADD_RANGE(0x4EA0, 2);
	ADD_RANGE(R300_RB3D_ZSTENCIL_CNTL_0, 3);
	ADD_RANGE(0x4F10, 4);
	ADD_RANGE_MARK(R300_RB3D_DEPTHOFFSET, 1, MARK_CHECK_OFFSET); /* check offset */
	ADD_RANGE(R300_RB3D_DEPTHPITCH, 1);
	ADD_RANGE(0x4F28, 1);
	ADD_RANGE(0x4F30, 2);
	ADD_RANGE(0x4F44, 1);
	ADD_RANGE(0x4F54, 1);

	ADD_RANGE(R300_TX_FILTER_0, 16);
	ADD_RANGE(R300_TX_UNK1_0, 16);
	ADD_RANGE(R300_TX_SIZE_0, 16);
	ADD_RANGE(R300_TX_FORMAT_0, 16);
		/* Texture offset is dangerous and needs more checking */
	ADD_RANGE_MARK(R300_TX_OFFSET_0, 16, MARK_CHECK_OFFSET);
	ADD_RANGE(R300_TX_UNK4_0, 16);
	ADD_RANGE(R300_TX_BORDER_COLOR_0, 16);

	/* Sporadic registers used as primitives are emitted */
	ADD_RANGE(0x4f18, 1);
	ADD_RANGE(R300_RB3D_DSTCACHE_CTLSTAT, 1);
	ADD_RANGE(R300_VAP_INPUT_ROUTE_0_0, 8);
	ADD_RANGE(R300_VAP_INPUT_ROUTE_1_0, 8);

}

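/* Returns -1 if the register lies outside the 64KB register space, a nonzero
 * value if any register in [reg, reg + count*4) is not marked MARK_SAFE (so
 * the caller must take the checked emission path), and 0 otherwise.
 */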
static __inline__ int r300_check_range(unsigned  reg, int count)
{
	int i;
	if(reg & ~0xffff)return -1;
	for(i=(reg>>2);i<(reg>>2)+count;i++)
		if(r300_reg_flags[i]!=MARK_SAFE)return 1;
	return 0;
}

/* We expect offsets passed to the framebuffer to be either within video
   memory or within AGP space. */
static __inline__ int r300_check_offset(drm_radeon_private_t* dev_priv, u32 offset)
{
	/* We really want to check against the end of the video aperture,
	   but that value is not being kept around.
	   This code is correct for now (it does the same thing as the
	   code that sets MC_FB_LOCATION in radeon_cp.c). */
	if((offset>=dev_priv->fb_location) &&
		(offset<dev_priv->gart_vm_start))return 0;
	if((offset>=dev_priv->gart_vm_start) &&
		 (offset<dev_priv->gart_vm_start+dev_priv->gart_size))return 0;
	return 1;
}

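/* Slow path for packet0 emission: read up to 64 values from the command
 * buffer and validate each one against r300_reg_flags; values destined for
 * registers marked MARK_CHECK_OFFSET must pass r300_check_offset().
 */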
static __inline__ int r300_emit_carefully_checked_packet0(drm_radeon_private_t* dev_priv,
						drm_radeon_cmd_buffer_t* cmdbuf,
						drm_r300_cmd_header_t header)
{
	int reg;
	int sz;
	int i;
	int values[64];
	RING_LOCALS;

	sz = header.packet0.count;
	reg = (header.packet0.reghi << 8) | header.packet0.reglo;

	if((sz>64)||(sz<0)){
		DRM_ERROR("Cannot emit more than 64 values at a time (reg=%04x sz=%d)\n", reg, sz);
		return DRM_ERR(EINVAL);
		}
	for(i=0;i<sz;i++){
		values[i]=((int __user*)cmdbuf->buf)[i];
		switch(r300_reg_flags[(reg>>2)+i]){
		case MARK_SAFE:
			break;
		case MARK_CHECK_OFFSET:
			if(r300_check_offset(dev_priv, (u32)values[i])){
				DRM_ERROR("Offset failed range check (reg=%04x sz=%d)\n", reg, sz);
				return DRM_ERR(EINVAL);
				}
			break;
		default:
			DRM_ERROR("Register %04x failed check as flag=%02x\n", reg+i*4, r300_reg_flags[(reg>>2)+i]);
			return DRM_ERR(EINVAL);
			}
		}

	BEGIN_RING(1+sz);
	OUT_RING( CP_PACKET0( reg, sz-1 ) );
	OUT_RING_TABLE( values, sz );
	ADVANCE_RING();

	cmdbuf->buf += sz*4;
	cmdbuf->bufsz -= sz*4;

	return 0;
}

/**
 * Emits a packet0 setting arbitrary registers.
 * Called by r300_do_cp_cmdbuf.
 *
 * Note that checks are performed on contents and addresses of the registers
 */
static __inline__ int r300_emit_packet0(drm_radeon_private_t* dev_priv,
						drm_radeon_cmd_buffer_t* cmdbuf,
						drm_r300_cmd_header_t header)
{
	int reg;
	int sz;
	RING_LOCALS;

	sz = header.packet0.count;
	reg = (header.packet0.reghi << 8) | header.packet0.reglo;

	if (!sz)
		return 0;

	if (sz*4 > cmdbuf->bufsz)
		return DRM_ERR(EINVAL);

	if (reg+sz*4 >= 0x10000){
		DRM_ERROR("No such registers in hardware reg=%04x sz=%d\n", reg, sz);
		return DRM_ERR(EINVAL);
		}

	if(r300_check_range(reg, sz)){
		/* go and check everything */
		return r300_emit_carefully_checked_packet0(dev_priv, cmdbuf, header);
		}
	/* the rest of the data is safe to emit, whatever the values the user passed */

	BEGIN_RING(1+sz);
	OUT_RING( CP_PACKET0( reg, sz-1 ) );
	OUT_RING_TABLE( (int __user*)cmdbuf->buf, sz );
	ADVANCE_RING();

	cmdbuf->buf += sz*4;
	cmdbuf->bufsz -= sz*4;

	return 0;
}


/**
 * Uploads user-supplied vertex program instructions or parameters onto
 * the graphics card.
 * Called by r300_do_cp_cmdbuf.
 */
static __inline__ int r300_emit_vpu(drm_radeon_private_t* dev_priv,
				    drm_radeon_cmd_buffer_t* cmdbuf,
				    drm_r300_cmd_header_t header)
{
	int sz;
	int addr;
	RING_LOCALS;

	sz = header.vpu.count;
	addr = (header.vpu.adrhi << 8) | header.vpu.adrlo;

	if (!sz)
		return 0;
	if (sz*16 > cmdbuf->bufsz)
		return DRM_ERR(EINVAL);

	BEGIN_RING(5+sz*4);
	/* Wait for VAP to come to its senses. */
	/* There is no need to emit this more than once (only before the VAP is
	   programmed), but that optimization is left for later. */
	OUT_RING_REG( R300_VAP_PVS_WAITIDLE, 0 );
	OUT_RING_REG( R300_VAP_PVS_UPLOAD_ADDRESS, addr );
	OUT_RING( CP_PACKET0_TABLE( R300_VAP_PVS_UPLOAD_DATA, sz*4 - 1 ) );
	OUT_RING_TABLE( (int __user*)cmdbuf->buf, sz*4 );

	ADVANCE_RING();

	cmdbuf->buf += sz*16;
	cmdbuf->bufsz -= sz*16;

	return 0;
}


/**
 * Emit a clear packet from userspace.
 * Called by r300_emit_packet3.
 */
static __inline__ int r300_emit_clear(drm_radeon_private_t* dev_priv,
				      drm_radeon_cmd_buffer_t* cmdbuf)
{
	RING_LOCALS;

	if (8*4 > cmdbuf->bufsz)
		return DRM_ERR(EINVAL);

	BEGIN_RING(10);
	OUT_RING( CP_PACKET3( R200_3D_DRAW_IMMD_2, 8 ) );
	OUT_RING( R300_PRIM_TYPE_POINT|R300_PRIM_WALK_RING|
	          (1<<R300_PRIM_NUM_VERTICES_SHIFT) );
	OUT_RING_TABLE( (int __user*)cmdbuf->buf, 8 );
	ADVANCE_RING();

	cmdbuf->buf += 8*4;
	cmdbuf->bufsz -= 8*4;

	return 0;
}

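/* Validate and emit a 3D_LOAD_VBPNTR packet.  The payload is copied aside
 * and every vertex array offset in it is passed through r300_check_offset()
 * before the packet is written to the ring; offsets come in pairs, with one
 * attribute dword preceding each pair (see the parsing loop below).
 */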
static __inline__ int r300_emit_3d_load_vbpntr(drm_radeon_private_t* dev_priv,
				      drm_radeon_cmd_buffer_t* cmdbuf,
				      u32 header)
{
	int count, i,k;
	#define MAX_ARRAY_PACKET  64
	u32 payload[MAX_ARRAY_PACKET];
	u32 narrays;
	RING_LOCALS;

	count=(header>>16) & 0x3fff;

	if((count+1)>MAX_ARRAY_PACKET){
		DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n", count);
		return DRM_ERR(EINVAL);
		}
	memset(payload, 0, MAX_ARRAY_PACKET*4);
	memcpy(payload, cmdbuf->buf+4, (count+1)*4);

	/* carefully check packet contents */

	narrays=payload[0];
	k=0;
	i=1;
	while((k<narrays) && (i<(count+1))){
		i++; /* skip attribute field */
		if(r300_check_offset(dev_priv, payload[i])){
			DRM_ERROR("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n", k, i);
			return DRM_ERR(EINVAL);
			}
		k++;
		i++;
		if(k==narrays)break;
		/* have one more to process, they come in pairs */
		if(r300_check_offset(dev_priv, payload[i])){
			DRM_ERROR("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n", k, i);
			return DRM_ERR(EINVAL);
			}
		k++;
		i++;
		}
	/* do the counts match what we expect ? */
	if((k!=narrays) || (i!=(count+1))){
		DRM_ERROR("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n", k, i, narrays, count+1);
		return DRM_ERR(EINVAL);
		}

	/* all clear, output packet */

	BEGIN_RING(count+2);
	OUT_RING(header);
	OUT_RING_TABLE(payload, count+1);
	ADVANCE_RING();

	cmdbuf->buf += (count+2)*4;
	cmdbuf->bufsz -= (count+2)*4;

	return 0;
}

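/* Emit a packet3 taken verbatim from the command buffer, but only after
 * checking that its header is one of the known-safe packet types;
 * 3D_LOAD_VBPNTR is handed off to the dedicated checker above.
 */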
static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t* dev_priv,
				      drm_radeon_cmd_buffer_t* cmdbuf)
{
	u32 header;
	int count;
	RING_LOCALS;

	if (4 > cmdbuf->bufsz)
		return DRM_ERR(EINVAL);

	/* FIXME: This simply emits a packet without much checking.
	   We need to be smarter. */

	/* obtain first word - actual packet3 header */
	header = *(u32 __user*)cmdbuf->buf;

	/* Is it packet 3 ? */
	if( (header>>30)!=0x3 ) {
		DRM_ERROR("Not a packet3 header (0x%08x)\n", header);
		return DRM_ERR(EINVAL);
		}

	count=(header>>16) & 0x3fff;

	/* Check again now that we know how much data to expect */
	if ((count+2)*4 > cmdbuf->bufsz){
		DRM_ERROR("Expected packet3 of length %d but have only %d bytes left\n",
			(count+2)*4, cmdbuf->bufsz);
		return DRM_ERR(EINVAL);
		}

	/* Is it a packet type we know about ? */
	switch(header & 0xff00){
	case RADEON_3D_LOAD_VBPNTR: /* load vertex array pointers */
		return r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, header);

	case RADEON_CP_3D_DRAW_IMMD_2: /* triggers drawing using in-packet vertex data */
	case RADEON_CP_3D_DRAW_VBUF_2: /* triggers drawing of vertex buffers setup elsewhere */
	case RADEON_CP_3D_DRAW_INDX_2: /* triggers drawing using indices to vertex buffer */
	case RADEON_CP_INDX_BUFFER: /* DRAW_INDX_2 without INDX_BUFFER seems to lock up the gpu */
	case RADEON_WAIT_FOR_IDLE:
	case RADEON_CP_NOP:
		/* these packets are safe */
		break;
	default:
		DRM_ERROR("Unknown packet3 header (0x%08x)\n", header);
		return DRM_ERR(EINVAL);
		}


	BEGIN_RING(count+2);
	OUT_RING(header);
	OUT_RING_TABLE( (int __user*)(cmdbuf->buf+4), count+1);
	ADVANCE_RING();

	cmdbuf->buf += (count+2)*4;
	cmdbuf->bufsz -= (count+2)*4;

	return 0;
}


/**
 * Emit a rendering packet3 from userspace.
 * Called by r300_do_cp_cmdbuf.
 */
static __inline__ int r300_emit_packet3(drm_radeon_private_t* dev_priv,
					drm_radeon_cmd_buffer_t* cmdbuf,
					drm_r300_cmd_header_t header)
{
	int n;
	int ret;
	char __user* orig_buf = cmdbuf->buf;
	int orig_bufsz = cmdbuf->bufsz;

	/* This is a do-while-loop so that we run the interior at least once,
	 * even if cmdbuf->nbox is 0. Compare r300_emit_cliprects for rationale.
	 */
	n = 0;
	do {
		if (cmdbuf->nbox > R300_SIMULTANEOUS_CLIPRECTS) {
			ret = r300_emit_cliprects(dev_priv, cmdbuf, n);
			if (ret)
				return ret;

			cmdbuf->buf = orig_buf;
			cmdbuf->bufsz = orig_bufsz;
			}

		switch(header.packet3.packet) {
		case R300_CMD_PACKET3_CLEAR:
			DRM_DEBUG("R300_CMD_PACKET3_CLEAR\n");
			ret = r300_emit_clear(dev_priv, cmdbuf);
			if (ret) {
				DRM_ERROR("r300_emit_clear failed\n");
				return ret;
				}
			break;

		case R300_CMD_PACKET3_RAW:
			DRM_DEBUG("R300_CMD_PACKET3_RAW\n");
			ret = r300_emit_raw_packet3(dev_priv, cmdbuf);
			if (ret) {
				DRM_ERROR("r300_emit_raw_packet3 failed\n");
				return ret;
				}
			break;

		default:
			DRM_ERROR("bad packet3 type %i at %p\n",
				header.packet3.packet,
				cmdbuf->buf - sizeof(header));
			return DRM_ERR(EINVAL);
			}

		n += R300_SIMULTANEOUS_CLIPRECTS;
	} while(n < cmdbuf->nbox);

	return 0;
}

/* Some of the R300 chips seem to be extremely touchy about the two registers
 * that are configured in r300_pacify.
 * Among the worst offenders seems to be the R300 ND (0x4E44): When userspace
 * sends a command buffer that contains only state setting commands and a
 * vertex program/parameter upload sequence, this will eventually lead to a
 * lockup, unless the sequence is bracketed by calls to r300_pacify.
 * So we should take great care to *always* call r300_pacify before
 * *anything* 3D related, and again afterwards. This is what the
 * call bracket in r300_do_cp_cmdbuf is for.
 */

/**
 * Emit the sequence to pacify R300.
 */
static __inline__ void r300_pacify(drm_radeon_private_t* dev_priv)
{
	RING_LOCALS;

	BEGIN_RING(6);
	OUT_RING( CP_PACKET0( R300_RB3D_DSTCACHE_CTLSTAT, 0 ) );
	OUT_RING( 0xa );
	OUT_RING( CP_PACKET0( 0x4f18, 0 ) );
	OUT_RING( 0x3 );
	OUT_RING( CP_PACKET3( RADEON_CP_NOP, 0 ) );
	OUT_RING( 0x0 );
	ADVANCE_RING();
}


/**
 * Called by r300_do_cp_cmdbuf to update the internal buffer age and state.
 * The actual age emit is done by r300_do_cp_cmdbuf, which is why you must
 * be careful about how this function is called.
 */
static void r300_discard_buffer(drm_device_t * dev, drm_buf_t * buf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;

	buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
	buf->pending = 1;
	buf->used = 0;
}


/**
 * Parses and validates a user-supplied command buffer and emits appropriate
 * commands on the DMA ring buffer.
 * Called by the ioctl handler function radeon_cp_cmdbuf.
 */
int r300_do_cp_cmdbuf(drm_device_t* dev,
		      DRMFILE filp,
		      drm_file_t* filp_priv,
		      drm_radeon_cmd_buffer_t* cmdbuf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf = NULL;
	int emit_dispatch_age = 0;
	int ret = 0;

	DRM_DEBUG("\n");

	/* See the comment above r300_pacify for why this call must be here,
	 * and what the cleanup gotos are for. */
	r300_pacify(dev_priv);

	if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) {
		ret = r300_emit_cliprects(dev_priv, cmdbuf, 0);
		if (ret)
			goto cleanup;
		}

	while(cmdbuf->bufsz >= sizeof(drm_r300_cmd_header_t)) {
		int idx;
		drm_r300_cmd_header_t header;

		header.u = *(unsigned int *)cmdbuf->buf;

		cmdbuf->buf += sizeof(header);
		cmdbuf->bufsz -= sizeof(header);

		switch(header.header.cmd_type) {
		case R300_CMD_PACKET0:
			DRM_DEBUG("R300_CMD_PACKET0\n");
			ret = r300_emit_packet0(dev_priv, cmdbuf, header);
			if (ret) {
				DRM_ERROR("r300_emit_packet0 failed\n");
				goto cleanup;
				}
			break;

		case R300_CMD_VPU:
			DRM_DEBUG("R300_CMD_VPU\n");
			ret = r300_emit_vpu(dev_priv, cmdbuf, header);
			if (ret) {
				DRM_ERROR("r300_emit_vpu failed\n");
				goto cleanup;
				}
			break;

		case R300_CMD_PACKET3:
			DRM_DEBUG("R300_CMD_PACKET3\n");
			ret = r300_emit_packet3(dev_priv, cmdbuf, header);
			if (ret) {
				DRM_ERROR("r300_emit_packet3 failed\n");
				goto cleanup;
				}
			break;

		case R300_CMD_END3D:
			DRM_DEBUG("R300_CMD_END3D\n");
			/* TODO:
				Ideally, the userspace driver should not need to issue this
				call, i.e. the drm driver should issue it automatically and
				prevent lockups.

				In practice, we do not understand why this call is needed and
				what it does (beyond some vague guesses that it has to do with
				cache coherence), so the userspace driver issues it.

				Once we know which uses prevent lockups, this code could be
				moved into the kernel and the userspace driver would no longer
				need to use this command.

				Note that issuing this command does not hurt anything except,
				possibly, performance. */
			r300_pacify(dev_priv);
			break;

		case R300_CMD_CP_DELAY:
			/* simple enough, we can do it here */
			DRM_DEBUG("R300_CMD_CP_DELAY\n");
			{
				int i;
				RING_LOCALS;

				BEGIN_RING(header.delay.count);
				for(i=0;i<header.delay.count;i++)
					OUT_RING(RADEON_CP_PACKET2);
				ADVANCE_RING();
			}
			break;

		case R300_CMD_DMA_DISCARD:
			DRM_DEBUG("R300_CMD_DMA_DISCARD\n");
			idx = header.dma.buf_idx;
			if (idx < 0 || idx >= dma->buf_count) {
				DRM_ERROR("buffer index %d (of %d max)\n",
					idx, dma->buf_count - 1);
				ret = DRM_ERR(EINVAL);
				goto cleanup;
				}

			buf = dma->buflist[idx];
			if (buf->filp != filp || buf->pending) {
				DRM_ERROR("bad buffer %p %p %d\n",
					buf->filp, filp, buf->pending);
				ret = DRM_ERR(EINVAL);
				goto cleanup;
				}

			emit_dispatch_age = 1;
			r300_discard_buffer(dev, buf);
			break;

		case R300_CMD_WAIT:
			/* simple enough, we can do it here */
			DRM_DEBUG("R300_CMD_WAIT\n");
			if(header.wait.flags==0)break; /* nothing to do */

			{
				RING_LOCALS;

				BEGIN_RING(2);
				OUT_RING( CP_PACKET0( RADEON_WAIT_UNTIL, 0 ) );
				OUT_RING( (header.wait.flags & 0xf)<<14 );
				ADVANCE_RING();
			}
			break;

		default:
			DRM_ERROR("bad cmd_type %i at %p\n",
			          header.header.cmd_type,
				  cmdbuf->buf - sizeof(header));
			ret = DRM_ERR(EINVAL);
			goto cleanup;
			}
	}

	DRM_DEBUG("END\n");

cleanup:
	r300_pacify(dev_priv);

	/* We emit the vertex buffer age here, outside the pacifier "brackets"
	 * for two reasons:
	 *  (1) This may coalesce multiple age emissions into a single one and
	 *  (2) more importantly, some chips lock up hard when scratch registers
	 *      are written inside the pacifier bracket.
	 */
	if (emit_dispatch_age) {
		RING_LOCALS;

		/* Emit the vertex buffer age */
		BEGIN_RING(2);
		RADEON_DISPATCH_AGE(dev_priv->sarea_priv->last_dispatch);
		ADVANCE_RING();
		}

	COMMIT_RING();

	return ret;
}
