1/*
2 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
3 * Use is subject to license terms.
4 */
5/*
6 * r300_cmdbuf.c -- Command buffer emission for R300 -*- linux-c -*-
7 *
8 * Copyright (C) The Weather Channel, Inc.  2002.
9 * Copyright (C) 2004 Nicolai Haehnle.
10 * All Rights Reserved.
11 *
12 * The Weather Channel (TM) funded Tungsten Graphics to develop the
13 * initial release of the Radeon 8500 driver under the XFree86 license.
14 * This notice must be preserved.
15 *
16 * Permission is hereby granted, free of charge, to any person obtaining a
17 * copy of this software and associated documentation files (the "Software"),
18 * to deal in the Software without restriction, including without limitation
19 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
20 * and/or sell copies of the Software, and to permit persons to whom the
21 * Software is furnished to do so, subject to the following conditions:
22 *
23 * The above copyright notice and this permission notice (including the next
24 * paragraph) shall be included in all copies or substantial portions of the
25 * Software.
26 *
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
28 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
29 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
30 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
31 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
32 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
33 * DEALINGS IN THE SOFTWARE.
34 *
35 * Authors:
36 *    Nicolai Haehnle <prefect_@gmx.net>
37 */
38
39#pragma ident	"%Z%%M%	%I%	%E% SMI"
40
41#include "drm.h"
42#include "radeon_drm.h"
43#include "drmP.h"
44#include "radeon_drv.h"
45#include "r300_reg.h"
46
#define	R300_SIMULTANEOUS_CLIPRECTS		4

/*
 * Values for R300_RE_CLIPRECT_CNTL depending on the number of
 * cliprects (index is nr_cliprects - 1).
 */
static const int r300_cliprect_cntl[4] = {
	0xAAAA,	/* 1 cliprect */
	0xEEEE,	/* 2 cliprects */
	0xFEFE,	/* 3 cliprects */
	0xFFFE	/* 4 cliprects */
};
59
/*
 * Emit up to R300_SIMULTANEOUS_CLIPRECTS cliprects from the given command
 * buffer, starting with index n.
 *
 * When no cliprects remain, R300_RE_CLIPRECT_CNTL is programmed to 0 so
 * that rendering produces no fragments (see the comment in the else
 * branch).  Returns 0 on success or EFAULT if a cliprect could not be
 * copied in from userspace.
 */
static int r300_emit_cliprects(drm_radeon_private_t *dev_priv,
    drm_radeon_kcmd_buffer_t *cmdbuf, int n)
{
	drm_clip_rect_t box;
	int nr;
	int i;
	RING_LOCALS;

	/* Cliprects left from index n, clamped to one pass' worth. */
	nr = cmdbuf->nbox - n;
	if (nr > R300_SIMULTANEOUS_CLIPRECTS)
		nr = R300_SIMULTANEOUS_CLIPRECTS;

	DRM_DEBUG("%i cliprects\n", nr);

	if (nr) {
		/* 2 dwords per rect + packet header + CNTL + scissors. */
		BEGIN_RING(6 + nr * 2);
		OUT_RING(CP_PACKET0(R300_RE_CLIPRECT_TL_0, nr * 2 - 1));

		for (i = 0; i < nr; ++i) {
			if (DRM_COPY_FROM_USER_UNCHECKED
			    (&box, &cmdbuf->boxes[n + i], sizeof (box))) {
				DRM_ERROR("copy cliprect faulted\n");
				return (EFAULT);
			}

			/*
			 * Bias each coordinate by the hardware cliprect
			 * offset and mask it into the register's range.
			 */
			box.x1 =
			    (box.x1 +
			    R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
			box.y1 =
			    (box.y1 +
			    R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
			box.x2 =
			    (box.x2 +
			    R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
			box.y2 =
			    (box.y2 +
			    R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;

			OUT_RING((box.x1 << R300_CLIPRECT_X_SHIFT) |
			    (box.y1 << R300_CLIPRECT_Y_SHIFT));
			OUT_RING((box.x2 << R300_CLIPRECT_X_SHIFT) |
			    (box.y2 << R300_CLIPRECT_Y_SHIFT));
		}

		OUT_RING_REG(R300_RE_CLIPRECT_CNTL, r300_cliprect_cntl[nr - 1]);

		/*
		 * TODO/SECURITY: Force scissors to a safe value, otherwise
		 * the client might be able to trample over memory.
		 * The impact should be very limited, but I'd rather be safe
		 * than sorry.
		 */
		OUT_RING(CP_PACKET0(R300_RE_SCISSORS_TL, 1));
		OUT_RING(0);
		OUT_RING(R300_SCISSORS_X_MASK | R300_SCISSORS_Y_MASK);
		ADVANCE_RING();
	} else {
		/*
		 * Why we allow zero cliprect rendering:
		 * There are some commands in a command buffer that must be
		 * submitted even when there are no cliprects, e.g. DMA buffer
		 * discard or state setting (though state setting could be
		 * avoided by simulating a loss of context).
		 *
		 * Now since the cmdbuf interface is so chaotic right now (and
		 * is bound to remain that way for a bit until things settle
		 * down), it is basically impossible to filter out the commands
		 * that are necessary and those that aren't.
		 *
		 * So I choose the safe way and don't do any filtering at all;
		 * instead, I simply set up the engine so that all rendering
		 * can't produce any fragments.
		 */
		BEGIN_RING(2);
		OUT_RING_REG(R300_RE_CLIPRECT_CNTL, 0);
		ADVANCE_RING();
	}

	return (0);
}
144
/*
 * Per-register safety flags, one entry per 32-bit register in the
 * 0x0000-0xFFFF space, indexed by (register offset >> 2).  Entries are
 * the MARK_* values installed by r300_init_reg_flags(); an entry of 0
 * means the register may not be written from a user command buffer.
 */
static u8 r300_reg_flags[0x10000 >> 2];
146
/*
 * Populate r300_reg_flags[] with the set of registers that user
 * command buffers are allowed to write.  MARK_SAFE registers accept
 * arbitrary values; MARK_CHECK_OFFSET registers take a GPU address and
 * every value written to them is validated with RADEON_CHECK_OFFSET
 * (see r300_emit_carefully_checked_packet0).  Registers not marked
 * here are rejected outright.
 */
void
r300_init_reg_flags(void)
{
	int i;
	(void) memset(r300_reg_flags, 0, 0x10000 >> 2);
/* Mark 'count' consecutive registers starting at 'reg' with 'mark'. */
#define	ADD_RANGE_MARK(reg, count, mark) \
		for (i = ((reg) >> 2); i < ((reg) >> 2) + (count); i++)\
			r300_reg_flags[i] |= (mark);

#define	MARK_SAFE		1
#define	MARK_CHECK_OFFSET	2

#define	ADD_RANGE(reg, count)	ADD_RANGE_MARK(reg, count, MARK_SAFE)

	/* these match cmducs() command in r300_driver/r300/r300_cmdbuf.c */
	ADD_RANGE(R300_SE_VPORT_XSCALE, 6);
	ADD_RANGE(0x2080, 1);
	ADD_RANGE(R300_SE_VTE_CNTL, 2);
	ADD_RANGE(0x2134, 2);
	ADD_RANGE(0x2140, 1);
	ADD_RANGE(R300_VAP_INPUT_CNTL_0, 2);
	ADD_RANGE(0x21DC, 1);
	ADD_RANGE(0x221C, 1);
	ADD_RANGE(0x2220, 4);
	ADD_RANGE(0x2288, 1);
	ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2);
	ADD_RANGE(R300_VAP_PVS_CNTL_1, 3);
	ADD_RANGE(R300_GB_ENABLE, 1);
	ADD_RANGE(R300_GB_MSPOS0, 5);
	ADD_RANGE(R300_TX_CNTL, 1);
	ADD_RANGE(R300_TX_ENABLE, 1);
	ADD_RANGE(0x4200, 4);
	ADD_RANGE(0x4214, 1);
	ADD_RANGE(R300_RE_POINTSIZE, 1);
	ADD_RANGE(0x4230, 3);
	ADD_RANGE(R300_RE_LINE_CNT, 1);
	ADD_RANGE(0x4238, 1);
	ADD_RANGE(0x4260, 3);
	ADD_RANGE(0x4274, 4);
	ADD_RANGE(0x4288, 5);
	ADD_RANGE(0x42A0, 1);
	ADD_RANGE(R300_RE_ZBIAS_T_FACTOR, 4);
	ADD_RANGE(0x42B4, 1);
	ADD_RANGE(R300_RE_CULL_CNTL, 1);
	ADD_RANGE(0x42C0, 2);
	ADD_RANGE(R300_RS_CNTL_0, 2);
	ADD_RANGE(R300_RS_INTERP_0, 8);
	ADD_RANGE(R300_RS_ROUTE_0, 8);
	ADD_RANGE(0x43A4, 2);
	ADD_RANGE(0x43E8, 1);
	ADD_RANGE(R300_PFS_CNTL_0, 3);
	ADD_RANGE(R300_PFS_NODE_0, 4);
	ADD_RANGE(R300_PFS_TEXI_0, 64);
	ADD_RANGE(0x46A4, 5);
	ADD_RANGE(R300_PFS_INSTR0_0, 64);
	ADD_RANGE(R300_PFS_INSTR1_0, 64);
	ADD_RANGE(R300_PFS_INSTR2_0, 64);
	ADD_RANGE(R300_PFS_INSTR3_0, 64);
	ADD_RANGE(0x4BC0, 1);
	ADD_RANGE(0x4BC8, 3);
	ADD_RANGE(R300_PP_ALPHA_TEST, 2);
	ADD_RANGE(0x4BD8, 1);
	ADD_RANGE(R300_PFS_PARAM_0_X, 64);
	ADD_RANGE(0x4E00, 1);
	ADD_RANGE(R300_RB3D_CBLEND, 2);
	ADD_RANGE(R300_RB3D_COLORMASK, 1);
	ADD_RANGE(0x4E10, 3);
	ADD_RANGE_MARK(R300_RB3D_COLOROFFSET0, 1, MARK_CHECK_OFFSET);
					/* check offset */
	ADD_RANGE(R300_RB3D_COLORPITCH0, 1);
	ADD_RANGE(0x4E50, 9);
	ADD_RANGE(0x4E88, 1);
	ADD_RANGE(0x4EA0, 2);
	ADD_RANGE(R300_RB3D_ZSTENCIL_CNTL_0, 3);
	ADD_RANGE(0x4F10, 4);
	ADD_RANGE_MARK(R300_RB3D_DEPTHOFFSET, 1, MARK_CHECK_OFFSET);
					/* check offset */
	ADD_RANGE(R300_RB3D_DEPTHPITCH, 1);
	ADD_RANGE(0x4F28, 1);
	ADD_RANGE(0x4F30, 2);
	ADD_RANGE(0x4F44, 1);
	ADD_RANGE(0x4F54, 1);

	ADD_RANGE(R300_TX_FILTER_0, 16);
	ADD_RANGE(R300_TX_FILTER1_0, 16);
	ADD_RANGE(R300_TX_SIZE_0, 16);
	ADD_RANGE(R300_TX_FORMAT_0, 16);
	ADD_RANGE(R300_TX_PITCH_0, 16);
	/* Texture offset is dangerous and needs more checking */
	ADD_RANGE_MARK(R300_TX_OFFSET_0, 16, MARK_CHECK_OFFSET);
	ADD_RANGE(R300_TX_CHROMA_KEY_0, 16);
	ADD_RANGE(R300_TX_BORDER_COLOR_0, 16);

	/* Sporadic registers used as primitives are emitted */
	ADD_RANGE(0x4f18, 1);
	ADD_RANGE(R300_RB3D_DSTCACHE_CTLSTAT, 1);
	ADD_RANGE(R300_VAP_INPUT_ROUTE_0_0, 8);
	ADD_RANGE(R300_VAP_INPUT_ROUTE_1_0, 8);

}
247
248static __inline__ int r300_check_range(unsigned reg, int count)
249{
250	int i;
251	if (reg & ~0xffff)
252		return (-1);
253	for (i = (reg >> 2); i < (reg >> 2) + count; i++)
254		if (r300_reg_flags[i] != MARK_SAFE)
255			return (1);
256	return (0);
257}
258
/*
 * Emit a packet0 whose register range contains non-MARK_SAFE registers.
 * The user-supplied values are staged into a bounded local array and
 * each one is validated against the per-register flag before anything
 * is placed on the ring; values destined for MARK_CHECK_OFFSET
 * registers must pass RADEON_CHECK_OFFSET.  On success the consumed
 * dwords are stripped from cmdbuf.  Returns 0 on success or EINVAL on
 * any validation failure.
 */
static inline int
r300_emit_carefully_checked_packet0(drm_radeon_private_t *dev_priv,
    drm_radeon_kcmd_buffer_t *cmdbuf, drm_r300_cmd_header_t header)
{
	int reg;
	int sz;
	int i;
	int values[64];
	RING_LOCALS;

	sz = header.packet0.count;
	reg = (header.packet0.reghi << 8) | header.packet0.reglo;

	/* values[] holds at most 64 dwords; reject oversized requests. */
	if ((sz > 64) || (sz < 0)) {
		DRM_ERROR("Cannot emit more than 64 values at a time "
		    "(reg=%04x sz=%d)\n", reg, sz);
		return (EINVAL);
	}
	for (i = 0; i < sz; i++) {
		values[i] = ((int *)(uintptr_t)cmdbuf->buf)[i];
		switch (r300_reg_flags[(reg >> 2) + i]) {
		case MARK_SAFE:
			break;
		case MARK_CHECK_OFFSET:
			/* Register takes a GPU address; range-check it. */
			if (!RADEON_CHECK_OFFSET(dev_priv, (u32) values[i])) {
				DRM_ERROR("Offset failed range check "
				    "(reg=%04x sz=%d)\n", reg, sz);
				return (EINVAL);
			}
			break;
		default:
			/* Unmarked register: not writable from userspace. */
			DRM_ERROR("Register %04x failed check as flag=%02x\n",
			    reg + i * 4, r300_reg_flags[(reg >> 2) + i]);
			return (EINVAL);
		}
	}

	BEGIN_RING(1 + sz);
	OUT_RING(CP_PACKET0(reg, sz - 1));
	OUT_RING_TABLE(values, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * 4;
	cmdbuf->bufsz -= sz * 4;

	return (0);
}
306
/*
 * Emits a packet0 setting arbitrary registers.
 * Called by r300_do_cp_cmdbuf.
 *
 * Note that checks are performed on contents and addresses of the
 * registers: ranges containing non-MARK_SAFE registers are routed
 * through r300_emit_carefully_checked_packet0 for per-value
 * validation.  Returns 0 on success or EINVAL on a short buffer or an
 * out-of-range register span.
 */
static __inline__ int r300_emit_packet0(drm_radeon_private_t *dev_priv,
					drm_radeon_kcmd_buffer_t *cmdbuf,
					drm_r300_cmd_header_t header)
{
	int reg;
	int sz;
	RING_LOCALS;

	sz = header.packet0.count;
	reg = (header.packet0.reghi << 8) | header.packet0.reglo;

	if (!sz)
		return (0);

	/* The payload must actually be present in the command buffer. */
	if (sz * 4 > cmdbuf->bufsz)
		return (EINVAL);

	/* The whole register span must lie inside the 64K space. */
	if (reg + sz * 4 >= 0x10000) {
		DRM_ERROR("No such registers in hardware reg=%04x sz=%d\n",
		    reg, sz);
		return (EINVAL);
	}

	if (r300_check_range(reg, sz)) {
		/* go and check everything */
		return (r300_emit_carefully_checked_packet0(dev_priv,
		    cmdbuf, header));
	}
	/*
	 * the rest of the data is safe to emit, whatever the values
	 * the user passed
	 */

	BEGIN_RING(1 + sz);
	OUT_RING(CP_PACKET0(reg, sz - 1));
	OUT_RING_TABLE(cmdbuf->buf, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * 4;
	cmdbuf->bufsz -= sz * 4;

	return (0);
}
356
/*
 * Uploads user-supplied vertex program instructions or parameters onto
 * the graphics card.
 * Called by r300_do_cp_cmdbuf.
 *
 * header.vpu.count is in 4-dword (16-byte) units; addr is the upload
 * address within the PVS.  Returns 0 on success (or for an empty
 * upload) and EINVAL if the command buffer is too short.
 */
static inline int r300_emit_vpu(drm_radeon_private_t *dev_priv,
    drm_radeon_kcmd_buffer_t *cmdbuf, drm_r300_cmd_header_t header)
{
	int sz;
	int addr;
	RING_LOCALS;

	sz = header.vpu.count;
	addr = (header.vpu.adrhi << 8) | header.vpu.adrlo;

	if (!sz)
		return (0);
	if (sz * 16 > cmdbuf->bufsz)
		return (EINVAL);

	BEGIN_RING(5 + sz * 4);
	/* Wait for VAP to come to senses.. */
	/*
	 * there is no need to emit it multiple times, (only once before
	 * VAP is programmed, but this optimization is for later
	 */
	OUT_RING_REG(R300_VAP_PVS_WAITIDLE, 0);
	OUT_RING_REG(R300_VAP_PVS_UPLOAD_ADDRESS, addr);
	OUT_RING(CP_PACKET0_TABLE(R300_VAP_PVS_UPLOAD_DATA, sz * 4 - 1));
	OUT_RING_TABLE(cmdbuf->buf, sz * 4);

	ADVANCE_RING();

	cmdbuf->buf += sz * 16;
	cmdbuf->bufsz -= sz * 16;

	return (0);
}
395
/*
 * Emit a clear packet from userspace.
 * Called by r300_emit_packet3.
 *
 * Draws a single point primitive (RING walk) whose 8-dword payload is
 * taken verbatim from the command buffer.  Returns 0 on success or
 * EINVAL if fewer than 8 dwords are available.
 */
static inline int r300_emit_clear(drm_radeon_private_t *dev_priv,
    drm_radeon_kcmd_buffer_t *cmdbuf)
{
	RING_LOCALS;

	if (8 * 4 > cmdbuf->bufsz)
		return (EINVAL);

	BEGIN_RING(10);
	OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
	OUT_RING(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
	    (1 << R300_PRIM_NUM_VERTICES_SHIFT));
	OUT_RING_TABLE(cmdbuf->buf, 8);
	ADVANCE_RING();

	cmdbuf->buf += 8 * 4;
	cmdbuf->bufsz -= 8 * 4;

	return (0);
}
420
421static inline int r300_emit_3d_load_vbpntr(drm_radeon_private_t *dev_priv,
422    drm_radeon_kcmd_buffer_t *cmdbuf, u32 header)
423{
424	int count, i, k;
425#define	MAX_ARRAY_PACKET		64
426	u32 payload[MAX_ARRAY_PACKET];
427	u32 narrays;
428	RING_LOCALS;
429
430	count = (header >> 16) & 0x3fff;
431
432	if ((count + 1) > MAX_ARRAY_PACKET) {
433		DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
434		    count);
435		return (EINVAL);
436	}
437	(void) memset(payload, 0, MAX_ARRAY_PACKET * 4);
438	(void) memcpy(payload, cmdbuf->buf + 4, (count + 1) * 4);
439
440	/* carefully check packet contents */
441
442	narrays = payload[0];
443	k = 0;
444	i = 1;
445	while ((k < narrays) && (i < (count + 1))) {
446		i++;		/* skip attribute field */
447		if (!RADEON_CHECK_OFFSET(dev_priv, payload[i])) {
448			DRM_ERROR("Offset failed range check (k=%d i=%d) "
449			    "while processing 3D_LOAD_VBPNTR packet.\n",
450			    k, i);
451			return (EINVAL);
452		}
453		k++;
454		i++;
455		if (k == narrays)
456			break;
457		/* have one more to process, they come in pairs */
458		if (!RADEON_CHECK_OFFSET(dev_priv, payload[i])) {
459			DRM_ERROR("Offset failed range check (k=%d i=%d) "
460			    "while processing 3D_LOAD_VBPNTR packet.\n",
461			    k, i);
462			return (EINVAL);
463		}
464		k++;
465		i++;
466	}
467	/* do the counts match what we expect ? */
468	if ((k != narrays) || (i != (count + 1))) {
469		DRM_ERROR("Malformed 3D_LOAD_VBPNTR packet "
470		    "(k=%d i=%d narrays=%d count+1=%d).\n",
471		    k, i, narrays, count + 1);
472		return (EINVAL);
473	}
474
475	/* all clear, output packet */
476
477	BEGIN_RING(count + 2);
478	OUT_RING(header);
479	OUT_RING_TABLE(payload, count + 1);
480	ADVANCE_RING();
481
482	cmdbuf->buf += (count + 2) * 4;
483	cmdbuf->bufsz -= (count + 2) * 4;
484
485	return (0);
486}
487
488static inline int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv,
489    drm_radeon_kcmd_buffer_t *cmdbuf)
490{
491	u32 *cmd = (u32 *)(uintptr_t)cmdbuf->buf;
492	int count, ret;
493	RING_LOCALS;
494
495	count = (cmd[0] >> 16) & 0x3fff;
496
497	if (cmd[0] & 0x8000) {
498		u32 offset;
499
500		if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
501		    RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
502			offset = cmd[2] << 10;
503			ret = !RADEON_CHECK_OFFSET(dev_priv, offset);
504			if (ret) {
505				DRM_ERROR("Invalid bitblt first offset "
506				    "is %08X\n", offset);
507				return (EINVAL);
508			}
509		}
510
511		if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
512		    (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
513			offset = cmd[3] << 10;
514			ret = !RADEON_CHECK_OFFSET(dev_priv, offset);
515			if (ret) {
516				DRM_ERROR("Invalid bitblt second offset "
517				    "is %08X\n", offset);
518				return (EINVAL);
519			}
520
521		}
522	}
523
524	BEGIN_RING(count+2);
525	OUT_RING(cmd[0]);
526	OUT_RING_TABLE((cmdbuf->buf + 4), count + 1);
527	ADVANCE_RING();
528
529	cmdbuf->buf += (count+2)*4;
530	cmdbuf->bufsz -= (count+2)*4;
531
532	return (0);
533}
534
535
536static inline int r300_emit_indx_buffer(drm_radeon_private_t *dev_priv,
537    drm_radeon_kcmd_buffer_t *cmdbuf)
538{
539	u32 *cmd = (u32 *)(uintptr_t)cmdbuf->buf;
540	int count, ret;
541	RING_LOCALS;
542
543	count = (cmd[0]>>16) & 0x3fff;
544
545	if ((cmd[1] & 0x8000ffff) != 0x80000810) {
546		DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
547		return (EINVAL);
548	}
549	ret = !RADEON_CHECK_OFFSET(dev_priv, cmd[2]);
550	if (ret) {
551		DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
552		return (EINVAL);
553	}
554
555	BEGIN_RING(count+2);
556	OUT_RING(cmd[0]);
557	OUT_RING_TABLE(cmdbuf->buf + 4, count + 1);
558	ADVANCE_RING();
559
560	cmdbuf->buf += (count+2)*4;
561	cmdbuf->bufsz -= (count+2)*4;
562
563	return (0);
564}
565
566
/*
 * Validate and emit a raw packet3 from the command buffer.
 *
 * Only a whitelist of packet types is accepted; 3D_LOAD_VBPNTR,
 * BITBLT_MULTI and INDX_BUFFER are handed to dedicated checkers, the
 * remaining safe types are emitted verbatim.  Returns 0 on success or
 * EINVAL for a short buffer, a non-packet3 header or an unknown packet
 * type.
 */
static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t *dev_priv,
					    drm_radeon_kcmd_buffer_t *cmdbuf)
{
	u32 header;
	int count;
	RING_LOCALS;

	if (4 > cmdbuf->bufsz)
		return (EINVAL);

	/*
	 * Fixme !! This simply emits a packet without much checking.
	 * We need to be smarter.
	 */

	/* obtain first word - actual packet3 header */
	header = *(u32 *)(uintptr_t)cmdbuf->buf;

	/* Is it packet 3 ? */
	if ((header >> 30) != 0x3) {
		DRM_ERROR("Not a packet3 header (0x%08x)\n", header);
		return (EINVAL);
	}

	count = (header >> 16) & 0x3fff;

	/* Check again now that we know how much data to expect */
	if ((count + 2) * 4 > cmdbuf->bufsz) {
		DRM_ERROR("Expected packet3 of length %d but have only "
		    "%d bytes left\n", (count + 2) * 4, cmdbuf->bufsz);
		return (EINVAL);
	}

	/* Is it a packet type we know about ? */
	switch (header & 0xff00) {
	case RADEON_3D_LOAD_VBPNTR:	/* load vertex array pointers */
		return (r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, header));

	case RADEON_CNTL_BITBLT_MULTI:
		return (r300_emit_bitblt_multi(dev_priv, cmdbuf));

	case RADEON_CP_INDX_BUFFER:
			/*
			 * DRAW_INDX_2 without INDX_BUFFER seems to lock
			 * up the GPU
			 */
		return (r300_emit_indx_buffer(dev_priv, cmdbuf));

	case RADEON_CP_3D_DRAW_IMMD_2:
			/* triggers drawing using in-packet vertex data */
	case RADEON_CP_3D_DRAW_VBUF_2:
			/* triggers drawing of vertex buffers setup elsewhere */
	case RADEON_CP_3D_DRAW_INDX_2:
			/* triggers drawing using indices to vertex buffer */
	case RADEON_WAIT_FOR_IDLE:
	case RADEON_CP_NOP:
		/* these packets are safe */
		break;
	default:
		DRM_ERROR("Unknown packet3 header (0x%08x)\n", header);
		return (EINVAL);
	}

	BEGIN_RING(count + 2);
	OUT_RING(header);
	OUT_RING_TABLE((cmdbuf->buf + 4), count + 1);
	ADVANCE_RING();

	cmdbuf->buf += (count + 2) * 4;
	cmdbuf->bufsz -= (count + 2) * 4;

	return (0);
}
638
/*
 * Emit a rendering packet3 from userspace.
 * Called by r300_do_cp_cmdbuf.
 *
 * When more than R300_SIMULTANEOUS_CLIPRECTS cliprects are pending,
 * the packet is re-emitted once per cliprect group, rewinding cmdbuf
 * between passes.  Returns 0 on success or the error from the
 * underlying emit routine (EINVAL/EFAULT).
 */
static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
    drm_radeon_kcmd_buffer_t *cmdbuf, drm_r300_cmd_header_t header)
{
	int n;
	int ret;
	/* Saved so the packet can be replayed for each cliprect group. */
	char *orig_buf = cmdbuf->buf;
	int orig_bufsz = cmdbuf->bufsz;

	/*
	 * This is a do-while-loop so that we run the interior at least once,
	 * even if cmdbuf->nbox is 0. Compare r300_emit_cliprects for rationale.
	 */
	n = 0;
	do {
		if (cmdbuf->nbox > R300_SIMULTANEOUS_CLIPRECTS) {
			ret = r300_emit_cliprects(dev_priv, cmdbuf, n);
			if (ret)
				return (ret);

			/* Rewind to replay the packet for this group. */
			cmdbuf->buf = orig_buf;
			cmdbuf->bufsz = orig_bufsz;
		}

		switch (header.packet3.packet) {
		case R300_CMD_PACKET3_CLEAR:
			DRM_DEBUG("R300_CMD_PACKET3_CLEAR\n");
			ret = r300_emit_clear(dev_priv, cmdbuf);
			if (ret) {
				DRM_ERROR("r300_emit_clear failed\n");
				return (ret);
			}
			break;

		case R300_CMD_PACKET3_RAW:
			DRM_DEBUG("R300_CMD_PACKET3_RAW\n");
			ret = r300_emit_raw_packet3(dev_priv, cmdbuf);
			if (ret) {
				DRM_ERROR("r300_emit_raw_packet3 failed\n");
				return (ret);
			}
			break;

		default:
			DRM_ERROR("bad packet3 type %i at %p\n",
			    header.packet3.packet,
			    cmdbuf->buf - sizeof (header));
			return (EINVAL);
		}

		n += R300_SIMULTANEOUS_CLIPRECTS;
	} while (n < cmdbuf->nbox);

	return (0);
}
697
698/*
699 * Some of the R300 chips seem to be extremely touchy about the two registers
700 * that are configured in r300_pacify.
701 * Among the worst offenders seems to be the R300 ND (0x4E44): When userspace
702 * sends a command buffer that contains only state setting commands and a
703 * vertex program/parameter upload sequence, this will eventually lead to a
704 * lockup, unless the sequence is bracketed by calls to r300_pacify.
705 * So we should take great care to *always* call r300_pacify before
706 * *anything* 3D related, and again afterwards. This is what the
707 * call bracket in r300_do_cp_cmdbuf is for.
708 */
709
/*
 * Emit the sequence to pacify R300.
 *
 * Writes R300_RB3D_DSTCACHE_CTLSTAT and register 0x4f18, then a CP
 * NOP.  NOTE(review): the register values (0xa / 0x3) presumably flush
 * the destination/Z caches, but their exact semantics are not visible
 * here — see the lockup discussion in the comment block above.
 */
static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv)
{
	RING_LOCALS;

	BEGIN_RING(6);
	OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
	OUT_RING(0xa);
	OUT_RING(CP_PACKET0(0x4f18, 0));
	OUT_RING(0x3);
	OUT_RING(CP_PACKET3(RADEON_CP_NOP, 0));
	OUT_RING(0x0);
	ADVANCE_RING();
}
726
727/*
728 * Called by r300_do_cp_cmdbuf to update the internal buffer age and state.
729 * The actual age emit is done by r300_do_cp_cmdbuf, which is why you must
730 * be careful about how this function is called.
731 */
732static void r300_discard_buffer(drm_device_t *dev, drm_buf_t *buf)
733{
734	drm_radeon_private_t *dev_priv = dev->dev_private;
735	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
736
737	buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
738	buf->pending = 1;
739	buf->used = 0;
740}
741
/*
 * Process an R300_CMD_SCRATCH request.
 *
 * Bumps the age counter for the selected scratch register (0..4), then
 * for each referenced buffer writes the new age into the userspace
 * ref_age array and decrements that slot's pending count (rejecting a
 * count that is already zero).  Finally emits a packet0 so the CP
 * writes the new age into the scratch register when it reaches this
 * point in the stream.  Returns 0 on success or EINVAL on a short
 * buffer, a bad register index, or a userspace copy failure.
 */
static int r300_scratch(drm_radeon_private_t *dev_priv,
			drm_radeon_kcmd_buffer_t *cmdbuf,
			drm_r300_cmd_header_t header)
{
	u32 *ref_age_base;
	u32 i, buf_idx, h_pending;
	RING_LOCALS;

	/* Need the 64-bit ref_age pointer plus one index per buffer. */
	if (cmdbuf->bufsz < sizeof (uint64_t) +
	    header.scratch.n_bufs * sizeof (buf_idx)) {
		return (EINVAL);
	}

	if (header.scratch.reg >= 5) {
		return (EINVAL);
	}

	dev_priv->scratch_ages[header.scratch.reg] ++;

	/*
	 * Userspace pointer to the ref_age array, passed inline as a
	 * 64-bit value.  NOTE(review): this assumes cmdbuf->buf is
	 * 8-byte aligned here — verify on strict-alignment platforms.
	 */
	ref_age_base = (u32 *)(uintptr_t)*((uint64_t *)(uintptr_t)cmdbuf->buf);

	cmdbuf->buf += sizeof (uint64_t);
	cmdbuf->bufsz -= sizeof (uint64_t);

	for (i = 0; i < header.scratch.n_bufs; i++) {
		buf_idx = *(u32 *)(uintptr_t)cmdbuf->buf;
		buf_idx *= 2; /* 8 bytes per buf */

		/* Publish the new age to userspace for this buffer. */
		if (DRM_COPY_TO_USER(ref_age_base + buf_idx,
		    &dev_priv->scratch_ages[header.scratch.reg],
		    sizeof (u32))) {
			return (EINVAL);
		}

		if (DRM_COPY_FROM_USER(&h_pending,
		    ref_age_base + buf_idx + 1, sizeof (u32))) {
			return (EINVAL);
		}

		/* A zero pending count means the slot is already idle. */
		if (h_pending == 0) {
			return (EINVAL);
		}

		h_pending--;

		if (DRM_COPY_TO_USER(ref_age_base + buf_idx + 1,
		    &h_pending, sizeof (u32))) {
			return (EINVAL);
		}

		cmdbuf->buf += sizeof (buf_idx);
		cmdbuf->bufsz -= sizeof (buf_idx);
	}

	/* Have the CP write the new age into the scratch register. */
	BEGIN_RING(2);
	OUT_RING(CP_PACKET0(RADEON_SCRATCH_REG0 + header.scratch.reg * 4, 0));
	OUT_RING(dev_priv->scratch_ages[header.scratch.reg]);
	ADVANCE_RING();

	return (0);
}
803
/*
 * Parses and validates a user-supplied command buffer and emits appropriate
 * commands on the DMA ring buffer.
 * Called by the ioctl handler function radeon_cp_cmdbuf.
 *
 * The buffer is a stream of drm_r300_cmd_header_t headers, each
 * followed by its payload; every command type is dispatched to a
 * checked emit routine.  The whole stream is bracketed by r300_pacify
 * calls (see the comment above r300_pacify), and the cleanup label is
 * reached on both success and failure so the closing pacify and
 * COMMIT_RING always run.  Returns 0 on success or an errno value
 * (EINVAL/EFAULT) on the first malformed command.
 */
/*ARGSUSED*/
int
r300_do_cp_cmdbuf(drm_device_t *dev,
    drm_file_t *fpriv, drm_radeon_kcmd_buffer_t *cmdbuf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf = NULL;
	int emit_dispatch_age = 0;
	int ret = 0;

	DRM_DEBUG("\n");

	/*
	 * See the comment above r300_emit_begin3d for why this call
	 * must be here, and what the cleanup gotos are for.
	 */
	r300_pacify(dev_priv);

	/*
	 * With few enough cliprects they are emitted once here; with
	 * more, r300_emit_packet3 re-emits per cliprect group.
	 */
	if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) {
		ret = r300_emit_cliprects(dev_priv, cmdbuf, 0);
		if (ret)
			goto cleanup;
	}

	while (cmdbuf->bufsz >= sizeof (drm_r300_cmd_header_t)) {
		int idx;
		drm_r300_cmd_header_t header;

		header.u = *(unsigned int *)(uintptr_t)cmdbuf->buf;

		cmdbuf->buf += sizeof (header);
		cmdbuf->bufsz -= sizeof (header);

		switch (header.header.cmd_type) {
		case R300_CMD_PACKET0:
			DRM_DEBUG("R300_CMD_PACKET0\n");
			ret = r300_emit_packet0(dev_priv, cmdbuf, header);
			if (ret) {
				DRM_ERROR("r300_emit_packet0 failed\n");
				goto cleanup;
			}
			break;

		case R300_CMD_VPU:
			DRM_DEBUG("R300_CMD_VPU\n");
			ret = r300_emit_vpu(dev_priv, cmdbuf, header);
			if (ret) {
				DRM_ERROR("r300_emit_vpu failed\n");
				goto cleanup;
			}
			break;

		case R300_CMD_PACKET3:
			DRM_DEBUG("R300_CMD_PACKET3\n");
			ret = r300_emit_packet3(dev_priv, cmdbuf, header);
			if (ret) {
				DRM_ERROR("r300_emit_packet3 failed\n");
				goto cleanup;
			}
			break;

		case R300_CMD_END3D:
			DRM_DEBUG("R300_CMD_END3D\n");
			/*
			 * TODO:
			 * Ideally userspace driver should not need to issue
			 * this call, i.e. the drm driver should issue it
			 * automatically and prevent lockups. In practice, we
			 * do not understand why this call is needed and what
			 * it does (except for some vague guesses that it has
			 * to do with cache coherence) and so the user space
			 * driver does it.
			 *
			 * Once we are sure which uses prevent lockups the code
			 * could be moved into the kernel and the userspace
			 * driver will not need to use this command.
			 *
			 * Note that issuing this command does not hurt anything
			 * except, possibly, performance
			 */
			r300_pacify(dev_priv);
			break;

		case R300_CMD_CP_DELAY:
			/* simple enough, we can do it here */
			DRM_DEBUG("R300_CMD_CP_DELAY\n");
			{
				int i;
				RING_LOCALS;

				/* Pad the ring with NOP-equivalents. */
				BEGIN_RING(header.delay.count);
				for (i = 0; i < header.delay.count; i++)
					OUT_RING(RADEON_CP_PACKET2);
				ADVANCE_RING();
			}
			break;

		case R300_CMD_DMA_DISCARD:
			DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
			idx = header.dma.buf_idx;
			if (idx < 0 || idx >= dma->buf_count) {
				DRM_ERROR("buffer index %d (of %d max)\n",
				    idx, dma->buf_count - 1);
				ret = EINVAL;
				goto cleanup;
			}

			/* The buffer must belong to the caller and be idle. */
			buf = dma->buflist[idx];
			if (buf->filp != fpriv || buf->pending) {
				DRM_ERROR("bad buffer %p %p %d\n",
				    buf->filp, fpriv, buf->pending);
				ret = EINVAL;
				goto cleanup;
			}

			emit_dispatch_age = 1;
			r300_discard_buffer(dev, buf);
			break;

		case R300_CMD_WAIT:
			/* simple enough, we can do it here */
			DRM_DEBUG("R300_CMD_WAIT\n");
			if (header.wait.flags == 0)
				break;	/* nothing to do */

			{
				RING_LOCALS;

				BEGIN_RING(2);
				OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
				OUT_RING((header.wait.flags & 0xf) << 14);
				ADVANCE_RING();
			}
			break;

		case R300_CMD_SCRATCH:
			DRM_DEBUG("R300_CMD_SCRATCH\n");
			ret = r300_scratch(dev_priv, cmdbuf, header);
			if (ret) {
				DRM_ERROR("r300_scratch failed\n");
				goto cleanup;
			}
			break;

		default:
			DRM_ERROR("bad cmd_type %i at %p\n",
			    header.header.cmd_type,
			    cmdbuf->buf - sizeof (header));
			ret = EINVAL;
			goto cleanup;
		}
	}

	DRM_DEBUG("END\n");

cleanup:
	r300_pacify(dev_priv);

	/*
	 * We emit the vertex buffer age here, outside the pacifier "brackets"
	 * for two reasons:
	 * (1) This may coalesce multiple age emissions into a single one and
	 * (2) more importantly, some chips lock up hard when scratch registers
	 * 		are written inside the pacifier bracket.
	 */
	if (emit_dispatch_age) {
		RING_LOCALS;

		/* Emit the vertex buffer age */
		BEGIN_RING(2);
		RADEON_DISPATCH_AGE(dev_priv->sarea_priv->last_dispatch);
		ADVANCE_RING();
	}

	COMMIT_RING();

	return (ret);
}
988