1
2/*
3 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
4 * Use is subject to license terms.
5 */
6/* radeon_state.c -- State support for Radeon -*- linux-c -*- */
7/*
8 * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
9 * All Rights Reserved.
10 *
11 * Permission is hereby granted, free of charge, to any person obtaining a
12 * copy of this software and associated documentation files (the "Software"),
13 * to deal in the Software without restriction, including without limitation
14 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15 * and/or sell copies of the Software, and to permit persons to whom the
16 * Software is furnished to do so, subject to the following conditions:
17 *
18 * The above copyright notice and this permission notice (including the next
19 * paragraph) shall be included in all copies or substantial portions of the
20 * Software.
21 *
22 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
25 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
26 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
27 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
28 * DEALINGS IN THE SOFTWARE.
29 *
30 * Authors:
31 *    Gareth Hughes <gareth@valinux.com>
32 *    Kevin E. Martin <martin@valinux.com>
33 */
34
35#pragma ident	"%Z%%M%	%I%	%E% SMI"
36
37#include "drmP.h"
38#include "drm.h"
39#include "drm_sarea.h"
40#include "radeon_drm.h"
41#include "radeon_drv.h"
42#include "radeon_io32.h"
43
44/*
45 * Helper functions for client state checking and fixup
46 */
47
48static inline int
49radeon_check_and_fixup_offset(drm_radeon_private_t *dev_priv,
50    drm_file_t *filp_priv, u32 *offset)
51{
52	u64 off = *offset;
53	u32 fb_end = dev_priv->fb_location + dev_priv->fb_size - 1;
54	struct drm_radeon_driver_file_fields *radeon_priv;
55
56	/*
57	 * Hrm ... the story of the offset ... So this function converts
58	 * the various ideas of what userland clients might have for an
59	 * offset in the card address space into an offset into the card
60	 * address space :) So with a sane client, it should just keep
61	 * the value intact and just do some boundary checking. However,
62	 * not all clients are sane. Some older clients pass us 0 based
63	 * offsets relative to the start of the framebuffer and some may
64	 * assume the AGP aperture it appended to the framebuffer, so we
65	 * try to detect those cases and fix them up.
66	 *
67	 * Note: It might be a good idea here to make sure the offset lands
68	 * in some "allowed" area to protect things like the PCIE GART...
69	 */
70
71	/*
72	 * First, the best case, the offset already lands in either the
73	 * framebuffer or the GART mapped space
74	 */
75
76	if (RADEON_CHECK_OFFSET(dev_priv, off))
77		return (0);
78
79	/*
80	 * Ok, that didn't happen... now check if we have a zero based
81	 * offset that fits in the framebuffer + gart space, apply the
82	 * magic offset we get from SETPARAM or calculated from fb_location
83	 */
84	if (off < (dev_priv->fb_size + dev_priv->gart_size)) {
85		radeon_priv = filp_priv->driver_priv;
86		off += radeon_priv->radeon_fb_delta;
87	}
88
89	/* Finally, assume we aimed at a GART offset if beyond the fb */
90	if (off > fb_end)
91		off = off - fb_end - 1 + dev_priv->gart_vm_start;
92
93	/* Now recheck and fail if out of bounds */
94	if (RADEON_CHECK_OFFSET(dev_priv, off)) {
95		DRM_DEBUG("offset fixed up to 0x%x\n", off);
96		*offset = (uint32_t)off;
97		return (0);
98	}
99	return (EINVAL);
100}
101
102static inline int
103radeon_check_and_fixup_packets(drm_radeon_private_t *dev_priv,
104    drm_file_t *filp_priv, int id, u32 *data)
105{
106	switch (id) {
107
108	case RADEON_EMIT_PP_MISC:
109		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
110		    &data[(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4])) {
111			DRM_ERROR("Invalid depth buffer offset\n");
112			return (EINVAL);
113		}
114		break;
115
116	case RADEON_EMIT_PP_CNTL:
117		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
118		    &data[(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4])) {
119			DRM_ERROR("Invalid colour buffer offset\n");
120			return (EINVAL);
121		}
122		break;
123
124	case R200_EMIT_PP_TXOFFSET_0:
125	case R200_EMIT_PP_TXOFFSET_1:
126	case R200_EMIT_PP_TXOFFSET_2:
127	case R200_EMIT_PP_TXOFFSET_3:
128	case R200_EMIT_PP_TXOFFSET_4:
129	case R200_EMIT_PP_TXOFFSET_5:
130		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
131		    &data[0])) {
132			DRM_ERROR("Invalid R200 texture offset\n");
133			return (EINVAL);
134		}
135		break;
136
137	case RADEON_EMIT_PP_TXFILTER_0:
138	case RADEON_EMIT_PP_TXFILTER_1:
139	case RADEON_EMIT_PP_TXFILTER_2:
140		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
141		    &data[(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4])) {
142			DRM_ERROR("Invalid R100 texture offset\n");
143			return (EINVAL);
144		}
145		break;
146
147	case R200_EMIT_PP_CUBIC_OFFSETS_0:
148	case R200_EMIT_PP_CUBIC_OFFSETS_1:
149	case R200_EMIT_PP_CUBIC_OFFSETS_2:
150	case R200_EMIT_PP_CUBIC_OFFSETS_3:
151	case R200_EMIT_PP_CUBIC_OFFSETS_4:
152	case R200_EMIT_PP_CUBIC_OFFSETS_5: {
153			int i;
154			for (i = 0; i < 5; i++) {
155				if (radeon_check_and_fixup_offset(dev_priv,
156				    filp_priv, &data[i])) {
157					DRM_ERROR("Invalid R200 cubic"
158					    " texture offset\n");
159					return (EINVAL);
160				}
161			}
162			break;
163		}
164
165	case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
166	case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
167	case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:
168	{
169			int i;
170			for (i = 0; i < 5; i++) {
171				if (radeon_check_and_fixup_offset(dev_priv,
172				    filp_priv, &data[i])) {
173					DRM_ERROR("Invalid R100 cubic"
174					    " texture offset\n");
175					return (EINVAL);
176				}
177			}
178	}
179		break;
180
181	case R200_EMIT_VAP_CTL:
182	{
183		RING_LOCALS;
184		BEGIN_RING(2);
185		OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
186		ADVANCE_RING();
187	}
188		break;
189
190	case RADEON_EMIT_RB3D_COLORPITCH:
191	case RADEON_EMIT_RE_LINE_PATTERN:
192	case RADEON_EMIT_SE_LINE_WIDTH:
193	case RADEON_EMIT_PP_LUM_MATRIX:
194	case RADEON_EMIT_PP_ROT_MATRIX_0:
195	case RADEON_EMIT_RB3D_STENCILREFMASK:
196	case RADEON_EMIT_SE_VPORT_XSCALE:
197	case RADEON_EMIT_SE_CNTL:
198	case RADEON_EMIT_SE_CNTL_STATUS:
199	case RADEON_EMIT_RE_MISC:
200	case RADEON_EMIT_PP_BORDER_COLOR_0:
201	case RADEON_EMIT_PP_BORDER_COLOR_1:
202	case RADEON_EMIT_PP_BORDER_COLOR_2:
203	case RADEON_EMIT_SE_ZBIAS_FACTOR:
204	case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
205	case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
206	case R200_EMIT_PP_TXCBLEND_0:
207	case R200_EMIT_PP_TXCBLEND_1:
208	case R200_EMIT_PP_TXCBLEND_2:
209	case R200_EMIT_PP_TXCBLEND_3:
210	case R200_EMIT_PP_TXCBLEND_4:
211	case R200_EMIT_PP_TXCBLEND_5:
212	case R200_EMIT_PP_TXCBLEND_6:
213	case R200_EMIT_PP_TXCBLEND_7:
214	case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
215	case R200_EMIT_TFACTOR_0:
216	case R200_EMIT_VTX_FMT_0:
217	case R200_EMIT_MATRIX_SELECT_0:
218	case R200_EMIT_TEX_PROC_CTL_2:
219	case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
220	case R200_EMIT_PP_TXFILTER_0:
221	case R200_EMIT_PP_TXFILTER_1:
222	case R200_EMIT_PP_TXFILTER_2:
223	case R200_EMIT_PP_TXFILTER_3:
224	case R200_EMIT_PP_TXFILTER_4:
225	case R200_EMIT_PP_TXFILTER_5:
226	case R200_EMIT_VTE_CNTL:
227	case R200_EMIT_OUTPUT_VTX_COMP_SEL:
228	case R200_EMIT_PP_TAM_DEBUG3:
229	case R200_EMIT_PP_CNTL_X:
230	case R200_EMIT_RB3D_DEPTHXY_OFFSET:
231	case R200_EMIT_RE_AUX_SCISSOR_CNTL:
232	case R200_EMIT_RE_SCISSOR_TL_0:
233	case R200_EMIT_RE_SCISSOR_TL_1:
234	case R200_EMIT_RE_SCISSOR_TL_2:
235	case R200_EMIT_SE_VAP_CNTL_STATUS:
236	case R200_EMIT_SE_VTX_STATE_CNTL:
237	case R200_EMIT_RE_POINTSIZE:
238	case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
239	case R200_EMIT_PP_CUBIC_FACES_0:
240	case R200_EMIT_PP_CUBIC_FACES_1:
241	case R200_EMIT_PP_CUBIC_FACES_2:
242	case R200_EMIT_PP_CUBIC_FACES_3:
243	case R200_EMIT_PP_CUBIC_FACES_4:
244	case R200_EMIT_PP_CUBIC_FACES_5:
245	case RADEON_EMIT_PP_TEX_SIZE_0:
246	case RADEON_EMIT_PP_TEX_SIZE_1:
247	case RADEON_EMIT_PP_TEX_SIZE_2:
248	case R200_EMIT_RB3D_BLENDCOLOR:
249	case R200_EMIT_TCL_POINT_SPRITE_CNTL:
250	case RADEON_EMIT_PP_CUBIC_FACES_0:
251	case RADEON_EMIT_PP_CUBIC_FACES_1:
252	case RADEON_EMIT_PP_CUBIC_FACES_2:
253	case R200_EMIT_PP_TRI_PERF_CNTL:
254	case R200_EMIT_PP_AFS_0:
255	case R200_EMIT_PP_AFS_1:
256	case R200_EMIT_ATF_TFACTOR:
257	case R200_EMIT_PP_TXCTLALL_0:
258	case R200_EMIT_PP_TXCTLALL_1:
259	case R200_EMIT_PP_TXCTLALL_2:
260	case R200_EMIT_PP_TXCTLALL_3:
261	case R200_EMIT_PP_TXCTLALL_4:
262	case R200_EMIT_PP_TXCTLALL_5:
263	case R200_EMIT_VAP_PVS_CNTL:
264		/* These packets don't contain memory offsets */
265		break;
266
267	default:
268		DRM_ERROR("Unknown state packet ID %d\n", id);
269		return (EINVAL);
270	}
271
272	return (0);
273}
274
275static inline int
276radeon_check_and_fixup_packet3(drm_radeon_private_t *dev_priv,
277    drm_file_t *filp_priv, drm_radeon_kcmd_buffer_t *cmdbuf,
278    unsigned int *cmdsz)
279{
280	u32 *cmd = (u32 *)(uintptr_t)cmdbuf->buf;
281	u32 offset, narrays;
282	int count, i, k;
283
284	*cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);
285
286	if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) {
287		DRM_ERROR("Not a type 3 packet\n");
288		return (EINVAL);
289	}
290
291	if (4 * *cmdsz > cmdbuf->bufsz) {
292		DRM_ERROR("Packet size larger than size of data provided\n");
293		return (EINVAL);
294	}
295
296
297	switch (cmd[0] & 0xff00) {
298	/* XXX Are there old drivers needing other packets? */
299
300	case RADEON_3D_DRAW_IMMD:
301	case RADEON_3D_DRAW_VBUF:
302	case RADEON_3D_DRAW_INDX:
303	case RADEON_WAIT_FOR_IDLE:
304	case RADEON_CP_NOP:
305	case RADEON_3D_CLEAR_ZMASK:
306#if 0
307	case RADEON_CP_NEXT_CHAR:
308	case RADEON_CP_PLY_NEXTSCAN:
309	case RADEON_CP_SET_SCISSORS:
310	/* probably safe but will never need them? */
311#endif
312/* these packets are safe */
313		break;
314
315	case RADEON_CP_3D_DRAW_IMMD_2:
316	case RADEON_CP_3D_DRAW_VBUF_2:
317	case RADEON_CP_3D_DRAW_INDX_2:
318	case RADEON_3D_CLEAR_HIZ:
319		/* safe but r200 only */
320		if (dev_priv->microcode_version != UCODE_R200) {
321			DRM_ERROR("Invalid 3d packet for r100-class chip\n");
322			return (EINVAL);
323		}
324		break;
325
326	case RADEON_3D_LOAD_VBPNTR:
327		count = (cmd[0] >> 16) & 0x3fff;
328
329		if (count > 18) { /* 12 arrays max */
330			DRM_ERROR(
331			    "Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
332			    count);
333			return (EINVAL);
334		}
335
336		/* carefully check packet contents */
337		narrays = cmd[1] & ~0xc000;
338		k = 0;
339		i = 2;
340		while ((k < narrays) && (i < (count + 2))) {
341			i++;		/* skip attribute field */
342			if (radeon_check_and_fixup_offset(dev_priv,
343			    filp_priv, &cmd[i])) {
344				DRM_ERROR(
345				    "Invalid offset (k=%d i=%d) ini"
346				    " 3D_LOAD_VBPNTR packet.\n", k, i);
347				return (EINVAL);
348			}
349			k++;
350			i++;
351			if (k == narrays)
352				break;
353			/* have one more to process, they come in pairs */
354			if (radeon_check_and_fixup_offset(dev_priv,
355			    filp_priv, &cmd[i])) {
356				DRM_ERROR(
357				    "Invalid offset (k=%d i=%d) in"
358				    " 3D_LOAD_VBPNTR packet.\n", k, i);
359				return (EINVAL);
360			}
361			k++;
362			i++;
363		}
364		/* do the counts match what we expect ? */
365		if ((k != narrays) || (i != (count + 2))) {
366			DRM_ERROR(
367			    "Malformed 3D_LOAD_VBPNTR packet"
368			    "(k=%d i=%d narrays=%d count+1=%d).\n",
369			    k, i, narrays, count + 1);
370			return (EINVAL);
371		}
372		break;
373
374	case RADEON_3D_RNDR_GEN_INDX_PRIM:
375		if (dev_priv->microcode_version != UCODE_R100) {
376			DRM_ERROR("Invalid 3d packet for r200-class chip\n");
377			return (EINVAL);
378		}
379		if (radeon_check_and_fixup_offset(dev_priv,
380		    filp_priv, &cmd[1])) {
381				DRM_ERROR("Invalid rndr_gen_indx offset\n");
382				return (EINVAL);
383		}
384		break;
385
386	case RADEON_CP_INDX_BUFFER:
387		if (dev_priv->microcode_version != UCODE_R200) {
388			DRM_ERROR("Invalid 3d packet for r100-class chip\n");
389			return (EINVAL);
390		}
391		if ((cmd[1] & 0x8000ffff) != 0x80000810) {
392			DRM_ERROR(
393			    "Invalid indx_buffer reg address %08X\n", cmd[1]);
394			return (EINVAL);
395		}
396		if (radeon_check_and_fixup_offset(dev_priv,
397		    filp_priv, &cmd[2])) {
398			DRM_ERROR(
399			    "Invalid indx_buffer offset is %08X\n", cmd[2]);
400			return (EINVAL);
401		}
402		break;
403
404	case RADEON_CNTL_HOSTDATA_BLT:
405	case RADEON_CNTL_PAINT_MULTI:
406	case RADEON_CNTL_BITBLT_MULTI:
407		/* MSB of opcode: next DWORD GUI_CNTL */
408		if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
409		    RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
410			offset = cmd[2] << 10;
411			if (radeon_check_and_fixup_offset
412			    (dev_priv, filp_priv, &offset)) {
413				DRM_ERROR("Invalid first packet offset\n");
414				return (EINVAL);
415			}
416			cmd[2] = (cmd[2] & 0xffc00000) | offset >> 10;
417		}
418
419		if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
420		    (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
421			offset = cmd[3] << 10;
422			if (radeon_check_and_fixup_offset
423			    (dev_priv, filp_priv, &offset)) {
424				DRM_ERROR("Invalid second packet offset\n");
425				return (EINVAL);
426			}
427			cmd[3] = (cmd[3] & 0xffc00000) | offset >> 10;
428		}
429		break;
430
431	default:
432		DRM_ERROR("Invalid packet type %x\n", cmd[0] & 0xff00);
433		return (EINVAL);
434	}
435
436	return (0);
437}
438
439/*
440 * CP hardware state programming functions
441 */
442
443static inline void radeon_emit_clip_rect(drm_radeon_private_t *dev_priv,
444    drm_clip_rect_t *box)
445{
446	RING_LOCALS;
447
448	DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
449	    box->x1, box->y1, box->x2, box->y2);
450
451	BEGIN_RING(4);
452	OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
453	OUT_RING((box->y1 << 16) | box->x1);
454	OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
455	OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
456	ADVANCE_RING();
457}
458
459/* Emit 1.1 state */
460static int radeon_emit_state(drm_radeon_private_t *dev_priv,
461    drm_file_t *filp_priv, drm_radeon_context_regs_t *ctx,
462    drm_radeon_texture_regs_t *tex, unsigned int dirty)
463{
464	RING_LOCALS;
465	DRM_DEBUG("dirty=0x%08x\n", dirty);
466
467	if (dirty & RADEON_UPLOAD_CONTEXT) {
468		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
469		    &ctx->rb3d_depthoffset)) {
470			DRM_ERROR("Invalid depth buffer offset\n");
471			return (EINVAL);
472		}
473
474		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
475		    &ctx->rb3d_coloroffset)) {
476			DRM_ERROR("Invalid depth buffer offset\n");
477			return (EINVAL);
478		}
479
480		BEGIN_RING(14);
481		OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
482		OUT_RING(ctx->pp_misc);
483		OUT_RING(ctx->pp_fog_color);
484		OUT_RING(ctx->re_solid_color);
485		OUT_RING(ctx->rb3d_blendcntl);
486		OUT_RING(ctx->rb3d_depthoffset);
487		OUT_RING(ctx->rb3d_depthpitch);
488		OUT_RING(ctx->rb3d_zstencilcntl);
489		OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
490		OUT_RING(ctx->pp_cntl);
491		OUT_RING(ctx->rb3d_cntl);
492		OUT_RING(ctx->rb3d_coloroffset);
493		OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
494		OUT_RING(ctx->rb3d_colorpitch);
495		ADVANCE_RING();
496	}
497
498	if (dirty & RADEON_UPLOAD_VERTFMT) {
499		BEGIN_RING(2);
500		OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
501		OUT_RING(ctx->se_coord_fmt);
502		ADVANCE_RING();
503	}
504
505	if (dirty & RADEON_UPLOAD_LINE) {
506		BEGIN_RING(5);
507		OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
508		OUT_RING(ctx->re_line_pattern);
509		OUT_RING(ctx->re_line_state);
510		OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
511		OUT_RING(ctx->se_line_width);
512		ADVANCE_RING();
513	}
514
515	if (dirty & RADEON_UPLOAD_BUMPMAP) {
516		BEGIN_RING(5);
517		OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
518		OUT_RING(ctx->pp_lum_matrix);
519		OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
520		OUT_RING(ctx->pp_rot_matrix_0);
521		OUT_RING(ctx->pp_rot_matrix_1);
522		ADVANCE_RING();
523	}
524
525	if (dirty & RADEON_UPLOAD_MASKS) {
526		BEGIN_RING(4);
527		OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
528		OUT_RING(ctx->rb3d_stencilrefmask);
529		OUT_RING(ctx->rb3d_ropcntl);
530		OUT_RING(ctx->rb3d_planemask);
531		ADVANCE_RING();
532	}
533
534	if (dirty & RADEON_UPLOAD_VIEWPORT) {
535		BEGIN_RING(7);
536		OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
537		OUT_RING(ctx->se_vport_xscale);
538		OUT_RING(ctx->se_vport_xoffset);
539		OUT_RING(ctx->se_vport_yscale);
540		OUT_RING(ctx->se_vport_yoffset);
541		OUT_RING(ctx->se_vport_zscale);
542		OUT_RING(ctx->se_vport_zoffset);
543		ADVANCE_RING();
544	}
545
546	if (dirty & RADEON_UPLOAD_SETUP) {
547		BEGIN_RING(4);
548		OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
549		OUT_RING(ctx->se_cntl);
550		OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
551		OUT_RING(ctx->se_cntl_status);
552		ADVANCE_RING();
553	}
554
555	if (dirty & RADEON_UPLOAD_MISC) {
556		BEGIN_RING(2);
557		OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
558		OUT_RING(ctx->re_misc);
559		ADVANCE_RING();
560	}
561
562	if (dirty & RADEON_UPLOAD_TEX0) {
563		if (radeon_check_and_fixup_offset(dev_priv,
564		    filp_priv, &tex[0].pp_txoffset)) {
565			DRM_ERROR("Invalid texture offset for unit 0\n");
566			return (EINVAL);
567		}
568
569		BEGIN_RING(9);
570		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
571		OUT_RING(tex[0].pp_txfilter);
572		OUT_RING(tex[0].pp_txformat);
573		OUT_RING(tex[0].pp_txoffset);
574		OUT_RING(tex[0].pp_txcblend);
575		OUT_RING(tex[0].pp_txablend);
576		OUT_RING(tex[0].pp_tfactor);
577		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
578		OUT_RING(tex[0].pp_border_color);
579		ADVANCE_RING();
580	}
581
582	if (dirty & RADEON_UPLOAD_TEX1) {
583		if (radeon_check_and_fixup_offset(dev_priv,
584		    filp_priv, &tex[1].pp_txoffset)) {
585			DRM_ERROR("Invalid texture offset for unit 1\n");
586			return (EINVAL);
587		}
588
589		BEGIN_RING(9);
590		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
591		OUT_RING(tex[1].pp_txfilter);
592		OUT_RING(tex[1].pp_txformat);
593		OUT_RING(tex[1].pp_txoffset);
594		OUT_RING(tex[1].pp_txcblend);
595		OUT_RING(tex[1].pp_txablend);
596		OUT_RING(tex[1].pp_tfactor);
597		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
598		OUT_RING(tex[1].pp_border_color);
599		ADVANCE_RING();
600	}
601
602	if (dirty & RADEON_UPLOAD_TEX2) {
603		if (radeon_check_and_fixup_offset(dev_priv,
604		    filp_priv, &tex[2].pp_txoffset)) {
605			DRM_ERROR("Invalid texture offset for unit 2\n");
606			return (EINVAL);
607		}
608
609		BEGIN_RING(9);
610		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
611		OUT_RING(tex[2].pp_txfilter);
612		OUT_RING(tex[2].pp_txformat);
613		OUT_RING(tex[2].pp_txoffset);
614		OUT_RING(tex[2].pp_txcblend);
615		OUT_RING(tex[2].pp_txablend);
616		OUT_RING(tex[2].pp_tfactor);
617		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
618		OUT_RING(tex[2].pp_border_color);
619		ADVANCE_RING();
620	}
621
622	return (0);
623}
624
625/* Emit 1.2 state */
626static int radeon_emit_state2(drm_radeon_private_t *dev_priv,
627    drm_file_t *filp_priv, drm_radeon_state_t *state)
628{
629	RING_LOCALS;
630
631	if (state->dirty & RADEON_UPLOAD_ZBIAS) {
632		BEGIN_RING(3);
633		OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
634		OUT_RING(state->context2.se_zbias_factor);
635		OUT_RING(state->context2.se_zbias_constant);
636		ADVANCE_RING();
637	}
638
639	return (radeon_emit_state(dev_priv, filp_priv,
640	    &state->context, state->tex, state->dirty));
641}
642
643/*
644 * New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
645 * 1.3 cmdbuffers allow all previous state to be updated as well as
646 * the tcl scalar and vector areas.
647 */
648static struct {
649	int start;
650	int len;
651	const char *name;
652} packet[RADEON_MAX_STATE_PACKETS] = {
653	{RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
654	{RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
655	{RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
656	{RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
657	{RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
658	{RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
659	{RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
660	{RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
661	{RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
662	{RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
663	{RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
664	{RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
665	{RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
666	{RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
667	{RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
668	{RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
669	{RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
670	{RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
671	{RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
672	{RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
673	{RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
674		    "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
675	{R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
676	{R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
677	{R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
678	{R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
679	{R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
680	{R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
681	{R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
682	{R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
683	{R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
684	{R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
685	{R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
686	{R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
687	{R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
688	{R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
689	{R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
690	{R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
691	{R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
692	{R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
693	{R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
694	{R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
695	{R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
696	{R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
697	{R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
698	{R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
699	{R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
700	{R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
701	{R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
702	{R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
703	{R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
704	"R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
705	{R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
706	{R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
707	{R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
708	{R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
709	{R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
710	{R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
711	{R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
712	{R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
713	{R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
714	{R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
715	{R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
716		    "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
717	{R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},	/* 61 */
718	{R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
719	{R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
720	{R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
721	{R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
722	{R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
723	{R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
724	{R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
725	{R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
726	{R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
727	{R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
728	{R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
729	{RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
730	{RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
731	{RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
732	{R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
733	{R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
734	{RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
735	{RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
736	{RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
737	{RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
738	{RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
739	{RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
740	{R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
741	{R200_PP_AFS_0, 32, "R200_PP_AFS_0"},	/* 85 */
742	{R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
743	{R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
744	{R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
745	{R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
746	{R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
747	{R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
748	{R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
749	{R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
750	{R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
751};
752
753/*
754 * Performance monitoring functions
755 */
756
757static void radeon_clear_box(drm_radeon_private_t *dev_priv,
758    int x, int y, int w, int h, int r, int g, int b)
759{
760	u32 color;
761	RING_LOCALS;
762
763	x += dev_priv->sarea_priv->boxes[0].x1;
764	y += dev_priv->sarea_priv->boxes[0].y1;
765
766	switch (dev_priv->color_fmt) {
767	case RADEON_COLOR_FORMAT_RGB565:
768		color = (((r & 0xf8) << 8) |
769		    ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
770		break;
771	case RADEON_COLOR_FORMAT_ARGB8888:
772	default:
773		color = (((0xfful) << 24) | (r << 16) | (g << 8) | b);
774		break;
775	}
776
777	BEGIN_RING(4);
778	RADEON_WAIT_UNTIL_3D_IDLE();
779	OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
780	OUT_RING(0xffffffff);
781	ADVANCE_RING();
782
783	BEGIN_RING(6);
784
785	OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
786	OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
787	    RADEON_GMC_BRUSH_SOLID_COLOR |
788	    (dev_priv->color_fmt << 8) |
789	    RADEON_GMC_SRC_DATATYPE_COLOR |
790	    RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);
791
792	if (dev_priv->page_flipping && dev_priv->current_page == 1) {
793		OUT_RING(dev_priv->front_pitch_offset);
794	} else {
795		OUT_RING(dev_priv->back_pitch_offset);
796	}
797
798	OUT_RING(color);
799
800	OUT_RING((x << 16) | y);
801	OUT_RING((w << 16) | h);
802
803	ADVANCE_RING();
804}
805
806static void radeon_cp_performance_boxes(drm_radeon_private_t *dev_priv)
807{
808	/*
809	 * Collapse various things into a wait flag -- trying to
810	 * guess if userspase slept -- better just to have them tell us.
811	 */
812	if (dev_priv->stats.last_frame_reads > 1 ||
813	    dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
814		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
815	}
816
817	if (dev_priv->stats.freelist_loops) {
818		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
819	}
820
821	/* Purple box for page flipping */
822	if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
823		radeon_clear_box(dev_priv, 4, 4, 8, 8, 255, 0, 255);
824
825	/* Red box if we have to wait for idle at any point */
826	if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
827		radeon_clear_box(dev_priv, 16, 4, 8, 8, 255, 0, 0);
828
829	/* Blue box: lost context? */
830
831	/* Yellow box for texture swaps */
832	if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
833		radeon_clear_box(dev_priv, 40, 4, 8, 8, 255, 255, 0);
834
835	/* Green box if hardware never idles (as far as we can tell) */
836	if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
837		radeon_clear_box(dev_priv, 64, 4, 8, 8, 0, 255, 0);
838
839	/*
840	 * Draw bars indicating number of buffers allocated
841	 * (not a great measure, easily confused)
842	 */
843	if (dev_priv->stats.requested_bufs) {
844		if (dev_priv->stats.requested_bufs > 100)
845			dev_priv->stats.requested_bufs = 100;
846
847		radeon_clear_box(dev_priv, 4, 16,
848		    dev_priv->stats.requested_bufs, 4, 196, 128, 128);
849	}
850
851	(void) memset(&dev_priv->stats, 0, sizeof (dev_priv->stats));
852
853}
854
855/*
856 * CP command dispatch functions
857 */
858
859static void radeon_cp_dispatch_clear(drm_device_t *dev,
860    drm_radeon_clear_t *clear, drm_radeon_clear_rect_t *depth_boxes)
861{
862	drm_radeon_private_t *dev_priv = dev->dev_private;
863	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
864	drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
865	int nbox = sarea_priv->nbox;
866	drm_clip_rect_t *pbox = sarea_priv->boxes;
867	unsigned int flags = clear->flags;
868	u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
869	int i;
870	RING_LOCALS;
871	DRM_DEBUG("flags = 0x%x\n", flags);
872
873	dev_priv->stats.clears++;
874
875	if (dev_priv->page_flipping && dev_priv->current_page == 1) {
876		unsigned int tmp = flags;
877
878		flags &= ~(RADEON_FRONT | RADEON_BACK);
879		if (tmp & RADEON_FRONT)
880			flags |= RADEON_BACK;
881		if (tmp & RADEON_BACK)
882			flags |= RADEON_FRONT;
883	}
884
885	if (flags & (RADEON_FRONT | RADEON_BACK)) {
886
887		BEGIN_RING(4);
888
889		/*
890		 * Ensure the 3D stream is idle before doing a
891		 * 2D fill to clear the front or back buffer.
892		 */
893		RADEON_WAIT_UNTIL_3D_IDLE();
894
895		OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
896		OUT_RING(clear->color_mask);
897
898		ADVANCE_RING();
899
900		/* Make sure we restore the 3D state next time.  */
901		dev_priv->sarea_priv->ctx_owner = 0;
902
903		for (i = 0; i < nbox; i++) {
904			int x = pbox[i].x1;
905			int y = pbox[i].y1;
906			int w = pbox[i].x2 - x;
907			int h = pbox[i].y2 - y;
908
909			DRM_DEBUG("dispatch clear %d,%d-%d,%d flags 0x%x\n",
910			    x, y, w, h, flags);
911
912			if (flags & RADEON_FRONT) {
913				BEGIN_RING(6);
914
915				OUT_RING(CP_PACKET3
916				    (RADEON_CNTL_PAINT_MULTI, 4));
917				OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
918				    RADEON_GMC_BRUSH_SOLID_COLOR |
919				    (dev_priv-> color_fmt << 8) |
920				    RADEON_GMC_SRC_DATATYPE_COLOR |
921				    RADEON_ROP3_P |
922				    RADEON_GMC_CLR_CMP_CNTL_DIS);
923
924				OUT_RING(dev_priv->front_pitch_offset);
925				OUT_RING(clear->clear_color);
926
927				OUT_RING((x << 16) | y);
928				OUT_RING((w << 16) | h);
929
930				ADVANCE_RING();
931			}
932
933			if (flags & RADEON_BACK) {
934				BEGIN_RING(6);
935
936				OUT_RING(CP_PACKET3
937				    (RADEON_CNTL_PAINT_MULTI, 4));
938				OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
939				    RADEON_GMC_BRUSH_SOLID_COLOR |
940				    (dev_priv-> color_fmt << 8) |
941				    RADEON_GMC_SRC_DATATYPE_COLOR |
942				    RADEON_ROP3_P |
943				    RADEON_GMC_CLR_CMP_CNTL_DIS);
944
945				OUT_RING(dev_priv->back_pitch_offset);
946				OUT_RING(clear->clear_color);
947
948				OUT_RING((x << 16) | y);
949				OUT_RING((w << 16) | h);
950
951				ADVANCE_RING();
952			}
953		}
954	}
955
956	/* hyper z clear */
	/*
	 * no docs available, based on reverse engineering
	 * by Stephane Marchesin
	 */
961	if ((flags & (RADEON_DEPTH | RADEON_STENCIL)) &&
962	    (flags & RADEON_CLEAR_FASTZ)) {
963
964		int i;
965		int depthpixperline =
966		    dev_priv->depth_fmt ==
967		    RADEON_DEPTH_FORMAT_16BIT_INT_Z ?
968		    (dev_priv->depth_pitch / 2) :
969		    (dev_priv-> depth_pitch / 4);
970
971		u32 clearmask;
972
973		u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
974		    ((clear->depth_mask & 0xff) << 24);
975
976		/*
977		 * Make sure we restore the 3D state next time.
978		 * we haven't touched any "normal" state - still
979		 * need this?
980		 */
981		dev_priv->sarea_priv->ctx_owner = 0;
982
983		if ((dev_priv->flags & RADEON_HAS_HIERZ) &&
984		    (flags & RADEON_USE_HIERZ)) {
985			/* FIXME : reverse engineer that for Rx00 cards */
986			/*
987			 * FIXME : the mask supposedly contains low-res
988			 * z values. So can't set just to the max (0xff?
989			 * or actually 0x3fff?), need to take z clear
990			 * value into account?
991			 */
992			/*
993			 * pattern seems to work for r100, though get
994			 * slight rendering errors with glxgears. If
995			 * hierz is not enabled for r100, only 4 bits
996			 * which indicate clear (15,16,31,32, all zero)
997			 * matter, the other ones are ignored, and the
998			 * same clear mask can be used. That's very
999			 * different behaviour than R200 which needs
1000			 * different clear mask and different number
1001			 * of tiles to clear if hierz is enabled or not !?!
1002			 */
1003			clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
1004		} else {
1005		/*
1006		 * clear mask : chooses the clearing pattern.
1007		 * rv250: could be used to clear only parts of macrotiles
1008		 * (but that would get really complicated...)?
1009		 * bit 0 and 1 (either or both of them ?!?!) are used to
1010		 * not clear tile (or maybe one of the bits indicates if
1011		 * the tile is compressed or not), bit 2 and 3 to not
1012		 * clear tile 1,...,.
1013		 * Pattern is as follows:
1014		 * | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
1015		 * bits -------------------------------------------------
1016		 * | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
1017		 * rv100: clearmask covers 2x8 4x1 tiles, but one clear
1018		 * still covers 256 pixels ?!?
1019		 */
1020			clearmask = 0x0;
1021		}
1022
1023		BEGIN_RING(8);
1024		RADEON_WAIT_UNTIL_2D_IDLE();
1025		OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
1026		    tempRB3D_DEPTHCLEARVALUE);
1027		/* what offset is this exactly ? */
1028		OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
1029		/* need ctlstat, otherwise get some strange black flickering */
1030		OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
1031		    RADEON_RB3D_ZC_FLUSH_ALL);
1032		ADVANCE_RING();
1033
1034		for (i = 0; i < nbox; i++) {
1035			int tileoffset, nrtilesx, nrtilesy, j;
1036			/*
1037			 * it looks like r200 needs rv-style clears, at
1038			 * least if hierz is not enabled?
1039			 */
1040			if ((dev_priv->flags & RADEON_HAS_HIERZ) &&
1041			    !(dev_priv->microcode_version == UCODE_R200)) {
1042				/*
1043				 * FIXME : figure this out for r200 (when hierz
1044				 * is enabled). Or maybe r200 actually doesn't
1045				 * need to put the low-res z value into the tile
1046				 * cache like r100, but just needs to clear the
1047				 * hi-level z-buffer? Works for R100, both with
1048				 * hierz and without.R100 seems to operate on
1049				 * 2x1 8x8 tiles, but... odd: offset/nrtiles
				 * need to be 64 pix (4 blocks) aligned?
1051				 * Potentially problematic with resolutions
1052				 * which are not 64 pix aligned?
1053				 */
1054				tileoffset =
1055				    ((pbox[i].y1 >> 3) * depthpixperline +
1056				    pbox[i].x1) >> 6;
1057				nrtilesx =
1058				    ((pbox[i].x2 & ~63) -
1059				    (pbox[i].x1 & ~63)) >> 4;
1060				nrtilesy =
1061				    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
1062				for (j = 0; j <= nrtilesy; j++) {
1063					BEGIN_RING(4);
1064					OUT_RING(CP_PACKET3
1065					    (RADEON_3D_CLEAR_ZMASK, 2));
1066					/* first tile */
1067					OUT_RING(tileoffset * 8);
1068					/* the number of tiles to clear */
1069					OUT_RING(nrtilesx + 4);
1070					/*
1071					 * clear mask :
1072					 * chooses the clearing pattern.
1073					 */
1074					OUT_RING(clearmask);
1075					ADVANCE_RING();
1076					tileoffset += depthpixperline >> 6;
1077				}
1078			} else if (dev_priv->microcode_version == UCODE_R200) {
1079				/* works for rv250. */
1080				/*
1081				 * find first macro tile
1082				 * (8x2 4x4 z-pixels on rv250)
1083				 */
1084				tileoffset =
1085				    ((pbox[i].y1 >> 3) * depthpixperline +
1086				    pbox[i].x1) >> 5;
1087				nrtilesx =
1088				    (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
1089				nrtilesy =
1090				    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
1091				for (j = 0; j <= nrtilesy; j++) {
1092					BEGIN_RING(4);
1093					OUT_RING(CP_PACKET3
1094					    (RADEON_3D_CLEAR_ZMASK, 2));
1095					/* first tile */
1096					/*
1097					 * judging by the first tile
1098					 * offset needed, could possibly
1099					 * directly address/clear 4x4
1100					 * tiles instead of 8x2 * 4x4
1101					 * macro tiles, though would
1102					 * still need clear mask for
					 * right/bottom if truly 4x4
1104					 * granularity is desired ?
1105					 */
1106					OUT_RING(tileoffset * 16);
1107					/* the number of tiles to clear */
1108					OUT_RING(nrtilesx + 1);
1109					/*
1110					 * clear mask :
1111					 * chooses the clearing pattern.
1112					 */
1113					OUT_RING(clearmask);
1114					ADVANCE_RING();
1115					tileoffset += depthpixperline >> 5;
1116				}
1117			} else {	/* rv 100 */
1118				/* rv100 might not need 64 pix alignment */
1119				/* offsets are, hmm, weird */
1120				tileoffset =
1121				    ((pbox[i].y1 >> 4) * depthpixperline +
1122				    pbox[i].x1) >> 6;
1123				nrtilesx =
1124				    ((pbox[i].x2 & ~63) -
1125				    (pbox[i].x1 & ~63)) >> 4;
1126				nrtilesy =
1127				    (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
1128				for (j = 0; j <= nrtilesy; j++) {
1129					BEGIN_RING(4);
1130					OUT_RING(CP_PACKET3
1131					    (RADEON_3D_CLEAR_ZMASK, 2));
1132					OUT_RING(tileoffset * 128);
1133					/* the number of tiles to clear */
1134					OUT_RING(nrtilesx + 4);
1135					/*
1136					 * clear mask :
1137					 * chooses the clearing pattern.
1138					 */
1139					OUT_RING(clearmask);
1140					ADVANCE_RING();
1141					tileoffset += depthpixperline >> 6;
1142				}
1143			}
1144		}
1145
1146		/* TODO don't always clear all hi-level z tiles */
1147		if ((dev_priv->flags & RADEON_HAS_HIERZ) &&
1148		    (dev_priv->microcode_version == UCODE_R200) &&
1149		    (flags & RADEON_USE_HIERZ))
1150			/*
1151			 * r100 and cards without hierarchical z-buffer
1152			 * have no high-level z-buffer
1153			 */
1154			/*
1155			 * FIXME : the mask supposedly contains low-res
1156			 * z values. So can't set just to the max (0xff?
1157			 * or actually 0x3fff?), need to take z clear value
1158			 * into account?
1159			 */
1160		{
1161			BEGIN_RING(4);
1162			OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
1163			OUT_RING(0x0);	/* First tile */
1164			OUT_RING(0x3cc0);
1165			OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
1166			ADVANCE_RING();
1167		}
1168	}
1169
1170	/*
1171	 * We have to clear the depth and/or stencil buffers by
1172	 * rendering a quad into just those buffers.  Thus, we have to
1173	 * make sure the 3D engine is configured correctly.
1174	 */
1175	else if ((dev_priv->microcode_version == UCODE_R200) &&
1176	    (flags & (RADEON_DEPTH | RADEON_STENCIL))) {
1177
1178		int tempPP_CNTL;
1179		int tempRE_CNTL;
1180		int tempRB3D_CNTL;
1181		int tempRB3D_ZSTENCILCNTL;
1182		int tempRB3D_STENCILREFMASK;
1183		int tempRB3D_PLANEMASK;
1184		int tempSE_CNTL;
1185		int tempSE_VTE_CNTL;
1186		int tempSE_VTX_FMT_0;
1187		int tempSE_VTX_FMT_1;
1188		int tempSE_VAP_CNTL;
1189		int tempRE_AUX_SCISSOR_CNTL;
1190
1191		tempPP_CNTL = 0;
1192		tempRE_CNTL = 0;
1193
1194		tempRB3D_CNTL = depth_clear->rb3d_cntl;
1195
1196		tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1197		tempRB3D_STENCILREFMASK = 0x0;
1198
1199		tempSE_CNTL = depth_clear->se_cntl;
1200
1201		/* Disable TCL */
1202
1203		tempSE_VAP_CNTL =
1204		    (/* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
1205		    (0x9 << SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));
1206
1207		tempRB3D_PLANEMASK = 0x0;
1208
1209		tempRE_AUX_SCISSOR_CNTL = 0x0;
1210
1211		tempSE_VTE_CNTL =
1212		    SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;
1213
1214		/* Vertex format (X, Y, Z, W) */
1215		tempSE_VTX_FMT_0 =
1216		    SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
1217		    SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
1218		tempSE_VTX_FMT_1 = 0x0;
1219
1220		/*
1221		 * Depth buffer specific enables
1222		 */
1223		if (flags & RADEON_DEPTH) {
1224			/* Enable depth buffer */
1225			tempRB3D_CNTL |= RADEON_Z_ENABLE;
1226		} else {
1227			/* Disable depth buffer */
1228			tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
1229		}
1230
1231		/*
1232		 * Stencil buffer specific enables
1233		 */
1234		if (flags & RADEON_STENCIL) {
1235			tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
1236			tempRB3D_STENCILREFMASK = clear->depth_mask;
1237		} else {
1238			tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
1239			tempRB3D_STENCILREFMASK = 0x00000000;
1240		}
1241
1242		if (flags & RADEON_USE_COMP_ZBUF) {
1243			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1244			    RADEON_Z_DECOMPRESSION_ENABLE;
1245		}
1246		if (flags & RADEON_USE_HIERZ) {
1247			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1248		}
1249
1250		BEGIN_RING(26);
1251		RADEON_WAIT_UNTIL_2D_IDLE();
1252
1253		OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
1254		OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
1255		OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
1256		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1257		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
1258		    tempRB3D_STENCILREFMASK);
1259		OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
1260		OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
1261		OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
1262		OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
1263		OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
1264		OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
1265		OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
1266		ADVANCE_RING();
1267
1268		/* Make sure we restore the 3D state next time. */
1269		dev_priv->sarea_priv->ctx_owner = 0;
1270
1271		for (i = 0; i < nbox; i++) {
1272
1273			/*
1274			 * Funny that this should be required --
1275			 *  sets top-left?
1276			 */
1277			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1278
1279			BEGIN_RING(14);
1280			OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
1281			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1282			    RADEON_PRIM_WALK_RING |
1283			    (3 << RADEON_NUM_VERTICES_SHIFT)));
1284			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1285			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1286			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1287			OUT_RING(0x3f800000);
1288			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1289			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1290			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1291			OUT_RING(0x3f800000);
1292			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1293			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1294			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1295			OUT_RING(0x3f800000);
1296			ADVANCE_RING();
1297		}
1298	} else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {
1299
1300		int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1301
1302		rb3d_cntl = depth_clear->rb3d_cntl;
1303
1304		if (flags & RADEON_DEPTH) {
1305			rb3d_cntl |= RADEON_Z_ENABLE;
1306		} else {
1307			rb3d_cntl &= ~RADEON_Z_ENABLE;
1308		}
1309
1310		if (flags & RADEON_STENCIL) {
1311			rb3d_cntl |= RADEON_STENCIL_ENABLE;
1312
1313			/* misnamed field */
1314			rb3d_stencilrefmask = clear->depth_mask;
1315
1316		} else {
1317			rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
1318			rb3d_stencilrefmask = 0x00000000;
1319		}
1320
1321		if (flags & RADEON_USE_COMP_ZBUF) {
1322			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1323			    RADEON_Z_DECOMPRESSION_ENABLE;
1324		}
1325		if (flags & RADEON_USE_HIERZ) {
1326			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1327		}
1328
1329		BEGIN_RING(13);
1330		RADEON_WAIT_UNTIL_2D_IDLE();
1331
1332		OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
1333		OUT_RING(0x00000000);
1334		OUT_RING(rb3d_cntl);
1335
1336		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1337		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
1338		OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
1339		OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
1340		ADVANCE_RING();
1341
1342		/* Make sure we restore the 3D state next time.  */
1343		dev_priv->sarea_priv->ctx_owner = 0;
1344
1345		for (i = 0; i < nbox; i++) {
1346
1347			/*
1348			 * Funny that this should be required --
1349			 *  sets top-left?
1350			 */
1351			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1352
1353			BEGIN_RING(15);
1354
1355			OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
1356			OUT_RING(RADEON_VTX_Z_PRESENT |
1357			    RADEON_VTX_PKCOLOR_PRESENT);
1358			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1359			    RADEON_PRIM_WALK_RING |
1360			    RADEON_MAOS_ENABLE |
1361			    RADEON_VTX_FMT_RADEON_MODE |
1362			    (3 << RADEON_NUM_VERTICES_SHIFT)));
1363
1364			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1365			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1366			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1367			OUT_RING(0x0);
1368
1369			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1370			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1371			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1372			OUT_RING(0x0);
1373
1374			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1375			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1376			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1377			OUT_RING(0x0);
1378
1379			ADVANCE_RING();
1380		}
1381	}
1382
1383	/*
1384	 * Increment the clear counter.  The client-side 3D driver must
1385	 * wait on this value before performing the clear ioctl.  We
1386	 * need this because the card's so damned fast...
1387	 */
1388	dev_priv->sarea_priv->last_clear++;
1389
1390	BEGIN_RING(4);
1391
1392	RADEON_CLEAR_AGE(dev_priv->sarea_priv->last_clear);
1393	RADEON_WAIT_UNTIL_IDLE();
1394
1395	ADVANCE_RING();
1396}
1397
1398static void radeon_cp_dispatch_swap(drm_device_t *dev)
1399{
1400	drm_radeon_private_t *dev_priv = dev->dev_private;
1401	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1402	int nbox = sarea_priv->nbox;
1403	drm_clip_rect_t *pbox = sarea_priv->boxes;
1404	int i;
1405	RING_LOCALS;
1406
1407	/* Do some trivial performance monitoring... */
1408	if (dev_priv->do_boxes)
1409		radeon_cp_performance_boxes(dev_priv);
1410
1411	/*
1412	 * Wait for the 3D stream to idle before dispatching the bitblt.
1413	 * This will prevent data corruption between the two streams.
1414	 */
1415	BEGIN_RING(2);
1416
1417	RADEON_WAIT_UNTIL_3D_IDLE();
1418
1419	ADVANCE_RING();
1420
1421	for (i = 0; i < nbox; i++) {
1422		int x = pbox[i].x1;
1423		int y = pbox[i].y1;
1424		int w = pbox[i].x2 - x;
1425		int h = pbox[i].y2 - y;
1426
1427		DRM_DEBUG("dispatch swap %d,%d-%d,%d\n", x, y, w, h);
1428
1429		BEGIN_RING(9);
1430
1431		OUT_RING(CP_PACKET0(RADEON_DP_GUI_MASTER_CNTL, 0));
1432		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1433		    RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1434		    RADEON_GMC_BRUSH_NONE |
1435		    (dev_priv->color_fmt << 8) |
1436		    RADEON_GMC_SRC_DATATYPE_COLOR |
1437		    RADEON_ROP3_S |
1438		    RADEON_DP_SRC_SOURCE_MEMORY |
1439		    RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1440
1441		/* Make this work even if front & back are flipped: */
1442		OUT_RING(CP_PACKET0(RADEON_SRC_PITCH_OFFSET, 1));
1443		if (dev_priv->current_page == 0) {
1444			OUT_RING(dev_priv->back_pitch_offset);
1445			OUT_RING(dev_priv->front_pitch_offset);
1446		} else {
1447			OUT_RING(dev_priv->front_pitch_offset);
1448			OUT_RING(dev_priv->back_pitch_offset);
1449		}
1450
1451		OUT_RING(CP_PACKET0(RADEON_SRC_X_Y, 2));
1452		OUT_RING((x << 16) | y);
1453		OUT_RING((x << 16) | y);
1454		OUT_RING((w << 16) | h);
1455
1456		ADVANCE_RING();
1457	}
1458
1459	/*
1460	 * Increment the frame counter.  The client-side 3D driver must
1461	 * throttle the framerate by waiting for this value before
1462	 * performing the swapbuffer ioctl.
1463	 */
1464	dev_priv->sarea_priv->last_frame ++;
1465
1466	BEGIN_RING(4);
1467
1468	RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
1469	RADEON_WAIT_UNTIL_2D_IDLE();
1470
1471	ADVANCE_RING();
1472}
1473
1474static void radeon_cp_dispatch_flip(drm_device_t *dev)
1475{
1476	drm_radeon_private_t *dev_priv = dev->dev_private;
1477	drm_sarea_t *sarea = (drm_sarea_t *)dev_priv->sarea->handle;
1478	int offset = (dev_priv->current_page == 1)
1479	    ? dev_priv->front_offset : dev_priv->back_offset;
1480	RING_LOCALS;
1481	DRM_DEBUG("%s: page=%d pfCurrentPage=%d\n",
1482	    __FUNCTION__,
1483	    dev_priv->current_page, dev_priv->sarea_priv->pfCurrentPage);
1484
1485	/* Do some trivial performance monitoring... */
1486	if (dev_priv->do_boxes) {
1487		dev_priv->stats.boxes |= RADEON_BOX_FLIP;
1488		radeon_cp_performance_boxes(dev_priv);
1489	}
1490
1491	/* Update the frame offsets for both CRTCs */
1492	BEGIN_RING(6);
1493
1494	RADEON_WAIT_UNTIL_3D_IDLE();
1495	OUT_RING_REG(RADEON_CRTC_OFFSET,
1496	    ((sarea->frame.y * dev_priv->front_pitch +
1497	    sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7) + offset);
1498	OUT_RING_REG(RADEON_CRTC2_OFFSET,
1499	    dev_priv->sarea_priv->crtc2_base + offset);
1500
1501	ADVANCE_RING();
1502
1503	/*
1504	 * Increment the frame counter.  The client-side 3D driver must
1505	 * throttle the framerate by waiting for this value before
1506	 * performing the swapbuffer ioctl.
1507	 */
1508	dev_priv->sarea_priv->last_frame ++;
1509	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page =
1510	    1 - dev_priv->current_page;
1511
1512	BEGIN_RING(2);
1513
1514	RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
1515
1516	ADVANCE_RING();
1517}
1518
1519static int bad_prim_vertex_nr(int primitive, int nr)
1520{
1521	switch (primitive & RADEON_PRIM_TYPE_MASK) {
1522	case RADEON_PRIM_TYPE_NONE:
1523	case RADEON_PRIM_TYPE_POINT:
1524		return (nr < 1);
1525	case RADEON_PRIM_TYPE_LINE:
1526		return ((nr & 1) || nr == 0);
1527	case RADEON_PRIM_TYPE_LINE_STRIP:
1528		return (nr < 2);
1529	case RADEON_PRIM_TYPE_TRI_LIST:
1530	case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
1531	case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
1532	case RADEON_PRIM_TYPE_RECT_LIST:
1533		return (nr % 3 || nr == 0);
1534	case RADEON_PRIM_TYPE_TRI_FAN:
1535	case RADEON_PRIM_TYPE_TRI_STRIP:
1536		return (nr < 3);
1537	default:
1538		return (1);
1539	}
1540}
1541
/*
 * Description of one primitive to be dispatched from a DMA buffer.
 * Consumed by radeon_cp_dispatch_vertex() and
 * radeon_cp_dispatch_indices().
 */
typedef struct {
	/* byte offset within the buffer where the primitive's data begins */
	unsigned int start;
	/* byte offset within the buffer where the primitive's data ends */
	unsigned int finish;
	/* hardware primitive word; type bits checked by bad_prim_vertex_nr */
	unsigned int prim;
	/* vertex count, emitted in the render packet */
	unsigned int numverts;
	/* added to gart_buffers_offset for indexed prims */
	unsigned int offset;
	/* vertex format word, emitted verbatim in the render packet */
	unsigned int vc_format;
} drm_radeon_tcl_prim_t;
1550
1551static void radeon_cp_dispatch_vertex(drm_device_t *dev,
1552    drm_buf_t *buf, drm_radeon_tcl_prim_t *prim)
1553{
1554	drm_radeon_private_t *dev_priv = dev->dev_private;
1555	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1556	int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
1557	int numverts = (int)prim->numverts;
1558	int nbox = sarea_priv->nbox;
1559	int i = 0;
1560	RING_LOCALS;
1561
1562	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
1563	    prim->prim, prim->vc_format, prim->start,
1564	    prim->finish, prim->numverts);
1565
1566	if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
1567		DRM_ERROR("bad prim %x numverts %d\n",
1568		    prim->prim, prim->numverts);
1569		return;
1570	}
1571
1572	do {
1573		/* Emit the next cliprect */
1574		if (i < nbox) {
1575			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1576		}
1577
1578		/* Emit the vertex buffer rendering commands */
1579		BEGIN_RING(5);
1580
1581		OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
1582		OUT_RING(offset);
1583		OUT_RING(numverts);
1584		OUT_RING(prim->vc_format);
1585		OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
1586		    RADEON_COLOR_ORDER_RGBA |
1587		    RADEON_VTX_FMT_RADEON_MODE |
1588		    (numverts << RADEON_NUM_VERTICES_SHIFT));
1589
1590		ADVANCE_RING();
1591
1592		i++;
1593	} while (i < nbox);
1594}
1595
1596static void radeon_cp_discard_buffer(drm_device_t *dev, drm_buf_t *buf)
1597{
1598	drm_radeon_private_t *dev_priv = dev->dev_private;
1599	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
1600	RING_LOCALS;
1601
1602	buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
1603
1604	/* Emit the vertex buffer age */
1605	BEGIN_RING(2);
1606	RADEON_DISPATCH_AGE(buf_priv->age);
1607	ADVANCE_RING();
1608
1609	buf->pending = 1;
1610	buf->used = 0;
1611}
1612
1613static void radeon_cp_dispatch_indirect(drm_device_t *dev,
1614    drm_buf_t *buf, int start, int end)
1615{
1616	drm_radeon_private_t *dev_priv = dev->dev_private;
1617	RING_LOCALS;
1618	DRM_DEBUG("indirect: buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);
1619
1620	if (start != end) {
1621		int offset = (dev_priv->gart_buffers_offset +
1622		    buf->offset + start);
1623		int dwords = (end - start + 3) / sizeof (u32);
1624
1625		/*
1626		 * Indirect buffer data must be an even number of
1627		 * dwords, so if we've been given an odd number we must
1628		 * pad the data with a Type-2 CP packet.
1629		 */
1630		if (dwords & 1) {
1631			u32 *data = (u32 *)(uintptr_t)
1632			    ((char *)dev->agp_buffer_map->handle
1633			    + buf->offset + start);
1634			data[dwords++] = RADEON_CP_PACKET2;
1635		}
1636
1637		/* Fire off the indirect buffer */
1638		BEGIN_RING(3);
1639
1640		OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
1641		OUT_RING(offset);
1642		OUT_RING(dwords);
1643
1644		ADVANCE_RING();
1645	}
1646}
1647
1648static void radeon_cp_dispatch_indices(drm_device_t *dev,
1649    drm_buf_t *elt_buf, drm_radeon_tcl_prim_t *prim)
1650{
1651	drm_radeon_private_t *dev_priv = dev->dev_private;
1652	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1653	int offset = dev_priv->gart_buffers_offset + prim->offset;
1654	u32 *data;
1655	int dwords;
1656	int i = 0;
1657	int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
1658	int count = (prim->finish - start) / sizeof (u16);
1659	int nbox = sarea_priv->nbox;
1660
1661	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
1662	    prim->prim, prim->vc_format, prim->start,
1663	    prim->finish, prim->offset, prim->numverts);
1664
1665	if (bad_prim_vertex_nr(prim->prim, count)) {
1666		DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
1667		return;
1668	}
1669
1670	if (start >= prim->finish || (prim->start & 0x7)) {
1671		DRM_ERROR("buffer prim %d\n", prim->prim);
1672		return;
1673	}
1674
1675	dwords = (prim->finish - prim->start + 3) / sizeof (u32);
1676
1677	data = (u32 *)(uintptr_t)((char *)dev->agp_buffer_map->handle +
1678	    elt_buf->offset + prim->start);
1679
1680	data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
1681	data[1] = offset;
1682	data[2] = prim->numverts;
1683	data[3] = prim->vc_format;
1684	data[4] = (prim->prim |
1685	    RADEON_PRIM_WALK_IND |
1686	    RADEON_COLOR_ORDER_RGBA |
1687	    RADEON_VTX_FMT_RADEON_MODE |
1688	    (count << RADEON_NUM_VERTICES_SHIFT));
1689
1690	do {
1691		if (i < nbox)
1692			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1693
1694		radeon_cp_dispatch_indirect(dev, elt_buf,
1695		    prim->start, prim->finish);
1696
1697		i++;
1698	} while (i < nbox);
1699
1700}
1701
/* Largest number of bytes uploaded by one pass of the texture blit loop. */
#define	RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE

/*
 * Upload a texture image from user space.
 *
 * The image is copied, in one or more passes, from the user pointer in
 * 'image' into DMA buffers taken from the freelist, and each buffer is
 * then blitted to the destination described by 'tex'.  'image' is
 * updated in place after every pass (y, height and data advance), so
 * it always describes the part that has not been uploaded yet.
 *
 * fpriv	file the request came from; used for the offset check and
 *		recorded as the owner of each buffer used.
 * dev		DRM device.
 * tex		texture description (offset, pitch, format, width,
 *		height, user address of the image descriptor).
 *		tex->offset may be adjusted by the offset check.
 * image	kernel copy of the image descriptor; modified, see above.
 * mode		ioctl mode flags; only consulted under _MULTI_DATAMODEL
 *		to pick the 32-bit descriptor layout for the copy-out.
 *
 * Returns 0 on success (including an image of zero size), EINVAL for a
 * bad destination offset, an unknown format, or a multi-row image whose
 * row is narrower than 64 bytes, EAGAIN when no DMA buffer is available
 * (the current 'image' state is copied out to tex->image first), and
 * EFAULT when copying texel data from user space fails.
 *
 * NOTE(review): DRM_COPYTO_WITH_RETURN presumably returns from this
 * function with an error of its own if the copy-out fails -- confirm
 * against its definition.
 * NOTE(review): on the EFAULT path the function returns without passing
 * the buffer obtained from radeon_freelist_get() to
 * radeon_cp_discard_buffer(); whether that strands the buffer depends
 * on the freelist implementation -- confirm.
 */
/*ARGSUSED*/
static int radeon_cp_dispatch_texture(drm_file_t *fpriv,
    drm_device_t *dev, drm_radeon_texture_t *tex,
    drm_radeon_tex_image_t *image, int mode)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_buf_t *buf;
	u32 format;
	u32 *buffer;
	const u8 __user *data;
	int size, dwords, tex_width, blit_width, spitch;
	u32 height;
	int i;
	u32 texpitch, microtile;
	u32 offset;
	RING_LOCALS;


	/* Validate (and possibly rewrite) the destination offset. */
	if (radeon_check_and_fixup_offset(dev_priv, fpriv, &tex->offset)) {
		DRM_ERROR("Invalid destination offset\n");
		return (EINVAL);
	}

	dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;

	/*
	 * Flush the pixel cache.  This ensures no pixel data gets mixed
	 * up with the texture data from the host data blit, otherwise
	 * part of the texture image may be corrupted.
	 */
	BEGIN_RING(4);
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_IDLE();
	ADVANCE_RING();

	/*
	 * Map the texture format onto a blitter colour format with the
	 * same texel size, and convert both widths from texels to bytes:
	 * tex_width is the full texture row, blit_width the row of the
	 * sub-image being uploaded.
	 *
	 * The compiler won't optimize away a division by a variable,
	 * even if the only legal values are powers of two.  Thus, we'll
	 * use a shift instead.
	 */
	switch (tex->format) {
	case RADEON_TXFORMAT_ARGB8888:
	case RADEON_TXFORMAT_RGBA8888:
		/* 4 bytes per texel */
		format = RADEON_COLOR_FORMAT_ARGB8888;
		tex_width = tex->width * 4;
		blit_width = image->width * 4;
		break;
	case RADEON_TXFORMAT_AI88:
	case RADEON_TXFORMAT_ARGB1555:
	case RADEON_TXFORMAT_RGB565:
	case RADEON_TXFORMAT_ARGB4444:
	case RADEON_TXFORMAT_VYUY422:
	case RADEON_TXFORMAT_YVYU422:
		/* 2 bytes per texel */
		format = RADEON_COLOR_FORMAT_RGB565;
		tex_width = tex->width * 2;
		blit_width = image->width * 2;
		break;
	case RADEON_TXFORMAT_I8:
	case RADEON_TXFORMAT_RGB332:
		/* 1 byte per texel */
		format = RADEON_COLOR_FORMAT_CI8;
		tex_width = tex->width * 1;
		blit_width = image->width * 1;
		break;
	default:
		DRM_ERROR("invalid texture format %d\n", tex->format);
		return (EINVAL);
	}
	/* Source pitch for the blit, in units of 64 bytes. */
	spitch = blit_width >> 6;
	/* A zero pitch can only describe a single-row image. */
	if (spitch == 0 && image->height > 1)
		return (EINVAL);

	texpitch = tex->pitch;
	/* Is the micro-tiling bit set in the destination pitch word? */
	if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
		microtile = 1;
		if (tex_width < 64) {
			/*
			 * Rows narrower than 64 bytes are tiled by hand
			 * in the copy code below, so the blitter itself
			 * is told the destination is not micro-tiled.
			 */
			texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
			/* we got tiled coordinates, untile them */
			image->x *= 2;
		}
	} else
		microtile = 0;

	DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);

	/* One iteration per DMA buffer's worth of image rows. */
	do {
		DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
		    tex->offset >> 10, tex->pitch, tex->format,
		    image->x, image->y, image->width, image->height);

		/*
		 * Make a copy of some parameters in case we have to
		 * update them for a multi-pass texture blit.
		 */
		height = image->height;
		data = (const u8 __user *)image->data;

		size = height * blit_width;

		if (size > RADEON_MAX_TEXTURE_SIZE) {
			/* Too big for one pass: take as many rows as fit. */
			height = RADEON_MAX_TEXTURE_SIZE / blit_width;
			size = height * blit_width;
		} else if (size < 4 && size > 0) {
			/* Round up so that at least one dword is uploaded. */
			size = 4;
		} else if (size == 0) {
			/* Nothing (left) to upload. */
			return (0);
		}

		buf = radeon_freelist_get(dev);
#if 0
		if (0 && !buf) {
			radeon_do_cp_idle(dev_priv);
			buf = radeon_freelist_get(dev);
		}
#endif
		if (!buf) {
			DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");

			/*
			 * No buffer available.  Copy the current image
			 * descriptor out to the user address in tex->image
			 * before reporting EAGAIN (presumably so the caller
			 * can resume from here -- confirm).  A 32-bit caller
			 * gets the descriptor in its own layout.
			 */
#ifdef _MULTI_DATAMODEL
			if (ddi_model_convert_from(mode & FMODELS) ==
			    DDI_MODEL_ILP32) {
				drm_radeon_tex_image_32_t image32;
				image32.x = image->x;
				image32.y = image->y;
				image32.width = image->width;
				image32.height = image->height;
				image32.data = (uint32_t)(uintptr_t)image->data;
				DRM_COPYTO_WITH_RETURN(tex->image, &image32,
				    sizeof (image32));
			} else {
#endif
				DRM_COPYTO_WITH_RETURN(tex->image, image,
				    sizeof (*image));
#ifdef _MULTI_DATAMODEL
			}
#endif
			return (EAGAIN);
		}

		/*
		 * Dispatch the indirect buffer.
		 */
		/* Kernel address of the staging buffer's contents. */
		buffer = (u32 *)(uintptr_t)
		    ((char *)dev->agp_buffer_map->handle + buf->offset);

		dwords = size / 4;

/*
 * Copy _width bytes from user address _data to _buf; on failure log an
 * error and return EFAULT from the enclosing function.  The loop
 * condition *"\0" is the first character of the string "\0", i.e. 0,
 * so the body runs exactly once (presumably spelled this way instead
 * of a literal 0 to keep lint quiet -- confirm).
 */
#define	RADEON_COPY_MT(_buf, _data, _width) \
	do { \
		if (DRM_COPY_FROM_USER(_buf, _data, (_width))) {\
			DRM_ERROR("%d: EFAULT on pad, %d bytes\n", \
			    __LINE__, (_width)); \
			return (EFAULT); \
		} \
	} while (*"\0")

		if (microtile) {
			/*
			 * texture micro tiling in use, minimum texture
			 * width is thus 16 bytes. however, we cannot use
			 * blitter directly for texture width < 64 bytes,
			 * since minimum tex pitch is 64 bytes and we need
			 * this to match the texture width, otherwise the
			 * blitter will tile it wrong. Thus, tiling manually
			 * in this case. Additionally, need to special case
			 * tex height = 1, since our actual image will have
			 * height 2 and we need to ensure we don't read
			 * beyond the texture size from user space.
			 */
			if (tex->height == 1) {
				if (tex_width >= 64 || tex_width <= 16) {
					RADEON_COPY_MT(buffer, data,
					    (int)(tex_width * sizeof (u32)));
				} else if (tex_width == 32) {
					/*
					 * Two 16-byte halves; the second
					 * one lands 8 dwords (32 bytes)
					 * into the buffer.
					 */
					RADEON_COPY_MT(buffer, data, 16);
					RADEON_COPY_MT(buffer + 8,
					    data + 16, 16);
				}
			} else if (tex_width >= 64 || tex_width == 16) {
				/* Layout already suits the blitter: bulk copy. */
				RADEON_COPY_MT(buffer, data,
				    (int)(dwords * sizeof (u32)));
			} else if (tex_width < 16) {
				/* Pad every row out to 4 dwords (16 bytes). */
				for (i = 0; i < tex->height; i++) {
					RADEON_COPY_MT(buffer, data, tex_width);
					buffer += 4;
					data += tex_width;
				}
			} else if (tex_width == 32) {
				/*
				 * TODO: make sure this works when not
				 * fitting in one buffer
				 *  (i.e. 32bytes x 2048...)
				 */
				/*
				 * Two rows per iteration: the four 16-byte
				 * pieces read in order from user space go
				 * to dword offsets 0, 8, 4 and 12.
				 */
				for (i = 0; i < tex->height; i += 2) {
					RADEON_COPY_MT(buffer, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 8, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 4, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 12, data, 16);
					data += 16;
					buffer += 16;
				}
			}
		} else {
			if (tex_width >= 32) {
				/*
				 * Texture image width is larger than the
				 * minimum, so we can upload it directly.
				 */
				RADEON_COPY_MT(buffer, data,
				    (int)(dwords * sizeof (u32)));
			} else {
				/*
				 * Texture image width is less than the minimum,
				 * so we need to pad out each image scanline to
				 * the minimum width.
				 */
				for (i = 0; i < tex->height; i++) {
					RADEON_COPY_MT(buffer, data, tex_width);
					/* 8 dwords = 32 bytes per row */
					buffer += 8;
					data += tex_width;
				}
			}
		}

#undef RADEON_COPY_MT
		/* Record owner and fill level of the staging buffer. */
		buf->filp = fpriv;
		buf->used = size;
		/* Card-visible address of the staging buffer. */
		offset = dev_priv->gart_buffers_offset + buf->offset;

		/* Blit this pass from the staging buffer to the texture. */
		BEGIN_RING(9);
		OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
		    RADEON_GMC_DST_PITCH_OFFSET_CNTL |
		    RADEON_GMC_BRUSH_NONE |
		    (format << 8) |
		    RADEON_GMC_SRC_DATATYPE_COLOR |
		    RADEON_ROP3_S |
		    RADEON_DP_SRC_SOURCE_MEMORY |
		    RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
		/* source pitch/offset word, then destination pitch/offset */
		OUT_RING((spitch << 22) | (offset >> 10));
		OUT_RING((texpitch << 22) | (tex->offset >> 10));
		/* source x/y = 0,0; destination x/y; width/height */
		OUT_RING(0);
		OUT_RING((image->x << 16) | image->y);
		OUT_RING((image->width << 16) | height);
		RADEON_WAIT_UNTIL_2D_IDLE();
		ADVANCE_RING();
		COMMIT_RING();


		/* Age the staging buffer and mark it pending. */
		radeon_cp_discard_buffer(dev, buf);

		/* Update the input parameters for next time */
		image->y += height;
		image->height -= height;
		image->data = (const u8 __user *)image->data + size;
	} while (image->height > 0);

	/*
	 * Flush the pixel cache after the blit completes.  This ensures
	 * the texture data is written out to memory before rendering
	 * continues.
	 */
	BEGIN_RING(4);
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_2D_IDLE();
	ADVANCE_RING();
	COMMIT_RING();
	return (0);
}
1975
1976static void radeon_cp_dispatch_stipple(drm_device_t *dev, u32 *stipple)
1977{
1978	drm_radeon_private_t *dev_priv = dev->dev_private;
1979	int i;
1980	RING_LOCALS;
1981	DRM_DEBUG("\n");
1982
1983	BEGIN_RING(35);
1984
1985	OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
1986	OUT_RING(0x00000000);
1987
1988	OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
1989	for (i = 0; i < 32; i++) {
1990		OUT_RING(stipple[i]);
1991	}
1992
1993	ADVANCE_RING();
1994}
1995
1996static void radeon_apply_surface_regs(int surf_index,
1997    drm_radeon_private_t *dev_priv)
1998{
1999	if (!dev_priv->mmio)
2000		return;
2001
2002	(void) radeon_do_cp_idle(dev_priv);
2003
2004	RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * surf_index,
2005	    dev_priv->surfaces[surf_index].flags);
2006	RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * surf_index,
2007	    dev_priv->surfaces[surf_index].lower);
2008	RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * surf_index,
2009	    dev_priv->surfaces[surf_index].upper);
2010}
2011
2012/*
2013 * Allocates a virtual surface
2014 * doesn't always allocate a real surface, will stretch an existing
2015 * surface when possible.
2016 *
2017 * Note that refcount can be at most 2, since during a free refcount=3
2018 * might mean we have to allocate a new surface which might not always
2019 * be available.
2020 * For example : we allocate three contigous surfaces ABC. If B is
2021 * freed, we suddenly need two surfaces to store A and C, which might
2022 * not always be available.
2023 */
2024static int alloc_surface(drm_radeon_surface_alloc_t *new,
2025    drm_radeon_private_t *dev_priv, drm_file_t *filp)
2026{
2027	struct radeon_virt_surface *s;
2028	int i;
2029	int virt_surface_index;
2030	uint32_t new_upper, new_lower;
2031
2032	new_lower = new->address;
2033	new_upper = new_lower + new->size - 1;
2034
2035	/* sanity check */
2036	if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
2037	    ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
2038	    RADEON_SURF_ADDRESS_FIXED_MASK) ||
2039	    ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
2040		return (-1);
2041
2042	/* make sure there is no overlap with existing surfaces */
2043	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
2044		if ((dev_priv->surfaces[i].refcount != 0) &&
2045		    (((new_lower >= dev_priv->surfaces[i].lower) &&
2046		    (new_lower < dev_priv->surfaces[i].upper)) ||
2047		    ((new_lower < dev_priv->surfaces[i].lower) &&
2048		    (new_upper > dev_priv->surfaces[i].lower)))) {
2049			return (-1);
2050		}
2051	}
2052
2053	/* find a virtual surface */
2054	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
2055		if (dev_priv->virt_surfaces[i].filp == 0)
2056			break;
2057	if (i == 2 * RADEON_MAX_SURFACES) {
2058		return (-1);
2059	}
2060	virt_surface_index = i;
2061
2062	/* try to reuse an existing surface */
2063	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
2064		/* extend before */
2065		if ((dev_priv->surfaces[i].refcount == 1) &&
2066		    (new->flags == dev_priv->surfaces[i].flags) &&
2067		    (new_upper + 1 == dev_priv->surfaces[i].lower)) {
2068			s = &(dev_priv->virt_surfaces[virt_surface_index]);
2069			s->surface_index = i;
2070			s->lower = new_lower;
2071			s->upper = new_upper;
2072			s->flags = new->flags;
2073			s->filp = filp;
2074			dev_priv->surfaces[i].refcount++;
2075			dev_priv->surfaces[i].lower = s->lower;
2076			radeon_apply_surface_regs(s->surface_index, dev_priv);
2077			return (virt_surface_index);
2078		}
2079
2080		/* extend after */
2081		if ((dev_priv->surfaces[i].refcount == 1) &&
2082		    (new->flags == dev_priv->surfaces[i].flags) &&
2083		    (new_lower == dev_priv->surfaces[i].upper + 1)) {
2084			s = &(dev_priv->virt_surfaces[virt_surface_index]);
2085			s->surface_index = i;
2086			s->lower = new_lower;
2087			s->upper = new_upper;
2088			s->flags = new->flags;
2089			s->filp = filp;
2090			dev_priv->surfaces[i].refcount++;
2091			dev_priv->surfaces[i].upper = s->upper;
2092			radeon_apply_surface_regs(s->surface_index, dev_priv);
2093			return (virt_surface_index);
2094		}
2095	}
2096
2097	/* okay, we need a new one */
2098	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
2099		if (dev_priv->surfaces[i].refcount == 0) {
2100			s = &(dev_priv->virt_surfaces[virt_surface_index]);
2101			s->surface_index = i;
2102			s->lower = new_lower;
2103			s->upper = new_upper;
2104			s->flags = new->flags;
2105			s->filp = filp;
2106			dev_priv->surfaces[i].refcount = 1;
2107			dev_priv->surfaces[i].lower = s->lower;
2108			dev_priv->surfaces[i].upper = s->upper;
2109			dev_priv->surfaces[i].flags = s->flags;
2110			radeon_apply_surface_regs(s->surface_index, dev_priv);
2111			return (virt_surface_index);
2112		}
2113	}
2114
2115	/* we didn't find anything */
2116	return (-1);
2117}
2118
2119static int
2120free_surface(drm_file_t *filp, drm_radeon_private_t *dev_priv, int lower)
2121{
2122	struct radeon_virt_surface *s;
2123	int i;
2124
2125	/* find the virtual surface */
2126	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
2127		s = &(dev_priv->virt_surfaces[i]);
2128		if (s->filp) {
2129			if ((lower == s->lower) && (filp == s->filp)) {
2130				if (dev_priv->surfaces[s->surface_index].
2131				    lower == s->lower)
2132					dev_priv->surfaces[s->surface_index].
2133					    lower = s->upper;
2134
2135				if (dev_priv->surfaces[s->surface_index].
2136				    upper == s->upper)
2137					dev_priv->surfaces[s->surface_index].
2138					    upper = s->lower;
2139
2140				dev_priv->surfaces[s->surface_index].refcount--;
2141				if (dev_priv->surfaces[s->surface_index].
2142				    refcount == 0)
2143					dev_priv->surfaces[s->surface_index].
2144					    flags = 0;
2145				s->filp = NULL;
2146				radeon_apply_surface_regs(s->surface_index,
2147				    dev_priv);
2148				return (0);
2149			}
2150		}
2151	}
2152
2153	return (1);
2154}
2155
2156static void radeon_surfaces_release(drm_file_t *filp,
2157    drm_radeon_private_t *dev_priv)
2158{
2159	int i;
2160
2161	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
2162		if (dev_priv->virt_surfaces[i].filp == filp)
2163			(void) free_surface(filp, dev_priv,
2164			    dev_priv->virt_surfaces[i].lower);
2165	}
2166}
2167
2168/*
2169 * IOCTL functions
2170 */
2171/*ARGSUSED*/
2172static int radeon_surface_alloc(DRM_IOCTL_ARGS)
2173{
2174	DRM_DEVICE;
2175	drm_radeon_private_t *dev_priv = dev->dev_private;
2176	drm_radeon_surface_alloc_t alloc;
2177
2178	if (!dev_priv) {
2179		DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2180		return (EINVAL);
2181	}
2182
2183	DRM_COPYFROM_WITH_RETURN(&alloc, (void *)data, sizeof (alloc));
2184
2185	if (alloc_surface(&alloc, dev_priv, fpriv) == -1)
2186		return (EINVAL);
2187	else
2188		return (0);
2189}
2190
2191/*ARGSUSED*/
2192static int radeon_surface_free(DRM_IOCTL_ARGS)
2193{
2194	DRM_DEVICE;
2195	drm_radeon_private_t *dev_priv = dev->dev_private;
2196	drm_radeon_surface_free_t memfree;
2197
2198	if (!dev_priv) {
2199		DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2200		return (EINVAL);
2201	}
2202
2203	DRM_COPYFROM_WITH_RETURN(&memfree, (void *)data, sizeof (memfree));
2204	if (free_surface(fpriv, dev_priv, memfree.address)) {
2205		return (EINVAL);
2206	}
2207	else
2208		return (0);
2209}
2210
2211/*ARGSUSED*/
2212static int radeon_cp_clear(DRM_IOCTL_ARGS)
2213{
2214	DRM_DEVICE;
2215	drm_radeon_private_t *dev_priv = dev->dev_private;
2216	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2217	drm_radeon_clear_t clear;
2218	drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
2219
2220	LOCK_TEST_WITH_RETURN(dev, fpriv);
2221
2222#ifdef _MULTI_DATAMODEL
2223	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
2224		drm_radeon_clear_32_t	clear32;
2225		DRM_COPYFROM_WITH_RETURN(&clear32, (void *)data,
2226		    sizeof (clear32));
2227		clear.flags = clear32.flags;
2228		clear.clear_color = clear32.clear_color;
2229		clear.clear_depth = clear32.clear_depth;
2230		clear.color_mask = clear32.color_mask;
2231		clear.depth_mask = clear32.depth_mask;
2232		clear.depth_boxes = (void*)(uintptr_t)clear32.depth_boxes;
2233	} else {
2234#endif
2235		DRM_COPYFROM_WITH_RETURN(&clear, (void *)data, sizeof (clear));
2236#ifdef _MULTI_DATAMODEL
2237	}
2238#endif
2239
2240	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2241
2242	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2243		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2244
2245	if (DRM_COPY_FROM_USER(&depth_boxes, clear.depth_boxes,
2246	    sarea_priv->nbox * sizeof (depth_boxes[0])))
2247	return (EFAULT);
2248
2249	radeon_cp_dispatch_clear(dev, &clear, depth_boxes);
2250
2251	COMMIT_RING();
2252	return (0);
2253}
2254
2255/*
2256 * Not sure why this isn't set all the time:
2257 */
2258static int radeon_do_init_pageflip(drm_device_t *dev)
2259{
2260	drm_radeon_private_t *dev_priv = dev->dev_private;
2261	RING_LOCALS;
2262
2263	BEGIN_RING(6);
2264	RADEON_WAIT_UNTIL_3D_IDLE();
2265	OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
2266	OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
2267	    RADEON_CRTC_OFFSET_FLIP_CNTL);
2268	OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
2269	OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
2270	    RADEON_CRTC_OFFSET_FLIP_CNTL);
2271	ADVANCE_RING();
2272
2273	dev_priv->page_flipping = 1;
2274	dev_priv->current_page = 0;
2275	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page;
2276
2277	return (0);
2278}
2279
2280/*
2281 * Called whenever a client dies, from drm_release.
2282 * NOTE:  Lock isn't necessarily held when this is called!
2283 */
2284static int radeon_do_cleanup_pageflip(drm_device_t *dev)
2285{
2286	drm_radeon_private_t *dev_priv = dev->dev_private;
2287
2288	if (dev_priv->current_page != 0)
2289		radeon_cp_dispatch_flip(dev);
2290
2291	dev_priv->page_flipping = 0;
2292	return (0);
2293}
2294
2295/*
2296 * Swapping and flipping are different operations, need different ioctls.
2297 * They can & should be intermixed to support multiple 3d windows.
2298 */
2299/*ARGSUSED*/
2300static int radeon_cp_flip(DRM_IOCTL_ARGS)
2301{
2302	DRM_DEVICE;
2303	drm_radeon_private_t *dev_priv = dev->dev_private;
2304
2305	LOCK_TEST_WITH_RETURN(dev, fpriv);
2306
2307	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2308
2309	if (!dev_priv->page_flipping)
2310		(void) radeon_do_init_pageflip(dev);
2311
2312	radeon_cp_dispatch_flip(dev);
2313
2314	COMMIT_RING();
2315	return (0);
2316}
2317
2318/*ARGSUSED*/
2319static int radeon_cp_swap(DRM_IOCTL_ARGS)
2320{
2321	DRM_DEVICE;
2322	drm_radeon_private_t *dev_priv = dev->dev_private;
2323	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2324
2325	LOCK_TEST_WITH_RETURN(dev, fpriv);
2326
2327	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2328
2329	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2330		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2331
2332	radeon_cp_dispatch_swap(dev);
2333	dev_priv->sarea_priv->ctx_owner = 0;
2334
2335	COMMIT_RING();
2336	return (0);
2337}
2338
2339/*ARGSUSED*/
2340static int radeon_cp_vertex(DRM_IOCTL_ARGS)
2341{
2342	DRM_DEVICE;
2343	drm_radeon_private_t *dev_priv = dev->dev_private;
2344	drm_radeon_sarea_t *sarea_priv;
2345	drm_device_dma_t *dma = dev->dma;
2346	drm_buf_t *buf;
2347	drm_radeon_vertex_t vertex;
2348	drm_radeon_tcl_prim_t prim;
2349
2350	LOCK_TEST_WITH_RETURN(dev, fpriv);
2351
2352	if (!dev_priv) {
2353		DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2354		return (EINVAL);
2355	}
2356
2357	sarea_priv = dev_priv->sarea_priv;
2358
2359	DRM_COPYFROM_WITH_RETURN(&vertex, (void *)data, sizeof (vertex));
2360
2361	DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
2362	    DRM_CURRENTPID, vertex.idx, vertex.count, vertex.discard);
2363
2364	if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
2365		DRM_ERROR("buffer index %d (of %d max)\n",
2366		    vertex.idx, dma->buf_count - 1);
2367		return (EINVAL);
2368	}
2369	if (vertex.prim < 0 || vertex.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2370		DRM_ERROR("buffer prim %d\n", vertex.prim);
2371		return (EINVAL);
2372	}
2373
2374	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2375	VB_AGE_TEST_WITH_RETURN(dev_priv);
2376
2377	buf = dma->buflist[vertex.idx];
2378
2379	if (buf->filp != fpriv) {
2380		DRM_ERROR("process %d using buffer owned by %p\n",
2381		    DRM_CURRENTPID, buf->filp);
2382		return (EINVAL);
2383	}
2384	if (buf->pending) {
2385		DRM_ERROR("sending pending buffer %d\n", vertex.idx);
2386		return (EINVAL);
2387	}
2388
2389	/*
2390	 * Build up a prim_t record:
2391	 */
2392	if (vertex.count) {
2393		buf->used = vertex.count;	/* not used? */
2394
2395		if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2396			if (radeon_emit_state(dev_priv, fpriv,
2397			    &sarea_priv->context_state,
2398			    sarea_priv->tex_state,
2399			    sarea_priv->dirty)) {
2400				DRM_ERROR("radeon_emit_state failed\n");
2401				return (EINVAL);
2402			}
2403
2404			sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2405			    RADEON_UPLOAD_TEX1IMAGES |
2406			    RADEON_UPLOAD_TEX2IMAGES |
2407			    RADEON_REQUIRE_QUIESCENCE);
2408		}
2409
2410		prim.start = 0;
2411		prim.finish = vertex.count;	/* unused */
2412		prim.prim = vertex.prim;
2413		prim.numverts = vertex.count;
2414		prim.vc_format = dev_priv->sarea_priv->vc_format;
2415
2416		radeon_cp_dispatch_vertex(dev, buf, &prim);
2417	}
2418
2419	if (vertex.discard) {
2420		radeon_cp_discard_buffer(dev, buf);
2421	}
2422
2423	COMMIT_RING();
2424	return (0);
2425}
2426
2427/*ARGSUSED*/
2428static int radeon_cp_indices(DRM_IOCTL_ARGS)
2429{
2430	DRM_DEVICE;
2431	drm_radeon_private_t *dev_priv = dev->dev_private;
2432	drm_radeon_sarea_t *sarea_priv;
2433	drm_device_dma_t *dma = dev->dma;
2434	drm_buf_t *buf;
2435	drm_radeon_indices_t elts;
2436	drm_radeon_tcl_prim_t prim;
2437/*	int count; */
2438
2439	LOCK_TEST_WITH_RETURN(dev, fpriv);
2440
2441	if (!dev_priv) {
2442		DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2443		return (EINVAL);
2444	}
2445	sarea_priv = dev_priv->sarea_priv;
2446
2447	DRM_COPYFROM_WITH_RETURN(&elts, (void *)data, sizeof (elts));
2448
2449	DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
2450	    DRM_CURRENTPID, elts.idx, elts.start, elts.end, elts.discard);
2451
2452	if (elts.idx < 0 || elts.idx >= dma->buf_count) {
2453		DRM_ERROR("buffer index %d (of %d max)\n",
2454		    elts.idx, dma->buf_count - 1);
2455		return (EINVAL);
2456	}
2457	if (elts.prim < 0 || elts.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2458		DRM_ERROR("buffer prim %d\n", elts.prim);
2459		return (EINVAL);
2460	}
2461
2462	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2463	VB_AGE_TEST_WITH_RETURN(dev_priv);
2464
2465	buf = dma->buflist[elts.idx];
2466
2467	if (buf->filp != fpriv) {
2468		DRM_ERROR("process %d using buffer owned by %p\n",
2469		    DRM_CURRENTPID, buf->filp);
2470		return (EINVAL);
2471	}
2472	if (buf->pending) {
2473		DRM_ERROR("sending pending buffer %d\n", elts.idx);
2474		return (EINVAL);
2475	}
2476
2477/*	count = (elts.end - elts.start) / sizeof(u16); */
2478	elts.start -= RADEON_INDEX_PRIM_OFFSET;
2479
2480	if (elts.start & 0x7) {
2481		DRM_ERROR("misaligned buffer 0x%x\n", elts.start);
2482		return (EINVAL);
2483	}
2484	if (elts.start < buf->used) {
2485		DRM_ERROR("no header 0x%x - 0x%x\n", elts.start, buf->used);
2486		return (EINVAL);
2487	}
2488
2489	buf->used = elts.end;
2490
2491	if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2492		if (radeon_emit_state(dev_priv, fpriv,
2493		    &sarea_priv->context_state,
2494		    sarea_priv->tex_state,
2495		    sarea_priv->dirty)) {
2496			DRM_ERROR("radeon_emit_state failed\n");
2497			return (EINVAL);
2498		}
2499
2500		sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2501		    RADEON_UPLOAD_TEX1IMAGES |
2502		    RADEON_UPLOAD_TEX2IMAGES |
2503		    RADEON_REQUIRE_QUIESCENCE);
2504	}
2505
2506	/*
2507	 * Build up a prim_t record:
2508	 */
2509	prim.start = elts.start;
2510	prim.finish = elts.end;
2511	prim.prim = elts.prim;
2512	prim.offset = 0;	/* offset from start of dma buffers */
2513	prim.numverts = RADEON_MAX_VB_VERTS;	/* duh */
2514	prim.vc_format = dev_priv->sarea_priv->vc_format;
2515
2516	radeon_cp_dispatch_indices(dev, buf, &prim);
2517	if (elts.discard) {
2518		radeon_cp_discard_buffer(dev, buf);
2519	}
2520
2521	COMMIT_RING();
2522	return (0);
2523}
2524
/*
 * ioctl handler: copy in a texture upload request and the image
 * descriptor it points to, then pass both to
 * radeon_cp_dispatch_texture() and commit the ring.
 *
 * Returns the result of radeon_cp_dispatch_texture(), EINVAL when the
 * request carries a null image pointer, or EFAULT when the image
 * descriptor cannot be copied in.  The *_WITH_RETURN macros may also
 * return early on their own.
 */
/*ARGSUSED*/
static int radeon_cp_texture(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_texture_t tex;
	drm_radeon_tex_image_t image;
	int ret;

	LOCK_TEST_WITH_RETURN(dev, fpriv);

#ifdef _MULTI_DATAMODEL
	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
		/* 32-bit caller: read the ILP32 layouts and widen them */
		drm_radeon_texture_32_t tex32;
		drm_radeon_tex_image_32_t image32;

		DRM_COPYFROM_WITH_RETURN(&tex32, (void *)data, sizeof (tex32));
		if (tex32.image == 0) {
			DRM_ERROR("null texture image!\n");
			return (EINVAL);
		}
		if (DRM_COPY_FROM_USER(&image32,
		    (void *)(uintptr_t)tex32.image, sizeof (image32))) {
			cmn_err(CE_WARN, "copyin32 failed");
			return (EFAULT);
		}

		tex.offset = tex32.offset;
		tex.pitch = tex32.pitch;
		tex.format = tex32.format;
		tex.width = tex32.width;
		tex.height = tex32.height;
		tex.image = (void*)(uintptr_t)tex32.image;

		image.x = image32.x;
		image.y = image32.y;
		image.width = image32.width;
		image.height = image32.height;
		image.data = (void*)(uintptr_t)image32.data;

	} else {
#endif
		/* native caller: the structures can be copied in directly */
		DRM_COPYFROM_WITH_RETURN(&tex, (void *)data, sizeof (tex));
		if (tex.image == NULL) {
			return (EINVAL);
		}
		if (DRM_COPY_FROM_USER(&image,
		    (drm_radeon_tex_image_t *)tex.image, sizeof (image))) {
			return (EFAULT);
		}
#ifdef _MULTI_DATAMODEL
	}
#endif

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	ret = radeon_cp_dispatch_texture(fpriv, dev, &tex, &image, mode);

	/* the ring is committed whether or not the dispatch succeeded */
	COMMIT_RING();
	return (ret);
}
2587
2588/*ARGSUSED*/
2589static int radeon_cp_stipple(DRM_IOCTL_ARGS)
2590{
2591	DRM_DEVICE;
2592	drm_radeon_private_t *dev_priv = dev->dev_private;
2593	drm_radeon_stipple_t stipple;
2594	u32 mask[32];
2595
2596	LOCK_TEST_WITH_RETURN(dev, fpriv);
2597
2598#ifdef _MULTI_DATAMODEL
2599	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
2600		drm_radeon_stipple_32_t stipple32;
2601		DRM_COPYFROM_WITH_RETURN(&stipple32, (void *)data,
2602		    sizeof (stipple32));
2603		stipple.mask = (void *)(uintptr_t)stipple32.mask;
2604	} else {
2605#endif
2606		DRM_COPYFROM_WITH_RETURN(&stipple, (void *)data,
2607		    sizeof (stipple));
2608#ifdef _MULTI_DATAMODEL
2609	}
2610#endif
2611	if (DRM_COPY_FROM_USER(&mask, stipple.mask, 32 * sizeof (u32)))
2612		return (EFAULT);
2613
2614
2615	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2616
2617	radeon_cp_dispatch_stipple(dev, mask);
2618
2619	COMMIT_RING();
2620	return (0);
2621}
2622
2623/*ARGSUSED*/
2624static int radeon_cp_indirect(DRM_IOCTL_ARGS)
2625{
2626	DRM_DEVICE;
2627	drm_radeon_private_t *dev_priv = dev->dev_private;
2628	drm_device_dma_t *dma = dev->dma;
2629	drm_buf_t *buf;
2630	drm_radeon_indirect_t indirect;
2631	RING_LOCALS;
2632
2633	LOCK_TEST_WITH_RETURN(dev, fpriv);
2634
2635	if (!dev_priv) {
2636		DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2637		return (EINVAL);
2638	}
2639
2640	DRM_COPYFROM_WITH_RETURN(&indirect, (void *) data, sizeof (indirect));
2641
2642	DRM_DEBUG("indirect: idx=%d s=%d e=%d d=%d\n",
2643	    indirect.idx, indirect.start, indirect.end, indirect.discard);
2644
2645	if (indirect.idx < 0 || indirect.idx >= dma->buf_count) {
2646		DRM_ERROR("buffer index %d (of %d max)\n",
2647		    indirect.idx, dma->buf_count - 1);
2648		return (EINVAL);
2649	}
2650
2651	buf = dma->buflist[indirect.idx];
2652
2653	if (buf->filp != fpriv) {
2654		DRM_ERROR("process %d using buffer owned by %p\n",
2655		    DRM_CURRENTPID, buf->filp);
2656		return (EINVAL);
2657	}
2658	if (buf->pending) {
2659		DRM_ERROR("sending pending buffer %d\n", indirect.idx);
2660		return (EINVAL);
2661	}
2662
2663	if (indirect.start < buf->used) {
2664		DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
2665		    indirect.start, buf->used);
2666		return (EINVAL);
2667	}
2668
2669	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2670	VB_AGE_TEST_WITH_RETURN(dev_priv);
2671
2672	buf->used = indirect.end;
2673
2674	/*
2675	 * Wait for the 3D stream to idle before the indirect buffer
2676	 * containing 2D acceleration commands is processed.
2677	 */
2678	BEGIN_RING(2);
2679
2680	RADEON_WAIT_UNTIL_3D_IDLE();
2681
2682	ADVANCE_RING();
2683
2684	/*
2685	 * Dispatch the indirect buffer full of commands from the
2686	 * X server.  This is insecure and is thus only available to
2687	 * privileged clients.
2688	 */
2689	radeon_cp_dispatch_indirect(dev, buf, indirect.start, indirect.end);
2690	if (indirect.discard) {
2691		radeon_cp_discard_buffer(dev, buf);
2692	}
2693
2694	COMMIT_RING();
2695	return (0);
2696}
2697
/*
 * ioctl handler: dispatch a list of primitives that all live in one of
 * the caller's DMA buffers.  Each primitive descriptor is copied in
 * from user space; whenever its state index differs from the previous
 * one, the matching state block is copied in and emitted first.
 *
 * Returns 0 on success, EINVAL when the driver has no private state,
 * the buffer index is out of range, the buffer is owned by another
 * client or still pending, the SAREA cliprect count is too large, or
 * state emission fails, and EFAULT when a primitive or state block
 * cannot be copied in.  The *_WITH_RETURN macros may also return early.
 */
/*ARGSUSED*/
static int radeon_cp_vertex2(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_vertex2_t vertex;
	int i;
	unsigned char laststate;

	LOCK_TEST_WITH_RETURN(dev, fpriv);

	if (!dev_priv) {
		DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
		return (EINVAL);
	}

	sarea_priv = dev_priv->sarea_priv;


#ifdef _MULTI_DATAMODEL
	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
		/* 32-bit caller: read the ILP32 layout and widen pointers */
		drm_radeon_vertex2_32_t	vertex32;

		DRM_COPYFROM_WITH_RETURN(&vertex32, (void *) data,
		    sizeof (vertex32));
		vertex.idx = vertex32.idx;
		vertex.discard = vertex32.discard;
		vertex.nr_states = vertex32.nr_states;
		vertex.state = (void *) (uintptr_t)vertex32.state;
		vertex.nr_prims = vertex32.nr_prims;
		vertex.prim = (void *)(uintptr_t)vertex32.prim;
	} else {
#endif
		DRM_COPYFROM_WITH_RETURN(&vertex, (void *) data,
		    sizeof (vertex));
#ifdef _MULTI_DATAMODEL
	}
#endif

	DRM_DEBUG("pid=%d index=%d discard=%d\n",
	    DRM_CURRENTPID, vertex.idx, vertex.discard);

	if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
		    vertex.idx, dma->buf_count - 1);
		return (EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[vertex.idx];

	/* the buffer must belong to the caller and must not be in flight */
	if (buf->filp != fpriv) {
		DRM_ERROR("process %d using buffer owned by %p\n",
		    DRM_CURRENTPID, buf->filp);
		return (EINVAL);
	}

	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", vertex.idx);
		return (EINVAL);
	}

	/* unlike the clear/swap handlers, an oversized nbox is rejected */
	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		return (EINVAL);

	/* laststate starts at 0xff and is updated after each state emit */
	for (laststate = 0xff, i = 0; i < vertex.nr_prims; i++) {
		drm_radeon_prim_t prim;
		drm_radeon_tcl_prim_t tclprim;

		if (DRM_COPY_FROM_USER(&prim, &vertex.prim[i], sizeof (prim)))
			return (EFAULT);

		/* only re-emit state when this primitive switches state */
		if (prim.stateidx != laststate) {
			drm_radeon_state_t state;

			/*
			 * NOTE(review): stateidx is not compared against
			 * vertex.nr_states before it indexes the user
			 * array - confirm that is intentional.
			 */
			if (DRM_COPY_FROM_USER(&state,
			    &vertex.state[prim.stateidx], sizeof (state)))
				return (EFAULT);

			if (radeon_emit_state2(dev_priv, fpriv, &state)) {
				DRM_ERROR("radeon_emit_state2 failed\n");
				return (EINVAL);
			}

			laststate = prim.stateidx;
		}

		tclprim.start = prim.start;
		tclprim.finish = prim.finish;
		tclprim.prim = prim.prim;
		tclprim.vc_format = prim.vc_format;

		/* indexed primitives go through the indices dispatcher */
		if (prim.prim & RADEON_PRIM_WALK_IND) {
			tclprim.offset = prim.numverts * 64;
			tclprim.numverts = RADEON_MAX_VB_VERTS;	/* duh */

			radeon_cp_dispatch_indices(dev, buf, &tclprim);
		} else {
			tclprim.numverts = prim.numverts;
			tclprim.offset = 0;	/* not used */

			radeon_cp_dispatch_vertex(dev, buf, &tclprim);
		}

		/* a single cliprect is dropped after the first dispatch */
		if (sarea_priv->nbox == 1)
			sarea_priv->nbox = 0;
	}

	if (vertex.discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return (0);
}
2818
2819static int radeon_emit_packets(drm_radeon_private_t *dev_priv,
2820    drm_file_t *filp_priv, drm_radeon_cmd_header_t header,
2821    drm_radeon_kcmd_buffer_t *cmdbuf)
2822{
2823	int id = (int)header.packet.packet_id;
2824	int sz, reg;
2825	u32 *data = (u32 *)(uintptr_t)cmdbuf->buf;
2826	RING_LOCALS;
2827
2828	if (id >= RADEON_MAX_STATE_PACKETS)
2829		return (EINVAL);
2830
2831	sz = packet[id].len;
2832	reg = packet[id].start;
2833
2834	if (sz * sizeof (int) > cmdbuf->bufsz) {
2835		DRM_ERROR("Packet size provided larger than data provided\n");
2836		return (EINVAL);
2837	}
2838
2839	if (radeon_check_and_fixup_packets(dev_priv, filp_priv, id, data)) {
2840		DRM_ERROR("Packet verification failed\n");
2841		return (EINVAL);
2842	}
2843
2844	BEGIN_RING(sz + 1);
2845	OUT_RING(CP_PACKET0(reg, (sz - 1)));
2846	OUT_RING_TABLE(data, sz);
2847	ADVANCE_RING();
2848
2849	cmdbuf->buf += sz * sizeof (int);
2850	cmdbuf->bufsz -= sz * sizeof (int);
2851	return (0);
2852}
2853
2854static inline int
2855radeon_emit_scalars(drm_radeon_private_t *dev_priv,
2856    drm_radeon_cmd_header_t header, drm_radeon_kcmd_buffer_t *cmdbuf)
2857{
2858	int sz = header.scalars.count;
2859	int start = header.scalars.offset;
2860	int stride = header.scalars.stride;
2861	RING_LOCALS;
2862
2863	BEGIN_RING(3 + sz);
2864	OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2865	OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2866	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2867	OUT_RING_TABLE(cmdbuf->buf, sz);
2868	ADVANCE_RING();
2869	cmdbuf->buf += sz * sizeof (int);
2870	cmdbuf->bufsz -= sz * sizeof (int);
2871	return (0);
2872}
2873
2874/*
2875 * God this is ugly
2876 */
2877static inline int
2878radeon_emit_scalars2(drm_radeon_private_t *dev_priv,
2879    drm_radeon_cmd_header_t header, drm_radeon_kcmd_buffer_t *cmdbuf)
2880{
2881	int sz = header.scalars.count;
2882	int start = ((unsigned int)header.scalars.offset) + 0x100;
2883	int stride = header.scalars.stride;
2884	RING_LOCALS;
2885
2886	BEGIN_RING(3 + sz);
2887	OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2888	OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2889	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2890	OUT_RING_TABLE(cmdbuf->buf, sz);
2891	ADVANCE_RING();
2892	cmdbuf->buf += sz * sizeof (int);
2893	cmdbuf->bufsz -= sz * sizeof (int);
2894	return (0);
2895}
2896
2897static inline int
2898radeon_emit_vectors(drm_radeon_private_t *dev_priv,
2899    drm_radeon_cmd_header_t header, drm_radeon_kcmd_buffer_t *cmdbuf)
2900{
2901	int sz = header.vectors.count;
2902	int start = header.vectors.offset;
2903	int stride = header.vectors.stride;
2904	RING_LOCALS;
2905
2906	BEGIN_RING(5 + sz);
2907	OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2908	OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2909	OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2910	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2911	OUT_RING_TABLE(cmdbuf->buf, sz);
2912	ADVANCE_RING();
2913
2914	cmdbuf->buf += sz * sizeof (int);
2915	cmdbuf->bufsz -= sz * sizeof (int);
2916	return (0);
2917}
2918
2919static inline int
2920radeon_emit_veclinear(drm_radeon_private_t *dev_priv,
2921    drm_radeon_cmd_header_t header, drm_radeon_kcmd_buffer_t *cmdbuf)
2922{
2923	int sz = header.veclinear.count * 4;
2924	int start = header.veclinear.addr_lo | (header.veclinear.addr_hi << 8);
2925	RING_LOCALS;
2926
2927		if (!sz)
2928			return (0);
2929		if (sz * 4 > cmdbuf->bufsz)
2930			return (EINVAL);
2931
2932	BEGIN_RING(5 + sz);
2933	OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2934	OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2935	OUT_RING(start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2936	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2937	OUT_RING_TABLE(cmdbuf->buf, sz);
2938	ADVANCE_RING();
2939
2940	cmdbuf->buf += sz * sizeof (int);
2941	cmdbuf->bufsz -= sz * sizeof (int);
2942	return (0);
2943}
2944
2945static int
2946radeon_emit_packet3(drm_device_t *dev, drm_file_t *filp_priv,
2947    drm_radeon_kcmd_buffer_t *cmdbuf)
2948{
2949	drm_radeon_private_t *dev_priv = dev->dev_private;
2950	unsigned int cmdsz;
2951	int ret;
2952	RING_LOCALS;
2953
2954
2955	if ((ret = radeon_check_and_fixup_packet3(dev_priv,
2956	    filp_priv, cmdbuf, &cmdsz))) {
2957		DRM_ERROR("Packet verification failed\n");
2958		return (ret);
2959	}
2960
2961	BEGIN_RING(cmdsz);
2962	OUT_RING_TABLE(cmdbuf->buf, cmdsz);
2963	ADVANCE_RING();
2964
2965	cmdbuf->buf += cmdsz * 4;
2966	cmdbuf->bufsz -= cmdsz * 4;
2967	return (0);
2968}
2969
/*
 * Verify the packet at the head of the command buffer and emit it once
 * per cliprect in cmdbuf->boxes, setting each cliprect with
 * radeon_emit_clip_rect() before the packet is written.  When
 * orig_nbox is 0 nothing is emitted, but the packet is still consumed
 * from the command buffer.
 *
 * Returns 0 on success, the verifier's non-zero result when
 * verification fails, or EFAULT when a cliprect cannot be copied in.
 */
static int radeon_emit_packet3_cliprect(drm_device_t *dev,
					drm_file_t *filp_priv,
					drm_radeon_kcmd_buffer_t *cmdbuf,
					int orig_nbox)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_clip_rect_t box;
	unsigned int cmdsz;
	int ret;
	drm_clip_rect_t __user *boxes = cmdbuf->boxes;
	int i = 0;
	RING_LOCALS;

	if ((ret = radeon_check_and_fixup_packet3(dev_priv,
	    filp_priv, cmdbuf, &cmdsz))) {
		DRM_ERROR("Packet verification failed\n");
		return (ret);
	}

	/* no cliprects at all: skip emission but still consume the packet */
	if (!orig_nbox)
		goto out;

	/*
	 * do/while: the packet is emitted at least once even when
	 * cmdbuf->nbox is 0, in which case no cliprect is set first.
	 */
	do {
		if (i < cmdbuf->nbox) {
			if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof (box)))
				return (EFAULT);
			/*
			 * FIXME The second and subsequent times round
			 * this loop, send a WAIT_UNTIL_3D_IDLE before
			 * calling emit_clip_rect(). This fixes a
			 * lockup on fast machines when sending
			 * several cliprects with a cmdbuf, as when
			 * waving a 2D window over a 3D
			 * window. Something in the commands from user
			 * space seems to hang the card when they're
			 * sent several times in a row. That would be
			 * the correct place to fix it but this works
			 * around it until I can figure that out - Tim
			 * Smith
			 */
			if (i) {
				BEGIN_RING(2);
				RADEON_WAIT_UNTIL_3D_IDLE();
				ADVANCE_RING();
			}
			radeon_emit_clip_rect(dev_priv, &box);
		}

		BEGIN_RING(cmdsz);
		OUT_RING_TABLE(cmdbuf->buf, cmdsz);
		ADVANCE_RING();

	} while (++i < cmdbuf->nbox);
	/* a single cliprect is dropped once it has been used */
	if (cmdbuf->nbox == 1)
		cmdbuf->nbox = 0;

out:
	/* advance past the packet; cmdsz is in dwords, 4 bytes each */
	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return (0);
}
3031
3032static int
3033radeon_emit_wait(drm_device_t *dev, int flags)
3034{
3035	drm_radeon_private_t *dev_priv = dev->dev_private;
3036	RING_LOCALS;
3037
3038	DRM_DEBUG("%s: %x\n", __FUNCTION__, flags);
3039	switch (flags) {
3040	case RADEON_WAIT_2D:
3041		BEGIN_RING(2);
3042		RADEON_WAIT_UNTIL_2D_IDLE();
3043		ADVANCE_RING();
3044		break;
3045	case RADEON_WAIT_3D:
3046		BEGIN_RING(2);
3047		RADEON_WAIT_UNTIL_3D_IDLE();
3048		ADVANCE_RING();
3049		break;
3050	case RADEON_WAIT_2D | RADEON_WAIT_3D:
3051		BEGIN_RING(2);
3052		RADEON_WAIT_UNTIL_IDLE();
3053		ADVANCE_RING();
3054		break;
3055	default:
3056		return (EINVAL);
3057	}
3058
3059	return (0);
3060}
3061
3062/*ARGSUSED*/
3063static int radeon_cp_cmdbuf(DRM_IOCTL_ARGS)
3064{
3065	DRM_DEVICE;
3066	drm_radeon_private_t *dev_priv = dev->dev_private;
3067	drm_device_dma_t *dma = dev->dma;
3068	drm_buf_t *buf = NULL;
3069	int idx;
3070	drm_radeon_kcmd_buffer_t cmdbuf;
3071	drm_radeon_cmd_header_t header;
3072	int orig_nbox, orig_bufsz;
3073	char *kbuf = NULL;
3074
3075	LOCK_TEST_WITH_RETURN(dev, fpriv);
3076
3077	if (!dev_priv) {
3078		DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
3079		return (EINVAL);
3080	}
3081
3082#ifdef _MULTI_DATAMODEL
3083	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
3084		drm_radeon_kcmd_buffer_32_t cmdbuf32;
3085
3086		DRM_COPYFROM_WITH_RETURN(&cmdbuf32, (void *)data,
3087		    sizeof (cmdbuf32));
3088		cmdbuf.bufsz = cmdbuf32.bufsz;
3089		cmdbuf.buf = (void *)(uintptr_t)cmdbuf32.buf;
3090		cmdbuf.nbox = cmdbuf32.nbox;
3091		cmdbuf.boxes = (void *)(uintptr_t)cmdbuf32.boxes;
3092	} else {
3093#endif
3094		DRM_COPYFROM_WITH_RETURN(&cmdbuf, (void *) data,
3095		    sizeof (cmdbuf));
3096#ifdef _MULTI_DATAMODEL
3097	}
3098#endif
3099	RING_SPACE_TEST_WITH_RETURN(dev_priv);
3100	VB_AGE_TEST_WITH_RETURN(dev_priv);
3101
3102	if (cmdbuf.bufsz > 64 * 1024 || cmdbuf.bufsz < 0) {
3103		return (EINVAL);
3104	}
3105
3106	/*
3107	 * Allocate an in-kernel area and copy in the cmdbuf. Do this
3108	 * to avoid races between checking values and using those values
3109	 * in other code, and simply to avoid a lot of function calls
3110	 * to copy in data.
3111	 */
3112	orig_bufsz = cmdbuf.bufsz;
3113	if (orig_bufsz != 0) {
3114		kbuf = drm_alloc(cmdbuf.bufsz, DRM_MEM_DRIVER);
3115		if (kbuf == NULL)
3116			return (ENOMEM);
3117		if (DRM_COPY_FROM_USER(kbuf, (void *)cmdbuf.buf,
3118		    cmdbuf.bufsz)) {
3119			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
3120			return (EFAULT);
3121		}
3122		cmdbuf.buf = kbuf;
3123	}
3124
3125	orig_nbox = cmdbuf.nbox;
3126
3127	if (dev_priv->microcode_version == UCODE_R300) {
3128		int temp;
3129		temp = r300_do_cp_cmdbuf(dev, fpriv, &cmdbuf);
3130
3131		if (orig_bufsz != 0)
3132			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
3133
3134		return (temp);
3135	}
3136
3137	/* microcode_version != r300 */
3138	while (cmdbuf.bufsz >= sizeof (header)) {
3139
3140		header.i = *(int *)(uintptr_t)cmdbuf.buf;
3141		cmdbuf.buf += sizeof (header);
3142		cmdbuf.bufsz -= sizeof (header);
3143
3144		switch (header.header.cmd_type) {
3145		case RADEON_CMD_PACKET:
3146			DRM_DEBUG("RADEON_CMD_PACKET\n");
3147			if (radeon_emit_packets
3148			    (dev_priv, fpriv, header, &cmdbuf)) {
3149				DRM_ERROR("radeon_emit_packets failed\n");
3150				goto err;
3151			}
3152			break;
3153
3154		case RADEON_CMD_SCALARS:
3155			DRM_DEBUG("RADEON_CMD_SCALARS\n");
3156			if (radeon_emit_scalars(dev_priv, header, &cmdbuf)) {
3157				DRM_ERROR("radeon_emit_scalars failed\n");
3158				goto err;
3159			}
3160			break;
3161
3162		case RADEON_CMD_VECTORS:
3163			DRM_DEBUG("RADEON_CMD_VECTORS\n");
3164			if (radeon_emit_vectors(dev_priv, header, &cmdbuf)) {
3165				DRM_ERROR("radeon_emit_vectors failed\n");
3166				goto err;
3167			}
3168			break;
3169
3170		case RADEON_CMD_DMA_DISCARD:
3171			DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
3172			idx = header.dma.buf_idx;
3173			if (idx < 0 || idx >= dma->buf_count) {
3174				DRM_ERROR("buffer index %d (of %d max)\n",
3175				    idx, dma->buf_count - 1);
3176				goto err;
3177			}
3178
3179			buf = dma->buflist[idx];
3180			if (buf->filp != fpriv || buf->pending) {
3181				DRM_ERROR("bad buffer %p %p %d\n",
3182				    buf->filp, fpriv, buf->pending);
3183				goto err;
3184			}
3185
3186			radeon_cp_discard_buffer(dev, buf);
3187			break;
3188
3189		case RADEON_CMD_PACKET3:
3190			DRM_DEBUG("RADEON_CMD_PACKET3\n");
3191			if (radeon_emit_packet3(dev, fpriv, &cmdbuf)) {
3192				DRM_ERROR("radeon_emit_packet3 failed\n");
3193				goto err;
3194			}
3195			break;
3196
3197		case RADEON_CMD_PACKET3_CLIP:
3198			DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
3199			if (radeon_emit_packet3_cliprect
3200			    (dev, fpriv, &cmdbuf, orig_nbox)) {
3201				DRM_ERROR("radeon_emit_packet3_clip failed\n");
3202				goto err;
3203			}
3204			break;
3205
3206		case RADEON_CMD_SCALARS2:
3207			DRM_DEBUG("RADEON_CMD_SCALARS2\n");
3208			if (radeon_emit_scalars2(dev_priv, header, &cmdbuf)) {
3209				DRM_ERROR("radeon_emit_scalars2 failed\n");
3210				goto err;
3211			}
3212			break;
3213
3214		case RADEON_CMD_WAIT:
3215			DRM_DEBUG("RADEON_CMD_WAIT\n");
3216			if (radeon_emit_wait(dev, header.wait.flags)) {
3217				DRM_ERROR("radeon_emit_wait failed\n");
3218				goto err;
3219			}
3220			break;
3221		case RADEON_CMD_VECLINEAR:
3222			DRM_DEBUG("RADEON_CMD_VECLINEAR\n");
3223			if (radeon_emit_veclinear(dev_priv, header, &cmdbuf)) {
3224				DRM_ERROR("radeon_emit_veclinear failed\n");
3225				goto err;
3226			}
3227			break;
3228
3229		default:
3230			DRM_ERROR("bad cmd_type %d at %p\n",
3231			    header.header.cmd_type,
3232			    cmdbuf.buf - sizeof (header));
3233			goto err;
3234		}
3235	}
3236
3237	if (orig_bufsz != 0)
3238		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
3239
3240	COMMIT_RING();
3241	return (0);
3242
3243err:
3244	if (orig_bufsz != 0)
3245		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
3246	return (EINVAL);
3247}
3248
3249/*ARGSUSED*/
3250static int radeon_cp_getparam(DRM_IOCTL_ARGS)
3251{
3252	DRM_DEVICE;
3253	drm_radeon_private_t *dev_priv = dev->dev_private;
3254	drm_radeon_getparam_t param;
3255	int value;
3256
3257	if (!dev_priv) {
3258		DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
3259		return (EINVAL);
3260	}
3261
3262#ifdef _MULTI_DATAMODEL
3263	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
3264		drm_radeon_getparam_32_t param32;
3265
3266		DRM_COPYFROM_WITH_RETURN(&param32,
3267		    (drm_radeon_getparam_32_t *)data, sizeof (param32));
3268		param.param = param32.param;
3269		param.value = (void *)(uintptr_t)param32.value;
3270	} else {
3271#endif
3272		DRM_COPYFROM_WITH_RETURN(&param,
3273		    (drm_radeon_getparam_t *)data, sizeof (param));
3274#ifdef _MULTI_DATAMODEL
3275	}
3276#endif
3277	DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);
3278
3279	switch (param.param) {
3280	case RADEON_PARAM_GART_BUFFER_OFFSET:
3281		value = dev_priv->gart_buffers_offset;
3282		break;
3283	case RADEON_PARAM_LAST_FRAME:
3284		dev_priv->stats.last_frame_reads++;
3285		value = GET_SCRATCH(0);
3286		break;
3287	case RADEON_PARAM_LAST_DISPATCH:
3288		value = GET_SCRATCH(1);
3289		break;
3290	case RADEON_PARAM_LAST_CLEAR:
3291		dev_priv->stats.last_clear_reads++;
3292		value = GET_SCRATCH(2);
3293		break;
3294	case RADEON_PARAM_IRQ_NR:
3295		value = dev->irq;
3296		break;
3297	case RADEON_PARAM_GART_BASE:
3298		value = dev_priv->gart_vm_start;
3299		break;
3300	case RADEON_PARAM_REGISTER_HANDLE:
3301		value = dev_priv->mmio->offset;
3302		break;
3303	case RADEON_PARAM_STATUS_HANDLE:
3304		value = dev_priv->ring_rptr_offset;
3305		break;
3306#ifndef __LP64__
3307		/*
3308		 * This ioctl() doesn't work on 64-bit platforms because
3309		 * hw_lock is a pointer which can't fit into an int-sized
3310		 * variable.  According to Michel Dänzer, the ioctl) is
3311		 * only used on embedded platforms, so not supporting it
3312		 * shouldn't be a problem.  If the same functionality is
3313		 * needed on 64-bit platforms, a new ioctl() would have
3314		 * to be added, so backwards-compatibility for the embedded
3315		 * platforms can be maintained.  --davidm 4-Feb-2004.
3316		 */
3317	case RADEON_PARAM_SAREA_HANDLE:
3318		/* The lock is the first dword in the sarea. */
3319		value = (long)dev->lock.hw_lock;
3320		break;
3321#endif
3322	case RADEON_PARAM_GART_TEX_HANDLE:
3323		value = dev_priv->gart_textures_offset;
3324		break;
3325	case RADEON_PARAM_SCRATCH_OFFSET:
3326		if (!dev_priv->writeback_works)
3327			return (EINVAL);
3328		value = RADEON_SCRATCH_REG_OFFSET;
3329		break;
3330
3331	case RADEON_PARAM_CARD_TYPE:
3332		if (dev_priv->flags & RADEON_IS_PCIE)
3333			value = RADEON_CARD_PCIE;
3334		else if (dev_priv->flags & RADEON_IS_AGP)
3335			value = RADEON_CARD_AGP;
3336		else
3337			value = RADEON_CARD_PCI;
3338		break;
3339	case RADEON_PARAM_VBLANK_CRTC:
3340		value = radeon_vblank_crtc_get(dev);
3341		break;
3342	default:
3343		return (EINVAL);
3344	}
3345
3346	if (DRM_COPY_TO_USER(param.value, &value, sizeof (int))) {
3347		DRM_ERROR("copy_to_user\n");
3348		return (EFAULT);
3349	}
3350	return (0);
3351}
3352
3353/*ARGSUSED*/
3354static int radeon_cp_setparam(DRM_IOCTL_ARGS)
3355{
3356	DRM_DEVICE;
3357	drm_radeon_private_t *dev_priv = dev->dev_private;
3358	drm_radeon_setparam_t sp;
3359	struct drm_radeon_driver_file_fields *radeon_priv;
3360
3361	if (!dev_priv) {
3362		DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
3363		return (EINVAL);
3364	}
3365
3366#ifdef _MULTI_DATAMODEL
3367	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
3368		drm_radeon_setparam_32_t sp32;
3369
3370		DRM_COPYFROM_WITH_RETURN(&sp32, (void *) data, sizeof (sp32));
3371		sp.param = sp32.param;
3372		sp.value = sp32.value;
3373	} else {
3374#endif
3375	DRM_COPYFROM_WITH_RETURN(&sp, (void *) data, sizeof (sp));
3376#ifdef _MULTI_DATAMODEL
3377	}
3378#endif
3379	switch (sp.param) {
3380	case RADEON_SETPARAM_FB_LOCATION:
3381		radeon_priv = fpriv->driver_priv;
3382		radeon_priv->radeon_fb_delta = dev_priv->fb_location - sp.value;
3383		break;
3384	case RADEON_SETPARAM_SWITCH_TILING:
3385		if (sp.value == 0) {
3386			DRM_DEBUG("color tiling disabled\n");
3387			dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3388			dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3389			dev_priv->sarea_priv->tiling_enabled = 0;
3390		} else if (sp.value == 1) {
3391			DRM_DEBUG("color tiling enabled\n");
3392			dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
3393			dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
3394			dev_priv->sarea_priv->tiling_enabled = 1;
3395		}
3396		break;
3397	case RADEON_SETPARAM_PCIGART_LOCATION:
3398		dev_priv->pcigart_offset = (unsigned long)sp.value;
3399		break;
3400	case RADEON_SETPARAM_NEW_MEMMAP:
3401		dev_priv->new_memmap = (int)sp.value;
3402		break;
3403	case RADEON_SETPARAM_VBLANK_CRTC:
3404		return (radeon_vblank_crtc_set(dev, sp.value));
3405	default:
3406		DRM_DEBUG("Invalid parameter %d\n", sp.param);
3407		return (EINVAL);
3408	}
3409
3410	return (0);
3411}
3412
3413/*
3414 * When a client dies:
3415 *    - Check for and clean up flipped page state
3416 *    - Free any alloced GART memory.
3417 *    - Free any alloced radeon surfaces.
3418 *
3419 * DRM infrastructure takes care of reclaiming dma buffers.
3420 */
3421void
3422radeon_driver_preclose(drm_device_t *dev, drm_file_t *filp)
3423{
3424	if (dev->dev_private) {
3425		drm_radeon_private_t *dev_priv = dev->dev_private;
3426		if (dev_priv->page_flipping) {
3427			(void) radeon_do_cleanup_pageflip(dev);
3428		}
3429		radeon_mem_release(filp, dev_priv->gart_heap);
3430		radeon_mem_release(filp, dev_priv->fb_heap);
3431		radeon_surfaces_release(filp, dev_priv);
3432	}
3433}
3434
/*
 * Last-close hook: hands the device to radeon_do_release().
 * NOTE(review): presumably invoked by the DRM core once the final open
 * handle is closed -- confirm against the caller.
 */
void
radeon_driver_lastclose(drm_device_t *dev)
{
	radeon_do_release(dev);
}
3440
3441int
3442radeon_driver_open(drm_device_t *dev, drm_file_t *filp_priv)
3443{
3444	drm_radeon_private_t *dev_priv = dev->dev_private;
3445	struct drm_radeon_driver_file_fields *radeon_priv;
3446
3447	radeon_priv =
3448	    (struct drm_radeon_driver_file_fields *)
3449	    drm_alloc(sizeof (*radeon_priv), DRM_MEM_FILES);
3450
3451	if (!radeon_priv)
3452		return (-ENOMEM);
3453
3454	filp_priv->driver_priv = radeon_priv;
3455
3456	if (dev_priv)
3457		radeon_priv->radeon_fb_delta = dev_priv->fb_location;
3458	else
3459		radeon_priv->radeon_fb_delta = 0;
3460	return (0);
3461}
3462
3463/*ARGSUSED*/
3464void
3465radeon_driver_postclose(drm_device_t *dev, drm_file_t *filp_priv)
3466{
3467	struct drm_radeon_driver_file_fields *radeon_priv =
3468	    filp_priv->driver_priv;
3469
3470	drm_free(radeon_priv, sizeof (* radeon_priv), DRM_MEM_FILES);
3471}
3472
/*
 * Table of radeon-specific ioctl handlers.
 *
 * Each slot is selected by DRM_IOCTL_NR() of the ioctl code and holds the
 * handler function followed by a flag mask built from DRM_AUTH, DRM_MASTER
 * and DRM_ROOT_ONLY.
 * NOTE(review): the flags presumably describe the access checks the DRM
 * core applies before calling the handler -- confirm in the DRM core.
 */
drm_ioctl_desc_t radeon_ioctls[] = {
	[DRM_IOCTL_NR(DRM_RADEON_CP_INIT)] =
	    {radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_START)] =
	    {radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_STOP)] =
	    {radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_RESET)] =
	    {radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_IDLE)] =
	    {radeon_cp_idle, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_CP_RESUME)] =
	    {radeon_cp_resume, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_RESET)] =
	    {radeon_engine_reset, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_FULLSCREEN)] =
	    {radeon_fullscreen, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SWAP)] =
	    {radeon_cp_swap, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_CLEAR)] =
	    {radeon_cp_clear, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_VERTEX)] =
	    {radeon_cp_vertex, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_INDICES)] =
	    {radeon_cp_indices, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_TEXTURE)] =
	    {radeon_cp_texture, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_STIPPLE)] =
	    {radeon_cp_stipple, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_INDIRECT)] =
	    {radeon_cp_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_VERTEX2)] =
	    {radeon_cp_vertex2, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_CMDBUF)] =
	    {radeon_cp_cmdbuf, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_GETPARAM)] =
	    {radeon_cp_getparam, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_FLIP)] =
	    {radeon_cp_flip, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_ALLOC)] =
	    {radeon_mem_alloc, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_FREE)] =
	    {radeon_mem_free, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_INIT_HEAP)] =
	    {radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_IRQ_EMIT)] =
	    {radeon_irq_emit, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_IRQ_WAIT)] =
	    {radeon_irq_wait, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SETPARAM)] =
	    {radeon_cp_setparam, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SURF_ALLOC)] =
	    {radeon_surface_alloc, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SURF_FREE)] =
	    {radeon_surface_free, DRM_AUTH}
};

/* Number of slots in radeon_ioctls, as computed by DRM_ARRAY_SIZE(). */
int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);
3531