1/* savage_state.c -- State and drawing support for Savage
2 *
3 * Copyright 2004  Felix Kuehling
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sub license,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the
14 * next paragraph) shall be included in all copies or substantial portions
15 * of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20 * NON-INFRINGEMENT. IN NO EVENT SHALL FELIX KUEHLING BE LIABLE FOR
21 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
22 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26#include <sys/cdefs.h>
27__FBSDID("$FreeBSD$");
28#include "dev/drm/drmP.h"
29#include "dev/drm/savage_drm.h"
30#include "dev/drm/savage_drv.h"
31
32void savage_emit_clip_rect_s3d(drm_savage_private_t *dev_priv,
33			       const struct drm_clip_rect *pbox)
34{
35	uint32_t scstart = dev_priv->state.s3d.new_scstart;
36	uint32_t scend = dev_priv->state.s3d.new_scend;
37	scstart = (scstart & ~SAVAGE_SCISSOR_MASK_S3D) |
38		((uint32_t)pbox->x1 & 0x000007ff) |
39		(((uint32_t)pbox->y1 << 16) & 0x07ff0000);
40	scend   = (scend & ~SAVAGE_SCISSOR_MASK_S3D) |
41		(((uint32_t)pbox->x2 - 1) & 0x000007ff) |
42		((((uint32_t)pbox->y2 - 1) << 16) & 0x07ff0000);
43	if (scstart != dev_priv->state.s3d.scstart ||
44	    scend   != dev_priv->state.s3d.scend) {
45		DMA_LOCALS;
46		BEGIN_DMA(4);
47		DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
48		DMA_SET_REGISTERS(SAVAGE_SCSTART_S3D, 2);
49		DMA_WRITE(scstart);
50		DMA_WRITE(scend);
51		dev_priv->state.s3d.scstart = scstart;
52		dev_priv->state.s3d.scend = scend;
53		dev_priv->waiting = 1;
54		DMA_COMMIT();
55	}
56}
57
58void savage_emit_clip_rect_s4(drm_savage_private_t *dev_priv,
59			      const struct drm_clip_rect *pbox)
60{
61	uint32_t drawctrl0 = dev_priv->state.s4.new_drawctrl0;
62	uint32_t drawctrl1 = dev_priv->state.s4.new_drawctrl1;
63	drawctrl0 = (drawctrl0 & ~SAVAGE_SCISSOR_MASK_S4) |
64		((uint32_t)pbox->x1 & 0x000007ff) |
65		(((uint32_t)pbox->y1 << 12) & 0x00fff000);
66	drawctrl1 = (drawctrl1 & ~SAVAGE_SCISSOR_MASK_S4) |
67		(((uint32_t)pbox->x2 - 1) & 0x000007ff) |
68		((((uint32_t)pbox->y2 - 1) << 12) & 0x00fff000);
69	if (drawctrl0 != dev_priv->state.s4.drawctrl0 ||
70	    drawctrl1 != dev_priv->state.s4.drawctrl1) {
71		DMA_LOCALS;
72		BEGIN_DMA(4);
73		DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
74		DMA_SET_REGISTERS(SAVAGE_DRAWCTRL0_S4, 2);
75		DMA_WRITE(drawctrl0);
76		DMA_WRITE(drawctrl1);
77		dev_priv->state.s4.drawctrl0 = drawctrl0;
78		dev_priv->state.s4.drawctrl1 = drawctrl1;
79		dev_priv->waiting = 1;
80		DMA_COMMIT();
81	}
82}
83
84static int savage_verify_texaddr(drm_savage_private_t *dev_priv, int unit,
85				 uint32_t addr)
86{
87	if ((addr & 6) != 2) { /* reserved bits */
88		DRM_ERROR("bad texAddr%d %08x (reserved bits)\n", unit, addr);
89		return -EINVAL;
90	}
91	if (!(addr & 1)) { /* local */
92		addr &= ~7;
93		if (addr < dev_priv->texture_offset ||
94		    addr >= dev_priv->texture_offset + dev_priv->texture_size) {
95			DRM_ERROR
96			    ("bad texAddr%d %08x (local addr out of range)\n",
97			     unit, addr);
98			return -EINVAL;
99		}
100	} else { /* AGP */
101		if (!dev_priv->agp_textures) {
102			DRM_ERROR("bad texAddr%d %08x (AGP not available)\n",
103				  unit, addr);
104			return -EINVAL;
105		}
106		addr &= ~7;
107		if (addr < dev_priv->agp_textures->offset ||
108		    addr >= (dev_priv->agp_textures->offset +
109			     dev_priv->agp_textures->size)) {
110			DRM_ERROR
111			    ("bad texAddr%d %08x (AGP addr out of range)\n",
112			     unit, addr);
113			return -EINVAL;
114		}
115	}
116	return 0;
117}
118
/*
 * Helpers for the savage_verify_state_*() functions below.  They rely on
 * the enclosing scope providing `dev_priv`, `start`, `count` and `regs`,
 * where regs[0] holds the value for register `start`.
 *
 * SAVE_STATE(reg, where): if `reg` lies inside [start, start + count),
 * copy its value into dev_priv->state.where.
 *
 * SAVE_STATE_MASK(reg, where, mask): same, but only the bits selected by
 * `mask` are taken from the new value; the others keep their old value.
 *
 * Both are wrapped in do { } while (0) so that they behave as a single
 * statement (e.g. directly under an if/else) and their arguments are
 * parenthesised.
 */
#define SAVE_STATE(reg,where) do {				\
	if (start <= (reg) && start + count > (reg))		\
		dev_priv->state.where = regs[(reg) - start];	\
} while (0)
#define SAVE_STATE_MASK(reg,where,mask) do {			\
	if (start <= (reg) && start + count > (reg)) {		\
		uint32_t tmp;					\
		tmp = regs[(reg) - start];			\
		dev_priv->state.where = (tmp & (mask)) |	\
			(dev_priv->state.where & ~(mask));	\
	}							\
} while (0)
130static int savage_verify_state_s3d(drm_savage_private_t *dev_priv,
131				   unsigned int start, unsigned int count,
132				   const uint32_t *regs)
133{
134	if (start < SAVAGE_TEXPALADDR_S3D ||
135	    start + count - 1 > SAVAGE_DESTTEXRWWATERMARK_S3D) {
136		DRM_ERROR("invalid register range (0x%04x-0x%04x)\n",
137			  start, start + count - 1);
138		return -EINVAL;
139	}
140
141	SAVE_STATE_MASK(SAVAGE_SCSTART_S3D, s3d.new_scstart,
142			~SAVAGE_SCISSOR_MASK_S3D);
143	SAVE_STATE_MASK(SAVAGE_SCEND_S3D, s3d.new_scend,
144			~SAVAGE_SCISSOR_MASK_S3D);
145
146	/* if any texture regs were changed ... */
147	if (start <= SAVAGE_TEXCTRL_S3D &&
148	    start + count > SAVAGE_TEXPALADDR_S3D) {
149		/* ... check texture state */
150		SAVE_STATE(SAVAGE_TEXCTRL_S3D, s3d.texctrl);
151		SAVE_STATE(SAVAGE_TEXADDR_S3D, s3d.texaddr);
152		if (dev_priv->state.s3d.texctrl & SAVAGE_TEXCTRL_TEXEN_MASK)
153			return savage_verify_texaddr(dev_priv, 0,
154						dev_priv->state.s3d.texaddr);
155	}
156
157	return 0;
158}
159
160static int savage_verify_state_s4(drm_savage_private_t *dev_priv,
161				  unsigned int start, unsigned int count,
162				  const uint32_t *regs)
163{
164	int ret = 0;
165
166	if (start < SAVAGE_DRAWLOCALCTRL_S4 ||
167	    start + count - 1 > SAVAGE_TEXBLENDCOLOR_S4) {
168		DRM_ERROR("invalid register range (0x%04x-0x%04x)\n",
169			  start, start + count - 1);
170		return -EINVAL;
171	}
172
173	SAVE_STATE_MASK(SAVAGE_DRAWCTRL0_S4, s4.new_drawctrl0,
174			~SAVAGE_SCISSOR_MASK_S4);
175	SAVE_STATE_MASK(SAVAGE_DRAWCTRL1_S4, s4.new_drawctrl1,
176			~SAVAGE_SCISSOR_MASK_S4);
177
178	/* if any texture regs were changed ... */
179	if (start <= SAVAGE_TEXDESCR_S4 &&
180	    start + count > SAVAGE_TEXPALADDR_S4) {
181		/* ... check texture state */
182		SAVE_STATE(SAVAGE_TEXDESCR_S4, s4.texdescr);
183		SAVE_STATE(SAVAGE_TEXADDR0_S4, s4.texaddr0);
184		SAVE_STATE(SAVAGE_TEXADDR1_S4, s4.texaddr1);
185		if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX0EN_MASK)
186			ret |= savage_verify_texaddr(dev_priv, 0,
187						dev_priv->state.s4.texaddr0);
188		if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX1EN_MASK)
189			ret |= savage_verify_texaddr(dev_priv, 1,
190						dev_priv->state.s4.texaddr1);
191	}
192
193	return ret;
194}
195#undef SAVE_STATE
196#undef SAVE_STATE_MASK
197
198static int savage_dispatch_state(drm_savage_private_t *dev_priv,
199				 const drm_savage_cmd_header_t *cmd_header,
200				 const uint32_t *regs)
201{
202	unsigned int count = cmd_header->state.count;
203	unsigned int start = cmd_header->state.start;
204	unsigned int count2 = 0;
205	unsigned int bci_size;
206	int ret;
207	DMA_LOCALS;
208
209	if (!count)
210		return 0;
211
212	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
213		ret = savage_verify_state_s3d(dev_priv, start, count, regs);
214		if (ret != 0)
215			return ret;
216		/* scissor regs are emitted in savage_dispatch_draw */
217		if (start < SAVAGE_SCSTART_S3D) {
218			if (start + count > SAVAGE_SCEND_S3D + 1)
219				count2 = count - (SAVAGE_SCEND_S3D + 1 - start);
220			if (start + count > SAVAGE_SCSTART_S3D)
221				count = SAVAGE_SCSTART_S3D - start;
222		} else if (start <= SAVAGE_SCEND_S3D) {
223			if (start + count > SAVAGE_SCEND_S3D + 1) {
224				count -= SAVAGE_SCEND_S3D + 1 - start;
225				start = SAVAGE_SCEND_S3D + 1;
226			} else
227				return 0;
228		}
229	} else {
230		ret = savage_verify_state_s4(dev_priv, start, count, regs);
231		if (ret != 0)
232			return ret;
233		/* scissor regs are emitted in savage_dispatch_draw */
234		if (start < SAVAGE_DRAWCTRL0_S4) {
235			if (start + count > SAVAGE_DRAWCTRL1_S4 + 1)
236				count2 = count -
237					 (SAVAGE_DRAWCTRL1_S4 + 1 - start);
238			if (start + count > SAVAGE_DRAWCTRL0_S4)
239				count = SAVAGE_DRAWCTRL0_S4 - start;
240		} else if (start <= SAVAGE_DRAWCTRL1_S4) {
241			if (start + count > SAVAGE_DRAWCTRL1_S4 + 1) {
242				count -= SAVAGE_DRAWCTRL1_S4 + 1 - start;
243				start = SAVAGE_DRAWCTRL1_S4 + 1;
244			} else
245				return 0;
246		}
247	}
248
249	bci_size = count + (count + 254) / 255 + count2 + (count2 + 254) / 255;
250
251	if (cmd_header->state.global) {
252		BEGIN_DMA(bci_size + 1);
253		DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
254		dev_priv->waiting = 1;
255	} else {
256		BEGIN_DMA(bci_size);
257	}
258
259	do {
260		while (count > 0) {
261			unsigned int n = count < 255 ? count : 255;
262			DMA_SET_REGISTERS(start, n);
263			DMA_COPY(regs, n);
264			count -= n;
265			start += n;
266			regs += n;
267		}
268		start += 2;
269		regs += 2;
270		count = count2;
271		count2 = 0;
272	} while (count);
273
274	DMA_COMMIT();
275
276	return 0;
277}
278
279static int savage_dispatch_dma_prim(drm_savage_private_t *dev_priv,
280				    const drm_savage_cmd_header_t *cmd_header,
281				    const struct drm_buf *dmabuf)
282{
283	unsigned char reorder = 0;
284	unsigned int prim = cmd_header->prim.prim;
285	unsigned int skip = cmd_header->prim.skip;
286	unsigned int n = cmd_header->prim.count;
287	unsigned int start = cmd_header->prim.start;
288	unsigned int i;
289	BCI_LOCALS;
290
291	if (!dmabuf) {
292		DRM_ERROR("called without dma buffers!\n");
293		return -EINVAL;
294	}
295
296	if (!n)
297		return 0;
298
299	switch (prim) {
300	case SAVAGE_PRIM_TRILIST_201:
301		reorder = 1;
302		prim = SAVAGE_PRIM_TRILIST;
303	case SAVAGE_PRIM_TRILIST:
304		if (n % 3 != 0) {
305			DRM_ERROR("wrong number of vertices %u in TRILIST\n",
306				  n);
307			return -EINVAL;
308		}
309		break;
310	case SAVAGE_PRIM_TRISTRIP:
311	case SAVAGE_PRIM_TRIFAN:
312		if (n < 3) {
313			DRM_ERROR
314			   ("wrong number of vertices %u in TRIFAN/STRIP\n",
315			    n);
316			return -EINVAL;
317		}
318		break;
319	default:
320		DRM_ERROR("invalid primitive type %u\n", prim);
321		return -EINVAL;
322	}
323
324	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
325		if (skip != 0) {
326			DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
327			return -EINVAL;
328		}
329	} else {
330		unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) -
331			(skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) -
332			(skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1);
333		if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) {
334			DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
335			return -EINVAL;
336		}
337		if (reorder) {
338			DRM_ERROR("TRILIST_201 used on Savage4 hardware\n");
339			return -EINVAL;
340		}
341	}
342
343	if (start + n > dmabuf->total / 32) {
344		DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n",
345			  start, start + n - 1, dmabuf->total / 32);
346		return -EINVAL;
347	}
348
349	/* Vertex DMA doesn't work with command DMA at the same time,
350	 * so we use BCI_... to submit commands here. Flush buffered
351	 * faked DMA first. */
352	DMA_FLUSH();
353
354	if (dmabuf->bus_address != dev_priv->state.common.vbaddr) {
355		BEGIN_BCI(2);
356		BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1);
357		BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type);
358		dev_priv->state.common.vbaddr = dmabuf->bus_address;
359	}
360	if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) {
361		/* Workaround for what looks like a hardware bug. If a
362		 * WAIT_3D_IDLE was emitted some time before the
363		 * indexed drawing command then the engine will lock
364		 * up. There are two known workarounds:
365		 * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */
366		BEGIN_BCI(63);
367		for (i = 0; i < 63; ++i)
368			BCI_WRITE(BCI_CMD_WAIT);
369		dev_priv->waiting = 0;
370	}
371
372	prim <<= 25;
373	while (n != 0) {
374		/* Can emit up to 255 indices (85 triangles) at once. */
375		unsigned int count = n > 255 ? 255 : n;
376		if (reorder) {
377			/* Need to reorder indices for correct flat
378			 * shading while preserving the clock sense
379			 * for correct culling. Only on Savage3D. */
380			int reorder[3] = { -1, -1, -1 };
381			reorder[start % 3] = 2;
382
383			BEGIN_BCI((count + 1 + 1) / 2);
384			BCI_DRAW_INDICES_S3D(count, prim, start + 2);
385
386			for (i = start + 1; i + 1 < start + count; i += 2)
387				BCI_WRITE((i + reorder[i % 3]) |
388					  ((i + 1 +
389					    reorder[(i + 1) % 3]) << 16));
390			if (i < start + count)
391				BCI_WRITE(i + reorder[i % 3]);
392		} else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
393			BEGIN_BCI((count + 1 + 1) / 2);
394			BCI_DRAW_INDICES_S3D(count, prim, start);
395
396			for (i = start + 1; i + 1 < start + count; i += 2)
397				BCI_WRITE(i | ((i + 1) << 16));
398			if (i < start + count)
399				BCI_WRITE(i);
400		} else {
401			BEGIN_BCI((count + 2 + 1) / 2);
402			BCI_DRAW_INDICES_S4(count, prim, skip);
403
404			for (i = start; i + 1 < start + count; i += 2)
405				BCI_WRITE(i | ((i + 1) << 16));
406			if (i < start + count)
407				BCI_WRITE(i);
408		}
409
410		start += count;
411		n -= count;
412
413		prim |= BCI_CMD_DRAW_CONT;
414	}
415
416	return 0;
417}
418
419static int savage_dispatch_vb_prim(drm_savage_private_t *dev_priv,
420				   const drm_savage_cmd_header_t *cmd_header,
421				   const uint32_t *vtxbuf, unsigned int vb_size,
422				   unsigned int vb_stride)
423{
424	unsigned char reorder = 0;
425	unsigned int prim = cmd_header->prim.prim;
426	unsigned int skip = cmd_header->prim.skip;
427	unsigned int n = cmd_header->prim.count;
428	unsigned int start = cmd_header->prim.start;
429	unsigned int vtx_size;
430	unsigned int i;
431	DMA_LOCALS;
432
433	if (!n)
434		return 0;
435
436	switch (prim) {
437	case SAVAGE_PRIM_TRILIST_201:
438		reorder = 1;
439		prim = SAVAGE_PRIM_TRILIST;
440	case SAVAGE_PRIM_TRILIST:
441		if (n % 3 != 0) {
442			DRM_ERROR("wrong number of vertices %u in TRILIST\n",
443				  n);
444			return -EINVAL;
445		}
446		break;
447	case SAVAGE_PRIM_TRISTRIP:
448	case SAVAGE_PRIM_TRIFAN:
449		if (n < 3) {
450			DRM_ERROR
451			    ("wrong number of vertices %u in TRIFAN/STRIP\n",
452			     n);
453			return -EINVAL;
454		}
455		break;
456	default:
457		DRM_ERROR("invalid primitive type %u\n", prim);
458		return -EINVAL;
459	}
460
461	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
462		if (skip > SAVAGE_SKIP_ALL_S3D) {
463			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
464			return -EINVAL;
465		}
466		vtx_size = 8; /* full vertex */
467	} else {
468		if (skip > SAVAGE_SKIP_ALL_S4) {
469			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
470			return -EINVAL;
471		}
472		vtx_size = 10; /* full vertex */
473	}
474
475	vtx_size -= (skip & 1) + (skip >> 1 & 1) +
476		(skip >> 2 & 1) + (skip >> 3 & 1) + (skip >> 4 & 1) +
477		(skip >> 5 & 1) + (skip >> 6 & 1) + (skip >> 7 & 1);
478
479	if (vtx_size > vb_stride) {
480		DRM_ERROR("vertex size greater than vb stride (%u > %u)\n",
481			  vtx_size, vb_stride);
482		return -EINVAL;
483	}
484
485	if (start + n > vb_size / (vb_stride * 4)) {
486		DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n",
487			  start, start + n - 1, vb_size / (vb_stride * 4));
488		return -EINVAL;
489	}
490
491	prim <<= 25;
492	while (n != 0) {
493		/* Can emit up to 255 vertices (85 triangles) at once. */
494		unsigned int count = n > 255 ? 255 : n;
495		if (reorder) {
496			/* Need to reorder vertices for correct flat
497			 * shading while preserving the clock sense
498			 * for correct culling. Only on Savage3D. */
499			int reorder[3] = { -1, -1, -1 };
500			reorder[start % 3] = 2;
501
502			BEGIN_DMA(count * vtx_size + 1);
503			DMA_DRAW_PRIMITIVE(count, prim, skip);
504
505			for (i = start; i < start + count; ++i) {
506				unsigned int j = i + reorder[i % 3];
507				DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
508			}
509
510			DMA_COMMIT();
511		} else {
512			BEGIN_DMA(count * vtx_size + 1);
513			DMA_DRAW_PRIMITIVE(count, prim, skip);
514
515			if (vb_stride == vtx_size) {
516				DMA_COPY(&vtxbuf[vb_stride * start],
517					 vtx_size * count);
518			} else {
519				for (i = start; i < start + count; ++i) {
520					DMA_COPY(&vtxbuf[vb_stride * i],
521						 vtx_size);
522				}
523			}
524
525			DMA_COMMIT();
526		}
527
528		start += count;
529		n -= count;
530
531		prim |= BCI_CMD_DRAW_CONT;
532	}
533
534	return 0;
535}
536
537static int savage_dispatch_dma_idx(drm_savage_private_t *dev_priv,
538				   const drm_savage_cmd_header_t *cmd_header,
539				   const uint16_t *idx,
540				   const struct drm_buf *dmabuf)
541{
542	unsigned char reorder = 0;
543	unsigned int prim = cmd_header->idx.prim;
544	unsigned int skip = cmd_header->idx.skip;
545	unsigned int n = cmd_header->idx.count;
546	unsigned int i;
547	BCI_LOCALS;
548
549	if (!dmabuf) {
550		DRM_ERROR("called without dma buffers!\n");
551		return -EINVAL;
552	}
553
554	if (!n)
555		return 0;
556
557	switch (prim) {
558	case SAVAGE_PRIM_TRILIST_201:
559		reorder = 1;
560		prim = SAVAGE_PRIM_TRILIST;
561	case SAVAGE_PRIM_TRILIST:
562		if (n % 3 != 0) {
563			DRM_ERROR("wrong number of indices %u in TRILIST\n", n);
564			return -EINVAL;
565		}
566		break;
567	case SAVAGE_PRIM_TRISTRIP:
568	case SAVAGE_PRIM_TRIFAN:
569		if (n < 3) {
570			DRM_ERROR
571			    ("wrong number of indices %u in TRIFAN/STRIP\n", n);
572			return -EINVAL;
573		}
574		break;
575	default:
576		DRM_ERROR("invalid primitive type %u\n", prim);
577		return -EINVAL;
578	}
579
580	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
581		if (skip != 0) {
582			DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
583			return -EINVAL;
584		}
585	} else {
586		unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) -
587			(skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) -
588			(skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1);
589		if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) {
590			DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
591			return -EINVAL;
592		}
593		if (reorder) {
594			DRM_ERROR("TRILIST_201 used on Savage4 hardware\n");
595			return -EINVAL;
596		}
597	}
598
599	/* Vertex DMA doesn't work with command DMA at the same time,
600	 * so we use BCI_... to submit commands here. Flush buffered
601	 * faked DMA first. */
602	DMA_FLUSH();
603
604	if (dmabuf->bus_address != dev_priv->state.common.vbaddr) {
605		BEGIN_BCI(2);
606		BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1);
607		BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type);
608		dev_priv->state.common.vbaddr = dmabuf->bus_address;
609	}
610	if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) {
611		/* Workaround for what looks like a hardware bug. If a
612		 * WAIT_3D_IDLE was emitted some time before the
613		 * indexed drawing command then the engine will lock
614		 * up. There are two known workarounds:
615		 * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */
616		BEGIN_BCI(63);
617		for (i = 0; i < 63; ++i)
618			BCI_WRITE(BCI_CMD_WAIT);
619		dev_priv->waiting = 0;
620	}
621
622	prim <<= 25;
623	while (n != 0) {
624		/* Can emit up to 255 indices (85 triangles) at once. */
625		unsigned int count = n > 255 ? 255 : n;
626
627		/* check indices */
628		for (i = 0; i < count; ++i) {
629			if (idx[i] > dmabuf->total / 32) {
630				DRM_ERROR("idx[%u]=%u out of range (0-%u)\n",
631					  i, idx[i], dmabuf->total / 32);
632				return -EINVAL;
633			}
634		}
635
636		if (reorder) {
637			/* Need to reorder indices for correct flat
638			 * shading while preserving the clock sense
639			 * for correct culling. Only on Savage3D. */
640			int reorder[3] = { 2, -1, -1 };
641
642			BEGIN_BCI((count + 1 + 1) / 2);
643			BCI_DRAW_INDICES_S3D(count, prim, idx[2]);
644
645			for (i = 1; i + 1 < count; i += 2)
646				BCI_WRITE(idx[i + reorder[i % 3]] |
647					  (idx[i + 1 +
648					   reorder[(i + 1) % 3]] << 16));
649			if (i < count)
650				BCI_WRITE(idx[i + reorder[i % 3]]);
651		} else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
652			BEGIN_BCI((count + 1 + 1) / 2);
653			BCI_DRAW_INDICES_S3D(count, prim, idx[0]);
654
655			for (i = 1; i + 1 < count; i += 2)
656				BCI_WRITE(idx[i] | (idx[i + 1] << 16));
657			if (i < count)
658				BCI_WRITE(idx[i]);
659		} else {
660			BEGIN_BCI((count + 2 + 1) / 2);
661			BCI_DRAW_INDICES_S4(count, prim, skip);
662
663			for (i = 0; i + 1 < count; i += 2)
664				BCI_WRITE(idx[i] | (idx[i + 1] << 16));
665			if (i < count)
666				BCI_WRITE(idx[i]);
667		}
668
669		idx += count;
670		n -= count;
671
672		prim |= BCI_CMD_DRAW_CONT;
673	}
674
675	return 0;
676}
677
678static int savage_dispatch_vb_idx(drm_savage_private_t *dev_priv,
679				  const drm_savage_cmd_header_t *cmd_header,
680				  const uint16_t *idx,
681				  const uint32_t *vtxbuf,
682				  unsigned int vb_size, unsigned int vb_stride)
683{
684	unsigned char reorder = 0;
685	unsigned int prim = cmd_header->idx.prim;
686	unsigned int skip = cmd_header->idx.skip;
687	unsigned int n = cmd_header->idx.count;
688	unsigned int vtx_size;
689	unsigned int i;
690	DMA_LOCALS;
691
692	if (!n)
693		return 0;
694
695	switch (prim) {
696	case SAVAGE_PRIM_TRILIST_201:
697		reorder = 1;
698		prim = SAVAGE_PRIM_TRILIST;
699	case SAVAGE_PRIM_TRILIST:
700		if (n % 3 != 0) {
701			DRM_ERROR("wrong number of indices %u in TRILIST\n", n);
702			return -EINVAL;
703		}
704		break;
705	case SAVAGE_PRIM_TRISTRIP:
706	case SAVAGE_PRIM_TRIFAN:
707		if (n < 3) {
708			DRM_ERROR
709			    ("wrong number of indices %u in TRIFAN/STRIP\n", n);
710			return -EINVAL;
711		}
712		break;
713	default:
714		DRM_ERROR("invalid primitive type %u\n", prim);
715		return -EINVAL;
716	}
717
718	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
719		if (skip > SAVAGE_SKIP_ALL_S3D) {
720			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
721			return -EINVAL;
722		}
723		vtx_size = 8; /* full vertex */
724	} else {
725		if (skip > SAVAGE_SKIP_ALL_S4) {
726			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
727			return -EINVAL;
728		}
729		vtx_size = 10; /* full vertex */
730	}
731
732	vtx_size -= (skip & 1) + (skip >> 1 & 1) +
733		(skip >> 2 & 1) + (skip >> 3 & 1) + (skip >> 4 & 1) +
734		(skip >> 5 & 1) + (skip >> 6 & 1) + (skip >> 7 & 1);
735
736	if (vtx_size > vb_stride) {
737		DRM_ERROR("vertex size greater than vb stride (%u > %u)\n",
738			  vtx_size, vb_stride);
739		return -EINVAL;
740	}
741
742	prim <<= 25;
743	while (n != 0) {
744		/* Can emit up to 255 vertices (85 triangles) at once. */
745		unsigned int count = n > 255 ? 255 : n;
746
747		/* Check indices */
748		for (i = 0; i < count; ++i) {
749			if (idx[i] > vb_size / (vb_stride * 4)) {
750				DRM_ERROR("idx[%u]=%u out of range (0-%u)\n",
751					  i, idx[i],  vb_size / (vb_stride * 4));
752				return -EINVAL;
753			}
754		}
755
756		if (reorder) {
757			/* Need to reorder vertices for correct flat
758			 * shading while preserving the clock sense
759			 * for correct culling. Only on Savage3D. */
760			int reorder[3] = { 2, -1, -1 };
761
762			BEGIN_DMA(count * vtx_size + 1);
763			DMA_DRAW_PRIMITIVE(count, prim, skip);
764
765			for (i = 0; i < count; ++i) {
766				unsigned int j = idx[i + reorder[i % 3]];
767				DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
768			}
769
770			DMA_COMMIT();
771		} else {
772			BEGIN_DMA(count * vtx_size + 1);
773			DMA_DRAW_PRIMITIVE(count, prim, skip);
774
775			for (i = 0; i < count; ++i) {
776				unsigned int j = idx[i];
777				DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
778			}
779
780			DMA_COMMIT();
781		}
782
783		idx += count;
784		n -= count;
785
786		prim |= BCI_CMD_DRAW_CONT;
787	}
788
789	return 0;
790}
791
792static int savage_dispatch_clear(drm_savage_private_t *dev_priv,
793				 const drm_savage_cmd_header_t *cmd_header,
794				 const drm_savage_cmd_header_t *data,
795				 unsigned int nbox,
796				 const struct drm_clip_rect *boxes)
797{
798	unsigned int flags = cmd_header->clear0.flags;
799	unsigned int clear_cmd;
800	unsigned int i, nbufs;
801	DMA_LOCALS;
802
803	if (nbox == 0)
804		return 0;
805
806	clear_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP |
807		BCI_CMD_SEND_COLOR | BCI_CMD_DEST_PBD_NEW;
808	BCI_CMD_SET_ROP(clear_cmd,0xCC);
809
810	nbufs = ((flags & SAVAGE_FRONT) ? 1 : 0) +
811	    ((flags & SAVAGE_BACK) ? 1 : 0) + ((flags & SAVAGE_DEPTH) ? 1 : 0);
812	if (nbufs == 0)
813		return 0;
814
815	if (data->clear1.mask != 0xffffffff) {
816		/* set mask */
817		BEGIN_DMA(2);
818		DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1);
819		DMA_WRITE(data->clear1.mask);
820		DMA_COMMIT();
821	}
822	for (i = 0; i < nbox; ++i) {
823		unsigned int x, y, w, h;
824		unsigned int buf;
825
826		x = boxes[i].x1, y = boxes[i].y1;
827		w = boxes[i].x2 - boxes[i].x1;
828		h = boxes[i].y2 - boxes[i].y1;
829		BEGIN_DMA(nbufs * 6);
830		for (buf = SAVAGE_FRONT; buf <= SAVAGE_DEPTH; buf <<= 1) {
831			if (!(flags & buf))
832				continue;
833			DMA_WRITE(clear_cmd);
834			switch (buf) {
835			case SAVAGE_FRONT:
836				DMA_WRITE(dev_priv->front_offset);
837				DMA_WRITE(dev_priv->front_bd);
838				break;
839			case SAVAGE_BACK:
840				DMA_WRITE(dev_priv->back_offset);
841				DMA_WRITE(dev_priv->back_bd);
842				break;
843			case SAVAGE_DEPTH:
844				DMA_WRITE(dev_priv->depth_offset);
845				DMA_WRITE(dev_priv->depth_bd);
846				break;
847			}
848			DMA_WRITE(data->clear1.value);
849			DMA_WRITE(BCI_X_Y(x, y));
850			DMA_WRITE(BCI_W_H(w, h));
851		}
852		DMA_COMMIT();
853	}
854	if (data->clear1.mask != 0xffffffff) {
855		/* reset mask */
856		BEGIN_DMA(2);
857		DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1);
858		DMA_WRITE(0xffffffff);
859		DMA_COMMIT();
860	}
861
862	return 0;
863}
864
865static int savage_dispatch_swap(drm_savage_private_t *dev_priv,
866				unsigned int nbox, const struct drm_clip_rect *boxes)
867{
868	unsigned int swap_cmd;
869	unsigned int i;
870	DMA_LOCALS;
871
872	if (nbox == 0)
873		return 0;
874
875	swap_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP |
876		BCI_CMD_SRC_PBD_COLOR_NEW | BCI_CMD_DEST_GBD;
877	BCI_CMD_SET_ROP(swap_cmd,0xCC);
878
879	for (i = 0; i < nbox; ++i) {
880		BEGIN_DMA(6);
881		DMA_WRITE(swap_cmd);
882		DMA_WRITE(dev_priv->back_offset);
883		DMA_WRITE(dev_priv->back_bd);
884		DMA_WRITE(BCI_X_Y(boxes[i].x1, boxes[i].y1));
885		DMA_WRITE(BCI_X_Y(boxes[i].x1, boxes[i].y1));
886		DMA_WRITE(BCI_W_H(boxes[i].x2 - boxes[i].x1,
887				  boxes[i].y2 - boxes[i].y1));
888		DMA_COMMIT();
889	}
890
891	return 0;
892}
893
894static int savage_dispatch_draw(drm_savage_private_t *dev_priv,
895				const drm_savage_cmd_header_t *start,
896				const drm_savage_cmd_header_t *end,
897				const struct drm_buf *dmabuf,
898				const unsigned int *vtxbuf,
899				unsigned int vb_size, unsigned int vb_stride,
900				unsigned int nbox,
901				const struct drm_clip_rect *boxes)
902{
903	unsigned int i, j;
904	int ret;
905
906	for (i = 0; i < nbox; ++i) {
907		const drm_savage_cmd_header_t *cmdbuf;
908		dev_priv->emit_clip_rect(dev_priv, &boxes[i]);
909
910		cmdbuf = start;
911		while (cmdbuf < end) {
912			drm_savage_cmd_header_t cmd_header;
913			cmd_header = *cmdbuf;
914			cmdbuf++;
915			switch (cmd_header.cmd.cmd) {
916			case SAVAGE_CMD_DMA_PRIM:
917				ret = savage_dispatch_dma_prim(
918					dev_priv, &cmd_header, dmabuf);
919				break;
920			case SAVAGE_CMD_VB_PRIM:
921				ret = savage_dispatch_vb_prim(
922					dev_priv, &cmd_header,
923					vtxbuf, vb_size, vb_stride);
924				break;
925			case SAVAGE_CMD_DMA_IDX:
926				j = (cmd_header.idx.count + 3) / 4;
927				/* j was check in savage_bci_cmdbuf */
928				ret = savage_dispatch_dma_idx(dev_priv,
929					&cmd_header, (const uint16_t *)cmdbuf,
930					dmabuf);
931				cmdbuf += j;
932				break;
933			case SAVAGE_CMD_VB_IDX:
934				j = (cmd_header.idx.count + 3) / 4;
935				/* j was check in savage_bci_cmdbuf */
936				ret = savage_dispatch_vb_idx(dev_priv,
937					&cmd_header, (const uint16_t *)cmdbuf,
938					(const uint32_t *)vtxbuf, vb_size,
939					vb_stride);
940				cmdbuf += j;
941				break;
942			default:
943				/* What's the best return code? EFAULT? */
944				DRM_ERROR("IMPLEMENTATION ERROR: "
945					  "non-drawing-command %d\n",
946					  cmd_header.cmd.cmd);
947				return -EINVAL;
948			}
949
950			if (ret != 0)
951				return ret;
952		}
953	}
954
955	return 0;
956}
957
958int savage_bci_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_priv)
959{
960	drm_savage_private_t *dev_priv = dev->dev_private;
961	struct drm_device_dma *dma = dev->dma;
962	struct drm_buf *dmabuf;
963	drm_savage_cmdbuf_t *cmdbuf = data;
964	drm_savage_cmd_header_t *kcmd_addr = NULL;
965	drm_savage_cmd_header_t *first_draw_cmd;
966	unsigned int *kvb_addr = NULL;
967	struct drm_clip_rect *kbox_addr = NULL;
968	unsigned int i, j;
969	int ret = 0;
970
971	DRM_DEBUG("\n");
972
973	LOCK_TEST_WITH_RETURN(dev, file_priv);
974
975	if (dma && dma->buflist) {
976		if (cmdbuf->dma_idx > dma->buf_count) {
977			DRM_ERROR
978			    ("vertex buffer index %u out of range (0-%u)\n",
979			     cmdbuf->dma_idx, dma->buf_count - 1);
980			return -EINVAL;
981		}
982		dmabuf = dma->buflist[cmdbuf->dma_idx];
983	} else {
984		dmabuf = NULL;
985	}
986
987	/* Copy the user buffers into kernel temporary areas.  This hasn't been
988	 * a performance loss compared to VERIFYAREA_READ/
989	 * COPY_FROM_USER_UNCHECKED when done in other drivers, and is correct
990	 * for locking on FreeBSD.
991	 */
992	if (cmdbuf->size) {
993		kcmd_addr = drm_alloc(cmdbuf->size * 8, DRM_MEM_DRIVER);
994		if (kcmd_addr == NULL)
995			return -ENOMEM;
996
997		if (DRM_COPY_FROM_USER(kcmd_addr, cmdbuf->cmd_addr,
998				       cmdbuf->size * 8))
999		{
1000			drm_free(kcmd_addr, cmdbuf->size * 8, DRM_MEM_DRIVER);
1001			return -EFAULT;
1002		}
1003		cmdbuf->cmd_addr = kcmd_addr;
1004	}
1005	if (cmdbuf->vb_size) {
1006		kvb_addr = drm_alloc(cmdbuf->vb_size, DRM_MEM_DRIVER);
1007		if (kvb_addr == NULL) {
1008			ret = -ENOMEM;
1009			goto done;
1010		}
1011
1012		if (DRM_COPY_FROM_USER(kvb_addr, cmdbuf->vb_addr,
1013				       cmdbuf->vb_size)) {
1014			ret = -EFAULT;
1015			goto done;
1016		}
1017		cmdbuf->vb_addr = kvb_addr;
1018	}
1019	if (cmdbuf->nbox) {
1020		kbox_addr = drm_alloc(cmdbuf->nbox *
1021				      sizeof(struct drm_clip_rect),
1022				      DRM_MEM_DRIVER);
1023		if (kbox_addr == NULL) {
1024			ret = -ENOMEM;
1025			goto done;
1026		}
1027
1028		if (DRM_COPY_FROM_USER(kbox_addr, cmdbuf->box_addr,
1029				       cmdbuf->nbox *
1030				       sizeof(struct drm_clip_rect))) {
1031			ret = -EFAULT;
1032			goto done;
1033		}
1034		cmdbuf->box_addr = kbox_addr;
1035	}
1036
1037	/* Make sure writes to DMA buffers are finished before sending
1038	 * DMA commands to the graphics hardware. */
1039	DRM_MEMORYBARRIER();
1040
1041	/* Coming from user space. Don't know if the Xserver has
1042	 * emitted wait commands. Assuming the worst. */
1043	dev_priv->waiting = 1;
1044
1045	i = 0;
1046	first_draw_cmd = NULL;
1047	while (i < cmdbuf->size) {
1048		drm_savage_cmd_header_t cmd_header;
1049		cmd_header = *(drm_savage_cmd_header_t *)cmdbuf->cmd_addr;
1050		cmdbuf->cmd_addr++;
1051		i++;
1052
1053		/* Group drawing commands with same state to minimize
1054		 * iterations over clip rects. */
1055		j = 0;
1056		switch (cmd_header.cmd.cmd) {
1057		case SAVAGE_CMD_DMA_IDX:
1058		case SAVAGE_CMD_VB_IDX:
1059			j = (cmd_header.idx.count + 3) / 4;
1060			if (i + j > cmdbuf->size) {
1061				DRM_ERROR("indexed drawing command extends "
1062					  "beyond end of command buffer\n");
1063				DMA_FLUSH();
1064				return -EINVAL;
1065			}
1066			/* fall through */
1067		case SAVAGE_CMD_DMA_PRIM:
1068		case SAVAGE_CMD_VB_PRIM:
1069			if (!first_draw_cmd)
1070				first_draw_cmd = cmdbuf->cmd_addr - 1;
1071			cmdbuf->cmd_addr += j;
1072			i += j;
1073			break;
1074		default:
1075			if (first_draw_cmd) {
1076				ret = savage_dispatch_draw(
1077					dev_priv, first_draw_cmd,
1078					cmdbuf->cmd_addr - 1,
1079					dmabuf, cmdbuf->vb_addr,
1080					cmdbuf->vb_size,
1081					cmdbuf->vb_stride,
1082					cmdbuf->nbox, cmdbuf->box_addr);
1083				if (ret != 0)
1084					return ret;
1085				first_draw_cmd = NULL;
1086			}
1087		}
1088		if (first_draw_cmd)
1089			continue;
1090
1091		switch (cmd_header.cmd.cmd) {
1092		case SAVAGE_CMD_STATE:
1093			j = (cmd_header.state.count + 1) / 2;
1094			if (i + j > cmdbuf->size) {
1095				DRM_ERROR("command SAVAGE_CMD_STATE extends "
1096					  "beyond end of command buffer\n");
1097				DMA_FLUSH();
1098				ret = -EINVAL;
1099				goto done;
1100			}
1101			ret = savage_dispatch_state(dev_priv, &cmd_header,
1102				(const uint32_t *)cmdbuf->cmd_addr);
1103			cmdbuf->cmd_addr += j;
1104			i += j;
1105			break;
1106		case SAVAGE_CMD_CLEAR:
1107			if (i + 1 > cmdbuf->size) {
1108				DRM_ERROR("command SAVAGE_CMD_CLEAR extends "
1109					  "beyond end of command buffer\n");
1110				DMA_FLUSH();
1111				ret = -EINVAL;
1112				goto done;
1113			}
1114			ret = savage_dispatch_clear(dev_priv, &cmd_header,
1115						    cmdbuf->cmd_addr,
1116						    cmdbuf->nbox,
1117						    cmdbuf->box_addr);
1118			cmdbuf->cmd_addr++;
1119			i++;
1120			break;
1121		case SAVAGE_CMD_SWAP:
1122			ret = savage_dispatch_swap(dev_priv, cmdbuf->nbox,
1123						   cmdbuf->box_addr);
1124			break;
1125		default:
1126			DRM_ERROR("invalid command 0x%x\n",
1127				  cmd_header.cmd.cmd);
1128			DMA_FLUSH();
1129			ret = -EINVAL;
1130			goto done;
1131		}
1132
1133		if (ret != 0) {
1134			DMA_FLUSH();
1135			goto done;
1136		}
1137	}
1138
1139	if (first_draw_cmd) {
1140		ret = savage_dispatch_draw(
1141			dev_priv, first_draw_cmd, cmdbuf->cmd_addr, dmabuf,
1142			cmdbuf->vb_addr, cmdbuf->vb_size, cmdbuf->vb_stride,
1143			cmdbuf->nbox, cmdbuf->box_addr);
1144		if (ret != 0) {
1145			DMA_FLUSH();
1146			goto done;
1147		}
1148	}
1149
1150	DMA_FLUSH();
1151
1152	if (dmabuf && cmdbuf->discard) {
1153		drm_savage_buf_priv_t *buf_priv = dmabuf->dev_private;
1154		uint16_t event;
1155		event = savage_bci_emit_event(dev_priv, SAVAGE_WAIT_3D);
1156		SET_AGE(&buf_priv->age, event, dev_priv->event_wrap);
1157		savage_freelist_put(dev, dmabuf);
1158	}
1159
1160done:
1161	/* If we didn't need to allocate them, these'll be NULL */
1162	drm_free(kcmd_addr, cmdbuf->size * 8, DRM_MEM_DRIVER);
1163	drm_free(kvb_addr, cmdbuf->vb_size, DRM_MEM_DRIVER);
1164	drm_free(kbox_addr, cmdbuf->nbox * sizeof(struct drm_clip_rect),
1165		 DRM_MEM_DRIVER);
1166
1167	return ret;
1168}
1169