1/*
2 * Copyright 2010 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 *          Alex Deucher
26 *          Jerome Glisse
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD$");
31
32#include <dev/drm2/drmP.h>
33#include "radeon.h"
34#include "radeon_asic.h"
35#include "evergreend.h"
36#include "evergreen_reg_safe.h"
37#include "cayman_reg_safe.h"
38#include "r600_cs.h"
39
40#define MAX(a,b)                   (((a)>(b))?(a):(b))
41#define MIN(a,b)                   (((a)<(b))?(a):(b))
42
43static int evergreen_cs_packet_next_reloc(struct radeon_cs_parser *p,
44					  struct radeon_cs_reloc **cs_reloc);
45
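/*
 * Per-CS state gathered while parsing the command stream.  The *_dirty
 * flags let evergreen_cs_track_check() revalidate only the blocks (CB,
 * DB, streamout) whose registers were actually touched by this CS.
 */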
46struct evergreen_cs_track {
47	u32			group_size;
48	u32			nbanks;
49	u32			npipes;
50	u32			row_size;
51	/* value we track */
52	u32			nsamples;		/* unused */
53	struct radeon_bo	*cb_color_bo[12];
54	u32			cb_color_bo_offset[12];
55	struct radeon_bo	*cb_color_fmask_bo[8];	/* unused */
56	struct radeon_bo	*cb_color_cmask_bo[8];	/* unused */
57	u32			cb_color_info[12];
58	u32			cb_color_view[12];
59	u32			cb_color_pitch[12];
60	u32			cb_color_slice[12];
61	u32			cb_color_slice_idx[12];
62	u32			cb_color_attrib[12];
63	u32			cb_color_cmask_slice[8];/* unused */
64	u32			cb_color_fmask_slice[8];/* unused */
65	u32			cb_target_mask;
66	u32			cb_shader_mask; /* unused */
67	u32			vgt_strmout_config;
68	u32			vgt_strmout_buffer_config;
69	struct radeon_bo	*vgt_strmout_bo[4];
70	u32			vgt_strmout_bo_offset[4];
71	u32			vgt_strmout_size[4];
72	u32			db_depth_control;
73	u32			db_depth_view;
74	u32			db_depth_slice;
75	u32			db_depth_size;
76	u32			db_z_info;
77	u32			db_z_read_offset;
78	u32			db_z_write_offset;
79	struct radeon_bo	*db_z_read_bo;
80	struct radeon_bo	*db_z_write_bo;
81	u32			db_s_info;
82	u32			db_s_read_offset;
83	u32			db_s_write_offset;
84	struct radeon_bo	*db_s_read_bo;
85	struct radeon_bo	*db_s_write_bo;
86	bool			sx_misc_kill_all_prims;
87	bool			cb_dirty;
88	bool			db_dirty;
89	bool			streamout_dirty;
90	u32			htile_offset;
91	u32			htile_surface;
92	struct radeon_bo	*htile_bo;
93};
94
95static u32 evergreen_cs_get_aray_mode(u32 tiling_flags)
96{
97	if (tiling_flags & RADEON_TILING_MACRO)
98		return ARRAY_2D_TILED_THIN1;
99	else if (tiling_flags & RADEON_TILING_MICRO)
100		return ARRAY_1D_TILED_THIN1;
101	else
102		return ARRAY_LINEAR_GENERAL;
103}
104
105static u32 evergreen_cs_get_num_banks(u32 nbanks)
106{
107	switch (nbanks) {
108	case 2:
109		return ADDR_SURF_2_BANK;
110	case 4:
111		return ADDR_SURF_4_BANK;
112	case 8:
113	default:
114		return ADDR_SURF_8_BANK;
115	case 16:
116		return ADDR_SURF_16_BANK;
117	}
118}
119
120static void evergreen_cs_track_init(struct evergreen_cs_track *track)
121{
122	int i;
123
124	for (i = 0; i < 8; i++) {
125		track->cb_color_fmask_bo[i] = NULL;
126		track->cb_color_cmask_bo[i] = NULL;
127		track->cb_color_cmask_slice[i] = 0;
128		track->cb_color_fmask_slice[i] = 0;
129	}
130
131	for (i = 0; i < 12; i++) {
132		track->cb_color_bo[i] = NULL;
133		track->cb_color_bo_offset[i] = 0xFFFFFFFF;
134		track->cb_color_info[i] = 0;
135		track->cb_color_view[i] = 0xFFFFFFFF;
136		track->cb_color_pitch[i] = 0;
137		track->cb_color_slice[i] = 0xfffffff;
138		track->cb_color_slice_idx[i] = 0;
139	}
140	track->cb_target_mask = 0xFFFFFFFF;
141	track->cb_shader_mask = 0xFFFFFFFF;
142	track->cb_dirty = true;
143
144	track->db_depth_slice = 0xffffffff;
145	track->db_depth_view = 0xFFFFC000;
146	track->db_depth_size = 0xFFFFFFFF;
147	track->db_depth_control = 0xFFFFFFFF;
148	track->db_z_info = 0xFFFFFFFF;
149	track->db_z_read_offset = 0xFFFFFFFF;
150	track->db_z_write_offset = 0xFFFFFFFF;
151	track->db_z_read_bo = NULL;
152	track->db_z_write_bo = NULL;
153	track->db_s_info = 0xFFFFFFFF;
154	track->db_s_read_offset = 0xFFFFFFFF;
155	track->db_s_write_offset = 0xFFFFFFFF;
156	track->db_s_read_bo = NULL;
157	track->db_s_write_bo = NULL;
158	track->db_dirty = true;
159	track->htile_bo = NULL;
160	track->htile_offset = 0xFFFFFFFF;
161	track->htile_surface = 0;
162
163	for (i = 0; i < 4; i++) {
164		track->vgt_strmout_size[i] = 0;
165		track->vgt_strmout_bo[i] = NULL;
166		track->vgt_strmout_bo_offset[i] = 0xFFFFFFFF;
167	}
168	track->streamout_dirty = true;
169	track->sx_misc_kill_all_prims = false;
170}
171
172struct eg_surface {
173	/* value gathered from cs */
174	unsigned	nbx;
175	unsigned	nby;
176	unsigned	format;
177	unsigned	mode;
178	unsigned	nbanks;
179	unsigned	bankw;
180	unsigned	bankh;
181	unsigned	tsplit;
182	unsigned	mtilea;
183	unsigned	nsamples;
184	/* output value */
185	unsigned	bpe;
186	unsigned	layer_size;
187	unsigned	palign;
188	unsigned	halign;
189	unsigned long	base_align;
190};
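/*
 * nbx/nby are derived from the PITCH_TILE_MAX/SLICE_TILE_MAX style fields
 * as nbx = (pitch + 1) * 8 and nby = ((slice + 1) * 64) / nbx.  For a
 * hypothetical 1920x1080 color target that means pitch = 239 and
 * slice = 32399, giving nbx = 1920 and nby = 1080.
 */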
191
192static int evergreen_surface_check_linear(struct radeon_cs_parser *p,
193					  struct eg_surface *surf,
194					  const char *prefix)
195{
196	surf->layer_size = surf->nbx * surf->nby * surf->bpe * surf->nsamples;
197	surf->base_align = surf->bpe;
198	surf->palign = 1;
199	surf->halign = 1;
200	return 0;
201}
202
203static int evergreen_surface_check_linear_aligned(struct radeon_cs_parser *p,
204						  struct eg_surface *surf,
205						  const char *prefix)
206{
207	struct evergreen_cs_track *track = p->track;
208	unsigned palign;
209
210	palign = MAX(64, track->group_size / surf->bpe);
211	surf->layer_size = surf->nbx * surf->nby * surf->bpe * surf->nsamples;
212	surf->base_align = track->group_size;
213	surf->palign = palign;
214	surf->halign = 1;
215	if (surf->nbx & (palign - 1)) {
216		if (prefix) {
217			dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n",
218				 __func__, __LINE__, prefix, surf->nbx, palign);
219		}
220		return -EINVAL;
221	}
222	return 0;
223}
224
225static int evergreen_surface_check_1d(struct radeon_cs_parser *p,
226				      struct eg_surface *surf,
227				      const char *prefix)
228{
229	struct evergreen_cs_track *track = p->track;
230	unsigned palign;
231
232	palign = track->group_size / (8 * surf->bpe * surf->nsamples);
233	palign = MAX(8, palign);
234	surf->layer_size = surf->nbx * surf->nby * surf->bpe;
235	surf->base_align = track->group_size;
236	surf->palign = palign;
237	surf->halign = 8;
238	if ((surf->nbx & (palign - 1))) {
239		if (prefix) {
240			dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d (%d %d %d)\n",
241				 __func__, __LINE__, prefix, surf->nbx, palign,
242				 track->group_size, surf->bpe, surf->nsamples);
243		}
244		return -EINVAL;
245	}
246	if ((surf->nby & (8 - 1))) {
247		if (prefix) {
248			dev_warn(p->dev, "%s:%d %s height %d invalid must be aligned with 8\n",
249				 __func__, __LINE__, prefix, surf->nby);
250		}
251		return -EINVAL;
252	}
253	return 0;
254}
255
256static int evergreen_surface_check_2d(struct radeon_cs_parser *p,
257				      struct eg_surface *surf,
258				      const char *prefix)
259{
260	struct evergreen_cs_track *track = p->track;
261	unsigned palign, halign, tileb, slice_pt;
262	unsigned mtile_pr, mtile_ps, mtileb;
263
264	tileb = 64 * surf->bpe * surf->nsamples;
265	slice_pt = 1;
266	if (tileb > surf->tsplit) {
267		slice_pt = tileb / surf->tsplit;
268	}
269	tileb = tileb / slice_pt;
270	/* macro tile width & height */
271	palign = (8 * surf->bankw * track->npipes) * surf->mtilea;
272	halign = (8 * surf->bankh * surf->nbanks) / surf->mtilea;
273	mtileb = (palign / 8) * (halign / 8) * tileb;
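	/*
	 * Example with assumed values (npipes = 8, nbanks = 8, bankw = bankh = 1,
	 * mtilea = 1, bpe = 4, nsamples = 1, tsplit >= 256): tileb = 256 bytes,
	 * slice_pt = 1, palign = 64, halign = 64, mtileb = 8 * 8 * 256 = 16384 bytes.
	 */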
274	mtile_pr = surf->nbx / palign;
275	mtile_ps = (mtile_pr * surf->nby) / halign;
276	surf->layer_size = mtile_ps * mtileb * slice_pt;
277	surf->base_align = (palign / 8) * (halign / 8) * tileb;
278	surf->palign = palign;
279	surf->halign = halign;
280
281	if ((surf->nbx & (palign - 1))) {
282		if (prefix) {
283			dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n",
284				 __func__, __LINE__, prefix, surf->nbx, palign);
285		}
286		return -EINVAL;
287	}
288	if ((surf->nby & (halign - 1))) {
289		if (prefix) {
290			dev_warn(p->dev, "%s:%d %s height %d invalid must be aligned with %d\n",
291				 __func__, __LINE__, prefix, surf->nby, halign);
292		}
293		return -EINVAL;
294	}
295
296	return 0;
297}
298
299static int evergreen_surface_check(struct radeon_cs_parser *p,
300				   struct eg_surface *surf,
301				   const char *prefix)
302{
303	/* some common value computed here */
304	surf->bpe = r600_fmt_get_blocksize(surf->format);
305
306	switch (surf->mode) {
307	case ARRAY_LINEAR_GENERAL:
308		return evergreen_surface_check_linear(p, surf, prefix);
309	case ARRAY_LINEAR_ALIGNED:
310		return evergreen_surface_check_linear_aligned(p, surf, prefix);
311	case ARRAY_1D_TILED_THIN1:
312		return evergreen_surface_check_1d(p, surf, prefix);
313	case ARRAY_2D_TILED_THIN1:
314		return evergreen_surface_check_2d(p, surf, prefix);
315	default:
316		dev_warn(p->dev, "%s:%d %s invalid array mode %d\n",
317				__func__, __LINE__, prefix, surf->mode);
318		return -EINVAL;
319	}
320	return -EINVAL;
321}
322
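/*
 * Convert the raw register field encodings (number of banks, bank
 * width/height, macro tile aspect, tile split) into actual values.  Only
 * 2D tiled surfaces need the conversion; the other array modes return
 * early.
 */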
323static int evergreen_surface_value_conv_check(struct radeon_cs_parser *p,
324					      struct eg_surface *surf,
325					      const char *prefix)
326{
327	switch (surf->mode) {
328	case ARRAY_2D_TILED_THIN1:
329		break;
330	case ARRAY_LINEAR_GENERAL:
331	case ARRAY_LINEAR_ALIGNED:
332	case ARRAY_1D_TILED_THIN1:
333		return 0;
334	default:
335		dev_warn(p->dev, "%s:%d %s invalid array mode %d\n",
336				__func__, __LINE__, prefix, surf->mode);
337		return -EINVAL;
338	}
339
340	switch (surf->nbanks) {
341	case 0: surf->nbanks = 2; break;
342	case 1: surf->nbanks = 4; break;
343	case 2: surf->nbanks = 8; break;
344	case 3: surf->nbanks = 16; break;
345	default:
346		dev_warn(p->dev, "%s:%d %s invalid number of banks %d\n",
347			 __func__, __LINE__, prefix, surf->nbanks);
348		return -EINVAL;
349	}
350	switch (surf->bankw) {
351	case 0: surf->bankw = 1; break;
352	case 1: surf->bankw = 2; break;
353	case 2: surf->bankw = 4; break;
354	case 3: surf->bankw = 8; break;
355	default:
356		dev_warn(p->dev, "%s:%d %s invalid bankw %d\n",
357			 __func__, __LINE__, prefix, surf->bankw);
358		return -EINVAL;
359	}
360	switch (surf->bankh) {
361	case 0: surf->bankh = 1; break;
362	case 1: surf->bankh = 2; break;
363	case 2: surf->bankh = 4; break;
364	case 3: surf->bankh = 8; break;
365	default:
366		dev_warn(p->dev, "%s:%d %s invalid bankh %d\n",
367			 __func__, __LINE__, prefix, surf->bankh);
368		return -EINVAL;
369	}
370	switch (surf->mtilea) {
371	case 0: surf->mtilea = 1; break;
372	case 1: surf->mtilea = 2; break;
373	case 2: surf->mtilea = 4; break;
374	case 3: surf->mtilea = 8; break;
375	default:
376		dev_warn(p->dev, "%s:%d %s invalid macro tile aspect %d\n",
377			 __func__, __LINE__, prefix, surf->mtilea);
378		return -EINVAL;
379	}
380	switch (surf->tsplit) {
381	case 0: surf->tsplit = 64; break;
382	case 1: surf->tsplit = 128; break;
383	case 2: surf->tsplit = 256; break;
384	case 3: surf->tsplit = 512; break;
385	case 4: surf->tsplit = 1024; break;
386	case 5: surf->tsplit = 2048; break;
387	case 6: surf->tsplit = 4096; break;
388	default:
389		dev_warn(p->dev, "%s:%d %s invalid tile split %d\n",
390			 __func__, __LINE__, prefix, surf->tsplit);
391		return -EINVAL;
392	}
393	return 0;
394}
395
396static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned id)
397{
398	struct evergreen_cs_track *track = p->track;
399	struct eg_surface surf;
400	unsigned pitch, slice, mslice;
401	unsigned long offset;
402	int r;
403
404	mslice = G_028C6C_SLICE_MAX(track->cb_color_view[id]) + 1;
405	pitch = track->cb_color_pitch[id];
406	slice = track->cb_color_slice[id];
407	surf.nbx = (pitch + 1) * 8;
408	surf.nby = ((slice + 1) * 64) / surf.nbx;
409	surf.mode = G_028C70_ARRAY_MODE(track->cb_color_info[id]);
410	surf.format = G_028C70_FORMAT(track->cb_color_info[id]);
411	surf.tsplit = G_028C74_TILE_SPLIT(track->cb_color_attrib[id]);
412	surf.nbanks = G_028C74_NUM_BANKS(track->cb_color_attrib[id]);
413	surf.bankw = G_028C74_BANK_WIDTH(track->cb_color_attrib[id]);
414	surf.bankh = G_028C74_BANK_HEIGHT(track->cb_color_attrib[id]);
415	surf.mtilea = G_028C74_MACRO_TILE_ASPECT(track->cb_color_attrib[id]);
416	surf.nsamples = 1;
417
418	if (!r600_fmt_is_valid_color(surf.format)) {
419		dev_warn(p->dev, "%s:%d cb invalid format %d for %d (0x%08x)\n",
420			 __func__, __LINE__, surf.format,
421			id, track->cb_color_info[id]);
422		return -EINVAL;
423	}
424
425	r = evergreen_surface_value_conv_check(p, &surf, "cb");
426	if (r) {
427		return r;
428	}
429
430	r = evergreen_surface_check(p, &surf, "cb");
431	if (r) {
432		dev_warn(p->dev, "%s:%d cb[%d] invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
433			 __func__, __LINE__, id, track->cb_color_pitch[id],
434			 track->cb_color_slice[id], track->cb_color_attrib[id],
435			 track->cb_color_info[id]);
436		return r;
437	}
438
439	offset = track->cb_color_bo_offset[id] << 8;
440	if (offset & (surf.base_align - 1)) {
441		dev_warn(p->dev, "%s:%d cb[%d] bo base %ld not aligned with %ld\n",
442			 __func__, __LINE__, id, offset, surf.base_align);
443		return -EINVAL;
444	}
445
446	offset += surf.layer_size * mslice;
447	if (offset > radeon_bo_size(track->cb_color_bo[id])) {
		/* Old ddx versions are broken: they allocate the bo with
		 * w*h*bpp but program the slice with ALIGN(h, 8).  Catch
		 * this and patch the command stream.
		 */
452		if (!surf.mode) {
453			volatile u32 *ib = p->ib.ptr;
454			unsigned long tmp, nby, bsize, size, min = 0;
455
456			/* find the height the ddx wants */
457			if (surf.nby > 8) {
458				min = surf.nby - 8;
459			}
460			bsize = radeon_bo_size(track->cb_color_bo[id]);
461			tmp = track->cb_color_bo_offset[id] << 8;
462			for (nby = surf.nby; nby > min; nby--) {
463				size = nby * surf.nbx * surf.bpe * surf.nsamples;
464				if ((tmp + size * mslice) <= bsize) {
465					break;
466				}
467			}
468			if (nby > min) {
469				surf.nby = nby;
470				slice = ((nby * surf.nbx) / 64) - 1;
471				if (!evergreen_surface_check(p, &surf, "cb")) {
472					/* check if this one works */
473					tmp += surf.layer_size * mslice;
474					if (tmp <= bsize) {
475						ib[track->cb_color_slice_idx[id]] = slice;
476						goto old_ddx_ok;
477					}
478				}
479			}
480		}
481		dev_warn(p->dev, "%s:%d cb[%d] bo too small (layer size %d, "
482			 "offset %d, max layer %d, bo size %ld, slice %d)\n",
483			 __func__, __LINE__, id, surf.layer_size,
484			track->cb_color_bo_offset[id] << 8, mslice,
485			radeon_bo_size(track->cb_color_bo[id]), slice);
486		dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n",
487			 __func__, __LINE__, surf.nbx, surf.nby,
488			surf.mode, surf.bpe, surf.nsamples,
489			surf.bankw, surf.bankh,
490			surf.tsplit, surf.mtilea);
491		return -EINVAL;
492	}
493old_ddx_ok:
494
495	return 0;
496}
497
498static int evergreen_cs_track_validate_htile(struct radeon_cs_parser *p,
499						unsigned nbx, unsigned nby)
500{
501	struct evergreen_cs_track *track = p->track;
502	unsigned long size;
503
504	if (track->htile_bo == NULL) {
505		dev_warn(p->dev, "%s:%d htile enabled without htile surface 0x%08x\n",
506				__func__, __LINE__, track->db_z_info);
507		return -EINVAL;
508	}
509
510	if (G_028ABC_LINEAR(track->htile_surface)) {
511		/* pitch must be 16 htiles aligned == 16 * 8 pixel aligned */
512		nbx = roundup(nbx, 16 * 8);
513		/* height is npipes htiles aligned == npipes * 8 pixel aligned */
514		nby = roundup(nby, track->npipes * 8);
515	} else {
516		/* always assume 8x8 htile */
		/* alignment is htile align * 8; htile align varies according
		 * to the number of pipes, the tile width and nby
		 */
520		switch (track->npipes) {
521		case 8:
522			/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
523			nbx = roundup(nbx, 64 * 8);
524			nby = roundup(nby, 64 * 8);
525			break;
526		case 4:
527			/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
528			nbx = roundup(nbx, 64 * 8);
529			nby = roundup(nby, 32 * 8);
530			break;
531		case 2:
532			/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
533			nbx = roundup(nbx, 32 * 8);
534			nby = roundup(nby, 32 * 8);
535			break;
536		case 1:
537			/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
538			nbx = roundup(nbx, 32 * 8);
539			nby = roundup(nby, 16 * 8);
540			break;
541		default:
542			dev_warn(p->dev, "%s:%d invalid num pipes %d\n",
543					__func__, __LINE__, track->npipes);
544			return -EINVAL;
545		}
546	}
547	/* compute number of htile */
548	nbx = nbx >> 3;
549	nby = nby >> 3;
550	/* size must be aligned on npipes * 2K boundary */
551	size = roundup(nbx * nby * 4, track->npipes * (2 << 10));
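	/*
	 * Example with assumed values (1024x768 depth buffer, npipes = 2,
	 * non-linear htile): nbx = roundup(1024, 256) = 1024 and
	 * nby = roundup(768, 256) = 768, i.e. 128 x 96 htiles, so
	 * size = roundup(128 * 96 * 4, 4096) = 49152 bytes before the offset.
	 */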
552	size += track->htile_offset;
553
554	if (size > radeon_bo_size(track->htile_bo)) {
555		dev_warn(p->dev, "%s:%d htile surface too small %ld for %ld (%d %d)\n",
556				__func__, __LINE__, radeon_bo_size(track->htile_bo),
557				size, nbx, nby);
558		return -EINVAL;
559	}
560	return 0;
561}
562
563static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p)
564{
565	struct evergreen_cs_track *track = p->track;
566	struct eg_surface surf;
567	unsigned pitch, slice, mslice;
568	unsigned long offset;
569	int r;
570
571	mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1;
572	pitch = G_028058_PITCH_TILE_MAX(track->db_depth_size);
573	slice = track->db_depth_slice;
574	surf.nbx = (pitch + 1) * 8;
575	surf.nby = ((slice + 1) * 64) / surf.nbx;
576	surf.mode = G_028040_ARRAY_MODE(track->db_z_info);
577	surf.format = G_028044_FORMAT(track->db_s_info);
578	surf.tsplit = G_028044_TILE_SPLIT(track->db_s_info);
579	surf.nbanks = G_028040_NUM_BANKS(track->db_z_info);
580	surf.bankw = G_028040_BANK_WIDTH(track->db_z_info);
581	surf.bankh = G_028040_BANK_HEIGHT(track->db_z_info);
582	surf.mtilea = G_028040_MACRO_TILE_ASPECT(track->db_z_info);
583	surf.nsamples = 1;
584
585	if (surf.format != 1) {
586		dev_warn(p->dev, "%s:%d stencil invalid format %d\n",
587			 __func__, __LINE__, surf.format);
588		return -EINVAL;
589	}
590	/* replace by color format so we can use same code */
591	surf.format = V_028C70_COLOR_8;
592
593	r = evergreen_surface_value_conv_check(p, &surf, "stencil");
594	if (r) {
595		return r;
596	}
597
598	r = evergreen_surface_check(p, &surf, NULL);
599	if (r) {
		/* Old userspace doesn't compute proper depth/stencil
		 * alignment; check the alignment against a larger
		 * bytes-per-element value and only report an error if that
		 * alignment is wrong too.
		 */
604		surf.format = V_028C70_COLOR_8_8_8_8;
605		r = evergreen_surface_check(p, &surf, "stencil");
606		if (r) {
607			dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
608				 __func__, __LINE__, track->db_depth_size,
609				 track->db_depth_slice, track->db_s_info, track->db_z_info);
610		}
611		return r;
612	}
613
614	offset = track->db_s_read_offset << 8;
615	if (offset & (surf.base_align - 1)) {
616		dev_warn(p->dev, "%s:%d stencil read bo base %ld not aligned with %ld\n",
617			 __func__, __LINE__, offset, surf.base_align);
618		return -EINVAL;
619	}
620	offset += surf.layer_size * mslice;
621	if (offset > radeon_bo_size(track->db_s_read_bo)) {
622		dev_warn(p->dev, "%s:%d stencil read bo too small (layer size %d, "
623			 "offset %ld, max layer %d, bo size %ld)\n",
624			 __func__, __LINE__, surf.layer_size,
625			(unsigned long)track->db_s_read_offset << 8, mslice,
626			radeon_bo_size(track->db_s_read_bo));
627		dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
628			 __func__, __LINE__, track->db_depth_size,
629			 track->db_depth_slice, track->db_s_info, track->db_z_info);
630		return -EINVAL;
631	}
632
633	offset = track->db_s_write_offset << 8;
634	if (offset & (surf.base_align - 1)) {
635		dev_warn(p->dev, "%s:%d stencil write bo base %ld not aligned with %ld\n",
636			 __func__, __LINE__, offset, surf.base_align);
637		return -EINVAL;
638	}
639	offset += surf.layer_size * mslice;
640	if (offset > radeon_bo_size(track->db_s_write_bo)) {
641		dev_warn(p->dev, "%s:%d stencil write bo too small (layer size %d, "
642			 "offset %ld, max layer %d, bo size %ld)\n",
643			 __func__, __LINE__, surf.layer_size,
644			(unsigned long)track->db_s_write_offset << 8, mslice,
645			radeon_bo_size(track->db_s_write_bo));
646		return -EINVAL;
647	}
648
649	/* hyperz */
650	if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) {
651		r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby);
652		if (r) {
653			return r;
654		}
655	}
656
657	return 0;
658}
659
660static int evergreen_cs_track_validate_depth(struct radeon_cs_parser *p)
661{
662	struct evergreen_cs_track *track = p->track;
663	struct eg_surface surf;
664	unsigned pitch, slice, mslice;
665	unsigned long offset;
666	int r;
667
668	mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1;
669	pitch = G_028058_PITCH_TILE_MAX(track->db_depth_size);
670	slice = track->db_depth_slice;
671	surf.nbx = (pitch + 1) * 8;
672	surf.nby = ((slice + 1) * 64) / surf.nbx;
673	surf.mode = G_028040_ARRAY_MODE(track->db_z_info);
674	surf.format = G_028040_FORMAT(track->db_z_info);
675	surf.tsplit = G_028040_TILE_SPLIT(track->db_z_info);
676	surf.nbanks = G_028040_NUM_BANKS(track->db_z_info);
677	surf.bankw = G_028040_BANK_WIDTH(track->db_z_info);
678	surf.bankh = G_028040_BANK_HEIGHT(track->db_z_info);
679	surf.mtilea = G_028040_MACRO_TILE_ASPECT(track->db_z_info);
680	surf.nsamples = 1;
681
682	switch (surf.format) {
683	case V_028040_Z_16:
684		surf.format = V_028C70_COLOR_16;
685		break;
686	case V_028040_Z_24:
687	case V_028040_Z_32_FLOAT:
688		surf.format = V_028C70_COLOR_8_8_8_8;
689		break;
690	default:
691		dev_warn(p->dev, "%s:%d depth invalid format %d\n",
692			 __func__, __LINE__, surf.format);
693		return -EINVAL;
694	}
695
696	r = evergreen_surface_value_conv_check(p, &surf, "depth");
697	if (r) {
698		dev_warn(p->dev, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n",
699			 __func__, __LINE__, track->db_depth_size,
700			 track->db_depth_slice, track->db_z_info);
701		return r;
702	}
703
704	r = evergreen_surface_check(p, &surf, "depth");
705	if (r) {
706		dev_warn(p->dev, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n",
707			 __func__, __LINE__, track->db_depth_size,
708			 track->db_depth_slice, track->db_z_info);
709		return r;
710	}
711
712	offset = track->db_z_read_offset << 8;
713	if (offset & (surf.base_align - 1)) {
		dev_warn(p->dev, "%s:%d depth read bo base %ld not aligned with %ld\n",
715			 __func__, __LINE__, offset, surf.base_align);
716		return -EINVAL;
717	}
718	offset += surf.layer_size * mslice;
719	if (offset > radeon_bo_size(track->db_z_read_bo)) {
720		dev_warn(p->dev, "%s:%d depth read bo too small (layer size %d, "
721			 "offset %ld, max layer %d, bo size %ld)\n",
722			 __func__, __LINE__, surf.layer_size,
723			(unsigned long)track->db_z_read_offset << 8, mslice,
724			radeon_bo_size(track->db_z_read_bo));
725		return -EINVAL;
726	}
727
728	offset = track->db_z_write_offset << 8;
729	if (offset & (surf.base_align - 1)) {
		dev_warn(p->dev, "%s:%d depth write bo base %ld not aligned with %ld\n",
731			 __func__, __LINE__, offset, surf.base_align);
732		return -EINVAL;
733	}
734	offset += surf.layer_size * mslice;
735	if (offset > radeon_bo_size(track->db_z_write_bo)) {
736		dev_warn(p->dev, "%s:%d depth write bo too small (layer size %d, "
737			 "offset %ld, max layer %d, bo size %ld)\n",
738			 __func__, __LINE__, surf.layer_size,
739			(unsigned long)track->db_z_write_offset << 8, mslice,
740			radeon_bo_size(track->db_z_write_bo));
741		return -EINVAL;
742	}
743
744	/* hyperz */
745	if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) {
746		r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby);
747		if (r) {
748			return r;
749		}
750	}
751
752	return 0;
753}
754
755static int evergreen_cs_track_validate_texture(struct radeon_cs_parser *p,
756					       struct radeon_bo *texture,
757					       struct radeon_bo *mipmap,
758					       unsigned idx)
759{
760	struct eg_surface surf;
761	unsigned long toffset, moffset;
762	unsigned dim, llevel, mslice, width, height, depth, i;
763	u32 texdw[8];
764	int r;
765
766	texdw[0] = radeon_get_ib_value(p, idx + 0);
767	texdw[1] = radeon_get_ib_value(p, idx + 1);
768	texdw[2] = radeon_get_ib_value(p, idx + 2);
769	texdw[3] = radeon_get_ib_value(p, idx + 3);
770	texdw[4] = radeon_get_ib_value(p, idx + 4);
771	texdw[5] = radeon_get_ib_value(p, idx + 5);
772	texdw[6] = radeon_get_ib_value(p, idx + 6);
773	texdw[7] = radeon_get_ib_value(p, idx + 7);
774	dim = G_030000_DIM(texdw[0]);
775	llevel = G_030014_LAST_LEVEL(texdw[5]);
776	mslice = G_030014_LAST_ARRAY(texdw[5]) + 1;
777	width = G_030000_TEX_WIDTH(texdw[0]) + 1;
778	height =  G_030004_TEX_HEIGHT(texdw[1]) + 1;
779	depth = G_030004_TEX_DEPTH(texdw[1]) + 1;
780	surf.format = G_03001C_DATA_FORMAT(texdw[7]);
781	surf.nbx = (G_030000_PITCH(texdw[0]) + 1) * 8;
782	surf.nbx = r600_fmt_get_nblocksx(surf.format, surf.nbx);
783	surf.nby = r600_fmt_get_nblocksy(surf.format, height);
784	surf.mode = G_030004_ARRAY_MODE(texdw[1]);
785	surf.tsplit = G_030018_TILE_SPLIT(texdw[6]);
786	surf.nbanks = G_03001C_NUM_BANKS(texdw[7]);
787	surf.bankw = G_03001C_BANK_WIDTH(texdw[7]);
788	surf.bankh = G_03001C_BANK_HEIGHT(texdw[7]);
789	surf.mtilea = G_03001C_MACRO_TILE_ASPECT(texdw[7]);
790	surf.nsamples = 1;
791	toffset = texdw[2] << 8;
792	moffset = texdw[3] << 8;
793
794	if (!r600_fmt_is_valid_texture(surf.format, p->family)) {
795		dev_warn(p->dev, "%s:%d texture invalid format %d\n",
796			 __func__, __LINE__, surf.format);
797		return -EINVAL;
798	}
799	switch (dim) {
800	case V_030000_SQ_TEX_DIM_1D:
801	case V_030000_SQ_TEX_DIM_2D:
802	case V_030000_SQ_TEX_DIM_CUBEMAP:
803	case V_030000_SQ_TEX_DIM_1D_ARRAY:
804	case V_030000_SQ_TEX_DIM_2D_ARRAY:
805		depth = 1;
806		break;
807	case V_030000_SQ_TEX_DIM_2D_MSAA:
808	case V_030000_SQ_TEX_DIM_2D_ARRAY_MSAA:
809		surf.nsamples = 1 << llevel;
810		llevel = 0;
811		depth = 1;
812		break;
813	case V_030000_SQ_TEX_DIM_3D:
814		break;
815	default:
816		dev_warn(p->dev, "%s:%d texture invalid dimension %d\n",
817			 __func__, __LINE__, dim);
818		return -EINVAL;
819	}
820
821	r = evergreen_surface_value_conv_check(p, &surf, "texture");
822	if (r) {
823		return r;
824	}
825
826	/* align height */
827	evergreen_surface_check(p, &surf, NULL);
828	surf.nby = roundup(surf.nby, surf.halign);
829
830	r = evergreen_surface_check(p, &surf, "texture");
831	if (r) {
832		dev_warn(p->dev, "%s:%d texture invalid 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n",
833			 __func__, __LINE__, texdw[0], texdw[1], texdw[4],
834			 texdw[5], texdw[6], texdw[7]);
835		return r;
836	}
837
838	/* check texture size */
839	if (toffset & (surf.base_align - 1)) {
840		dev_warn(p->dev, "%s:%d texture bo base %ld not aligned with %ld\n",
841			 __func__, __LINE__, toffset, surf.base_align);
842		return -EINVAL;
843	}
844	if (moffset & (surf.base_align - 1)) {
845		dev_warn(p->dev, "%s:%d mipmap bo base %ld not aligned with %ld\n",
846			 __func__, __LINE__, moffset, surf.base_align);
847		return -EINVAL;
848	}
849	if (dim == SQ_TEX_DIM_3D) {
850		toffset += surf.layer_size * depth;
851	} else {
852		toffset += surf.layer_size * mslice;
853	}
854	if (toffset > radeon_bo_size(texture)) {
855		dev_warn(p->dev, "%s:%d texture bo too small (layer size %d, "
856			 "offset %ld, max layer %d, depth %d, bo size %ld) (%d %d)\n",
857			 __func__, __LINE__, surf.layer_size,
858			(unsigned long)texdw[2] << 8, mslice,
859			depth, radeon_bo_size(texture),
860			surf.nbx, surf.nby);
861		return -EINVAL;
862	}
863
864	if (!mipmap) {
865		if (llevel) {
866			dev_warn(p->dev, "%s:%i got NULL MIP_ADDRESS relocation\n",
867				 __func__, __LINE__);
868			return -EINVAL;
869		} else {
870			return 0; /* everything's ok */
871		}
872	}
873
874	/* check mipmap size */
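	/*
	 * Each level is minified, realigned to the (possibly downgraded)
	 * array mode and its layer size accumulated into moffset, so the
	 * whole chain is checked against the mipmap bo size.
	 */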
875	for (i = 1; i <= llevel; i++) {
876		unsigned w, h, d;
877
878		w = r600_mip_minify(width, i);
879		h = r600_mip_minify(height, i);
880		d = r600_mip_minify(depth, i);
881		surf.nbx = r600_fmt_get_nblocksx(surf.format, w);
882		surf.nby = r600_fmt_get_nblocksy(surf.format, h);
883
884		switch (surf.mode) {
885		case ARRAY_2D_TILED_THIN1:
886			if (surf.nbx < surf.palign || surf.nby < surf.halign) {
887				surf.mode = ARRAY_1D_TILED_THIN1;
888			}
889			/* recompute alignment */
890			evergreen_surface_check(p, &surf, NULL);
891			break;
892		case ARRAY_LINEAR_GENERAL:
893		case ARRAY_LINEAR_ALIGNED:
894		case ARRAY_1D_TILED_THIN1:
895			break;
896		default:
897			dev_warn(p->dev, "%s:%d invalid array mode %d\n",
898				 __func__, __LINE__, surf.mode);
899			return -EINVAL;
900		}
901		surf.nbx = roundup(surf.nbx, surf.palign);
902		surf.nby = roundup(surf.nby, surf.halign);
903
904		r = evergreen_surface_check(p, &surf, "mipmap");
905		if (r) {
906			return r;
907		}
908
909		if (dim == SQ_TEX_DIM_3D) {
910			moffset += surf.layer_size * d;
911		} else {
912			moffset += surf.layer_size * mslice;
913		}
914		if (moffset > radeon_bo_size(mipmap)) {
915			dev_warn(p->dev, "%s:%d mipmap [%d] bo too small (layer size %d, "
916					"offset %ld, coffset %ld, max layer %d, depth %d, "
917					"bo size %ld) level0 (%d %d %d)\n",
918					__func__, __LINE__, i, surf.layer_size,
919					(unsigned long)texdw[3] << 8, moffset, mslice,
920					d, radeon_bo_size(mipmap),
921					width, height, depth);
922			dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n",
923				 __func__, __LINE__, surf.nbx, surf.nby,
924				surf.mode, surf.bpe, surf.nsamples,
925				surf.bankw, surf.bankh,
926				surf.tsplit, surf.mtilea);
927			return -EINVAL;
928		}
929	}
930
931	return 0;
932}
933
934static int evergreen_cs_track_check(struct radeon_cs_parser *p)
935{
936	struct evergreen_cs_track *track = p->track;
937	unsigned tmp, i;
938	int r;
939	unsigned buffer_mask = 0;
940
941	/* check streamout */
942	if (track->streamout_dirty && track->vgt_strmout_config) {
943		for (i = 0; i < 4; i++) {
944			if (track->vgt_strmout_config & (1 << i)) {
945				buffer_mask |= (track->vgt_strmout_buffer_config >> (i * 4)) & 0xf;
946			}
947		}
948
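		/*
		 * buffer_mask now holds every streamout buffer referenced by
		 * an enabled stream; each one needs a bo large enough for
		 * offset + size.
		 */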
949		for (i = 0; i < 4; i++) {
950			if (buffer_mask & (1 << i)) {
951				if (track->vgt_strmout_bo[i]) {
952					u64 offset = (u64)track->vgt_strmout_bo_offset[i] +
953							(u64)track->vgt_strmout_size[i];
954					if (offset > radeon_bo_size(track->vgt_strmout_bo[i])) {
955						DRM_ERROR("streamout %d bo too small: 0x%jx, 0x%lx\n",
956							  i, (uintmax_t)offset,
957							  radeon_bo_size(track->vgt_strmout_bo[i]));
958						return -EINVAL;
959					}
960				} else {
961					dev_warn(p->dev, "No buffer for streamout %d\n", i);
962					return -EINVAL;
963				}
964			}
965		}
966		track->streamout_dirty = false;
967	}
968
969	if (track->sx_misc_kill_all_prims)
970		return 0;
971
	/* check that we have a cb for each enabled target */
974	if (track->cb_dirty) {
975		tmp = track->cb_target_mask;
976		for (i = 0; i < 8; i++) {
977			if ((tmp >> (i * 4)) & 0xF) {
978				/* at least one component is enabled */
979				if (track->cb_color_bo[i] == NULL) {
980					dev_warn(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n",
981						__func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i);
982					return -EINVAL;
983				}
984				/* check cb */
985				r = evergreen_cs_track_validate_cb(p, i);
986				if (r) {
987					return r;
988				}
989			}
990		}
991		track->cb_dirty = false;
992	}
993
994	if (track->db_dirty) {
995		/* Check stencil buffer */
996		if (G_028044_FORMAT(track->db_s_info) != V_028044_STENCIL_INVALID &&
997		    G_028800_STENCIL_ENABLE(track->db_depth_control)) {
998			r = evergreen_cs_track_validate_stencil(p);
999			if (r)
1000				return r;
1001		}
1002		/* Check depth buffer */
1003		if (G_028040_FORMAT(track->db_z_info) != V_028040_Z_INVALID &&
1004		    G_028800_Z_ENABLE(track->db_depth_control)) {
1005			r = evergreen_cs_track_validate_depth(p);
1006			if (r)
1007				return r;
1008		}
1009		track->db_dirty = false;
1010	}
1011
1012	return 0;
1013}
1014
1015/**
1016 * evergreen_cs_packet_parse() - parse cp packet and point ib index to next packet
 * @p:		parser structure holding parsing context.
 * @pkt:	where to store packet information
 * @idx:	index into the ib at which the packet starts
 *
 * Assume that chunk_ib_index is properly set. Returns -EINVAL if the
 * packet is bigger than the remaining ib size or if the packet type is
 * unknown.
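 *
 * For reference, the CP header layout decoded by the CP_PACKET*_GET_*
 * macros: bits 31:30 hold the packet type and bits 29:16 the count;
 * PACKET0 carries the starting register dword offset in bits 15:0 and
 * PACKET3 its opcode in bits 15:8.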
1022 **/
1023static int evergreen_cs_packet_parse(struct radeon_cs_parser *p,
1024			      struct radeon_cs_packet *pkt,
1025			      unsigned idx)
1026{
1027	struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
1028	uint32_t header;
1029
1030	if (idx >= ib_chunk->length_dw) {
1031		DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
1032			  idx, ib_chunk->length_dw);
1033		return -EINVAL;
1034	}
1035	header = radeon_get_ib_value(p, idx);
1036	pkt->idx = idx;
1037	pkt->type = CP_PACKET_GET_TYPE(header);
1038	pkt->count = CP_PACKET_GET_COUNT(header);
1039	pkt->one_reg_wr = 0;
1040	switch (pkt->type) {
1041	case PACKET_TYPE0:
1042		pkt->reg = CP_PACKET0_GET_REG(header);
1043		break;
1044	case PACKET_TYPE3:
1045		pkt->opcode = CP_PACKET3_GET_OPCODE(header);
1046		break;
1047	case PACKET_TYPE2:
1048		pkt->count = -1;
1049		break;
1050	default:
1051		DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
1052		return -EINVAL;
1053	}
1054	if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
1055		DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
1056			  pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
1057		return -EINVAL;
1058	}
1059	return 0;
1060}
1061
1062/**
1063 * evergreen_cs_packet_next_reloc() - parse next packet which should be reloc packet3
 * @p:		parser structure holding parsing context.
 * @cs_reloc:	where to store the relocation information
 *
 * Check that the next packet is a relocation packet3 (NOP) and look up
 * the relocation entry it references.
1072 **/
1073static int evergreen_cs_packet_next_reloc(struct radeon_cs_parser *p,
1074					  struct radeon_cs_reloc **cs_reloc)
1075{
1076	struct radeon_cs_chunk *relocs_chunk;
1077	struct radeon_cs_packet p3reloc;
1078	unsigned idx;
1079	int r;
1080
1081	if (p->chunk_relocs_idx == -1) {
1082		DRM_ERROR("No relocation chunk !\n");
1083		return -EINVAL;
1084	}
1085	*cs_reloc = NULL;
1086	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
1087	r = evergreen_cs_packet_parse(p, &p3reloc, p->idx);
1088	if (r) {
1089		return r;
1090	}
1091	p->idx += p3reloc.count + 2;
1092	if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
1093		DRM_ERROR("No packet3 for relocation for packet at %d.\n",
1094			  p3reloc.idx);
1095		return -EINVAL;
1096	}
1097	idx = radeon_get_ib_value(p, p3reloc.idx + 1);
1098	if (idx >= relocs_chunk->length_dw) {
1099		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
1100			  idx, relocs_chunk->length_dw);
1101		return -EINVAL;
1102	}
1103	/* FIXME: we assume reloc size is 4 dwords */
1104	*cs_reloc = p->relocs_ptr[(idx / 4)];
1105	return 0;
1106}
1107
1108/**
1109 * evergreen_cs_packet_next_is_pkt3_nop() - test if the next packet is NOP
1110 * @p:		structure holding the parser context.
1111 *
1112 * Check if the next packet is a relocation packet3.
1113 **/
1114static bool evergreen_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
1115{
1116	struct radeon_cs_packet p3reloc;
1117	int r;
1118
1119	r = evergreen_cs_packet_parse(p, &p3reloc, p->idx);
1120	if (r) {
1121		return false;
1122	}
1123	if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
1124		return false;
1125	}
1126	return true;
1127}
1128
1129/**
1130 * evergreen_cs_packet_next_vline() - parse userspace VLINE packet
 * @p:		parser structure holding parsing context.
1132 *
1133 * Userspace sends a special sequence for VLINE waits.
1134 * PACKET0 - VLINE_START_END + value
1135 * PACKET3 - WAIT_REG_MEM poll vline status reg
1136 * RELOC (P3) - crtc_id in reloc.
1137 *
1138 * This function parses this and relocates the VLINE START END
1139 * and WAIT_REG_MEM packets to the correct crtc.
1140 * It also detects a switched off crtc and nulls out the
1141 * wait in that case.
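 *
 * Expected dword layout (h_idx = index of the PACKET0 header, as assumed
 * by the offsets used below):
 *   h_idx + 0     PACKET0 header (VLINE_START_END)
 *   h_idx + 1     vline start/end value
 *   h_idx + 2     PACKET3 WAIT_REG_MEM header
 *   h_idx + 3..8  WAIT_REG_MEM body (function, poll address, reference,
 *                 mask, poll interval)
 *   h_idx + 9     PACKET3 NOP header
 *   h_idx + 10    crtc_id carried in the NOP/reloc payload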
1142 */
1143static int evergreen_cs_packet_parse_vline(struct radeon_cs_parser *p)
1144{
1145	struct drm_mode_object *obj;
1146	struct drm_crtc *crtc;
1147	struct radeon_crtc *radeon_crtc;
1148	struct radeon_cs_packet p3reloc, wait_reg_mem;
1149	int crtc_id;
1150	int r;
1151	uint32_t header, h_idx, reg, wait_reg_mem_info;
1152	volatile uint32_t *ib;
1153
1154	ib = p->ib.ptr;
1155
1156	/* parse the WAIT_REG_MEM */
1157	r = evergreen_cs_packet_parse(p, &wait_reg_mem, p->idx);
1158	if (r)
1159		return r;
1160
	/* check it's a WAIT_REG_MEM */
1162	if (wait_reg_mem.type != PACKET_TYPE3 ||
1163	    wait_reg_mem.opcode != PACKET3_WAIT_REG_MEM) {
1164		DRM_ERROR("vline wait missing WAIT_REG_MEM segment\n");
1165		return -EINVAL;
1166	}
1167
1168	wait_reg_mem_info = radeon_get_ib_value(p, wait_reg_mem.idx + 1);
1169	/* bit 4 is reg (0) or mem (1) */
1170	if (wait_reg_mem_info & 0x10) {
1171		DRM_ERROR("vline WAIT_REG_MEM waiting on MEM rather than REG\n");
1172		return -EINVAL;
1173	}
1174	/* waiting for value to be equal */
1175	if ((wait_reg_mem_info & 0x7) != 0x3) {
1176		DRM_ERROR("vline WAIT_REG_MEM function not equal\n");
1177		return -EINVAL;
1178	}
1179	if ((radeon_get_ib_value(p, wait_reg_mem.idx + 2) << 2) != EVERGREEN_VLINE_STATUS) {
1180		DRM_ERROR("vline WAIT_REG_MEM bad reg\n");
1181		return -EINVAL;
1182	}
1183
1184	if (radeon_get_ib_value(p, wait_reg_mem.idx + 5) != EVERGREEN_VLINE_STAT) {
1185		DRM_ERROR("vline WAIT_REG_MEM bad bit mask\n");
1186		return -EINVAL;
1187	}
1188
1189	/* jump over the NOP */
1190	r = evergreen_cs_packet_parse(p, &p3reloc, p->idx + wait_reg_mem.count + 2);
1191	if (r)
1192		return r;
1193
1194	h_idx = p->idx - 2;
1195	p->idx += wait_reg_mem.count + 2;
1196	p->idx += p3reloc.count + 2;
1197
1198	header = radeon_get_ib_value(p, h_idx);
1199	crtc_id = radeon_get_ib_value(p, h_idx + 2 + 7 + 1);
1200	reg = CP_PACKET0_GET_REG(header);
1201	obj = drm_mode_object_find(p->rdev->ddev, crtc_id, DRM_MODE_OBJECT_CRTC);
1202	if (!obj) {
1203		DRM_ERROR("cannot find crtc %d\n", crtc_id);
1204		return -EINVAL;
1205	}
1206	crtc = obj_to_crtc(obj);
1207	radeon_crtc = to_radeon_crtc(crtc);
1208	crtc_id = radeon_crtc->crtc_id;
1209
1210	if (!crtc->enabled) {
1211		/* if the CRTC isn't enabled - we need to nop out the WAIT_REG_MEM */
1212		ib[h_idx + 2] = PACKET2(0);
1213		ib[h_idx + 3] = PACKET2(0);
1214		ib[h_idx + 4] = PACKET2(0);
1215		ib[h_idx + 5] = PACKET2(0);
1216		ib[h_idx + 6] = PACKET2(0);
1217		ib[h_idx + 7] = PACKET2(0);
1218		ib[h_idx + 8] = PACKET2(0);
1219	} else {
1220		switch (reg) {
1221		case EVERGREEN_VLINE_START_END:
1222			header &= ~R600_CP_PACKET0_REG_MASK;
1223			header |= (EVERGREEN_VLINE_START_END + radeon_crtc->crtc_offset) >> 2;
1224			ib[h_idx] = header;
1225			ib[h_idx + 4] = (EVERGREEN_VLINE_STATUS + radeon_crtc->crtc_offset) >> 2;
1226			break;
1227		default:
1228			DRM_ERROR("unknown crtc reloc\n");
1229			return -EINVAL;
1230		}
1231	}
1232	return 0;
1233}
1234
1235static int evergreen_packet0_check(struct radeon_cs_parser *p,
1236				   struct radeon_cs_packet *pkt,
1237				   unsigned idx, unsigned reg)
1238{
1239	int r;
1240
1241	switch (reg) {
1242	case EVERGREEN_VLINE_START_END:
1243		r = evergreen_cs_packet_parse_vline(p);
1244		if (r) {
1245			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1246					idx, reg);
1247			return r;
1248		}
1249		break;
1250	default:
1251		DRM_ERROR("Forbidden register 0x%04X in cs at %d\n",
1252		       reg, idx);
1253		return -EINVAL;
1254	}
1255	return 0;
1256}
1257
1258static int evergreen_cs_parse_packet0(struct radeon_cs_parser *p,
1259				      struct radeon_cs_packet *pkt)
1260{
1261	unsigned reg, i;
1262	unsigned idx;
1263	int r;
1264
1265	idx = pkt->idx + 1;
1266	reg = pkt->reg;
1267	for (i = 0; i <= pkt->count; i++, idx++, reg += 4) {
1268		r = evergreen_packet0_check(p, pkt, idx, reg);
1269		if (r) {
1270			return r;
1271		}
1272	}
1273	return 0;
1274}
1275
1276/**
1277 * evergreen_cs_check_reg() - check if register is authorized or not
 * @p: parser structure holding parsing context
1279 * @reg: register we are testing
1280 * @idx: index into the cs buffer
1281 *
1282 * This function will test against evergreen_reg_safe_bm and return 0
 * if the register is safe. If the register is not flagged as safe this
 * function will test it against a list of registers needing special
 * handling.
1285 */
1286static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
1287{
1288	struct evergreen_cs_track *track = (struct evergreen_cs_track *)p->track;
1289	struct radeon_cs_reloc *reloc;
1290	u32 last_reg;
1291	u32 m, i, tmp, *ib;
1292	int r;
1293
1294	if (p->rdev->family >= CHIP_CAYMAN)
1295		last_reg = DRM_ARRAY_SIZE(cayman_reg_safe_bm);
1296	else
1297		last_reg = DRM_ARRAY_SIZE(evergreen_reg_safe_bm);
1298
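	/*
	 * The safe bitmap has one bit per register dword: i selects the
	 * 32-bit word covering 128 bytes of register space and m the bit
	 * within it.  E.g. DB_Z_INFO (0x28040) gives i = 0x500 and
	 * m = 1 << 16.
	 */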
1299	i = (reg >> 7);
1300	if (i >= last_reg) {
1301		dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
1302		return -EINVAL;
1303	}
1304	m = 1 << ((reg >> 2) & 31);
1305	if (p->rdev->family >= CHIP_CAYMAN) {
1306		if (!(cayman_reg_safe_bm[i] & m))
1307			return 0;
1308	} else {
1309		if (!(evergreen_reg_safe_bm[i] & m))
1310			return 0;
1311	}
1312	ib = p->ib.ptr;
1313	switch (reg) {
	/* Force the following regs to 0 in an attempt to disable the out
	 * buffer; we would need to better understand how it works to
	 * perform a proper security check on it (Jerome)
	 */
1318	case SQ_ESGS_RING_SIZE:
1319	case SQ_GSVS_RING_SIZE:
1320	case SQ_ESTMP_RING_SIZE:
1321	case SQ_GSTMP_RING_SIZE:
1322	case SQ_HSTMP_RING_SIZE:
1323	case SQ_LSTMP_RING_SIZE:
1324	case SQ_PSTMP_RING_SIZE:
1325	case SQ_VSTMP_RING_SIZE:
1326	case SQ_ESGS_RING_ITEMSIZE:
1327	case SQ_ESTMP_RING_ITEMSIZE:
1328	case SQ_GSTMP_RING_ITEMSIZE:
1329	case SQ_GSVS_RING_ITEMSIZE:
1330	case SQ_GS_VERT_ITEMSIZE:
1331	case SQ_GS_VERT_ITEMSIZE_1:
1332	case SQ_GS_VERT_ITEMSIZE_2:
1333	case SQ_GS_VERT_ITEMSIZE_3:
1334	case SQ_GSVS_RING_OFFSET_1:
1335	case SQ_GSVS_RING_OFFSET_2:
1336	case SQ_GSVS_RING_OFFSET_3:
1337	case SQ_HSTMP_RING_ITEMSIZE:
1338	case SQ_LSTMP_RING_ITEMSIZE:
1339	case SQ_PSTMP_RING_ITEMSIZE:
1340	case SQ_VSTMP_RING_ITEMSIZE:
1341	case VGT_TF_RING_SIZE:
1342		/* get value to populate the IB don't remove */
1343		/*tmp =radeon_get_ib_value(p, idx);
1344		  ib[idx] = 0;*/
1345		break;
1346	case SQ_ESGS_RING_BASE:
1347	case SQ_GSVS_RING_BASE:
1348	case SQ_ESTMP_RING_BASE:
1349	case SQ_GSTMP_RING_BASE:
1350	case SQ_HSTMP_RING_BASE:
1351	case SQ_LSTMP_RING_BASE:
1352	case SQ_PSTMP_RING_BASE:
1353	case SQ_VSTMP_RING_BASE:
1354		r = evergreen_cs_packet_next_reloc(p, &reloc);
1355		if (r) {
1356			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1357					"0x%04X\n", reg);
1358			return -EINVAL;
1359		}
1360		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1361		break;
1362	case DB_DEPTH_CONTROL:
1363		track->db_depth_control = radeon_get_ib_value(p, idx);
1364		track->db_dirty = true;
1365		break;
1366	case CAYMAN_DB_EQAA:
1367		if (p->rdev->family < CHIP_CAYMAN) {
1368			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1369				 "0x%04X\n", reg);
1370			return -EINVAL;
1371		}
1372		break;
1373	case CAYMAN_DB_DEPTH_INFO:
1374		if (p->rdev->family < CHIP_CAYMAN) {
1375			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1376				 "0x%04X\n", reg);
1377			return -EINVAL;
1378		}
1379		break;
1380	case DB_Z_INFO:
1381		track->db_z_info = radeon_get_ib_value(p, idx);
1382		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1383			r = evergreen_cs_packet_next_reloc(p, &reloc);
1384			if (r) {
1385				dev_warn(p->dev, "bad SET_CONTEXT_REG "
1386						"0x%04X\n", reg);
1387				return -EINVAL;
1388			}
1389			ib[idx] &= ~Z_ARRAY_MODE(0xf);
1390			track->db_z_info &= ~Z_ARRAY_MODE(0xf);
1391			ib[idx] |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
1392			track->db_z_info |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
1393			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
1394				unsigned bankw, bankh, mtaspect, tile_split;
1395
1396				evergreen_tiling_fields(reloc->lobj.tiling_flags,
1397							&bankw, &bankh, &mtaspect,
1398							&tile_split);
1399				ib[idx] |= DB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
1400				ib[idx] |= DB_TILE_SPLIT(tile_split) |
1401						DB_BANK_WIDTH(bankw) |
1402						DB_BANK_HEIGHT(bankh) |
1403						DB_MACRO_TILE_ASPECT(mtaspect);
1404			}
1405		}
1406		track->db_dirty = true;
1407		break;
1408	case DB_STENCIL_INFO:
1409		track->db_s_info = radeon_get_ib_value(p, idx);
1410		track->db_dirty = true;
1411		break;
1412	case DB_DEPTH_VIEW:
1413		track->db_depth_view = radeon_get_ib_value(p, idx);
1414		track->db_dirty = true;
1415		break;
1416	case DB_DEPTH_SIZE:
1417		track->db_depth_size = radeon_get_ib_value(p, idx);
1418		track->db_dirty = true;
1419		break;
1420	case R_02805C_DB_DEPTH_SLICE:
1421		track->db_depth_slice = radeon_get_ib_value(p, idx);
1422		track->db_dirty = true;
1423		break;
1424	case DB_Z_READ_BASE:
1425		r = evergreen_cs_packet_next_reloc(p, &reloc);
1426		if (r) {
1427			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1428					"0x%04X\n", reg);
1429			return -EINVAL;
1430		}
1431		track->db_z_read_offset = radeon_get_ib_value(p, idx);
1432		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1433		track->db_z_read_bo = reloc->robj;
1434		track->db_dirty = true;
1435		break;
1436	case DB_Z_WRITE_BASE:
1437		r = evergreen_cs_packet_next_reloc(p, &reloc);
1438		if (r) {
1439			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1440					"0x%04X\n", reg);
1441			return -EINVAL;
1442		}
1443		track->db_z_write_offset = radeon_get_ib_value(p, idx);
1444		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1445		track->db_z_write_bo = reloc->robj;
1446		track->db_dirty = true;
1447		break;
1448	case DB_STENCIL_READ_BASE:
1449		r = evergreen_cs_packet_next_reloc(p, &reloc);
1450		if (r) {
1451			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1452					"0x%04X\n", reg);
1453			return -EINVAL;
1454		}
1455		track->db_s_read_offset = radeon_get_ib_value(p, idx);
1456		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1457		track->db_s_read_bo = reloc->robj;
1458		track->db_dirty = true;
1459		break;
1460	case DB_STENCIL_WRITE_BASE:
1461		r = evergreen_cs_packet_next_reloc(p, &reloc);
1462		if (r) {
1463			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1464					"0x%04X\n", reg);
1465			return -EINVAL;
1466		}
1467		track->db_s_write_offset = radeon_get_ib_value(p, idx);
1468		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1469		track->db_s_write_bo = reloc->robj;
1470		track->db_dirty = true;
1471		break;
1472	case VGT_STRMOUT_CONFIG:
1473		track->vgt_strmout_config = radeon_get_ib_value(p, idx);
1474		track->streamout_dirty = true;
1475		break;
1476	case VGT_STRMOUT_BUFFER_CONFIG:
1477		track->vgt_strmout_buffer_config = radeon_get_ib_value(p, idx);
1478		track->streamout_dirty = true;
1479		break;
1480	case VGT_STRMOUT_BUFFER_BASE_0:
1481	case VGT_STRMOUT_BUFFER_BASE_1:
1482	case VGT_STRMOUT_BUFFER_BASE_2:
1483	case VGT_STRMOUT_BUFFER_BASE_3:
1484		r = evergreen_cs_packet_next_reloc(p, &reloc);
1485		if (r) {
1486			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1487					"0x%04X\n", reg);
1488			return -EINVAL;
1489		}
1490		tmp = (reg - VGT_STRMOUT_BUFFER_BASE_0) / 16;
1491		track->vgt_strmout_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8;
1492		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1493		track->vgt_strmout_bo[tmp] = reloc->robj;
1494		track->streamout_dirty = true;
1495		break;
1496	case VGT_STRMOUT_BUFFER_SIZE_0:
1497	case VGT_STRMOUT_BUFFER_SIZE_1:
1498	case VGT_STRMOUT_BUFFER_SIZE_2:
1499	case VGT_STRMOUT_BUFFER_SIZE_3:
1500		tmp = (reg - VGT_STRMOUT_BUFFER_SIZE_0) / 16;
1501		/* size in register is DWs, convert to bytes */
1502		track->vgt_strmout_size[tmp] = radeon_get_ib_value(p, idx) * 4;
1503		track->streamout_dirty = true;
1504		break;
1505	case CP_COHER_BASE:
1506		r = evergreen_cs_packet_next_reloc(p, &reloc);
1507		if (r) {
1508			dev_warn(p->dev, "missing reloc for CP_COHER_BASE "
1509					"0x%04X\n", reg);
1510			return -EINVAL;
1511		}
		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
		break;
1513	case CB_TARGET_MASK:
1514		track->cb_target_mask = radeon_get_ib_value(p, idx);
1515		track->cb_dirty = true;
1516		break;
1517	case CB_SHADER_MASK:
1518		track->cb_shader_mask = radeon_get_ib_value(p, idx);
1519		track->cb_dirty = true;
1520		break;
1521	case PA_SC_AA_CONFIG:
1522		if (p->rdev->family >= CHIP_CAYMAN) {
1523			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1524				 "0x%04X\n", reg);
1525			return -EINVAL;
1526		}
1527		tmp = radeon_get_ib_value(p, idx) & MSAA_NUM_SAMPLES_MASK;
1528		track->nsamples = 1 << tmp;
1529		break;
1530	case CAYMAN_PA_SC_AA_CONFIG:
1531		if (p->rdev->family < CHIP_CAYMAN) {
1532			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1533				 "0x%04X\n", reg);
1534			return -EINVAL;
1535		}
1536		tmp = radeon_get_ib_value(p, idx) & CAYMAN_MSAA_NUM_SAMPLES_MASK;
1537		track->nsamples = 1 << tmp;
1538		break;
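	/*
	 * CB_COLOR0-7 state blocks are 0x3c bytes apart while CB_COLOR8-11
	 * use a packed layout 0x1c bytes apart, hence the two index
	 * formulas used for the cases below.
	 */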
1539	case CB_COLOR0_VIEW:
1540	case CB_COLOR1_VIEW:
1541	case CB_COLOR2_VIEW:
1542	case CB_COLOR3_VIEW:
1543	case CB_COLOR4_VIEW:
1544	case CB_COLOR5_VIEW:
1545	case CB_COLOR6_VIEW:
1546	case CB_COLOR7_VIEW:
1547		tmp = (reg - CB_COLOR0_VIEW) / 0x3c;
1548		track->cb_color_view[tmp] = radeon_get_ib_value(p, idx);
1549		track->cb_dirty = true;
1550		break;
1551	case CB_COLOR8_VIEW:
1552	case CB_COLOR9_VIEW:
1553	case CB_COLOR10_VIEW:
1554	case CB_COLOR11_VIEW:
1555		tmp = ((reg - CB_COLOR8_VIEW) / 0x1c) + 8;
1556		track->cb_color_view[tmp] = radeon_get_ib_value(p, idx);
1557		track->cb_dirty = true;
1558		break;
1559	case CB_COLOR0_INFO:
1560	case CB_COLOR1_INFO:
1561	case CB_COLOR2_INFO:
1562	case CB_COLOR3_INFO:
1563	case CB_COLOR4_INFO:
1564	case CB_COLOR5_INFO:
1565	case CB_COLOR6_INFO:
1566	case CB_COLOR7_INFO:
1567		tmp = (reg - CB_COLOR0_INFO) / 0x3c;
1568		track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
1569		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1570			r = evergreen_cs_packet_next_reloc(p, &reloc);
1571			if (r) {
1572				dev_warn(p->dev, "bad SET_CONTEXT_REG "
1573						"0x%04X\n", reg);
1574				return -EINVAL;
1575			}
1576			ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
1577			track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
1578		}
1579		track->cb_dirty = true;
1580		break;
1581	case CB_COLOR8_INFO:
1582	case CB_COLOR9_INFO:
1583	case CB_COLOR10_INFO:
1584	case CB_COLOR11_INFO:
1585		tmp = ((reg - CB_COLOR8_INFO) / 0x1c) + 8;
1586		track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
1587		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1588			r = evergreen_cs_packet_next_reloc(p, &reloc);
1589			if (r) {
1590				dev_warn(p->dev, "bad SET_CONTEXT_REG "
1591						"0x%04X\n", reg);
1592				return -EINVAL;
1593			}
1594			ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
1595			track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
1596		}
1597		track->cb_dirty = true;
1598		break;
1599	case CB_COLOR0_PITCH:
1600	case CB_COLOR1_PITCH:
1601	case CB_COLOR2_PITCH:
1602	case CB_COLOR3_PITCH:
1603	case CB_COLOR4_PITCH:
1604	case CB_COLOR5_PITCH:
1605	case CB_COLOR6_PITCH:
1606	case CB_COLOR7_PITCH:
1607		tmp = (reg - CB_COLOR0_PITCH) / 0x3c;
1608		track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx);
1609		track->cb_dirty = true;
1610		break;
1611	case CB_COLOR8_PITCH:
1612	case CB_COLOR9_PITCH:
1613	case CB_COLOR10_PITCH:
1614	case CB_COLOR11_PITCH:
1615		tmp = ((reg - CB_COLOR8_PITCH) / 0x1c) + 8;
1616		track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx);
1617		track->cb_dirty = true;
1618		break;
1619	case CB_COLOR0_SLICE:
1620	case CB_COLOR1_SLICE:
1621	case CB_COLOR2_SLICE:
1622	case CB_COLOR3_SLICE:
1623	case CB_COLOR4_SLICE:
1624	case CB_COLOR5_SLICE:
1625	case CB_COLOR6_SLICE:
1626	case CB_COLOR7_SLICE:
1627		tmp = (reg - CB_COLOR0_SLICE) / 0x3c;
1628		track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx);
1629		track->cb_color_slice_idx[tmp] = idx;
1630		track->cb_dirty = true;
1631		break;
1632	case CB_COLOR8_SLICE:
1633	case CB_COLOR9_SLICE:
1634	case CB_COLOR10_SLICE:
1635	case CB_COLOR11_SLICE:
1636		tmp = ((reg - CB_COLOR8_SLICE) / 0x1c) + 8;
1637		track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx);
1638		track->cb_color_slice_idx[tmp] = idx;
1639		track->cb_dirty = true;
1640		break;
1641	case CB_COLOR0_ATTRIB:
1642	case CB_COLOR1_ATTRIB:
1643	case CB_COLOR2_ATTRIB:
1644	case CB_COLOR3_ATTRIB:
1645	case CB_COLOR4_ATTRIB:
1646	case CB_COLOR5_ATTRIB:
1647	case CB_COLOR6_ATTRIB:
1648	case CB_COLOR7_ATTRIB:
1649		r = evergreen_cs_packet_next_reloc(p, &reloc);
1650		if (r) {
1651			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1652					"0x%04X\n", reg);
1653			return -EINVAL;
1654		}
1655		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1656			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
1657				unsigned bankw, bankh, mtaspect, tile_split;
1658
1659				evergreen_tiling_fields(reloc->lobj.tiling_flags,
1660							&bankw, &bankh, &mtaspect,
1661							&tile_split);
1662				ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
1663				ib[idx] |= CB_TILE_SPLIT(tile_split) |
1664					   CB_BANK_WIDTH(bankw) |
1665					   CB_BANK_HEIGHT(bankh) |
1666					   CB_MACRO_TILE_ASPECT(mtaspect);
1667			}
1668		}
1669		tmp = ((reg - CB_COLOR0_ATTRIB) / 0x3c);
1670		track->cb_color_attrib[tmp] = ib[idx];
1671		track->cb_dirty = true;
1672		break;
1673	case CB_COLOR8_ATTRIB:
1674	case CB_COLOR9_ATTRIB:
1675	case CB_COLOR10_ATTRIB:
1676	case CB_COLOR11_ATTRIB:
1677		r = evergreen_cs_packet_next_reloc(p, &reloc);
1678		if (r) {
1679			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1680					"0x%04X\n", reg);
1681			return -EINVAL;
1682		}
1683		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1684			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
1685				unsigned bankw, bankh, mtaspect, tile_split;
1686
1687				evergreen_tiling_fields(reloc->lobj.tiling_flags,
1688							&bankw, &bankh, &mtaspect,
1689							&tile_split);
1690				ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
1691				ib[idx] |= CB_TILE_SPLIT(tile_split) |
1692					   CB_BANK_WIDTH(bankw) |
1693					   CB_BANK_HEIGHT(bankh) |
1694					   CB_MACRO_TILE_ASPECT(mtaspect);
1695			}
1696		}
1697		tmp = ((reg - CB_COLOR8_ATTRIB) / 0x1c) + 8;
1698		track->cb_color_attrib[tmp] = ib[idx];
1699		track->cb_dirty = true;
1700		break;
1701	case CB_COLOR0_FMASK:
1702	case CB_COLOR1_FMASK:
1703	case CB_COLOR2_FMASK:
1704	case CB_COLOR3_FMASK:
1705	case CB_COLOR4_FMASK:
1706	case CB_COLOR5_FMASK:
1707	case CB_COLOR6_FMASK:
1708	case CB_COLOR7_FMASK:
1709		tmp = (reg - CB_COLOR0_FMASK) / 0x3c;
1710		r = evergreen_cs_packet_next_reloc(p, &reloc);
1711		if (r) {
1712			dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
1713			return -EINVAL;
1714		}
1715		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1716		track->cb_color_fmask_bo[tmp] = reloc->robj;
1717		break;
1718	case CB_COLOR0_CMASK:
1719	case CB_COLOR1_CMASK:
1720	case CB_COLOR2_CMASK:
1721	case CB_COLOR3_CMASK:
1722	case CB_COLOR4_CMASK:
1723	case CB_COLOR5_CMASK:
1724	case CB_COLOR6_CMASK:
1725	case CB_COLOR7_CMASK:
1726		tmp = (reg - CB_COLOR0_CMASK) / 0x3c;
1727		r = evergreen_cs_packet_next_reloc(p, &reloc);
1728		if (r) {
1729			dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
1730			return -EINVAL;
1731		}
1732		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1733		track->cb_color_cmask_bo[tmp] = reloc->robj;
1734		break;
1735	case CB_COLOR0_FMASK_SLICE:
1736	case CB_COLOR1_FMASK_SLICE:
1737	case CB_COLOR2_FMASK_SLICE:
1738	case CB_COLOR3_FMASK_SLICE:
1739	case CB_COLOR4_FMASK_SLICE:
1740	case CB_COLOR5_FMASK_SLICE:
1741	case CB_COLOR6_FMASK_SLICE:
1742	case CB_COLOR7_FMASK_SLICE:
1743		tmp = (reg - CB_COLOR0_FMASK_SLICE) / 0x3c;
1744		track->cb_color_fmask_slice[tmp] = radeon_get_ib_value(p, idx);
1745		break;
1746	case CB_COLOR0_CMASK_SLICE:
1747	case CB_COLOR1_CMASK_SLICE:
1748	case CB_COLOR2_CMASK_SLICE:
1749	case CB_COLOR3_CMASK_SLICE:
1750	case CB_COLOR4_CMASK_SLICE:
1751	case CB_COLOR5_CMASK_SLICE:
1752	case CB_COLOR6_CMASK_SLICE:
1753	case CB_COLOR7_CMASK_SLICE:
1754		tmp = (reg - CB_COLOR0_CMASK_SLICE) / 0x3c;
1755		track->cb_color_cmask_slice[tmp] = radeon_get_ib_value(p, idx);
1756		break;
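	/*
	 * Color buffer base: remember the BO and offset so the render
	 * target can be bounds-checked when the next draw is validated.
	 */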
1757	case CB_COLOR0_BASE:
1758	case CB_COLOR1_BASE:
1759	case CB_COLOR2_BASE:
1760	case CB_COLOR3_BASE:
1761	case CB_COLOR4_BASE:
1762	case CB_COLOR5_BASE:
1763	case CB_COLOR6_BASE:
1764	case CB_COLOR7_BASE:
1765		r = evergreen_cs_packet_next_reloc(p, &reloc);
1766		if (r) {
1767			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1768					"0x%04X\n", reg);
1769			return -EINVAL;
1770		}
1771		tmp = (reg - CB_COLOR0_BASE) / 0x3c;
1772		track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx);
1773		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1774		track->cb_color_bo[tmp] = reloc->robj;
1775		track->cb_dirty = true;
1776		break;
1777	case CB_COLOR8_BASE:
1778	case CB_COLOR9_BASE:
1779	case CB_COLOR10_BASE:
1780	case CB_COLOR11_BASE:
1781		r = evergreen_cs_packet_next_reloc(p, &reloc);
1782		if (r) {
1783			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1784					"0x%04X\n", reg);
1785			return -EINVAL;
1786		}
1787		tmp = ((reg - CB_COLOR8_BASE) / 0x1c) + 8;
1788		track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx);
1789		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1790		track->cb_color_bo[tmp] = reloc->robj;
1791		track->cb_dirty = true;
1792		break;
1793	case DB_HTILE_DATA_BASE:
1794		r = evergreen_cs_packet_next_reloc(p, &reloc);
1795		if (r) {
1796			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1797					"0x%04X\n", reg);
1798			return -EINVAL;
1799		}
1800		track->htile_offset = radeon_get_ib_value(p, idx);
1801		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1802		track->htile_bo = reloc->robj;
1803		track->db_dirty = true;
1804		break;
1805	case DB_HTILE_SURFACE:
1806		/* 8x8 only */
1807		track->htile_surface = radeon_get_ib_value(p, idx);
1808		/* force 8x8 htile width and height */
1809		ib[idx] |= 3;
1810		track->db_dirty = true;
1811		break;
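	/*
	 * The registers below just hold a 256-byte-aligned GPU address;
	 * they only need the relocation applied, nothing is tracked.
	 */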
1812	case CB_IMMED0_BASE:
1813	case CB_IMMED1_BASE:
1814	case CB_IMMED2_BASE:
1815	case CB_IMMED3_BASE:
1816	case CB_IMMED4_BASE:
1817	case CB_IMMED5_BASE:
1818	case CB_IMMED6_BASE:
1819	case CB_IMMED7_BASE:
1820	case CB_IMMED8_BASE:
1821	case CB_IMMED9_BASE:
1822	case CB_IMMED10_BASE:
1823	case CB_IMMED11_BASE:
1824	case SQ_PGM_START_FS:
1825	case SQ_PGM_START_ES:
1826	case SQ_PGM_START_VS:
1827	case SQ_PGM_START_GS:
1828	case SQ_PGM_START_PS:
1829	case SQ_PGM_START_HS:
1830	case SQ_PGM_START_LS:
1831	case SQ_CONST_MEM_BASE:
1832	case SQ_ALU_CONST_CACHE_GS_0:
1833	case SQ_ALU_CONST_CACHE_GS_1:
1834	case SQ_ALU_CONST_CACHE_GS_2:
1835	case SQ_ALU_CONST_CACHE_GS_3:
1836	case SQ_ALU_CONST_CACHE_GS_4:
1837	case SQ_ALU_CONST_CACHE_GS_5:
1838	case SQ_ALU_CONST_CACHE_GS_6:
1839	case SQ_ALU_CONST_CACHE_GS_7:
1840	case SQ_ALU_CONST_CACHE_GS_8:
1841	case SQ_ALU_CONST_CACHE_GS_9:
1842	case SQ_ALU_CONST_CACHE_GS_10:
1843	case SQ_ALU_CONST_CACHE_GS_11:
1844	case SQ_ALU_CONST_CACHE_GS_12:
1845	case SQ_ALU_CONST_CACHE_GS_13:
1846	case SQ_ALU_CONST_CACHE_GS_14:
1847	case SQ_ALU_CONST_CACHE_GS_15:
1848	case SQ_ALU_CONST_CACHE_PS_0:
1849	case SQ_ALU_CONST_CACHE_PS_1:
1850	case SQ_ALU_CONST_CACHE_PS_2:
1851	case SQ_ALU_CONST_CACHE_PS_3:
1852	case SQ_ALU_CONST_CACHE_PS_4:
1853	case SQ_ALU_CONST_CACHE_PS_5:
1854	case SQ_ALU_CONST_CACHE_PS_6:
1855	case SQ_ALU_CONST_CACHE_PS_7:
1856	case SQ_ALU_CONST_CACHE_PS_8:
1857	case SQ_ALU_CONST_CACHE_PS_9:
1858	case SQ_ALU_CONST_CACHE_PS_10:
1859	case SQ_ALU_CONST_CACHE_PS_11:
1860	case SQ_ALU_CONST_CACHE_PS_12:
1861	case SQ_ALU_CONST_CACHE_PS_13:
1862	case SQ_ALU_CONST_CACHE_PS_14:
1863	case SQ_ALU_CONST_CACHE_PS_15:
1864	case SQ_ALU_CONST_CACHE_VS_0:
1865	case SQ_ALU_CONST_CACHE_VS_1:
1866	case SQ_ALU_CONST_CACHE_VS_2:
1867	case SQ_ALU_CONST_CACHE_VS_3:
1868	case SQ_ALU_CONST_CACHE_VS_4:
1869	case SQ_ALU_CONST_CACHE_VS_5:
1870	case SQ_ALU_CONST_CACHE_VS_6:
1871	case SQ_ALU_CONST_CACHE_VS_7:
1872	case SQ_ALU_CONST_CACHE_VS_8:
1873	case SQ_ALU_CONST_CACHE_VS_9:
1874	case SQ_ALU_CONST_CACHE_VS_10:
1875	case SQ_ALU_CONST_CACHE_VS_11:
1876	case SQ_ALU_CONST_CACHE_VS_12:
1877	case SQ_ALU_CONST_CACHE_VS_13:
1878	case SQ_ALU_CONST_CACHE_VS_14:
1879	case SQ_ALU_CONST_CACHE_VS_15:
1880	case SQ_ALU_CONST_CACHE_HS_0:
1881	case SQ_ALU_CONST_CACHE_HS_1:
1882	case SQ_ALU_CONST_CACHE_HS_2:
1883	case SQ_ALU_CONST_CACHE_HS_3:
1884	case SQ_ALU_CONST_CACHE_HS_4:
1885	case SQ_ALU_CONST_CACHE_HS_5:
1886	case SQ_ALU_CONST_CACHE_HS_6:
1887	case SQ_ALU_CONST_CACHE_HS_7:
1888	case SQ_ALU_CONST_CACHE_HS_8:
1889	case SQ_ALU_CONST_CACHE_HS_9:
1890	case SQ_ALU_CONST_CACHE_HS_10:
1891	case SQ_ALU_CONST_CACHE_HS_11:
1892	case SQ_ALU_CONST_CACHE_HS_12:
1893	case SQ_ALU_CONST_CACHE_HS_13:
1894	case SQ_ALU_CONST_CACHE_HS_14:
1895	case SQ_ALU_CONST_CACHE_HS_15:
1896	case SQ_ALU_CONST_CACHE_LS_0:
1897	case SQ_ALU_CONST_CACHE_LS_1:
1898	case SQ_ALU_CONST_CACHE_LS_2:
1899	case SQ_ALU_CONST_CACHE_LS_3:
1900	case SQ_ALU_CONST_CACHE_LS_4:
1901	case SQ_ALU_CONST_CACHE_LS_5:
1902	case SQ_ALU_CONST_CACHE_LS_6:
1903	case SQ_ALU_CONST_CACHE_LS_7:
1904	case SQ_ALU_CONST_CACHE_LS_8:
1905	case SQ_ALU_CONST_CACHE_LS_9:
1906	case SQ_ALU_CONST_CACHE_LS_10:
1907	case SQ_ALU_CONST_CACHE_LS_11:
1908	case SQ_ALU_CONST_CACHE_LS_12:
1909	case SQ_ALU_CONST_CACHE_LS_13:
1910	case SQ_ALU_CONST_CACHE_LS_14:
1911	case SQ_ALU_CONST_CACHE_LS_15:
1912		r = evergreen_cs_packet_next_reloc(p, &reloc);
1913		if (r) {
1914			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1915					"0x%04X\n", reg);
1916			return -EINVAL;
1917		}
1918		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1919		break;
1920	case SX_MEMORY_EXPORT_BASE:
1921		if (p->rdev->family >= CHIP_CAYMAN) {
1922			dev_warn(p->dev, "bad SET_CONFIG_REG "
1923				 "0x%04X\n", reg);
1924			return -EINVAL;
1925		}
1926		r = evergreen_cs_packet_next_reloc(p, &reloc);
1927		if (r) {
1928			dev_warn(p->dev, "bad SET_CONFIG_REG "
1929					"0x%04X\n", reg);
1930			return -EINVAL;
1931		}
1932		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1933		break;
1934	case CAYMAN_SX_SCATTER_EXPORT_BASE:
1935		if (p->rdev->family < CHIP_CAYMAN) {
1936			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1937				 "0x%04X\n", reg);
1938			return -EINVAL;
1939		}
1940		r = evergreen_cs_packet_next_reloc(p, &reloc);
1941		if (r) {
1942			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1943					"0x%04X\n", reg);
1944			return -EINVAL;
1945		}
1946		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1947		break;
1948	case SX_MISC:
1949		track->sx_misc_kill_all_prims = (radeon_get_ib_value(p, idx) & 0x1) != 0;
1950		break;
1951	default:
1952		dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
1953		return -EINVAL;
1954	}
1955	return 0;
1956}
1957
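/*
 * Check a register offset against the generated safe-register bitmap: one
 * bit per register dword, 32 registers per bitmap word.  A clear bit means
 * the register may be written directly from the command stream.
 */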
1958static bool evergreen_is_safe_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
1959{
1960	u32 last_reg, m, i;
1961
1962	if (p->rdev->family >= CHIP_CAYMAN)
1963		last_reg = DRM_ARRAY_SIZE(cayman_reg_safe_bm);
1964	else
1965		last_reg = DRM_ARRAY_SIZE(evergreen_reg_safe_bm);
1966
1967	i = (reg >> 7);
1968	if (i >= last_reg) {
1969		dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
1970		return false;
1971	}
1972	m = 1 << ((reg >> 2) & 31);
1973	if (p->rdev->family >= CHIP_CAYMAN) {
1974		if (!(cayman_reg_safe_bm[i] & m))
1975			return true;
1976	} else {
1977		if (!(evergreen_reg_safe_bm[i] & m))
1978			return true;
1979	}
1980	dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
1981	return false;
1982}
1983
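/*
 * Validate a single PACKET3: check the dword count for the opcode, patch
 * GPU addresses from the relocation list, and reject writes to registers
 * that are not on the safe list.
 */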
1984static int evergreen_packet3_check(struct radeon_cs_parser *p,
1985				   struct radeon_cs_packet *pkt)
1986{
1987	struct radeon_cs_reloc *reloc;
1988	struct evergreen_cs_track *track;
1989	volatile u32 *ib;
1990	unsigned idx;
1991	unsigned i;
1992	unsigned start_reg, end_reg, reg;
1993	int r;
1994	u32 idx_value;
1995
1996	track = (struct evergreen_cs_track *)p->track;
1997	ib = p->ib.ptr;
1998	idx = pkt->idx + 1;
1999	idx_value = radeon_get_ib_value(p, idx);
2000
2001	switch (pkt->opcode) {
2002	case PACKET3_SET_PREDICATION:
2003	{
2004		int pred_op;
2005		int tmp;
2006		uint64_t offset;
2007
2008		if (pkt->count != 1) {
2009			DRM_ERROR("bad SET PREDICATION\n");
2010			return -EINVAL;
2011		}
2012
2013		tmp = radeon_get_ib_value(p, idx + 1);
2014		pred_op = (tmp >> 16) & 0x7;
2015
2016		/* for the clear predicate operation */
2017		if (pred_op == 0)
2018			return 0;
2019
2020		if (pred_op > 2) {
2021			DRM_ERROR("bad SET PREDICATION operation %d\n", pred_op);
2022			return -EINVAL;
2023		}
2024
2025		r = evergreen_cs_packet_next_reloc(p, &reloc);
2026		if (r) {
2027			DRM_ERROR("bad SET PREDICATION\n");
2028			return -EINVAL;
2029		}
2030
2031		offset = reloc->lobj.gpu_offset +
2032		         (idx_value & 0xfffffff0) +
2033		         ((u64)(tmp & 0xff) << 32);
2034
2035		ib[idx + 0] = offset;
2036		ib[idx + 1] = (tmp & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2037	}
2038	break;
2039	case PACKET3_CONTEXT_CONTROL:
2040		if (pkt->count != 1) {
2041			DRM_ERROR("bad CONTEXT_CONTROL\n");
2042			return -EINVAL;
2043		}
2044		break;
2045	case PACKET3_INDEX_TYPE:
2046	case PACKET3_NUM_INSTANCES:
2047	case PACKET3_CLEAR_STATE:
2048		if (pkt->count) {
2049			DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n");
2050			return -EINVAL;
2051		}
2052		break;
2053	case CAYMAN_PACKET3_DEALLOC_STATE:
2054		if (p->rdev->family < CHIP_CAYMAN) {
2055			DRM_ERROR("bad PACKET3_DEALLOC_STATE\n");
2056			return -EINVAL;
2057		}
2058		if (pkt->count) {
2059			DRM_ERROR("bad PACKET3_DEALLOC_STATE (invalid count)\n");
2060			return -EINVAL;
2061		}
2062		break;
2063	case PACKET3_INDEX_BASE:
2064	{
2065		uint64_t offset;
2066
2067		if (pkt->count != 1) {
2068			DRM_ERROR("bad INDEX_BASE\n");
2069			return -EINVAL;
2070		}
2071		r = evergreen_cs_packet_next_reloc(p, &reloc);
2072		if (r) {
2073			DRM_ERROR("bad INDEX_BASE\n");
2074			return -EINVAL;
2075		}
2076
2077		offset = reloc->lobj.gpu_offset +
2078		         idx_value +
2079		         ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
2080
2081		ib[idx+0] = offset;
2082		ib[idx+1] = upper_32_bits(offset) & 0xff;
2083
2084		r = evergreen_cs_track_check(p);
2085		if (r) {
2086			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2087			return r;
2088		}
2089		break;
2090	}
2091	case PACKET3_DRAW_INDEX:
2092	{
2093		uint64_t offset;
2094		if (pkt->count != 3) {
2095			DRM_ERROR("bad DRAW_INDEX\n");
2096			return -EINVAL;
2097		}
2098		r = evergreen_cs_packet_next_reloc(p, &reloc);
2099		if (r) {
2100			DRM_ERROR("bad DRAW_INDEX\n");
2101			return -EINVAL;
2102		}
2103
2104		offset = reloc->lobj.gpu_offset +
2105		         idx_value +
2106		         ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
2107
2108		ib[idx+0] = offset;
2109		ib[idx+1] = upper_32_bits(offset) & 0xff;
2110
2111		r = evergreen_cs_track_check(p);
2112		if (r) {
2113			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2114			return r;
2115		}
2116		break;
2117	}
2118	case PACKET3_DRAW_INDEX_2:
2119	{
2120		uint64_t offset;
2121
2122		if (pkt->count != 4) {
2123			DRM_ERROR("bad DRAW_INDEX_2\n");
2124			return -EINVAL;
2125		}
2126		r = evergreen_cs_packet_next_reloc(p, &reloc);
2127		if (r) {
2128			DRM_ERROR("bad DRAW_INDEX_2\n");
2129			return -EINVAL;
2130		}
2131
2132		offset = reloc->lobj.gpu_offset +
2133		         radeon_get_ib_value(p, idx+1) +
2134		         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2135
2136		ib[idx+1] = offset;
2137		ib[idx+2] = upper_32_bits(offset) & 0xff;
2138
2139		r = evergreen_cs_track_check(p);
2140		if (r) {
2141			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2142			return r;
2143		}
2144		break;
2145	}
2146	case PACKET3_DRAW_INDEX_AUTO:
2147		if (pkt->count != 1) {
2148			DRM_ERROR("bad DRAW_INDEX_AUTO\n");
2149			return -EINVAL;
2150		}
2151		r = evergreen_cs_track_check(p);
2152		if (r) {
2153			dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
2154			return r;
2155		}
2156		break;
2157	case PACKET3_DRAW_INDEX_MULTI_AUTO:
2158		if (pkt->count != 2) {
2159			DRM_ERROR("bad DRAW_INDEX_MULTI_AUTO\n");
2160			return -EINVAL;
2161		}
2162		r = evergreen_cs_track_check(p);
2163		if (r) {
2164			dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
2165			return r;
2166		}
2167		break;
2168	case PACKET3_DRAW_INDEX_IMMD:
2169		if (pkt->count < 2) {
2170			DRM_ERROR("bad DRAW_INDEX_IMMD\n");
2171			return -EINVAL;
2172		}
2173		r = evergreen_cs_track_check(p);
2174		if (r) {
2175			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2176			return r;
2177		}
2178		break;
2179	case PACKET3_DRAW_INDEX_OFFSET:
2180		if (pkt->count != 2) {
2181			DRM_ERROR("bad DRAW_INDEX_OFFSET\n");
2182			return -EINVAL;
2183		}
2184		r = evergreen_cs_track_check(p);
2185		if (r) {
2186			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2187			return r;
2188		}
2189		break;
2190	case PACKET3_DRAW_INDEX_OFFSET_2:
2191		if (pkt->count != 3) {
2192			DRM_ERROR("bad DRAW_INDEX_OFFSET_2\n");
2193			return -EINVAL;
2194		}
2195		r = evergreen_cs_track_check(p);
2196		if (r) {
2197			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2198			return r;
2199		}
2200		break;
2201	case PACKET3_DISPATCH_DIRECT:
2202		if (pkt->count != 3) {
2203			DRM_ERROR("bad DISPATCH_DIRECT\n");
2204			return -EINVAL;
2205		}
2206		r = evergreen_cs_track_check(p);
2207		if (r) {
2208			dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
2209			return r;
2210		}
2211		break;
2212	case PACKET3_DISPATCH_INDIRECT:
2213		if (pkt->count != 1) {
2214			DRM_ERROR("bad DISPATCH_INDIRECT\n");
2215			return -EINVAL;
2216		}
2217		r = evergreen_cs_packet_next_reloc(p, &reloc);
2218		if (r) {
2219			DRM_ERROR("bad DISPATCH_INDIRECT\n");
2220			return -EINVAL;
2221		}
2222		ib[idx+0] = idx_value + (u32)(reloc->lobj.gpu_offset & 0xffffffff);
2223		r = evergreen_cs_track_check(p);
2224		if (r) {
2225			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2226			return r;
2227		}
2228		break;
2229	case PACKET3_WAIT_REG_MEM:
2230		if (pkt->count != 5) {
2231			DRM_ERROR("bad WAIT_REG_MEM\n");
2232			return -EINVAL;
2233		}
2234		/* bit 4 is reg (0) or mem (1) */
2235		if (idx_value & 0x10) {
2236			uint64_t offset;
2237
2238			r = evergreen_cs_packet_next_reloc(p, &reloc);
2239			if (r) {
2240				DRM_ERROR("bad WAIT_REG_MEM\n");
2241				return -EINVAL;
2242			}
2243
2244			offset = reloc->lobj.gpu_offset +
2245			         (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2246			         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2247
2248			ib[idx+1] = (ib[idx+1] & 0x3) | (offset & 0xfffffffc);
2249			ib[idx+2] = upper_32_bits(offset) & 0xff;
2250		}
2251		break;
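	/*
	 * CP_DMA: register endpoints other than GDS are rejected, non
	 * memory-to-memory copies need a dword-aligned count, and memory
	 * endpoints are bounds-checked against their BOs before the
	 * addresses are patched.
	 */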
2252	case PACKET3_CP_DMA:
2253	{
2254		u32 command, size, info;
2255		u64 offset, tmp;
2256		if (pkt->count != 4) {
2257			DRM_ERROR("bad CP DMA\n");
2258			return -EINVAL;
2259		}
2260		command = radeon_get_ib_value(p, idx+4);
2261		size = command & 0x1fffff;
2262		info = radeon_get_ib_value(p, idx+1);
2263		if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
2264		    (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
2265		    ((((info & 0x00300000) >> 20) == 0) &&
2266		     (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
2267		    ((((info & 0x60000000) >> 29) == 0) &&
2268		     (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
2269			/* non mem-to-mem copies require a dword-aligned count */
2270			if (size % 4) {
2271				DRM_ERROR("CP DMA command requires dw count alignment\n");
2272				return -EINVAL;
2273			}
2274		}
2275		if (command & PACKET3_CP_DMA_CMD_SAS) {
2276			/* src address space is register */
2277			/* GDS is ok */
2278			if (((info & 0x60000000) >> 29) != 1) {
2279				DRM_ERROR("CP DMA SAS not supported\n");
2280				return -EINVAL;
2281			}
2282		} else {
2283			if (command & PACKET3_CP_DMA_CMD_SAIC) {
2284				DRM_ERROR("CP DMA SAIC only supported for registers\n");
2285				return -EINVAL;
2286			}
2287			/* src address space is memory */
2288			if (((info & 0x60000000) >> 29) == 0) {
2289				r = evergreen_cs_packet_next_reloc(p, &reloc);
2290				if (r) {
2291					DRM_ERROR("bad CP DMA SRC\n");
2292					return -EINVAL;
2293				}
2294
2295				tmp = radeon_get_ib_value(p, idx) +
2296					((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
2297
2298				offset = reloc->lobj.gpu_offset + tmp;
2299
2300				if ((tmp + size) > radeon_bo_size(reloc->robj)) {
2301					dev_warn(p->dev, "CP DMA src buffer too small (%ju %lu)\n",
2302						 (uintmax_t)tmp + size, radeon_bo_size(reloc->robj));
2303					return -EINVAL;
2304				}
2305
2306				ib[idx] = offset;
2307				ib[idx+1] = (ib[idx+1] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2308			} else if (((info & 0x60000000) >> 29) != 2) {
2309				DRM_ERROR("bad CP DMA SRC_SEL\n");
2310				return -EINVAL;
2311			}
2312		}
2313		if (command & PACKET3_CP_DMA_CMD_DAS) {
2314			/* dst address space is register */
2315			/* GDS is ok */
2316			if (((info & 0x00300000) >> 20) != 1) {
2317				DRM_ERROR("CP DMA DAS not supported\n");
2318				return -EINVAL;
2319			}
2320		} else {
2321			/* dst address space is memory */
2322			if (command & PACKET3_CP_DMA_CMD_DAIC) {
2323				DRM_ERROR("CP DMA DAIC only supported for registers\n");
2324				return -EINVAL;
2325			}
2326			if (((info & 0x00300000) >> 20) == 0) {
2327				r = evergreen_cs_packet_next_reloc(p, &reloc);
2328				if (r) {
2329					DRM_ERROR("bad CP DMA DST\n");
2330					return -EINVAL;
2331				}
2332
2333				tmp = radeon_get_ib_value(p, idx+2) +
2334					((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32);
2335
2336				offset = reloc->lobj.gpu_offset + tmp;
2337
2338				if ((tmp + size) > radeon_bo_size(reloc->robj)) {
2339					dev_warn(p->dev, "CP DMA dst buffer too small (%ju %lu)\n",
2340						 (uintmax_t)tmp + size, radeon_bo_size(reloc->robj));
2341					return -EINVAL;
2342				}
2343
2344				ib[idx+2] = offset;
2345				ib[idx+3] = upper_32_bits(offset) & 0xff;
2346			} else {
2347				DRM_ERROR("bad CP DMA DST_SEL\n");
2348				return -EINVAL;
2349			}
2350		}
2351		break;
2352	}
2353	case PACKET3_SURFACE_SYNC:
2354		if (pkt->count != 3) {
2355			DRM_ERROR("bad SURFACE_SYNC\n");
2356			return -EINVAL;
2357		}
2358		/* 0xffffffff/0x0 is flush all cache flag */
2359		if (radeon_get_ib_value(p, idx + 1) != 0xffffffff ||
2360		    radeon_get_ib_value(p, idx + 2) != 0) {
2361			r = evergreen_cs_packet_next_reloc(p, &reloc);
2362			if (r) {
2363				DRM_ERROR("bad SURFACE_SYNC\n");
2364				return -EINVAL;
2365			}
2366			ib[idx+2] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
2367		}
2368		break;
2369	case PACKET3_EVENT_WRITE:
2370		if (pkt->count != 2 && pkt->count != 0) {
2371			DRM_ERROR("bad EVENT_WRITE\n");
2372			return -EINVAL;
2373		}
2374		if (pkt->count) {
2375			uint64_t offset;
2376
2377			r = evergreen_cs_packet_next_reloc(p, &reloc);
2378			if (r) {
2379				DRM_ERROR("bad EVENT_WRITE\n");
2380				return -EINVAL;
2381			}
2382			offset = reloc->lobj.gpu_offset +
2383			         (radeon_get_ib_value(p, idx+1) & 0xfffffff8) +
2384			         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2385
2386			ib[idx+1] = offset & 0xfffffff8;
2387			ib[idx+2] = upper_32_bits(offset) & 0xff;
2388		}
2389		break;
2390	case PACKET3_EVENT_WRITE_EOP:
2391	{
2392		uint64_t offset;
2393
2394		if (pkt->count != 4) {
2395			DRM_ERROR("bad EVENT_WRITE_EOP\n");
2396			return -EINVAL;
2397		}
2398		r = evergreen_cs_packet_next_reloc(p, &reloc);
2399		if (r) {
2400			DRM_ERROR("bad EVENT_WRITE_EOP\n");
2401			return -EINVAL;
2402		}
2403
2404		offset = reloc->lobj.gpu_offset +
2405		         (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2406		         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2407
2408		ib[idx+1] = offset & 0xfffffffc;
2409		ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2410		break;
2411	}
2412	case PACKET3_EVENT_WRITE_EOS:
2413	{
2414		uint64_t offset;
2415
2416		if (pkt->count != 3) {
2417			DRM_ERROR("bad EVENT_WRITE_EOS\n");
2418			return -EINVAL;
2419		}
2420		r = evergreen_cs_packet_next_reloc(p, &reloc);
2421		if (r) {
2422			DRM_ERROR("bad EVENT_WRITE_EOS\n");
2423			return -EINVAL;
2424		}
2425
2426		offset = reloc->lobj.gpu_offset +
2427		         (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2428		         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2429
2430		ib[idx+1] = offset & 0xfffffffc;
2431		ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2432		break;
2433	}
2434	case PACKET3_SET_CONFIG_REG:
2435		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
2436		end_reg = 4 * pkt->count + start_reg - 4;
2437		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
2438		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
2439		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
2440			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
2441			return -EINVAL;
2442		}
2443		for (i = 0; i < pkt->count; i++) {
2444			reg = start_reg + (4 * i);
2445			r = evergreen_cs_check_reg(p, reg, idx+1+i);
2446			if (r)
2447				return r;
2448		}
2449		break;
2450	case PACKET3_SET_CONTEXT_REG:
2451		start_reg = (idx_value << 2) + PACKET3_SET_CONTEXT_REG_START;
2452		end_reg = 4 * pkt->count + start_reg - 4;
2453		if ((start_reg < PACKET3_SET_CONTEXT_REG_START) ||
2454		    (start_reg >= PACKET3_SET_CONTEXT_REG_END) ||
2455		    (end_reg >= PACKET3_SET_CONTEXT_REG_END)) {
2456			DRM_ERROR("bad PACKET3_SET_CONTEXT_REG\n");
2457			return -EINVAL;
2458		}
2459		for (i = 0; i < pkt->count; i++) {
2460			reg = start_reg + (4 * i);
2461			r = evergreen_cs_check_reg(p, reg, idx+1+i);
2462			if (r)
2463				return r;
2464		}
2465		break;
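	/*
	 * SET_RESOURCE carries 8-dword texture/vertex descriptors; each
	 * descriptor gets its base (and mip/FMASK base for textures)
	 * relocated and, for textures, its tiling fields rebuilt from the BO.
	 */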
2466	case PACKET3_SET_RESOURCE:
2467		if (pkt->count % 8) {
2468			DRM_ERROR("bad SET_RESOURCE\n");
2469			return -EINVAL;
2470		}
2471		start_reg = (idx_value << 2) + PACKET3_SET_RESOURCE_START;
2472		end_reg = 4 * pkt->count + start_reg - 4;
2473		if ((start_reg < PACKET3_SET_RESOURCE_START) ||
2474		    (start_reg >= PACKET3_SET_RESOURCE_END) ||
2475		    (end_reg >= PACKET3_SET_RESOURCE_END)) {
2476			DRM_ERROR("bad SET_RESOURCE\n");
2477			return -EINVAL;
2478		}
2479		for (i = 0; i < (pkt->count / 8); i++) {
2480			struct radeon_bo *texture, *mipmap;
2481			u32 toffset, moffset;
2482			u32 size, offset, mip_address, tex_dim;
2483
2484			switch (G__SQ_CONSTANT_TYPE(radeon_get_ib_value(p, idx+1+(i*8)+7))) {
2485			case SQ_TEX_VTX_VALID_TEXTURE:
2486				/* tex base */
2487				r = evergreen_cs_packet_next_reloc(p, &reloc);
2488				if (r) {
2489					DRM_ERROR("bad SET_RESOURCE (tex)\n");
2490					return -EINVAL;
2491				}
2492				if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
2493					ib[idx+1+(i*8)+1] |=
2494						TEX_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
2495					if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
2496						unsigned bankw, bankh, mtaspect, tile_split;
2497
2498						evergreen_tiling_fields(reloc->lobj.tiling_flags,
2499									&bankw, &bankh, &mtaspect,
2500									&tile_split);
2501						ib[idx+1+(i*8)+6] |= TEX_TILE_SPLIT(tile_split);
2502						ib[idx+1+(i*8)+7] |=
2503							TEX_BANK_WIDTH(bankw) |
2504							TEX_BANK_HEIGHT(bankh) |
2505							MACRO_TILE_ASPECT(mtaspect) |
2506							TEX_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
2507					}
2508				}
2509				texture = reloc->robj;
2510				toffset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
2511
2512				/* tex mip base */
2513				tex_dim = ib[idx+1+(i*8)+0] & 0x7;
2514				mip_address = ib[idx+1+(i*8)+3];
2515
2516				if ((tex_dim == SQ_TEX_DIM_2D_MSAA || tex_dim == SQ_TEX_DIM_2D_ARRAY_MSAA) &&
2517				    !mip_address &&
2518				    !evergreen_cs_packet_next_is_pkt3_nop(p)) {
2519					/* MIP_ADDRESS should point to FMASK for an MSAA texture.
2520					 * It should be 0 if FMASK is disabled. */
2521					moffset = 0;
2522					mipmap = NULL;
2523				} else {
2524					r = evergreen_cs_packet_next_reloc(p, &reloc);
2525					if (r) {
2526						DRM_ERROR("bad SET_RESOURCE (tex)\n");
2527						return -EINVAL;
2528					}
2529					moffset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
2530					mipmap = reloc->robj;
2531				}
2532
2533				r = evergreen_cs_track_validate_texture(p, texture, mipmap, idx+1+(i*8));
2534				if (r)
2535					return r;
2536				ib[idx+1+(i*8)+2] += toffset;
2537				ib[idx+1+(i*8)+3] += moffset;
2538				break;
2539			case SQ_TEX_VTX_VALID_BUFFER:
2540			{
2541				uint64_t offset64;
2542				/* vtx base */
2543				r = evergreen_cs_packet_next_reloc(p, &reloc);
2544				if (r) {
2545					DRM_ERROR("bad SET_RESOURCE (vtx)\n");
2546					return -EINVAL;
2547				}
2548				offset = radeon_get_ib_value(p, idx+1+(i*8)+0);
2549				size = radeon_get_ib_value(p, idx+1+(i*8)+1);
2550				if (p->rdev && (size + offset) > radeon_bo_size(reloc->robj)) {
2551					/* force size to size of the buffer */
2552					dev_warn(p->dev, "vbo resource seems too big for the bo\n");
2553					ib[idx+1+(i*8)+1] = radeon_bo_size(reloc->robj) - offset;
2554				}
2555
2556				offset64 = reloc->lobj.gpu_offset + offset;
2557				ib[idx+1+(i*8)+0] = offset64;
2558				ib[idx+1+(i*8)+2] = (ib[idx+1+(i*8)+2] & 0xffffff00) |
2559						    (upper_32_bits(offset64) & 0xff);
2560				break;
2561			}
2562			case SQ_TEX_VTX_INVALID_TEXTURE:
2563			case SQ_TEX_VTX_INVALID_BUFFER:
2564			default:
2565				DRM_ERROR("bad SET_RESOURCE\n");
2566				return -EINVAL;
2567			}
2568		}
2569		break;
2570	case PACKET3_SET_ALU_CONST:
2571		/* XXX fix me ALU const buffers only */
2572		break;
2573	case PACKET3_SET_BOOL_CONST:
2574		start_reg = (idx_value << 2) + PACKET3_SET_BOOL_CONST_START;
2575		end_reg = 4 * pkt->count + start_reg - 4;
2576		if ((start_reg < PACKET3_SET_BOOL_CONST_START) ||
2577		    (start_reg >= PACKET3_SET_BOOL_CONST_END) ||
2578		    (end_reg >= PACKET3_SET_BOOL_CONST_END)) {
2579			DRM_ERROR("bad SET_BOOL_CONST\n");
2580			return -EINVAL;
2581		}
2582		break;
2583	case PACKET3_SET_LOOP_CONST:
2584		start_reg = (idx_value << 2) + PACKET3_SET_LOOP_CONST_START;
2585		end_reg = 4 * pkt->count + start_reg - 4;
2586		if ((start_reg < PACKET3_SET_LOOP_CONST_START) ||
2587		    (start_reg >= PACKET3_SET_LOOP_CONST_END) ||
2588		    (end_reg >= PACKET3_SET_LOOP_CONST_END)) {
2589			DRM_ERROR("bad SET_LOOP_CONST\n");
2590			return -EINVAL;
2591		}
2592		break;
2593	case PACKET3_SET_CTL_CONST:
2594		start_reg = (idx_value << 2) + PACKET3_SET_CTL_CONST_START;
2595		end_reg = 4 * pkt->count + start_reg - 4;
2596		if ((start_reg < PACKET3_SET_CTL_CONST_START) ||
2597		    (start_reg >= PACKET3_SET_CTL_CONST_END) ||
2598		    (end_reg >= PACKET3_SET_CTL_CONST_END)) {
2599			DRM_ERROR("bad SET_CTL_CONST\n");
2600			return -EINVAL;
2601		}
2602		break;
2603	case PACKET3_SET_SAMPLER:
2604		if (pkt->count % 3) {
2605			DRM_ERROR("bad SET_SAMPLER\n");
2606			return -EINVAL;
2607		}
2608		start_reg = (idx_value << 2) + PACKET3_SET_SAMPLER_START;
2609		end_reg = 4 * pkt->count + start_reg - 4;
2610		if ((start_reg < PACKET3_SET_SAMPLER_START) ||
2611		    (start_reg >= PACKET3_SET_SAMPLER_END) ||
2612		    (end_reg >= PACKET3_SET_SAMPLER_END)) {
2613			DRM_ERROR("bad SET_SAMPLER\n");
2614			return -EINVAL;
2615		}
2616		break;
2617	case PACKET3_STRMOUT_BUFFER_UPDATE:
2618		if (pkt->count != 4) {
2619			DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (invalid count)\n");
2620			return -EINVAL;
2621		}
2622		/* Updating memory at DST_ADDRESS. */
2623		if (idx_value & 0x1) {
2624			u64 offset;
2625			r = evergreen_cs_packet_next_reloc(p, &reloc);
2626			if (r) {
2627				DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing dst reloc)\n");
2628				return -EINVAL;
2629			}
2630			offset = radeon_get_ib_value(p, idx+1);
2631			offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2632			if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2633				DRM_ERROR("bad STRMOUT_BUFFER_UPDATE dst bo too small: 0x%jx, 0x%lx\n",
2634					  (uintmax_t)offset + 4, radeon_bo_size(reloc->robj));
2635				return -EINVAL;
2636			}
2637			offset += reloc->lobj.gpu_offset;
2638			ib[idx+1] = offset;
2639			ib[idx+2] = upper_32_bits(offset) & 0xff;
2640		}
2641		/* Reading data from SRC_ADDRESS. */
2642		if (((idx_value >> 1) & 0x3) == 2) {
2643			u64 offset;
2644			r = evergreen_cs_packet_next_reloc(p, &reloc);
2645			if (r) {
2646				DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing src reloc)\n");
2647				return -EINVAL;
2648			}
2649			offset = radeon_get_ib_value(p, idx+3);
2650			offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2651			if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2652				DRM_ERROR("bad STRMOUT_BUFFER_UPDATE src bo too small: 0x%jx, 0x%lx\n",
2653					  (uintmax_t)offset + 4, radeon_bo_size(reloc->robj));
2654				return -EINVAL;
2655			}
2656			offset += reloc->lobj.gpu_offset;
2657			ib[idx+3] = offset;
2658			ib[idx+4] = upper_32_bits(offset) & 0xff;
2659		}
2660		break;
2661	case PACKET3_MEM_WRITE:
2662	{
2663		u64 offset;
2664
2665		if (pkt->count != 3) {
2666			DRM_ERROR("bad MEM_WRITE (invalid count)\n");
2667			return -EINVAL;
2668		}
2669		r = evergreen_cs_packet_next_reloc(p, &reloc);
2670		if (r) {
2671			DRM_ERROR("bad MEM_WRITE (missing reloc)\n");
2672			return -EINVAL;
2673		}
2674		offset = radeon_get_ib_value(p, idx+0);
2675		offset += ((u64)(radeon_get_ib_value(p, idx+1) & 0xff)) << 32UL;
2676		if (offset & 0x7) {
2677			DRM_ERROR("bad MEM_WRITE (address not qwords aligned)\n");
2678			return -EINVAL;
2679		}
2680		if ((offset + 8) > radeon_bo_size(reloc->robj)) {
2681			DRM_ERROR("bad MEM_WRITE bo too small: 0x%jx, 0x%lx\n",
2682				  (uintmax_t)offset + 8, radeon_bo_size(reloc->robj));
2683			return -EINVAL;
2684		}
2685		offset += reloc->lobj.gpu_offset;
2686		ib[idx+0] = offset;
2687		ib[idx+1] = upper_32_bits(offset) & 0xff;
2688		break;
2689	}
2690	case PACKET3_COPY_DW:
2691		if (pkt->count != 4) {
2692			DRM_ERROR("bad COPY_DW (invalid count)\n");
2693			return -EINVAL;
2694		}
2695		if (idx_value & 0x1) {
2696			u64 offset;
2697			/* SRC is memory. */
2698			r = evergreen_cs_packet_next_reloc(p, &reloc);
2699			if (r) {
2700				DRM_ERROR("bad COPY_DW (missing src reloc)\n");
2701				return -EINVAL;
2702			}
2703			offset = radeon_get_ib_value(p, idx+1);
2704			offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2705			if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2706				DRM_ERROR("bad COPY_DW src bo too small: 0x%jx, 0x%lx\n",
2707					  (uintmax_t)offset + 4, radeon_bo_size(reloc->robj));
2708				return -EINVAL;
2709			}
2710			offset += reloc->lobj.gpu_offset;
2711			ib[idx+1] = offset;
2712			ib[idx+2] = upper_32_bits(offset) & 0xff;
2713		} else {
2714			/* SRC is a reg. */
2715			reg = radeon_get_ib_value(p, idx+1) << 2;
2716			if (!evergreen_is_safe_reg(p, reg, idx+1))
2717				return -EINVAL;
2718		}
2719		if (idx_value & 0x2) {
2720			u64 offset;
2721			/* DST is memory. */
2722			r = evergreen_cs_packet_next_reloc(p, &reloc);
2723			if (r) {
2724				DRM_ERROR("bad COPY_DW (missing dst reloc)\n");
2725				return -EINVAL;
2726			}
2727			offset = radeon_get_ib_value(p, idx+3);
2728			offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2729			if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2730				DRM_ERROR("bad COPY_DW dst bo too small: 0x%jx, 0x%lx\n",
2731					  (uintmax_t)offset + 4, radeon_bo_size(reloc->robj));
2732				return -EINVAL;
2733			}
2734			offset += reloc->lobj.gpu_offset;
2735			ib[idx+3] = offset;
2736			ib[idx+4] = upper_32_bits(offset) & 0xff;
2737		} else {
2738			/* DST is a reg. */
2739			reg = radeon_get_ib_value(p, idx+3) << 2;
2740			if (!evergreen_is_safe_reg(p, reg, idx+3))
2741				return -EINVAL;
2742		}
2743		break;
2744	case PACKET3_NOP:
2745		break;
2746	default:
2747		DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
2748		return -EINVAL;
2749	}
2750	return 0;
2751}
2752
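/*
 * Top-level checker for the GFX ring: set up the state tracker from the
 * asic tile config on first use, then validate the IB packet by packet.
 */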
2753int evergreen_cs_parse(struct radeon_cs_parser *p)
2754{
2755	struct radeon_cs_packet pkt;
2756	struct evergreen_cs_track *track;
2757	u32 tmp;
2758	int r;
2759
2760	if (p->track == NULL) {
2761		/* initialize tracker, we are in kms */
2762		track = malloc(sizeof(*track), DRM_MEM_DRIVER, M_ZERO | M_WAITOK);
2763		if (track == NULL)
2764			return -ENOMEM;
2765		evergreen_cs_track_init(track);
2766		if (p->rdev->family >= CHIP_CAYMAN)
2767			tmp = p->rdev->config.cayman.tile_config;
2768		else
2769			tmp = p->rdev->config.evergreen.tile_config;
2770
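		/*
		 * tile_config layout as consumed here: [3:0] pipes,
		 * [7:4] banks, [11:8] group size, [15:12] row size.
		 */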
2771		switch (tmp & 0xf) {
2772		case 0:
2773			track->npipes = 1;
2774			break;
2775		case 1:
2776		default:
2777			track->npipes = 2;
2778			break;
2779		case 2:
2780			track->npipes = 4;
2781			break;
2782		case 3:
2783			track->npipes = 8;
2784			break;
2785		}
2786
2787		switch ((tmp & 0xf0) >> 4) {
2788		case 0:
2789			track->nbanks = 4;
2790			break;
2791		case 1:
2792		default:
2793			track->nbanks = 8;
2794			break;
2795		case 2:
2796			track->nbanks = 16;
2797			break;
2798		}
2799
2800		switch ((tmp & 0xf00) >> 8) {
2801		case 0:
2802			track->group_size = 256;
2803			break;
2804		case 1:
2805		default:
2806			track->group_size = 512;
2807			break;
2808		}
2809
2810		switch ((tmp & 0xf000) >> 12) {
2811		case 0:
2812			track->row_size = 1;
2813			break;
2814		case 1:
2815		default:
2816			track->row_size = 2;
2817			break;
2818		case 2:
2819			track->row_size = 4;
2820			break;
2821		}
2822
2823		p->track = track;
2824	}
2825	do {
2826		r = evergreen_cs_packet_parse(p, &pkt, p->idx);
2827		if (r) {
2828			free(p->track, DRM_MEM_DRIVER);
2829			p->track = NULL;
2830			return r;
2831		}
2832		p->idx += pkt.count + 2;
2833		switch (pkt.type) {
2834		case PACKET_TYPE0:
2835			r = evergreen_cs_parse_packet0(p, &pkt);
2836			break;
2837		case PACKET_TYPE2:
2838			break;
2839		case PACKET_TYPE3:
2840			r = evergreen_packet3_check(p, &pkt);
2841			break;
2842		default:
2843			DRM_ERROR("Unknown packet type %d !\n", pkt.type);
2844			free(p->track, DRM_MEM_DRIVER);
2845			p->track = NULL;
2846			return -EINVAL;
2847		}
2848		if (r) {
2849			free(p->track, DRM_MEM_DRIVER);
2850			p->track = NULL;
2851			return r;
2852		}
2853	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
2854#if 0
2855	for (r = 0; r < p->ib.length_dw; r++) {
2856		DRM_INFO("%05d  0x%08X\n", r, p->ib.ptr[r]);
2857		mdelay(1);
2858	}
2859#endif
2860	free(p->track, DRM_MEM_DRIVER);
2861	p->track = NULL;
2862	return 0;
2863}
2864
2865/*
2866 *  DMA
2867 */
2868
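/*
 * DMA IB packets start with a single header dword:
 *   [31:28] command, [26] new-style opcode flag, [23] tiled flag,
 *   [22:20] misc sub-opcode, [19:0] count.
 */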
2869#define GET_DMA_CMD(h) (((h) & 0xf0000000) >> 28)
2870#define GET_DMA_COUNT(h) ((h) & 0x000fffff)
2871#define GET_DMA_T(h) (((h) & 0x00800000) >> 23)
2872#define GET_DMA_NEW(h) (((h) & 0x04000000) >> 26)
2873#define GET_DMA_MISC(h) (((h) & 0x0700000) >> 20)
2874
2875/**
2876 * evergreen_dma_cs_parse() - parse the DMA IB
2877 * @p:		parser structure holding parsing context.
2878 *
2879 * Parses the DMA IB from the CS ioctl and updates
2880 * the GPU addresses based on the reloc information and
2881 * checks for errors. (Evergreen-Cayman)
2882 * Returns 0 for success and an error on failure.
2883 **/
2884int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
2885{
2886	struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
2887	struct radeon_cs_reloc *src_reloc, *dst_reloc, *dst2_reloc;
2888	u32 header, cmd, count, tiled, new_cmd, misc;
2889	volatile u32 *ib = p->ib.ptr;
2890	u32 idx, idx_value;
2891	u64 src_offset, dst_offset, dst2_offset;
2892	int r;
2893
2894	do {
2895		if (p->idx >= ib_chunk->length_dw) {
2896			DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
2897				  p->idx, ib_chunk->length_dw);
2898			return -EINVAL;
2899		}
2900		idx = p->idx;
2901		header = radeon_get_ib_value(p, idx);
2902		cmd = GET_DMA_CMD(header);
2903		count = GET_DMA_COUNT(header);
2904		tiled = GET_DMA_T(header);
2905		new_cmd = GET_DMA_NEW(header);
2906		misc = GET_DMA_MISC(header);
2907
2908		switch (cmd) {
2909		case DMA_PACKET_WRITE:
2910			r = r600_dma_cs_next_reloc(p, &dst_reloc);
2911			if (r) {
2912				DRM_ERROR("bad DMA_PACKET_WRITE\n");
2913				return -EINVAL;
2914			}
2915			if (tiled) {
2916				dst_offset = radeon_get_ib_value(p, idx+1);
2917				dst_offset <<= 8;
2918
2919				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
2920				p->idx += count + 7;
2921			} else {
2922				dst_offset = radeon_get_ib_value(p, idx+1);
2923				dst_offset |= ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2924
2925				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
2926				ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
2927				p->idx += count + 3;
2928			}
2929			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2930				dev_warn(p->dev, "DMA write buffer too small (%ju %lu)\n",
2931					 (uintmax_t)dst_offset, radeon_bo_size(dst_reloc->robj));
2932				return -EINVAL;
2933			}
2934			break;
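		/*
		 * DMA_PACKET_COPY has several layouts selected by the tiled,
		 * new_cmd and misc header bits; each variant is bounds-checked
		 * and relocated according to its own dword layout.
		 */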
2935		case DMA_PACKET_COPY:
2936			r = r600_dma_cs_next_reloc(p, &src_reloc);
2937			if (r) {
2938				DRM_ERROR("bad DMA_PACKET_COPY\n");
2939				return -EINVAL;
2940			}
2941			r = r600_dma_cs_next_reloc(p, &dst_reloc);
2942			if (r) {
2943				DRM_ERROR("bad DMA_PACKET_COPY\n");
2944				return -EINVAL;
2945			}
2946			if (tiled) {
2947				idx_value = radeon_get_ib_value(p, idx + 2);
2948				if (new_cmd) {
2949					switch (misc) {
2950					case 0:
2951						/* L2T, frame to fields */
2952						if (idx_value & (1U << 31)) {
2953							DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
2954							return -EINVAL;
2955						}
2956						r = r600_dma_cs_next_reloc(p, &dst2_reloc);
2957						if (r) {
2958							DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
2959							return -EINVAL;
2960						}
2961						dst_offset = radeon_get_ib_value(p, idx+1);
2962						dst_offset <<= 8;
2963						dst2_offset = radeon_get_ib_value(p, idx+2);
2964						dst2_offset <<= 8;
2965						src_offset = radeon_get_ib_value(p, idx+8);
2966						src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
2967						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2968							dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%ju %lu)\n",
2969								 (uintmax_t)src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2970							return -EINVAL;
2971						}
2972						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2973							dev_warn(p->dev, "DMA L2T, frame to fields dst buffer too small (%ju %lu)\n",
2974								 (uintmax_t)dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2975							return -EINVAL;
2976						}
2977						if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
2978							dev_warn(p->dev, "DMA L2T, frame to fields dst2 buffer too small (%ju %lu)\n",
2979								 (uintmax_t)dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
2980							return -EINVAL;
2981						}
2982						ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
2983						ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
2984						ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
2985						ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
2986						p->idx += 10;
2987						break;
2988					case 1:
2989						/* L2T, T2L partial */
2990						if (p->family < CHIP_CAYMAN) {
2991							DRM_ERROR("L2T, T2L Partial is cayman only !\n");
2992							return -EINVAL;
2993						}
2994						/* detile bit */
2995						if (idx_value & (1U << 31)) {
2996							/* tiled src, linear dst */
2997							ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
2998
2999							ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3000							ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3001						} else {
3002							/* linear src, tiled dst */
3003							ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3004							ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3005
3006							ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3007						}
3008						p->idx += 12;
3009						break;
3010					case 3:
3011						/* L2T, broadcast */
3012						if (idx_value & (1U << 31)) {
3013							DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3014							return -EINVAL;
3015						}
3016						r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3017						if (r) {
3018							DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3019							return -EINVAL;
3020						}
3021						dst_offset = radeon_get_ib_value(p, idx+1);
3022						dst_offset <<= 8;
3023						dst2_offset = radeon_get_ib_value(p, idx+2);
3024						dst2_offset <<= 8;
3025						src_offset = radeon_get_ib_value(p, idx+8);
3026						src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
3027						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3028							dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%ju %lu)\n",
3029								 (uintmax_t)src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3030							return -EINVAL;
3031						}
3032						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3033							dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%ju %lu)\n",
3034								 (uintmax_t)dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3035							return -EINVAL;
3036						}
3037						if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3038							dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%ju %lu)\n",
3039								 (uintmax_t)dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3040							return -EINVAL;
3041						}
3042						ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3043						ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
3044						ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3045						ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3046						p->idx += 10;
3047						break;
3048					case 4:
3049						/* L2T, T2L */
3050						/* detile bit */
3051						if (idx_value & (1U << 31)) {
3052							/* tiled src, linear dst */
3053							src_offset = radeon_get_ib_value(p, idx+1);
3054							src_offset <<= 8;
3055							ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
3056
3057							dst_offset = radeon_get_ib_value(p, idx+7);
3058							dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3059							ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3060							ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3061						} else {
3062							/* linear src, tiled dst */
3063							src_offset = radeon_get_ib_value(p, idx+7);
3064							src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3065							ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3066							ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3067
3068							dst_offset = radeon_get_ib_value(p, idx+1);
3069							dst_offset <<= 8;
3070							ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3071						}
3072						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3073							dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%ju %lu)\n",
3074								 (uintmax_t)src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3075							return -EINVAL;
3076						}
3077						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3078							dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%ju %lu)\n",
3079								 (uintmax_t)dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3080							return -EINVAL;
3081						}
3082						p->idx += 9;
3083						break;
3084					case 5:
3085						/* T2T partial */
3086						if (p->family < CHIP_CAYMAN) {
3087							DRM_ERROR("T2T Partial is cayman only !\n");
3088							return -EINVAL;
3089						}
3090						ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
3091						ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3092						p->idx += 13;
3093						break;
3094					case 7:
3095						/* L2T, broadcast */
3096						if (idx_value & (1U << 31)) {
3097							DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3098							return -EINVAL;
3099						}
3100						r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3101						if (r) {
3102							DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3103							return -EINVAL;
3104						}
3105						dst_offset = radeon_get_ib_value(p, idx+1);
3106						dst_offset <<= 8;
3107						dst2_offset = radeon_get_ib_value(p, idx+2);
3108						dst2_offset <<= 8;
3109						src_offset = radeon_get_ib_value(p, idx+8);
3110						src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
3111						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3112							dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%ju %lu)\n",
3113								 (uintmax_t)src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3114							return -EINVAL;
3115						}
3116						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3117							dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%ju %lu)\n",
3118								 (uintmax_t)dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3119							return -EINVAL;
3120						}
3121						if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3122							dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%ju %lu)\n",
3123								 (uintmax_t)dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3124							return -EINVAL;
3125						}
3126						ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3127						ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
3128						ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3129						ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3130						p->idx += 10;
3131						break;
3132					default:
3133						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3134						return -EINVAL;
3135					}
3136				} else {
3137					switch (misc) {
3138					case 0:
3139						/* detile bit */
3140						if (idx_value & (1U << 31)) {
3141							/* tiled src, linear dst */
3142							src_offset = radeon_get_ib_value(p, idx+1);
3143							src_offset <<= 8;
3144							ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
3145
3146							dst_offset = radeon_get_ib_value(p, idx+7);
3147							dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3148							ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3149							ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3150						} else {
3151							/* linear src, tiled dst */
3152							src_offset = radeon_get_ib_value(p, idx+7);
3153							src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3154							ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3155							ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3156
3157							dst_offset = radeon_get_ib_value(p, idx+1);
3158							dst_offset <<= 8;
3159							ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3160						}
3161						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3162							dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%ju %lu)\n",
3163								 (uintmax_t)src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3164							return -EINVAL;
3165						}
3166						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3167							dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%ju %lu)\n",
3168								 (uintmax_t)dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3169							return -EINVAL;
3170						}
3171						p->idx += 9;
3172						break;
3173					default:
3174						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3175						return -EINVAL;
3176					}
3177				}
3178			} else {
3179				if (new_cmd) {
3180					switch (misc) {
3181					case 0:
3182						/* L2L, byte */
3183						src_offset = radeon_get_ib_value(p, idx+2);
3184						src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
3185						dst_offset = radeon_get_ib_value(p, idx+1);
3186						dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
3187						if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) {
3188							dev_warn(p->dev, "DMA L2L, byte src buffer too small (%ju %lu)\n",
3189								 (uintmax_t)src_offset + count, radeon_bo_size(src_reloc->robj));
3190							return -EINVAL;
3191						}
3192						if ((dst_offset + count) > radeon_bo_size(dst_reloc->robj)) {
3193							dev_warn(p->dev, "DMA L2L, byte dst buffer too small (%ju %lu)\n",
3194								 (uintmax_t)dst_offset + count, radeon_bo_size(dst_reloc->robj));
3195							return -EINVAL;
3196						}
3197						ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
3198						ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
3199						ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3200						ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3201						p->idx += 5;
3202						break;
3203					case 1:
3204						/* L2L, partial */
3205						if (p->family < CHIP_CAYMAN) {
3206							DRM_ERROR("L2L Partial is cayman only !\n");
3207							return -EINVAL;
3208						}
3209						ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
3210						ib[idx+2] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3211						ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
3212						ib[idx+5] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3213
3214						p->idx += 9;
3215						break;
3216					case 4:
3217						/* L2L, dw, broadcast */
3218						r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3219						if (r) {
3220							DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n");
3221							return -EINVAL;
3222						}
3223						dst_offset = radeon_get_ib_value(p, idx+1);
3224						dst_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
3225						dst2_offset = radeon_get_ib_value(p, idx+2);
3226						dst2_offset |= ((u64)(radeon_get_ib_value(p, idx+5) & 0xff)) << 32;
3227						src_offset = radeon_get_ib_value(p, idx+3);
3228						src_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32;
3229						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3230							dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%ju %lu)\n",
3231								 (uintmax_t)src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3232							return -EINVAL;
3233						}
3234						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3235							dev_warn(p->dev, "DMA L2L, dw, broadcast dst buffer too small (%ju %lu)\n",
3236								 (uintmax_t)dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3237							return -EINVAL;
3238						}
3239						if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3240							dev_warn(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%ju %lu)\n",
3241								 (uintmax_t)dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3242							return -EINVAL;
3243						}
3244						ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3245						ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset & 0xfffffffc);
3246						ib[idx+3] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3247						ib[idx+4] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3248						ib[idx+5] += upper_32_bits(dst2_reloc->lobj.gpu_offset) & 0xff;
3249						ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3250						p->idx += 7;
3251						break;
3252					default:
3253						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3254						return -EINVAL;
3255					}
3256				} else {
3257					/* L2L, dw */
3258					src_offset = radeon_get_ib_value(p, idx+2);
3259					src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
3260					dst_offset = radeon_get_ib_value(p, idx+1);
3261					dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
3262					if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3263						dev_warn(p->dev, "DMA L2L, dw src buffer too small (%ju %lu)\n",
3264							 (uintmax_t)src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3265						return -EINVAL;
3266					}
3267					if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3268						dev_warn(p->dev, "DMA L2L, dw dst buffer too small (%ju %lu)\n",
3269							 (uintmax_t)dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3270						return -EINVAL;
3271					}
3272					ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3273					ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3274					ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3275					ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3276					p->idx += 5;
3277				}
3278			}
3279			break;
3280		case DMA_PACKET_CONSTANT_FILL:
3281			r = r600_dma_cs_next_reloc(p, &dst_reloc);
3282			if (r) {
3283				DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n");
3284				return -EINVAL;
3285			}
3286			dst_offset = radeon_get_ib_value(p, idx+1);
3287			dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0x00ff0000)) << 16;
3288			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3289				dev_warn(p->dev, "DMA constant fill buffer too small (%ju %lu)\n",
3290					 (uintmax_t)dst_offset, radeon_bo_size(dst_reloc->robj));
3291				return -EINVAL;
3292			}
3293			ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3294			ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) << 16) & 0x00ff0000;
3295			p->idx += 4;
3296			break;
3297		case DMA_PACKET_NOP:
3298			p->idx += 1;
3299			break;
3300		default:
3301			DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
3302			return -EINVAL;
3303		}
3304	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
3305#if 0
3306	for (r = 0; r < p->ib.length_dw; r++) {
3307		DRM_INFO("%05d  0x%08X\n", r, p->ib.ptr[r]);
3308		mdelay(1);
3309	}
3310#endif
3311	return 0;
3312}
3313
3314/* vm parser */
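/*
 * With a VM providing isolation only register writes need checking:
 * everything at or above the context register range (0x28000) is allowed,
 * config registers must be on the explicit list below.
 */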
static bool evergreen_vm_reg_valid(u32 reg)
{
	/* context regs are fine */
	if (reg >= 0x28000)
		return true;

	/* check config regs */
	switch (reg) {
	case WAIT_UNTIL:
	case GRBM_GFX_INDEX:
	case CP_STRMOUT_CNTL:
	case CP_COHER_CNTL:
	case CP_COHER_SIZE:
	case VGT_VTX_VECT_EJECT_REG:
	case VGT_CACHE_INVALIDATION:
	case VGT_GS_VERTEX_REUSE:
	case VGT_PRIMITIVE_TYPE:
	case VGT_INDEX_TYPE:
	case VGT_NUM_INDICES:
	case VGT_NUM_INSTANCES:
	case VGT_COMPUTE_DIM_X:
	case VGT_COMPUTE_DIM_Y:
	case VGT_COMPUTE_DIM_Z:
	case VGT_COMPUTE_START_X:
	case VGT_COMPUTE_START_Y:
	case VGT_COMPUTE_START_Z:
	case VGT_COMPUTE_INDEX:
	case VGT_COMPUTE_THREAD_GROUP_SIZE:
	case VGT_HS_OFFCHIP_PARAM:
	case PA_CL_ENHANCE:
	case PA_SU_LINE_STIPPLE_VALUE:
	case PA_SC_LINE_STIPPLE_STATE:
	case PA_SC_ENHANCE:
	case SQ_DYN_GPR_CNTL_PS_FLUSH_REQ:
	case SQ_DYN_GPR_SIMD_LOCK_EN:
	case SQ_CONFIG:
	case SQ_GPR_RESOURCE_MGMT_1:
	case SQ_GLOBAL_GPR_RESOURCE_MGMT_1:
	case SQ_GLOBAL_GPR_RESOURCE_MGMT_2:
	case SQ_CONST_MEM_BASE:
	case SQ_STATIC_THREAD_MGMT_1:
	case SQ_STATIC_THREAD_MGMT_2:
	case SQ_STATIC_THREAD_MGMT_3:
	case SPI_CONFIG_CNTL:
	case SPI_CONFIG_CNTL_1:
	case TA_CNTL_AUX:
	case DB_DEBUG:
	case DB_DEBUG2:
	case DB_DEBUG3:
	case DB_DEBUG4:
	case DB_WATERMARKS:
	case TD_PS_BORDER_COLOR_INDEX:
	case TD_PS_BORDER_COLOR_RED:
	case TD_PS_BORDER_COLOR_GREEN:
	case TD_PS_BORDER_COLOR_BLUE:
	case TD_PS_BORDER_COLOR_ALPHA:
	case TD_VS_BORDER_COLOR_INDEX:
	case TD_VS_BORDER_COLOR_RED:
	case TD_VS_BORDER_COLOR_GREEN:
	case TD_VS_BORDER_COLOR_BLUE:
	case TD_VS_BORDER_COLOR_ALPHA:
	case TD_GS_BORDER_COLOR_INDEX:
	case TD_GS_BORDER_COLOR_RED:
	case TD_GS_BORDER_COLOR_GREEN:
	case TD_GS_BORDER_COLOR_BLUE:
	case TD_GS_BORDER_COLOR_ALPHA:
	case TD_HS_BORDER_COLOR_INDEX:
	case TD_HS_BORDER_COLOR_RED:
	case TD_HS_BORDER_COLOR_GREEN:
	case TD_HS_BORDER_COLOR_BLUE:
	case TD_HS_BORDER_COLOR_ALPHA:
	case TD_LS_BORDER_COLOR_INDEX:
	case TD_LS_BORDER_COLOR_RED:
	case TD_LS_BORDER_COLOR_GREEN:
	case TD_LS_BORDER_COLOR_BLUE:
	case TD_LS_BORDER_COLOR_ALPHA:
	case TD_CS_BORDER_COLOR_INDEX:
	case TD_CS_BORDER_COLOR_RED:
	case TD_CS_BORDER_COLOR_GREEN:
	case TD_CS_BORDER_COLOR_BLUE:
	case TD_CS_BORDER_COLOR_ALPHA:
	case SQ_ESGS_RING_SIZE:
	case SQ_GSVS_RING_SIZE:
	case SQ_ESTMP_RING_SIZE:
	case SQ_GSTMP_RING_SIZE:
	case SQ_HSTMP_RING_SIZE:
	case SQ_LSTMP_RING_SIZE:
	case SQ_PSTMP_RING_SIZE:
	case SQ_VSTMP_RING_SIZE:
	case SQ_ESGS_RING_ITEMSIZE:
	case SQ_ESTMP_RING_ITEMSIZE:
	case SQ_GSTMP_RING_ITEMSIZE:
	case SQ_GSVS_RING_ITEMSIZE:
	case SQ_GS_VERT_ITEMSIZE:
	case SQ_GS_VERT_ITEMSIZE_1:
	case SQ_GS_VERT_ITEMSIZE_2:
	case SQ_GS_VERT_ITEMSIZE_3:
	case SQ_GSVS_RING_OFFSET_1:
	case SQ_GSVS_RING_OFFSET_2:
	case SQ_GSVS_RING_OFFSET_3:
	case SQ_HSTMP_RING_ITEMSIZE:
	case SQ_LSTMP_RING_ITEMSIZE:
	case SQ_PSTMP_RING_ITEMSIZE:
	case SQ_VSTMP_RING_ITEMSIZE:
	case VGT_TF_RING_SIZE:
	case SQ_ESGS_RING_BASE:
	case SQ_GSVS_RING_BASE:
	case SQ_ESTMP_RING_BASE:
	case SQ_GSTMP_RING_BASE:
	case SQ_HSTMP_RING_BASE:
	case SQ_LSTMP_RING_BASE:
	case SQ_PSTMP_RING_BASE:
	case SQ_VSTMP_RING_BASE:
	case CAYMAN_VGT_OFFCHIP_LDS_BASE:
	case CAYMAN_SQ_EX_ALLOC_TABLE_SLOTS:
		return true;
	default:
		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
		return false;
	}
}

static int evergreen_vm_packet3_check(struct radeon_device *rdev,
				      u32 *ib, struct radeon_cs_packet *pkt)
{
	u32 idx = pkt->idx + 1;
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;
	u32 command, info;

	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_MODE_CONTROL:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_DRAW_INDEX_OFFSET:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDEX:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_BOOL_CONST:
	case PACKET3_SET_LOOP_CONST:
	case PACKET3_SET_RESOURCE:
	case PACKET3_SET_SAMPLER:
	case PACKET3_SET_CTL_CONST:
	case PACKET3_SET_RESOURCE_OFFSET:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_RESOURCE_INDIRECT:
	case CAYMAN_PACKET3_DEALLOC_STATE:
		break;
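	/*
	 * COND_WRITE and COPY_DW may target a register; when the packet's
	 * register-space bit is set, the destination register must pass
	 * evergreen_vm_reg_valid().
	 */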
	case PACKET3_COND_WRITE:
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!evergreen_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!evergreen_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!evergreen_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
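	/*
	 * CP_DMA layout as decoded below: ib[idx + 1] carries the source
	 * (bits 30:29) and destination (bits 21:20) address-space selects,
	 * ib[idx + 4] is the command word with the byte count in bits 20:0
	 * plus the SAS/DAS (address space is register) and SAIC/DAIC
	 * (single register, no auto-increment) flags.
	 */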
	case PACKET3_CP_DMA:
		command = ib[idx + 4];
		info = ib[idx + 1];
		if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
		    (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
		    ((((info & 0x00300000) >> 20) == 0) &&
		     (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
		    ((((info & 0x60000000) >> 29) == 0) &&
		     (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
			/* non mem to mem copies require dw aligned count */
			if ((command & 0x1fffff) % 4) {
				DRM_ERROR("CP DMA command requires dw count alignment\n");
				return -EINVAL;
			}
		}
		if (command & PACKET3_CP_DMA_CMD_SAS) {
			/* src address space is register */
			if (((info & 0x60000000) >> 29) == 0) {
				start_reg = idx_value << 2;
				if (command & PACKET3_CP_DMA_CMD_SAIC) {
					reg = start_reg;
					if (!evergreen_vm_reg_valid(reg)) {
						DRM_ERROR("CP DMA Bad SRC register\n");
						return -EINVAL;
					}
				} else {
					for (i = 0; i < (command & 0x1fffff); i++) {
						reg = start_reg + (4 * i);
						if (!evergreen_vm_reg_valid(reg)) {
							DRM_ERROR("CP DMA Bad SRC register\n");
							return -EINVAL;
						}
					}
				}
			}
		}
		if (command & PACKET3_CP_DMA_CMD_DAS) {
			/* dst address space is register */
			if (((info & 0x00300000) >> 20) == 0) {
				start_reg = ib[idx + 2];
				if (command & PACKET3_CP_DMA_CMD_DAIC) {
					reg = start_reg;
					if (!evergreen_vm_reg_valid(reg)) {
						DRM_ERROR("CP DMA Bad DST register\n");
						return -EINVAL;
					}
				} else {
					for (i = 0; i < (command & 0x1fffff); i++) {
						reg = start_reg + (4 * i);
						if (!evergreen_vm_reg_valid(reg)) {
							DRM_ERROR("CP DMA Bad DST register\n");
							return -EINVAL;
						}
					}
				}
			}
		}
		break;
	default:
		return -EINVAL;
	}
	return 0;
}

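/**
 * evergreen_ib_parse() - parse the CP IB for VM
 * @rdev: radeon_device pointer
 * @ib:	radeon_ib pointer
 *
 * Walks a VM IB packet by packet: type-0 packets are rejected,
 * type-2 packets are skipped, and type-3 packets are validated with
 * evergreen_vm_packet3_check().
 * Returns 0 for success and an error on failure.
 **/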
int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	int ret = 0;
	u32 idx = 0;
	struct radeon_cs_packet pkt;

	do {
		pkt.idx = idx;
		pkt.type = CP_PACKET_GET_TYPE(ib->ptr[idx]);
		pkt.count = CP_PACKET_GET_COUNT(ib->ptr[idx]);
		pkt.one_reg_wr = 0;
		switch (pkt.type) {
		case PACKET_TYPE0:
			dev_err(rdev->dev, "Packet0 not allowed!\n");
			ret = -EINVAL;
			break;
		case PACKET_TYPE2:
			idx += 1;
			break;
		case PACKET_TYPE3:
			pkt.opcode = CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
			ret = evergreen_vm_packet3_check(rdev, ib->ptr, &pkt);
			idx += pkt.count + 2;
			break;
		default:
			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
			ret = -EINVAL;
			break;
		}
		if (ret)
			break;
	} while (idx < ib->length_dw);

	return ret;
}

/**
 * evergreen_dma_ib_parse() - parse the DMA IB for VM
 * @rdev: radeon_device pointer
 * @ib:	radeon_ib pointer
 *
 * Parses the DMA IB from the VM CS ioctl and
 * checks for errors. (Cayman-SI)
 * Returns 0 for success and an error on failure.
 **/
int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	u32 idx = 0;
	u32 header, cmd, count, tiled, new_cmd, misc;

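	/*
	 * Each DMA packet header encodes the opcode, dword count, tiled
	 * flag, new-format flag and misc sub-opcode.  Addresses in VM IBs
	 * are GPU virtual, so the parser only has to step over each packet
	 * by its size and reject unknown encodings.
	 */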
	do {
		header = ib->ptr[idx];
		cmd = GET_DMA_CMD(header);
		count = GET_DMA_COUNT(header);
		tiled = GET_DMA_T(header);
		new_cmd = GET_DMA_NEW(header);
		misc = GET_DMA_MISC(header);

		switch (cmd) {
		case DMA_PACKET_WRITE:
			if (tiled)
				idx += count + 7;
			else
				idx += count + 3;
			break;
		case DMA_PACKET_COPY:
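			/*
			 * Sub-op comments below use the driver's shorthand:
			 * L2L = linear to linear, L2T = linear to tiled,
			 * T2L = tiled to linear, T2T = tiled to tiled.
			 */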
			if (tiled) {
				if (new_cmd) {
					switch (misc) {
					case 0:
						/* L2T, frame to fields */
						idx += 10;
						break;
					case 1:
						/* L2T, T2L partial */
						idx += 12;
						break;
					case 3:
						/* L2T, broadcast */
						idx += 10;
						break;
					case 4:
						/* L2T, T2L */
						idx += 9;
						break;
					case 5:
						/* T2T partial */
						idx += 13;
						break;
					case 7:
						/* L2T, broadcast */
						idx += 10;
						break;
					default:
						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
						return -EINVAL;
					}
				} else {
					switch (misc) {
					case 0:
						idx += 9;
						break;
					default:
						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
						return -EINVAL;
					}
				}
			} else {
				if (new_cmd) {
					switch (misc) {
					case 0:
						/* L2L, byte */
						idx += 5;
						break;
					case 1:
						/* L2L, partial */
						idx += 9;
						break;
					case 4:
						/* L2L, dw, broadcast */
						idx += 7;
						break;
					default:
						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
						return -EINVAL;
					}
				} else {
					/* L2L, dw */
					idx += 5;
				}
			}
			break;
		case DMA_PACKET_CONSTANT_FILL:
			idx += 4;
			break;
		case DMA_PACKET_NOP:
			idx += 1;
			break;
		default:
			DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
			return -EINVAL;
		}
	} while (idx < ib->length_dw);

	return 0;
}
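
/*
 * For reference: these two entry points are the per-ring ".ib_parse"
 * callbacks used for VM submissions.  A rough sketch of the wiring in the
 * Cayman ASIC table (field and index names are assumptions based on the
 * radeon_asic tables, abridged, not taken from this file):
 *
 *	.ring = {
 *		[RADEON_RING_TYPE_GFX_INDEX] = {
 *			.ib_parse = &evergreen_ib_parse,
 *			...
 *		},
 *		[R600_RING_TYPE_DMA_INDEX] = {
 *			.ib_parse = &evergreen_dma_ib_parse,
 *			...
 *		},
 *	},
 */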