1/*
2 * Copyright 2010 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 *          Alex Deucher
26 *          Jerome Glisse
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD$");
31
32#include <dev/drm2/drmP.h>
33#include "radeon.h"
34#include "radeon_asic.h"
35#include "evergreend.h"
36#include "evergreen_reg_safe.h"
37#include "cayman_reg_safe.h"
38#include "r600_cs.h"
39
40#define MAX(a,b)                   (((a)>(b))?(a):(b))
41#define MIN(a,b)                   (((a)<(b))?(a):(b))
42
43#ifdef FREEBSD_WIP /* FreeBSD: to please GCC 4.2. */
44int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
45			   struct radeon_cs_reloc **cs_reloc);
46#endif
47static int evergreen_cs_packet_next_reloc(struct radeon_cs_parser *p,
48					  struct radeon_cs_reloc **cs_reloc);
49
50struct evergreen_cs_track {
51	u32			group_size;
52	u32			nbanks;
53	u32			npipes;
54	u32			row_size;
55	/* value we track */
56	u32			nsamples;		/* unused */
57	struct radeon_bo	*cb_color_bo[12];
58	u32			cb_color_bo_offset[12];
59	struct radeon_bo	*cb_color_fmask_bo[8];	/* unused */
60	struct radeon_bo	*cb_color_cmask_bo[8];	/* unused */
61	u32			cb_color_info[12];
62	u32			cb_color_view[12];
63	u32			cb_color_pitch[12];
64	u32			cb_color_slice[12];
65	u32			cb_color_slice_idx[12];
66	u32			cb_color_attrib[12];
67	u32			cb_color_cmask_slice[8];/* unused */
68	u32			cb_color_fmask_slice[8];/* unused */
69	u32			cb_target_mask;
70	u32			cb_shader_mask; /* unused */
71	u32			vgt_strmout_config;
72	u32			vgt_strmout_buffer_config;
73	struct radeon_bo	*vgt_strmout_bo[4];
74	u32			vgt_strmout_bo_offset[4];
75	u32			vgt_strmout_size[4];
76	u32			db_depth_control;
77	u32			db_depth_view;
78	u32			db_depth_slice;
79	u32			db_depth_size;
80	u32			db_z_info;
81	u32			db_z_read_offset;
82	u32			db_z_write_offset;
83	struct radeon_bo	*db_z_read_bo;
84	struct radeon_bo	*db_z_write_bo;
85	u32			db_s_info;
86	u32			db_s_read_offset;
87	u32			db_s_write_offset;
88	struct radeon_bo	*db_s_read_bo;
89	struct radeon_bo	*db_s_write_bo;
90	bool			sx_misc_kill_all_prims;
91	bool			cb_dirty;
92	bool			db_dirty;
93	bool			streamout_dirty;
94	u32			htile_offset;
95	u32			htile_surface;
96	struct radeon_bo	*htile_bo;
97};
98
99static u32 evergreen_cs_get_aray_mode(u32 tiling_flags)
100{
101	if (tiling_flags & RADEON_TILING_MACRO)
102		return ARRAY_2D_TILED_THIN1;
103	else if (tiling_flags & RADEON_TILING_MICRO)
104		return ARRAY_1D_TILED_THIN1;
105	else
106		return ARRAY_LINEAR_GENERAL;
107}
108
109static u32 evergreen_cs_get_num_banks(u32 nbanks)
110{
111	switch (nbanks) {
112	case 2:
113		return ADDR_SURF_2_BANK;
114	case 4:
115		return ADDR_SURF_4_BANK;
116	case 8:
117	default:
118		return ADDR_SURF_8_BANK;
119	case 16:
120		return ADDR_SURF_16_BANK;
121	}
122}
123
124static void evergreen_cs_track_init(struct evergreen_cs_track *track)
125{
126	int i;
127
128	for (i = 0; i < 8; i++) {
129		track->cb_color_fmask_bo[i] = NULL;
130		track->cb_color_cmask_bo[i] = NULL;
131		track->cb_color_cmask_slice[i] = 0;
132		track->cb_color_fmask_slice[i] = 0;
133	}
134
135	for (i = 0; i < 12; i++) {
136		track->cb_color_bo[i] = NULL;
137		track->cb_color_bo_offset[i] = 0xFFFFFFFF;
138		track->cb_color_info[i] = 0;
139		track->cb_color_view[i] = 0xFFFFFFFF;
140		track->cb_color_pitch[i] = 0;
141		track->cb_color_slice[i] = 0xfffffff;
142		track->cb_color_slice_idx[i] = 0;
143	}
144	track->cb_target_mask = 0xFFFFFFFF;
145	track->cb_shader_mask = 0xFFFFFFFF;
146	track->cb_dirty = true;
147
148	track->db_depth_slice = 0xffffffff;
149	track->db_depth_view = 0xFFFFC000;
150	track->db_depth_size = 0xFFFFFFFF;
151	track->db_depth_control = 0xFFFFFFFF;
152	track->db_z_info = 0xFFFFFFFF;
153	track->db_z_read_offset = 0xFFFFFFFF;
154	track->db_z_write_offset = 0xFFFFFFFF;
155	track->db_z_read_bo = NULL;
156	track->db_z_write_bo = NULL;
157	track->db_s_info = 0xFFFFFFFF;
158	track->db_s_read_offset = 0xFFFFFFFF;
159	track->db_s_write_offset = 0xFFFFFFFF;
160	track->db_s_read_bo = NULL;
161	track->db_s_write_bo = NULL;
162	track->db_dirty = true;
163	track->htile_bo = NULL;
164	track->htile_offset = 0xFFFFFFFF;
165	track->htile_surface = 0;
166
167	for (i = 0; i < 4; i++) {
168		track->vgt_strmout_size[i] = 0;
169		track->vgt_strmout_bo[i] = NULL;
170		track->vgt_strmout_bo_offset[i] = 0xFFFFFFFF;
171	}
172	track->streamout_dirty = true;
173	track->sx_misc_kill_all_prims = false;
174}
175
176struct eg_surface {
177	/* value gathered from cs */
178	unsigned	nbx;
179	unsigned	nby;
180	unsigned	format;
181	unsigned	mode;
182	unsigned	nbanks;
183	unsigned	bankw;
184	unsigned	bankh;
185	unsigned	tsplit;
186	unsigned	mtilea;
187	unsigned	nsamples;
188	/* output value */
189	unsigned	bpe;
190	unsigned	layer_size;
191	unsigned	palign;
192	unsigned	halign;
193	unsigned long	base_align;
194};
195
196static int evergreen_surface_check_linear(struct radeon_cs_parser *p,
197					  struct eg_surface *surf,
198					  const char *prefix)
199{
200	surf->layer_size = surf->nbx * surf->nby * surf->bpe * surf->nsamples;
201	surf->base_align = surf->bpe;
202	surf->palign = 1;
203	surf->halign = 1;
204	return 0;
205}
206
207static int evergreen_surface_check_linear_aligned(struct radeon_cs_parser *p,
208						  struct eg_surface *surf,
209						  const char *prefix)
210{
211	struct evergreen_cs_track *track = p->track;
212	unsigned palign;
213
214	palign = MAX(64, track->group_size / surf->bpe);
215	surf->layer_size = surf->nbx * surf->nby * surf->bpe * surf->nsamples;
216	surf->base_align = track->group_size;
217	surf->palign = palign;
218	surf->halign = 1;
219	if (surf->nbx & (palign - 1)) {
220		if (prefix) {
221			dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n",
222				 __func__, __LINE__, prefix, surf->nbx, palign);
223		}
224		return -EINVAL;
225	}
226	return 0;
227}
228
229static int evergreen_surface_check_1d(struct radeon_cs_parser *p,
230				      struct eg_surface *surf,
231				      const char *prefix)
232{
233	struct evergreen_cs_track *track = p->track;
234	unsigned palign;
235
236	palign = track->group_size / (8 * surf->bpe * surf->nsamples);
237	palign = MAX(8, palign);
238	surf->layer_size = surf->nbx * surf->nby * surf->bpe;
239	surf->base_align = track->group_size;
240	surf->palign = palign;
241	surf->halign = 8;
242	if ((surf->nbx & (palign - 1))) {
243		if (prefix) {
244			dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d (%d %d %d)\n",
245				 __func__, __LINE__, prefix, surf->nbx, palign,
246				 track->group_size, surf->bpe, surf->nsamples);
247		}
248		return -EINVAL;
249	}
250	if ((surf->nby & (8 - 1))) {
251		if (prefix) {
252			dev_warn(p->dev, "%s:%d %s height %d invalid must be aligned with 8\n",
253				 __func__, __LINE__, prefix, surf->nby);
254		}
255		return -EINVAL;
256	}
257	return 0;
258}
259
260static int evergreen_surface_check_2d(struct radeon_cs_parser *p,
261				      struct eg_surface *surf,
262				      const char *prefix)
263{
264	struct evergreen_cs_track *track = p->track;
265	unsigned palign, halign, tileb, slice_pt;
266	unsigned mtile_pr, mtile_ps, mtileb;
267
268	tileb = 64 * surf->bpe * surf->nsamples;
269	slice_pt = 1;
270	if (tileb > surf->tsplit) {
271		slice_pt = tileb / surf->tsplit;
272	}
273	tileb = tileb / slice_pt;
274	/* macro tile width & height */
275	palign = (8 * surf->bankw * track->npipes) * surf->mtilea;
276	halign = (8 * surf->bankh * surf->nbanks) / surf->mtilea;
277	mtileb = (palign / 8) * (halign / 8) * tileb;
278	mtile_pr = surf->nbx / palign;
279	mtile_ps = (mtile_pr * surf->nby) / halign;
280	surf->layer_size = mtile_ps * mtileb * slice_pt;
281	surf->base_align = (palign / 8) * (halign / 8) * tileb;
282	surf->palign = palign;
283	surf->halign = halign;
284
285	if ((surf->nbx & (palign - 1))) {
286		if (prefix) {
287			dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n",
288				 __func__, __LINE__, prefix, surf->nbx, palign);
289		}
290		return -EINVAL;
291	}
292	if ((surf->nby & (halign - 1))) {
293		if (prefix) {
294			dev_warn(p->dev, "%s:%d %s height %d invalid must be aligned with %d\n",
295				 __func__, __LINE__, prefix, surf->nby, halign);
296		}
297		return -EINVAL;
298	}
299
300	return 0;
301}
302
303static int evergreen_surface_check(struct radeon_cs_parser *p,
304				   struct eg_surface *surf,
305				   const char *prefix)
306{
307	/* some common value computed here */
308	surf->bpe = r600_fmt_get_blocksize(surf->format);
309
310	switch (surf->mode) {
311	case ARRAY_LINEAR_GENERAL:
312		return evergreen_surface_check_linear(p, surf, prefix);
313	case ARRAY_LINEAR_ALIGNED:
314		return evergreen_surface_check_linear_aligned(p, surf, prefix);
315	case ARRAY_1D_TILED_THIN1:
316		return evergreen_surface_check_1d(p, surf, prefix);
317	case ARRAY_2D_TILED_THIN1:
318		return evergreen_surface_check_2d(p, surf, prefix);
319	default:
320		dev_warn(p->dev, "%s:%d %s invalid array mode %d\n",
321				__func__, __LINE__, prefix, surf->mode);
322		return -EINVAL;
323	}
324	return -EINVAL;
325}
326
327static int evergreen_surface_value_conv_check(struct radeon_cs_parser *p,
328					      struct eg_surface *surf,
329					      const char *prefix)
330{
331	switch (surf->mode) {
332	case ARRAY_2D_TILED_THIN1:
333		break;
334	case ARRAY_LINEAR_GENERAL:
335	case ARRAY_LINEAR_ALIGNED:
336	case ARRAY_1D_TILED_THIN1:
337		return 0;
338	default:
339		dev_warn(p->dev, "%s:%d %s invalid array mode %d\n",
340				__func__, __LINE__, prefix, surf->mode);
341		return -EINVAL;
342	}
343
344	switch (surf->nbanks) {
345	case 0: surf->nbanks = 2; break;
346	case 1: surf->nbanks = 4; break;
347	case 2: surf->nbanks = 8; break;
348	case 3: surf->nbanks = 16; break;
349	default:
350		dev_warn(p->dev, "%s:%d %s invalid number of banks %d\n",
351			 __func__, __LINE__, prefix, surf->nbanks);
352		return -EINVAL;
353	}
354	switch (surf->bankw) {
355	case 0: surf->bankw = 1; break;
356	case 1: surf->bankw = 2; break;
357	case 2: surf->bankw = 4; break;
358	case 3: surf->bankw = 8; break;
359	default:
360		dev_warn(p->dev, "%s:%d %s invalid bankw %d\n",
361			 __func__, __LINE__, prefix, surf->bankw);
362		return -EINVAL;
363	}
364	switch (surf->bankh) {
365	case 0: surf->bankh = 1; break;
366	case 1: surf->bankh = 2; break;
367	case 2: surf->bankh = 4; break;
368	case 3: surf->bankh = 8; break;
369	default:
370		dev_warn(p->dev, "%s:%d %s invalid bankh %d\n",
371			 __func__, __LINE__, prefix, surf->bankh);
372		return -EINVAL;
373	}
374	switch (surf->mtilea) {
375	case 0: surf->mtilea = 1; break;
376	case 1: surf->mtilea = 2; break;
377	case 2: surf->mtilea = 4; break;
378	case 3: surf->mtilea = 8; break;
379	default:
380		dev_warn(p->dev, "%s:%d %s invalid macro tile aspect %d\n",
381			 __func__, __LINE__, prefix, surf->mtilea);
382		return -EINVAL;
383	}
384	switch (surf->tsplit) {
385	case 0: surf->tsplit = 64; break;
386	case 1: surf->tsplit = 128; break;
387	case 2: surf->tsplit = 256; break;
388	case 3: surf->tsplit = 512; break;
389	case 4: surf->tsplit = 1024; break;
390	case 5: surf->tsplit = 2048; break;
391	case 6: surf->tsplit = 4096; break;
392	default:
393		dev_warn(p->dev, "%s:%d %s invalid tile split %d\n",
394			 __func__, __LINE__, prefix, surf->tsplit);
395		return -EINVAL;
396	}
397	return 0;
398}
399
400static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned id)
401{
402	struct evergreen_cs_track *track = p->track;
403	struct eg_surface surf;
404	unsigned pitch, slice, mslice;
405	unsigned long offset;
406	int r;
407
408	mslice = G_028C6C_SLICE_MAX(track->cb_color_view[id]) + 1;
409	pitch = track->cb_color_pitch[id];
410	slice = track->cb_color_slice[id];
411	surf.nbx = (pitch + 1) * 8;
412	surf.nby = ((slice + 1) * 64) / surf.nbx;
413	surf.mode = G_028C70_ARRAY_MODE(track->cb_color_info[id]);
414	surf.format = G_028C70_FORMAT(track->cb_color_info[id]);
415	surf.tsplit = G_028C74_TILE_SPLIT(track->cb_color_attrib[id]);
416	surf.nbanks = G_028C74_NUM_BANKS(track->cb_color_attrib[id]);
417	surf.bankw = G_028C74_BANK_WIDTH(track->cb_color_attrib[id]);
418	surf.bankh = G_028C74_BANK_HEIGHT(track->cb_color_attrib[id]);
419	surf.mtilea = G_028C74_MACRO_TILE_ASPECT(track->cb_color_attrib[id]);
420	surf.nsamples = 1;
421
422	if (!r600_fmt_is_valid_color(surf.format)) {
423		dev_warn(p->dev, "%s:%d cb invalid format %d for %d (0x%08x)\n",
424			 __func__, __LINE__, surf.format,
425			id, track->cb_color_info[id]);
426		return -EINVAL;
427	}
428
429	r = evergreen_surface_value_conv_check(p, &surf, "cb");
430	if (r) {
431		return r;
432	}
433
434	r = evergreen_surface_check(p, &surf, "cb");
435	if (r) {
436		dev_warn(p->dev, "%s:%d cb[%d] invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
437			 __func__, __LINE__, id, track->cb_color_pitch[id],
438			 track->cb_color_slice[id], track->cb_color_attrib[id],
439			 track->cb_color_info[id]);
440		return r;
441	}
442
443	offset = track->cb_color_bo_offset[id] << 8;
444	if (offset & (surf.base_align - 1)) {
445		dev_warn(p->dev, "%s:%d cb[%d] bo base %ld not aligned with %ld\n",
446			 __func__, __LINE__, id, offset, surf.base_align);
447		return -EINVAL;
448	}
449
450	offset += surf.layer_size * mslice;
451	if (offset > radeon_bo_size(track->cb_color_bo[id])) {
452		/* old ddx are broken they allocate bo with w*h*bpp but
453		 * program slice with ALIGN(h, 8), catch this and patch
454		 * command stream.
455		 */
456		if (!surf.mode) {
457			volatile u32 *ib = p->ib.ptr;
458			unsigned long tmp, nby, bsize, size, min = 0;
459
460			/* find the height the ddx wants */
461			if (surf.nby > 8) {
462				min = surf.nby - 8;
463			}
464			bsize = radeon_bo_size(track->cb_color_bo[id]);
465			tmp = track->cb_color_bo_offset[id] << 8;
466			for (nby = surf.nby; nby > min; nby--) {
467				size = nby * surf.nbx * surf.bpe * surf.nsamples;
468				if ((tmp + size * mslice) <= bsize) {
469					break;
470				}
471			}
472			if (nby > min) {
473				surf.nby = nby;
474				slice = ((nby * surf.nbx) / 64) - 1;
475				if (!evergreen_surface_check(p, &surf, "cb")) {
476					/* check if this one works */
477					tmp += surf.layer_size * mslice;
478					if (tmp <= bsize) {
479						ib[track->cb_color_slice_idx[id]] = slice;
480						goto old_ddx_ok;
481					}
482				}
483			}
484		}
485		dev_warn(p->dev, "%s:%d cb[%d] bo too small (layer size %d, "
486			 "offset %d, max layer %d, bo size %ld, slice %d)\n",
487			 __func__, __LINE__, id, surf.layer_size,
488			track->cb_color_bo_offset[id] << 8, mslice,
489			radeon_bo_size(track->cb_color_bo[id]), slice);
490		dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n",
491			 __func__, __LINE__, surf.nbx, surf.nby,
492			surf.mode, surf.bpe, surf.nsamples,
493			surf.bankw, surf.bankh,
494			surf.tsplit, surf.mtilea);
495		return -EINVAL;
496	}
497old_ddx_ok:
498
499	return 0;
500}
501
502static int evergreen_cs_track_validate_htile(struct radeon_cs_parser *p,
503						unsigned nbx, unsigned nby)
504{
505	struct evergreen_cs_track *track = p->track;
506	unsigned long size;
507
508	if (track->htile_bo == NULL) {
509		dev_warn(p->dev, "%s:%d htile enabled without htile surface 0x%08x\n",
510				__func__, __LINE__, track->db_z_info);
511		return -EINVAL;
512	}
513
514	if (G_028ABC_LINEAR(track->htile_surface)) {
515		/* pitch must be 16 htiles aligned == 16 * 8 pixel aligned */
516		nbx = roundup(nbx, 16 * 8);
517		/* height is npipes htiles aligned == npipes * 8 pixel aligned */
518		nby = roundup(nby, track->npipes * 8);
519	} else {
520		/* always assume 8x8 htile */
521		/* align is htile align * 8, htile align vary according to
522		 * number of pipe and tile width and nby
523		 */
524		switch (track->npipes) {
525		case 8:
526			/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
527			nbx = roundup(nbx, 64 * 8);
528			nby = roundup(nby, 64 * 8);
529			break;
530		case 4:
531			/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
532			nbx = roundup(nbx, 64 * 8);
533			nby = roundup(nby, 32 * 8);
534			break;
535		case 2:
536			/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
537			nbx = roundup(nbx, 32 * 8);
538			nby = roundup(nby, 32 * 8);
539			break;
540		case 1:
541			/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
542			nbx = roundup(nbx, 32 * 8);
543			nby = roundup(nby, 16 * 8);
544			break;
545		default:
546			dev_warn(p->dev, "%s:%d invalid num pipes %d\n",
547					__func__, __LINE__, track->npipes);
548			return -EINVAL;
549		}
550	}
551	/* compute number of htile */
552	nbx = nbx >> 3;
553	nby = nby >> 3;
554	/* size must be aligned on npipes * 2K boundary */
555	size = roundup(nbx * nby * 4, track->npipes * (2 << 10));
556	size += track->htile_offset;
557
558	if (size > radeon_bo_size(track->htile_bo)) {
559		dev_warn(p->dev, "%s:%d htile surface too small %ld for %ld (%d %d)\n",
560				__func__, __LINE__, radeon_bo_size(track->htile_bo),
561				size, nbx, nby);
562		return -EINVAL;
563	}
564	return 0;
565}
566
567static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p)
568{
569	struct evergreen_cs_track *track = p->track;
570	struct eg_surface surf;
571	unsigned pitch, slice, mslice;
572	unsigned long offset;
573	int r;
574
575	mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1;
576	pitch = G_028058_PITCH_TILE_MAX(track->db_depth_size);
577	slice = track->db_depth_slice;
578	surf.nbx = (pitch + 1) * 8;
579	surf.nby = ((slice + 1) * 64) / surf.nbx;
580	surf.mode = G_028040_ARRAY_MODE(track->db_z_info);
581	surf.format = G_028044_FORMAT(track->db_s_info);
582	surf.tsplit = G_028044_TILE_SPLIT(track->db_s_info);
583	surf.nbanks = G_028040_NUM_BANKS(track->db_z_info);
584	surf.bankw = G_028040_BANK_WIDTH(track->db_z_info);
585	surf.bankh = G_028040_BANK_HEIGHT(track->db_z_info);
586	surf.mtilea = G_028040_MACRO_TILE_ASPECT(track->db_z_info);
587	surf.nsamples = 1;
588
589	if (surf.format != 1) {
590		dev_warn(p->dev, "%s:%d stencil invalid format %d\n",
591			 __func__, __LINE__, surf.format);
592		return -EINVAL;
593	}
594	/* replace by color format so we can use same code */
595	surf.format = V_028C70_COLOR_8;
596
597	r = evergreen_surface_value_conv_check(p, &surf, "stencil");
598	if (r) {
599		return r;
600	}
601
602	r = evergreen_surface_check(p, &surf, NULL);
603	if (r) {
604		/* old userspace doesn't compute proper depth/stencil alignment
605		 * check that alignment against a bigger byte per elements and
606		 * only report if that alignment is wrong too.
607		 */
608		surf.format = V_028C70_COLOR_8_8_8_8;
609		r = evergreen_surface_check(p, &surf, "stencil");
610		if (r) {
611			dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
612				 __func__, __LINE__, track->db_depth_size,
613				 track->db_depth_slice, track->db_s_info, track->db_z_info);
614		}
615		return r;
616	}
617
618	offset = track->db_s_read_offset << 8;
619	if (offset & (surf.base_align - 1)) {
620		dev_warn(p->dev, "%s:%d stencil read bo base %ld not aligned with %ld\n",
621			 __func__, __LINE__, offset, surf.base_align);
622		return -EINVAL;
623	}
624	offset += surf.layer_size * mslice;
625	if (offset > radeon_bo_size(track->db_s_read_bo)) {
626		dev_warn(p->dev, "%s:%d stencil read bo too small (layer size %d, "
627			 "offset %ld, max layer %d, bo size %ld)\n",
628			 __func__, __LINE__, surf.layer_size,
629			(unsigned long)track->db_s_read_offset << 8, mslice,
630			radeon_bo_size(track->db_s_read_bo));
631		dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
632			 __func__, __LINE__, track->db_depth_size,
633			 track->db_depth_slice, track->db_s_info, track->db_z_info);
634		return -EINVAL;
635	}
636
637	offset = track->db_s_write_offset << 8;
638	if (offset & (surf.base_align - 1)) {
639		dev_warn(p->dev, "%s:%d stencil write bo base %ld not aligned with %ld\n",
640			 __func__, __LINE__, offset, surf.base_align);
641		return -EINVAL;
642	}
643	offset += surf.layer_size * mslice;
644	if (offset > radeon_bo_size(track->db_s_write_bo)) {
645		dev_warn(p->dev, "%s:%d stencil write bo too small (layer size %d, "
646			 "offset %ld, max layer %d, bo size %ld)\n",
647			 __func__, __LINE__, surf.layer_size,
648			(unsigned long)track->db_s_write_offset << 8, mslice,
649			radeon_bo_size(track->db_s_write_bo));
650		return -EINVAL;
651	}
652
653	/* hyperz */
654	if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) {
655		r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby);
656		if (r) {
657			return r;
658		}
659	}
660
661	return 0;
662}
663
664static int evergreen_cs_track_validate_depth(struct radeon_cs_parser *p)
665{
666	struct evergreen_cs_track *track = p->track;
667	struct eg_surface surf;
668	unsigned pitch, slice, mslice;
669	unsigned long offset;
670	int r;
671
672	mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1;
673	pitch = G_028058_PITCH_TILE_MAX(track->db_depth_size);
674	slice = track->db_depth_slice;
675	surf.nbx = (pitch + 1) * 8;
676	surf.nby = ((slice + 1) * 64) / surf.nbx;
677	surf.mode = G_028040_ARRAY_MODE(track->db_z_info);
678	surf.format = G_028040_FORMAT(track->db_z_info);
679	surf.tsplit = G_028040_TILE_SPLIT(track->db_z_info);
680	surf.nbanks = G_028040_NUM_BANKS(track->db_z_info);
681	surf.bankw = G_028040_BANK_WIDTH(track->db_z_info);
682	surf.bankh = G_028040_BANK_HEIGHT(track->db_z_info);
683	surf.mtilea = G_028040_MACRO_TILE_ASPECT(track->db_z_info);
684	surf.nsamples = 1;
685
686	switch (surf.format) {
687	case V_028040_Z_16:
688		surf.format = V_028C70_COLOR_16;
689		break;
690	case V_028040_Z_24:
691	case V_028040_Z_32_FLOAT:
692		surf.format = V_028C70_COLOR_8_8_8_8;
693		break;
694	default:
695		dev_warn(p->dev, "%s:%d depth invalid format %d\n",
696			 __func__, __LINE__, surf.format);
697		return -EINVAL;
698	}
699
700	r = evergreen_surface_value_conv_check(p, &surf, "depth");
701	if (r) {
702		dev_warn(p->dev, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n",
703			 __func__, __LINE__, track->db_depth_size,
704			 track->db_depth_slice, track->db_z_info);
705		return r;
706	}
707
708	r = evergreen_surface_check(p, &surf, "depth");
709	if (r) {
710		dev_warn(p->dev, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n",
711			 __func__, __LINE__, track->db_depth_size,
712			 track->db_depth_slice, track->db_z_info);
713		return r;
714	}
715
716	offset = track->db_z_read_offset << 8;
717	if (offset & (surf.base_align - 1)) {
718		dev_warn(p->dev, "%s:%d stencil read bo base %ld not aligned with %ld\n",
719			 __func__, __LINE__, offset, surf.base_align);
720		return -EINVAL;
721	}
722	offset += surf.layer_size * mslice;
723	if (offset > radeon_bo_size(track->db_z_read_bo)) {
724		dev_warn(p->dev, "%s:%d depth read bo too small (layer size %d, "
725			 "offset %ld, max layer %d, bo size %ld)\n",
726			 __func__, __LINE__, surf.layer_size,
727			(unsigned long)track->db_z_read_offset << 8, mslice,
728			radeon_bo_size(track->db_z_read_bo));
729		return -EINVAL;
730	}
731
732	offset = track->db_z_write_offset << 8;
733	if (offset & (surf.base_align - 1)) {
734		dev_warn(p->dev, "%s:%d stencil write bo base %ld not aligned with %ld\n",
735			 __func__, __LINE__, offset, surf.base_align);
736		return -EINVAL;
737	}
738	offset += surf.layer_size * mslice;
739	if (offset > radeon_bo_size(track->db_z_write_bo)) {
740		dev_warn(p->dev, "%s:%d depth write bo too small (layer size %d, "
741			 "offset %ld, max layer %d, bo size %ld)\n",
742			 __func__, __LINE__, surf.layer_size,
743			(unsigned long)track->db_z_write_offset << 8, mslice,
744			radeon_bo_size(track->db_z_write_bo));
745		return -EINVAL;
746	}
747
748	/* hyperz */
749	if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) {
750		r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby);
751		if (r) {
752			return r;
753		}
754	}
755
756	return 0;
757}
758
759static int evergreen_cs_track_validate_texture(struct radeon_cs_parser *p,
760					       struct radeon_bo *texture,
761					       struct radeon_bo *mipmap,
762					       unsigned idx)
763{
764	struct eg_surface surf;
765	unsigned long toffset, moffset;
766	unsigned dim, llevel, mslice, width, height, depth, i;
767	u32 texdw[8];
768	int r;
769
770	texdw[0] = radeon_get_ib_value(p, idx + 0);
771	texdw[1] = radeon_get_ib_value(p, idx + 1);
772	texdw[2] = radeon_get_ib_value(p, idx + 2);
773	texdw[3] = radeon_get_ib_value(p, idx + 3);
774	texdw[4] = radeon_get_ib_value(p, idx + 4);
775	texdw[5] = radeon_get_ib_value(p, idx + 5);
776	texdw[6] = radeon_get_ib_value(p, idx + 6);
777	texdw[7] = radeon_get_ib_value(p, idx + 7);
778	dim = G_030000_DIM(texdw[0]);
779	llevel = G_030014_LAST_LEVEL(texdw[5]);
780	mslice = G_030014_LAST_ARRAY(texdw[5]) + 1;
781	width = G_030000_TEX_WIDTH(texdw[0]) + 1;
782	height =  G_030004_TEX_HEIGHT(texdw[1]) + 1;
783	depth = G_030004_TEX_DEPTH(texdw[1]) + 1;
784	surf.format = G_03001C_DATA_FORMAT(texdw[7]);
785	surf.nbx = (G_030000_PITCH(texdw[0]) + 1) * 8;
786	surf.nbx = r600_fmt_get_nblocksx(surf.format, surf.nbx);
787	surf.nby = r600_fmt_get_nblocksy(surf.format, height);
788	surf.mode = G_030004_ARRAY_MODE(texdw[1]);
789	surf.tsplit = G_030018_TILE_SPLIT(texdw[6]);
790	surf.nbanks = G_03001C_NUM_BANKS(texdw[7]);
791	surf.bankw = G_03001C_BANK_WIDTH(texdw[7]);
792	surf.bankh = G_03001C_BANK_HEIGHT(texdw[7]);
793	surf.mtilea = G_03001C_MACRO_TILE_ASPECT(texdw[7]);
794	surf.nsamples = 1;
795	toffset = texdw[2] << 8;
796	moffset = texdw[3] << 8;
797
798	if (!r600_fmt_is_valid_texture(surf.format, p->family)) {
799		dev_warn(p->dev, "%s:%d texture invalid format %d\n",
800			 __func__, __LINE__, surf.format);
801		return -EINVAL;
802	}
803	switch (dim) {
804	case V_030000_SQ_TEX_DIM_1D:
805	case V_030000_SQ_TEX_DIM_2D:
806	case V_030000_SQ_TEX_DIM_CUBEMAP:
807	case V_030000_SQ_TEX_DIM_1D_ARRAY:
808	case V_030000_SQ_TEX_DIM_2D_ARRAY:
809		depth = 1;
810		break;
811	case V_030000_SQ_TEX_DIM_2D_MSAA:
812	case V_030000_SQ_TEX_DIM_2D_ARRAY_MSAA:
813		surf.nsamples = 1 << llevel;
814		llevel = 0;
815		depth = 1;
816		break;
817	case V_030000_SQ_TEX_DIM_3D:
818		break;
819	default:
820		dev_warn(p->dev, "%s:%d texture invalid dimension %d\n",
821			 __func__, __LINE__, dim);
822		return -EINVAL;
823	}
824
825	r = evergreen_surface_value_conv_check(p, &surf, "texture");
826	if (r) {
827		return r;
828	}
829
830	/* align height */
831	evergreen_surface_check(p, &surf, NULL);
832	surf.nby = roundup(surf.nby, surf.halign);
833
834	r = evergreen_surface_check(p, &surf, "texture");
835	if (r) {
836		dev_warn(p->dev, "%s:%d texture invalid 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n",
837			 __func__, __LINE__, texdw[0], texdw[1], texdw[4],
838			 texdw[5], texdw[6], texdw[7]);
839		return r;
840	}
841
842	/* check texture size */
843	if (toffset & (surf.base_align - 1)) {
844		dev_warn(p->dev, "%s:%d texture bo base %ld not aligned with %ld\n",
845			 __func__, __LINE__, toffset, surf.base_align);
846		return -EINVAL;
847	}
848	if (moffset & (surf.base_align - 1)) {
849		dev_warn(p->dev, "%s:%d mipmap bo base %ld not aligned with %ld\n",
850			 __func__, __LINE__, moffset, surf.base_align);
851		return -EINVAL;
852	}
853	if (dim == SQ_TEX_DIM_3D) {
854		toffset += surf.layer_size * depth;
855	} else {
856		toffset += surf.layer_size * mslice;
857	}
858	if (toffset > radeon_bo_size(texture)) {
859		dev_warn(p->dev, "%s:%d texture bo too small (layer size %d, "
860			 "offset %ld, max layer %d, depth %d, bo size %ld) (%d %d)\n",
861			 __func__, __LINE__, surf.layer_size,
862			(unsigned long)texdw[2] << 8, mslice,
863			depth, radeon_bo_size(texture),
864			surf.nbx, surf.nby);
865		return -EINVAL;
866	}
867
868	if (!mipmap) {
869		if (llevel) {
870			dev_warn(p->dev, "%s:%i got NULL MIP_ADDRESS relocation\n",
871				 __func__, __LINE__);
872			return -EINVAL;
873		} else {
874			return 0; /* everything's ok */
875		}
876	}
877
878	/* check mipmap size */
879	for (i = 1; i <= llevel; i++) {
880		unsigned w, h, d;
881
882		w = r600_mip_minify(width, i);
883		h = r600_mip_minify(height, i);
884		d = r600_mip_minify(depth, i);
885		surf.nbx = r600_fmt_get_nblocksx(surf.format, w);
886		surf.nby = r600_fmt_get_nblocksy(surf.format, h);
887
888		switch (surf.mode) {
889		case ARRAY_2D_TILED_THIN1:
890			if (surf.nbx < surf.palign || surf.nby < surf.halign) {
891				surf.mode = ARRAY_1D_TILED_THIN1;
892			}
893			/* recompute alignment */
894			evergreen_surface_check(p, &surf, NULL);
895			break;
896		case ARRAY_LINEAR_GENERAL:
897		case ARRAY_LINEAR_ALIGNED:
898		case ARRAY_1D_TILED_THIN1:
899			break;
900		default:
901			dev_warn(p->dev, "%s:%d invalid array mode %d\n",
902				 __func__, __LINE__, surf.mode);
903			return -EINVAL;
904		}
905		surf.nbx = roundup(surf.nbx, surf.palign);
906		surf.nby = roundup(surf.nby, surf.halign);
907
908		r = evergreen_surface_check(p, &surf, "mipmap");
909		if (r) {
910			return r;
911		}
912
913		if (dim == SQ_TEX_DIM_3D) {
914			moffset += surf.layer_size * d;
915		} else {
916			moffset += surf.layer_size * mslice;
917		}
918		if (moffset > radeon_bo_size(mipmap)) {
919			dev_warn(p->dev, "%s:%d mipmap [%d] bo too small (layer size %d, "
920					"offset %ld, coffset %ld, max layer %d, depth %d, "
921					"bo size %ld) level0 (%d %d %d)\n",
922					__func__, __LINE__, i, surf.layer_size,
923					(unsigned long)texdw[3] << 8, moffset, mslice,
924					d, radeon_bo_size(mipmap),
925					width, height, depth);
926			dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n",
927				 __func__, __LINE__, surf.nbx, surf.nby,
928				surf.mode, surf.bpe, surf.nsamples,
929				surf.bankw, surf.bankh,
930				surf.tsplit, surf.mtilea);
931			return -EINVAL;
932		}
933	}
934
935	return 0;
936}
937
938static int evergreen_cs_track_check(struct radeon_cs_parser *p)
939{
940	struct evergreen_cs_track *track = p->track;
941	unsigned tmp, i;
942	int r;
943	unsigned buffer_mask = 0;
944
945	/* check streamout */
946	if (track->streamout_dirty && track->vgt_strmout_config) {
947		for (i = 0; i < 4; i++) {
948			if (track->vgt_strmout_config & (1 << i)) {
949				buffer_mask |= (track->vgt_strmout_buffer_config >> (i * 4)) & 0xf;
950			}
951		}
952
953		for (i = 0; i < 4; i++) {
954			if (buffer_mask & (1 << i)) {
955				if (track->vgt_strmout_bo[i]) {
956					u64 offset = (u64)track->vgt_strmout_bo_offset[i] +
957							(u64)track->vgt_strmout_size[i];
958					if (offset > radeon_bo_size(track->vgt_strmout_bo[i])) {
959						DRM_ERROR("streamout %d bo too small: 0x%jx, 0x%lx\n",
960							  i, (uintmax_t)offset,
961							  radeon_bo_size(track->vgt_strmout_bo[i]));
962						return -EINVAL;
963					}
964				} else {
965					dev_warn(p->dev, "No buffer for streamout %d\n", i);
966					return -EINVAL;
967				}
968			}
969		}
970		track->streamout_dirty = false;
971	}
972
973	if (track->sx_misc_kill_all_prims)
974		return 0;
975
976	/* check that we have a cb for each enabled target
977	 */
978	if (track->cb_dirty) {
979		tmp = track->cb_target_mask;
980		for (i = 0; i < 8; i++) {
981			if ((tmp >> (i * 4)) & 0xF) {
982				/* at least one component is enabled */
983				if (track->cb_color_bo[i] == NULL) {
984					dev_warn(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n",
985						__func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i);
986					return -EINVAL;
987				}
988				/* check cb */
989				r = evergreen_cs_track_validate_cb(p, i);
990				if (r) {
991					return r;
992				}
993			}
994		}
995		track->cb_dirty = false;
996	}
997
998	if (track->db_dirty) {
999		/* Check stencil buffer */
1000		if (G_028044_FORMAT(track->db_s_info) != V_028044_STENCIL_INVALID &&
1001		    G_028800_STENCIL_ENABLE(track->db_depth_control)) {
1002			r = evergreen_cs_track_validate_stencil(p);
1003			if (r)
1004				return r;
1005		}
1006		/* Check depth buffer */
1007		if (G_028040_FORMAT(track->db_z_info) != V_028040_Z_INVALID &&
1008		    G_028800_Z_ENABLE(track->db_depth_control)) {
1009			r = evergreen_cs_track_validate_depth(p);
1010			if (r)
1011				return r;
1012		}
1013		track->db_dirty = false;
1014	}
1015
1016	return 0;
1017}
1018
1019/**
1020 * evergreen_cs_packet_parse() - parse cp packet and point ib index to next packet
1021 * @parser:	parser structure holding parsing context.
1022 * @pkt:	where to store packet informations
1023 *
1024 * Assume that chunk_ib_index is properly set. Will return -EINVAL
1025 * if packet is bigger than remaining ib size. or if packets is unknown.
1026 **/
1027static int evergreen_cs_packet_parse(struct radeon_cs_parser *p,
1028			      struct radeon_cs_packet *pkt,
1029			      unsigned idx)
1030{
1031	struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
1032	uint32_t header;
1033
1034	if (idx >= ib_chunk->length_dw) {
1035		DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
1036			  idx, ib_chunk->length_dw);
1037		return -EINVAL;
1038	}
1039	header = radeon_get_ib_value(p, idx);
1040	pkt->idx = idx;
1041	pkt->type = CP_PACKET_GET_TYPE(header);
1042	pkt->count = CP_PACKET_GET_COUNT(header);
1043	pkt->one_reg_wr = 0;
1044	switch (pkt->type) {
1045	case PACKET_TYPE0:
1046		pkt->reg = CP_PACKET0_GET_REG(header);
1047		break;
1048	case PACKET_TYPE3:
1049		pkt->opcode = CP_PACKET3_GET_OPCODE(header);
1050		break;
1051	case PACKET_TYPE2:
1052		pkt->count = -1;
1053		break;
1054	default:
1055		DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
1056		return -EINVAL;
1057	}
1058	if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
1059		DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
1060			  pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
1061		return -EINVAL;
1062	}
1063	return 0;
1064}
1065
1066/**
1067 * evergreen_cs_packet_next_reloc() - parse next packet which should be reloc packet3
1068 * @parser:		parser structure holding parsing context.
1069 * @data:		pointer to relocation data
1070 * @offset_start:	starting offset
1071 * @offset_mask:	offset mask (to align start offset on)
1072 * @reloc:		reloc informations
1073 *
1074 * Check next packet is relocation packet3, do bo validation and compute
1075 * GPU offset using the provided start.
1076 **/
1077static int evergreen_cs_packet_next_reloc(struct radeon_cs_parser *p,
1078					  struct radeon_cs_reloc **cs_reloc)
1079{
1080	struct radeon_cs_chunk *relocs_chunk;
1081	struct radeon_cs_packet p3reloc;
1082	unsigned idx;
1083	int r;
1084
1085	if (p->chunk_relocs_idx == -1) {
1086		DRM_ERROR("No relocation chunk !\n");
1087		return -EINVAL;
1088	}
1089	*cs_reloc = NULL;
1090	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
1091	r = evergreen_cs_packet_parse(p, &p3reloc, p->idx);
1092	if (r) {
1093		return r;
1094	}
1095	p->idx += p3reloc.count + 2;
1096	if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
1097		DRM_ERROR("No packet3 for relocation for packet at %d.\n",
1098			  p3reloc.idx);
1099		return -EINVAL;
1100	}
1101	idx = radeon_get_ib_value(p, p3reloc.idx + 1);
1102	if (idx >= relocs_chunk->length_dw) {
1103		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
1104			  idx, relocs_chunk->length_dw);
1105		return -EINVAL;
1106	}
1107	/* FIXME: we assume reloc size is 4 dwords */
1108	*cs_reloc = p->relocs_ptr[(idx / 4)];
1109	return 0;
1110}
1111
1112/**
1113 * evergreen_cs_packet_next_is_pkt3_nop() - test if the next packet is NOP
1114 * @p:		structure holding the parser context.
1115 *
1116 * Check if the next packet is a relocation packet3.
1117 **/
1118static bool evergreen_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
1119{
1120	struct radeon_cs_packet p3reloc;
1121	int r;
1122
1123	r = evergreen_cs_packet_parse(p, &p3reloc, p->idx);
1124	if (r) {
1125		return false;
1126	}
1127	if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
1128		return false;
1129	}
1130	return true;
1131}
1132
1133/**
1134 * evergreen_cs_packet_next_vline() - parse userspace VLINE packet
1135 * @parser:		parser structure holding parsing context.
1136 *
1137 * Userspace sends a special sequence for VLINE waits.
1138 * PACKET0 - VLINE_START_END + value
1139 * PACKET3 - WAIT_REG_MEM poll vline status reg
1140 * RELOC (P3) - crtc_id in reloc.
1141 *
1142 * This function parses this and relocates the VLINE START END
1143 * and WAIT_REG_MEM packets to the correct crtc.
1144 * It also detects a switched off crtc and nulls out the
1145 * wait in that case.
1146 */
1147static int evergreen_cs_packet_parse_vline(struct radeon_cs_parser *p)
1148{
1149	struct drm_mode_object *obj;
1150	struct drm_crtc *crtc;
1151	struct radeon_crtc *radeon_crtc;
1152	struct radeon_cs_packet p3reloc, wait_reg_mem;
1153	int crtc_id;
1154	int r;
1155	uint32_t header, h_idx, reg, wait_reg_mem_info;
1156	volatile uint32_t *ib;
1157
1158	ib = p->ib.ptr;
1159
1160	/* parse the WAIT_REG_MEM */
1161	r = evergreen_cs_packet_parse(p, &wait_reg_mem, p->idx);
1162	if (r)
1163		return r;
1164
1165	/* check its a WAIT_REG_MEM */
1166	if (wait_reg_mem.type != PACKET_TYPE3 ||
1167	    wait_reg_mem.opcode != PACKET3_WAIT_REG_MEM) {
1168		DRM_ERROR("vline wait missing WAIT_REG_MEM segment\n");
1169		return -EINVAL;
1170	}
1171
1172	wait_reg_mem_info = radeon_get_ib_value(p, wait_reg_mem.idx + 1);
1173	/* bit 4 is reg (0) or mem (1) */
1174	if (wait_reg_mem_info & 0x10) {
1175		DRM_ERROR("vline WAIT_REG_MEM waiting on MEM rather than REG\n");
1176		return -EINVAL;
1177	}
1178	/* waiting for value to be equal */
1179	if ((wait_reg_mem_info & 0x7) != 0x3) {
1180		DRM_ERROR("vline WAIT_REG_MEM function not equal\n");
1181		return -EINVAL;
1182	}
1183	if ((radeon_get_ib_value(p, wait_reg_mem.idx + 2) << 2) != EVERGREEN_VLINE_STATUS) {
1184		DRM_ERROR("vline WAIT_REG_MEM bad reg\n");
1185		return -EINVAL;
1186	}
1187
1188	if (radeon_get_ib_value(p, wait_reg_mem.idx + 5) != EVERGREEN_VLINE_STAT) {
1189		DRM_ERROR("vline WAIT_REG_MEM bad bit mask\n");
1190		return -EINVAL;
1191	}
1192
1193	/* jump over the NOP */
1194	r = evergreen_cs_packet_parse(p, &p3reloc, p->idx + wait_reg_mem.count + 2);
1195	if (r)
1196		return r;
1197
1198	h_idx = p->idx - 2;
1199	p->idx += wait_reg_mem.count + 2;
1200	p->idx += p3reloc.count + 2;
1201
1202	header = radeon_get_ib_value(p, h_idx);
1203	crtc_id = radeon_get_ib_value(p, h_idx + 2 + 7 + 1);
1204	reg = CP_PACKET0_GET_REG(header);
1205	obj = drm_mode_object_find(p->rdev->ddev, crtc_id, DRM_MODE_OBJECT_CRTC);
1206	if (!obj) {
1207		DRM_ERROR("cannot find crtc %d\n", crtc_id);
1208		return -EINVAL;
1209	}
1210	crtc = obj_to_crtc(obj);
1211	radeon_crtc = to_radeon_crtc(crtc);
1212	crtc_id = radeon_crtc->crtc_id;
1213
1214	if (!crtc->enabled) {
1215		/* if the CRTC isn't enabled - we need to nop out the WAIT_REG_MEM */
1216		ib[h_idx + 2] = PACKET2(0);
1217		ib[h_idx + 3] = PACKET2(0);
1218		ib[h_idx + 4] = PACKET2(0);
1219		ib[h_idx + 5] = PACKET2(0);
1220		ib[h_idx + 6] = PACKET2(0);
1221		ib[h_idx + 7] = PACKET2(0);
1222		ib[h_idx + 8] = PACKET2(0);
1223	} else {
1224		switch (reg) {
1225		case EVERGREEN_VLINE_START_END:
1226			header &= ~R600_CP_PACKET0_REG_MASK;
1227			header |= (EVERGREEN_VLINE_START_END + radeon_crtc->crtc_offset) >> 2;
1228			ib[h_idx] = header;
1229			ib[h_idx + 4] = (EVERGREEN_VLINE_STATUS + radeon_crtc->crtc_offset) >> 2;
1230			break;
1231		default:
1232			DRM_ERROR("unknown crtc reloc\n");
1233			return -EINVAL;
1234		}
1235	}
1236	return 0;
1237}
1238
1239static int evergreen_packet0_check(struct radeon_cs_parser *p,
1240				   struct radeon_cs_packet *pkt,
1241				   unsigned idx, unsigned reg)
1242{
1243	int r;
1244
1245	switch (reg) {
1246	case EVERGREEN_VLINE_START_END:
1247		r = evergreen_cs_packet_parse_vline(p);
1248		if (r) {
1249			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1250					idx, reg);
1251			return r;
1252		}
1253		break;
1254	default:
1255		DRM_ERROR("Forbidden register 0x%04X in cs at %d\n",
1256		       reg, idx);
1257		return -EINVAL;
1258	}
1259	return 0;
1260}
1261
1262static int evergreen_cs_parse_packet0(struct radeon_cs_parser *p,
1263				      struct radeon_cs_packet *pkt)
1264{
1265	unsigned reg, i;
1266	unsigned idx;
1267	int r;
1268
1269	idx = pkt->idx + 1;
1270	reg = pkt->reg;
1271	for (i = 0; i <= pkt->count; i++, idx++, reg += 4) {
1272		r = evergreen_packet0_check(p, pkt, idx, reg);
1273		if (r) {
1274			return r;
1275		}
1276	}
1277	return 0;
1278}
1279
1280/**
1281 * evergreen_cs_check_reg() - check if register is authorized or not
1282 * @parser: parser structure holding parsing context
1283 * @reg: register we are testing
1284 * @idx: index into the cs buffer
1285 *
1286 * This function will test against evergreen_reg_safe_bm and return 0
1287 * if register is safe. If register is not flag as safe this function
1288 * will test it against a list of register needind special handling.
1289 */
1290static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
1291{
1292	struct evergreen_cs_track *track = (struct evergreen_cs_track *)p->track;
1293	struct radeon_cs_reloc *reloc;
1294	u32 last_reg;
1295	u32 m, i, tmp, *ib;
1296	int r;
1297
1298	if (p->rdev->family >= CHIP_CAYMAN)
1299		last_reg = ARRAY_SIZE(cayman_reg_safe_bm);
1300	else
1301		last_reg = ARRAY_SIZE(evergreen_reg_safe_bm);
1302
1303	i = (reg >> 7);
1304	if (i >= last_reg) {
1305		dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
1306		return -EINVAL;
1307	}
1308	m = 1 << ((reg >> 2) & 31);
1309	if (p->rdev->family >= CHIP_CAYMAN) {
1310		if (!(cayman_reg_safe_bm[i] & m))
1311			return 0;
1312	} else {
1313		if (!(evergreen_reg_safe_bm[i] & m))
1314			return 0;
1315	}
1316	ib = p->ib.ptr;
1317	switch (reg) {
1318	/* force following reg to 0 in an attempt to disable out buffer
1319	 * which will need us to better understand how it works to perform
1320	 * security check on it (Jerome)
1321	 */
1322	case SQ_ESGS_RING_SIZE:
1323	case SQ_GSVS_RING_SIZE:
1324	case SQ_ESTMP_RING_SIZE:
1325	case SQ_GSTMP_RING_SIZE:
1326	case SQ_HSTMP_RING_SIZE:
1327	case SQ_LSTMP_RING_SIZE:
1328	case SQ_PSTMP_RING_SIZE:
1329	case SQ_VSTMP_RING_SIZE:
1330	case SQ_ESGS_RING_ITEMSIZE:
1331	case SQ_ESTMP_RING_ITEMSIZE:
1332	case SQ_GSTMP_RING_ITEMSIZE:
1333	case SQ_GSVS_RING_ITEMSIZE:
1334	case SQ_GS_VERT_ITEMSIZE:
1335	case SQ_GS_VERT_ITEMSIZE_1:
1336	case SQ_GS_VERT_ITEMSIZE_2:
1337	case SQ_GS_VERT_ITEMSIZE_3:
1338	case SQ_GSVS_RING_OFFSET_1:
1339	case SQ_GSVS_RING_OFFSET_2:
1340	case SQ_GSVS_RING_OFFSET_3:
1341	case SQ_HSTMP_RING_ITEMSIZE:
1342	case SQ_LSTMP_RING_ITEMSIZE:
1343	case SQ_PSTMP_RING_ITEMSIZE:
1344	case SQ_VSTMP_RING_ITEMSIZE:
1345	case VGT_TF_RING_SIZE:
1346		/* get value to populate the IB don't remove */
1347		/*tmp =radeon_get_ib_value(p, idx);
1348		  ib[idx] = 0;*/
1349		break;
1350	case SQ_ESGS_RING_BASE:
1351	case SQ_GSVS_RING_BASE:
1352	case SQ_ESTMP_RING_BASE:
1353	case SQ_GSTMP_RING_BASE:
1354	case SQ_HSTMP_RING_BASE:
1355	case SQ_LSTMP_RING_BASE:
1356	case SQ_PSTMP_RING_BASE:
1357	case SQ_VSTMP_RING_BASE:
1358		r = evergreen_cs_packet_next_reloc(p, &reloc);
1359		if (r) {
1360			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1361					"0x%04X\n", reg);
1362			return -EINVAL;
1363		}
1364		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1365		break;
1366	case DB_DEPTH_CONTROL:
1367		track->db_depth_control = radeon_get_ib_value(p, idx);
1368		track->db_dirty = true;
1369		break;
1370	case CAYMAN_DB_EQAA:
1371		if (p->rdev->family < CHIP_CAYMAN) {
1372			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1373				 "0x%04X\n", reg);
1374			return -EINVAL;
1375		}
1376		break;
1377	case CAYMAN_DB_DEPTH_INFO:
1378		if (p->rdev->family < CHIP_CAYMAN) {
1379			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1380				 "0x%04X\n", reg);
1381			return -EINVAL;
1382		}
1383		break;
1384	case DB_Z_INFO:
1385		track->db_z_info = radeon_get_ib_value(p, idx);
1386		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1387			r = evergreen_cs_packet_next_reloc(p, &reloc);
1388			if (r) {
1389				dev_warn(p->dev, "bad SET_CONTEXT_REG "
1390						"0x%04X\n", reg);
1391				return -EINVAL;
1392			}
1393			ib[idx] &= ~Z_ARRAY_MODE(0xf);
1394			track->db_z_info &= ~Z_ARRAY_MODE(0xf);
1395			ib[idx] |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
1396			track->db_z_info |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
1397			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
1398				unsigned bankw, bankh, mtaspect, tile_split;
1399
1400				evergreen_tiling_fields(reloc->lobj.tiling_flags,
1401							&bankw, &bankh, &mtaspect,
1402							&tile_split);
1403				ib[idx] |= DB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
1404				ib[idx] |= DB_TILE_SPLIT(tile_split) |
1405						DB_BANK_WIDTH(bankw) |
1406						DB_BANK_HEIGHT(bankh) |
1407						DB_MACRO_TILE_ASPECT(mtaspect);
1408			}
1409		}
1410		track->db_dirty = true;
1411		break;
1412	case DB_STENCIL_INFO:
1413		track->db_s_info = radeon_get_ib_value(p, idx);
1414		track->db_dirty = true;
1415		break;
1416	case DB_DEPTH_VIEW:
1417		track->db_depth_view = radeon_get_ib_value(p, idx);
1418		track->db_dirty = true;
1419		break;
1420	case DB_DEPTH_SIZE:
1421		track->db_depth_size = radeon_get_ib_value(p, idx);
1422		track->db_dirty = true;
1423		break;
1424	case R_02805C_DB_DEPTH_SLICE:
1425		track->db_depth_slice = radeon_get_ib_value(p, idx);
1426		track->db_dirty = true;
1427		break;
1428	case DB_Z_READ_BASE:
1429		r = evergreen_cs_packet_next_reloc(p, &reloc);
1430		if (r) {
1431			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1432					"0x%04X\n", reg);
1433			return -EINVAL;
1434		}
1435		track->db_z_read_offset = radeon_get_ib_value(p, idx);
1436		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1437		track->db_z_read_bo = reloc->robj;
1438		track->db_dirty = true;
1439		break;
1440	case DB_Z_WRITE_BASE:
1441		r = evergreen_cs_packet_next_reloc(p, &reloc);
1442		if (r) {
1443			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1444					"0x%04X\n", reg);
1445			return -EINVAL;
1446		}
1447		track->db_z_write_offset = radeon_get_ib_value(p, idx);
1448		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1449		track->db_z_write_bo = reloc->robj;
1450		track->db_dirty = true;
1451		break;
1452	case DB_STENCIL_READ_BASE:
1453		r = evergreen_cs_packet_next_reloc(p, &reloc);
1454		if (r) {
1455			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1456					"0x%04X\n", reg);
1457			return -EINVAL;
1458		}
1459		track->db_s_read_offset = radeon_get_ib_value(p, idx);
1460		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1461		track->db_s_read_bo = reloc->robj;
1462		track->db_dirty = true;
1463		break;
1464	case DB_STENCIL_WRITE_BASE:
1465		r = evergreen_cs_packet_next_reloc(p, &reloc);
1466		if (r) {
1467			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1468					"0x%04X\n", reg);
1469			return -EINVAL;
1470		}
1471		track->db_s_write_offset = radeon_get_ib_value(p, idx);
1472		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1473		track->db_s_write_bo = reloc->robj;
1474		track->db_dirty = true;
1475		break;
1476	case VGT_STRMOUT_CONFIG:
1477		track->vgt_strmout_config = radeon_get_ib_value(p, idx);
1478		track->streamout_dirty = true;
1479		break;
1480	case VGT_STRMOUT_BUFFER_CONFIG:
1481		track->vgt_strmout_buffer_config = radeon_get_ib_value(p, idx);
1482		track->streamout_dirty = true;
1483		break;
1484	case VGT_STRMOUT_BUFFER_BASE_0:
1485	case VGT_STRMOUT_BUFFER_BASE_1:
1486	case VGT_STRMOUT_BUFFER_BASE_2:
1487	case VGT_STRMOUT_BUFFER_BASE_3:
1488		r = evergreen_cs_packet_next_reloc(p, &reloc);
1489		if (r) {
1490			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1491					"0x%04X\n", reg);
1492			return -EINVAL;
1493		}
1494		tmp = (reg - VGT_STRMOUT_BUFFER_BASE_0) / 16;
1495		track->vgt_strmout_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8;
1496		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1497		track->vgt_strmout_bo[tmp] = reloc->robj;
1498		track->streamout_dirty = true;
1499		break;
1500	case VGT_STRMOUT_BUFFER_SIZE_0:
1501	case VGT_STRMOUT_BUFFER_SIZE_1:
1502	case VGT_STRMOUT_BUFFER_SIZE_2:
1503	case VGT_STRMOUT_BUFFER_SIZE_3:
1504		tmp = (reg - VGT_STRMOUT_BUFFER_SIZE_0) / 16;
1505		/* size in register is DWs, convert to bytes */
1506		track->vgt_strmout_size[tmp] = radeon_get_ib_value(p, idx) * 4;
1507		track->streamout_dirty = true;
1508		break;
1509	case CP_COHER_BASE:
1510		r = evergreen_cs_packet_next_reloc(p, &reloc);
1511		if (r) {
1512			dev_warn(p->dev, "missing reloc for CP_COHER_BASE "
1513					"0x%04X\n", reg);
1514			return -EINVAL;
1515		}
1516		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1517	case CB_TARGET_MASK:
1518		track->cb_target_mask = radeon_get_ib_value(p, idx);
1519		track->cb_dirty = true;
1520		break;
1521	case CB_SHADER_MASK:
1522		track->cb_shader_mask = radeon_get_ib_value(p, idx);
1523		track->cb_dirty = true;
1524		break;
1525	case PA_SC_AA_CONFIG:
1526		if (p->rdev->family >= CHIP_CAYMAN) {
1527			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1528				 "0x%04X\n", reg);
1529			return -EINVAL;
1530		}
1531		tmp = radeon_get_ib_value(p, idx) & MSAA_NUM_SAMPLES_MASK;
1532		track->nsamples = 1 << tmp;
1533		break;
1534	case CAYMAN_PA_SC_AA_CONFIG:
1535		if (p->rdev->family < CHIP_CAYMAN) {
1536			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1537				 "0x%04X\n", reg);
1538			return -EINVAL;
1539		}
1540		tmp = radeon_get_ib_value(p, idx) & CAYMAN_MSAA_NUM_SAMPLES_MASK;
1541		track->nsamples = 1 << tmp;
1542		break;
1543	case CB_COLOR0_VIEW:
1544	case CB_COLOR1_VIEW:
1545	case CB_COLOR2_VIEW:
1546	case CB_COLOR3_VIEW:
1547	case CB_COLOR4_VIEW:
1548	case CB_COLOR5_VIEW:
1549	case CB_COLOR6_VIEW:
1550	case CB_COLOR7_VIEW:
1551		tmp = (reg - CB_COLOR0_VIEW) / 0x3c;
1552		track->cb_color_view[tmp] = radeon_get_ib_value(p, idx);
1553		track->cb_dirty = true;
1554		break;
1555	case CB_COLOR8_VIEW:
1556	case CB_COLOR9_VIEW:
1557	case CB_COLOR10_VIEW:
1558	case CB_COLOR11_VIEW:
1559		tmp = ((reg - CB_COLOR8_VIEW) / 0x1c) + 8;
1560		track->cb_color_view[tmp] = radeon_get_ib_value(p, idx);
1561		track->cb_dirty = true;
1562		break;
1563	case CB_COLOR0_INFO:
1564	case CB_COLOR1_INFO:
1565	case CB_COLOR2_INFO:
1566	case CB_COLOR3_INFO:
1567	case CB_COLOR4_INFO:
1568	case CB_COLOR5_INFO:
1569	case CB_COLOR6_INFO:
1570	case CB_COLOR7_INFO:
1571		tmp = (reg - CB_COLOR0_INFO) / 0x3c;
1572		track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
1573		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1574			r = evergreen_cs_packet_next_reloc(p, &reloc);
1575			if (r) {
1576				dev_warn(p->dev, "bad SET_CONTEXT_REG "
1577						"0x%04X\n", reg);
1578				return -EINVAL;
1579			}
1580			ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
1581			track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
1582		}
1583		track->cb_dirty = true;
1584		break;
1585	case CB_COLOR8_INFO:
1586	case CB_COLOR9_INFO:
1587	case CB_COLOR10_INFO:
1588	case CB_COLOR11_INFO:
1589		tmp = ((reg - CB_COLOR8_INFO) / 0x1c) + 8;
1590		track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
1591		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1592			r = evergreen_cs_packet_next_reloc(p, &reloc);
1593			if (r) {
1594				dev_warn(p->dev, "bad SET_CONTEXT_REG "
1595						"0x%04X\n", reg);
1596				return -EINVAL;
1597			}
1598			ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
1599			track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
1600		}
1601		track->cb_dirty = true;
1602		break;
1603	case CB_COLOR0_PITCH:
1604	case CB_COLOR1_PITCH:
1605	case CB_COLOR2_PITCH:
1606	case CB_COLOR3_PITCH:
1607	case CB_COLOR4_PITCH:
1608	case CB_COLOR5_PITCH:
1609	case CB_COLOR6_PITCH:
1610	case CB_COLOR7_PITCH:
1611		tmp = (reg - CB_COLOR0_PITCH) / 0x3c;
1612		track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx);
1613		track->cb_dirty = true;
1614		break;
1615	case CB_COLOR8_PITCH:
1616	case CB_COLOR9_PITCH:
1617	case CB_COLOR10_PITCH:
1618	case CB_COLOR11_PITCH:
1619		tmp = ((reg - CB_COLOR8_PITCH) / 0x1c) + 8;
1620		track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx);
1621		track->cb_dirty = true;
1622		break;
1623	case CB_COLOR0_SLICE:
1624	case CB_COLOR1_SLICE:
1625	case CB_COLOR2_SLICE:
1626	case CB_COLOR3_SLICE:
1627	case CB_COLOR4_SLICE:
1628	case CB_COLOR5_SLICE:
1629	case CB_COLOR6_SLICE:
1630	case CB_COLOR7_SLICE:
1631		tmp = (reg - CB_COLOR0_SLICE) / 0x3c;
1632		track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx);
1633		track->cb_color_slice_idx[tmp] = idx;
1634		track->cb_dirty = true;
1635		break;
1636	case CB_COLOR8_SLICE:
1637	case CB_COLOR9_SLICE:
1638	case CB_COLOR10_SLICE:
1639	case CB_COLOR11_SLICE:
1640		tmp = ((reg - CB_COLOR8_SLICE) / 0x1c) + 8;
1641		track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx);
1642		track->cb_color_slice_idx[tmp] = idx;
1643		track->cb_dirty = true;
1644		break;
1645	case CB_COLOR0_ATTRIB:
1646	case CB_COLOR1_ATTRIB:
1647	case CB_COLOR2_ATTRIB:
1648	case CB_COLOR3_ATTRIB:
1649	case CB_COLOR4_ATTRIB:
1650	case CB_COLOR5_ATTRIB:
1651	case CB_COLOR6_ATTRIB:
1652	case CB_COLOR7_ATTRIB:
1653		r = evergreen_cs_packet_next_reloc(p, &reloc);
1654		if (r) {
1655			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1656					"0x%04X\n", reg);
1657			return -EINVAL;
1658		}
1659		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1660			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
1661				unsigned bankw, bankh, mtaspect, tile_split;
1662
1663				evergreen_tiling_fields(reloc->lobj.tiling_flags,
1664							&bankw, &bankh, &mtaspect,
1665							&tile_split);
1666				ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
1667				ib[idx] |= CB_TILE_SPLIT(tile_split) |
1668					   CB_BANK_WIDTH(bankw) |
1669					   CB_BANK_HEIGHT(bankh) |
1670					   CB_MACRO_TILE_ASPECT(mtaspect);
1671			}
1672		}
1673		tmp = ((reg - CB_COLOR0_ATTRIB) / 0x3c);
1674		track->cb_color_attrib[tmp] = ib[idx];
1675		track->cb_dirty = true;
1676		break;
1677	case CB_COLOR8_ATTRIB:
1678	case CB_COLOR9_ATTRIB:
1679	case CB_COLOR10_ATTRIB:
1680	case CB_COLOR11_ATTRIB:
1681		r = evergreen_cs_packet_next_reloc(p, &reloc);
1682		if (r) {
1683			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1684					"0x%04X\n", reg);
1685			return -EINVAL;
1686		}
1687		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1688			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
1689				unsigned bankw, bankh, mtaspect, tile_split;
1690
1691				evergreen_tiling_fields(reloc->lobj.tiling_flags,
1692							&bankw, &bankh, &mtaspect,
1693							&tile_split);
1694				ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
1695				ib[idx] |= CB_TILE_SPLIT(tile_split) |
1696					   CB_BANK_WIDTH(bankw) |
1697					   CB_BANK_HEIGHT(bankh) |
1698					   CB_MACRO_TILE_ASPECT(mtaspect);
1699			}
1700		}
1701		tmp = ((reg - CB_COLOR8_ATTRIB) / 0x1c) + 8;
1702		track->cb_color_attrib[tmp] = ib[idx];
1703		track->cb_dirty = true;
1704		break;
1705	case CB_COLOR0_FMASK:
1706	case CB_COLOR1_FMASK:
1707	case CB_COLOR2_FMASK:
1708	case CB_COLOR3_FMASK:
1709	case CB_COLOR4_FMASK:
1710	case CB_COLOR5_FMASK:
1711	case CB_COLOR6_FMASK:
1712	case CB_COLOR7_FMASK:
1713		tmp = (reg - CB_COLOR0_FMASK) / 0x3c;
1714		r = evergreen_cs_packet_next_reloc(p, &reloc);
1715		if (r) {
1716			dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
1717			return -EINVAL;
1718		}
1719		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1720		track->cb_color_fmask_bo[tmp] = reloc->robj;
1721		break;
1722	case CB_COLOR0_CMASK:
1723	case CB_COLOR1_CMASK:
1724	case CB_COLOR2_CMASK:
1725	case CB_COLOR3_CMASK:
1726	case CB_COLOR4_CMASK:
1727	case CB_COLOR5_CMASK:
1728	case CB_COLOR6_CMASK:
1729	case CB_COLOR7_CMASK:
1730		tmp = (reg - CB_COLOR0_CMASK) / 0x3c;
1731		r = evergreen_cs_packet_next_reloc(p, &reloc);
1732		if (r) {
1733			dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
1734			return -EINVAL;
1735		}
1736		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1737		track->cb_color_cmask_bo[tmp] = reloc->robj;
1738		break;
1739	case CB_COLOR0_FMASK_SLICE:
1740	case CB_COLOR1_FMASK_SLICE:
1741	case CB_COLOR2_FMASK_SLICE:
1742	case CB_COLOR3_FMASK_SLICE:
1743	case CB_COLOR4_FMASK_SLICE:
1744	case CB_COLOR5_FMASK_SLICE:
1745	case CB_COLOR6_FMASK_SLICE:
1746	case CB_COLOR7_FMASK_SLICE:
1747		tmp = (reg - CB_COLOR0_FMASK_SLICE) / 0x3c;
1748		track->cb_color_fmask_slice[tmp] = radeon_get_ib_value(p, idx);
1749		break;
1750	case CB_COLOR0_CMASK_SLICE:
1751	case CB_COLOR1_CMASK_SLICE:
1752	case CB_COLOR2_CMASK_SLICE:
1753	case CB_COLOR3_CMASK_SLICE:
1754	case CB_COLOR4_CMASK_SLICE:
1755	case CB_COLOR5_CMASK_SLICE:
1756	case CB_COLOR6_CMASK_SLICE:
1757	case CB_COLOR7_CMASK_SLICE:
1758		tmp = (reg - CB_COLOR0_CMASK_SLICE) / 0x3c;
1759		track->cb_color_cmask_slice[tmp] = radeon_get_ib_value(p, idx);
1760		break;
1761	case CB_COLOR0_BASE:
1762	case CB_COLOR1_BASE:
1763	case CB_COLOR2_BASE:
1764	case CB_COLOR3_BASE:
1765	case CB_COLOR4_BASE:
1766	case CB_COLOR5_BASE:
1767	case CB_COLOR6_BASE:
1768	case CB_COLOR7_BASE:
1769		r = evergreen_cs_packet_next_reloc(p, &reloc);
1770		if (r) {
1771			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1772					"0x%04X\n", reg);
1773			return -EINVAL;
1774		}
1775		tmp = (reg - CB_COLOR0_BASE) / 0x3c;
1776		track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx);
1777		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1778		track->cb_color_bo[tmp] = reloc->robj;
1779		track->cb_dirty = true;
1780		break;
1781	case CB_COLOR8_BASE:
1782	case CB_COLOR9_BASE:
1783	case CB_COLOR10_BASE:
1784	case CB_COLOR11_BASE:
1785		r = evergreen_cs_packet_next_reloc(p, &reloc);
1786		if (r) {
1787			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1788					"0x%04X\n", reg);
1789			return -EINVAL;
1790		}
1791		tmp = ((reg - CB_COLOR8_BASE) / 0x1c) + 8;
1792		track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx);
1793		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1794		track->cb_color_bo[tmp] = reloc->robj;
1795		track->cb_dirty = true;
1796		break;
1797	case DB_HTILE_DATA_BASE:
1798		r = evergreen_cs_packet_next_reloc(p, &reloc);
1799		if (r) {
1800			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1801					"0x%04X\n", reg);
1802			return -EINVAL;
1803		}
1804		track->htile_offset = radeon_get_ib_value(p, idx);
1805		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1806		track->htile_bo = reloc->robj;
1807		track->db_dirty = true;
1808		break;
1809	case DB_HTILE_SURFACE:
1810		/* 8x8 only */
1811		track->htile_surface = radeon_get_ib_value(p, idx);
1812		/* force 8x8 htile width and height */
1813		ib[idx] |= 3;
1814		track->db_dirty = true;
1815		break;
1816	case CB_IMMED0_BASE:
1817	case CB_IMMED1_BASE:
1818	case CB_IMMED2_BASE:
1819	case CB_IMMED3_BASE:
1820	case CB_IMMED4_BASE:
1821	case CB_IMMED5_BASE:
1822	case CB_IMMED6_BASE:
1823	case CB_IMMED7_BASE:
1824	case CB_IMMED8_BASE:
1825	case CB_IMMED9_BASE:
1826	case CB_IMMED10_BASE:
1827	case CB_IMMED11_BASE:
1828	case SQ_PGM_START_FS:
1829	case SQ_PGM_START_ES:
1830	case SQ_PGM_START_VS:
1831	case SQ_PGM_START_GS:
1832	case SQ_PGM_START_PS:
1833	case SQ_PGM_START_HS:
1834	case SQ_PGM_START_LS:
1835	case SQ_CONST_MEM_BASE:
1836	case SQ_ALU_CONST_CACHE_GS_0:
1837	case SQ_ALU_CONST_CACHE_GS_1:
1838	case SQ_ALU_CONST_CACHE_GS_2:
1839	case SQ_ALU_CONST_CACHE_GS_3:
1840	case SQ_ALU_CONST_CACHE_GS_4:
1841	case SQ_ALU_CONST_CACHE_GS_5:
1842	case SQ_ALU_CONST_CACHE_GS_6:
1843	case SQ_ALU_CONST_CACHE_GS_7:
1844	case SQ_ALU_CONST_CACHE_GS_8:
1845	case SQ_ALU_CONST_CACHE_GS_9:
1846	case SQ_ALU_CONST_CACHE_GS_10:
1847	case SQ_ALU_CONST_CACHE_GS_11:
1848	case SQ_ALU_CONST_CACHE_GS_12:
1849	case SQ_ALU_CONST_CACHE_GS_13:
1850	case SQ_ALU_CONST_CACHE_GS_14:
1851	case SQ_ALU_CONST_CACHE_GS_15:
1852	case SQ_ALU_CONST_CACHE_PS_0:
1853	case SQ_ALU_CONST_CACHE_PS_1:
1854	case SQ_ALU_CONST_CACHE_PS_2:
1855	case SQ_ALU_CONST_CACHE_PS_3:
1856	case SQ_ALU_CONST_CACHE_PS_4:
1857	case SQ_ALU_CONST_CACHE_PS_5:
1858	case SQ_ALU_CONST_CACHE_PS_6:
1859	case SQ_ALU_CONST_CACHE_PS_7:
1860	case SQ_ALU_CONST_CACHE_PS_8:
1861	case SQ_ALU_CONST_CACHE_PS_9:
1862	case SQ_ALU_CONST_CACHE_PS_10:
1863	case SQ_ALU_CONST_CACHE_PS_11:
1864	case SQ_ALU_CONST_CACHE_PS_12:
1865	case SQ_ALU_CONST_CACHE_PS_13:
1866	case SQ_ALU_CONST_CACHE_PS_14:
1867	case SQ_ALU_CONST_CACHE_PS_15:
1868	case SQ_ALU_CONST_CACHE_VS_0:
1869	case SQ_ALU_CONST_CACHE_VS_1:
1870	case SQ_ALU_CONST_CACHE_VS_2:
1871	case SQ_ALU_CONST_CACHE_VS_3:
1872	case SQ_ALU_CONST_CACHE_VS_4:
1873	case SQ_ALU_CONST_CACHE_VS_5:
1874	case SQ_ALU_CONST_CACHE_VS_6:
1875	case SQ_ALU_CONST_CACHE_VS_7:
1876	case SQ_ALU_CONST_CACHE_VS_8:
1877	case SQ_ALU_CONST_CACHE_VS_9:
1878	case SQ_ALU_CONST_CACHE_VS_10:
1879	case SQ_ALU_CONST_CACHE_VS_11:
1880	case SQ_ALU_CONST_CACHE_VS_12:
1881	case SQ_ALU_CONST_CACHE_VS_13:
1882	case SQ_ALU_CONST_CACHE_VS_14:
1883	case SQ_ALU_CONST_CACHE_VS_15:
1884	case SQ_ALU_CONST_CACHE_HS_0:
1885	case SQ_ALU_CONST_CACHE_HS_1:
1886	case SQ_ALU_CONST_CACHE_HS_2:
1887	case SQ_ALU_CONST_CACHE_HS_3:
1888	case SQ_ALU_CONST_CACHE_HS_4:
1889	case SQ_ALU_CONST_CACHE_HS_5:
1890	case SQ_ALU_CONST_CACHE_HS_6:
1891	case SQ_ALU_CONST_CACHE_HS_7:
1892	case SQ_ALU_CONST_CACHE_HS_8:
1893	case SQ_ALU_CONST_CACHE_HS_9:
1894	case SQ_ALU_CONST_CACHE_HS_10:
1895	case SQ_ALU_CONST_CACHE_HS_11:
1896	case SQ_ALU_CONST_CACHE_HS_12:
1897	case SQ_ALU_CONST_CACHE_HS_13:
1898	case SQ_ALU_CONST_CACHE_HS_14:
1899	case SQ_ALU_CONST_CACHE_HS_15:
1900	case SQ_ALU_CONST_CACHE_LS_0:
1901	case SQ_ALU_CONST_CACHE_LS_1:
1902	case SQ_ALU_CONST_CACHE_LS_2:
1903	case SQ_ALU_CONST_CACHE_LS_3:
1904	case SQ_ALU_CONST_CACHE_LS_4:
1905	case SQ_ALU_CONST_CACHE_LS_5:
1906	case SQ_ALU_CONST_CACHE_LS_6:
1907	case SQ_ALU_CONST_CACHE_LS_7:
1908	case SQ_ALU_CONST_CACHE_LS_8:
1909	case SQ_ALU_CONST_CACHE_LS_9:
1910	case SQ_ALU_CONST_CACHE_LS_10:
1911	case SQ_ALU_CONST_CACHE_LS_11:
1912	case SQ_ALU_CONST_CACHE_LS_12:
1913	case SQ_ALU_CONST_CACHE_LS_13:
1914	case SQ_ALU_CONST_CACHE_LS_14:
1915	case SQ_ALU_CONST_CACHE_LS_15:
1916		r = evergreen_cs_packet_next_reloc(p, &reloc);
1917		if (r) {
1918			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1919					"0x%04X\n", reg);
1920			return -EINVAL;
1921		}
1922		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1923		break;
1924	case SX_MEMORY_EXPORT_BASE:
1925		if (p->rdev->family >= CHIP_CAYMAN) {
1926			dev_warn(p->dev, "bad SET_CONFIG_REG "
1927				 "0x%04X\n", reg);
1928			return -EINVAL;
1929		}
1930		r = evergreen_cs_packet_next_reloc(p, &reloc);
1931		if (r) {
1932			dev_warn(p->dev, "bad SET_CONFIG_REG "
1933					"0x%04X\n", reg);
1934			return -EINVAL;
1935		}
1936		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1937		break;
1938	case CAYMAN_SX_SCATTER_EXPORT_BASE:
1939		if (p->rdev->family < CHIP_CAYMAN) {
1940			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1941				 "0x%04X\n", reg);
1942			return -EINVAL;
1943		}
1944		r = evergreen_cs_packet_next_reloc(p, &reloc);
1945		if (r) {
1946			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1947					"0x%04X\n", reg);
1948			return -EINVAL;
1949		}
1950		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1951		break;
1952	case SX_MISC:
1953		track->sx_misc_kill_all_prims = (radeon_get_ib_value(p, idx) & 0x1) != 0;
1954		break;
1955	default:
1956		dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
1957		return -EINVAL;
1958	}
1959	return 0;
1960}
1961
1962static bool evergreen_is_safe_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
1963{
1964	u32 last_reg, m, i;
1965
1966	if (p->rdev->family >= CHIP_CAYMAN)
1967		last_reg = ARRAY_SIZE(cayman_reg_safe_bm);
1968	else
1969		last_reg = ARRAY_SIZE(evergreen_reg_safe_bm);
1970
1971	i = (reg >> 7);
1972	if (i >= last_reg) {
1973		dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
1974		return false;
1975	}
1976	m = 1 << ((reg >> 2) & 31);
1977	if (p->rdev->family >= CHIP_CAYMAN) {
1978		if (!(cayman_reg_safe_bm[i] & m))
1979			return true;
1980	} else {
1981		if (!(evergreen_reg_safe_bm[i] & m))
1982			return true;
1983	}
1984	dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
1985	return false;
1986}
1987
1988static int evergreen_packet3_check(struct radeon_cs_parser *p,
1989				   struct radeon_cs_packet *pkt)
1990{
1991	struct radeon_cs_reloc *reloc;
1992	struct evergreen_cs_track *track;
1993	volatile u32 *ib;
1994	unsigned idx;
1995	unsigned i;
1996	unsigned start_reg, end_reg, reg;
1997	int r;
1998	u32 idx_value;
1999
2000	track = (struct evergreen_cs_track *)p->track;
2001	ib = p->ib.ptr;
2002	idx = pkt->idx + 1;
2003	idx_value = radeon_get_ib_value(p, idx);
2004
2005	switch (pkt->opcode) {
2006	case PACKET3_SET_PREDICATION:
2007	{
2008		int pred_op;
2009		int tmp;
2010		uint64_t offset;
2011
2012		if (pkt->count != 1) {
2013			DRM_ERROR("bad SET PREDICATION\n");
2014			return -EINVAL;
2015		}
2016
2017		tmp = radeon_get_ib_value(p, idx + 1);
2018		pred_op = (tmp >> 16) & 0x7;
2019
2020		/* for the clear predicate operation */
2021		if (pred_op == 0)
2022			return 0;
2023
2024		if (pred_op > 2) {
2025			DRM_ERROR("bad SET PREDICATION operation %d\n", pred_op);
2026			return -EINVAL;
2027		}
2028
2029		r = evergreen_cs_packet_next_reloc(p, &reloc);
2030		if (r) {
2031			DRM_ERROR("bad SET PREDICATION\n");
2032			return -EINVAL;
2033		}
2034
2035		offset = reloc->lobj.gpu_offset +
2036		         (idx_value & 0xfffffff0) +
2037		         ((u64)(tmp & 0xff) << 32);
2038
2039		ib[idx + 0] = offset;
2040		ib[idx + 1] = (tmp & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2041	}
2042	break;
2043	case PACKET3_CONTEXT_CONTROL:
2044		if (pkt->count != 1) {
2045			DRM_ERROR("bad CONTEXT_CONTROL\n");
2046			return -EINVAL;
2047		}
2048		break;
2049	case PACKET3_INDEX_TYPE:
2050	case PACKET3_NUM_INSTANCES:
2051	case PACKET3_CLEAR_STATE:
2052		if (pkt->count) {
2053			DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n");
2054			return -EINVAL;
2055		}
2056		break;
2057	case CAYMAN_PACKET3_DEALLOC_STATE:
2058		if (p->rdev->family < CHIP_CAYMAN) {
2059			DRM_ERROR("bad PACKET3_DEALLOC_STATE\n");
2060			return -EINVAL;
2061		}
2062		if (pkt->count) {
2063			DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n");
2064			return -EINVAL;
2065		}
2066		break;
2067	case PACKET3_INDEX_BASE:
2068	{
2069		uint64_t offset;
2070
2071		if (pkt->count != 1) {
2072			DRM_ERROR("bad INDEX_BASE\n");
2073			return -EINVAL;
2074		}
2075		r = evergreen_cs_packet_next_reloc(p, &reloc);
2076		if (r) {
2077			DRM_ERROR("bad INDEX_BASE\n");
2078			return -EINVAL;
2079		}
2080
2081		offset = reloc->lobj.gpu_offset +
2082		         idx_value +
2083		         ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
2084
2085		ib[idx+0] = offset;
2086		ib[idx+1] = upper_32_bits(offset) & 0xff;
2087
2088		r = evergreen_cs_track_check(p);
2089		if (r) {
2090			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2091			return r;
2092		}
2093		break;
2094	}
2095	case PACKET3_DRAW_INDEX:
2096	{
2097		uint64_t offset;
2098		if (pkt->count != 3) {
2099			DRM_ERROR("bad DRAW_INDEX\n");
2100			return -EINVAL;
2101		}
2102		r = evergreen_cs_packet_next_reloc(p, &reloc);
2103		if (r) {
2104			DRM_ERROR("bad DRAW_INDEX\n");
2105			return -EINVAL;
2106		}
2107
2108		offset = reloc->lobj.gpu_offset +
2109		         idx_value +
2110		         ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
2111
2112		ib[idx+0] = offset;
2113		ib[idx+1] = upper_32_bits(offset) & 0xff;
2114
2115		r = evergreen_cs_track_check(p);
2116		if (r) {
2117			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2118			return r;
2119		}
2120		break;
2121	}
2122	case PACKET3_DRAW_INDEX_2:
2123	{
2124		uint64_t offset;
2125
2126		if (pkt->count != 4) {
2127			DRM_ERROR("bad DRAW_INDEX_2\n");
2128			return -EINVAL;
2129		}
2130		r = evergreen_cs_packet_next_reloc(p, &reloc);
2131		if (r) {
2132			DRM_ERROR("bad DRAW_INDEX_2\n");
2133			return -EINVAL;
2134		}
2135
2136		offset = reloc->lobj.gpu_offset +
2137		         radeon_get_ib_value(p, idx+1) +
2138		         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2139
2140		ib[idx+1] = offset;
2141		ib[idx+2] = upper_32_bits(offset) & 0xff;
2142
2143		r = evergreen_cs_track_check(p);
2144		if (r) {
2145			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2146			return r;
2147		}
2148		break;
2149	}
2150	case PACKET3_DRAW_INDEX_AUTO:
2151		if (pkt->count != 1) {
2152			DRM_ERROR("bad DRAW_INDEX_AUTO\n");
2153			return -EINVAL;
2154		}
2155		r = evergreen_cs_track_check(p);
2156		if (r) {
2157			dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
2158			return r;
2159		}
2160		break;
2161	case PACKET3_DRAW_INDEX_MULTI_AUTO:
2162		if (pkt->count != 2) {
2163			DRM_ERROR("bad DRAW_INDEX_MULTI_AUTO\n");
2164			return -EINVAL;
2165		}
2166		r = evergreen_cs_track_check(p);
2167		if (r) {
2168			dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
2169			return r;
2170		}
2171		break;
2172	case PACKET3_DRAW_INDEX_IMMD:
2173		if (pkt->count < 2) {
2174			DRM_ERROR("bad DRAW_INDEX_IMMD\n");
2175			return -EINVAL;
2176		}
2177		r = evergreen_cs_track_check(p);
2178		if (r) {
2179			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2180			return r;
2181		}
2182		break;
2183	case PACKET3_DRAW_INDEX_OFFSET:
2184		if (pkt->count != 2) {
2185			DRM_ERROR("bad DRAW_INDEX_OFFSET\n");
2186			return -EINVAL;
2187		}
2188		r = evergreen_cs_track_check(p);
2189		if (r) {
2190			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2191			return r;
2192		}
2193		break;
2194	case PACKET3_DRAW_INDEX_OFFSET_2:
2195		if (pkt->count != 3) {
2196			DRM_ERROR("bad DRAW_INDEX_OFFSET_2\n");
2197			return -EINVAL;
2198		}
2199		r = evergreen_cs_track_check(p);
2200		if (r) {
2201			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2202			return r;
2203		}
2204		break;
2205	case PACKET3_DISPATCH_DIRECT:
2206		if (pkt->count != 3) {
2207			DRM_ERROR("bad DISPATCH_DIRECT\n");
2208			return -EINVAL;
2209		}
2210		r = evergreen_cs_track_check(p);
2211		if (r) {
2212			dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
2213			return r;
2214		}
2215		break;
2216	case PACKET3_DISPATCH_INDIRECT:
2217		if (pkt->count != 1) {
2218			DRM_ERROR("bad DISPATCH_INDIRECT\n");
2219			return -EINVAL;
2220		}
2221		r = evergreen_cs_packet_next_reloc(p, &reloc);
2222		if (r) {
2223			DRM_ERROR("bad DISPATCH_INDIRECT\n");
2224			return -EINVAL;
2225		}
2226		ib[idx+0] = idx_value + (u32)(reloc->lobj.gpu_offset & 0xffffffff);
2227		r = evergreen_cs_track_check(p);
2228		if (r) {
2229			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2230			return r;
2231		}
2232		break;
2233	case PACKET3_WAIT_REG_MEM:
2234		if (pkt->count != 5) {
2235			DRM_ERROR("bad WAIT_REG_MEM\n");
2236			return -EINVAL;
2237		}
2238		/* bit 4 is reg (0) or mem (1) */
2239		if (idx_value & 0x10) {
2240			uint64_t offset;
2241
2242			r = evergreen_cs_packet_next_reloc(p, &reloc);
2243			if (r) {
2244				DRM_ERROR("bad WAIT_REG_MEM\n");
2245				return -EINVAL;
2246			}
2247
2248			offset = reloc->lobj.gpu_offset +
2249			         (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2250			         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2251
2252			ib[idx+1] = (ib[idx+1] & 0x3) | (offset & 0xfffffffc);
2253			ib[idx+2] = upper_32_bits(offset) & 0xff;
2254		}
2255		break;
2256	case PACKET3_CP_DMA:
2257	{
2258		u32 command, size, info;
2259		u64 offset, tmp;
2260		if (pkt->count != 4) {
2261			DRM_ERROR("bad CP DMA\n");
2262			return -EINVAL;
2263		}
2264		command = radeon_get_ib_value(p, idx+4);
2265		size = command & 0x1fffff;
2266		info = radeon_get_ib_value(p, idx+1);
2267		if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
2268		    (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
2269		    ((((info & 0x00300000) >> 20) == 0) &&
2270		     (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
2271		    ((((info & 0x60000000) >> 29) == 0) &&
2272		     (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
2273			/* non mem to mem copies requires dw aligned count */
2274			if (size % 4) {
2275				DRM_ERROR("CP DMA command requires dw count alignment\n");
2276				return -EINVAL;
2277			}
2278		}
2279		if (command & PACKET3_CP_DMA_CMD_SAS) {
2280			/* src address space is register */
2281			/* GDS is ok */
2282			if (((info & 0x60000000) >> 29) != 1) {
2283				DRM_ERROR("CP DMA SAS not supported\n");
2284				return -EINVAL;
2285			}
2286		} else {
2287			if (command & PACKET3_CP_DMA_CMD_SAIC) {
2288				DRM_ERROR("CP DMA SAIC only supported for registers\n");
2289				return -EINVAL;
2290			}
2291			/* src address space is memory */
2292			if (((info & 0x60000000) >> 29) == 0) {
2293				r = evergreen_cs_packet_next_reloc(p, &reloc);
2294				if (r) {
2295					DRM_ERROR("bad CP DMA SRC\n");
2296					return -EINVAL;
2297				}
2298
2299				tmp = radeon_get_ib_value(p, idx) +
2300					((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
2301
2302				offset = reloc->lobj.gpu_offset + tmp;
2303
2304				if ((tmp + size) > radeon_bo_size(reloc->robj)) {
2305					dev_warn(p->dev, "CP DMA src buffer too small (%ju %lu)\n",
2306						 (uintmax_t)tmp + size, radeon_bo_size(reloc->robj));
2307					return -EINVAL;
2308				}
2309
2310				ib[idx] = offset;
2311				ib[idx+1] = (ib[idx+1] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2312			} else if (((info & 0x60000000) >> 29) != 2) {
2313				DRM_ERROR("bad CP DMA SRC_SEL\n");
2314				return -EINVAL;
2315			}
2316		}
2317		if (command & PACKET3_CP_DMA_CMD_DAS) {
2318			/* dst address space is register */
2319			/* GDS is ok */
2320			if (((info & 0x00300000) >> 20) != 1) {
2321				DRM_ERROR("CP DMA DAS not supported\n");
2322				return -EINVAL;
2323			}
2324		} else {
2325			/* dst address space is memory */
2326			if (command & PACKET3_CP_DMA_CMD_DAIC) {
2327				DRM_ERROR("CP DMA DAIC only supported for registers\n");
2328				return -EINVAL;
2329			}
2330			if (((info & 0x00300000) >> 20) == 0) {
2331				r = evergreen_cs_packet_next_reloc(p, &reloc);
2332				if (r) {
2333					DRM_ERROR("bad CP DMA DST\n");
2334					return -EINVAL;
2335				}
2336
2337				tmp = radeon_get_ib_value(p, idx+2) +
2338					((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32);
2339
2340				offset = reloc->lobj.gpu_offset + tmp;
2341
2342				if ((tmp + size) > radeon_bo_size(reloc->robj)) {
2343					dev_warn(p->dev, "CP DMA dst buffer too small (%ju %lu)\n",
2344						 (uintmax_t)tmp + size, radeon_bo_size(reloc->robj));
2345					return -EINVAL;
2346				}
2347
2348				ib[idx+2] = offset;
2349				ib[idx+3] = upper_32_bits(offset) & 0xff;
2350			} else {
2351				DRM_ERROR("bad CP DMA DST_SEL\n");
2352				return -EINVAL;
2353			}
2354		}
2355		break;
2356	}
2357	case PACKET3_SURFACE_SYNC:
2358		if (pkt->count != 3) {
2359			DRM_ERROR("bad SURFACE_SYNC\n");
2360			return -EINVAL;
2361		}
2362		/* 0xffffffff/0x0 is flush all cache flag */
2363		if (radeon_get_ib_value(p, idx + 1) != 0xffffffff ||
2364		    radeon_get_ib_value(p, idx + 2) != 0) {
2365			r = evergreen_cs_packet_next_reloc(p, &reloc);
2366			if (r) {
2367				DRM_ERROR("bad SURFACE_SYNC\n");
2368				return -EINVAL;
2369			}
2370			ib[idx+2] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
2371		}
2372		break;
2373	case PACKET3_EVENT_WRITE:
2374		if (pkt->count != 2 && pkt->count != 0) {
2375			DRM_ERROR("bad EVENT_WRITE\n");
2376			return -EINVAL;
2377		}
2378		if (pkt->count) {
2379			uint64_t offset;
2380
2381			r = evergreen_cs_packet_next_reloc(p, &reloc);
2382			if (r) {
2383				DRM_ERROR("bad EVENT_WRITE\n");
2384				return -EINVAL;
2385			}
2386			offset = reloc->lobj.gpu_offset +
2387			         (radeon_get_ib_value(p, idx+1) & 0xfffffff8) +
2388			         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2389
2390			ib[idx+1] = offset & 0xfffffff8;
2391			ib[idx+2] = upper_32_bits(offset) & 0xff;
2392		}
2393		break;
2394	case PACKET3_EVENT_WRITE_EOP:
2395	{
2396		uint64_t offset;
2397
2398		if (pkt->count != 4) {
2399			DRM_ERROR("bad EVENT_WRITE_EOP\n");
2400			return -EINVAL;
2401		}
2402		r = evergreen_cs_packet_next_reloc(p, &reloc);
2403		if (r) {
2404			DRM_ERROR("bad EVENT_WRITE_EOP\n");
2405			return -EINVAL;
2406		}
2407
2408		offset = reloc->lobj.gpu_offset +
2409		         (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2410		         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2411
2412		ib[idx+1] = offset & 0xfffffffc;
2413		ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2414		break;
2415	}
2416	case PACKET3_EVENT_WRITE_EOS:
2417	{
2418		uint64_t offset;
2419
2420		if (pkt->count != 3) {
2421			DRM_ERROR("bad EVENT_WRITE_EOS\n");
2422			return -EINVAL;
2423		}
2424		r = evergreen_cs_packet_next_reloc(p, &reloc);
2425		if (r) {
2426			DRM_ERROR("bad EVENT_WRITE_EOS\n");
2427			return -EINVAL;
2428		}
2429
2430		offset = reloc->lobj.gpu_offset +
2431		         (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2432		         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2433
2434		ib[idx+1] = offset & 0xfffffffc;
2435		ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2436		break;
2437	}
2438	case PACKET3_SET_CONFIG_REG:
2439		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
2440		end_reg = 4 * pkt->count + start_reg - 4;
2441		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
2442		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
2443		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
2444			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
2445			return -EINVAL;
2446		}
2447		for (i = 0; i < pkt->count; i++) {
2448			reg = start_reg + (4 * i);
2449			r = evergreen_cs_check_reg(p, reg, idx+1+i);
2450			if (r)
2451				return r;
2452		}
2453		break;
2454	case PACKET3_SET_CONTEXT_REG:
2455		start_reg = (idx_value << 2) + PACKET3_SET_CONTEXT_REG_START;
2456		end_reg = 4 * pkt->count + start_reg - 4;
2457		if ((start_reg < PACKET3_SET_CONTEXT_REG_START) ||
2458		    (start_reg >= PACKET3_SET_CONTEXT_REG_END) ||
2459		    (end_reg >= PACKET3_SET_CONTEXT_REG_END)) {
2460			DRM_ERROR("bad PACKET3_SET_CONTEXT_REG\n");
2461			return -EINVAL;
2462		}
2463		for (i = 0; i < pkt->count; i++) {
2464			reg = start_reg + (4 * i);
2465			r = evergreen_cs_check_reg(p, reg, idx+1+i);
2466			if (r)
2467				return r;
2468		}
2469		break;
2470	case PACKET3_SET_RESOURCE:
2471		if (pkt->count % 8) {
2472			DRM_ERROR("bad SET_RESOURCE\n");
2473			return -EINVAL;
2474		}
2475		start_reg = (idx_value << 2) + PACKET3_SET_RESOURCE_START;
2476		end_reg = 4 * pkt->count + start_reg - 4;
2477		if ((start_reg < PACKET3_SET_RESOURCE_START) ||
2478		    (start_reg >= PACKET3_SET_RESOURCE_END) ||
2479		    (end_reg >= PACKET3_SET_RESOURCE_END)) {
2480			DRM_ERROR("bad SET_RESOURCE\n");
2481			return -EINVAL;
2482		}
2483		for (i = 0; i < (pkt->count / 8); i++) {
2484			struct radeon_bo *texture, *mipmap;
2485			u32 toffset, moffset;
2486			u32 size, offset, mip_address, tex_dim;
2487
2488			switch (G__SQ_CONSTANT_TYPE(radeon_get_ib_value(p, idx+1+(i*8)+7))) {
2489			case SQ_TEX_VTX_VALID_TEXTURE:
2490				/* tex base */
2491				r = evergreen_cs_packet_next_reloc(p, &reloc);
2492				if (r) {
2493					DRM_ERROR("bad SET_RESOURCE (tex)\n");
2494					return -EINVAL;
2495				}
2496				if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
2497					ib[idx+1+(i*8)+1] |=
2498						TEX_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
2499					if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
2500						unsigned bankw, bankh, mtaspect, tile_split;
2501
2502						evergreen_tiling_fields(reloc->lobj.tiling_flags,
2503									&bankw, &bankh, &mtaspect,
2504									&tile_split);
2505						ib[idx+1+(i*8)+6] |= TEX_TILE_SPLIT(tile_split);
2506						ib[idx+1+(i*8)+7] |=
2507							TEX_BANK_WIDTH(bankw) |
2508							TEX_BANK_HEIGHT(bankh) |
2509							MACRO_TILE_ASPECT(mtaspect) |
2510							TEX_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
2511					}
2512				}
2513				texture = reloc->robj;
2514				toffset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
2515
2516				/* tex mip base */
2517				tex_dim = ib[idx+1+(i*8)+0] & 0x7;
2518				mip_address = ib[idx+1+(i*8)+3];
2519
2520				if ((tex_dim == SQ_TEX_DIM_2D_MSAA || tex_dim == SQ_TEX_DIM_2D_ARRAY_MSAA) &&
2521				    !mip_address &&
2522				    !evergreen_cs_packet_next_is_pkt3_nop(p)) {
2523					/* MIP_ADDRESS should point to FMASK for an MSAA texture.
2524					 * It should be 0 if FMASK is disabled. */
2525					moffset = 0;
2526					mipmap = NULL;
2527				} else {
2528					r = evergreen_cs_packet_next_reloc(p, &reloc);
2529					if (r) {
2530						DRM_ERROR("bad SET_RESOURCE (tex)\n");
2531						return -EINVAL;
2532					}
2533					moffset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
2534					mipmap = reloc->robj;
2535				}
2536
2537				r = evergreen_cs_track_validate_texture(p, texture, mipmap, idx+1+(i*8));
2538				if (r)
2539					return r;
2540				ib[idx+1+(i*8)+2] += toffset;
2541				ib[idx+1+(i*8)+3] += moffset;
2542				break;
2543			case SQ_TEX_VTX_VALID_BUFFER:
2544			{
2545				uint64_t offset64;
2546				/* vtx base */
2547				r = evergreen_cs_packet_next_reloc(p, &reloc);
2548				if (r) {
2549					DRM_ERROR("bad SET_RESOURCE (vtx)\n");
2550					return -EINVAL;
2551				}
2552				offset = radeon_get_ib_value(p, idx+1+(i*8)+0);
2553				size = radeon_get_ib_value(p, idx+1+(i*8)+1);
2554				if (p->rdev && (size + offset) > radeon_bo_size(reloc->robj)) {
2555					/* force size to size of the buffer */
2556					dev_warn(p->dev, "vbo resource seems too big for the bo\n");
2557					ib[idx+1+(i*8)+1] = radeon_bo_size(reloc->robj) - offset;
2558				}
2559
2560				offset64 = reloc->lobj.gpu_offset + offset;
2561				ib[idx+1+(i*8)+0] = offset64;
2562				ib[idx+1+(i*8)+2] = (ib[idx+1+(i*8)+2] & 0xffffff00) |
2563						    (upper_32_bits(offset64) & 0xff);
2564				break;
2565			}
2566			case SQ_TEX_VTX_INVALID_TEXTURE:
2567			case SQ_TEX_VTX_INVALID_BUFFER:
2568			default:
2569				DRM_ERROR("bad SET_RESOURCE\n");
2570				return -EINVAL;
2571			}
2572		}
2573		break;
2574	case PACKET3_SET_ALU_CONST:
2575		/* XXX fix me ALU const buffers only */
2576		break;
2577	case PACKET3_SET_BOOL_CONST:
2578		start_reg = (idx_value << 2) + PACKET3_SET_BOOL_CONST_START;
2579		end_reg = 4 * pkt->count + start_reg - 4;
2580		if ((start_reg < PACKET3_SET_BOOL_CONST_START) ||
2581		    (start_reg >= PACKET3_SET_BOOL_CONST_END) ||
2582		    (end_reg >= PACKET3_SET_BOOL_CONST_END)) {
2583			DRM_ERROR("bad SET_BOOL_CONST\n");
2584			return -EINVAL;
2585		}
2586		break;
2587	case PACKET3_SET_LOOP_CONST:
2588		start_reg = (idx_value << 2) + PACKET3_SET_LOOP_CONST_START;
2589		end_reg = 4 * pkt->count + start_reg - 4;
2590		if ((start_reg < PACKET3_SET_LOOP_CONST_START) ||
2591		    (start_reg >= PACKET3_SET_LOOP_CONST_END) ||
2592		    (end_reg >= PACKET3_SET_LOOP_CONST_END)) {
2593			DRM_ERROR("bad SET_LOOP_CONST\n");
2594			return -EINVAL;
2595		}
2596		break;
2597	case PACKET3_SET_CTL_CONST:
2598		start_reg = (idx_value << 2) + PACKET3_SET_CTL_CONST_START;
2599		end_reg = 4 * pkt->count + start_reg - 4;
2600		if ((start_reg < PACKET3_SET_CTL_CONST_START) ||
2601		    (start_reg >= PACKET3_SET_CTL_CONST_END) ||
2602		    (end_reg >= PACKET3_SET_CTL_CONST_END)) {
2603			DRM_ERROR("bad SET_CTL_CONST\n");
2604			return -EINVAL;
2605		}
2606		break;
2607	case PACKET3_SET_SAMPLER:
2608		if (pkt->count % 3) {
2609			DRM_ERROR("bad SET_SAMPLER\n");
2610			return -EINVAL;
2611		}
2612		start_reg = (idx_value << 2) + PACKET3_SET_SAMPLER_START;
2613		end_reg = 4 * pkt->count + start_reg - 4;
2614		if ((start_reg < PACKET3_SET_SAMPLER_START) ||
2615		    (start_reg >= PACKET3_SET_SAMPLER_END) ||
2616		    (end_reg >= PACKET3_SET_SAMPLER_END)) {
2617			DRM_ERROR("bad SET_SAMPLER\n");
2618			return -EINVAL;
2619		}
2620		break;
2621	case PACKET3_STRMOUT_BUFFER_UPDATE:
2622		if (pkt->count != 4) {
2623			DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (invalid count)\n");
2624			return -EINVAL;
2625		}
2626		/* Updating memory at DST_ADDRESS. */
2627		if (idx_value & 0x1) {
2628			u64 offset;
2629			r = evergreen_cs_packet_next_reloc(p, &reloc);
2630			if (r) {
2631				DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing dst reloc)\n");
2632				return -EINVAL;
2633			}
2634			offset = radeon_get_ib_value(p, idx+1);
2635			offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2636			if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2637				DRM_ERROR("bad STRMOUT_BUFFER_UPDATE dst bo too small: 0x%jx, 0x%lx\n",
2638					  (uintmax_t)offset + 4, radeon_bo_size(reloc->robj));
2639				return -EINVAL;
2640			}
2641			offset += reloc->lobj.gpu_offset;
2642			ib[idx+1] = offset;
2643			ib[idx+2] = upper_32_bits(offset) & 0xff;
2644		}
2645		/* Reading data from SRC_ADDRESS. */
2646		if (((idx_value >> 1) & 0x3) == 2) {
2647			u64 offset;
2648			r = evergreen_cs_packet_next_reloc(p, &reloc);
2649			if (r) {
2650				DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing src reloc)\n");
2651				return -EINVAL;
2652			}
2653			offset = radeon_get_ib_value(p, idx+3);
2654			offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2655			if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2656				DRM_ERROR("bad STRMOUT_BUFFER_UPDATE src bo too small: 0x%jx, 0x%lx\n",
2657					  (uintmax_t)offset + 4, radeon_bo_size(reloc->robj));
2658				return -EINVAL;
2659			}
2660			offset += reloc->lobj.gpu_offset;
2661			ib[idx+3] = offset;
2662			ib[idx+4] = upper_32_bits(offset) & 0xff;
2663		}
2664		break;
2665	case PACKET3_MEM_WRITE:
2666	{
2667		u64 offset;
2668
2669		if (pkt->count != 3) {
2670			DRM_ERROR("bad MEM_WRITE (invalid count)\n");
2671			return -EINVAL;
2672		}
2673		r = evergreen_cs_packet_next_reloc(p, &reloc);
2674		if (r) {
2675			DRM_ERROR("bad MEM_WRITE (missing reloc)\n");
2676			return -EINVAL;
2677		}
2678		offset = radeon_get_ib_value(p, idx+0);
2679		offset += ((u64)(radeon_get_ib_value(p, idx+1) & 0xff)) << 32UL;
2680		if (offset & 0x7) {
2681			DRM_ERROR("bad MEM_WRITE (address not qwords aligned)\n");
2682			return -EINVAL;
2683		}
2684		if ((offset + 8) > radeon_bo_size(reloc->robj)) {
2685			DRM_ERROR("bad MEM_WRITE bo too small: 0x%jx, 0x%lx\n",
2686				  (uintmax_t)offset + 8, radeon_bo_size(reloc->robj));
2687			return -EINVAL;
2688		}
2689		offset += reloc->lobj.gpu_offset;
2690		ib[idx+0] = offset;
2691		ib[idx+1] = upper_32_bits(offset) & 0xff;
2692		break;
2693	}
2694	case PACKET3_COPY_DW:
2695		if (pkt->count != 4) {
2696			DRM_ERROR("bad COPY_DW (invalid count)\n");
2697			return -EINVAL;
2698		}
2699		if (idx_value & 0x1) {
2700			u64 offset;
2701			/* SRC is memory. */
2702			r = evergreen_cs_packet_next_reloc(p, &reloc);
2703			if (r) {
2704				DRM_ERROR("bad COPY_DW (missing src reloc)\n");
2705				return -EINVAL;
2706			}
2707			offset = radeon_get_ib_value(p, idx+1);
2708			offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2709			if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2710				DRM_ERROR("bad COPY_DW src bo too small: 0x%jx, 0x%lx\n",
2711					  (uintmax_t)offset + 4, radeon_bo_size(reloc->robj));
2712				return -EINVAL;
2713			}
2714			offset += reloc->lobj.gpu_offset;
2715			ib[idx+1] = offset;
2716			ib[idx+2] = upper_32_bits(offset) & 0xff;
2717		} else {
2718			/* SRC is a reg. */
2719			reg = radeon_get_ib_value(p, idx+1) << 2;
2720			if (!evergreen_is_safe_reg(p, reg, idx+1))
2721				return -EINVAL;
2722		}
2723		if (idx_value & 0x2) {
2724			u64 offset;
2725			/* DST is memory. */
2726			r = evergreen_cs_packet_next_reloc(p, &reloc);
2727			if (r) {
2728				DRM_ERROR("bad COPY_DW (missing dst reloc)\n");
2729				return -EINVAL;
2730			}
2731			offset = radeon_get_ib_value(p, idx+3);
2732			offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2733			if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2734				DRM_ERROR("bad COPY_DW dst bo too small: 0x%jx, 0x%lx\n",
2735					  (uintmax_t)offset + 4, radeon_bo_size(reloc->robj));
2736				return -EINVAL;
2737			}
2738			offset += reloc->lobj.gpu_offset;
2739			ib[idx+3] = offset;
2740			ib[idx+4] = upper_32_bits(offset) & 0xff;
2741		} else {
2742			/* DST is a reg. */
2743			reg = radeon_get_ib_value(p, idx+3) << 2;
2744			if (!evergreen_is_safe_reg(p, reg, idx+3))
2745				return -EINVAL;
2746		}
2747		break;
2748	case PACKET3_NOP:
2749		break;
2750	default:
2751		DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
2752		return -EINVAL;
2753	}
2754	return 0;
2755}
2756
2757int evergreen_cs_parse(struct radeon_cs_parser *p)
2758{
2759	struct radeon_cs_packet pkt;
2760	struct evergreen_cs_track *track;
2761	u32 tmp;
2762	int r;
2763
2764	if (p->track == NULL) {
2765		/* initialize tracker, we are in kms */
2766		track = malloc(sizeof(*track), DRM_MEM_DRIVER, M_NOWAIT | M_ZERO);
2767		if (track == NULL)
2768			return -ENOMEM;
2769		evergreen_cs_track_init(track);
2770		if (p->rdev->family >= CHIP_CAYMAN)
2771			tmp = p->rdev->config.cayman.tile_config;
2772		else
2773			tmp = p->rdev->config.evergreen.tile_config;
2774
2775		switch (tmp & 0xf) {
2776		case 0:
2777			track->npipes = 1;
2778			break;
2779		case 1:
2780		default:
2781			track->npipes = 2;
2782			break;
2783		case 2:
2784			track->npipes = 4;
2785			break;
2786		case 3:
2787			track->npipes = 8;
2788			break;
2789		}
2790
2791		switch ((tmp & 0xf0) >> 4) {
2792		case 0:
2793			track->nbanks = 4;
2794			break;
2795		case 1:
2796		default:
2797			track->nbanks = 8;
2798			break;
2799		case 2:
2800			track->nbanks = 16;
2801			break;
2802		}
2803
2804		switch ((tmp & 0xf00) >> 8) {
2805		case 0:
2806			track->group_size = 256;
2807			break;
2808		case 1:
2809		default:
2810			track->group_size = 512;
2811			break;
2812		}
2813
2814		switch ((tmp & 0xf000) >> 12) {
2815		case 0:
2816			track->row_size = 1;
2817			break;
2818		case 1:
2819		default:
2820			track->row_size = 2;
2821			break;
2822		case 2:
2823			track->row_size = 4;
2824			break;
2825		}
2826
2827		p->track = track;
2828	}
2829	do {
2830		r = evergreen_cs_packet_parse(p, &pkt, p->idx);
2831		if (r) {
2832			free(p->track, DRM_MEM_DRIVER);
2833			p->track = NULL;
2834			return r;
2835		}
2836		p->idx += pkt.count + 2;
2837		switch (pkt.type) {
2838		case PACKET_TYPE0:
2839			r = evergreen_cs_parse_packet0(p, &pkt);
2840			break;
2841		case PACKET_TYPE2:
2842			break;
2843		case PACKET_TYPE3:
2844			r = evergreen_packet3_check(p, &pkt);
2845			break;
2846		default:
2847			DRM_ERROR("Unknown packet type %d !\n", pkt.type);
2848			free(p->track, DRM_MEM_DRIVER);
2849			p->track = NULL;
2850			return -EINVAL;
2851		}
2852		if (r) {
2853			free(p->track, DRM_MEM_DRIVER);
2854			p->track = NULL;
2855			return r;
2856		}
2857	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
2858#if 0
2859	for (r = 0; r < p->ib.length_dw; r++) {
2860		DRM_INFO("%05d  0x%08X\n", r, p->ib.ptr[r]);
2861		mdelay(1);
2862	}
2863#endif
2864	free(p->track, DRM_MEM_DRIVER);
2865	p->track = NULL;
2866	return 0;
2867}
2868
2869/*
2870 *  DMA
2871 */
2872
2873#define GET_DMA_CMD(h) (((h) & 0xf0000000) >> 28)
2874#define GET_DMA_COUNT(h) ((h) & 0x000fffff)
2875#define GET_DMA_T(h) (((h) & 0x00800000) >> 23)
2876#define GET_DMA_NEW(h) (((h) & 0x04000000) >> 26)
2877#define GET_DMA_MISC(h) (((h) & 0x0700000) >> 20)
2878
2879/**
2880 * evergreen_dma_cs_parse() - parse the DMA IB
2881 * @p:		parser structure holding parsing context.
2882 *
2883 * Parses the DMA IB from the CS ioctl and updates
2884 * the GPU addresses based on the reloc information and
2885 * checks for errors. (Evergreen-Cayman)
2886 * Returns 0 for success and an error on failure.
2887 **/
2888int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
2889{
2890	struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
2891	struct radeon_cs_reloc *src_reloc, *dst_reloc, *dst2_reloc;
2892	u32 header, cmd, count, tiled, new_cmd, misc;
2893	volatile u32 *ib = p->ib.ptr;
2894	u32 idx, idx_value;
2895	u64 src_offset, dst_offset, dst2_offset;
2896	int r;
2897
2898	do {
2899		if (p->idx >= ib_chunk->length_dw) {
2900			DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
2901				  p->idx, ib_chunk->length_dw);
2902			return -EINVAL;
2903		}
2904		idx = p->idx;
2905		header = radeon_get_ib_value(p, idx);
2906		cmd = GET_DMA_CMD(header);
2907		count = GET_DMA_COUNT(header);
2908		tiled = GET_DMA_T(header);
2909		new_cmd = GET_DMA_NEW(header);
2910		misc = GET_DMA_MISC(header);
2911
2912		switch (cmd) {
2913		case DMA_PACKET_WRITE:
2914			r = r600_dma_cs_next_reloc(p, &dst_reloc);
2915			if (r) {
2916				DRM_ERROR("bad DMA_PACKET_WRITE\n");
2917				return -EINVAL;
2918			}
2919			if (tiled) {
2920				dst_offset = radeon_get_ib_value(p, idx+1);
2921				dst_offset <<= 8;
2922
2923				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
2924				p->idx += count + 7;
2925			} else {
2926				dst_offset = radeon_get_ib_value(p, idx+1);
2927				dst_offset |= ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2928
2929				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
2930				ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
2931				p->idx += count + 3;
2932			}
2933			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2934				dev_warn(p->dev, "DMA write buffer too small (%ju %lu)\n",
2935					 (uintmax_t)dst_offset, radeon_bo_size(dst_reloc->robj));
2936				return -EINVAL;
2937			}
2938			break;
2939		case DMA_PACKET_COPY:
2940			r = r600_dma_cs_next_reloc(p, &src_reloc);
2941			if (r) {
2942				DRM_ERROR("bad DMA_PACKET_COPY\n");
2943				return -EINVAL;
2944			}
2945			r = r600_dma_cs_next_reloc(p, &dst_reloc);
2946			if (r) {
2947				DRM_ERROR("bad DMA_PACKET_COPY\n");
2948				return -EINVAL;
2949			}
2950			if (tiled) {
2951				idx_value = radeon_get_ib_value(p, idx + 2);
2952				if (new_cmd) {
2953					switch (misc) {
2954					case 0:
2955						/* L2T, frame to fields */
2956						if (idx_value & (1U << 31)) {
2957							DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
2958							return -EINVAL;
2959						}
2960						r = r600_dma_cs_next_reloc(p, &dst2_reloc);
2961						if (r) {
2962							DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
2963							return -EINVAL;
2964						}
2965						dst_offset = radeon_get_ib_value(p, idx+1);
2966						dst_offset <<= 8;
2967						dst2_offset = radeon_get_ib_value(p, idx+2);
2968						dst2_offset <<= 8;
2969						src_offset = radeon_get_ib_value(p, idx+8);
2970						src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
2971						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2972							dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%ju %lu)\n",
2973								 (uintmax_t)src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2974							return -EINVAL;
2975						}
2976						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2977							dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%ju %lu)\n",
2978								 (uintmax_t)dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2979							return -EINVAL;
2980						}
2981						if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
2982							dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%ju %lu)\n",
2983								 (uintmax_t)dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
2984							return -EINVAL;
2985						}
2986						ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
2987						ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
2988						ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
2989						ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
2990						p->idx += 10;
2991						break;
2992					case 1:
2993						/* L2T, T2L partial */
2994						if (p->family < CHIP_CAYMAN) {
2995							DRM_ERROR("L2T, T2L Partial is cayman only !\n");
2996							return -EINVAL;
2997						}
2998						/* detile bit */
2999						if (idx_value & (1U << 31)) {
3000							/* tiled src, linear dst */
3001							ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
3002
3003							ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3004							ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3005						} else {
3006							/* linear src, tiled dst */
3007							ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3008							ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3009
3010							ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3011						}
3012						p->idx += 12;
3013						break;
3014					case 3:
3015						/* L2T, broadcast */
3016						if (idx_value & (1U << 31)) {
3017							DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3018							return -EINVAL;
3019						}
3020						r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3021						if (r) {
3022							DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3023							return -EINVAL;
3024						}
3025						dst_offset = radeon_get_ib_value(p, idx+1);
3026						dst_offset <<= 8;
3027						dst2_offset = radeon_get_ib_value(p, idx+2);
3028						dst2_offset <<= 8;
3029						src_offset = radeon_get_ib_value(p, idx+8);
3030						src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
3031						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3032							dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%ju %lu)\n",
3033								 (uintmax_t)src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3034							return -EINVAL;
3035						}
3036						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3037							dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%ju %lu)\n",
3038								 (uintmax_t)dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3039							return -EINVAL;
3040						}
3041						if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3042							dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%ju %lu)\n",
3043								 (uintmax_t)dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3044							return -EINVAL;
3045						}
3046						ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3047						ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
3048						ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3049						ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3050						p->idx += 10;
3051						break;
3052					case 4:
3053						/* L2T, T2L */
3054						/* detile bit */
3055						if (idx_value & (1U << 31)) {
3056							/* tiled src, linear dst */
3057							src_offset = radeon_get_ib_value(p, idx+1);
3058							src_offset <<= 8;
3059							ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
3060
3061							dst_offset = radeon_get_ib_value(p, idx+7);
3062							dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3063							ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3064							ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3065						} else {
3066							/* linear src, tiled dst */
3067							src_offset = radeon_get_ib_value(p, idx+7);
3068							src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3069							ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3070							ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3071
3072							dst_offset = radeon_get_ib_value(p, idx+1);
3073							dst_offset <<= 8;
3074							ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3075						}
3076						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3077							dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%ju %lu)\n",
3078								 (uintmax_t)src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3079							return -EINVAL;
3080						}
3081						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3082							dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%ju %lu)\n",
3083								 (uintmax_t)dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3084							return -EINVAL;
3085						}
3086						p->idx += 9;
3087						break;
3088					case 5:
3089						/* T2T partial */
3090						if (p->family < CHIP_CAYMAN) {
3091							DRM_ERROR("L2T, T2L Partial is cayman only !\n");
3092							return -EINVAL;
3093						}
3094						ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
3095						ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3096						p->idx += 13;
3097						break;
3098					case 7:
3099						/* L2T, broadcast */
3100						if (idx_value & (1U << 31)) {
3101							DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3102							return -EINVAL;
3103						}
3104						r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3105						if (r) {
3106							DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3107							return -EINVAL;
3108						}
3109						dst_offset = radeon_get_ib_value(p, idx+1);
3110						dst_offset <<= 8;
3111						dst2_offset = radeon_get_ib_value(p, idx+2);
3112						dst2_offset <<= 8;
3113						src_offset = radeon_get_ib_value(p, idx+8);
3114						src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
3115						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3116							dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%ju %lu)\n",
3117								 (uintmax_t)src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3118							return -EINVAL;
3119						}
3120						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3121							dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%ju %lu)\n",
3122								 (uintmax_t)dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3123							return -EINVAL;
3124						}
3125						if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3126							dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%ju %lu)\n",
3127								 (uintmax_t)dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3128							return -EINVAL;
3129						}
3130						ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3131						ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
3132						ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3133						ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3134						p->idx += 10;
3135						break;
3136					default:
3137						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3138						return -EINVAL;
3139					}
3140				} else {
3141					switch (misc) {
3142					case 0:
3143						/* detile bit */
3144						if (idx_value & (1U << 31)) {
3145							/* tiled src, linear dst */
3146							src_offset = radeon_get_ib_value(p, idx+1);
3147							src_offset <<= 8;
3148							ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
3149
3150							dst_offset = radeon_get_ib_value(p, idx+7);
3151							dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3152							ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3153							ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3154						} else {
3155							/* linear src, tiled dst */
3156							src_offset = radeon_get_ib_value(p, idx+7);
3157							src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3158							ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3159							ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3160
3161							dst_offset = radeon_get_ib_value(p, idx+1);
3162							dst_offset <<= 8;
3163							ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3164						}
3165						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3166							dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%ju %lu)\n",
3167								 (uintmax_t)src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3168							return -EINVAL;
3169						}
3170						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3171							dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%ju %lu)\n",
3172								 (uintmax_t)dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3173							return -EINVAL;
3174						}
3175						p->idx += 9;
3176						break;
3177					default:
3178						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3179						return -EINVAL;
3180					}
3181				}
3182			} else {
3183				if (new_cmd) {
3184					switch (misc) {
3185					case 0:
3186						/* L2L, byte */
3187						src_offset = radeon_get_ib_value(p, idx+2);
3188						src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
3189						dst_offset = radeon_get_ib_value(p, idx+1);
3190						dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
3191						if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) {
3192							dev_warn(p->dev, "DMA L2L, byte src buffer too small (%ju %lu)\n",
3193								 (uintmax_t)src_offset + count, radeon_bo_size(src_reloc->robj));
3194							return -EINVAL;
3195						}
3196						if ((dst_offset + count) > radeon_bo_size(dst_reloc->robj)) {
3197							dev_warn(p->dev, "DMA L2L, byte dst buffer too small (%ju %lu)\n",
3198								 (uintmax_t)dst_offset + count, radeon_bo_size(dst_reloc->robj));
3199							return -EINVAL;
3200						}
3201						ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
3202						ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
3203						ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3204						ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3205						p->idx += 5;
3206						break;
3207					case 1:
3208						/* L2L, partial */
3209						if (p->family < CHIP_CAYMAN) {
3210							DRM_ERROR("L2L Partial is cayman only !\n");
3211							return -EINVAL;
3212						}
3213						ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
3214						ib[idx+2] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3215						ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
3216						ib[idx+5] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3217
3218						p->idx += 9;
3219						break;
3220					case 4:
3221						/* L2L, dw, broadcast */
3222						r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3223						if (r) {
3224							DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n");
3225							return -EINVAL;
3226						}
3227						dst_offset = radeon_get_ib_value(p, idx+1);
3228						dst_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
3229						dst2_offset = radeon_get_ib_value(p, idx+2);
3230						dst2_offset |= ((u64)(radeon_get_ib_value(p, idx+5) & 0xff)) << 32;
3231						src_offset = radeon_get_ib_value(p, idx+3);
3232						src_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32;
3233						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3234							dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%ju %lu)\n",
3235								 (uintmax_t)src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3236							return -EINVAL;
3237						}
3238						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3239							dev_warn(p->dev, "DMA L2L, dw, broadcast dst buffer too small (%ju %lu)\n",
3240								 (uintmax_t)dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3241							return -EINVAL;
3242						}
3243						if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3244							dev_warn(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%ju %lu)\n",
3245								 (uintmax_t)dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3246							return -EINVAL;
3247						}
3248						ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3249						ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset & 0xfffffffc);
3250						ib[idx+3] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3251						ib[idx+4] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3252						ib[idx+5] += upper_32_bits(dst2_reloc->lobj.gpu_offset) & 0xff;
3253						ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3254						p->idx += 7;
3255						break;
3256					default:
3257						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3258						return -EINVAL;
3259					}
3260				} else {
3261					/* L2L, dw */
3262					src_offset = radeon_get_ib_value(p, idx+2);
3263					src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
3264					dst_offset = radeon_get_ib_value(p, idx+1);
3265					dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
3266					if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3267						dev_warn(p->dev, "DMA L2L, dw src buffer too small (%ju %lu)\n",
3268							 (uintmax_t)src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3269						return -EINVAL;
3270					}
3271					if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3272						dev_warn(p->dev, "DMA L2L, dw dst buffer too small (%ju %lu)\n",
3273							 (uintmax_t)dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3274						return -EINVAL;
3275					}
3276					ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3277					ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3278					ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3279					ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3280					p->idx += 5;
3281				}
3282			}
3283			break;
3284		case DMA_PACKET_CONSTANT_FILL:
3285			r = r600_dma_cs_next_reloc(p, &dst_reloc);
3286			if (r) {
3287				DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n");
3288				return -EINVAL;
3289			}
3290			dst_offset = radeon_get_ib_value(p, idx+1);
3291			dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0x00ff0000)) << 16;
3292			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3293				dev_warn(p->dev, "DMA constant fill buffer too small (%ju %lu)\n",
3294					 (uintmax_t)dst_offset, radeon_bo_size(dst_reloc->robj));
3295				return -EINVAL;
3296			}
3297			ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3298			ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) << 16) & 0x00ff0000;
3299			p->idx += 4;
3300			break;
3301		case DMA_PACKET_NOP:
3302			p->idx += 1;
3303			break;
3304		default:
3305			DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
3306			return -EINVAL;
3307		}
3308	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
3309#if 0
3310	for (r = 0; r < p->ib->length_dw; r++) {
3311		DRM_INFO("%05d  0x%08X\n", r, p->ib.ptr[r]);
3312		mdelay(1);
3313	}
3314#endif
3315	return 0;
3316}
3317
3318/* vm parser */
3319static bool evergreen_vm_reg_valid(u32 reg)
3320{
3321	/* context regs are fine */
3322	if (reg >= 0x28000)
3323		return true;
3324
3325	/* check config regs */
3326	switch (reg) {
3327	case WAIT_UNTIL:
3328	case GRBM_GFX_INDEX:
3329	case CP_STRMOUT_CNTL:
3330	case CP_COHER_CNTL:
3331	case CP_COHER_SIZE:
3332	case VGT_VTX_VECT_EJECT_REG:
3333	case VGT_CACHE_INVALIDATION:
3334	case VGT_GS_VERTEX_REUSE:
3335	case VGT_PRIMITIVE_TYPE:
3336	case VGT_INDEX_TYPE:
3337	case VGT_NUM_INDICES:
3338	case VGT_NUM_INSTANCES:
3339	case VGT_COMPUTE_DIM_X:
3340	case VGT_COMPUTE_DIM_Y:
3341	case VGT_COMPUTE_DIM_Z:
3342	case VGT_COMPUTE_START_X:
3343	case VGT_COMPUTE_START_Y:
3344	case VGT_COMPUTE_START_Z:
3345	case VGT_COMPUTE_INDEX:
3346	case VGT_COMPUTE_THREAD_GROUP_SIZE:
3347	case VGT_HS_OFFCHIP_PARAM:
3348	case PA_CL_ENHANCE:
3349	case PA_SU_LINE_STIPPLE_VALUE:
3350	case PA_SC_LINE_STIPPLE_STATE:
3351	case PA_SC_ENHANCE:
3352	case SQ_DYN_GPR_CNTL_PS_FLUSH_REQ:
3353	case SQ_DYN_GPR_SIMD_LOCK_EN:
3354	case SQ_CONFIG:
3355	case SQ_GPR_RESOURCE_MGMT_1:
3356	case SQ_GLOBAL_GPR_RESOURCE_MGMT_1:
3357	case SQ_GLOBAL_GPR_RESOURCE_MGMT_2:
3358	case SQ_CONST_MEM_BASE:
3359	case SQ_STATIC_THREAD_MGMT_1:
3360	case SQ_STATIC_THREAD_MGMT_2:
3361	case SQ_STATIC_THREAD_MGMT_3:
3362	case SPI_CONFIG_CNTL:
3363	case SPI_CONFIG_CNTL_1:
3364	case TA_CNTL_AUX:
3365	case DB_DEBUG:
3366	case DB_DEBUG2:
3367	case DB_DEBUG3:
3368	case DB_DEBUG4:
3369	case DB_WATERMARKS:
3370	case TD_PS_BORDER_COLOR_INDEX:
3371	case TD_PS_BORDER_COLOR_RED:
3372	case TD_PS_BORDER_COLOR_GREEN:
3373	case TD_PS_BORDER_COLOR_BLUE:
3374	case TD_PS_BORDER_COLOR_ALPHA:
3375	case TD_VS_BORDER_COLOR_INDEX:
3376	case TD_VS_BORDER_COLOR_RED:
3377	case TD_VS_BORDER_COLOR_GREEN:
3378	case TD_VS_BORDER_COLOR_BLUE:
3379	case TD_VS_BORDER_COLOR_ALPHA:
3380	case TD_GS_BORDER_COLOR_INDEX:
3381	case TD_GS_BORDER_COLOR_RED:
3382	case TD_GS_BORDER_COLOR_GREEN:
3383	case TD_GS_BORDER_COLOR_BLUE:
3384	case TD_GS_BORDER_COLOR_ALPHA:
3385	case TD_HS_BORDER_COLOR_INDEX:
3386	case TD_HS_BORDER_COLOR_RED:
3387	case TD_HS_BORDER_COLOR_GREEN:
3388	case TD_HS_BORDER_COLOR_BLUE:
3389	case TD_HS_BORDER_COLOR_ALPHA:
3390	case TD_LS_BORDER_COLOR_INDEX:
3391	case TD_LS_BORDER_COLOR_RED:
3392	case TD_LS_BORDER_COLOR_GREEN:
3393	case TD_LS_BORDER_COLOR_BLUE:
3394	case TD_LS_BORDER_COLOR_ALPHA:
3395	case TD_CS_BORDER_COLOR_INDEX:
3396	case TD_CS_BORDER_COLOR_RED:
3397	case TD_CS_BORDER_COLOR_GREEN:
3398	case TD_CS_BORDER_COLOR_BLUE:
3399	case TD_CS_BORDER_COLOR_ALPHA:
3400	case SQ_ESGS_RING_SIZE:
3401	case SQ_GSVS_RING_SIZE:
3402	case SQ_ESTMP_RING_SIZE:
3403	case SQ_GSTMP_RING_SIZE:
3404	case SQ_HSTMP_RING_SIZE:
3405	case SQ_LSTMP_RING_SIZE:
3406	case SQ_PSTMP_RING_SIZE:
3407	case SQ_VSTMP_RING_SIZE:
3408	case SQ_ESGS_RING_ITEMSIZE:
3409	case SQ_ESTMP_RING_ITEMSIZE:
3410	case SQ_GSTMP_RING_ITEMSIZE:
3411	case SQ_GSVS_RING_ITEMSIZE:
3412	case SQ_GS_VERT_ITEMSIZE:
3413	case SQ_GS_VERT_ITEMSIZE_1:
3414	case SQ_GS_VERT_ITEMSIZE_2:
3415	case SQ_GS_VERT_ITEMSIZE_3:
3416	case SQ_GSVS_RING_OFFSET_1:
3417	case SQ_GSVS_RING_OFFSET_2:
3418	case SQ_GSVS_RING_OFFSET_3:
3419	case SQ_HSTMP_RING_ITEMSIZE:
3420	case SQ_LSTMP_RING_ITEMSIZE:
3421	case SQ_PSTMP_RING_ITEMSIZE:
3422	case SQ_VSTMP_RING_ITEMSIZE:
3423	case VGT_TF_RING_SIZE:
3424	case SQ_ESGS_RING_BASE:
3425	case SQ_GSVS_RING_BASE:
3426	case SQ_ESTMP_RING_BASE:
3427	case SQ_GSTMP_RING_BASE:
3428	case SQ_HSTMP_RING_BASE:
3429	case SQ_LSTMP_RING_BASE:
3430	case SQ_PSTMP_RING_BASE:
3431	case SQ_VSTMP_RING_BASE:
3432	case CAYMAN_VGT_OFFCHIP_LDS_BASE:
3433	case CAYMAN_SQ_EX_ALLOC_TABLE_SLOTS:
3434		return true;
3435	default:
3436		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
3437		return false;
3438	}
3439}
3440
3441static int evergreen_vm_packet3_check(struct radeon_device *rdev,
3442				      u32 *ib, struct radeon_cs_packet *pkt)
3443{
3444	u32 idx = pkt->idx + 1;
3445	u32 idx_value = ib[idx];
3446	u32 start_reg, end_reg, reg, i;
3447	u32 command, info;
3448
3449	switch (pkt->opcode) {
3450	case PACKET3_NOP:
3451	case PACKET3_SET_BASE:
3452	case PACKET3_CLEAR_STATE:
3453	case PACKET3_INDEX_BUFFER_SIZE:
3454	case PACKET3_DISPATCH_DIRECT:
3455	case PACKET3_DISPATCH_INDIRECT:
3456	case PACKET3_MODE_CONTROL:
3457	case PACKET3_SET_PREDICATION:
3458	case PACKET3_COND_EXEC:
3459	case PACKET3_PRED_EXEC:
3460	case PACKET3_DRAW_INDIRECT:
3461	case PACKET3_DRAW_INDEX_INDIRECT:
3462	case PACKET3_INDEX_BASE:
3463	case PACKET3_DRAW_INDEX_2:
3464	case PACKET3_CONTEXT_CONTROL:
3465	case PACKET3_DRAW_INDEX_OFFSET:
3466	case PACKET3_INDEX_TYPE:
3467	case PACKET3_DRAW_INDEX:
3468	case PACKET3_DRAW_INDEX_AUTO:
3469	case PACKET3_DRAW_INDEX_IMMD:
3470	case PACKET3_NUM_INSTANCES:
3471	case PACKET3_DRAW_INDEX_MULTI_AUTO:
3472	case PACKET3_STRMOUT_BUFFER_UPDATE:
3473	case PACKET3_DRAW_INDEX_OFFSET_2:
3474	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
3475	case PACKET3_MPEG_INDEX:
3476	case PACKET3_WAIT_REG_MEM:
3477	case PACKET3_MEM_WRITE:
3478	case PACKET3_SURFACE_SYNC:
3479	case PACKET3_EVENT_WRITE:
3480	case PACKET3_EVENT_WRITE_EOP:
3481	case PACKET3_EVENT_WRITE_EOS:
3482	case PACKET3_SET_CONTEXT_REG:
3483	case PACKET3_SET_BOOL_CONST:
3484	case PACKET3_SET_LOOP_CONST:
3485	case PACKET3_SET_RESOURCE:
3486	case PACKET3_SET_SAMPLER:
3487	case PACKET3_SET_CTL_CONST:
3488	case PACKET3_SET_RESOURCE_OFFSET:
3489	case PACKET3_SET_CONTEXT_REG_INDIRECT:
3490	case PACKET3_SET_RESOURCE_INDIRECT:
3491	case CAYMAN_PACKET3_DEALLOC_STATE:
3492		break;
3493	case PACKET3_COND_WRITE:
3494		if (idx_value & 0x100) {
3495			reg = ib[idx + 5] * 4;
3496			if (!evergreen_vm_reg_valid(reg))
3497				return -EINVAL;
3498		}
3499		break;
3500	case PACKET3_COPY_DW:
3501		if (idx_value & 0x2) {
3502			reg = ib[idx + 3] * 4;
3503			if (!evergreen_vm_reg_valid(reg))
3504				return -EINVAL;
3505		}
3506		break;
3507	case PACKET3_SET_CONFIG_REG:
3508		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
3509		end_reg = 4 * pkt->count + start_reg - 4;
3510		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
3511		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
3512		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
3513			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
3514			return -EINVAL;
3515		}
3516		for (i = 0; i < pkt->count; i++) {
3517			reg = start_reg + (4 * i);
3518			if (!evergreen_vm_reg_valid(reg))
3519				return -EINVAL;
3520		}
3521		break;
3522	case PACKET3_CP_DMA:
3523		command = ib[idx + 4];
3524		info = ib[idx + 1];
3525		if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
3526		    (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
3527		    ((((info & 0x00300000) >> 20) == 0) &&
3528		     (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
3529		    ((((info & 0x60000000) >> 29) == 0) &&
3530		     (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
3531			/* non mem to mem copies requires dw aligned count */
3532			if ((command & 0x1fffff) % 4) {
3533				DRM_ERROR("CP DMA command requires dw count alignment\n");
3534				return -EINVAL;
3535			}
3536		}
3537		if (command & PACKET3_CP_DMA_CMD_SAS) {
3538			/* src address space is register */
3539			if (((info & 0x60000000) >> 29) == 0) {
3540				start_reg = idx_value << 2;
3541				if (command & PACKET3_CP_DMA_CMD_SAIC) {
3542					reg = start_reg;
3543					if (!evergreen_vm_reg_valid(reg)) {
3544						DRM_ERROR("CP DMA Bad SRC register\n");
3545						return -EINVAL;
3546					}
3547				} else {
3548					for (i = 0; i < (command & 0x1fffff); i++) {
3549						reg = start_reg + (4 * i);
3550						if (!evergreen_vm_reg_valid(reg)) {
3551							DRM_ERROR("CP DMA Bad SRC register\n");
3552							return -EINVAL;
3553						}
3554					}
3555				}
3556			}
3557		}
3558		if (command & PACKET3_CP_DMA_CMD_DAS) {
3559			/* dst address space is register */
3560			if (((info & 0x00300000) >> 20) == 0) {
3561				start_reg = ib[idx + 2];
3562				if (command & PACKET3_CP_DMA_CMD_DAIC) {
3563					reg = start_reg;
3564					if (!evergreen_vm_reg_valid(reg)) {
3565						DRM_ERROR("CP DMA Bad DST register\n");
3566						return -EINVAL;
3567					}
3568				} else {
3569					for (i = 0; i < (command & 0x1fffff); i++) {
3570						reg = start_reg + (4 * i);
3571						if (!evergreen_vm_reg_valid(reg)) {
3572							DRM_ERROR("CP DMA Bad DST register\n");
3573							return -EINVAL;
3574						}
3575					}
3576				}
3577			}
3578		}
3579		break;
3580	default:
3581		return -EINVAL;
3582	}
3583	return 0;
3584}
3585
3586int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3587{
3588	int ret = 0;
3589	u32 idx = 0;
3590	struct radeon_cs_packet pkt;
3591
3592	do {
3593		pkt.idx = idx;
3594		pkt.type = CP_PACKET_GET_TYPE(ib->ptr[idx]);
3595		pkt.count = CP_PACKET_GET_COUNT(ib->ptr[idx]);
3596		pkt.one_reg_wr = 0;
3597		switch (pkt.type) {
3598		case PACKET_TYPE0:
3599			dev_err(rdev->dev, "Packet0 not allowed!\n");
3600			ret = -EINVAL;
3601			break;
3602		case PACKET_TYPE2:
3603			idx += 1;
3604			break;
3605		case PACKET_TYPE3:
3606			pkt.opcode = CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
3607			ret = evergreen_vm_packet3_check(rdev, ib->ptr, &pkt);
3608			idx += pkt.count + 2;
3609			break;
3610		default:
3611			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
3612			ret = -EINVAL;
3613			break;
3614		}
3615		if (ret)
3616			break;
3617	} while (idx < ib->length_dw);
3618
3619	return ret;
3620}
3621
3622/**
3623 * evergreen_dma_ib_parse() - parse the DMA IB for VM
3624 * @rdev: radeon_device pointer
3625 * @ib:	radeon_ib pointer
3626 *
3627 * Parses the DMA IB from the VM CS ioctl
3628 * checks for errors. (Cayman-SI)
3629 * Returns 0 for success and an error on failure.
3630 **/
3631int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3632{
3633	u32 idx = 0;
3634	u32 header, cmd, count, tiled, new_cmd, misc;
3635
3636	do {
3637		header = ib->ptr[idx];
3638		cmd = GET_DMA_CMD(header);
3639		count = GET_DMA_COUNT(header);
3640		tiled = GET_DMA_T(header);
3641		new_cmd = GET_DMA_NEW(header);
3642		misc = GET_DMA_MISC(header);
3643
3644		switch (cmd) {
3645		case DMA_PACKET_WRITE:
3646			if (tiled)
3647				idx += count + 7;
3648			else
3649				idx += count + 3;
3650			break;
3651		case DMA_PACKET_COPY:
3652			if (tiled) {
3653				if (new_cmd) {
3654					switch (misc) {
3655					case 0:
3656						/* L2T, frame to fields */
3657						idx += 10;
3658						break;
3659					case 1:
3660						/* L2T, T2L partial */
3661						idx += 12;
3662						break;
3663					case 3:
3664						/* L2T, broadcast */
3665						idx += 10;
3666						break;
3667					case 4:
3668						/* L2T, T2L */
3669						idx += 9;
3670						break;
3671					case 5:
3672						/* T2T partial */
3673						idx += 13;
3674						break;
3675					case 7:
3676						/* L2T, broadcast */
3677						idx += 10;
3678						break;
3679					default:
3680						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3681						return -EINVAL;
3682					}
3683				} else {
3684					switch (misc) {
3685					case 0:
3686						idx += 9;
3687						break;
3688					default:
3689						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3690						return -EINVAL;
3691					}
3692				}
3693			} else {
3694				if (new_cmd) {
3695					switch (misc) {
3696					case 0:
3697						/* L2L, byte */
3698						idx += 5;
3699						break;
3700					case 1:
3701						/* L2L, partial */
3702						idx += 9;
3703						break;
3704					case 4:
3705						/* L2L, dw, broadcast */
3706						idx += 7;
3707						break;
3708					default:
3709						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3710						return -EINVAL;
3711					}
3712				} else {
3713					/* L2L, dw */
3714					idx += 5;
3715				}
3716			}
3717			break;
3718		case DMA_PACKET_CONSTANT_FILL:
3719			idx += 4;
3720			break;
3721		case DMA_PACKET_NOP:
3722			idx += 1;
3723			break;
3724		default:
3725			DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
3726			return -EINVAL;
3727		}
3728	} while (idx < ib->length_dw);
3729
3730	return 0;
3731}
3732