1/*	$NetBSD: radeon_vce.c,v 1.5 2021/12/18 23:45:43 riastradh Exp $	*/
2
3/*
4 * Copyright 2013 Advanced Micro Devices, Inc.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * The above copyright notice and this permission notice (including the
24 * next paragraph) shall be included in all copies or substantial portions
25 * of the Software.
26 *
 * Authors: Christian König <christian.koenig@amd.com>
28 */
29
30#include <sys/cdefs.h>
31__KERNEL_RCSID(0, "$NetBSD: radeon_vce.c,v 1.5 2021/12/18 23:45:43 riastradh Exp $");
32
33#include <linux/firmware.h>
34#include <linux/module.h>
35
36#include <drm/drm.h>
37
38#include "radeon.h"
39#include "radeon_asic.h"
40#include "sid.h"
41
42/* 1 second timeout */
43#define VCE_IDLE_TIMEOUT_MS	1000
44
45/* Firmware Names */
46#define FIRMWARE_TAHITI	"radeon/TAHITI_vce.bin"
47#define FIRMWARE_BONAIRE	"radeon/BONAIRE_vce.bin"
48
49MODULE_FIRMWARE(FIRMWARE_TAHITI);
50MODULE_FIRMWARE(FIRMWARE_BONAIRE);
51
52static void radeon_vce_idle_work_handler(struct work_struct *work);
53
54#ifdef __NetBSD__		/* XXX Ugh!  */
/*
 * Parse at most two decimal digits followed by 'delim' from *sp,
 * advancing *sp past the delimiter on success and storing the value in
 * *uintp.  Replacement for the sscanf("%2u") the Linux code uses.
 * Returns false (with *sp possibly advanced past consumed characters)
 * if the input does not match.
 */
static bool
scan_2dec_uint(const char **sp, char delim, unsigned int *uintp)
{
	unsigned int value = 0;
	unsigned int ndigits;
	char ch;

	for (ndigits = 0; ndigits < 2; ndigits++) {
		ch = *(*sp)++;
		if (!isdigit((unsigned char)ch))
			return false;
		value = value * 10 + (unsigned int)(ch - '0');
		/* stop early after one digit if the delimiter follows */
		if (**sp == delim)
			break;
	}
	if (**sp != delim)
		return false;

	(*sp)++;		/* consume the delimiter */
	*uintp = value;
	return true;
}
78
/*
 * Same as scan_2dec_uint() but narrows the result to a uint8_t.
 * *u8p is only written on success.
 */
static bool
scan_2dec_u8(const char **sp, char delim, uint8_t *u8p)
{
	unsigned int wide;
	bool ok;

	ok = scan_2dec_uint(sp, delim, &wide);
	if (ok)
		*u8p = (uint8_t)wide;
	return ok;
}
90#endif
91
92/**
93 * radeon_vce_init - allocate memory, load vce firmware
94 *
95 * @rdev: radeon_device pointer
96 *
97 * First step to get VCE online, allocate memory and load the firmware
98 */
99int radeon_vce_init(struct radeon_device *rdev)
100{
101	static const char *fw_version = "[ATI LIB=VCEFW,";
102	static const char *fb_version = "[ATI LIB=VCEFWSTATS,";
103	unsigned long size;
104	const char *fw_name, *c;
105	uint8_t start, mid, end;
106	int i, r;
107
108	INIT_DELAYED_WORK(&rdev->vce.idle_work, radeon_vce_idle_work_handler);
109
110	switch (rdev->family) {
111	case CHIP_TAHITI:
112	case CHIP_PITCAIRN:
113	case CHIP_VERDE:
114	case CHIP_OLAND:
115	case CHIP_ARUBA:
116		fw_name = FIRMWARE_TAHITI;
117		break;
118
119	case CHIP_BONAIRE:
120	case CHIP_KAVERI:
121	case CHIP_KABINI:
122	case CHIP_HAWAII:
123	case CHIP_MULLINS:
124		fw_name = FIRMWARE_BONAIRE;
125		break;
126
127	default:
128		return -EINVAL;
129	}
130
131	r = request_firmware(&rdev->vce_fw, fw_name, rdev->dev);
132	if (r) {
133		dev_err(rdev->dev, "radeon_vce: Can't load firmware \"%s\"\n",
134			fw_name);
135		return r;
136	}
137
138	/* search for firmware version */
139
140	size = rdev->vce_fw->size - strlen(fw_version) - 9;
141	c = rdev->vce_fw->data;
142	for (;size > 0; --size, ++c)
143		if (strncmp(c, fw_version, strlen(fw_version)) == 0)
144			break;
145
146	if (size == 0)
147		return -EINVAL;
148
149	c += strlen(fw_version);
150#ifdef __NetBSD__
151	if (!scan_2dec_u8(&c, '.', &start))
152		return -EINVAL;
153	if (!scan_2dec_u8(&c, '.', &mid))
154		return -EINVAL;
155	if (!scan_2dec_u8(&c, ']', &end))
156		return -EINVAL;
157#else
158	if (sscanf(c, "%2hhd.%2hhd.%2hhd]", &start, &mid, &end) != 3)
159		return -EINVAL;
160#endif
161
162	/* search for feedback version */
163
164	size = rdev->vce_fw->size - strlen(fb_version) - 3;
165	c = rdev->vce_fw->data;
166	for (;size > 0; --size, ++c)
167		if (strncmp(c, fb_version, strlen(fb_version)) == 0)
168			break;
169
170	if (size == 0)
171		return -EINVAL;
172
173	c += strlen(fb_version);
174#ifdef __NetBSD__
175	if (!scan_2dec_uint(&c, ']', &rdev->vce.fb_version))
176		return -EINVAL;
177#else
178	if (sscanf(c, "%2u]", &rdev->vce.fb_version) != 1)
179		return -EINVAL;
180#endif
181
182	DRM_INFO("Found VCE firmware/feedback version %hhd.%hhd.%hhd / %d!\n",
183		 start, mid, end, rdev->vce.fb_version);
184
185	rdev->vce.fw_version = (start << 24) | (mid << 16) | (end << 8);
186
187	/* we can only work with this fw version for now */
188	if ((rdev->vce.fw_version != ((40 << 24) | (2 << 16) | (2 << 8))) &&
189	    (rdev->vce.fw_version != ((50 << 24) | (0 << 16) | (1 << 8))) &&
190	    (rdev->vce.fw_version != ((50 << 24) | (1 << 16) | (2 << 8))))
191		return -EINVAL;
192
193	/* allocate firmware, stack and heap BO */
194
195	if (rdev->family < CHIP_BONAIRE)
196		size = vce_v1_0_bo_size(rdev);
197	else
198		size = vce_v2_0_bo_size(rdev);
199	r = radeon_bo_create(rdev, size, PAGE_SIZE, true,
200			     RADEON_GEM_DOMAIN_VRAM, 0, NULL, NULL,
201			     &rdev->vce.vcpu_bo);
202	if (r) {
203		dev_err(rdev->dev, "(%d) failed to allocate VCE bo\n", r);
204		return r;
205	}
206
207	r = radeon_bo_reserve(rdev->vce.vcpu_bo, false);
208	if (r) {
209		radeon_bo_unref(&rdev->vce.vcpu_bo);
210		dev_err(rdev->dev, "(%d) failed to reserve VCE bo\n", r);
211		return r;
212	}
213
214	r = radeon_bo_pin(rdev->vce.vcpu_bo, RADEON_GEM_DOMAIN_VRAM,
215			  &rdev->vce.gpu_addr);
216	radeon_bo_unreserve(rdev->vce.vcpu_bo);
217	if (r) {
218		radeon_bo_unref(&rdev->vce.vcpu_bo);
219		dev_err(rdev->dev, "(%d) VCE bo pin failed\n", r);
220		return r;
221	}
222
223	for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) {
224		atomic_set(&rdev->vce.handles[i], 0);
225		rdev->vce.filp[i] = NULL;
226	}
227
228	return 0;
229}
230
231/**
232 * radeon_vce_fini - free memory
233 *
234 * @rdev: radeon_device pointer
235 *
236 * Last step on VCE teardown, free firmware memory
237 */
238void radeon_vce_fini(struct radeon_device *rdev)
239{
240	if (rdev->vce.vcpu_bo == NULL)
241		return;
242
243	radeon_bo_unref(&rdev->vce.vcpu_bo);
244
245	release_firmware(rdev->vce_fw);
246}
247
248/**
249 * radeon_vce_suspend - unpin VCE fw memory
250 *
251 * @rdev: radeon_device pointer
252 *
253 */
254int radeon_vce_suspend(struct radeon_device *rdev)
255{
256	int i;
257
258	if (rdev->vce.vcpu_bo == NULL)
259		return 0;
260
261	for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i)
262		if (atomic_read(&rdev->vce.handles[i]))
263			break;
264
265	if (i == RADEON_MAX_VCE_HANDLES)
266		return 0;
267
268	/* TODO: suspending running encoding sessions isn't supported */
269	return -EINVAL;
270}
271
272/**
273 * radeon_vce_resume - pin VCE fw memory
274 *
275 * @rdev: radeon_device pointer
276 *
277 */
278int radeon_vce_resume(struct radeon_device *rdev)
279{
280	void *cpu_addr;
281	int r;
282
283	if (rdev->vce.vcpu_bo == NULL)
284		return -EINVAL;
285
286	r = radeon_bo_reserve(rdev->vce.vcpu_bo, false);
287	if (r) {
288		dev_err(rdev->dev, "(%d) failed to reserve VCE bo\n", r);
289		return r;
290	}
291
292	r = radeon_bo_kmap(rdev->vce.vcpu_bo, &cpu_addr);
293	if (r) {
294		radeon_bo_unreserve(rdev->vce.vcpu_bo);
295		dev_err(rdev->dev, "(%d) VCE map failed\n", r);
296		return r;
297	}
298
299	memset(cpu_addr, 0, radeon_bo_size(rdev->vce.vcpu_bo));
300	if (rdev->family < CHIP_BONAIRE)
301		r = vce_v1_0_load_fw(rdev, cpu_addr);
302	else
303		memcpy(cpu_addr, rdev->vce_fw->data, rdev->vce_fw->size);
304
305	radeon_bo_kunmap(rdev->vce.vcpu_bo);
306
307	radeon_bo_unreserve(rdev->vce.vcpu_bo);
308
309	return r;
310}
311
312/**
313 * radeon_vce_idle_work_handler - power off VCE
314 *
315 * @work: pointer to work structure
316 *
317 * power of VCE when it's not used any more
318 */
319static void radeon_vce_idle_work_handler(struct work_struct *work)
320{
321	struct radeon_device *rdev =
322		container_of(work, struct radeon_device, vce.idle_work.work);
323
324	if ((radeon_fence_count_emitted(rdev, TN_RING_TYPE_VCE1_INDEX) == 0) &&
325	    (radeon_fence_count_emitted(rdev, TN_RING_TYPE_VCE2_INDEX) == 0)) {
326		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
327			radeon_dpm_enable_vce(rdev, false);
328		} else {
329			radeon_set_vce_clocks(rdev, 0, 0);
330		}
331	} else {
332		schedule_delayed_work(&rdev->vce.idle_work,
333				      msecs_to_jiffies(VCE_IDLE_TIMEOUT_MS));
334	}
335}
336
337/**
338 * radeon_vce_note_usage - power up VCE
339 *
340 * @rdev: radeon_device pointer
341 *
342 * Make sure VCE is powerd up when we want to use it
343 */
344void radeon_vce_note_usage(struct radeon_device *rdev)
345{
346	bool streams_changed = false;
347	bool set_clocks = !cancel_delayed_work_sync(&rdev->vce.idle_work);
348	set_clocks &= schedule_delayed_work(&rdev->vce.idle_work,
349					    msecs_to_jiffies(VCE_IDLE_TIMEOUT_MS));
350
351	if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
352		/* XXX figure out if the streams changed */
353		streams_changed = false;
354	}
355
356	if (set_clocks || streams_changed) {
357		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
358			radeon_dpm_enable_vce(rdev, true);
359		} else {
360			radeon_set_vce_clocks(rdev, 53300, 40000);
361		}
362	}
363}
364
365/**
366 * radeon_vce_free_handles - free still open VCE handles
367 *
368 * @rdev: radeon_device pointer
369 * @filp: drm file pointer
370 *
371 * Close all VCE handles still open by this file pointer
372 */
373void radeon_vce_free_handles(struct radeon_device *rdev, struct drm_file *filp)
374{
375	int i, r;
376	for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) {
377		uint32_t handle = atomic_read(&rdev->vce.handles[i]);
378		if (!handle || rdev->vce.filp[i] != filp)
379			continue;
380
381		radeon_vce_note_usage(rdev);
382
383		r = radeon_vce_get_destroy_msg(rdev, TN_RING_TYPE_VCE1_INDEX,
384					       handle, NULL);
385		if (r)
386			DRM_ERROR("Error destroying VCE handle (%d)!\n", r);
387
388		rdev->vce.filp[i] = NULL;
389		atomic_set(&rdev->vce.handles[i], 0);
390	}
391}
392
/**
 * radeon_vce_get_create_msg - generate a VCE create msg
 *
 * @rdev: radeon_device pointer
 * @ring: ring we should submit the msg to
 * @handle: VCE session handle to use
 * @fence: optional fence to return
 *
 * Open up a stream for HW test.  Builds a fixed session + create +
 * feedback-buffer command stream in a freshly allocated IB, zero-pads
 * the rest and schedules it.  On success *fence (if non-NULL) receives
 * a reference to the IB fence.  Returns 0 or a negative error code.
 */
int radeon_vce_get_create_msg(struct radeon_device *rdev, int ring,
			      uint32_t handle, struct radeon_fence **fence)
{
	const unsigned ib_size_dw = 1024;
	struct radeon_ib ib;
	uint64_t dummy;
	int i, r;

	r = radeon_ib_get(rdev, ring, &ib, NULL, ib_size_dw * 4);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		return r;
	}

	/* scratch feedback buffer placed 1KiB into the IB allocation */
	dummy = ib.gpu_addr + 1024;

	/* stitch together an VCE create msg */
	ib.length_dw = 0;
	ib.ptr[ib.length_dw++] = cpu_to_le32(0x0000000c); /* len */
	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000001); /* session cmd */
	ib.ptr[ib.length_dw++] = cpu_to_le32(handle);

	/*
	 * NOTE(review): the create parameters below are opaque firmware
	 * constants; do not reorder or "clean up" this sequence.
	 */
	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000030); /* len */
	ib.ptr[ib.length_dw++] = cpu_to_le32(0x01000001); /* create cmd */
	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000000);
	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000042);
	ib.ptr[ib.length_dw++] = cpu_to_le32(0x0000000a);
	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000001);
	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000080);
	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000060);
	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000100);
	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000100);
	ib.ptr[ib.length_dw++] = cpu_to_le32(0x0000000c);
	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000000);

	/* feedback buffer address is split high dword / low dword */
	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000014); /* len */
	ib.ptr[ib.length_dw++] = cpu_to_le32(0x05000005); /* feedback buffer */
	ib.ptr[ib.length_dw++] = cpu_to_le32(upper_32_bits(dummy));
	ib.ptr[ib.length_dw++] = cpu_to_le32(dummy);
	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000001);

	/* zero-fill the remainder of the IB */
	for (i = ib.length_dw; i < ib_size_dw; ++i)
		ib.ptr[i] = cpu_to_le32(0x0);

	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r)
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);


	if (fence)
		*fence = radeon_fence_ref(ib.fence);

	radeon_ib_free(rdev, &ib);

	return r;
}
459
/**
 * radeon_vce_get_destroy_msg - generate a VCE destroy msg
 *
 * @rdev: radeon_device pointer
 * @ring: ring we should submit the msg to
 * @handle: VCE session handle to use
 * @fence: optional fence to return
 *
 * Close up a stream for HW test or if userspace failed to do so.
 * Builds a fixed session + feedback-buffer + destroy command stream,
 * zero-pads the rest of the IB and schedules it.  On success *fence
 * (if non-NULL) receives a reference to the IB fence.  Returns 0 or a
 * negative error code.
 */
int radeon_vce_get_destroy_msg(struct radeon_device *rdev, int ring,
			       uint32_t handle, struct radeon_fence **fence)
{
	const unsigned ib_size_dw = 1024;
	struct radeon_ib ib;
	uint64_t dummy;
	int i, r;

	r = radeon_ib_get(rdev, ring, &ib, NULL, ib_size_dw * 4);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		return r;
	}

	/* scratch feedback buffer placed 1KiB into the IB allocation */
	dummy = ib.gpu_addr + 1024;

	/* stitch together an VCE destroy msg */
	ib.length_dw = 0;
	ib.ptr[ib.length_dw++] = cpu_to_le32(0x0000000c); /* len */
	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000001); /* session cmd */
	ib.ptr[ib.length_dw++] = cpu_to_le32(handle);

	/* feedback buffer address is split high dword / low dword */
	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000014); /* len */
	ib.ptr[ib.length_dw++] = cpu_to_le32(0x05000005); /* feedback buffer */
	ib.ptr[ib.length_dw++] = cpu_to_le32(upper_32_bits(dummy));
	ib.ptr[ib.length_dw++] = cpu_to_le32(dummy);
	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000001);

	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000008); /* len */
	ib.ptr[ib.length_dw++] = cpu_to_le32(0x02000001); /* destroy cmd */

	/* zero-fill the remainder of the IB */
	for (i = ib.length_dw; i < ib_size_dw; ++i)
		ib.ptr[i] = cpu_to_le32(0x0);

	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r) {
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
	}

	if (fence)
		*fence = radeon_fence_ref(ib.fence);

	radeon_ib_free(rdev, &ib);

	return r;
}
516
517/**
518 * radeon_vce_cs_reloc - command submission relocation
519 *
520 * @p: parser context
521 * @lo: address of lower dword
522 * @hi: address of higher dword
523 * @size: size of checker for relocation buffer
524 *
525 * Patch relocation inside command stream with real buffer address
526 */
527int radeon_vce_cs_reloc(struct radeon_cs_parser *p, int lo, int hi,
528			unsigned size)
529{
530	struct radeon_cs_chunk *relocs_chunk;
531	struct radeon_bo_list *reloc;
532	uint64_t start, end, offset;
533	unsigned idx;
534
535	relocs_chunk = p->chunk_relocs;
536	offset = radeon_get_ib_value(p, lo);
537	idx = radeon_get_ib_value(p, hi);
538
539	if (idx >= relocs_chunk->length_dw) {
540		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
541			  idx, relocs_chunk->length_dw);
542		return -EINVAL;
543	}
544
545	reloc = &p->relocs[(idx / 4)];
546	start = reloc->gpu_offset;
547	end = start + radeon_bo_size(reloc->robj);
548	start += offset;
549
550	p->ib.ptr[lo] = start & 0xFFFFFFFF;
551	p->ib.ptr[hi] = start >> 32;
552
553	if (end <= start) {
554		DRM_ERROR("invalid reloc offset %"PRIX64"!\n", offset);
555		return -EINVAL;
556	}
557	if ((end - start) < size) {
558		DRM_ERROR("buffer to small (%d / %d)!\n",
559			(unsigned)(end - start), size);
560		return -EINVAL;
561	}
562
563	return 0;
564}
565
566/**
567 * radeon_vce_validate_handle - validate stream handle
568 *
569 * @p: parser context
570 * @handle: handle to validate
571 * @allocated: allocated a new handle?
572 *
573 * Validates the handle and return the found session index or -EINVAL
574 * we we don't have another free session index.
575 */
576static int radeon_vce_validate_handle(struct radeon_cs_parser *p,
577				      uint32_t handle, bool *allocated)
578{
579	unsigned i;
580
581	*allocated = false;
582
583	/* validate the handle */
584	for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) {
585		if (atomic_read(&p->rdev->vce.handles[i]) == handle) {
586			if (p->rdev->vce.filp[i] != p->filp) {
587				DRM_ERROR("VCE handle collision detected!\n");
588				return -EINVAL;
589			}
590			return i;
591		}
592	}
593
594	/* handle not found try to alloc a new one */
595	for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) {
596		if (!atomic_cmpxchg(&p->rdev->vce.handles[i], 0, handle)) {
597			p->rdev->vce.filp[i] = p->filp;
598			p->rdev->vce.img_size[i] = 0;
599			*allocated = true;
600			return i;
601		}
602	}
603
604	DRM_ERROR("No more free VCE handles!\n");
605	return -EINVAL;
606}
607
/**
 * radeon_vce_cs_parse - parse and validate the command stream
 *
 * @p: parser context
 *
 * Walks every command packet in the IB, validates session handles and
 * patches/validates buffer relocations.  Tracks create/destroy state
 * so that on a destroy message or on error any handle allocated here
 * is freed again.  Returns 0 on success or a negative error code.
 */
int radeon_vce_cs_parse(struct radeon_cs_parser *p)
{
	int session_idx = -1;
	bool destroyed = false, created = false, allocated = false;
	uint32_t tmp, handle = 0;
	/* points at the session's img_size slot once a session cmd is seen */
	uint32_t *size = &tmp;
	int i, r = 0;

	while (p->idx < p->chunk_ib->length_dw) {
		uint32_t len = radeon_get_ib_value(p, p->idx);
		uint32_t cmd = radeon_get_ib_value(p, p->idx + 1);

		/* every packet is at least 8 bytes and dword aligned */
		if ((len < 8) || (len & 3)) {
			DRM_ERROR("invalid VCE command length (%d)!\n", len);
			r = -EINVAL;
			goto out;
		}

		if (destroyed) {
			DRM_ERROR("No other command allowed after destroy!\n");
			r = -EINVAL;
			goto out;
		}

		switch (cmd) {
		case 0x00000001: // session
			handle = radeon_get_ib_value(p, p->idx + 2);
			session_idx = radeon_vce_validate_handle(p, handle,
								 &allocated);
			/*
			 * NOTE(review): this early return bypasses the
			 * "out:" handle cleanup below -- confirm a handle
			 * allocated by an earlier session cmd in the same
			 * IB cannot leak on this path.
			 */
			if (session_idx < 0)
				return session_idx;
			size = &p->rdev->vce.img_size[session_idx];
			break;

		case 0x00000002: // task info
			break;

		case 0x01000001: // create
			created = true;
			if (!allocated) {
				DRM_ERROR("Handle already in use!\n");
				r = -EINVAL;
				goto out;
			}

			/*
			 * Image size from two create parameters (dwords 8
			 * and 10; presumably width and height -- verify),
			 * times 8 * 3/2.
			 */
			*size = radeon_get_ib_value(p, p->idx + 8) *
				radeon_get_ib_value(p, p->idx + 10) *
				8 * 3 / 2;
			break;

		case 0x04000001: // config extension
		case 0x04000002: // pic control
		case 0x04000005: // rate control
		case 0x04000007: // motion estimation
		case 0x04000008: // rdo
		case 0x04000009: // vui
			break;

		case 0x03000001: // encode
			/* picture buffer, then reconstructed picture buffer */
			r = radeon_vce_cs_reloc(p, p->idx + 10, p->idx + 9,
						*size);
			if (r)
				goto out;

			r = radeon_vce_cs_reloc(p, p->idx + 12, p->idx + 11,
						*size / 3);
			if (r)
				goto out;
			break;

		case 0x02000001: // destroy
			destroyed = true;
			break;

		case 0x05000001: // context buffer
			r = radeon_vce_cs_reloc(p, p->idx + 3, p->idx + 2,
						*size * 2);
			if (r)
				goto out;
			break;

		case 0x05000004: // video bitstream buffer
			/* bitstream size comes from the packet itself */
			tmp = radeon_get_ib_value(p, p->idx + 4);
			r = radeon_vce_cs_reloc(p, p->idx + 3, p->idx + 2,
						tmp);
			if (r)
				goto out;
			break;

		case 0x05000005: // feedback buffer
			r = radeon_vce_cs_reloc(p, p->idx + 3, p->idx + 2,
						4096);
			if (r)
				goto out;
			break;

		default:
			DRM_ERROR("invalid VCE command (0x%x)!\n", cmd);
			r = -EINVAL;
			goto out;
		}

		/* the first packet of the IB must be a session cmd */
		if (session_idx == -1) {
			DRM_ERROR("no session command at start of IB\n");
			r = -EINVAL;
			goto out;
		}

		p->idx += len / 4;
	}

	if (allocated && !created) {
		DRM_ERROR("New session without create command!\n");
		r = -ENOENT;
	}

out:
	if ((!r && destroyed) || (r && allocated)) {
		/*
		 * IB contains a destroy msg or we have allocated an
		 * handle and got an error, anyway free the handle
		 */
		for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i)
			atomic_cmpxchg(&p->rdev->vce.handles[i], handle, 0);
	}

	return r;
}
742
/**
 * radeon_vce_semaphore_emit - emit a semaphore command
 *
 * @rdev: radeon_device pointer
 * @ring: engine to use
 * @semaphore: address of semaphore
 * @emit_wait: true=emit wait, false=emit signal
 *
 * Emits a VCE semaphore wait or signal packet.  The 64-bit GPU address
 * is sent as two 20-bit fields (addr >> 3 and addr >> 23), i.e. the low
 * three address bits are discarded.  Always returns true.
 */
bool radeon_vce_semaphore_emit(struct radeon_device *rdev,
			       struct radeon_ring *ring,
			       struct radeon_semaphore *semaphore,
			       bool emit_wait)
{
	uint64_t addr = semaphore->gpu_addr;

	radeon_ring_write(ring, cpu_to_le32(VCE_CMD_SEMAPHORE));
	radeon_ring_write(ring, cpu_to_le32((addr >> 3) & 0x000FFFFF));
	radeon_ring_write(ring, cpu_to_le32((addr >> 23) & 0x000FFFFF));
	/* bit 0 selects wait (1) vs. signal (0) */
	radeon_ring_write(ring, cpu_to_le32(0x01003000 | (emit_wait ? 1 : 0)));
	if (!emit_wait)
		radeon_ring_write(ring, cpu_to_le32(VCE_CMD_END));

	return true;
}
768
/**
 * radeon_vce_ib_execute - execute indirect buffer
 *
 * @rdev: radeon_device pointer
 * @ib: the IB to execute
 *
 * Emits a VCE_CMD_IB packet pointing at the IB: low address dword,
 * high address dword, then the IB length in dwords.
 */
void radeon_vce_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	radeon_ring_write(ring, cpu_to_le32(VCE_CMD_IB));
	radeon_ring_write(ring, cpu_to_le32(ib->gpu_addr));
	radeon_ring_write(ring, cpu_to_le32(upper_32_bits(ib->gpu_addr)));
	radeon_ring_write(ring, cpu_to_le32(ib->length_dw));
}
784
/**
 * radeon_vce_fence_emit - add a fence command to the ring
 *
 * @rdev: radeon_device pointer
 * @fence: the fence
 *
 * Emits a VCE_CMD_FENCE packet that writes the fence sequence number
 * to the fence driver's GPU address, followed by a trap and an end
 * packet.
 */
void radeon_vce_fence_emit(struct radeon_device *rdev,
			   struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	uint64_t addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* destination address (low, then high), then the value to write */
	radeon_ring_write(ring, cpu_to_le32(VCE_CMD_FENCE));
	radeon_ring_write(ring, cpu_to_le32(addr));
	radeon_ring_write(ring, cpu_to_le32(upper_32_bits(addr)));
	radeon_ring_write(ring, cpu_to_le32(fence->seq));
	radeon_ring_write(ring, cpu_to_le32(VCE_CMD_TRAP));
	radeon_ring_write(ring, cpu_to_le32(VCE_CMD_END));
}
805
806/**
807 * radeon_vce_ring_test - test if VCE ring is working
808 *
809 * @rdev: radeon_device pointer
810 * @ring: the engine to test on
811 *
812 */
813int radeon_vce_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
814{
815	uint32_t rptr = vce_v1_0_get_rptr(rdev, ring);
816	unsigned i;
817	int r;
818
819	r = radeon_ring_lock(rdev, ring, 16);
820	if (r) {
821		DRM_ERROR("radeon: vce failed to lock ring %d (%d).\n",
822			  ring->idx, r);
823		return r;
824	}
825	radeon_ring_write(ring, cpu_to_le32(VCE_CMD_END));
826	radeon_ring_unlock_commit(rdev, ring, false);
827
828	for (i = 0; i < rdev->usec_timeout; i++) {
829		if (vce_v1_0_get_rptr(rdev, ring) != rptr)
830			break;
831		udelay(1);
832	}
833
834	if (i < rdev->usec_timeout) {
835		DRM_INFO("ring test on %d succeeded in %d usecs\n",
836			 ring->idx, i);
837	} else {
838		DRM_ERROR("radeon: ring %d test failed\n",
839			 ring->idx);
840		r = -ETIMEDOUT;
841	}
842
843	return r;
844}
845
846/**
847 * radeon_vce_ib_test - test if VCE IBs are working
848 *
849 * @rdev: radeon_device pointer
850 * @ring: the engine to test on
851 *
852 */
853int radeon_vce_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
854{
855	struct radeon_fence *fence = NULL;
856	int r;
857
858	r = radeon_vce_get_create_msg(rdev, ring->idx, 1, NULL);
859	if (r) {
860		DRM_ERROR("radeon: failed to get create msg (%d).\n", r);
861		goto error;
862	}
863
864	r = radeon_vce_get_destroy_msg(rdev, ring->idx, 1, &fence);
865	if (r) {
866		DRM_ERROR("radeon: failed to get destroy ib (%d).\n", r);
867		goto error;
868	}
869
870	r = radeon_fence_wait_timeout(fence, false, usecs_to_jiffies(
871		RADEON_USEC_IB_TEST_TIMEOUT));
872	if (r < 0) {
873		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
874	} else if (r == 0) {
875		DRM_ERROR("radeon: fence wait timed out.\n");
876		r = -ETIMEDOUT;
877	} else {
878		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
879		r = 0;
880	}
881error:
882	radeon_fence_unref(&fence);
883	return r;
884}
885