/*	$NetBSD: amdgpu_vce.c,v 1.7 2021/12/19 12:21:29 riastradh Exp $	*/

/*
 * Copyright 2013 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * Authors: Christian König <christian.koenig@amd.com>
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_vce.c,v 1.7 2021/12/19 12:21:29 riastradh Exp $");

#include <linux/firmware.h>
#include <linux/module.h>

#include <drm/drm.h>

#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_vce.h"
#include "cikd.h"

#include <linux/nbsd-namespace.h>

/* 1 second timeout */
#define VCE_IDLE_TIMEOUT	msecs_to_jiffies(1000)

/* Firmware Names */
#ifdef CONFIG_DRM_AMDGPU_CIK
#define FIRMWARE_BONAIRE	"amdgpu/bonaire_vce.bin"
#define FIRMWARE_KABINI	"amdgpu/kabini_vce.bin"
#define FIRMWARE_KAVERI	"amdgpu/kaveri_vce.bin"
#define FIRMWARE_HAWAII	"amdgpu/hawaii_vce.bin"
#define FIRMWARE_MULLINS	"amdgpu/mullins_vce.bin"
#endif
#define FIRMWARE_TONGA		"amdgpu/tonga_vce.bin"
#define FIRMWARE_CARRIZO	"amdgpu/carrizo_vce.bin"
#define FIRMWARE_FIJI		"amdgpu/fiji_vce.bin"
#define FIRMWARE_STONEY		"amdgpu/stoney_vce.bin"
#define FIRMWARE_POLARIS10	"amdgpu/polaris10_vce.bin"
#define FIRMWARE_POLARIS11	"amdgpu/polaris11_vce.bin"
#define FIRMWARE_POLARIS12	"amdgpu/polaris12_vce.bin"
#define FIRMWARE_VEGAM		"amdgpu/vegam_vce.bin"

#define FIRMWARE_VEGA10		"amdgpu/vega10_vce.bin"
#define FIRMWARE_VEGA12		"amdgpu/vega12_vce.bin"
#define FIRMWARE_VEGA20		"amdgpu/vega20_vce.bin"

#ifdef CONFIG_DRM_AMDGPU_CIK
MODULE_FIRMWARE(FIRMWARE_BONAIRE);
MODULE_FIRMWARE(FIRMWARE_KABINI);
MODULE_FIRMWARE(FIRMWARE_KAVERI);
MODULE_FIRMWARE(FIRMWARE_HAWAII);
MODULE_FIRMWARE(FIRMWARE_MULLINS);
#endif
MODULE_FIRMWARE(FIRMWARE_TONGA);
MODULE_FIRMWARE(FIRMWARE_CARRIZO);
MODULE_FIRMWARE(FIRMWARE_FIJI);
MODULE_FIRMWARE(FIRMWARE_STONEY);
MODULE_FIRMWARE(FIRMWARE_POLARIS10);
MODULE_FIRMWARE(FIRMWARE_POLARIS11);
MODULE_FIRMWARE(FIRMWARE_POLARIS12);
MODULE_FIRMWARE(FIRMWARE_VEGAM);

MODULE_FIRMWARE(FIRMWARE_VEGA10);
MODULE_FIRMWARE(FIRMWARE_VEGA12);
MODULE_FIRMWARE(FIRMWARE_VEGA20);

static void amdgpu_vce_idle_work_handler(struct work_struct *work);
static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
				     struct amdgpu_bo *bo,
				     struct dma_fence **fence);
static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
				      bool direct, struct dma_fence **fence);
/**
 * amdgpu_vce_sw_init - allocate memory, load vce firmware
 *
 * @adev: amdgpu_device pointer
 * @size: size for the new BO
 *
 * First step to get VCE online, allocate memory and load the firmware
 */
int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
{
	const char *fw_name;
	const struct common_firmware_header *hdr;
	unsigned ucode_version, version_major, version_minor, binary_id;
	int i, r;

	switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
		fw_name = FIRMWARE_BONAIRE;
		break;
	case CHIP_KAVERI:
		fw_name = FIRMWARE_KAVERI;
		break;
	case CHIP_KABINI:
		fw_name = FIRMWARE_KABINI;
		break;
	case CHIP_HAWAII:
		fw_name = FIRMWARE_HAWAII;
		break;
	case CHIP_MULLINS:
		fw_name = FIRMWARE_MULLINS;
		break;
#endif
	case CHIP_TONGA:
		fw_name = FIRMWARE_TONGA;
		break;
	case CHIP_CARRIZO:
		fw_name = FIRMWARE_CARRIZO;
		break;
	case CHIP_FIJI:
		fw_name = FIRMWARE_FIJI;
		break;
	case CHIP_STONEY:
		fw_name = FIRMWARE_STONEY;
		break;
	case CHIP_POLARIS10:
		fw_name = FIRMWARE_POLARIS10;
		break;
	case CHIP_POLARIS11:
		fw_name = FIRMWARE_POLARIS11;
		break;
	case CHIP_POLARIS12:
		fw_name = FIRMWARE_POLARIS12;
		break;
	case CHIP_VEGAM:
		fw_name = FIRMWARE_VEGAM;
		break;
	case CHIP_VEGA10:
		fw_name = FIRMWARE_VEGA10;
		break;
	case CHIP_VEGA12:
		fw_name = FIRMWARE_VEGA12;
		break;
	case CHIP_VEGA20:
		fw_name = FIRMWARE_VEGA20;
		break;

	default:
		return -EINVAL;
	}

	r = request_firmware(&adev->vce.fw, fw_name, adev->dev);
	if (r) {
		dev_err(adev->dev, "amdgpu_vce: Can't load firmware \"%s\"\n",
			fw_name);
		return r;
	}

	r = amdgpu_ucode_validate(adev->vce.fw);
	if (r) {
		dev_err(adev->dev, "amdgpu_vce: Can't validate firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->vce.fw);
		adev->vce.fw = NULL;
		return r;
	}

	hdr = (const struct common_firmware_header *)adev->vce.fw->data;

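	/*
	 * ucode_version packs the firmware version as major (bits
	 * 31:20), minor (bits 19:8) and binary id (bits 7:0); it is
	 * repacked below with the major version in the top byte, which
	 * the (fw_version >> 24) checks in the message builders rely on.
	 */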
	ucode_version = le32_to_cpu(hdr->ucode_version);
	version_major = (ucode_version >> 20) & 0xfff;
	version_minor = (ucode_version >> 8) & 0xfff;
	binary_id = ucode_version & 0xff;
	DRM_INFO("Found VCE firmware Version: %x.%x Binary ID: %x\n",
		version_major, version_minor, binary_id);
	adev->vce.fw_version = ((version_major << 24) | (version_minor << 16) |
				(binary_id << 8));

	r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_VRAM, &adev->vce.vcpu_bo,
				    &adev->vce.gpu_addr, &adev->vce.cpu_addr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to allocate VCE bo\n", r);
		return r;
	}

	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
		atomic_set(&adev->vce.handles[i], 0);
		adev->vce.filp[i] = NULL;
	}

	INIT_DELAYED_WORK(&adev->vce.idle_work, amdgpu_vce_idle_work_handler);
	mutex_init(&adev->vce.idle_mutex);

	return 0;
}

/**
 * amdgpu_vce_sw_fini - free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Last step on VCE teardown, free firmware memory
 */
int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
{
	unsigned i;

	if (adev->vce.vcpu_bo == NULL)
		return 0;

	cancel_delayed_work_sync(&adev->vce.idle_work);
	drm_sched_entity_destroy(&adev->vce.entity);

	amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr,
		(void **)&adev->vce.cpu_addr);

	for (i = 0; i < adev->vce.num_rings; i++)
		amdgpu_ring_fini(&adev->vce.ring[i]);

	release_firmware(adev->vce.fw);
	mutex_destroy(&adev->vce.idle_mutex);

	return 0;
}

/**
 * amdgpu_vce_entity_init - init entity
 *
 * @adev: amdgpu_device pointer
 *
 */
int amdgpu_vce_entity_init(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	struct drm_gpu_scheduler *sched;
	int r;

	ring = &adev->vce.ring[0];
	sched = &ring->sched;
	r = drm_sched_entity_init(&adev->vce.entity, DRM_SCHED_PRIORITY_NORMAL,
				  &sched, 1, NULL);
	if (r != 0) {
		DRM_ERROR("Failed setting up VCE run queue.\n");
		return r;
	}

	return 0;
}

/**
 * amdgpu_vce_suspend - unpin VCE fw memory
 *
 * @adev: amdgpu_device pointer
 *
 */
int amdgpu_vce_suspend(struct amdgpu_device *adev)
{
	int i;

	cancel_delayed_work_sync(&adev->vce.idle_work);

	if (adev->vce.vcpu_bo == NULL)
		return 0;

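	/* only suspend if no encode session is still open */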
	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
		if (atomic_read(&adev->vce.handles[i]))
			break;

	if (i == AMDGPU_MAX_VCE_HANDLES)
		return 0;

	/* TODO: suspending running encoding sessions isn't supported */
	return -EINVAL;
}

/**
 * amdgpu_vce_resume - pin VCE fw memory
 *
 * @adev: amdgpu_device pointer
 *
 */
int amdgpu_vce_resume(struct amdgpu_device *adev)
{
	void *cpu_addr;
	const struct common_firmware_header *hdr;
	unsigned offset;
	int r;

	if (adev->vce.vcpu_bo == NULL)
		return -EINVAL;

	r = amdgpu_bo_reserve(adev->vce.vcpu_bo, false);
	if (r) {
		dev_err(adev->dev, "(%d) failed to reserve VCE bo\n", r);
		return r;
	}

	r = amdgpu_bo_kmap(adev->vce.vcpu_bo, &cpu_addr);
	if (r) {
		amdgpu_bo_unreserve(adev->vce.vcpu_bo);
		dev_err(adev->dev, "(%d) VCE map failed\n", r);
		return r;
	}

	hdr = (const struct common_firmware_header *)adev->vce.fw->data;
	offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
	memcpy_toio(cpu_addr, adev->vce.fw->data + offset,
		    adev->vce.fw->size - offset);

	amdgpu_bo_kunmap(adev->vce.vcpu_bo);

	amdgpu_bo_unreserve(adev->vce.vcpu_bo);

	return 0;
}

/**
 * amdgpu_vce_idle_work_handler - power off VCE
 *
 * @work: pointer to work structure
 *
 * Power off VCE when it's not used anymore
 */
static void amdgpu_vce_idle_work_handler(struct work_struct *work)
{
	struct amdgpu_device *adev =
		container_of(work, struct amdgpu_device, vce.idle_work.work);
	unsigned i, count = 0;

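	/* count the fences still outstanding on all VCE rings */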
	for (i = 0; i < adev->vce.num_rings; i++)
		count += amdgpu_fence_count_emitted(&adev->vce.ring[i]);

	if (count == 0) {
		if (adev->pm.dpm_enabled) {
			amdgpu_dpm_enable_vce(adev, false);
		} else {
			amdgpu_asic_set_vce_clocks(adev, 0, 0);
			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
							       AMD_PG_STATE_GATE);
			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
							       AMD_CG_STATE_GATE);
		}
	} else {
		schedule_delayed_work(&adev->vce.idle_work, VCE_IDLE_TIMEOUT);
	}
}

/**
 * amdgpu_vce_ring_begin_use - power up VCE
 *
 * @ring: amdgpu ring
 *
 * Make sure VCE is powered up when we want to use it
 */
void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	bool set_clocks;

	if (amdgpu_sriov_vf(adev))
		return;

	mutex_lock(&adev->vce.idle_mutex);
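	/*
	 * If no idle work was pending, VCE may already have been
	 * powered down, so bring the clocks back up.
	 */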
	set_clocks = !cancel_delayed_work_sync(&adev->vce.idle_work);
	if (set_clocks) {
		if (adev->pm.dpm_enabled) {
			amdgpu_dpm_enable_vce(adev, true);
		} else {
			amdgpu_asic_set_vce_clocks(adev, 53300, 40000);
			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
							       AMD_CG_STATE_UNGATE);
			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
							       AMD_PG_STATE_UNGATE);

		}
	}
	mutex_unlock(&adev->vce.idle_mutex);
}

/**
 * amdgpu_vce_ring_end_use - power VCE down
 *
 * @ring: amdgpu ring
 *
 * Schedule work to power VCE down again
 */
void amdgpu_vce_ring_end_use(struct amdgpu_ring *ring)
{
	if (!amdgpu_sriov_vf(ring->adev))
		schedule_delayed_work(&ring->adev->vce.idle_work, VCE_IDLE_TIMEOUT);
}

/**
 * amdgpu_vce_free_handles - free still open VCE handles
 *
 * @adev: amdgpu_device pointer
 * @filp: drm file pointer
 *
 * Close all VCE handles still open by this file pointer
 */
void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
{
	struct amdgpu_ring *ring = &adev->vce.ring[0];
	int i, r;

	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
		uint32_t handle = atomic_read(&adev->vce.handles[i]);

		if (!handle || adev->vce.filp[i] != filp)
			continue;

		r = amdgpu_vce_get_destroy_msg(ring, handle, false, NULL);
		if (r)
			DRM_ERROR("Error destroying VCE handle (%d)!\n", r);

		adev->vce.filp[i] = NULL;
		atomic_set(&adev->vce.handles[i], 0);
	}
}

/**
 * amdgpu_vce_get_create_msg - generate a VCE create msg
 *
 * @ring: ring we should submit the msg to
 * @handle: VCE session handle to use
 * @bo: buffer object to use as the feedback buffer
 * @fence: optional fence to return
 *
 * Open up a stream for HW test
 */
static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
				     struct amdgpu_bo *bo,
				     struct dma_fence **fence)
{
	const unsigned ib_size_dw = 1024;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	uint64_t addr;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
	if (r)
		return r;

	ib = &job->ibs[0];

	addr = amdgpu_bo_gpu_offset(bo);

	/* stitch together a VCE create msg */
	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x0000000c; /* len */
	ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */
	ib->ptr[ib->length_dw++] = handle;

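	/* firmware major version 52 and newer use a longer create message */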
	if ((ring->adev->vce.fw_version >> 24) >= 52)
		ib->ptr[ib->length_dw++] = 0x00000040; /* len */
	else
		ib->ptr[ib->length_dw++] = 0x00000030; /* len */
	ib->ptr[ib->length_dw++] = 0x01000001; /* create cmd */
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000042;
	ib->ptr[ib->length_dw++] = 0x0000000a;
	ib->ptr[ib->length_dw++] = 0x00000001;
	ib->ptr[ib->length_dw++] = 0x00000080;
	ib->ptr[ib->length_dw++] = 0x00000060;
	ib->ptr[ib->length_dw++] = 0x00000100;
	ib->ptr[ib->length_dw++] = 0x00000100;
	ib->ptr[ib->length_dw++] = 0x0000000c;
	ib->ptr[ib->length_dw++] = 0x00000000;
	if ((ring->adev->vce.fw_version >> 24) >= 52) {
		ib->ptr[ib->length_dw++] = 0x00000000;
		ib->ptr[ib->length_dw++] = 0x00000000;
		ib->ptr[ib->length_dw++] = 0x00000000;
		ib->ptr[ib->length_dw++] = 0x00000000;
	}

	ib->ptr[ib->length_dw++] = 0x00000014; /* len */
	ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */
	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
	ib->ptr[ib->length_dw++] = addr;
	ib->ptr[ib->length_dw++] = 0x00000001;

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err;

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);
	return 0;

err:
	amdgpu_job_free(job);
	return r;
}

/**
 * amdgpu_vce_get_destroy_msg - generate a VCE destroy msg
 *
 * @ring: ring we should submit the msg to
 * @handle: VCE session handle to use
 * @direct: submit directly to the ring instead of through the scheduler
 * @fence: optional fence to return
 *
 * Close up a stream for HW test or if userspace failed to do so
 */
static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
				      bool direct, struct dma_fence **fence)
{
	const unsigned ib_size_dw = 1024;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
	if (r)
		return r;

	ib = &job->ibs[0];

	/* stitch together a VCE destroy msg */
	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x0000000c; /* len */
	ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */
	ib->ptr[ib->length_dw++] = handle;

	ib->ptr[ib->length_dw++] = 0x00000020; /* len */
	ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
	ib->ptr[ib->length_dw++] = 0xffffffff; /* next task info, set to 0xffffffff if none */
	ib->ptr[ib->length_dw++] = 0x00000001; /* destroy session */
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0xffffffff; /* feedback is not needed, set to 0xffffffff and firmware will not output feedback */
	ib->ptr[ib->length_dw++] = 0x00000000;

	ib->ptr[ib->length_dw++] = 0x00000008; /* len */
	ib->ptr[ib->length_dw++] = 0x02000001; /* destroy cmd */

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

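	/*
	 * Direct submission bypasses the scheduler (used by the ring
	 * test); otherwise the job is queued on the VCE entity.
	 */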
	if (direct)
		r = amdgpu_job_submit_direct(job, ring, &f);
	else
		r = amdgpu_job_submit(job, &ring->adev->vce.entity,
				      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
	if (r)
		goto err;

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);
	return 0;

err:
	amdgpu_job_free(job);
	return r;
}

/**
 * amdgpu_vce_validate_bo - make sure not to cross 4GB boundary
 *
 * @p: parser context
 * @ib_idx: indirect buffer to use
 * @lo: address of lower dword
 * @hi: address of higher dword
 * @size: minimum size
 * @index: bs/fb index
 *
 * Make sure that no BO crosses a 4GB boundary.
 */
static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, uint32_t ib_idx,
				  int lo, int hi, unsigned size, int32_t index)
{
	int64_t offset = ((uint64_t)size) * ((int64_t)index);
	struct ttm_operation_ctx ctx = { false, false };
	struct amdgpu_bo_va_mapping *mapping;
	unsigned i, fpfn, lpfn;
	struct amdgpu_bo *bo;
	uint64_t addr;
	int r;

	addr = ((uint64_t)amdgpu_get_ib_value(p, ib_idx, lo)) |
	       ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32;
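	/*
	 * Clamp the allowed placement range so that the accessed range
	 * cannot end up crossing the 4GB boundary.
	 */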
	if (index >= 0) {
		addr += offset;
		fpfn = PAGE_ALIGN(offset) >> PAGE_SHIFT;
		lpfn = 0x100000000ULL >> PAGE_SHIFT;
	} else {
		fpfn = 0;
		lpfn = (0x100000000ULL - PAGE_ALIGN(offset)) >> PAGE_SHIFT;
	}

	r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping);
	if (r) {
		DRM_ERROR("Can't find BO for addr 0x%010"PRIx64" %d %d %d %d\n",
			  addr, lo, hi, size, index);
		return r;
	}

	for (i = 0; i < bo->placement.num_placement; ++i) {
		bo->placements[i].fpfn = max(bo->placements[i].fpfn, fpfn);
		bo->placements[i].lpfn = bo->placements[i].lpfn ?
			min(bo->placements[i].lpfn, lpfn) : lpfn;
	}
	return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
}

/**
 * amdgpu_vce_cs_reloc - command submission relocation
 *
 * @p: parser context
 * @ib_idx: indirect buffer to use
 * @lo: address of lower dword
 * @hi: address of higher dword
 * @size: minimum size
 * @index: bs/fb index
 *
 * Patch relocation inside command stream with real buffer address
 */
static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
			       int lo, int hi, unsigned size, uint32_t index)
{
	struct amdgpu_bo_va_mapping *mapping;
	struct amdgpu_bo *bo;
	uint64_t addr;
	int r;

	if (index == 0xffffffff)
		index = 0;

	addr = ((uint64_t)amdgpu_get_ib_value(p, ib_idx, lo)) |
	       ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32;
	addr += ((uint64_t)size) * ((uint64_t)index);

	r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping);
	if (r) {
		DRM_ERROR("Can't find BO for addr 0x%010"PRIx64" %d %d %d %d\n",
			  addr, lo, hi, size, index);
		return r;
	}

	if ((addr + (uint64_t)size) >
	    ((uint64_t)mapping->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
		DRM_ERROR("BO too small for addr 0x%010"PRIx64" %d %d\n",
			  addr, lo, hi);
		return -EINVAL;
	}

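	/*
	 * Rebase the address: strip the virtual mapping offset, add the
	 * real GPU offset of the BO, and undo the index offset applied
	 * for the lookup above.
	 */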
	addr -= mapping->start * AMDGPU_GPU_PAGE_SIZE;
	addr += amdgpu_bo_gpu_offset(bo);
	addr -= ((uint64_t)size) * ((uint64_t)index);

	amdgpu_set_ib_value(p, ib_idx, lo, lower_32_bits(addr));
	amdgpu_set_ib_value(p, ib_idx, hi, upper_32_bits(addr));

	return 0;
}

/**
 * amdgpu_vce_validate_handle - validate stream handle
 *
 * @p: parser context
 * @handle: handle to validate
 * @allocated: allocated a new handle?
 *
 * Validates the handle and returns the found session index or -EINVAL
 * if we don't have another free session index.
 */
static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p,
				      uint32_t handle, uint32_t *allocated)
{
	unsigned i;

	/* validate the handle */
	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
		if (atomic_read(&p->adev->vce.handles[i]) == handle) {
			if (p->adev->vce.filp[i] != p->filp) {
				DRM_ERROR("VCE handle collision detected!\n");
				return -EINVAL;
			}
			return i;
		}
	}

	/* handle not found, try to allocate a new one */
	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
		if (!atomic_cmpxchg(&p->adev->vce.handles[i], 0, handle)) {
			p->adev->vce.filp[i] = p->filp;
			p->adev->vce.img_size[i] = 0;
			*allocated |= 1 << i;
			return i;
		}
	}

	DRM_ERROR("No more free VCE handles!\n");
	return -EINVAL;
}

/**
 * amdgpu_vce_ring_parse_cs - parse and validate the command stream
 *
 * @p: parser context
 * @ib_idx: indirect buffer to parse
 *
 */
int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
{
	struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
	unsigned fb_idx = 0, bs_idx = 0;
	int session_idx = -1;
	uint32_t destroyed = 0;
	uint32_t created = 0;
	uint32_t allocated = 0;
	uint32_t tmp, handle = 0;
	uint32_t *size = &tmp;
	unsigned idx;
	int i, r = 0;

	p->job->vm = NULL;
	ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);

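	/* first pass: validate the placement of all referenced BOs */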
	for (idx = 0; idx < ib->length_dw;) {
		uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
		uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);

		if ((len < 8) || (len & 3)) {
			DRM_ERROR("invalid VCE command length (%d)!\n", len);
			r = -EINVAL;
			goto out;
		}

		switch (cmd) {
		case 0x00000002: /* task info */
			fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6);
			bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7);
			break;

		case 0x03000001: /* encode */
			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 10,
						   idx + 9, 0, 0);
			if (r)
				goto out;

			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 12,
						   idx + 11, 0, 0);
			if (r)
				goto out;
			break;

		case 0x05000001: /* context buffer */
			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3,
						   idx + 2, 0, 0);
			if (r)
				goto out;
			break;

		case 0x05000004: /* video bitstream buffer */
			tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4);
			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3, idx + 2,
						   tmp, bs_idx);
			if (r)
				goto out;
			break;

		case 0x05000005: /* feedback buffer */
			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3, idx + 2,
						   4096, fb_idx);
			if (r)
				goto out;
			break;

		case 0x0500000d: /* MV buffer */
			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3,
							idx + 2, 0, 0);
			if (r)
				goto out;

			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 8,
							idx + 7, 0, 0);
			if (r)
				goto out;
			break;
		}

		idx += len / 4;
	}

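	/* second pass: validate the commands and patch in the real addresses */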
	for (idx = 0; idx < ib->length_dw;) {
		uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
		uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);

		switch (cmd) {
		case 0x00000001: /* session */
			handle = amdgpu_get_ib_value(p, ib_idx, idx + 2);
			session_idx = amdgpu_vce_validate_handle(p, handle,
								 &allocated);
			if (session_idx < 0) {
				r = session_idx;
				goto out;
			}
			size = &p->adev->vce.img_size[session_idx];
			break;

		case 0x00000002: /* task info */
			fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6);
			bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7);
			break;

		case 0x01000001: /* create */
			created |= 1 << session_idx;
			if (destroyed & (1 << session_idx)) {
				destroyed &= ~(1 << session_idx);
				allocated |= 1 << session_idx;

			} else if (!(allocated & (1 << session_idx))) {
				DRM_ERROR("Handle already in use!\n");
				r = -EINVAL;
				goto out;
			}

			*size = amdgpu_get_ib_value(p, ib_idx, idx + 8) *
				amdgpu_get_ib_value(p, ib_idx, idx + 10) *
				8 * 3 / 2;
			break;

		case 0x04000001: /* config extension */
		case 0x04000002: /* pic control */
		case 0x04000005: /* rate control */
		case 0x04000007: /* motion estimation */
		case 0x04000008: /* rdo */
		case 0x04000009: /* vui */
		case 0x05000002: /* auxiliary buffer */
		case 0x05000009: /* clock table */
			break;

		case 0x0500000c: /* hw config */
			switch (p->adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_CIK
			case CHIP_KAVERI:
			case CHIP_MULLINS:
#endif
			case CHIP_CARRIZO:
				break;
			default:
				r = -EINVAL;
				goto out;
			}
			break;

		case 0x03000001: /* encode */
			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 10, idx + 9,
						*size, 0);
			if (r)
				goto out;

			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 12, idx + 11,
						*size / 3, 0);
			if (r)
				goto out;
			break;

		case 0x02000001: /* destroy */
			destroyed |= 1 << session_idx;
			break;

		case 0x05000001: /* context buffer */
			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
						*size * 2, 0);
			if (r)
				goto out;
			break;

		case 0x05000004: /* video bitstream buffer */
			tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4);
			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
						tmp, bs_idx);
			if (r)
				goto out;
			break;

		case 0x05000005: /* feedback buffer */
			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
						4096, fb_idx);
			if (r)
				goto out;
			break;

		case 0x0500000d: /* MV buffer */
			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3,
							idx + 2, *size, 0);
			if (r)
				goto out;

			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 8,
							idx + 7, *size / 12, 0);
			if (r)
				goto out;
			break;

		default:
			DRM_ERROR("invalid VCE command (0x%x)!\n", cmd);
			r = -EINVAL;
			goto out;
		}

		if (session_idx == -1) {
			DRM_ERROR("no session command at start of IB\n");
			r = -EINVAL;
			goto out;
		}

		idx += len / 4;
	}

	if (allocated & ~created) {
		DRM_ERROR("New session without create command!\n");
		r = -ENOENT;
	}

out:
	if (!r) {
		/* No error, free all destroyed handle slots */
		tmp = destroyed;
	} else {
		/* Error during parsing, free all allocated handle slots */
		tmp = allocated;
	}

	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
		if (tmp & (1 << i))
			atomic_set(&p->adev->vce.handles[i], 0);

	return r;
}

/**
 * amdgpu_vce_ring_parse_cs_vm - parse the command stream in VM mode
 *
 * @p: parser context
 * @ib_idx: indirect buffer to parse
 *
 */
int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx)
{
	struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
	int session_idx = -1;
	uint32_t destroyed = 0;
	uint32_t created = 0;
	uint32_t allocated = 0;
	uint32_t tmp, handle = 0;
	int i, r = 0, idx = 0;

	while (idx < ib->length_dw) {
		uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
		uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);

		if ((len < 8) || (len & 3)) {
			DRM_ERROR("invalid VCE command length (%d)!\n", len);
			r = -EINVAL;
			goto out;
		}

		switch (cmd) {
		case 0x00000001: /* session */
			handle = amdgpu_get_ib_value(p, ib_idx, idx + 2);
			session_idx = amdgpu_vce_validate_handle(p, handle,
								 &allocated);
			if (session_idx < 0) {
				r = session_idx;
				goto out;
			}
			break;

		case 0x01000001: /* create */
			created |= 1 << session_idx;
			if (destroyed & (1 << session_idx)) {
				destroyed &= ~(1 << session_idx);
				allocated |= 1 << session_idx;

			} else if (!(allocated & (1 << session_idx))) {
				DRM_ERROR("Handle already in use!\n");
				r = -EINVAL;
				goto out;
			}

			break;

		case 0x02000001: /* destroy */
			destroyed |= 1 << session_idx;
			break;

		default:
			break;
		}

		if (session_idx == -1) {
			DRM_ERROR("no session command at start of IB\n");
			r = -EINVAL;
			goto out;
		}

		idx += len / 4;
	}

	if (allocated & ~created) {
		DRM_ERROR("New session without create command!\n");
		r = -ENOENT;
	}

out:
	if (!r) {
		/* No error, free all destroyed handle slots */
		tmp = destroyed;
		amdgpu_ib_free(p->adev, ib, NULL);
	} else {
		/* Error during parsing, free all allocated handle slots */
		tmp = allocated;
	}

	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
		if (tmp & (1 << i))
			atomic_set(&p->adev->vce.handles[i], 0);

	return r;
}

/**
 * amdgpu_vce_ring_emit_ib - execute indirect buffer
 *
 * @ring: engine to use
 * @job: job to retrieve vmid from
 * @ib: the IB to execute
 * @flags: unused
 *
 */
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring,
				struct amdgpu_job *job,
				struct amdgpu_ib *ib,
				uint32_t flags)
{
	amdgpu_ring_write(ring, VCE_CMD_IB);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}

/**
 * amdgpu_vce_ring_emit_fence - add a fence command to the ring
 *
 * @ring: engine to use
 * @addr: address to write the fence sequence number to
 * @seq: sequence number to emit
 * @flags: fence flags, AMDGPU_FENCE_FLAG_64BIT is not supported
 *
 */
void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
				unsigned flags)
{
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	amdgpu_ring_write(ring, VCE_CMD_FENCE);
	amdgpu_ring_write(ring, addr);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, VCE_CMD_TRAP);
	amdgpu_ring_write(ring, VCE_CMD_END);
}

/**
 * amdgpu_vce_ring_test_ring - test if VCE ring is working
 *
 * @ring: the engine to test on
 *
 */
int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t rptr;
	unsigned i;
	int r, timeout = adev->usec_timeout;

	/* skip ring test for sriov */
	if (amdgpu_sriov_vf(adev))
		return 0;

	r = amdgpu_ring_alloc(ring, 16);
	if (r)
		return r;

	rptr = amdgpu_ring_get_rptr(ring);

	amdgpu_ring_write(ring, VCE_CMD_END);
	amdgpu_ring_commit(ring);

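	/* wait up to adev->usec_timeout microseconds for the rptr to advance */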
	for (i = 0; i < timeout; i++) {
		if (amdgpu_ring_get_rptr(ring) != rptr)
			break;
		udelay(1);
	}

	if (i >= timeout)
		r = -ETIMEDOUT;

	return r;
}

/**
 * amdgpu_vce_ring_test_ib - test if VCE IBs are working
 *
 * @ring: the engine to test on
 * @timeout: timeout in jiffies to wait for the fence
 *
 */
int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct dma_fence *fence = NULL;
	struct amdgpu_bo *bo = NULL;
	long r;

	/* skip vce ring1/2 ib test for now, since it's not reliable */
	if (ring != &ring->adev->vce.ring[0])
		return 0;

	r = amdgpu_bo_create_reserved(ring->adev, 512, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &bo, NULL, NULL);
	if (r)
		return r;

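	/*
	 * Round-trip a create and a destroy message and wait on the
	 * destroy fence to prove IB submission works.
	 */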
	r = amdgpu_vce_get_create_msg(ring, 1, bo, NULL);
	if (r)
		goto error;

	r = amdgpu_vce_get_destroy_msg(ring, 1, true, &fence);
	if (r)
		goto error;

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0)
		r = -ETIMEDOUT;
	else if (r > 0)
		r = 0;

error:
	dma_fence_put(fence);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);
	return r;
}