/*	$NetBSD: amdgpu_vce.c,v 1.7 2021/12/19 12:21:29 riastradh Exp $	*/

/*
 * Copyright 2013 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * Authors: Christian König <christian.koenig@amd.com>
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_vce.c,v 1.7 2021/12/19 12:21:29 riastradh Exp $");

#include <linux/firmware.h>
#include <linux/module.h>

#include <drm/drm.h>

#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_vce.h"
#include "cikd.h"

#include <linux/nbsd-namespace.h>

/* 1 second timeout */
#define VCE_IDLE_TIMEOUT	msecs_to_jiffies(1000)

/* Firmware Names */
#ifdef CONFIG_DRM_AMDGPU_CIK
#define FIRMWARE_BONAIRE	"amdgpu/bonaire_vce.bin"
#define FIRMWARE_KABINI		"amdgpu/kabini_vce.bin"
#define FIRMWARE_KAVERI		"amdgpu/kaveri_vce.bin"
#define FIRMWARE_HAWAII		"amdgpu/hawaii_vce.bin"
#define FIRMWARE_MULLINS	"amdgpu/mullins_vce.bin"
#endif
#define FIRMWARE_TONGA		"amdgpu/tonga_vce.bin"
#define FIRMWARE_CARRIZO	"amdgpu/carrizo_vce.bin"
#define FIRMWARE_FIJI		"amdgpu/fiji_vce.bin"
#define FIRMWARE_STONEY		"amdgpu/stoney_vce.bin"
#define FIRMWARE_POLARIS10	"amdgpu/polaris10_vce.bin"
#define FIRMWARE_POLARIS11	"amdgpu/polaris11_vce.bin"
#define FIRMWARE_POLARIS12	"amdgpu/polaris12_vce.bin"
#define FIRMWARE_VEGAM		"amdgpu/vegam_vce.bin"

#define FIRMWARE_VEGA10		"amdgpu/vega10_vce.bin"
#define FIRMWARE_VEGA12		"amdgpu/vega12_vce.bin"
#define FIRMWARE_VEGA20		"amdgpu/vega20_vce.bin"

#ifdef CONFIG_DRM_AMDGPU_CIK
MODULE_FIRMWARE(FIRMWARE_BONAIRE);
MODULE_FIRMWARE(FIRMWARE_KABINI);
MODULE_FIRMWARE(FIRMWARE_KAVERI);
MODULE_FIRMWARE(FIRMWARE_HAWAII);
MODULE_FIRMWARE(FIRMWARE_MULLINS);
#endif
MODULE_FIRMWARE(FIRMWARE_TONGA);
MODULE_FIRMWARE(FIRMWARE_CARRIZO);
MODULE_FIRMWARE(FIRMWARE_FIJI);
MODULE_FIRMWARE(FIRMWARE_STONEY);
MODULE_FIRMWARE(FIRMWARE_POLARIS10);
MODULE_FIRMWARE(FIRMWARE_POLARIS11);
MODULE_FIRMWARE(FIRMWARE_POLARIS12);
MODULE_FIRMWARE(FIRMWARE_VEGAM);

MODULE_FIRMWARE(FIRMWARE_VEGA10);
MODULE_FIRMWARE(FIRMWARE_VEGA12);
MODULE_FIRMWARE(FIRMWARE_VEGA20);

static void amdgpu_vce_idle_work_handler(struct work_struct *work);
static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
				     struct amdgpu_bo *bo,
				     struct dma_fence **fence);
static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
				      bool direct, struct dma_fence **fence);
/**
 * amdgpu_vce_sw_init - allocate memory, load vce firmware
 *
 * @adev: amdgpu_device pointer
 * @size: size for the new BO
 *
 * First step to get VCE online, allocate memory and load the firmware
 */
int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
{
	const char *fw_name;
	const struct common_firmware_header *hdr;
	unsigned ucode_version, version_major, version_minor, binary_id;
	int i, r;

	switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
		fw_name = FIRMWARE_BONAIRE;
		break;
	case CHIP_KAVERI:
		fw_name = FIRMWARE_KAVERI;
		break;
	case CHIP_KABINI:
		fw_name = FIRMWARE_KABINI;
		break;
	case CHIP_HAWAII:
		fw_name = FIRMWARE_HAWAII;
		break;
	case CHIP_MULLINS:
		fw_name = FIRMWARE_MULLINS;
		break;
#endif
	case CHIP_TONGA:
		fw_name = FIRMWARE_TONGA;
		break;
	case CHIP_CARRIZO:
		fw_name = FIRMWARE_CARRIZO;
		break;
	case CHIP_FIJI:
		fw_name = FIRMWARE_FIJI;
		break;
	case CHIP_STONEY:
		fw_name = FIRMWARE_STONEY;
		break;
	case CHIP_POLARIS10:
		fw_name = FIRMWARE_POLARIS10;
		break;
	case CHIP_POLARIS11:
		fw_name = FIRMWARE_POLARIS11;
		break;
	case CHIP_POLARIS12:
		fw_name = FIRMWARE_POLARIS12;
		break;
	case CHIP_VEGAM:
		fw_name = FIRMWARE_VEGAM;
		break;
	case CHIP_VEGA10:
		fw_name = FIRMWARE_VEGA10;
		break;
	case CHIP_VEGA12:
		fw_name = FIRMWARE_VEGA12;
		break;
	case CHIP_VEGA20:
		fw_name = FIRMWARE_VEGA20;
		break;

	default:
		return -EINVAL;
	}

	r = request_firmware(&adev->vce.fw, fw_name, adev->dev);
	if (r) {
		dev_err(adev->dev, "amdgpu_vce: Can't load firmware \"%s\"\n",
			fw_name);
		return r;
	}

	r = amdgpu_ucode_validate(adev->vce.fw);
	if (r) {
		dev_err(adev->dev, "amdgpu_vce: Can't validate firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->vce.fw);
		adev->vce.fw = NULL;
		return r;
	}

	hdr = (const struct common_firmware_header *)adev->vce.fw->data;

	ucode_version = le32_to_cpu(hdr->ucode_version);
	version_major = (ucode_version >> 20) & 0xfff;
	version_minor = (ucode_version >> 8) & 0xfff;
	binary_id = ucode_version & 0xff;
	DRM_INFO("Found VCE firmware Version: %x.%x Binary ID: %x\n",
		 version_major, version_minor, binary_id);
	adev->vce.fw_version = ((version_major << 24) | (version_minor << 16) |
				(binary_id << 8));

	r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_VRAM, &adev->vce.vcpu_bo,
				    &adev->vce.gpu_addr, &adev->vce.cpu_addr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to allocate VCE bo\n", r);
		return r;
	}

	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
		atomic_set(&adev->vce.handles[i], 0);
		adev->vce.filp[i] = NULL;
	}

	INIT_DELAYED_WORK(&adev->vce.idle_work, amdgpu_vce_idle_work_handler);
	mutex_init(&adev->vce.idle_mutex);

	return 0;
}
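/*
 * Worked example of the version packing above (hypothetical firmware
 * values): for major 52, minor 4, binary id 3 the header carries
 *
 *	ucode_version = (52 << 20) | (4 << 8) | 3 = 0x03400403
 *
 * and the driver repacks it byte-aligned as
 *
 *	fw_version = (52 << 24) | (4 << 16) | (3 << 8) = 0x34040300
 *
 * so checks such as "(adev->vce.fw_version >> 24) >= 52" in
 * amdgpu_vce_get_create_msg() below compare the major version directly.
 */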
/**
 * amdgpu_vce_sw_fini - free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Last step on VCE teardown, free firmware memory
 */
int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
{
	unsigned i;

	if (adev->vce.vcpu_bo == NULL)
		return 0;

	cancel_delayed_work_sync(&adev->vce.idle_work);
	drm_sched_entity_destroy(&adev->vce.entity);

	amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr,
		(void **)&adev->vce.cpu_addr);

	for (i = 0; i < adev->vce.num_rings; i++)
		amdgpu_ring_fini(&adev->vce.ring[i]);

	release_firmware(adev->vce.fw);
	mutex_destroy(&adev->vce.idle_mutex);

	return 0;
}

/**
 * amdgpu_vce_entity_init - init entity
 *
 * @adev: amdgpu_device pointer
 *
 */
int amdgpu_vce_entity_init(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	struct drm_gpu_scheduler *sched;
	int r;

	ring = &adev->vce.ring[0];
	sched = &ring->sched;
	r = drm_sched_entity_init(&adev->vce.entity, DRM_SCHED_PRIORITY_NORMAL,
				  &sched, 1, NULL);
	if (r != 0) {
		DRM_ERROR("Failed setting up VCE run queue.\n");
		return r;
	}

	return 0;
}

/**
 * amdgpu_vce_suspend - unpin VCE fw memory
 *
 * @adev: amdgpu_device pointer
 *
 */
int amdgpu_vce_suspend(struct amdgpu_device *adev)
{
	int i;

	cancel_delayed_work_sync(&adev->vce.idle_work);

	if (adev->vce.vcpu_bo == NULL)
		return 0;

	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
		if (atomic_read(&adev->vce.handles[i]))
			break;

	if (i == AMDGPU_MAX_VCE_HANDLES)
		return 0;

	/* TODO: suspending running encoding sessions isn't supported */
	return -EINVAL;
}

/**
 * amdgpu_vce_resume - pin VCE fw memory
 *
 * @adev: amdgpu_device pointer
 *
 */
int amdgpu_vce_resume(struct amdgpu_device *adev)
{
	void *cpu_addr;
	const struct common_firmware_header *hdr;
	unsigned offset;
	int r;

	if (adev->vce.vcpu_bo == NULL)
		return -EINVAL;

	r = amdgpu_bo_reserve(adev->vce.vcpu_bo, false);
	if (r) {
		dev_err(adev->dev, "(%d) failed to reserve VCE bo\n", r);
		return r;
	}

	r = amdgpu_bo_kmap(adev->vce.vcpu_bo, &cpu_addr);
	if (r) {
		amdgpu_bo_unreserve(adev->vce.vcpu_bo);
		dev_err(adev->dev, "(%d) VCE map failed\n", r);
		return r;
	}

	hdr = (const struct common_firmware_header *)adev->vce.fw->data;
	offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
	memcpy_toio(cpu_addr, adev->vce.fw->data + offset,
		    adev->vce.fw->size - offset);

	amdgpu_bo_kunmap(adev->vce.vcpu_bo);

	amdgpu_bo_unreserve(adev->vce.vcpu_bo);

	return 0;
}

/**
 * amdgpu_vce_idle_work_handler - power off VCE
 *
 * @work: pointer to work structure
 *
 * power off VCE when it's no longer used
 */
static void amdgpu_vce_idle_work_handler(struct work_struct *work)
{
	struct amdgpu_device *adev =
		container_of(work, struct amdgpu_device, vce.idle_work.work);
	unsigned i, count = 0;

	for (i = 0; i < adev->vce.num_rings; i++)
		count += amdgpu_fence_count_emitted(&adev->vce.ring[i]);

	if (count == 0) {
		if (adev->pm.dpm_enabled) {
			amdgpu_dpm_enable_vce(adev, false);
		} else {
			amdgpu_asic_set_vce_clocks(adev, 0, 0);
			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
							       AMD_PG_STATE_GATE);
			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
							       AMD_CG_STATE_GATE);
		}
	} else {
		schedule_delayed_work(&adev->vce.idle_work, VCE_IDLE_TIMEOUT);
	}
}
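/*
 * A minimal sketch of the resulting power handoff, assuming the ring
 * framework calls the begin_use/end_use hooks below around every
 * submission:
 *
 *	amdgpu_vce_ring_begin_use(ring);  // cancel idle work, ungate VCE
 *	// ... emit commands and commit the ring ...
 *	amdgpu_vce_ring_end_use(ring);    // re-arm the 1s idle timer
 *
 * If no further submission arrives within VCE_IDLE_TIMEOUT, the idle
 * work handler above gates clocks and power again.
 */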
/**
 * amdgpu_vce_ring_begin_use - power up VCE
 *
 * @ring: amdgpu ring
 *
 * Make sure VCE is powered up when we want to use it
 */
void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	bool set_clocks;

	if (amdgpu_sriov_vf(adev))
		return;

	mutex_lock(&adev->vce.idle_mutex);
	set_clocks = !cancel_delayed_work_sync(&adev->vce.idle_work);
	if (set_clocks) {
		if (adev->pm.dpm_enabled) {
			amdgpu_dpm_enable_vce(adev, true);
		} else {
			amdgpu_asic_set_vce_clocks(adev, 53300, 40000);
			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
							       AMD_CG_STATE_UNGATE);
			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
							       AMD_PG_STATE_UNGATE);
		}
	}
	mutex_unlock(&adev->vce.idle_mutex);
}

/**
 * amdgpu_vce_ring_end_use - power VCE down
 *
 * @ring: amdgpu ring
 *
 * Schedule work to power VCE down again
 */
void amdgpu_vce_ring_end_use(struct amdgpu_ring *ring)
{
	if (!amdgpu_sriov_vf(ring->adev))
		schedule_delayed_work(&ring->adev->vce.idle_work, VCE_IDLE_TIMEOUT);
}

/**
 * amdgpu_vce_free_handles - free still open VCE handles
 *
 * @adev: amdgpu_device pointer
 * @filp: drm file pointer
 *
 * Close all VCE handles still open by this file pointer
 */
void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
{
	struct amdgpu_ring *ring = &adev->vce.ring[0];
	int i, r;

	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
		uint32_t handle = atomic_read(&adev->vce.handles[i]);

		if (!handle || adev->vce.filp[i] != filp)
			continue;

		r = amdgpu_vce_get_destroy_msg(ring, handle, false, NULL);
		if (r)
			DRM_ERROR("Error destroying VCE handle (%d)!\n", r);

		adev->vce.filp[i] = NULL;
		atomic_set(&adev->vce.handles[i], 0);
	}
}

/**
 * amdgpu_vce_get_create_msg - generate a VCE create msg
 *
 * @ring: ring we should submit the msg to
 * @handle: VCE session handle to use
 * @bo: buffer object used as feedback buffer
 * @fence: optional fence to return
 *
 * Open up a stream for HW test
 */
static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
				     struct amdgpu_bo *bo,
				     struct dma_fence **fence)
{
	const unsigned ib_size_dw = 1024;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	uint64_t addr;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
	if (r)
		return r;

	ib = &job->ibs[0];

	addr = amdgpu_bo_gpu_offset(bo);

	/* stitch together a VCE create msg */
	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x0000000c; /* len */
	ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */
	ib->ptr[ib->length_dw++] = handle;

	if ((ring->adev->vce.fw_version >> 24) >= 52)
		ib->ptr[ib->length_dw++] = 0x00000040; /* len */
	else
		ib->ptr[ib->length_dw++] = 0x00000030; /* len */
	ib->ptr[ib->length_dw++] = 0x01000001; /* create cmd */
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000042;
	ib->ptr[ib->length_dw++] = 0x0000000a;
	ib->ptr[ib->length_dw++] = 0x00000001;
	ib->ptr[ib->length_dw++] = 0x00000080;
	ib->ptr[ib->length_dw++] = 0x00000060;
	ib->ptr[ib->length_dw++] = 0x00000100;
	ib->ptr[ib->length_dw++] = 0x00000100;
	ib->ptr[ib->length_dw++] = 0x0000000c;
	ib->ptr[ib->length_dw++] = 0x00000000;
	if ((ring->adev->vce.fw_version >> 24) >= 52) {
		ib->ptr[ib->length_dw++] = 0x00000000;
		ib->ptr[ib->length_dw++] = 0x00000000;
		ib->ptr[ib->length_dw++] = 0x00000000;
		ib->ptr[ib->length_dw++] = 0x00000000;
	}

	ib->ptr[ib->length_dw++] = 0x00000014; /* len */
	ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */
	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
	ib->ptr[ib->length_dw++] = addr;
	ib->ptr[ib->length_dw++] = 0x00000001;

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err;

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);
	return 0;

err:
	amdgpu_job_free(job);
	return r;
}
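/*
 * The IB built above follows the general VCE packet framing: every
 * packet starts with a byte length (which includes the 8-byte header)
 * followed by a command word, e.g. the session packet
 *
 *	0x0000000c	// len: 12 bytes = len + cmd + handle
 *	0x00000001	// cmd: session
 *	handle		// session handle payload
 *
 * which is also why the CS parsers below step with "idx += len / 4".
 */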
/**
 * amdgpu_vce_get_destroy_msg - generate a VCE destroy msg
 *
 * @ring: ring we should submit the msg to
 * @handle: VCE session handle to use
 * @direct: submit the msg directly or through the scheduler entity
 * @fence: optional fence to return
 *
 * Close up a stream for HW test or if userspace failed to do so
 */
static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
				      bool direct, struct dma_fence **fence)
{
	const unsigned ib_size_dw = 1024;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
	if (r)
		return r;

	ib = &job->ibs[0];

	/* stitch together a VCE destroy msg */
	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x0000000c; /* len */
	ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */
	ib->ptr[ib->length_dw++] = handle;

	ib->ptr[ib->length_dw++] = 0x00000020; /* len */
	ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
	ib->ptr[ib->length_dw++] = 0xffffffff; /* next task info, set to 0xffffffff if none */
	ib->ptr[ib->length_dw++] = 0x00000001; /* destroy session */
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0xffffffff; /* feedback is not needed, set to 0xffffffff and firmware will not output feedback */
	ib->ptr[ib->length_dw++] = 0x00000000;

	ib->ptr[ib->length_dw++] = 0x00000008; /* len */
	ib->ptr[ib->length_dw++] = 0x02000001; /* destroy cmd */

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	if (direct)
		r = amdgpu_job_submit_direct(job, ring, &f);
	else
		r = amdgpu_job_submit(job, &ring->adev->vce.entity,
				      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
	if (r)
		goto err;

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);
	return 0;

err:
	amdgpu_job_free(job);
	return r;
}
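/*
 * For reference, the VCE command opcodes handled in this file:
 *
 *	0x00000001  session		0x04000008  rdo
 *	0x00000002  task info		0x04000009  vui
 *	0x01000001  create		0x05000001  context buffer
 *	0x02000001  destroy		0x05000002  auxiliary buffer
 *	0x03000001  encode		0x05000004  video bitstream buffer
 *	0x04000001  config extension	0x05000005  feedback buffer
 *	0x04000002  pic control		0x05000009  clock table
 *	0x04000005  rate control	0x0500000c  hw config
 *	0x04000007  motion estimation	0x0500000d  MV buffer
 */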
/**
 * amdgpu_vce_validate_bo - make sure not to cross 4GB boundary
 *
 * @p: parser context
 * @ib_idx: indirect buffer to use
 * @lo: address of lower dword
 * @hi: address of higher dword
 * @size: minimum size
 * @index: bs/fb index
 *
 * Make sure that no BO crosses a 4GB boundary.
 */
static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, uint32_t ib_idx,
				  int lo, int hi, unsigned size, int32_t index)
{
	int64_t offset = ((uint64_t)size) * ((int64_t)index);
	struct ttm_operation_ctx ctx = { false, false };
	struct amdgpu_bo_va_mapping *mapping;
	unsigned i, fpfn, lpfn;
	struct amdgpu_bo *bo;
	uint64_t addr;
	int r;

	addr = ((uint64_t)amdgpu_get_ib_value(p, ib_idx, lo)) |
	       ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32;
	if (index >= 0) {
		addr += offset;
		fpfn = PAGE_ALIGN(offset) >> PAGE_SHIFT;
		lpfn = 0x100000000ULL >> PAGE_SHIFT;
	} else {
		fpfn = 0;
		lpfn = (0x100000000ULL - PAGE_ALIGN(offset)) >> PAGE_SHIFT;
	}

	r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping);
	if (r) {
		DRM_ERROR("Can't find BO for addr 0x%010"PRIx64" %d %d %d %d\n",
			  addr, lo, hi, size, index);
		return r;
	}

	for (i = 0; i < bo->placement.num_placement; ++i) {
		bo->placements[i].fpfn = max(bo->placements[i].fpfn, fpfn);
		bo->placements[i].lpfn = bo->placements[i].lpfn ?
			min(bo->placements[i].lpfn, lpfn) : lpfn;
	}
	return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
}


/**
 * amdgpu_vce_cs_reloc - command submission relocation
 *
 * @p: parser context
 * @ib_idx: indirect buffer to use
 * @lo: address of lower dword
 * @hi: address of higher dword
 * @size: minimum size
 * @index: bs/fb index
 *
 * Patch relocation inside command stream with real buffer address
 */
static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
			       int lo, int hi, unsigned size, uint32_t index)
{
	struct amdgpu_bo_va_mapping *mapping;
	struct amdgpu_bo *bo;
	uint64_t addr;
	int r;

	if (index == 0xffffffff)
		index = 0;

	addr = ((uint64_t)amdgpu_get_ib_value(p, ib_idx, lo)) |
	       ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32;
	addr += ((uint64_t)size) * ((uint64_t)index);

	r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping);
	if (r) {
		DRM_ERROR("Can't find BO for addr 0x%010"PRIx64" %d %d %d %d\n",
			  addr, lo, hi, size, index);
		return r;
	}

	if ((addr + (uint64_t)size) >
	    ((uint64_t)mapping->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
		DRM_ERROR("BO too small for addr 0x%010"PRIx64" %d %d\n",
			  addr, lo, hi);
		return -EINVAL;
	}

	addr -= mapping->start * AMDGPU_GPU_PAGE_SIZE;
	addr += amdgpu_bo_gpu_offset(bo);
	addr -= ((uint64_t)size) * ((uint64_t)index);

	amdgpu_set_ib_value(p, ib_idx, lo, lower_32_bits(addr));
	amdgpu_set_ib_value(p, ib_idx, hi, upper_32_bits(addr));

	return 0;
}
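/*
 * Worked example for amdgpu_vce_validate_bo() (hypothetical values):
 * with size = 4096 and index = 2, the dword pair addresses the base of
 * a buffer whose slot 2 lives at offset = 8192.  Restricting the
 * placement to fpfn = PAGE_ALIGN(8192) >> PAGE_SHIFT = 2 and
 * lpfn = 0x100000000 >> PAGE_SHIFT keeps both the base address and the
 * addressed slot inside the same 4GB window, which is the invariant
 * the helper enforces; a negative index clamps lpfn from above
 * instead.
 */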
/**
 * amdgpu_vce_validate_handle - validate stream handle
 *
 * @p: parser context
 * @handle: handle to validate
 * @allocated: allocated a new handle?
 *
 * Validates the handle and returns the found session index or -EINVAL
 * if we don't have another free session index.
 */
static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p,
				      uint32_t handle, uint32_t *allocated)
{
	unsigned i;

	/* validate the handle */
	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
		if (atomic_read(&p->adev->vce.handles[i]) == handle) {
			if (p->adev->vce.filp[i] != p->filp) {
				DRM_ERROR("VCE handle collision detected!\n");
				return -EINVAL;
			}
			return i;
		}
	}

	/* handle not found, try to alloc a new one */
	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
		if (!atomic_cmpxchg(&p->adev->vce.handles[i], 0, handle)) {
			p->adev->vce.filp[i] = p->filp;
			p->adev->vce.img_size[i] = 0;
			*allocated |= 1 << i;
			return i;
		}
	}

	DRM_ERROR("No more free VCE handles!\n");
	return -EINVAL;
}

/**
 * amdgpu_vce_ring_parse_cs - parse and validate the command stream
 *
 * @p: parser context
 * @ib_idx: indirect buffer to parse
 *
 */
int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
{
	struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
	unsigned fb_idx = 0, bs_idx = 0;
	int session_idx = -1;
	uint32_t destroyed = 0;
	uint32_t created = 0;
	uint32_t allocated = 0;
	uint32_t tmp, handle = 0;
	uint32_t *size = &tmp;
	unsigned idx;
	int i, r = 0;

	p->job->vm = NULL;
	ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);

	for (idx = 0; idx < ib->length_dw;) {
		uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
		uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);

		if ((len < 8) || (len & 3)) {
			DRM_ERROR("invalid VCE command length (%d)!\n", len);
			r = -EINVAL;
			goto out;
		}

		switch (cmd) {
		case 0x00000002: /* task info */
			fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6);
			bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7);
			break;

		case 0x03000001: /* encode */
			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 10,
						   idx + 9, 0, 0);
			if (r)
				goto out;

			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 12,
						   idx + 11, 0, 0);
			if (r)
				goto out;
			break;

		case 0x05000001: /* context buffer */
			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3,
						   idx + 2, 0, 0);
			if (r)
				goto out;
			break;

		case 0x05000004: /* video bitstream buffer */
			tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4);
			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3, idx + 2,
						   tmp, bs_idx);
			if (r)
				goto out;
			break;

		case 0x05000005: /* feedback buffer */
			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3, idx + 2,
						   4096, fb_idx);
			if (r)
				goto out;
			break;

		case 0x0500000d: /* MV buffer */
			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3,
						   idx + 2, 0, 0);
			if (r)
				goto out;

			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 8,
						   idx + 7, 0, 0);
			if (r)
				goto out;
			break;
		}

		idx += len / 4;
	}

	for (idx = 0; idx < ib->length_dw;) {
		uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
		uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);

		switch (cmd) {
		case 0x00000001: /* session */
			handle = amdgpu_get_ib_value(p, ib_idx, idx + 2);
			session_idx = amdgpu_vce_validate_handle(p, handle,
								 &allocated);
			if (session_idx < 0) {
				r = session_idx;
				goto out;
			}
			size = &p->adev->vce.img_size[session_idx];
			break;

		case 0x00000002: /* task info */
			fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6);
			bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7);
			break;

		case 0x01000001: /* create */
			created |= 1 << session_idx;
			if (destroyed & (1 << session_idx)) {
				destroyed &= ~(1 << session_idx);
				allocated |= 1 << session_idx;

			} else if (!(allocated & (1 << session_idx))) {
				DRM_ERROR("Handle already in use!\n");
				r = -EINVAL;
				goto out;
			}

			*size = amdgpu_get_ib_value(p, ib_idx, idx + 8) *
				amdgpu_get_ib_value(p, ib_idx, idx + 10) *
				8 * 3 / 2;
			break;

		case 0x04000001: /* config extension */
		case 0x04000002: /* pic control */
		case 0x04000005: /* rate control */
		case 0x04000007: /* motion estimation */
		case 0x04000008: /* rdo */
		case 0x04000009: /* vui */
		case 0x05000002: /* auxiliary buffer */
		case 0x05000009: /* clock table */
			break;

		case 0x0500000c: /* hw config */
			switch (p->adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_CIK
			case CHIP_KAVERI:
			case CHIP_MULLINS:
#endif
			case CHIP_CARRIZO:
				break;
			default:
				r = -EINVAL;
				goto out;
			}
			break;

		case 0x03000001: /* encode */
			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 10, idx + 9,
						*size, 0);
			if (r)
				goto out;

			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 12, idx + 11,
						*size / 3, 0);
			if (r)
				goto out;
			break;

		case 0x02000001: /* destroy */
			destroyed |= 1 << session_idx;
			break;

		case 0x05000001: /* context buffer */
			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
						*size * 2, 0);
			if (r)
				goto out;
			break;

		case 0x05000004: /* video bitstream buffer */
			tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4);
			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
						tmp, bs_idx);
			if (r)
				goto out;
			break;

		case 0x05000005: /* feedback buffer */
			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
						4096, fb_idx);
			if (r)
				goto out;
			break;

		case 0x0500000d: /* MV buffer */
			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3,
						idx + 2, *size, 0);
			if (r)
				goto out;

			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 8,
						idx + 7, *size / 12, 0);
			if (r)
				goto out;
			break;

		default:
			DRM_ERROR("invalid VCE command (0x%x)!\n", cmd);
			r = -EINVAL;
			goto out;
		}

		if (session_idx == -1) {
			DRM_ERROR("no session command at start of IB\n");
			r = -EINVAL;
			goto out;
		}

		idx += len / 4;
	}

	if (allocated & ~created) {
		DRM_ERROR("New session without create command!\n");
		r = -ENOENT;
	}

out:
	if (!r) {
		/* No error, free all destroyed handle slots */
		tmp = destroyed;
	} else {
		/* Error during parsing, free all allocated handle slots */
		tmp = allocated;
	}

	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
		if (tmp & (1 << i))
			atomic_set(&p->adev->vce.handles[i], 0);

	return r;
}
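/*
 * The created/destroyed/allocated words above are per-session bitmasks
 * (hypothetical walk-through): a create for session slot 1 sets
 * created = 0x2, and a later destroy sets destroyed = 0x2.  On success
 * the destroyed slots are released (tmp = destroyed); on a parse error
 * the freshly allocated slots are rolled back instead (tmp =
 * allocated), so a rejected IB never leaks a session handle.
 */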
/**
 * amdgpu_vce_ring_parse_cs_vm - parse the command stream in VM mode
 *
 * @p: parser context
 * @ib_idx: indirect buffer to parse
 *
 */
int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx)
{
	struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
	int session_idx = -1;
	uint32_t destroyed = 0;
	uint32_t created = 0;
	uint32_t allocated = 0;
	uint32_t tmp, handle = 0;
	int i, r = 0, idx = 0;

	while (idx < ib->length_dw) {
		uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
		uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);

		if ((len < 8) || (len & 3)) {
			DRM_ERROR("invalid VCE command length (%d)!\n", len);
			r = -EINVAL;
			goto out;
		}

		switch (cmd) {
		case 0x00000001: /* session */
			handle = amdgpu_get_ib_value(p, ib_idx, idx + 2);
			session_idx = amdgpu_vce_validate_handle(p, handle,
								 &allocated);
			if (session_idx < 0) {
				r = session_idx;
				goto out;
			}
			break;

		case 0x01000001: /* create */
			created |= 1 << session_idx;
			if (destroyed & (1 << session_idx)) {
				destroyed &= ~(1 << session_idx);
				allocated |= 1 << session_idx;

			} else if (!(allocated & (1 << session_idx))) {
				DRM_ERROR("Handle already in use!\n");
				r = -EINVAL;
				goto out;
			}

			break;

		case 0x02000001: /* destroy */
			destroyed |= 1 << session_idx;
			break;

		default:
			break;
		}

		if (session_idx == -1) {
			DRM_ERROR("no session command at start of IB\n");
			r = -EINVAL;
			goto out;
		}

		idx += len / 4;
	}

	if (allocated & ~created) {
		DRM_ERROR("New session without create command!\n");
		r = -ENOENT;
	}

out:
	if (!r) {
		/* No error, free all destroyed handle slots */
		tmp = destroyed;
		amdgpu_ib_free(p->adev, ib, NULL);
	} else {
		/* Error during parsing, free all allocated handle slots */
		tmp = allocated;
	}

	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
		if (tmp & (1 << i))
			atomic_set(&p->adev->vce.handles[i], 0);

	return r;
}
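/*
 * Unlike amdgpu_vce_ring_parse_cs() above, the VM-mode parser only
 * tracks session create/destroy state: the buffer addresses in the IB
 * are already VM virtual addresses, so no relocation patching or
 * placement validation is needed.
 */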
/**
 * amdgpu_vce_ring_emit_ib - execute indirect buffer
 *
 * @ring: engine to use
 * @job: job the IB belongs to
 * @ib: the IB to execute
 * @flags: unused here
 *
 */
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring,
			     struct amdgpu_job *job,
			     struct amdgpu_ib *ib,
			     uint32_t flags)
{
	amdgpu_ring_write(ring, VCE_CMD_IB);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}

/**
 * amdgpu_vce_ring_emit_fence - add a fence command to the ring
 *
 * @ring: engine to use
 * @addr: address to write the sequence number to
 * @seq: sequence number to write
 * @flags: fence flags (AMDGPU_FENCE_FLAG_64BIT is not supported)
 *
 */
void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
				unsigned flags)
{
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	amdgpu_ring_write(ring, VCE_CMD_FENCE);
	amdgpu_ring_write(ring, addr);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, VCE_CMD_TRAP);
	amdgpu_ring_write(ring, VCE_CMD_END);
}

/**
 * amdgpu_vce_ring_test_ring - test if VCE ring is working
 *
 * @ring: the engine to test on
 *
 */
int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t rptr;
	unsigned i;
	int r, timeout = adev->usec_timeout;

	/* skip ring test for sriov */
	if (amdgpu_sriov_vf(adev))
		return 0;

	r = amdgpu_ring_alloc(ring, 16);
	if (r)
		return r;

	rptr = amdgpu_ring_get_rptr(ring);

	amdgpu_ring_write(ring, VCE_CMD_END);
	amdgpu_ring_commit(ring);

	for (i = 0; i < timeout; i++) {
		if (amdgpu_ring_get_rptr(ring) != rptr)
			break;
		udelay(1);
	}

	if (i >= timeout)
		r = -ETIMEDOUT;

	return r;
}
/**
 * amdgpu_vce_ring_test_ib - test if VCE IBs are working
 *
 * @ring: the engine to test on
 * @timeout: timeout in jiffies
 *
 */
int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct dma_fence *fence = NULL;
	struct amdgpu_bo *bo = NULL;
	long r;

	/* skip vce ring1/2 ib test for now, since it's not reliable */
	if (ring != &ring->adev->vce.ring[0])
		return 0;

	r = amdgpu_bo_create_reserved(ring->adev, 512, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_vce_get_create_msg(ring, 1, bo, NULL);
	if (r)
		goto error;

	r = amdgpu_vce_get_destroy_msg(ring, 1, true, &fence);
	if (r)
		goto error;

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0)
		r = -ETIMEDOUT;
	else if (r > 0)
		r = 0;

error:
	dma_fence_put(fence);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);
	return r;
}