1/* $NetBSD: radeon_uvd.c,v 1.7 2021/12/18 23:45:43 riastradh Exp $ */ 2 3/* 4 * Copyright 2011 Advanced Micro Devices, Inc. 5 * All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sub license, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * The above copyright notice and this permission notice (including the 24 * next paragraph) shall be included in all copies or substantial portions 25 * of the Software. 26 * 27 */ 28/* 29 * Authors: 30 * Christian K��nig <deathsimple@vodafone.de> 31 */ 32 33#include <sys/cdefs.h> 34__KERNEL_RCSID(0, "$NetBSD: radeon_uvd.c,v 1.7 2021/12/18 23:45:43 riastradh Exp $"); 35 36#include <linux/firmware.h> 37#include <linux/module.h> 38 39#include <drm/drm.h> 40 41#include "radeon.h" 42#include "radeon_ucode.h" 43#include "r600d.h" 44 45#include <linux/nbsd-namespace.h> 46 47/* 1 second timeout */ 48#define UVD_IDLE_TIMEOUT_MS 1000 49 50/* Firmware Names */ 51#define FIRMWARE_R600 "radeon/R600_uvd.bin" 52#define FIRMWARE_RS780 "radeon/RS780_uvd.bin" 53#define FIRMWARE_RV770 "radeon/RV770_uvd.bin" 54#define FIRMWARE_RV710 "radeon/RV710_uvd.bin" 55#define FIRMWARE_CYPRESS "radeon/CYPRESS_uvd.bin" 56#define FIRMWARE_SUMO "radeon/SUMO_uvd.bin" 57#define FIRMWARE_TAHITI "radeon/TAHITI_uvd.bin" 58#define FIRMWARE_BONAIRE_LEGACY "radeon/BONAIRE_uvd.bin" 59#define FIRMWARE_BONAIRE "radeon/bonaire_uvd.bin" 60 61MODULE_FIRMWARE(FIRMWARE_R600); 62MODULE_FIRMWARE(FIRMWARE_RS780); 63MODULE_FIRMWARE(FIRMWARE_RV770); 64MODULE_FIRMWARE(FIRMWARE_RV710); 65MODULE_FIRMWARE(FIRMWARE_CYPRESS); 66MODULE_FIRMWARE(FIRMWARE_SUMO); 67MODULE_FIRMWARE(FIRMWARE_TAHITI); 68MODULE_FIRMWARE(FIRMWARE_BONAIRE_LEGACY); 69MODULE_FIRMWARE(FIRMWARE_BONAIRE); 70 71static void radeon_uvd_idle_work_handler(struct work_struct *work); 72 73int radeon_uvd_init(struct radeon_device *rdev) 74{ 75 unsigned long bo_size; 76 const char *fw_name = NULL, *legacy_fw_name = NULL; 77 int i, r; 78 79 INIT_DELAYED_WORK(&rdev->uvd.idle_work, radeon_uvd_idle_work_handler); 80 81 switch (rdev->family) { 82 case CHIP_RV610: 83 case CHIP_RV630: 84 case CHIP_RV670: 85 case CHIP_RV620: 86 case CHIP_RV635: 87 legacy_fw_name = FIRMWARE_R600; 88 break; 89 90 case CHIP_RS780: 91 case CHIP_RS880: 92 legacy_fw_name = FIRMWARE_RS780; 93 break; 94 95 case CHIP_RV770: 96 legacy_fw_name = FIRMWARE_RV770; 97 break; 98 99 case CHIP_RV710: 100 case CHIP_RV730: 101 case CHIP_RV740: 102 legacy_fw_name = FIRMWARE_RV710; 103 break; 104 105 case CHIP_CYPRESS: 106 case CHIP_HEMLOCK: 107 case CHIP_JUNIPER: 108 case CHIP_REDWOOD: 109 case CHIP_CEDAR: 110 legacy_fw_name = FIRMWARE_CYPRESS; 111 break; 112 113 case CHIP_SUMO: 114 case CHIP_SUMO2: 115 case CHIP_PALM: 116 case CHIP_CAYMAN: 117 case CHIP_BARTS: 118 case CHIP_TURKS: 119 case CHIP_CAICOS: 120 legacy_fw_name = FIRMWARE_SUMO; 121 break; 122 123 case CHIP_TAHITI: 124 case CHIP_VERDE: 125 case CHIP_PITCAIRN: 126 case CHIP_ARUBA: 127 case CHIP_OLAND: 128 legacy_fw_name = FIRMWARE_TAHITI; 129 break; 130 131 case CHIP_BONAIRE: 132 case CHIP_KABINI: 133 case CHIP_KAVERI: 134 case CHIP_HAWAII: 135 case CHIP_MULLINS: 136 legacy_fw_name = FIRMWARE_BONAIRE_LEGACY; 137 fw_name = FIRMWARE_BONAIRE; 138 break; 139 140 default: 141 return -EINVAL; 142 } 143 144 rdev->uvd.fw_header_present = false; 145 rdev->uvd.max_handles = RADEON_DEFAULT_UVD_HANDLES; 146 if (fw_name) { 147 /* Let's try to load the newer firmware first */ 148 r = request_firmware(&rdev->uvd_fw, fw_name, rdev->dev); 149 if (r) { 150 dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n", 151 fw_name); 152 } else { 153 struct common_firmware_header *hdr = (void *)rdev->uvd_fw->data; 154 unsigned version_major, version_minor, family_id; 155 156 r = radeon_ucode_validate(rdev->uvd_fw); 157 if (r) 158 return r; 159 160 rdev->uvd.fw_header_present = true; 161 162 family_id = le32_to_cpu(hdr->ucode_version) & 0xff; 163 version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff; 164 version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff; 165 DRM_INFO("Found UVD firmware Version: %hu.%hu Family ID: %hu\n", 166 version_major, version_minor, family_id); 167 168 /* 169 * Limit the number of UVD handles depending on 170 * microcode major and minor versions. 171 */ 172 if ((version_major >= 0x01) && (version_minor >= 0x37)) 173 rdev->uvd.max_handles = RADEON_MAX_UVD_HANDLES; 174 } 175 } 176 177 /* 178 * In case there is only legacy firmware, or we encounter an error 179 * while loading the new firmware, we fall back to loading the legacy 180 * firmware now. 181 */ 182 if (!fw_name || r) { 183 r = request_firmware(&rdev->uvd_fw, legacy_fw_name, rdev->dev); 184 if (r) { 185 dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n", 186 legacy_fw_name); 187 return r; 188 } 189 } 190 191 bo_size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 8) + 192 RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE + 193 RADEON_UVD_SESSION_SIZE * rdev->uvd.max_handles; 194 r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true, 195 RADEON_GEM_DOMAIN_VRAM, 0, NULL, 196 NULL, &rdev->uvd.vcpu_bo); 197 if (r) { 198 dev_err(rdev->dev, "(%d) failed to allocate UVD bo\n", r); 199 return r; 200 } 201 202 r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false); 203 if (r) { 204 radeon_bo_unref(&rdev->uvd.vcpu_bo); 205 dev_err(rdev->dev, "(%d) failed to reserve UVD bo\n", r); 206 return r; 207 } 208 209 r = radeon_bo_pin(rdev->uvd.vcpu_bo, RADEON_GEM_DOMAIN_VRAM, 210 &rdev->uvd.gpu_addr); 211 if (r) { 212 radeon_bo_unreserve(rdev->uvd.vcpu_bo); 213 radeon_bo_unref(&rdev->uvd.vcpu_bo); 214 dev_err(rdev->dev, "(%d) UVD bo pin failed\n", r); 215 return r; 216 } 217 218 r = radeon_bo_kmap(rdev->uvd.vcpu_bo, &rdev->uvd.cpu_addr); 219 if (r) { 220 dev_err(rdev->dev, "(%d) UVD map failed\n", r); 221 return r; 222 } 223 224 radeon_bo_unreserve(rdev->uvd.vcpu_bo); 225 226 for (i = 0; i < rdev->uvd.max_handles; ++i) { 227 atomic_set(&rdev->uvd.handles[i], 0); 228 rdev->uvd.filp[i] = NULL; 229 rdev->uvd.img_size[i] = 0; 230 } 231 232 return 0; 233} 234 235void radeon_uvd_fini(struct radeon_device *rdev) 236{ 237 int r; 238 239 if (rdev->uvd.vcpu_bo == NULL) 240 return; 241 242 r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false); 243 if (!r) { 244 radeon_bo_kunmap(rdev->uvd.vcpu_bo); 245 radeon_bo_unpin(rdev->uvd.vcpu_bo); 246 radeon_bo_unreserve(rdev->uvd.vcpu_bo); 247 } 248 249 radeon_bo_unref(&rdev->uvd.vcpu_bo); 250 251 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX]); 252 253 release_firmware(rdev->uvd_fw); 254} 255 256int radeon_uvd_suspend(struct radeon_device *rdev) 257{ 258 int i, r; 259 260 if (rdev->uvd.vcpu_bo == NULL) 261 return 0; 262 263 for (i = 0; i < rdev->uvd.max_handles; ++i) { 264 uint32_t handle = atomic_read(&rdev->uvd.handles[i]); 265 if (handle != 0) { 266 struct radeon_fence *fence; 267 268 radeon_uvd_note_usage(rdev); 269 270 r = radeon_uvd_get_destroy_msg(rdev, 271 R600_RING_TYPE_UVD_INDEX, handle, &fence); 272 if (r) { 273 DRM_ERROR("Error destroying UVD (%d)!\n", r); 274 continue; 275 } 276 277 radeon_fence_wait(fence, false); 278 radeon_fence_unref(&fence); 279 280 rdev->uvd.filp[i] = NULL; 281 atomic_set(&rdev->uvd.handles[i], 0); 282 } 283 } 284 285 return 0; 286} 287 288int radeon_uvd_resume(struct radeon_device *rdev) 289{ 290 unsigned size; 291 void *ptr; 292 293 if (rdev->uvd.vcpu_bo == NULL) 294 return -EINVAL; 295 296 memcpy(rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size); 297 298 size = radeon_bo_size(rdev->uvd.vcpu_bo); 299 size -= rdev->uvd_fw->size; 300 301 ptr = rdev->uvd.cpu_addr; 302 ptr += rdev->uvd_fw->size; 303 304 memset(ptr, 0, size); 305 306 return 0; 307} 308 309void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo, 310 uint32_t allowed_domains) 311{ 312 int i; 313 314 for (i = 0; i < rbo->placement.num_placement; ++i) { 315 rbo->placements[i].fpfn = 0 >> PAGE_SHIFT; 316 rbo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT; 317 } 318 319 /* If it must be in VRAM it must be in the first segment as well */ 320 if (allowed_domains == RADEON_GEM_DOMAIN_VRAM) 321 return; 322 323 /* abort if we already have more than one placement */ 324 if (rbo->placement.num_placement > 1) 325 return; 326 327 /* add another 256MB segment */ 328 rbo->placements[1] = rbo->placements[0]; 329 rbo->placements[1].fpfn += (256 * 1024 * 1024) >> PAGE_SHIFT; 330 rbo->placements[1].lpfn += (256 * 1024 * 1024) >> PAGE_SHIFT; 331 rbo->placement.num_placement++; 332 rbo->placement.num_busy_placement++; 333} 334 335void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp) 336{ 337 int i, r; 338 for (i = 0; i < rdev->uvd.max_handles; ++i) { 339 uint32_t handle = atomic_read(&rdev->uvd.handles[i]); 340 if (handle != 0 && rdev->uvd.filp[i] == filp) { 341 struct radeon_fence *fence; 342 343 radeon_uvd_note_usage(rdev); 344 345 r = radeon_uvd_get_destroy_msg(rdev, 346 R600_RING_TYPE_UVD_INDEX, handle, &fence); 347 if (r) { 348 DRM_ERROR("Error destroying UVD (%d)!\n", r); 349 continue; 350 } 351 352 radeon_fence_wait(fence, false); 353 radeon_fence_unref(&fence); 354 355 rdev->uvd.filp[i] = NULL; 356 atomic_set(&rdev->uvd.handles[i], 0); 357 } 358 } 359} 360 361static int radeon_uvd_cs_msg_decode(uint32_t *msg, unsigned buf_sizes[]) 362{ 363 unsigned stream_type = msg[4]; 364 unsigned width = msg[6]; 365 unsigned height = msg[7]; 366 unsigned dpb_size = msg[9]; 367 unsigned pitch = msg[28]; 368 369 unsigned width_in_mb = width / 16; 370 unsigned height_in_mb = ALIGN(height / 16, 2); 371 372 unsigned image_size, tmp, min_dpb_size; 373 374 image_size = width * height; 375 image_size += image_size / 2; 376 image_size = ALIGN(image_size, 1024); 377 378 switch (stream_type) { 379 case 0: /* H264 */ 380 381 /* reference picture buffer */ 382 min_dpb_size = image_size * 17; 383 384 /* macroblock context buffer */ 385 min_dpb_size += width_in_mb * height_in_mb * 17 * 192; 386 387 /* IT surface buffer */ 388 min_dpb_size += width_in_mb * height_in_mb * 32; 389 break; 390 391 case 1: /* VC1 */ 392 393 /* reference picture buffer */ 394 min_dpb_size = image_size * 3; 395 396 /* CONTEXT_BUFFER */ 397 min_dpb_size += width_in_mb * height_in_mb * 128; 398 399 /* IT surface buffer */ 400 min_dpb_size += width_in_mb * 64; 401 402 /* DB surface buffer */ 403 min_dpb_size += width_in_mb * 128; 404 405 /* BP */ 406 tmp = max(width_in_mb, height_in_mb); 407 min_dpb_size += ALIGN(tmp * 7 * 16, 64); 408 break; 409 410 case 3: /* MPEG2 */ 411 412 /* reference picture buffer */ 413 min_dpb_size = image_size * 3; 414 break; 415 416 case 4: /* MPEG4 */ 417 418 /* reference picture buffer */ 419 min_dpb_size = image_size * 3; 420 421 /* CM */ 422 min_dpb_size += width_in_mb * height_in_mb * 64; 423 424 /* IT surface buffer */ 425 min_dpb_size += ALIGN(width_in_mb * height_in_mb * 32, 64); 426 break; 427 428 default: 429 DRM_ERROR("UVD codec not handled %d!\n", stream_type); 430 return -EINVAL; 431 } 432 433 if (width > pitch) { 434 DRM_ERROR("Invalid UVD decoding target pitch!\n"); 435 return -EINVAL; 436 } 437 438 if (dpb_size < min_dpb_size) { 439 DRM_ERROR("Invalid dpb_size in UVD message (%d / %d)!\n", 440 dpb_size, min_dpb_size); 441 return -EINVAL; 442 } 443 444 buf_sizes[0x1] = dpb_size; 445 buf_sizes[0x2] = image_size; 446 return 0; 447} 448 449static int radeon_uvd_validate_codec(struct radeon_cs_parser *p, 450 unsigned stream_type) 451{ 452 switch (stream_type) { 453 case 0: /* H264 */ 454 case 1: /* VC1 */ 455 /* always supported */ 456 return 0; 457 458 case 3: /* MPEG2 */ 459 case 4: /* MPEG4 */ 460 /* only since UVD 3 */ 461 if (p->rdev->family >= CHIP_PALM) 462 return 0; 463 464 /* fall through */ 465 default: 466 DRM_ERROR("UVD codec not supported by hardware %d!\n", 467 stream_type); 468 return -EINVAL; 469 } 470} 471 472static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo, 473 unsigned offset, unsigned buf_sizes[]) 474{ 475 int32_t *msg, msg_type, handle; 476 unsigned img_size = 0; 477 struct dma_fence *f; 478 void *ptr; 479 480 int i, r; 481 482 if (offset & 0x3F) { 483 DRM_ERROR("UVD messages must be 64 byte aligned!\n"); 484 return -EINVAL; 485 } 486 487 f = dma_resv_get_excl(bo->tbo.base.resv); 488 if (f) { 489 r = radeon_fence_wait((struct radeon_fence *)f, false); 490 if (r) { 491 DRM_ERROR("Failed waiting for UVD message (%d)!\n", r); 492 return r; 493 } 494 } 495 496 r = radeon_bo_kmap(bo, &ptr); 497 if (r) { 498 DRM_ERROR("Failed mapping the UVD message (%d)!\n", r); 499 return r; 500 } 501 502 msg = ptr + offset; 503 504 msg_type = msg[1]; 505 handle = msg[2]; 506 507 if (handle == 0) { 508 DRM_ERROR("Invalid UVD handle!\n"); 509 return -EINVAL; 510 } 511 512 switch (msg_type) { 513 case 0: 514 /* it's a create msg, calc image size (width * height) */ 515 img_size = msg[7] * msg[8]; 516 517 r = radeon_uvd_validate_codec(p, msg[4]); 518 radeon_bo_kunmap(bo); 519 if (r) 520 return r; 521 522 /* try to alloc a new handle */ 523 for (i = 0; i < p->rdev->uvd.max_handles; ++i) { 524 if (atomic_read(&p->rdev->uvd.handles[i]) == handle) { 525 DRM_ERROR("Handle 0x%x already in use!\n", handle); 526 return -EINVAL; 527 } 528 529 if (!atomic_cmpxchg(&p->rdev->uvd.handles[i], 0, handle)) { 530 p->rdev->uvd.filp[i] = p->filp; 531 p->rdev->uvd.img_size[i] = img_size; 532 return 0; 533 } 534 } 535 536 DRM_ERROR("No more free UVD handles!\n"); 537 return -EINVAL; 538 539 case 1: 540 /* it's a decode msg, validate codec and calc buffer sizes */ 541 r = radeon_uvd_validate_codec(p, msg[4]); 542 if (!r) 543 r = radeon_uvd_cs_msg_decode(msg, buf_sizes); 544 radeon_bo_kunmap(bo); 545 if (r) 546 return r; 547 548 /* validate the handle */ 549 for (i = 0; i < p->rdev->uvd.max_handles; ++i) { 550 if (atomic_read(&p->rdev->uvd.handles[i]) == handle) { 551 if (p->rdev->uvd.filp[i] != p->filp) { 552 DRM_ERROR("UVD handle collision detected!\n"); 553 return -EINVAL; 554 } 555 return 0; 556 } 557 } 558 559 DRM_ERROR("Invalid UVD handle 0x%x!\n", handle); 560 return -ENOENT; 561 562 case 2: 563 /* it's a destroy msg, free the handle */ 564 for (i = 0; i < p->rdev->uvd.max_handles; ++i) 565 atomic_cmpxchg(&p->rdev->uvd.handles[i], handle, 0); 566 radeon_bo_kunmap(bo); 567 return 0; 568 569 default: 570 571 DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type); 572 return -EINVAL; 573 } 574 575 BUG(); 576 return -EINVAL; 577} 578 579static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p, 580 int data0, int data1, 581 unsigned buf_sizes[], bool *has_msg_cmd) 582{ 583 struct radeon_cs_chunk *relocs_chunk; 584 struct radeon_bo_list *reloc; 585 unsigned idx, cmd, offset; 586 uint64_t start, end; 587 int r; 588 589 relocs_chunk = p->chunk_relocs; 590 offset = radeon_get_ib_value(p, data0); 591 idx = radeon_get_ib_value(p, data1); 592 if (idx >= relocs_chunk->length_dw) { 593 DRM_ERROR("Relocs at %d after relocations chunk end %d !\n", 594 idx, relocs_chunk->length_dw); 595 return -EINVAL; 596 } 597 598 reloc = &p->relocs[(idx / 4)]; 599 start = reloc->gpu_offset; 600 end = start + radeon_bo_size(reloc->robj); 601 start += offset; 602 603 p->ib.ptr[data0] = start & 0xFFFFFFFF; 604 p->ib.ptr[data1] = start >> 32; 605 606 cmd = radeon_get_ib_value(p, p->idx) >> 1; 607 608 if (cmd < 0x4) { 609 if (end <= start) { 610 DRM_ERROR("invalid reloc offset %X!\n", offset); 611 return -EINVAL; 612 } 613 if ((end - start) < buf_sizes[cmd]) { 614 DRM_ERROR("buffer (%d) to small (%d / %d)!\n", cmd, 615 (unsigned)(end - start), buf_sizes[cmd]); 616 return -EINVAL; 617 } 618 619 } else if (cmd != 0x100) { 620 DRM_ERROR("invalid UVD command %X!\n", cmd); 621 return -EINVAL; 622 } 623 624 if ((start >> 28) != ((end - 1) >> 28)) { 625 DRM_ERROR("reloc %"PRIX64"-%"PRIX64" crossing 256MB boundary!\n", 626 start, end); 627 return -EINVAL; 628 } 629 630 /* TODO: is this still necessary on NI+ ? */ 631 if ((cmd == 0 || cmd == 0x3) && 632 (start >> 28) != (p->rdev->uvd.gpu_addr >> 28)) { 633 DRM_ERROR("msg/fb buffer %"PRIX64"-%"PRIX64" out of 256MB segment!\n", 634 start, end); 635 return -EINVAL; 636 } 637 638 if (cmd == 0) { 639 if (*has_msg_cmd) { 640 DRM_ERROR("More than one message in a UVD-IB!\n"); 641 return -EINVAL; 642 } 643 *has_msg_cmd = true; 644 r = radeon_uvd_cs_msg(p, reloc->robj, offset, buf_sizes); 645 if (r) 646 return r; 647 } else if (!*has_msg_cmd) { 648 DRM_ERROR("Message needed before other commands are send!\n"); 649 return -EINVAL; 650 } 651 652 return 0; 653} 654 655static int radeon_uvd_cs_reg(struct radeon_cs_parser *p, 656 struct radeon_cs_packet *pkt, 657 int *data0, int *data1, 658 unsigned buf_sizes[], 659 bool *has_msg_cmd) 660{ 661 int i, r; 662 663 p->idx++; 664 for (i = 0; i <= pkt->count; ++i) { 665 switch (pkt->reg + i*4) { 666 case UVD_GPCOM_VCPU_DATA0: 667 *data0 = p->idx; 668 break; 669 case UVD_GPCOM_VCPU_DATA1: 670 *data1 = p->idx; 671 break; 672 case UVD_GPCOM_VCPU_CMD: 673 r = radeon_uvd_cs_reloc(p, *data0, *data1, 674 buf_sizes, has_msg_cmd); 675 if (r) 676 return r; 677 break; 678 case UVD_ENGINE_CNTL: 679 case UVD_NO_OP: 680 break; 681 default: 682 DRM_ERROR("Invalid reg 0x%X!\n", 683 pkt->reg + i*4); 684 return -EINVAL; 685 } 686 p->idx++; 687 } 688 return 0; 689} 690 691int radeon_uvd_cs_parse(struct radeon_cs_parser *p) 692{ 693 struct radeon_cs_packet pkt; 694 int r, data0 = 0, data1 = 0; 695 696 /* does the IB has a msg command */ 697 bool has_msg_cmd = false; 698 699 /* minimum buffer sizes */ 700 unsigned buf_sizes[] = { 701 [0x00000000] = 2048, 702 [0x00000001] = 32 * 1024 * 1024, 703 [0x00000002] = 2048 * 1152 * 3, 704 [0x00000003] = 2048, 705 }; 706 707 if (p->chunk_ib->length_dw % 16) { 708 DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n", 709 p->chunk_ib->length_dw); 710 return -EINVAL; 711 } 712 713 if (p->chunk_relocs == NULL) { 714 DRM_ERROR("No relocation chunk !\n"); 715 return -EINVAL; 716 } 717 718 719 do { 720 r = radeon_cs_packet_parse(p, &pkt, p->idx); 721 if (r) 722 return r; 723 switch (pkt.type) { 724 case RADEON_PACKET_TYPE0: 725 r = radeon_uvd_cs_reg(p, &pkt, &data0, &data1, 726 buf_sizes, &has_msg_cmd); 727 if (r) 728 return r; 729 break; 730 case RADEON_PACKET_TYPE2: 731 p->idx += pkt.count + 2; 732 break; 733 default: 734 DRM_ERROR("Unknown packet type %d !\n", pkt.type); 735 return -EINVAL; 736 } 737 } while (p->idx < p->chunk_ib->length_dw); 738 739 if (!has_msg_cmd) { 740 DRM_ERROR("UVD-IBs need a msg command!\n"); 741 return -EINVAL; 742 } 743 744 return 0; 745} 746 747static int radeon_uvd_send_msg(struct radeon_device *rdev, 748 int ring, uint64_t addr, 749 struct radeon_fence **fence) 750{ 751 struct radeon_ib ib; 752 int i, r; 753 754 r = radeon_ib_get(rdev, ring, &ib, NULL, 64); 755 if (r) 756 return r; 757 758 ib.ptr[0] = PACKET0(UVD_GPCOM_VCPU_DATA0, 0); 759 ib.ptr[1] = addr; 760 ib.ptr[2] = PACKET0(UVD_GPCOM_VCPU_DATA1, 0); 761 ib.ptr[3] = addr >> 32; 762 ib.ptr[4] = PACKET0(UVD_GPCOM_VCPU_CMD, 0); 763 ib.ptr[5] = 0; 764 for (i = 6; i < 16; i += 2) { 765 ib.ptr[i] = PACKET0(UVD_NO_OP, 0); 766 ib.ptr[i+1] = 0; 767 } 768 ib.length_dw = 16; 769 770 r = radeon_ib_schedule(rdev, &ib, NULL, false); 771 772 if (fence) 773 *fence = radeon_fence_ref(ib.fence); 774 775 radeon_ib_free(rdev, &ib); 776 return r; 777} 778 779/* 780 * multiple fence commands without any stream commands in between can 781 * crash the vcpu so just try to emmit a dummy create/destroy msg to 782 * avoid this 783 */ 784int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring, 785 uint32_t handle, struct radeon_fence **fence) 786{ 787 /* we use the last page of the vcpu bo for the UVD message */ 788 uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) - 789 RADEON_GPU_PAGE_SIZE; 790 791 uint32_t *msg = rdev->uvd.cpu_addr + offs; 792 uint64_t addr = rdev->uvd.gpu_addr + offs; 793 794 int r, i; 795 796 r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true); 797 if (r) 798 return r; 799 800 /* stitch together an UVD create msg */ 801 msg[0] = cpu_to_le32(0x00000de4); 802 msg[1] = cpu_to_le32(0x00000000); 803 msg[2] = cpu_to_le32(handle); 804 msg[3] = cpu_to_le32(0x00000000); 805 msg[4] = cpu_to_le32(0x00000000); 806 msg[5] = cpu_to_le32(0x00000000); 807 msg[6] = cpu_to_le32(0x00000000); 808 msg[7] = cpu_to_le32(0x00000780); 809 msg[8] = cpu_to_le32(0x00000440); 810 msg[9] = cpu_to_le32(0x00000000); 811 msg[10] = cpu_to_le32(0x01b37000); 812 for (i = 11; i < 1024; ++i) 813 msg[i] = cpu_to_le32(0x0); 814 815 r = radeon_uvd_send_msg(rdev, ring, addr, fence); 816 radeon_bo_unreserve(rdev->uvd.vcpu_bo); 817 return r; 818} 819 820int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring, 821 uint32_t handle, struct radeon_fence **fence) 822{ 823 /* we use the last page of the vcpu bo for the UVD message */ 824 uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) - 825 RADEON_GPU_PAGE_SIZE; 826 827 uint32_t *msg = rdev->uvd.cpu_addr + offs; 828 uint64_t addr = rdev->uvd.gpu_addr + offs; 829 830 int r, i; 831 832 r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true); 833 if (r) 834 return r; 835 836 /* stitch together an UVD destroy msg */ 837 msg[0] = cpu_to_le32(0x00000de4); 838 msg[1] = cpu_to_le32(0x00000002); 839 msg[2] = cpu_to_le32(handle); 840 msg[3] = cpu_to_le32(0x00000000); 841 for (i = 4; i < 1024; ++i) 842 msg[i] = cpu_to_le32(0x0); 843 844 r = radeon_uvd_send_msg(rdev, ring, addr, fence); 845 radeon_bo_unreserve(rdev->uvd.vcpu_bo); 846 return r; 847} 848 849/** 850 * radeon_uvd_count_handles - count number of open streams 851 * 852 * @rdev: radeon_device pointer 853 * @sd: number of SD streams 854 * @hd: number of HD streams 855 * 856 * Count the number of open SD/HD streams as a hint for power mangement 857 */ 858static void radeon_uvd_count_handles(struct radeon_device *rdev, 859 unsigned *sd, unsigned *hd) 860{ 861 unsigned i; 862 863 *sd = 0; 864 *hd = 0; 865 866 for (i = 0; i < rdev->uvd.max_handles; ++i) { 867 if (!atomic_read(&rdev->uvd.handles[i])) 868 continue; 869 870 if (rdev->uvd.img_size[i] >= 720*576) 871 ++(*hd); 872 else 873 ++(*sd); 874 } 875} 876 877static void radeon_uvd_idle_work_handler(struct work_struct *work) 878{ 879 struct radeon_device *rdev = 880 container_of(work, struct radeon_device, uvd.idle_work.work); 881 882 if (radeon_fence_count_emitted(rdev, R600_RING_TYPE_UVD_INDEX) == 0) { 883 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { 884 radeon_uvd_count_handles(rdev, &rdev->pm.dpm.sd, 885 &rdev->pm.dpm.hd); 886 radeon_dpm_enable_uvd(rdev, false); 887 } else { 888 radeon_set_uvd_clocks(rdev, 0, 0); 889 } 890 } else { 891 schedule_delayed_work(&rdev->uvd.idle_work, 892 msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS)); 893 } 894} 895 896void radeon_uvd_note_usage(struct radeon_device *rdev) 897{ 898 bool streams_changed = false; 899 bool set_clocks = !cancel_delayed_work_sync(&rdev->uvd.idle_work); 900 set_clocks &= schedule_delayed_work(&rdev->uvd.idle_work, 901 msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS)); 902 903 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { 904 unsigned hd = 0, sd = 0; 905 radeon_uvd_count_handles(rdev, &sd, &hd); 906 if ((rdev->pm.dpm.sd != sd) || 907 (rdev->pm.dpm.hd != hd)) { 908 rdev->pm.dpm.sd = sd; 909 rdev->pm.dpm.hd = hd; 910 /* disable this for now */ 911 /*streams_changed = true;*/ 912 } 913 } 914 915 if (set_clocks || streams_changed) { 916 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { 917 radeon_dpm_enable_uvd(rdev, true); 918 } else { 919 radeon_set_uvd_clocks(rdev, 53300, 40000); 920 } 921 } 922} 923 924static unsigned radeon_uvd_calc_upll_post_div(unsigned vco_freq, 925 unsigned target_freq, 926 unsigned pd_min, 927 unsigned pd_even) 928{ 929 unsigned post_div = vco_freq / target_freq; 930 931 /* adjust to post divider minimum value */ 932 if (post_div < pd_min) 933 post_div = pd_min; 934 935 /* we alway need a frequency less than or equal the target */ 936 if ((vco_freq / post_div) > target_freq) 937 post_div += 1; 938 939 /* post dividers above a certain value must be even */ 940 if (post_div > pd_even && post_div % 2) 941 post_div += 1; 942 943 return post_div; 944} 945 946/** 947 * radeon_uvd_calc_upll_dividers - calc UPLL clock dividers 948 * 949 * @rdev: radeon_device pointer 950 * @vclk: wanted VCLK 951 * @dclk: wanted DCLK 952 * @vco_min: minimum VCO frequency 953 * @vco_max: maximum VCO frequency 954 * @fb_factor: factor to multiply vco freq with 955 * @fb_mask: limit and bitmask for feedback divider 956 * @pd_min: post divider minimum 957 * @pd_max: post divider maximum 958 * @pd_even: post divider must be even above this value 959 * @optimal_fb_div: resulting feedback divider 960 * @optimal_vclk_div: resulting vclk post divider 961 * @optimal_dclk_div: resulting dclk post divider 962 * 963 * Calculate dividers for UVDs UPLL (R6xx-SI, except APUs). 964 * Returns zero on success -EINVAL on error. 965 */ 966int radeon_uvd_calc_upll_dividers(struct radeon_device *rdev, 967 unsigned vclk, unsigned dclk, 968 unsigned vco_min, unsigned vco_max, 969 unsigned fb_factor, unsigned fb_mask, 970 unsigned pd_min, unsigned pd_max, 971 unsigned pd_even, 972 unsigned *optimal_fb_div, 973 unsigned *optimal_vclk_div, 974 unsigned *optimal_dclk_div) 975{ 976 unsigned vco_freq, ref_freq = rdev->clock.spll.reference_freq; 977 978 /* start off with something large */ 979 unsigned optimal_score = ~0; 980 981 /* loop through vco from low to high */ 982 vco_min = max(max(vco_min, vclk), dclk); 983 for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 100) { 984 985 uint64_t fb_div = (uint64_t)vco_freq * fb_factor; 986 unsigned vclk_div, dclk_div, score; 987 988 do_div(fb_div, ref_freq); 989 990 /* fb div out of range ? */ 991 if (fb_div > fb_mask) 992 break; /* it can oly get worse */ 993 994 fb_div &= fb_mask; 995 996 /* calc vclk divider with current vco freq */ 997 vclk_div = radeon_uvd_calc_upll_post_div(vco_freq, vclk, 998 pd_min, pd_even); 999 if (vclk_div > pd_max) 1000 break; /* vco is too big, it has to stop */ 1001 1002 /* calc dclk divider with current vco freq */ 1003 dclk_div = radeon_uvd_calc_upll_post_div(vco_freq, dclk, 1004 pd_min, pd_even); 1005 if (dclk_div > pd_max) 1006 break; /* vco is too big, it has to stop */ 1007 1008 /* calc score with current vco freq */ 1009 score = vclk - (vco_freq / vclk_div) + dclk - (vco_freq / dclk_div); 1010 1011 /* determine if this vco setting is better than current optimal settings */ 1012 if (score < optimal_score) { 1013 *optimal_fb_div = fb_div; 1014 *optimal_vclk_div = vclk_div; 1015 *optimal_dclk_div = dclk_div; 1016 optimal_score = score; 1017 if (optimal_score == 0) 1018 break; /* it can't get better than this */ 1019 } 1020 } 1021 1022 /* did we found a valid setup ? */ 1023 if (optimal_score == ~0) 1024 return -EINVAL; 1025 1026 return 0; 1027} 1028 1029int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev, 1030 unsigned cg_upll_func_cntl) 1031{ 1032 unsigned i; 1033 1034 /* make sure UPLL_CTLREQ is deasserted */ 1035 WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK); 1036 1037 mdelay(10); 1038 1039 /* assert UPLL_CTLREQ */ 1040 WREG32_P(cg_upll_func_cntl, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK); 1041 1042 /* wait for CTLACK and CTLACK2 to get asserted */ 1043 for (i = 0; i < 100; ++i) { 1044 uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK; 1045 if ((RREG32(cg_upll_func_cntl) & mask) == mask) 1046 break; 1047 mdelay(10); 1048 } 1049 1050 /* deassert UPLL_CTLREQ */ 1051 WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK); 1052 1053 if (i == 100) { 1054 DRM_ERROR("Timeout setting UVD clocks!\n"); 1055 return -ETIMEDOUT; 1056 } 1057 1058 return 0; 1059} 1060