/*	$NetBSD: amdgpu_ids.c,v 1.3 2021/12/19 12:02:39 riastradh Exp $	*/

/*
 * Copyright 2017 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_ids.c,v 1.3 2021/12/19 12:02:39 riastradh Exp $");

#include "amdgpu_ids.h"

#include <linux/idr.h>
#include <linux/dma-fence-array.h>


#include "amdgpu.h"
#include "amdgpu_trace.h"

#include <linux/nbsd-namespace.h>

/*
 * PASID manager
 *
 * PASIDs are global address space identifiers that can be shared
 * between the GPU, an IOMMU and the driver. VMs on different devices
 * may use the same PASID if they share the same address
 * space. Therefore PASIDs are allocated using a global IDA. VMs are
 * looked up from the PASID per amdgpu_device.
 */
#ifdef __NetBSD__	/* XXX */
struct ida amdgpu_pasid_ida;
#else
static DEFINE_IDA(amdgpu_pasid_ida);
#endif

/* Helper to free pasid from a fence callback */
struct amdgpu_pasid_cb {
	struct dma_fence_cb cb;
	unsigned int pasid;
};

/**
 * amdgpu_pasid_alloc - Allocate a PASID
 * @bits: Maximum width of the PASID in bits, must be at least 1
 *
 * Allocates a PASID of the given width while keeping smaller PASIDs
 * available if possible.
 *
 * Returns a positive integer on success. Returns %-EINVAL if bits==0.
 * Returns %-ENOSPC if no PASID was available. Returns %-ENOMEM on
 * memory allocation failure.
 */
int amdgpu_pasid_alloc(unsigned int bits)
{
	int pasid = -EINVAL;

	for (bits = min(bits, 31U); bits > 0; bits--) {
		pasid = ida_simple_get(&amdgpu_pasid_ida,
				       1U << (bits - 1), 1U << bits,
				       GFP_KERNEL);
		if (pasid != -ENOSPC)
			break;
	}

	if (pasid >= 0)
		trace_amdgpu_pasid_allocated(pasid);

	return pasid;
}

/**
 * amdgpu_pasid_free - Free a PASID
 * @pasid: PASID to free
 */
void amdgpu_pasid_free(unsigned int pasid)
{
	trace_amdgpu_pasid_freed(pasid);
	ida_simple_remove(&amdgpu_pasid_ida, pasid);
}

static void amdgpu_pasid_free_cb(struct dma_fence *fence,
				 struct dma_fence_cb *_cb)
{
	struct amdgpu_pasid_cb *cb =
		container_of(_cb, struct amdgpu_pasid_cb, cb);

	amdgpu_pasid_free(cb->pasid);
	dma_fence_put(fence);
	kfree(cb);
}

/**
 * amdgpu_pasid_free_delayed - free pasid when fences signal
 *
 * @resv: reservation object with the fences to wait for
 * @pasid: pasid to free
 *
 * Free the pasid only after all the fences in resv are signaled.
 */
void amdgpu_pasid_free_delayed(struct dma_resv *resv,
			       unsigned int pasid)
{
	struct dma_fence *fence, **fences;
	struct amdgpu_pasid_cb *cb;
	unsigned count;
	int r;

	r = dma_resv_get_fences_rcu(resv, NULL, &count, &fences);
	if (r)
		goto fallback;

	if (count == 0) {
		amdgpu_pasid_free(pasid);
		return;
	}

	if (count == 1) {
		fence = fences[0];
		kfree(fences);
	} else {
		uint64_t context = dma_fence_context_alloc(1);
		struct dma_fence_array *array;

		array = dma_fence_array_create(count, fences, context,
					       1, false);
		if (!array) {
			kfree(fences);
			goto fallback;
		}
		fence = &array->base;
	}

	cb = kmalloc(sizeof(*cb), GFP_KERNEL);
	if (!cb) {
		/* Last resort when we are OOM */
		dma_fence_wait(fence, false);
		dma_fence_put(fence);
		amdgpu_pasid_free(pasid);
	} else {
		cb->pasid = pasid;
		if (dma_fence_add_callback(fence, &cb->cb,
					   amdgpu_pasid_free_cb))
			amdgpu_pasid_free_cb(fence, &cb->cb);
	}

	return;

fallback:
	/* Not enough memory for the delayed delete; as a last resort,
	 * block for all the fences to complete.
	 */
	dma_resv_wait_timeout_rcu(resv, true, false,
				  MAX_SCHEDULE_TIMEOUT);
	amdgpu_pasid_free(pasid);
}

/*
 * VMID manager
 *
 * VMIDs are a per VMHUB identifier for page tables handling.
 */

/**
 * amdgpu_vmid_had_gpu_reset - check if reset occurred since last use
 *
 * @adev: amdgpu_device pointer
 * @id: VMID structure
 *
 * Check if GPU reset occurred since last use of the VMID.
 */
bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
			       struct amdgpu_vmid *id)
{
	return id->current_gpu_reset_count !=
		atomic_read(&adev->gpu_reset_counter);
}

/**
 * amdgpu_vmid_grab_idle - grab idle VMID
 *
 * @vm: vm to allocate id for
 * @ring: ring we want to submit job to
 * @sync: sync object where we add dependencies
 * @idle: resulting idle VMID
 *
 * Try to find an idle VMID, if none is idle add a fence to wait to the sync
 * object. Returns -ENOMEM when we are out of memory.
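 * Returns 0 when an idle VMID was found or a wait fence was added to @sync,
 * or a negative error code on failure.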
 */
static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,
				 struct amdgpu_ring *ring,
				 struct amdgpu_sync *sync,
				 struct amdgpu_vmid **idle)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned vmhub = ring->funcs->vmhub;
	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
	struct dma_fence **fences;
	unsigned i;
	int r;

	if (ring->vmid_wait && !dma_fence_is_signaled(ring->vmid_wait))
		return amdgpu_sync_fence(sync, ring->vmid_wait, false);

	fences = kmalloc_array(id_mgr->num_ids, sizeof(void *), GFP_KERNEL);
	if (!fences)
		return -ENOMEM;

	/* Check if we have an idle VMID */
	i = 0;
	list_for_each_entry((*idle), &id_mgr->ids_lru, list) {
		fences[i] = amdgpu_sync_peek_fence(&(*idle)->active, ring);
		if (!fences[i])
			break;
		++i;
	}

	/* If we can't find an idle VMID to use, wait till one becomes available */
	if (&(*idle)->list == &id_mgr->ids_lru) {
		u64 fence_context = adev->vm_manager.fence_context + ring->idx;
		unsigned seqno = ++adev->vm_manager.seqno[ring->idx];
		struct dma_fence_array *array;
		unsigned j;

		*idle = NULL;
		for (j = 0; j < i; ++j)
			dma_fence_get(fences[j]);

		array = dma_fence_array_create(i, fences, fence_context,
					       seqno, true);
		if (!array) {
			for (j = 0; j < i; ++j)
				dma_fence_put(fences[j]);
			kfree(fences);
			return -ENOMEM;
		}

		r = amdgpu_sync_fence(sync, &array->base, false);
		dma_fence_put(ring->vmid_wait);
		ring->vmid_wait = &array->base;
		return r;
	}
	kfree(fences);

	return 0;
}

/**
 * amdgpu_vmid_grab_reserved - try to assign reserved VMID
 *
 * @vm: vm to allocate id for
 * @ring: ring we want to submit job to
 * @sync: sync object where we add dependencies
 * @fence: fence protecting ID from reuse
 * @job: job who wants to use the VMID
 * @id: resulting VMID
 *
 * Try to assign a reserved VMID.
 */
static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
				     struct amdgpu_ring *ring,
				     struct amdgpu_sync *sync,
				     struct dma_fence *fence,
				     struct amdgpu_job *job,
				     struct amdgpu_vmid **id)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned vmhub = ring->funcs->vmhub;
	uint64_t fence_context = adev->fence_context + ring->idx;
	struct dma_fence *updates = sync->last_vm_update;
	bool needs_flush = vm->use_cpu_for_update;
	int r = 0;

	*id = vm->reserved_vmid[vmhub];
	if (updates && (*id)->flushed_updates &&
	    updates->context == (*id)->flushed_updates->context &&
	    !dma_fence_is_later(updates, (*id)->flushed_updates))
		updates = NULL;

	if ((*id)->owner != vm->direct.fence_context ||
	    job->vm_pd_addr != (*id)->pd_gpu_addr ||
	    updates || !(*id)->last_flush ||
	    ((*id)->last_flush->context != fence_context &&
	     !dma_fence_is_signaled((*id)->last_flush))) {
		struct dma_fence *tmp;

		/* to prevent one context from being starved by another context */
		(*id)->pd_gpu_addr = 0;
		tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
		if (tmp) {
			*id = NULL;
			r = amdgpu_sync_fence(sync, tmp, false);
			return r;
		}
		needs_flush = true;
	}

	/* Good, we can use this VMID. Remember this submission as
	 * user of the VMID.
	 */
	r = amdgpu_sync_fence(&(*id)->active, fence, false);
	if (r)
		return r;

	if (updates) {
		dma_fence_put((*id)->flushed_updates);
		(*id)->flushed_updates = dma_fence_get(updates);
	}
	job->vm_needs_flush = needs_flush;
	return 0;
}

/**
 * amdgpu_vmid_grab_used - try to reuse a VMID
 *
 * @vm: vm to allocate id for
 * @ring: ring we want to submit job to
 * @sync: sync object where we add dependencies
 * @fence: fence protecting ID from reuse
 * @job: job who wants to use the VMID
 * @id: resulting VMID
 *
 * Try to reuse a VMID for this submission.
 */
static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
				 struct amdgpu_ring *ring,
				 struct amdgpu_sync *sync,
				 struct dma_fence *fence,
				 struct amdgpu_job *job,
				 struct amdgpu_vmid **id)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned vmhub = ring->funcs->vmhub;
	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
	uint64_t fence_context = adev->fence_context + ring->idx;
	struct dma_fence *updates = sync->last_vm_update;
	int r;

	job->vm_needs_flush = vm->use_cpu_for_update;

	/* Check if we can use a VMID already assigned to this VM */
	list_for_each_entry_reverse((*id), &id_mgr->ids_lru, list) {
		bool needs_flush = vm->use_cpu_for_update;
		struct dma_fence *flushed;

		/* Check all the prerequisites to using this VMID */
		if ((*id)->owner != vm->direct.fence_context)
			continue;

		if ((*id)->pd_gpu_addr != job->vm_pd_addr)
			continue;

		if (!(*id)->last_flush ||
		    ((*id)->last_flush->context != fence_context &&
		     !dma_fence_is_signaled((*id)->last_flush)))
			needs_flush = true;

		flushed = (*id)->flushed_updates;
		if (updates && (!flushed || dma_fence_is_later(updates, flushed)))
			needs_flush = true;

		/* Concurrent flushes are only possible starting with Vega10 and
		 * are broken on Navi10 and Navi14.
		 */
		if (needs_flush && (adev->asic_type < CHIP_VEGA10 ||
				    adev->asic_type == CHIP_NAVI10 ||
				    adev->asic_type == CHIP_NAVI14))
			continue;

		/* Good, we can use this VMID. Remember this submission as
		 * user of the VMID.
		 */
		r = amdgpu_sync_fence(&(*id)->active, fence, false);
		if (r)
			return r;

		if (updates && (!flushed || dma_fence_is_later(updates, flushed))) {
			dma_fence_put((*id)->flushed_updates);
			(*id)->flushed_updates = dma_fence_get(updates);
		}

		job->vm_needs_flush |= needs_flush;
		return 0;
	}

	*id = NULL;
	return 0;
}

/**
 * amdgpu_vmid_grab - allocate the next free VMID
 *
 * @vm: vm to allocate id for
 * @ring: ring we want to submit job to
 * @sync: sync object where we add dependencies
 * @fence: fence protecting ID from reuse
 * @job: job who wants to use the VMID
 *
 * Allocate an id for the vm, adding fences to the sync obj as necessary.
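 * Returns 0 for success, a negative error code for failure.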
 */
int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
		     struct amdgpu_sync *sync, struct dma_fence *fence,
		     struct amdgpu_job *job)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned vmhub = ring->funcs->vmhub;
	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
	struct amdgpu_vmid *idle = NULL;
	struct amdgpu_vmid *id = NULL;
	int r = 0;

	mutex_lock(&id_mgr->lock);
	r = amdgpu_vmid_grab_idle(vm, ring, sync, &idle);
	if (r || !idle)
		goto error;

	if (vm->reserved_vmid[vmhub]) {
		r = amdgpu_vmid_grab_reserved(vm, ring, sync, fence, job, &id);
		if (r || !id)
			goto error;
	} else {
		r = amdgpu_vmid_grab_used(vm, ring, sync, fence, job, &id);
		if (r)
			goto error;

		if (!id) {
			struct dma_fence *updates = sync->last_vm_update;

			/* Still no ID to use? Then use the idle one found earlier */
			id = idle;

			/* Remember this submission as user of the VMID */
			r = amdgpu_sync_fence(&id->active, fence, false);
			if (r)
				goto error;

			dma_fence_put(id->flushed_updates);
			id->flushed_updates = dma_fence_get(updates);
			job->vm_needs_flush = true;
		}

		list_move_tail(&id->list, &id_mgr->ids_lru);
	}

	id->pd_gpu_addr = job->vm_pd_addr;
	id->owner = vm->direct.fence_context;

	if (job->vm_needs_flush) {
		dma_fence_put(id->last_flush);
		id->last_flush = NULL;
	}
	job->vmid = id - id_mgr->ids;
	job->pasid = vm->pasid;
	trace_amdgpu_vm_grab_id(vm, ring, job);

error:
	mutex_unlock(&id_mgr->lock);
	return r;
}

int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
			       struct amdgpu_vm *vm,
			       unsigned vmhub)
{
	struct amdgpu_vmid_mgr *id_mgr;
	struct amdgpu_vmid *idle;
	int r = 0;

	id_mgr = &adev->vm_manager.id_mgr[vmhub];
	mutex_lock(&id_mgr->lock);
	if (vm->reserved_vmid[vmhub])
		goto unlock;
	if (atomic_inc_return(&id_mgr->reserved_vmid_num) >
	    AMDGPU_VM_MAX_RESERVED_VMID) {
		DRM_ERROR("Over limitation of reserved vmid\n");
		atomic_dec(&id_mgr->reserved_vmid_num);
		r = -EINVAL;
		goto unlock;
	}
	/* Select the first entry VMID */
	idle = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid, list);
	list_del_init(&idle->list);
	vm->reserved_vmid[vmhub] = idle;
	mutex_unlock(&id_mgr->lock);

	return 0;
unlock:
	mutex_unlock(&id_mgr->lock);
	return r;
}

void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
			       struct amdgpu_vm *vm,
			       unsigned vmhub)
{
	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];

	mutex_lock(&id_mgr->lock);
	if (vm->reserved_vmid[vmhub]) {
		list_add(&vm->reserved_vmid[vmhub]->list,
			 &id_mgr->ids_lru);
		vm->reserved_vmid[vmhub] = NULL;
		atomic_dec(&id_mgr->reserved_vmid_num);
	}
	mutex_unlock(&id_mgr->lock);
}

/**
 * amdgpu_vmid_reset - reset VMID to zero
 *
 * @adev: amdgpu device structure
 * @vmhub: vmhub the VMID belongs to
 * @vmid: vmid number to use
 *
 * Reset saved GDS, GWS and OA to force switch on next flush.
 */
void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub,
		       unsigned vmid)
{
	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
	struct amdgpu_vmid *id = &id_mgr->ids[vmid];

	mutex_lock(&id_mgr->lock);
	id->owner = 0;
	id->gds_base = 0;
	id->gds_size = 0;
	id->gws_base = 0;
	id->gws_size = 0;
	id->oa_base = 0;
	id->oa_size = 0;
	mutex_unlock(&id_mgr->lock);
}

/**
 * amdgpu_vmid_reset_all - reset all VMIDs to zero
 *
 * @adev: amdgpu device structure
 *
 * Reset the VMIDs to force a flush on next use
 */
void amdgpu_vmid_reset_all(struct amdgpu_device *adev)
{
	unsigned i, j;

	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
		struct amdgpu_vmid_mgr *id_mgr =
			&adev->vm_manager.id_mgr[i];

		for (j = 1; j < id_mgr->num_ids; ++j)
			amdgpu_vmid_reset(adev, i, j);
	}
}

/**
 * amdgpu_vmid_mgr_init - init the VMID manager
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize the VM manager structures
 */
void amdgpu_vmid_mgr_init(struct amdgpu_device *adev)
{
	unsigned i, j;

	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
		struct amdgpu_vmid_mgr *id_mgr =
			&adev->vm_manager.id_mgr[i];

		mutex_init(&id_mgr->lock);
		INIT_LIST_HEAD(&id_mgr->ids_lru);
		atomic_set(&id_mgr->reserved_vmid_num, 0);

		/* skip over VMID 0, since it is the system VM */
		for (j = 1; j < id_mgr->num_ids; ++j) {
			amdgpu_vmid_reset(adev, i, j);
			amdgpu_sync_create(&id_mgr->ids[j].active);
			list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru);
		}
	}
}

/**
 * amdgpu_vmid_mgr_fini - cleanup VM manager
 *
 * @adev: amdgpu_device pointer
 *
 * Cleanup the VM manager and free resources.
 */
void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev)
{
	unsigned i, j;

	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
		struct amdgpu_vmid_mgr *id_mgr =
			&adev->vm_manager.id_mgr[i];

		mutex_destroy(&id_mgr->lock);
		for (j = 0; j < AMDGPU_NUM_VMID; ++j) {
			struct amdgpu_vmid *id = &id_mgr->ids[j];

			amdgpu_sync_free(&id->active);
			dma_fence_put(id->flushed_updates);
			dma_fence_put(id->last_flush);
			dma_fence_put(id->pasid_mapping);
		}
	}
}