1// SPDX-License-Identifier: GPL-2.0-only 2/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES 3 */ 4#include <linux/iommufd.h> 5#include <linux/slab.h> 6#include <linux/iommu.h> 7#include <uapi/linux/iommufd.h> 8#include "../iommu-priv.h" 9 10#include "io_pagetable.h" 11#include "iommufd_private.h" 12 13static bool allow_unsafe_interrupts; 14module_param(allow_unsafe_interrupts, bool, S_IRUGO | S_IWUSR); 15MODULE_PARM_DESC( 16 allow_unsafe_interrupts, 17 "Allow IOMMUFD to bind to devices even if the platform cannot isolate " 18 "the MSI interrupt window. Enabling this is a security weakness."); 19 20static void iommufd_group_release(struct kref *kref) 21{ 22 struct iommufd_group *igroup = 23 container_of(kref, struct iommufd_group, ref); 24 25 WARN_ON(igroup->hwpt || !list_empty(&igroup->device_list)); 26 27 xa_cmpxchg(&igroup->ictx->groups, iommu_group_id(igroup->group), igroup, 28 NULL, GFP_KERNEL); 29 iommu_group_put(igroup->group); 30 mutex_destroy(&igroup->lock); 31 kfree(igroup); 32} 33 34static void iommufd_put_group(struct iommufd_group *group) 35{ 36 kref_put(&group->ref, iommufd_group_release); 37} 38 39static bool iommufd_group_try_get(struct iommufd_group *igroup, 40 struct iommu_group *group) 41{ 42 if (!igroup) 43 return false; 44 /* 45 * group ID's cannot be re-used until the group is put back which does 46 * not happen if we could get an igroup pointer under the xa_lock. 47 */ 48 if (WARN_ON(igroup->group != group)) 49 return false; 50 return kref_get_unless_zero(&igroup->ref); 51} 52 53/* 54 * iommufd needs to store some more data for each iommu_group, we keep a 55 * parallel xarray indexed by iommu_group id to hold this instead of putting it 56 * in the core structure. To keep things simple the iommufd_group memory is 57 * unique within the iommufd_ctx. This makes it easy to check there are no 58 * memory leaks. 59 */ 60static struct iommufd_group *iommufd_get_group(struct iommufd_ctx *ictx, 61 struct device *dev) 62{ 63 struct iommufd_group *new_igroup; 64 struct iommufd_group *cur_igroup; 65 struct iommufd_group *igroup; 66 struct iommu_group *group; 67 unsigned int id; 68 69 group = iommu_group_get(dev); 70 if (!group) 71 return ERR_PTR(-ENODEV); 72 73 id = iommu_group_id(group); 74 75 xa_lock(&ictx->groups); 76 igroup = xa_load(&ictx->groups, id); 77 if (iommufd_group_try_get(igroup, group)) { 78 xa_unlock(&ictx->groups); 79 iommu_group_put(group); 80 return igroup; 81 } 82 xa_unlock(&ictx->groups); 83 84 new_igroup = kzalloc(sizeof(*new_igroup), GFP_KERNEL); 85 if (!new_igroup) { 86 iommu_group_put(group); 87 return ERR_PTR(-ENOMEM); 88 } 89 90 kref_init(&new_igroup->ref); 91 mutex_init(&new_igroup->lock); 92 INIT_LIST_HEAD(&new_igroup->device_list); 93 new_igroup->sw_msi_start = PHYS_ADDR_MAX; 94 /* group reference moves into new_igroup */ 95 new_igroup->group = group; 96 97 /* 98 * The ictx is not additionally refcounted here becase all objects using 99 * an igroup must put it before their destroy completes. 100 */ 101 new_igroup->ictx = ictx; 102 103 /* 104 * We dropped the lock so igroup is invalid. NULL is a safe and likely 105 * value to assume for the xa_cmpxchg algorithm. 106 */ 107 cur_igroup = NULL; 108 xa_lock(&ictx->groups); 109 while (true) { 110 igroup = __xa_cmpxchg(&ictx->groups, id, cur_igroup, new_igroup, 111 GFP_KERNEL); 112 if (xa_is_err(igroup)) { 113 xa_unlock(&ictx->groups); 114 iommufd_put_group(new_igroup); 115 return ERR_PTR(xa_err(igroup)); 116 } 117 118 /* new_group was successfully installed */ 119 if (cur_igroup == igroup) { 120 xa_unlock(&ictx->groups); 121 return new_igroup; 122 } 123 124 /* Check again if the current group is any good */ 125 if (iommufd_group_try_get(igroup, group)) { 126 xa_unlock(&ictx->groups); 127 iommufd_put_group(new_igroup); 128 return igroup; 129 } 130 cur_igroup = igroup; 131 } 132} 133 134void iommufd_device_destroy(struct iommufd_object *obj) 135{ 136 struct iommufd_device *idev = 137 container_of(obj, struct iommufd_device, obj); 138 139 iommu_device_release_dma_owner(idev->dev); 140 iommufd_put_group(idev->igroup); 141 if (!iommufd_selftest_is_mock_dev(idev->dev)) 142 iommufd_ctx_put(idev->ictx); 143} 144 145/** 146 * iommufd_device_bind - Bind a physical device to an iommu fd 147 * @ictx: iommufd file descriptor 148 * @dev: Pointer to a physical device struct 149 * @id: Output ID number to return to userspace for this device 150 * 151 * A successful bind establishes an ownership over the device and returns 152 * struct iommufd_device pointer, otherwise returns error pointer. 153 * 154 * A driver using this API must set driver_managed_dma and must not touch 155 * the device until this routine succeeds and establishes ownership. 156 * 157 * Binding a PCI device places the entire RID under iommufd control. 158 * 159 * The caller must undo this with iommufd_device_unbind() 160 */ 161struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, 162 struct device *dev, u32 *id) 163{ 164 struct iommufd_device *idev; 165 struct iommufd_group *igroup; 166 int rc; 167 168 /* 169 * iommufd always sets IOMMU_CACHE because we offer no way for userspace 170 * to restore cache coherency. 171 */ 172 if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) 173 return ERR_PTR(-EINVAL); 174 175 igroup = iommufd_get_group(ictx, dev); 176 if (IS_ERR(igroup)) 177 return ERR_CAST(igroup); 178 179 /* 180 * For historical compat with VFIO the insecure interrupt path is 181 * allowed if the module parameter is set. Secure/Isolated means that a 182 * MemWr operation from the device (eg a simple DMA) cannot trigger an 183 * interrupt outside this iommufd context. 184 */ 185 if (!iommufd_selftest_is_mock_dev(dev) && 186 !iommu_group_has_isolated_msi(igroup->group)) { 187 if (!allow_unsafe_interrupts) { 188 rc = -EPERM; 189 goto out_group_put; 190 } 191 192 dev_warn( 193 dev, 194 "MSI interrupts are not secure, they cannot be isolated by the platform. " 195 "Check that platform features like interrupt remapping are enabled. " 196 "Use the \"allow_unsafe_interrupts\" module parameter to override\n"); 197 } 198 199 rc = iommu_device_claim_dma_owner(dev, ictx); 200 if (rc) 201 goto out_group_put; 202 203 idev = iommufd_object_alloc(ictx, idev, IOMMUFD_OBJ_DEVICE); 204 if (IS_ERR(idev)) { 205 rc = PTR_ERR(idev); 206 goto out_release_owner; 207 } 208 idev->ictx = ictx; 209 if (!iommufd_selftest_is_mock_dev(dev)) 210 iommufd_ctx_get(ictx); 211 idev->dev = dev; 212 idev->enforce_cache_coherency = 213 device_iommu_capable(dev, IOMMU_CAP_ENFORCE_CACHE_COHERENCY); 214 /* The calling driver is a user until iommufd_device_unbind() */ 215 refcount_inc(&idev->obj.users); 216 /* igroup refcount moves into iommufd_device */ 217 idev->igroup = igroup; 218 219 /* 220 * If the caller fails after this success it must call 221 * iommufd_unbind_device() which is safe since we hold this refcount. 222 * This also means the device is a leaf in the graph and no other object 223 * can take a reference on it. 224 */ 225 iommufd_object_finalize(ictx, &idev->obj); 226 *id = idev->obj.id; 227 return idev; 228 229out_release_owner: 230 iommu_device_release_dma_owner(dev); 231out_group_put: 232 iommufd_put_group(igroup); 233 return ERR_PTR(rc); 234} 235EXPORT_SYMBOL_NS_GPL(iommufd_device_bind, IOMMUFD); 236 237/** 238 * iommufd_ctx_has_group - True if any device within the group is bound 239 * to the ictx 240 * @ictx: iommufd file descriptor 241 * @group: Pointer to a physical iommu_group struct 242 * 243 * True if any device within the group has been bound to this ictx, ex. via 244 * iommufd_device_bind(), therefore implying ictx ownership of the group. 245 */ 246bool iommufd_ctx_has_group(struct iommufd_ctx *ictx, struct iommu_group *group) 247{ 248 struct iommufd_object *obj; 249 unsigned long index; 250 251 if (!ictx || !group) 252 return false; 253 254 xa_lock(&ictx->objects); 255 xa_for_each(&ictx->objects, index, obj) { 256 if (obj->type == IOMMUFD_OBJ_DEVICE && 257 container_of(obj, struct iommufd_device, obj) 258 ->igroup->group == group) { 259 xa_unlock(&ictx->objects); 260 return true; 261 } 262 } 263 xa_unlock(&ictx->objects); 264 return false; 265} 266EXPORT_SYMBOL_NS_GPL(iommufd_ctx_has_group, IOMMUFD); 267 268/** 269 * iommufd_device_unbind - Undo iommufd_device_bind() 270 * @idev: Device returned by iommufd_device_bind() 271 * 272 * Release the device from iommufd control. The DMA ownership will return back 273 * to unowned with DMA controlled by the DMA API. This invalidates the 274 * iommufd_device pointer, other APIs that consume it must not be called 275 * concurrently. 276 */ 277void iommufd_device_unbind(struct iommufd_device *idev) 278{ 279 iommufd_object_destroy_user(idev->ictx, &idev->obj); 280} 281EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, IOMMUFD); 282 283struct iommufd_ctx *iommufd_device_to_ictx(struct iommufd_device *idev) 284{ 285 return idev->ictx; 286} 287EXPORT_SYMBOL_NS_GPL(iommufd_device_to_ictx, IOMMUFD); 288 289u32 iommufd_device_to_id(struct iommufd_device *idev) 290{ 291 return idev->obj.id; 292} 293EXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, IOMMUFD); 294 295static int iommufd_group_setup_msi(struct iommufd_group *igroup, 296 struct iommufd_hwpt_paging *hwpt_paging) 297{ 298 phys_addr_t sw_msi_start = igroup->sw_msi_start; 299 int rc; 300 301 /* 302 * If the IOMMU driver gives a IOMMU_RESV_SW_MSI then it is asking us to 303 * call iommu_get_msi_cookie() on its behalf. This is necessary to setup 304 * the MSI window so iommu_dma_prepare_msi() can install pages into our 305 * domain after request_irq(). If it is not done interrupts will not 306 * work on this domain. 307 * 308 * FIXME: This is conceptually broken for iommufd since we want to allow 309 * userspace to change the domains, eg switch from an identity IOAS to a 310 * DMA IOAS. There is currently no way to create a MSI window that 311 * matches what the IRQ layer actually expects in a newly created 312 * domain. 313 */ 314 if (sw_msi_start != PHYS_ADDR_MAX && !hwpt_paging->msi_cookie) { 315 rc = iommu_get_msi_cookie(hwpt_paging->common.domain, 316 sw_msi_start); 317 if (rc) 318 return rc; 319 320 /* 321 * iommu_get_msi_cookie() can only be called once per domain, 322 * it returns -EBUSY on later calls. 323 */ 324 hwpt_paging->msi_cookie = true; 325 } 326 return 0; 327} 328 329static int iommufd_hwpt_paging_attach(struct iommufd_hwpt_paging *hwpt_paging, 330 struct iommufd_device *idev) 331{ 332 int rc; 333 334 lockdep_assert_held(&idev->igroup->lock); 335 336 rc = iopt_table_enforce_dev_resv_regions(&hwpt_paging->ioas->iopt, 337 idev->dev, 338 &idev->igroup->sw_msi_start); 339 if (rc) 340 return rc; 341 342 if (list_empty(&idev->igroup->device_list)) { 343 rc = iommufd_group_setup_msi(idev->igroup, hwpt_paging); 344 if (rc) { 345 iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, 346 idev->dev); 347 return rc; 348 } 349 } 350 return 0; 351} 352 353int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, 354 struct iommufd_device *idev) 355{ 356 int rc; 357 358 mutex_lock(&idev->igroup->lock); 359 360 if (idev->igroup->hwpt != NULL && idev->igroup->hwpt != hwpt) { 361 rc = -EINVAL; 362 goto err_unlock; 363 } 364 365 if (hwpt_is_paging(hwpt)) { 366 rc = iommufd_hwpt_paging_attach(to_hwpt_paging(hwpt), idev); 367 if (rc) 368 goto err_unlock; 369 } 370 371 /* 372 * Only attach to the group once for the first device that is in the 373 * group. All the other devices will follow this attachment. The user 374 * should attach every device individually to the hwpt as the per-device 375 * reserved regions are only updated during individual device 376 * attachment. 377 */ 378 if (list_empty(&idev->igroup->device_list)) { 379 rc = iommu_attach_group(hwpt->domain, idev->igroup->group); 380 if (rc) 381 goto err_unresv; 382 idev->igroup->hwpt = hwpt; 383 } 384 refcount_inc(&hwpt->obj.users); 385 list_add_tail(&idev->group_item, &idev->igroup->device_list); 386 mutex_unlock(&idev->igroup->lock); 387 return 0; 388err_unresv: 389 if (hwpt_is_paging(hwpt)) 390 iopt_remove_reserved_iova(&to_hwpt_paging(hwpt)->ioas->iopt, 391 idev->dev); 392err_unlock: 393 mutex_unlock(&idev->igroup->lock); 394 return rc; 395} 396 397struct iommufd_hw_pagetable * 398iommufd_hw_pagetable_detach(struct iommufd_device *idev) 399{ 400 struct iommufd_hw_pagetable *hwpt = idev->igroup->hwpt; 401 402 mutex_lock(&idev->igroup->lock); 403 list_del(&idev->group_item); 404 if (list_empty(&idev->igroup->device_list)) { 405 iommu_detach_group(hwpt->domain, idev->igroup->group); 406 idev->igroup->hwpt = NULL; 407 } 408 if (hwpt_is_paging(hwpt)) 409 iopt_remove_reserved_iova(&to_hwpt_paging(hwpt)->ioas->iopt, 410 idev->dev); 411 mutex_unlock(&idev->igroup->lock); 412 413 /* Caller must destroy hwpt */ 414 return hwpt; 415} 416 417static struct iommufd_hw_pagetable * 418iommufd_device_do_attach(struct iommufd_device *idev, 419 struct iommufd_hw_pagetable *hwpt) 420{ 421 int rc; 422 423 rc = iommufd_hw_pagetable_attach(hwpt, idev); 424 if (rc) 425 return ERR_PTR(rc); 426 return NULL; 427} 428 429static void 430iommufd_group_remove_reserved_iova(struct iommufd_group *igroup, 431 struct iommufd_hwpt_paging *hwpt_paging) 432{ 433 struct iommufd_device *cur; 434 435 lockdep_assert_held(&igroup->lock); 436 437 list_for_each_entry(cur, &igroup->device_list, group_item) 438 iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, cur->dev); 439} 440 441static int 442iommufd_group_do_replace_paging(struct iommufd_group *igroup, 443 struct iommufd_hwpt_paging *hwpt_paging) 444{ 445 struct iommufd_hw_pagetable *old_hwpt = igroup->hwpt; 446 struct iommufd_device *cur; 447 int rc; 448 449 lockdep_assert_held(&igroup->lock); 450 451 if (!hwpt_is_paging(old_hwpt) || 452 hwpt_paging->ioas != to_hwpt_paging(old_hwpt)->ioas) { 453 list_for_each_entry(cur, &igroup->device_list, group_item) { 454 rc = iopt_table_enforce_dev_resv_regions( 455 &hwpt_paging->ioas->iopt, cur->dev, NULL); 456 if (rc) 457 goto err_unresv; 458 } 459 } 460 461 rc = iommufd_group_setup_msi(igroup, hwpt_paging); 462 if (rc) 463 goto err_unresv; 464 return 0; 465 466err_unresv: 467 iommufd_group_remove_reserved_iova(igroup, hwpt_paging); 468 return rc; 469} 470 471static struct iommufd_hw_pagetable * 472iommufd_device_do_replace(struct iommufd_device *idev, 473 struct iommufd_hw_pagetable *hwpt) 474{ 475 struct iommufd_group *igroup = idev->igroup; 476 struct iommufd_hw_pagetable *old_hwpt; 477 unsigned int num_devices; 478 int rc; 479 480 mutex_lock(&idev->igroup->lock); 481 482 if (igroup->hwpt == NULL) { 483 rc = -EINVAL; 484 goto err_unlock; 485 } 486 487 if (hwpt == igroup->hwpt) { 488 mutex_unlock(&idev->igroup->lock); 489 return NULL; 490 } 491 492 old_hwpt = igroup->hwpt; 493 if (hwpt_is_paging(hwpt)) { 494 rc = iommufd_group_do_replace_paging(igroup, 495 to_hwpt_paging(hwpt)); 496 if (rc) 497 goto err_unlock; 498 } 499 500 rc = iommu_group_replace_domain(igroup->group, hwpt->domain); 501 if (rc) 502 goto err_unresv; 503 504 if (hwpt_is_paging(old_hwpt) && 505 (!hwpt_is_paging(hwpt) || 506 to_hwpt_paging(hwpt)->ioas != to_hwpt_paging(old_hwpt)->ioas)) 507 iommufd_group_remove_reserved_iova(igroup, 508 to_hwpt_paging(old_hwpt)); 509 510 igroup->hwpt = hwpt; 511 512 num_devices = list_count_nodes(&igroup->device_list); 513 /* 514 * Move the refcounts held by the device_list to the new hwpt. Retain a 515 * refcount for this thread as the caller will free it. 516 */ 517 refcount_add(num_devices, &hwpt->obj.users); 518 if (num_devices > 1) 519 WARN_ON(refcount_sub_and_test(num_devices - 1, 520 &old_hwpt->obj.users)); 521 mutex_unlock(&idev->igroup->lock); 522 523 /* Caller must destroy old_hwpt */ 524 return old_hwpt; 525err_unresv: 526 if (hwpt_is_paging(hwpt)) 527 iommufd_group_remove_reserved_iova(igroup, 528 to_hwpt_paging(old_hwpt)); 529err_unlock: 530 mutex_unlock(&idev->igroup->lock); 531 return ERR_PTR(rc); 532} 533 534typedef struct iommufd_hw_pagetable *(*attach_fn)( 535 struct iommufd_device *idev, struct iommufd_hw_pagetable *hwpt); 536 537/* 538 * When automatically managing the domains we search for a compatible domain in 539 * the iopt and if one is found use it, otherwise create a new domain. 540 * Automatic domain selection will never pick a manually created domain. 541 */ 542static struct iommufd_hw_pagetable * 543iommufd_device_auto_get_domain(struct iommufd_device *idev, 544 struct iommufd_ioas *ioas, u32 *pt_id, 545 attach_fn do_attach) 546{ 547 /* 548 * iommufd_hw_pagetable_attach() is called by 549 * iommufd_hw_pagetable_alloc() in immediate attachment mode, same as 550 * iommufd_device_do_attach(). So if we are in this mode then we prefer 551 * to use the immediate_attach path as it supports drivers that can't 552 * directly allocate a domain. 553 */ 554 bool immediate_attach = do_attach == iommufd_device_do_attach; 555 struct iommufd_hw_pagetable *destroy_hwpt; 556 struct iommufd_hwpt_paging *hwpt_paging; 557 struct iommufd_hw_pagetable *hwpt; 558 559 /* 560 * There is no differentiation when domains are allocated, so any domain 561 * that is willing to attach to the device is interchangeable with any 562 * other. 563 */ 564 mutex_lock(&ioas->mutex); 565 list_for_each_entry(hwpt_paging, &ioas->hwpt_list, hwpt_item) { 566 if (!hwpt_paging->auto_domain) 567 continue; 568 569 hwpt = &hwpt_paging->common; 570 if (!iommufd_lock_obj(&hwpt->obj)) 571 continue; 572 destroy_hwpt = (*do_attach)(idev, hwpt); 573 if (IS_ERR(destroy_hwpt)) { 574 iommufd_put_object(idev->ictx, &hwpt->obj); 575 /* 576 * -EINVAL means the domain is incompatible with the 577 * device. Other error codes should propagate to 578 * userspace as failure. Success means the domain is 579 * attached. 580 */ 581 if (PTR_ERR(destroy_hwpt) == -EINVAL) 582 continue; 583 goto out_unlock; 584 } 585 *pt_id = hwpt->obj.id; 586 iommufd_put_object(idev->ictx, &hwpt->obj); 587 goto out_unlock; 588 } 589 590 hwpt_paging = iommufd_hwpt_paging_alloc(idev->ictx, ioas, idev, 0, 591 immediate_attach, NULL); 592 if (IS_ERR(hwpt_paging)) { 593 destroy_hwpt = ERR_CAST(hwpt_paging); 594 goto out_unlock; 595 } 596 hwpt = &hwpt_paging->common; 597 598 if (!immediate_attach) { 599 destroy_hwpt = (*do_attach)(idev, hwpt); 600 if (IS_ERR(destroy_hwpt)) 601 goto out_abort; 602 } else { 603 destroy_hwpt = NULL; 604 } 605 606 hwpt_paging->auto_domain = true; 607 *pt_id = hwpt->obj.id; 608 609 iommufd_object_finalize(idev->ictx, &hwpt->obj); 610 mutex_unlock(&ioas->mutex); 611 return destroy_hwpt; 612 613out_abort: 614 iommufd_object_abort_and_destroy(idev->ictx, &hwpt->obj); 615out_unlock: 616 mutex_unlock(&ioas->mutex); 617 return destroy_hwpt; 618} 619 620static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id, 621 attach_fn do_attach) 622{ 623 struct iommufd_hw_pagetable *destroy_hwpt; 624 struct iommufd_object *pt_obj; 625 626 pt_obj = iommufd_get_object(idev->ictx, *pt_id, IOMMUFD_OBJ_ANY); 627 if (IS_ERR(pt_obj)) 628 return PTR_ERR(pt_obj); 629 630 switch (pt_obj->type) { 631 case IOMMUFD_OBJ_HWPT_NESTED: 632 case IOMMUFD_OBJ_HWPT_PAGING: { 633 struct iommufd_hw_pagetable *hwpt = 634 container_of(pt_obj, struct iommufd_hw_pagetable, obj); 635 636 destroy_hwpt = (*do_attach)(idev, hwpt); 637 if (IS_ERR(destroy_hwpt)) 638 goto out_put_pt_obj; 639 break; 640 } 641 case IOMMUFD_OBJ_IOAS: { 642 struct iommufd_ioas *ioas = 643 container_of(pt_obj, struct iommufd_ioas, obj); 644 645 destroy_hwpt = iommufd_device_auto_get_domain(idev, ioas, pt_id, 646 do_attach); 647 if (IS_ERR(destroy_hwpt)) 648 goto out_put_pt_obj; 649 break; 650 } 651 default: 652 destroy_hwpt = ERR_PTR(-EINVAL); 653 goto out_put_pt_obj; 654 } 655 iommufd_put_object(idev->ictx, pt_obj); 656 657 /* This destruction has to be after we unlock everything */ 658 if (destroy_hwpt) 659 iommufd_hw_pagetable_put(idev->ictx, destroy_hwpt); 660 return 0; 661 662out_put_pt_obj: 663 iommufd_put_object(idev->ictx, pt_obj); 664 return PTR_ERR(destroy_hwpt); 665} 666 667/** 668 * iommufd_device_attach - Connect a device to an iommu_domain 669 * @idev: device to attach 670 * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING 671 * Output the IOMMUFD_OBJ_HWPT_PAGING ID 672 * 673 * This connects the device to an iommu_domain, either automatically or manually 674 * selected. Once this completes the device could do DMA. 675 * 676 * The caller should return the resulting pt_id back to userspace. 677 * This function is undone by calling iommufd_device_detach(). 678 */ 679int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id) 680{ 681 int rc; 682 683 rc = iommufd_device_change_pt(idev, pt_id, &iommufd_device_do_attach); 684 if (rc) 685 return rc; 686 687 /* 688 * Pairs with iommufd_device_detach() - catches caller bugs attempting 689 * to destroy a device with an attachment. 690 */ 691 refcount_inc(&idev->obj.users); 692 return 0; 693} 694EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, IOMMUFD); 695 696/** 697 * iommufd_device_replace - Change the device's iommu_domain 698 * @idev: device to change 699 * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING 700 * Output the IOMMUFD_OBJ_HWPT_PAGING ID 701 * 702 * This is the same as:: 703 * 704 * iommufd_device_detach(); 705 * iommufd_device_attach(); 706 * 707 * If it fails then no change is made to the attachment. The iommu driver may 708 * implement this so there is no disruption in translation. This can only be 709 * called if iommufd_device_attach() has already succeeded. 710 */ 711int iommufd_device_replace(struct iommufd_device *idev, u32 *pt_id) 712{ 713 return iommufd_device_change_pt(idev, pt_id, 714 &iommufd_device_do_replace); 715} 716EXPORT_SYMBOL_NS_GPL(iommufd_device_replace, IOMMUFD); 717 718/** 719 * iommufd_device_detach - Disconnect a device to an iommu_domain 720 * @idev: device to detach 721 * 722 * Undo iommufd_device_attach(). This disconnects the idev from the previously 723 * attached pt_id. The device returns back to a blocked DMA translation. 724 */ 725void iommufd_device_detach(struct iommufd_device *idev) 726{ 727 struct iommufd_hw_pagetable *hwpt; 728 729 hwpt = iommufd_hw_pagetable_detach(idev); 730 iommufd_hw_pagetable_put(idev->ictx, hwpt); 731 refcount_dec(&idev->obj.users); 732} 733EXPORT_SYMBOL_NS_GPL(iommufd_device_detach, IOMMUFD); 734 735/* 736 * On success, it will refcount_inc() at a valid new_ioas and refcount_dec() at 737 * a valid cur_ioas (access->ioas). A caller passing in a valid new_ioas should 738 * call iommufd_put_object() if it does an iommufd_get_object() for a new_ioas. 739 */ 740static int iommufd_access_change_ioas(struct iommufd_access *access, 741 struct iommufd_ioas *new_ioas) 742{ 743 u32 iopt_access_list_id = access->iopt_access_list_id; 744 struct iommufd_ioas *cur_ioas = access->ioas; 745 int rc; 746 747 lockdep_assert_held(&access->ioas_lock); 748 749 /* We are racing with a concurrent detach, bail */ 750 if (cur_ioas != access->ioas_unpin) 751 return -EBUSY; 752 753 if (cur_ioas == new_ioas) 754 return 0; 755 756 /* 757 * Set ioas to NULL to block any further iommufd_access_pin_pages(). 758 * iommufd_access_unpin_pages() can continue using access->ioas_unpin. 759 */ 760 access->ioas = NULL; 761 762 if (new_ioas) { 763 rc = iopt_add_access(&new_ioas->iopt, access); 764 if (rc) { 765 access->ioas = cur_ioas; 766 return rc; 767 } 768 refcount_inc(&new_ioas->obj.users); 769 } 770 771 if (cur_ioas) { 772 if (access->ops->unmap) { 773 mutex_unlock(&access->ioas_lock); 774 access->ops->unmap(access->data, 0, ULONG_MAX); 775 mutex_lock(&access->ioas_lock); 776 } 777 iopt_remove_access(&cur_ioas->iopt, access, iopt_access_list_id); 778 refcount_dec(&cur_ioas->obj.users); 779 } 780 781 access->ioas = new_ioas; 782 access->ioas_unpin = new_ioas; 783 784 return 0; 785} 786 787static int iommufd_access_change_ioas_id(struct iommufd_access *access, u32 id) 788{ 789 struct iommufd_ioas *ioas = iommufd_get_ioas(access->ictx, id); 790 int rc; 791 792 if (IS_ERR(ioas)) 793 return PTR_ERR(ioas); 794 rc = iommufd_access_change_ioas(access, ioas); 795 iommufd_put_object(access->ictx, &ioas->obj); 796 return rc; 797} 798 799void iommufd_access_destroy_object(struct iommufd_object *obj) 800{ 801 struct iommufd_access *access = 802 container_of(obj, struct iommufd_access, obj); 803 804 mutex_lock(&access->ioas_lock); 805 if (access->ioas) 806 WARN_ON(iommufd_access_change_ioas(access, NULL)); 807 mutex_unlock(&access->ioas_lock); 808 iommufd_ctx_put(access->ictx); 809} 810 811/** 812 * iommufd_access_create - Create an iommufd_access 813 * @ictx: iommufd file descriptor 814 * @ops: Driver's ops to associate with the access 815 * @data: Opaque data to pass into ops functions 816 * @id: Output ID number to return to userspace for this access 817 * 818 * An iommufd_access allows a driver to read/write to the IOAS without using 819 * DMA. The underlying CPU memory can be accessed using the 820 * iommufd_access_pin_pages() or iommufd_access_rw() functions. 821 * 822 * The provided ops are required to use iommufd_access_pin_pages(). 823 */ 824struct iommufd_access * 825iommufd_access_create(struct iommufd_ctx *ictx, 826 const struct iommufd_access_ops *ops, void *data, u32 *id) 827{ 828 struct iommufd_access *access; 829 830 /* 831 * There is no uAPI for the access object, but to keep things symmetric 832 * use the object infrastructure anyhow. 833 */ 834 access = iommufd_object_alloc(ictx, access, IOMMUFD_OBJ_ACCESS); 835 if (IS_ERR(access)) 836 return access; 837 838 access->data = data; 839 access->ops = ops; 840 841 if (ops->needs_pin_pages) 842 access->iova_alignment = PAGE_SIZE; 843 else 844 access->iova_alignment = 1; 845 846 /* The calling driver is a user until iommufd_access_destroy() */ 847 refcount_inc(&access->obj.users); 848 access->ictx = ictx; 849 iommufd_ctx_get(ictx); 850 iommufd_object_finalize(ictx, &access->obj); 851 *id = access->obj.id; 852 mutex_init(&access->ioas_lock); 853 return access; 854} 855EXPORT_SYMBOL_NS_GPL(iommufd_access_create, IOMMUFD); 856 857/** 858 * iommufd_access_destroy - Destroy an iommufd_access 859 * @access: The access to destroy 860 * 861 * The caller must stop using the access before destroying it. 862 */ 863void iommufd_access_destroy(struct iommufd_access *access) 864{ 865 iommufd_object_destroy_user(access->ictx, &access->obj); 866} 867EXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, IOMMUFD); 868 869void iommufd_access_detach(struct iommufd_access *access) 870{ 871 mutex_lock(&access->ioas_lock); 872 if (WARN_ON(!access->ioas)) { 873 mutex_unlock(&access->ioas_lock); 874 return; 875 } 876 WARN_ON(iommufd_access_change_ioas(access, NULL)); 877 mutex_unlock(&access->ioas_lock); 878} 879EXPORT_SYMBOL_NS_GPL(iommufd_access_detach, IOMMUFD); 880 881int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id) 882{ 883 int rc; 884 885 mutex_lock(&access->ioas_lock); 886 if (WARN_ON(access->ioas)) { 887 mutex_unlock(&access->ioas_lock); 888 return -EINVAL; 889 } 890 891 rc = iommufd_access_change_ioas_id(access, ioas_id); 892 mutex_unlock(&access->ioas_lock); 893 return rc; 894} 895EXPORT_SYMBOL_NS_GPL(iommufd_access_attach, IOMMUFD); 896 897int iommufd_access_replace(struct iommufd_access *access, u32 ioas_id) 898{ 899 int rc; 900 901 mutex_lock(&access->ioas_lock); 902 if (!access->ioas) { 903 mutex_unlock(&access->ioas_lock); 904 return -ENOENT; 905 } 906 rc = iommufd_access_change_ioas_id(access, ioas_id); 907 mutex_unlock(&access->ioas_lock); 908 return rc; 909} 910EXPORT_SYMBOL_NS_GPL(iommufd_access_replace, IOMMUFD); 911 912/** 913 * iommufd_access_notify_unmap - Notify users of an iopt to stop using it 914 * @iopt: iopt to work on 915 * @iova: Starting iova in the iopt 916 * @length: Number of bytes 917 * 918 * After this function returns there should be no users attached to the pages 919 * linked to this iopt that intersect with iova,length. Anyone that has attached 920 * a user through iopt_access_pages() needs to detach it through 921 * iommufd_access_unpin_pages() before this function returns. 922 * 923 * iommufd_access_destroy() will wait for any outstanding unmap callback to 924 * complete. Once iommufd_access_destroy() no unmap ops are running or will 925 * run in the future. Due to this a driver must not create locking that prevents 926 * unmap to complete while iommufd_access_destroy() is running. 927 */ 928void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova, 929 unsigned long length) 930{ 931 struct iommufd_ioas *ioas = 932 container_of(iopt, struct iommufd_ioas, iopt); 933 struct iommufd_access *access; 934 unsigned long index; 935 936 xa_lock(&ioas->iopt.access_list); 937 xa_for_each(&ioas->iopt.access_list, index, access) { 938 if (!iommufd_lock_obj(&access->obj)) 939 continue; 940 xa_unlock(&ioas->iopt.access_list); 941 942 access->ops->unmap(access->data, iova, length); 943 944 iommufd_put_object(access->ictx, &access->obj); 945 xa_lock(&ioas->iopt.access_list); 946 } 947 xa_unlock(&ioas->iopt.access_list); 948} 949 950/** 951 * iommufd_access_unpin_pages() - Undo iommufd_access_pin_pages 952 * @access: IOAS access to act on 953 * @iova: Starting IOVA 954 * @length: Number of bytes to access 955 * 956 * Return the struct page's. The caller must stop accessing them before calling 957 * this. The iova/length must exactly match the one provided to access_pages. 958 */ 959void iommufd_access_unpin_pages(struct iommufd_access *access, 960 unsigned long iova, unsigned long length) 961{ 962 struct iopt_area_contig_iter iter; 963 struct io_pagetable *iopt; 964 unsigned long last_iova; 965 struct iopt_area *area; 966 967 if (WARN_ON(!length) || 968 WARN_ON(check_add_overflow(iova, length - 1, &last_iova))) 969 return; 970 971 mutex_lock(&access->ioas_lock); 972 /* 973 * The driver must be doing something wrong if it calls this before an 974 * iommufd_access_attach() or after an iommufd_access_detach(). 975 */ 976 if (WARN_ON(!access->ioas_unpin)) { 977 mutex_unlock(&access->ioas_lock); 978 return; 979 } 980 iopt = &access->ioas_unpin->iopt; 981 982 down_read(&iopt->iova_rwsem); 983 iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) 984 iopt_area_remove_access( 985 area, iopt_area_iova_to_index(area, iter.cur_iova), 986 iopt_area_iova_to_index( 987 area, 988 min(last_iova, iopt_area_last_iova(area)))); 989 WARN_ON(!iopt_area_contig_done(&iter)); 990 up_read(&iopt->iova_rwsem); 991 mutex_unlock(&access->ioas_lock); 992} 993EXPORT_SYMBOL_NS_GPL(iommufd_access_unpin_pages, IOMMUFD); 994 995static bool iopt_area_contig_is_aligned(struct iopt_area_contig_iter *iter) 996{ 997 if (iopt_area_start_byte(iter->area, iter->cur_iova) % PAGE_SIZE) 998 return false; 999 1000 if (!iopt_area_contig_done(iter) && 1001 (iopt_area_start_byte(iter->area, iopt_area_last_iova(iter->area)) % 1002 PAGE_SIZE) != (PAGE_SIZE - 1)) 1003 return false; 1004 return true; 1005} 1006 1007static bool check_area_prot(struct iopt_area *area, unsigned int flags) 1008{ 1009 if (flags & IOMMUFD_ACCESS_RW_WRITE) 1010 return area->iommu_prot & IOMMU_WRITE; 1011 return area->iommu_prot & IOMMU_READ; 1012} 1013 1014/** 1015 * iommufd_access_pin_pages() - Return a list of pages under the iova 1016 * @access: IOAS access to act on 1017 * @iova: Starting IOVA 1018 * @length: Number of bytes to access 1019 * @out_pages: Output page list 1020 * @flags: IOPMMUFD_ACCESS_RW_* flags 1021 * 1022 * Reads @length bytes starting at iova and returns the struct page * pointers. 1023 * These can be kmap'd by the caller for CPU access. 1024 * 1025 * The caller must perform iommufd_access_unpin_pages() when done to balance 1026 * this. 1027 * 1028 * This API always requires a page aligned iova. This happens naturally if the 1029 * ioas alignment is >= PAGE_SIZE and the iova is PAGE_SIZE aligned. However 1030 * smaller alignments have corner cases where this API can fail on otherwise 1031 * aligned iova. 1032 */ 1033int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova, 1034 unsigned long length, struct page **out_pages, 1035 unsigned int flags) 1036{ 1037 struct iopt_area_contig_iter iter; 1038 struct io_pagetable *iopt; 1039 unsigned long last_iova; 1040 struct iopt_area *area; 1041 int rc; 1042 1043 /* Driver's ops don't support pin_pages */ 1044 if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && 1045 WARN_ON(access->iova_alignment != PAGE_SIZE || !access->ops->unmap)) 1046 return -EINVAL; 1047 1048 if (!length) 1049 return -EINVAL; 1050 if (check_add_overflow(iova, length - 1, &last_iova)) 1051 return -EOVERFLOW; 1052 1053 mutex_lock(&access->ioas_lock); 1054 if (!access->ioas) { 1055 mutex_unlock(&access->ioas_lock); 1056 return -ENOENT; 1057 } 1058 iopt = &access->ioas->iopt; 1059 1060 down_read(&iopt->iova_rwsem); 1061 iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) { 1062 unsigned long last = min(last_iova, iopt_area_last_iova(area)); 1063 unsigned long last_index = iopt_area_iova_to_index(area, last); 1064 unsigned long index = 1065 iopt_area_iova_to_index(area, iter.cur_iova); 1066 1067 if (area->prevent_access || 1068 !iopt_area_contig_is_aligned(&iter)) { 1069 rc = -EINVAL; 1070 goto err_remove; 1071 } 1072 1073 if (!check_area_prot(area, flags)) { 1074 rc = -EPERM; 1075 goto err_remove; 1076 } 1077 1078 rc = iopt_area_add_access(area, index, last_index, out_pages, 1079 flags); 1080 if (rc) 1081 goto err_remove; 1082 out_pages += last_index - index + 1; 1083 } 1084 if (!iopt_area_contig_done(&iter)) { 1085 rc = -ENOENT; 1086 goto err_remove; 1087 } 1088 1089 up_read(&iopt->iova_rwsem); 1090 mutex_unlock(&access->ioas_lock); 1091 return 0; 1092 1093err_remove: 1094 if (iova < iter.cur_iova) { 1095 last_iova = iter.cur_iova - 1; 1096 iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) 1097 iopt_area_remove_access( 1098 area, 1099 iopt_area_iova_to_index(area, iter.cur_iova), 1100 iopt_area_iova_to_index( 1101 area, min(last_iova, 1102 iopt_area_last_iova(area)))); 1103 } 1104 up_read(&iopt->iova_rwsem); 1105 mutex_unlock(&access->ioas_lock); 1106 return rc; 1107} 1108EXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, IOMMUFD); 1109 1110/** 1111 * iommufd_access_rw - Read or write data under the iova 1112 * @access: IOAS access to act on 1113 * @iova: Starting IOVA 1114 * @data: Kernel buffer to copy to/from 1115 * @length: Number of bytes to access 1116 * @flags: IOMMUFD_ACCESS_RW_* flags 1117 * 1118 * Copy kernel to/from data into the range given by IOVA/length. If flags 1119 * indicates IOMMUFD_ACCESS_RW_KTHREAD then a large copy can be optimized 1120 * by changing it into copy_to/from_user(). 1121 */ 1122int iommufd_access_rw(struct iommufd_access *access, unsigned long iova, 1123 void *data, size_t length, unsigned int flags) 1124{ 1125 struct iopt_area_contig_iter iter; 1126 struct io_pagetable *iopt; 1127 struct iopt_area *area; 1128 unsigned long last_iova; 1129 int rc; 1130 1131 if (!length) 1132 return -EINVAL; 1133 if (check_add_overflow(iova, length - 1, &last_iova)) 1134 return -EOVERFLOW; 1135 1136 mutex_lock(&access->ioas_lock); 1137 if (!access->ioas) { 1138 mutex_unlock(&access->ioas_lock); 1139 return -ENOENT; 1140 } 1141 iopt = &access->ioas->iopt; 1142 1143 down_read(&iopt->iova_rwsem); 1144 iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) { 1145 unsigned long last = min(last_iova, iopt_area_last_iova(area)); 1146 unsigned long bytes = (last - iter.cur_iova) + 1; 1147 1148 if (area->prevent_access) { 1149 rc = -EINVAL; 1150 goto err_out; 1151 } 1152 1153 if (!check_area_prot(area, flags)) { 1154 rc = -EPERM; 1155 goto err_out; 1156 } 1157 1158 rc = iopt_pages_rw_access( 1159 area->pages, iopt_area_start_byte(area, iter.cur_iova), 1160 data, bytes, flags); 1161 if (rc) 1162 goto err_out; 1163 data += bytes; 1164 } 1165 if (!iopt_area_contig_done(&iter)) 1166 rc = -ENOENT; 1167err_out: 1168 up_read(&iopt->iova_rwsem); 1169 mutex_unlock(&access->ioas_lock); 1170 return rc; 1171} 1172EXPORT_SYMBOL_NS_GPL(iommufd_access_rw, IOMMUFD); 1173 1174int iommufd_get_hw_info(struct iommufd_ucmd *ucmd) 1175{ 1176 struct iommu_hw_info *cmd = ucmd->cmd; 1177 void __user *user_ptr = u64_to_user_ptr(cmd->data_uptr); 1178 const struct iommu_ops *ops; 1179 struct iommufd_device *idev; 1180 unsigned int data_len; 1181 unsigned int copy_len; 1182 void *data; 1183 int rc; 1184 1185 if (cmd->flags || cmd->__reserved) 1186 return -EOPNOTSUPP; 1187 1188 idev = iommufd_get_device(ucmd, cmd->dev_id); 1189 if (IS_ERR(idev)) 1190 return PTR_ERR(idev); 1191 1192 ops = dev_iommu_ops(idev->dev); 1193 if (ops->hw_info) { 1194 data = ops->hw_info(idev->dev, &data_len, &cmd->out_data_type); 1195 if (IS_ERR(data)) { 1196 rc = PTR_ERR(data); 1197 goto out_put; 1198 } 1199 1200 /* 1201 * drivers that have hw_info callback should have a unique 1202 * iommu_hw_info_type. 1203 */ 1204 if (WARN_ON_ONCE(cmd->out_data_type == 1205 IOMMU_HW_INFO_TYPE_NONE)) { 1206 rc = -ENODEV; 1207 goto out_free; 1208 } 1209 } else { 1210 cmd->out_data_type = IOMMU_HW_INFO_TYPE_NONE; 1211 data_len = 0; 1212 data = NULL; 1213 } 1214 1215 copy_len = min(cmd->data_len, data_len); 1216 if (copy_to_user(user_ptr, data, copy_len)) { 1217 rc = -EFAULT; 1218 goto out_free; 1219 } 1220 1221 /* 1222 * Zero the trailing bytes if the user buffer is bigger than the 1223 * data size kernel actually has. 1224 */ 1225 if (copy_len < cmd->data_len) { 1226 if (clear_user(user_ptr + copy_len, cmd->data_len - copy_len)) { 1227 rc = -EFAULT; 1228 goto out_free; 1229 } 1230 } 1231 1232 /* 1233 * We return the length the kernel supports so userspace may know what 1234 * the kernel capability is. It could be larger than the input buffer. 1235 */ 1236 cmd->data_len = data_len; 1237 1238 cmd->out_capabilities = 0; 1239 if (device_iommu_capable(idev->dev, IOMMU_CAP_DIRTY_TRACKING)) 1240 cmd->out_capabilities |= IOMMU_HW_CAP_DIRTY_TRACKING; 1241 1242 rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); 1243out_free: 1244 kfree(data); 1245out_put: 1246 iommufd_put_object(ucmd->ictx, &idev->obj); 1247 return rc; 1248} 1249