1// SPDX-License-Identifier: GPL-2.0-only 2/* 3 * MMU-based software IOTLB. 4 * 5 * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved. 6 * 7 * Author: Xie Yongji <xieyongji@bytedance.com> 8 * 9 */ 10 11#include <linux/slab.h> 12#include <linux/file.h> 13#include <linux/anon_inodes.h> 14#include <linux/highmem.h> 15#include <linux/vmalloc.h> 16#include <linux/vdpa.h> 17 18#include "iova_domain.h" 19 20static int vduse_iotlb_add_range(struct vduse_iova_domain *domain, 21 u64 start, u64 last, 22 u64 addr, unsigned int perm, 23 struct file *file, u64 offset) 24{ 25 struct vdpa_map_file *map_file; 26 int ret; 27 28 map_file = kmalloc(sizeof(*map_file), GFP_ATOMIC); 29 if (!map_file) 30 return -ENOMEM; 31 32 map_file->file = get_file(file); 33 map_file->offset = offset; 34 35 ret = vhost_iotlb_add_range_ctx(domain->iotlb, start, last, 36 addr, perm, map_file); 37 if (ret) { 38 fput(map_file->file); 39 kfree(map_file); 40 return ret; 41 } 42 return 0; 43} 44 45static void vduse_iotlb_del_range(struct vduse_iova_domain *domain, 46 u64 start, u64 last) 47{ 48 struct vdpa_map_file *map_file; 49 struct vhost_iotlb_map *map; 50 51 while ((map = vhost_iotlb_itree_first(domain->iotlb, start, last))) { 52 map_file = (struct vdpa_map_file *)map->opaque; 53 fput(map_file->file); 54 kfree(map_file); 55 vhost_iotlb_map_free(domain->iotlb, map); 56 } 57} 58 59int vduse_domain_set_map(struct vduse_iova_domain *domain, 60 struct vhost_iotlb *iotlb) 61{ 62 struct vdpa_map_file *map_file; 63 struct vhost_iotlb_map *map; 64 u64 start = 0ULL, last = ULLONG_MAX; 65 int ret; 66 67 spin_lock(&domain->iotlb_lock); 68 vduse_iotlb_del_range(domain, start, last); 69 70 for (map = vhost_iotlb_itree_first(iotlb, start, last); map; 71 map = vhost_iotlb_itree_next(map, start, last)) { 72 map_file = (struct vdpa_map_file *)map->opaque; 73 ret = vduse_iotlb_add_range(domain, map->start, map->last, 74 map->addr, map->perm, 75 map_file->file, 76 map_file->offset); 77 if (ret) 78 goto err; 79 } 80 spin_unlock(&domain->iotlb_lock); 81 82 return 0; 83err: 84 vduse_iotlb_del_range(domain, start, last); 85 spin_unlock(&domain->iotlb_lock); 86 return ret; 87} 88 89void vduse_domain_clear_map(struct vduse_iova_domain *domain, 90 struct vhost_iotlb *iotlb) 91{ 92 struct vhost_iotlb_map *map; 93 u64 start = 0ULL, last = ULLONG_MAX; 94 95 spin_lock(&domain->iotlb_lock); 96 for (map = vhost_iotlb_itree_first(iotlb, start, last); map; 97 map = vhost_iotlb_itree_next(map, start, last)) { 98 vduse_iotlb_del_range(domain, map->start, map->last); 99 } 100 spin_unlock(&domain->iotlb_lock); 101} 102 103static int vduse_domain_map_bounce_page(struct vduse_iova_domain *domain, 104 u64 iova, u64 size, u64 paddr) 105{ 106 struct vduse_bounce_map *map; 107 u64 last = iova + size - 1; 108 109 while (iova <= last) { 110 map = &domain->bounce_maps[iova >> PAGE_SHIFT]; 111 if (!map->bounce_page) { 112 map->bounce_page = alloc_page(GFP_ATOMIC); 113 if (!map->bounce_page) 114 return -ENOMEM; 115 } 116 map->orig_phys = paddr; 117 paddr += PAGE_SIZE; 118 iova += PAGE_SIZE; 119 } 120 return 0; 121} 122 123static void vduse_domain_unmap_bounce_page(struct vduse_iova_domain *domain, 124 u64 iova, u64 size) 125{ 126 struct vduse_bounce_map *map; 127 u64 last = iova + size - 1; 128 129 while (iova <= last) { 130 map = &domain->bounce_maps[iova >> PAGE_SHIFT]; 131 map->orig_phys = INVALID_PHYS_ADDR; 132 iova += PAGE_SIZE; 133 } 134} 135 136static void do_bounce(phys_addr_t orig, void *addr, size_t size, 137 enum dma_data_direction dir) 138{ 139 unsigned long pfn = PFN_DOWN(orig); 140 unsigned int offset = offset_in_page(orig); 141 struct page *page; 142 unsigned int sz = 0; 143 144 while (size) { 145 sz = min_t(size_t, PAGE_SIZE - offset, size); 146 147 page = pfn_to_page(pfn); 148 if (dir == DMA_TO_DEVICE) 149 memcpy_from_page(addr, page, offset, sz); 150 else 151 memcpy_to_page(page, offset, addr, sz); 152 153 size -= sz; 154 pfn++; 155 addr += sz; 156 offset = 0; 157 } 158} 159 160static void vduse_domain_bounce(struct vduse_iova_domain *domain, 161 dma_addr_t iova, size_t size, 162 enum dma_data_direction dir) 163{ 164 struct vduse_bounce_map *map; 165 unsigned int offset; 166 void *addr; 167 size_t sz; 168 169 if (iova >= domain->bounce_size) 170 return; 171 172 while (size) { 173 map = &domain->bounce_maps[iova >> PAGE_SHIFT]; 174 offset = offset_in_page(iova); 175 sz = min_t(size_t, PAGE_SIZE - offset, size); 176 177 if (WARN_ON(!map->bounce_page || 178 map->orig_phys == INVALID_PHYS_ADDR)) 179 return; 180 181 addr = kmap_local_page(map->bounce_page); 182 do_bounce(map->orig_phys + offset, addr + offset, sz, dir); 183 kunmap_local(addr); 184 size -= sz; 185 iova += sz; 186 } 187} 188 189static struct page * 190vduse_domain_get_coherent_page(struct vduse_iova_domain *domain, u64 iova) 191{ 192 u64 start = iova & PAGE_MASK; 193 u64 last = start + PAGE_SIZE - 1; 194 struct vhost_iotlb_map *map; 195 struct page *page = NULL; 196 197 spin_lock(&domain->iotlb_lock); 198 map = vhost_iotlb_itree_first(domain->iotlb, start, last); 199 if (!map) 200 goto out; 201 202 page = pfn_to_page((map->addr + iova - map->start) >> PAGE_SHIFT); 203 get_page(page); 204out: 205 spin_unlock(&domain->iotlb_lock); 206 207 return page; 208} 209 210static struct page * 211vduse_domain_get_bounce_page(struct vduse_iova_domain *domain, u64 iova) 212{ 213 struct vduse_bounce_map *map; 214 struct page *page = NULL; 215 216 read_lock(&domain->bounce_lock); 217 map = &domain->bounce_maps[iova >> PAGE_SHIFT]; 218 if (domain->user_bounce_pages || !map->bounce_page) 219 goto out; 220 221 page = map->bounce_page; 222 get_page(page); 223out: 224 read_unlock(&domain->bounce_lock); 225 226 return page; 227} 228 229static void 230vduse_domain_free_kernel_bounce_pages(struct vduse_iova_domain *domain) 231{ 232 struct vduse_bounce_map *map; 233 unsigned long pfn, bounce_pfns; 234 235 bounce_pfns = domain->bounce_size >> PAGE_SHIFT; 236 237 for (pfn = 0; pfn < bounce_pfns; pfn++) { 238 map = &domain->bounce_maps[pfn]; 239 if (WARN_ON(map->orig_phys != INVALID_PHYS_ADDR)) 240 continue; 241 242 if (!map->bounce_page) 243 continue; 244 245 __free_page(map->bounce_page); 246 map->bounce_page = NULL; 247 } 248} 249 250int vduse_domain_add_user_bounce_pages(struct vduse_iova_domain *domain, 251 struct page **pages, int count) 252{ 253 struct vduse_bounce_map *map; 254 int i, ret; 255 256 /* Now we don't support partial mapping */ 257 if (count != (domain->bounce_size >> PAGE_SHIFT)) 258 return -EINVAL; 259 260 write_lock(&domain->bounce_lock); 261 ret = -EEXIST; 262 if (domain->user_bounce_pages) 263 goto out; 264 265 for (i = 0; i < count; i++) { 266 map = &domain->bounce_maps[i]; 267 if (map->bounce_page) { 268 /* Copy kernel page to user page if it's in use */ 269 if (map->orig_phys != INVALID_PHYS_ADDR) 270 memcpy_to_page(pages[i], 0, 271 page_address(map->bounce_page), 272 PAGE_SIZE); 273 __free_page(map->bounce_page); 274 } 275 map->bounce_page = pages[i]; 276 get_page(pages[i]); 277 } 278 domain->user_bounce_pages = true; 279 ret = 0; 280out: 281 write_unlock(&domain->bounce_lock); 282 283 return ret; 284} 285 286void vduse_domain_remove_user_bounce_pages(struct vduse_iova_domain *domain) 287{ 288 struct vduse_bounce_map *map; 289 unsigned long i, count; 290 291 write_lock(&domain->bounce_lock); 292 if (!domain->user_bounce_pages) 293 goto out; 294 295 count = domain->bounce_size >> PAGE_SHIFT; 296 for (i = 0; i < count; i++) { 297 struct page *page = NULL; 298 299 map = &domain->bounce_maps[i]; 300 if (WARN_ON(!map->bounce_page)) 301 continue; 302 303 /* Copy user page to kernel page if it's in use */ 304 if (map->orig_phys != INVALID_PHYS_ADDR) { 305 page = alloc_page(GFP_ATOMIC | __GFP_NOFAIL); 306 memcpy_from_page(page_address(page), 307 map->bounce_page, 0, PAGE_SIZE); 308 } 309 put_page(map->bounce_page); 310 map->bounce_page = page; 311 } 312 domain->user_bounce_pages = false; 313out: 314 write_unlock(&domain->bounce_lock); 315} 316 317void vduse_domain_reset_bounce_map(struct vduse_iova_domain *domain) 318{ 319 if (!domain->bounce_map) 320 return; 321 322 spin_lock(&domain->iotlb_lock); 323 if (!domain->bounce_map) 324 goto unlock; 325 326 vduse_iotlb_del_range(domain, 0, domain->bounce_size - 1); 327 domain->bounce_map = 0; 328unlock: 329 spin_unlock(&domain->iotlb_lock); 330} 331 332static int vduse_domain_init_bounce_map(struct vduse_iova_domain *domain) 333{ 334 int ret = 0; 335 336 if (domain->bounce_map) 337 return 0; 338 339 spin_lock(&domain->iotlb_lock); 340 if (domain->bounce_map) 341 goto unlock; 342 343 ret = vduse_iotlb_add_range(domain, 0, domain->bounce_size - 1, 344 0, VHOST_MAP_RW, domain->file, 0); 345 if (ret) 346 goto unlock; 347 348 domain->bounce_map = 1; 349unlock: 350 spin_unlock(&domain->iotlb_lock); 351 return ret; 352} 353 354static dma_addr_t 355vduse_domain_alloc_iova(struct iova_domain *iovad, 356 unsigned long size, unsigned long limit) 357{ 358 unsigned long shift = iova_shift(iovad); 359 unsigned long iova_len = iova_align(iovad, size) >> shift; 360 unsigned long iova_pfn; 361 362 iova_pfn = alloc_iova_fast(iovad, iova_len, limit >> shift, true); 363 364 return (dma_addr_t)iova_pfn << shift; 365} 366 367static void vduse_domain_free_iova(struct iova_domain *iovad, 368 dma_addr_t iova, size_t size) 369{ 370 unsigned long shift = iova_shift(iovad); 371 unsigned long iova_len = iova_align(iovad, size) >> shift; 372 373 free_iova_fast(iovad, iova >> shift, iova_len); 374} 375 376void vduse_domain_sync_single_for_device(struct vduse_iova_domain *domain, 377 dma_addr_t dma_addr, size_t size, 378 enum dma_data_direction dir) 379{ 380 read_lock(&domain->bounce_lock); 381 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) 382 vduse_domain_bounce(domain, dma_addr, size, DMA_TO_DEVICE); 383 read_unlock(&domain->bounce_lock); 384} 385 386void vduse_domain_sync_single_for_cpu(struct vduse_iova_domain *domain, 387 dma_addr_t dma_addr, size_t size, 388 enum dma_data_direction dir) 389{ 390 read_lock(&domain->bounce_lock); 391 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) 392 vduse_domain_bounce(domain, dma_addr, size, DMA_FROM_DEVICE); 393 read_unlock(&domain->bounce_lock); 394} 395 396dma_addr_t vduse_domain_map_page(struct vduse_iova_domain *domain, 397 struct page *page, unsigned long offset, 398 size_t size, enum dma_data_direction dir, 399 unsigned long attrs) 400{ 401 struct iova_domain *iovad = &domain->stream_iovad; 402 unsigned long limit = domain->bounce_size - 1; 403 phys_addr_t pa = page_to_phys(page) + offset; 404 dma_addr_t iova = vduse_domain_alloc_iova(iovad, size, limit); 405 406 if (!iova) 407 return DMA_MAPPING_ERROR; 408 409 if (vduse_domain_init_bounce_map(domain)) 410 goto err; 411 412 read_lock(&domain->bounce_lock); 413 if (vduse_domain_map_bounce_page(domain, (u64)iova, (u64)size, pa)) 414 goto err_unlock; 415 416 if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && 417 (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) 418 vduse_domain_bounce(domain, iova, size, DMA_TO_DEVICE); 419 420 read_unlock(&domain->bounce_lock); 421 422 return iova; 423err_unlock: 424 read_unlock(&domain->bounce_lock); 425err: 426 vduse_domain_free_iova(iovad, iova, size); 427 return DMA_MAPPING_ERROR; 428} 429 430void vduse_domain_unmap_page(struct vduse_iova_domain *domain, 431 dma_addr_t dma_addr, size_t size, 432 enum dma_data_direction dir, unsigned long attrs) 433{ 434 struct iova_domain *iovad = &domain->stream_iovad; 435 read_lock(&domain->bounce_lock); 436 if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && 437 (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)) 438 vduse_domain_bounce(domain, dma_addr, size, DMA_FROM_DEVICE); 439 440 vduse_domain_unmap_bounce_page(domain, (u64)dma_addr, (u64)size); 441 read_unlock(&domain->bounce_lock); 442 vduse_domain_free_iova(iovad, dma_addr, size); 443} 444 445void *vduse_domain_alloc_coherent(struct vduse_iova_domain *domain, 446 size_t size, dma_addr_t *dma_addr, 447 gfp_t flag, unsigned long attrs) 448{ 449 struct iova_domain *iovad = &domain->consistent_iovad; 450 unsigned long limit = domain->iova_limit; 451 dma_addr_t iova = vduse_domain_alloc_iova(iovad, size, limit); 452 void *orig = alloc_pages_exact(size, flag); 453 454 if (!iova || !orig) 455 goto err; 456 457 spin_lock(&domain->iotlb_lock); 458 if (vduse_iotlb_add_range(domain, (u64)iova, (u64)iova + size - 1, 459 virt_to_phys(orig), VHOST_MAP_RW, 460 domain->file, (u64)iova)) { 461 spin_unlock(&domain->iotlb_lock); 462 goto err; 463 } 464 spin_unlock(&domain->iotlb_lock); 465 466 *dma_addr = iova; 467 468 return orig; 469err: 470 *dma_addr = DMA_MAPPING_ERROR; 471 if (orig) 472 free_pages_exact(orig, size); 473 if (iova) 474 vduse_domain_free_iova(iovad, iova, size); 475 476 return NULL; 477} 478 479void vduse_domain_free_coherent(struct vduse_iova_domain *domain, size_t size, 480 void *vaddr, dma_addr_t dma_addr, 481 unsigned long attrs) 482{ 483 struct iova_domain *iovad = &domain->consistent_iovad; 484 struct vhost_iotlb_map *map; 485 struct vdpa_map_file *map_file; 486 phys_addr_t pa; 487 488 spin_lock(&domain->iotlb_lock); 489 map = vhost_iotlb_itree_first(domain->iotlb, (u64)dma_addr, 490 (u64)dma_addr + size - 1); 491 if (WARN_ON(!map)) { 492 spin_unlock(&domain->iotlb_lock); 493 return; 494 } 495 map_file = (struct vdpa_map_file *)map->opaque; 496 fput(map_file->file); 497 kfree(map_file); 498 pa = map->addr; 499 vhost_iotlb_map_free(domain->iotlb, map); 500 spin_unlock(&domain->iotlb_lock); 501 502 vduse_domain_free_iova(iovad, dma_addr, size); 503 free_pages_exact(phys_to_virt(pa), size); 504} 505 506static vm_fault_t vduse_domain_mmap_fault(struct vm_fault *vmf) 507{ 508 struct vduse_iova_domain *domain = vmf->vma->vm_private_data; 509 unsigned long iova = vmf->pgoff << PAGE_SHIFT; 510 struct page *page; 511 512 if (!domain) 513 return VM_FAULT_SIGBUS; 514 515 if (iova < domain->bounce_size) 516 page = vduse_domain_get_bounce_page(domain, iova); 517 else 518 page = vduse_domain_get_coherent_page(domain, iova); 519 520 if (!page) 521 return VM_FAULT_SIGBUS; 522 523 vmf->page = page; 524 525 return 0; 526} 527 528static const struct vm_operations_struct vduse_domain_mmap_ops = { 529 .fault = vduse_domain_mmap_fault, 530}; 531 532static int vduse_domain_mmap(struct file *file, struct vm_area_struct *vma) 533{ 534 struct vduse_iova_domain *domain = file->private_data; 535 536 vm_flags_set(vma, VM_DONTDUMP | VM_DONTEXPAND); 537 vma->vm_private_data = domain; 538 vma->vm_ops = &vduse_domain_mmap_ops; 539 540 return 0; 541} 542 543static int vduse_domain_release(struct inode *inode, struct file *file) 544{ 545 struct vduse_iova_domain *domain = file->private_data; 546 547 spin_lock(&domain->iotlb_lock); 548 vduse_iotlb_del_range(domain, 0, ULLONG_MAX); 549 vduse_domain_remove_user_bounce_pages(domain); 550 vduse_domain_free_kernel_bounce_pages(domain); 551 spin_unlock(&domain->iotlb_lock); 552 put_iova_domain(&domain->stream_iovad); 553 put_iova_domain(&domain->consistent_iovad); 554 vhost_iotlb_free(domain->iotlb); 555 vfree(domain->bounce_maps); 556 kfree(domain); 557 558 return 0; 559} 560 561static const struct file_operations vduse_domain_fops = { 562 .owner = THIS_MODULE, 563 .mmap = vduse_domain_mmap, 564 .release = vduse_domain_release, 565}; 566 567void vduse_domain_destroy(struct vduse_iova_domain *domain) 568{ 569 fput(domain->file); 570} 571 572struct vduse_iova_domain * 573vduse_domain_create(unsigned long iova_limit, size_t bounce_size) 574{ 575 struct vduse_iova_domain *domain; 576 struct file *file; 577 struct vduse_bounce_map *map; 578 unsigned long pfn, bounce_pfns; 579 int ret; 580 581 bounce_pfns = PAGE_ALIGN(bounce_size) >> PAGE_SHIFT; 582 if (iova_limit <= bounce_size) 583 return NULL; 584 585 domain = kzalloc(sizeof(*domain), GFP_KERNEL); 586 if (!domain) 587 return NULL; 588 589 domain->iotlb = vhost_iotlb_alloc(0, 0); 590 if (!domain->iotlb) 591 goto err_iotlb; 592 593 domain->iova_limit = iova_limit; 594 domain->bounce_size = PAGE_ALIGN(bounce_size); 595 domain->bounce_maps = vzalloc(bounce_pfns * 596 sizeof(struct vduse_bounce_map)); 597 if (!domain->bounce_maps) 598 goto err_map; 599 600 for (pfn = 0; pfn < bounce_pfns; pfn++) { 601 map = &domain->bounce_maps[pfn]; 602 map->orig_phys = INVALID_PHYS_ADDR; 603 } 604 file = anon_inode_getfile("[vduse-domain]", &vduse_domain_fops, 605 domain, O_RDWR); 606 if (IS_ERR(file)) 607 goto err_file; 608 609 domain->file = file; 610 rwlock_init(&domain->bounce_lock); 611 spin_lock_init(&domain->iotlb_lock); 612 init_iova_domain(&domain->stream_iovad, 613 PAGE_SIZE, IOVA_START_PFN); 614 ret = iova_domain_init_rcaches(&domain->stream_iovad); 615 if (ret) 616 goto err_iovad_stream; 617 init_iova_domain(&domain->consistent_iovad, 618 PAGE_SIZE, bounce_pfns); 619 ret = iova_domain_init_rcaches(&domain->consistent_iovad); 620 if (ret) 621 goto err_iovad_consistent; 622 623 return domain; 624err_iovad_consistent: 625 put_iova_domain(&domain->stream_iovad); 626err_iovad_stream: 627 fput(file); 628err_file: 629 vfree(domain->bounce_maps); 630err_map: 631 vhost_iotlb_free(domain->iotlb); 632err_iotlb: 633 kfree(domain); 634 return NULL; 635} 636 637int vduse_domain_init(void) 638{ 639 return iova_cache_get(); 640} 641 642void vduse_domain_exit(void) 643{ 644 iova_cache_put(); 645} 646