// Copyright 2016 The Fuchsia Authors
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT

#include "vm/vm_object_paged.h"

#include "vm_priv.h"

#include <arch/ops.h>
#include <assert.h>
#include <err.h>
#include <fbl/alloc_checker.h>
#include <fbl/auto_call.h>
#include <inttypes.h>
#include <lib/console.h>
#include <stdlib.h>
#include <string.h>
#include <trace.h>
#include <vm/fault.h>
#include <vm/physmap.h>
#include <vm/vm.h>
#include <vm/vm_address_region.h>
#include <zircon/types.h>

#define LOCAL_TRACE MAX(VM_GLOBAL_TRACE, 0)

namespace {

void ZeroPage(paddr_t pa) {
    void* ptr = paddr_to_physmap(pa);
    DEBUG_ASSERT(ptr);

    arch_zero_page(ptr);
}

void ZeroPage(vm_page_t* p) {
    paddr_t pa = p->paddr();
    ZeroPage(pa);
}

void InitializeVmPage(vm_page_t* p) {
    DEBUG_ASSERT(p->state == VM_PAGE_STATE_ALLOC);
    p->state = VM_PAGE_STATE_OBJECT;
    p->object.pin_count = 0;
}

// round up the size to the next page size boundary and make sure we don't wrap
zx_status_t RoundSize(uint64_t size, uint64_t* out_size) {
    *out_size = ROUNDUP_PAGE_SIZE(size);
    if (*out_size < size) {
        return ZX_ERR_OUT_OF_RANGE;
    }

    // there's a max size to keep indexes within range
    if (*out_size > VmObjectPaged::MAX_SIZE) {
        return ZX_ERR_OUT_OF_RANGE;
    }

    return ZX_OK;
}

} // namespace

VmObjectPaged::VmObjectPaged(
    uint32_t options, uint32_t pmm_alloc_flags, uint64_t size, fbl::RefPtr<VmObject> parent)
    : VmObject(fbl::move(parent)),
      options_(options),
      size_(size),
      pmm_alloc_flags_(pmm_alloc_flags) {
    LTRACEF("%p\n", this);

    DEBUG_ASSERT(IS_PAGE_ALIGNED(size_));
}

VmObjectPaged::~VmObjectPaged() {
    canary_.Assert();

    LTRACEF("%p\n", this);

    page_list_.ForEveryPage(
        [this](const auto p, uint64_t off) {
            if (this->is_contiguous()) {
                p->object.pin_count--;
            }
            ASSERT(p->object.pin_count == 0);
            return ZX_ERR_NEXT;
        });

    // free all of the pages attached to us
    page_list_.FreeAllPages();
}

zx_status_t VmObjectPaged::Create(uint32_t pmm_alloc_flags,
                                  uint32_t options,
                                  uint64_t size, fbl::RefPtr<VmObject>* obj) {
    // make sure size is page aligned
    zx_status_t status = RoundSize(size, &size);
    if (status != ZX_OK) {
        return status;
    }

    if (options & kContiguous) {
        // Force callers to use CreateContiguous() instead.
        return ZX_ERR_INVALID_ARGS;
    }

    fbl::AllocChecker ac;
    auto vmo = fbl::AdoptRef<VmObject>(
        new (&ac) VmObjectPaged(options, pmm_alloc_flags, size, nullptr));
    if (!ac.check()) {
        return ZX_ERR_NO_MEMORY;
    }

    *obj = fbl::move(vmo);

    return ZX_OK;
}

zx_status_t VmObjectPaged::CreateContiguous(uint32_t pmm_alloc_flags, uint64_t size,
                                            uint8_t alignment_log2, fbl::RefPtr<VmObject>* obj) {
    DEBUG_ASSERT(alignment_log2 < sizeof(uint64_t) * 8);
    // make sure size is page aligned
    zx_status_t status = RoundSize(size, &size);
    if (status != ZX_OK) {
        return status;
    }

    fbl::AllocChecker ac;
    auto vmo = fbl::AdoptRef<VmObject>(
        new (&ac) VmObjectPaged(kContiguous, pmm_alloc_flags, size, nullptr));
    if (!ac.check()) {
        return ZX_ERR_NO_MEMORY;
    }

    if (size == 0) {
        *obj = fbl::move(vmo);
        return ZX_OK;
    }

    // allocate the pages
    list_node page_list;
    list_initialize(&page_list);

    size_t num_pages = size / PAGE_SIZE;
    paddr_t pa;
    status = pmm_alloc_contiguous(num_pages, pmm_alloc_flags, alignment_log2, &pa, &page_list);
    if (status != ZX_OK) {
        LTRACEF("failed to allocate enough pages (asked for %zu)\n", num_pages);
        return ZX_ERR_NO_MEMORY;
    }
    auto cleanup_phys_pages = fbl::MakeAutoCall([&page_list]() {
        pmm_free(&page_list);
    });

    // add them to the appropriate range of the object
    VmObjectPaged* vmop = static_cast<VmObjectPaged*>(vmo.get());
    for (uint64_t off = 0; off < size; off += PAGE_SIZE) {
        vm_page_t* p = list_remove_head_type(&page_list, vm_page_t, queue_node);
        ASSERT(p);

        InitializeVmPage(p);

        // TODO: remove once pmm returns zeroed pages
        ZeroPage(p);

        // We don't need thread-safety analysis here, since this VMO has not
        // been shared anywhere yet.
        [&]() TA_NO_THREAD_SAFETY_ANALYSIS {
            status = vmop->page_list_.AddPage(p, off);
        }();
        if (status != ZX_OK) {
            return status;
        }

        // Mark the pages as pinned, so they can't be physically rearranged
        // underneath us.
        p->object.pin_count++;
    }

    cleanup_phys_pages.cancel();
    *obj = fbl::move(vmo);
    return ZX_OK;
}

zx_status_t VmObjectPaged::CreateFromROData(const void* data, size_t size, fbl::RefPtr<VmObject>* obj) {
    LTRACEF("data %p, size %zu\n", data, size);

    fbl::RefPtr<VmObject> vmo;
    zx_status_t status = Create(PMM_ALLOC_FLAG_ANY, 0, size, &vmo);
    if (status != ZX_OK) {
        return status;
    }

    if (size > 0) {
        ASSERT(IS_PAGE_ALIGNED(size));
        ASSERT(IS_PAGE_ALIGNED(reinterpret_cast<uintptr_t>(data)));

        // Do a direct lookup of the physical pages backing the range of
        // the kernel that these addresses belong to and jam them directly
        // into the VMO.
        //
        // NOTE: This relies on the kernel not otherwise owning the pages.
        // If the setup of the kernel's address space changes so that the
        // pages are attached to a kernel VMO, this will need to change.

        paddr_t start_paddr = vaddr_to_paddr(data);
        ASSERT(start_paddr != 0);

        for (size_t count = 0; count < size / PAGE_SIZE; count++) {
            paddr_t pa = start_paddr + count * PAGE_SIZE;
            vm_page_t* page = paddr_to_vm_page(pa);
            ASSERT(page);

            if (page->state == VM_PAGE_STATE_WIRED) {
                // it's wired to the kernel, so we can just use it directly
            } else if (page->state == VM_PAGE_STATE_FREE) {
                list_node list = LIST_INITIAL_VALUE(list);
                ASSERT(pmm_alloc_range(pa, 1, &list) == ZX_OK);
                page->state = VM_PAGE_STATE_WIRED;
            } else {
                panic("page used to back static vmo in unusable state: paddr %#" PRIxPTR " state %u\n", pa,
                      page->state);
            }

            // XXX hack to work around the ref pointer to the base class
            auto vmo2 = static_cast<VmObjectPaged*>(vmo.get());
            vmo2->AddPage(page, count * PAGE_SIZE);
        }
    }

    *obj = fbl::move(vmo);

    return ZX_OK;
}

zx_status_t VmObjectPaged::CloneCOW(bool resizable, uint64_t offset, uint64_t size,
                                    bool copy_name, fbl::RefPtr<VmObject>* clone_vmo) {
    LTRACEF("vmo %p offset %#" PRIx64 " size %#" PRIx64 "\n", this, offset, size);

    canary_.Assert();

    // make sure size is page aligned
    zx_status_t status = RoundSize(size, &size);
    if (status != ZX_OK) {
        return status;
    }

    auto options = resizable ? kResizable : 0u;

    // allocate the clone up front outside of our lock
    fbl::AllocChecker ac;
    auto vmo = fbl::AdoptRef<VmObjectPaged>(
        new (&ac) VmObjectPaged(options, pmm_alloc_flags_, size, fbl::WrapRefPtr(this)));
    if (!ac.check()) {
        return ZX_ERR_NO_MEMORY;
    }

    Guard<fbl::Mutex> guard{&lock_};

    // add the new VMO as a child before we do anything, since its
    // dtor expects to find it in its parent's child list
    AddChildLocked(vmo.get());

    // check that we're not uncached in some way
    if (cache_policy_ != ARCH_MMU_FLAG_CACHED) {
        return ZX_ERR_BAD_STATE;
    }

    // set the offset with the parent
    status = vmo->SetParentOffsetLocked(offset);
    if (status != ZX_OK) {
        return status;
    }

    if (copy_name) {
        vmo->name_ = name_;
    }

    *clone_vmo = fbl::move(vmo);

    return ZX_OK;
}

void VmObjectPaged::Dump(uint depth, bool verbose) {
    canary_.Assert();

    // This can grab our lock.
    uint64_t parent_id = parent_user_id();

    Guard<fbl::Mutex> guard{&lock_};

    size_t count = 0;
    page_list_.ForEveryPage([&count](const auto p, uint64_t) {
        count++;
        return ZX_ERR_NEXT;
    });

    for (uint i = 0; i < depth; ++i) {
        printf("  ");
    }
    printf("vmo %p/k%" PRIu64 " size %#" PRIx64
           " pages %zu ref %d parent k%" PRIu64 "\n",
           this, user_id_, size_, count, ref_count_debug(), parent_id);

    if (verbose) {
        auto f = [depth](const auto p, uint64_t offset) {
            for (uint i = 0; i < depth + 1; ++i) {
                printf("  ");
            }
            printf("offset %#" PRIx64 " page %p paddr %#" PRIxPTR "\n", offset, p, p->paddr());
            return ZX_ERR_NEXT;
        };
        page_list_.ForEveryPage(f);
    }
}

size_t VmObjectPaged::AllocatedPagesInRange(uint64_t offset, uint64_t len) const {
    canary_.Assert();
    Guard<fbl::Mutex> guard{&lock_};
    uint64_t new_len;
    if (!TrimRange(offset, len, size_, &new_len)) {
        return 0;
    }
    size_t count = 0;
    // TODO: Figure out what to do with our parent's pages. If we're a clone,
    // page_list_ only contains pages that we've made copies of.
    page_list_.ForEveryPage(
        [&count, offset, new_len](const auto p, uint64_t off) {
            if (off >= offset && off < offset + new_len) {
                count++;
            }
            return ZX_ERR_NEXT;
        });
    return count;
}

zx_status_t VmObjectPaged::AddPage(vm_page_t* p, uint64_t offset) {
    Guard<fbl::Mutex> guard{&lock_};

    return AddPageLocked(p, offset);
}

zx_status_t VmObjectPaged::AddPageLocked(vm_page_t* p, uint64_t offset) {
    canary_.Assert();
    DEBUG_ASSERT(lock_.lock().IsHeld());

    LTRACEF("vmo %p, offset %#" PRIx64 ", page %p (%#" PRIxPTR ")\n", this, offset, p, p->paddr());

    DEBUG_ASSERT(p);

    if (offset >= size_) {
        return ZX_ERR_OUT_OF_RANGE;
    }

    zx_status_t err = page_list_.AddPage(p, offset);
    if (err != ZX_OK) {
        return err;
    }

    // other mappings may have covered this offset into the vmo, so unmap those ranges
    RangeChangeUpdateLocked(offset, PAGE_SIZE);

    return ZX_OK;
}

// Looks up the page at the requested offset, faulting it in if requested and necessary. If
// this VMO has a parent and the requested page isn't found, the parent will be searched.
//
// |free_list|, if not NULL, is a list of allocated but unused vm_page_t that
// this function may allocate from. This function will need at most one entry,
// and will not fail if |free_list| is a non-empty list, faulting in was requested,
// and offset is in range.
zx_status_t VmObjectPaged::GetPageLocked(uint64_t offset, uint pf_flags, list_node* free_list,
                                         vm_page_t** const page_out, paddr_t* const pa_out) {
    canary_.Assert();
    DEBUG_ASSERT(lock_.lock().IsHeld());

    if (offset >= size_) {
        return ZX_ERR_OUT_OF_RANGE;
    }

    vm_page_t* p;
    paddr_t pa;

    // see if we already have a page at that offset
    p = page_list_.GetPage(offset);
    if (p) {
        if (page_out) {
            *page_out = p;
        }
        if (pa_out) {
            *pa_out = p->paddr();
        }
        return ZX_OK;
    }

    __UNUSED char pf_string[5];
    LTRACEF("vmo %p, offset %#" PRIx64 ", pf_flags %#x (%s)\n", this, offset, pf_flags,
            vmm_pf_flags_to_string(pf_flags, pf_string));

    // if we have a parent see if they have a page for us
    if (parent_) {
        uint64_t parent_offset;
        bool overflowed = add_overflow(parent_offset_, offset, &parent_offset);
        ASSERT(!overflowed);

        // make sure we don't cause the parent to fault in new pages, just ask for any that already exist
        uint parent_pf_flags = pf_flags & ~(VMM_PF_FLAG_FAULT_MASK);

        zx_status_t status = parent_->GetPageLocked(parent_offset, parent_pf_flags,
                                                    nullptr, &p, &pa);
        if (status == ZX_OK) {
            // we have a page from them. if we're read-only faulting, return that page so they can map
            // or read from it directly
            if ((pf_flags & VMM_PF_FLAG_WRITE) == 0) {
                if (page_out) {
                    *page_out = p;
                }
                if (pa_out) {
                    *pa_out = pa;
                }

                LTRACEF("read only faulting in page %p, pa %#" PRIxPTR " from parent\n", p, pa);

                return ZX_OK;
            }

            // if we're write faulting, we need to clone it and return the new page
            paddr_t pa_clone;
            vm_page_t* p_clone = nullptr;
            if (free_list) {
                p_clone = list_remove_head_type(free_list, vm_page, queue_node);
                if (p_clone) {
                    pa_clone = p_clone->paddr();
                }
            }
            if (!p_clone) {
                status = pmm_alloc_page(pmm_alloc_flags_, &p_clone, &pa_clone);
            }
            if (!p_clone) {
                return ZX_ERR_NO_MEMORY;
            }

            InitializeVmPage(p_clone);

            // do a direct copy of the two pages
            const void* src = paddr_to_physmap(pa);
            void* dst = paddr_to_physmap(pa_clone);

            DEBUG_ASSERT(src && dst);

            memcpy(dst, src, PAGE_SIZE);

            // add the new page and return it
            status = AddPageLocked(p_clone, offset);
            DEBUG_ASSERT(status == ZX_OK);

            LTRACEF("copy-on-write faulted in page %p, pa %#" PRIxPTR " copied from %p, pa %#" PRIxPTR "\n",
                    p, pa, p_clone, pa_clone);

            if (page_out) {
                *page_out = p_clone;
            }
            if (pa_out) {
                *pa_out = pa_clone;
            }

            return ZX_OK;
        }
    }

    // if we're not being asked to sw or hw fault in the page, return not found
    if ((pf_flags & VMM_PF_FLAG_FAULT_MASK) == 0) {
        return ZX_ERR_NOT_FOUND;
    }

    // if we're read faulting, we don't already have a page, and the parent doesn't have it,
    // return the single global zero page
    if ((pf_flags & VMM_PF_FLAG_WRITE) == 0) {
        LTRACEF("returning the zero page\n");
        if (page_out) {
            *page_out = vm_get_zero_page();
        }
        if (pa_out) {
            *pa_out = vm_get_zero_page_paddr();
        }
        return ZX_OK;
    }

    // allocate a page
    if (free_list) {
        p = list_remove_head_type(free_list, vm_page, queue_node);
        if (p) {
            pa = p->paddr();
        }
    }
    if (!p) {
        pmm_alloc_page(pmm_alloc_flags_, &p, &pa);
    }
    if (!p) {
        return ZX_ERR_NO_MEMORY;
    }

    InitializeVmPage(p);

    // TODO: remove once pmm returns zeroed pages
    ZeroPage(pa);

// if ARM and not fully cached, clean/invalidate the page after zeroing it
#if ARCH_ARM64
    if (cache_policy_ != ARCH_MMU_FLAG_CACHED) {
        arch_clean_invalidate_cache_range((addr_t)paddr_to_physmap(pa), PAGE_SIZE);
    }
#endif

    zx_status_t status = AddPageLocked(p, offset);
    DEBUG_ASSERT(status == ZX_OK);

    // other mappings may have covered this offset into the vmo, so unmap those ranges
    RangeChangeUpdateLocked(offset, PAGE_SIZE);

    LTRACEF("faulted in page %p, pa %#" PRIxPTR "\n", p, pa);

    if (page_out) {
        *page_out = p;
    }
    if (pa_out) {
        *pa_out = pa;
    }

    return ZX_OK;
}

zx_status_t VmObjectPaged::CommitRange(uint64_t offset, uint64_t len, uint64_t* committed) {
    canary_.Assert();
    LTRACEF("offset %#" PRIx64 ", len %#" PRIx64 "\n", offset, len);

    if (committed) {
        *committed = 0;
    }

    Guard<fbl::Mutex> guard{&lock_};

    // trim the size
    uint64_t new_len;
    if (!TrimRange(offset, len, size_, &new_len)) {
        return ZX_ERR_OUT_OF_RANGE;
    }

    // was in range, just zero length
    if (new_len == 0) {
        return ZX_OK;
    }

    // compute a page aligned end to do our searches in to make sure we cover all the pages
    uint64_t end = ROUNDUP_PAGE_SIZE(offset + new_len);
    DEBUG_ASSERT(end > offset);
    offset = ROUNDDOWN(offset, PAGE_SIZE);

    // make a pass through the list, counting the number of pages we need to allocate
    size_t count = 0;
    uint64_t expected_next_off = offset;
    page_list_.ForEveryPageInRange(
        [&count, &expected_next_off](const auto p, uint64_t off) {

            count += (off - expected_next_off) / PAGE_SIZE;
            expected_next_off = off + PAGE_SIZE;
            return ZX_ERR_NEXT;
        },
        expected_next_off, end);

    // If expected_next_off isn't at the end of the range, there was a gap at
    // the end. Add it back in
    DEBUG_ASSERT(end >= expected_next_off);
    count += (end - expected_next_off) / PAGE_SIZE;
    if (count == 0) {
        return ZX_OK;
    }

    // allocate count number of pages
    list_node page_list;
    list_initialize(&page_list);

    zx_status_t status = pmm_alloc_pages(count, pmm_alloc_flags_, &page_list);
    if (status != ZX_OK) {
        return status;
    }

    // unmap all of the pages in this range on all the mapping regions
    RangeChangeUpdateLocked(offset, end - offset);

    // add them to the appropriate range of the object
    for (uint64_t o = offset; o < end; o += PAGE_SIZE) {
        // Don't commit if we already have this page
        vm_page_t* p = page_list_.GetPage(o);
        if (p) {
            continue;
        }

        // Check if our parent has the page
        paddr_t pa;
        const uint flags = VMM_PF_FLAG_SW_FAULT | VMM_PF_FLAG_WRITE;
        // Should not be able to fail, since we're providing it memory and the
        // range should be valid.
        zx_status_t status = GetPageLocked(o, flags, &page_list, &p, &pa);
        ASSERT(status == ZX_OK);

        if (committed) {
            *committed += PAGE_SIZE;
        }
    }

    DEBUG_ASSERT(list_is_empty(&page_list));

    // for now we only support committing as much as we were asked for
    DEBUG_ASSERT(!committed || *committed == count * PAGE_SIZE);

    return ZX_OK;
}

zx_status_t VmObjectPaged::DecommitRange(uint64_t offset, uint64_t len, uint64_t* decommitted) {
    canary_.Assert();
    LTRACEF("offset %#" PRIx64 ", len %#" PRIx64 "\n", offset, len);

    if (decommitted) {
        *decommitted = 0;
    }

    if (options_ & kContiguous) {
        return ZX_ERR_NOT_SUPPORTED;
    }

    Guard<fbl::Mutex> guard{&lock_};

    // trim the size
    uint64_t new_len;
    if (!TrimRange(offset, len, size_, &new_len)) {
        return ZX_ERR_OUT_OF_RANGE;
    }

    // was in range, just zero length
    if (new_len == 0) {
        return ZX_OK;
    }

    // figure the starting and ending page offset
    uint64_t start = ROUNDDOWN(offset, PAGE_SIZE);
    uint64_t end = ROUNDUP_PAGE_SIZE(offset + new_len);
    DEBUG_ASSERT(end > offset);
    DEBUG_ASSERT(end > start);
    uint64_t page_aligned_len = end - start;

    LTRACEF("start offset %#" PRIx64 ", end %#" PRIx64 ", page_aligned_len %#" PRIx64 "\n", start, end,
            page_aligned_len);

    // TODO(teisenbe): Allow decommitting of pages pinned by
    // CommitRangeContiguous

    if (AnyPagesPinnedLocked(start, page_aligned_len)) {
        return ZX_ERR_BAD_STATE;
    }

    // unmap all of the pages in this range on all the mapping regions
    RangeChangeUpdateLocked(start, page_aligned_len);

    // iterate through the pages, freeing them
    // TODO: use page_list iterator, move pages to list, free at once
    while (start < end) {
        auto status = page_list_.FreePage(start);
        if (status == ZX_OK && decommitted) {
            *decommitted += PAGE_SIZE;
        }
        start += PAGE_SIZE;
    }

    return ZX_OK;
}

zx_status_t VmObjectPaged::Pin(uint64_t offset, uint64_t len) {
    canary_.Assert();

    Guard<fbl::Mutex> guard{&lock_};
    return PinLocked(offset, len);
}

zx_status_t VmObjectPaged::PinLocked(uint64_t offset, uint64_t len) {
    canary_.Assert();

    // verify that the range is within the object
    if (unlikely(!InRange(offset, len, size_))) {
        return ZX_ERR_OUT_OF_RANGE;
    }

    if (unlikely(len == 0)) {
        return ZX_OK;
    }

    const uint64_t start_page_offset = ROUNDDOWN(offset, PAGE_SIZE);
    const uint64_t end_page_offset = ROUNDUP(offset + len, PAGE_SIZE);

    uint64_t expected_next_off = start_page_offset;
    zx_status_t status = page_list_.ForEveryPageInRange(
        [&expected_next_off](const auto p, uint64_t off) {
            if (off != expected_next_off) {
                return ZX_ERR_NOT_FOUND;
            }

            DEBUG_ASSERT(p->state == VM_PAGE_STATE_OBJECT);
            if (p->object.pin_count == VM_PAGE_OBJECT_MAX_PIN_COUNT) {
                return ZX_ERR_UNAVAILABLE;
            }

            p->object.pin_count++;
            expected_next_off = off + PAGE_SIZE;
            return ZX_ERR_NEXT;
        },
        start_page_offset, end_page_offset);

    if (status == ZX_OK && expected_next_off != end_page_offset) {
        status = ZX_ERR_NOT_FOUND;
    }
    if (status != ZX_OK) {
        UnpinLocked(start_page_offset, expected_next_off - start_page_offset);
        return status;
    }

    return ZX_OK;
}

void VmObjectPaged::Unpin(uint64_t offset, uint64_t len) {
    Guard<fbl::Mutex> guard{&lock_};
    UnpinLocked(offset, len);
}

void VmObjectPaged::UnpinLocked(uint64_t offset, uint64_t len) {
    canary_.Assert();
    DEBUG_ASSERT(lock_.lock().IsHeld());

    // verify that the range is within the object
    ASSERT(InRange(offset, len, size_));

    if (unlikely(len == 0)) {
        return;
    }

    const uint64_t start_page_offset = ROUNDDOWN(offset, PAGE_SIZE);
    const uint64_t end_page_offset = ROUNDUP(offset + len, PAGE_SIZE);

    uint64_t expected_next_off = start_page_offset;
    zx_status_t status = page_list_.ForEveryPageInRange(
        [&expected_next_off](const auto p, uint64_t off) {
            if (off != expected_next_off) {
                return ZX_ERR_NOT_FOUND;
            }

            DEBUG_ASSERT(p->state == VM_PAGE_STATE_OBJECT);
            ASSERT(p->object.pin_count > 0);
            p->object.pin_count--;
            expected_next_off = off + PAGE_SIZE;
            return ZX_ERR_NEXT;
        },
        start_page_offset, end_page_offset);
    ASSERT_MSG(status == ZX_OK && expected_next_off == end_page_offset,
               "Tried to unpin an uncommitted page");
    return;
}

bool VmObjectPaged::AnyPagesPinnedLocked(uint64_t offset, size_t len) {
    canary_.Assert();
    DEBUG_ASSERT(lock_.lock().IsHeld());
    DEBUG_ASSERT(IS_PAGE_ALIGNED(offset));
    DEBUG_ASSERT(IS_PAGE_ALIGNED(len));

    const uint64_t start_page_offset = offset;
    const uint64_t end_page_offset = offset + len;

    bool found_pinned = false;
    page_list_.ForEveryPageInRange(
        [&found_pinned, start_page_offset, end_page_offset](const auto p, uint64_t off) {
            DEBUG_ASSERT(off >= start_page_offset && off < end_page_offset);
            if (p->object.pin_count > 0) {
                found_pinned = true;
                return ZX_ERR_STOP;
            }
            return ZX_ERR_NEXT;
        },
        start_page_offset, end_page_offset);

    return found_pinned;
}

zx_status_t VmObjectPaged::ResizeLocked(uint64_t s) {
    canary_.Assert();
    DEBUG_ASSERT(lock_.lock().IsHeld());

    LTRACEF("vmo %p, size %" PRIu64 "\n", this, s);

    if (!(options_ & kResizable)) {
        return ZX_ERR_UNAVAILABLE;
    }

    // round up the size to the next page size boundary and make sure we don't wrap
    zx_status_t status = RoundSize(s, &s);
    if (status != ZX_OK) {
        return status;
    }

    // make sure everything is aligned before we get started
    DEBUG_ASSERT(IS_PAGE_ALIGNED(size_));
    DEBUG_ASSERT(IS_PAGE_ALIGNED(s));

    // see if we're shrinking or expanding the vmo
    if (s < size_) {
        // shrinking
        uint64_t start = s;
        uint64_t end = size_;
        uint64_t len = end - start;

        // bail if there are any pinned pages in the range we're trimming
        if (AnyPagesPinnedLocked(start, len)) {
            return ZX_ERR_BAD_STATE;
        }

        // unmap all of the pages in this range on all the mapping regions
        RangeChangeUpdateLocked(start, len);

        // iterate through the pages, freeing them
        // TODO: use page_list iterator, move pages to list, free at once
        while (start < end) {
            page_list_.FreePage(start);
            start += PAGE_SIZE;
        }
    } else if (s > size_) {
        // expanding
        // figure the starting and ending page offset that is affected
        uint64_t start = size_;
        uint64_t end = s;
        uint64_t len = end - start;

        // inform all our children and mappings that there are new bits
        RangeChangeUpdateLocked(start, len);
    }

    // save bytewise size
    size_ = s;

    return ZX_OK;
}

zx_status_t VmObjectPaged::Resize(uint64_t s) {
    Guard<fbl::Mutex> guard{&lock_};

    return ResizeLocked(s);
}

zx_status_t VmObjectPaged::SetParentOffsetLocked(uint64_t offset) {
    DEBUG_ASSERT(lock_.lock().IsHeld());

    // offset must be page aligned
    if (!IS_PAGE_ALIGNED(offset)) {
        return ZX_ERR_INVALID_ARGS;
    }

    // TODO: ZX-692 make sure that the accumulated offset of the entire parent chain doesn't wrap 64bit space

    // make sure the size + this offset are still valid
    uint64_t end;
    if (add_overflow(offset, size_, &end)) {
        return ZX_ERR_OUT_OF_RANGE;
    }

    parent_offset_ = offset;

    return ZX_OK;
}

// perform some sort of copy in/out on a range of the object using a passed in lambda
// for the copy routine
template <typename T>
zx_status_t VmObjectPaged::ReadWriteInternal(uint64_t offset, size_t len, bool write, T copyfunc) {
    canary_.Assert();

    Guard<fbl::Mutex> guard{&lock_};

    // are we uncached? abort in this case
    if (cache_policy_ != ARCH_MMU_FLAG_CACHED) {
        return ZX_ERR_BAD_STATE;
    }

    // test if in range
    uint64_t end_offset;
    if (add_overflow(offset, len, &end_offset) || end_offset > size_) {
        return ZX_ERR_OUT_OF_RANGE;
    }

    // walk the list of pages and do the write
    uint64_t src_offset = offset;
    size_t dest_offset = 0;
    while (len > 0) {
        size_t page_offset = src_offset % PAGE_SIZE;
        size_t tocopy = MIN(PAGE_SIZE - page_offset, len);

        // fault in the page
        paddr_t pa;
        auto status = GetPageLocked(src_offset,
                                    VMM_PF_FLAG_SW_FAULT | (write ? VMM_PF_FLAG_WRITE : 0),
                                    nullptr, nullptr, &pa);
        if (status != ZX_OK) {
            return status;
        }

        // compute the kernel mapping of this page
        uint8_t* page_ptr = reinterpret_cast<uint8_t*>(paddr_to_physmap(pa));

        // call the copy routine
        auto err = copyfunc(page_ptr + page_offset, dest_offset, tocopy);
        if (err < 0) {
            return err;
        }

        src_offset += tocopy;
        dest_offset += tocopy;
        len -= tocopy;
    }

    return ZX_OK;
}

zx_status_t VmObjectPaged::Read(void* _ptr, uint64_t offset, size_t len) {
    canary_.Assert();
    // test to make sure this is a kernel pointer
    if (!is_kernel_address(reinterpret_cast<vaddr_t>(_ptr))) {
        DEBUG_ASSERT_MSG(0, "non kernel pointer passed\n");
        return ZX_ERR_INVALID_ARGS;
    }

    // read routine that just uses a memcpy
    uint8_t* ptr = reinterpret_cast<uint8_t*>(_ptr);
    auto read_routine = [ptr](const void* src, size_t offset, size_t len) -> zx_status_t {
        memcpy(ptr + offset, src, len);
        return ZX_OK;
    };

    return ReadWriteInternal(offset, len, false, read_routine);
}

zx_status_t VmObjectPaged::Write(const void* _ptr, uint64_t offset, size_t len) {
    canary_.Assert();
    // test to make sure this is a kernel pointer
    if (!is_kernel_address(reinterpret_cast<vaddr_t>(_ptr))) {
        DEBUG_ASSERT_MSG(0, "non kernel pointer passed\n");
        return ZX_ERR_INVALID_ARGS;
    }

    // write routine that just uses a memcpy
    const uint8_t* ptr = reinterpret_cast<const uint8_t*>(_ptr);
    auto write_routine = [ptr](void* dst, size_t offset, size_t len) -> zx_status_t {
        memcpy(dst, ptr + offset, len);
        return ZX_OK;
    };

    return ReadWriteInternal(offset, len, true, write_routine);
}

zx_status_t VmObjectPaged::Lookup(uint64_t offset, uint64_t len, uint pf_flags,
                                  vmo_lookup_fn_t lookup_fn, void* context) {
    canary_.Assert();
    if (unlikely(len == 0)) {
        return ZX_ERR_INVALID_ARGS;
    }

    Guard<fbl::Mutex> guard{&lock_};

    // verify that the range is within the object
    if (unlikely(!InRange(offset, len, size_))) {
        return ZX_ERR_OUT_OF_RANGE;
    }

    const uint64_t start_page_offset = ROUNDDOWN(offset, PAGE_SIZE);
    const uint64_t end_page_offset = ROUNDUP(offset + len, PAGE_SIZE);

    uint64_t expected_next_off = start_page_offset;
    zx_status_t status = page_list_.ForEveryPageInRange(
        [&expected_next_off, this, pf_flags, lookup_fn, context,
         start_page_offset](const auto p, uint64_t off) {

            // If some page was missing from our list, run the more expensive
            // GetPageLocked to see if our parent has it.
            for (uint64_t missing_off = expected_next_off; missing_off < off;
                 missing_off += PAGE_SIZE) {

                paddr_t pa;
                zx_status_t status = this->GetPageLocked(missing_off, pf_flags, nullptr,
                                                         nullptr, &pa);
                if (status != ZX_OK) {
                    return ZX_ERR_NO_MEMORY;
                }
                const size_t index = (off - start_page_offset) / PAGE_SIZE;
                status = lookup_fn(context, missing_off, index, pa);
                if (status != ZX_OK) {
                    if (unlikely(status == ZX_ERR_NEXT || status == ZX_ERR_STOP)) {
                        status = ZX_ERR_INTERNAL;
                    }
                    return status;
                }
            }

            const size_t index = (off - start_page_offset) / PAGE_SIZE;
            paddr_t pa = p->paddr();
            zx_status_t status = lookup_fn(context, off, index, pa);
            if (status != ZX_OK) {
                if (unlikely(status == ZX_ERR_NEXT || status == ZX_ERR_STOP)) {
                    status = ZX_ERR_INTERNAL;
                }
                return status;
            }

            expected_next_off = off + PAGE_SIZE;
            return ZX_ERR_NEXT;
        },
        start_page_offset, end_page_offset);
    if (status != ZX_OK) {
        return status;
    }

    // If expected_next_off isn't at the end, there's a gap to process
    for (uint64_t off = expected_next_off; off < end_page_offset; off += PAGE_SIZE) {
        paddr_t pa;
        zx_status_t status = GetPageLocked(off, pf_flags, nullptr, nullptr, &pa);
        if (status != ZX_OK) {
            return ZX_ERR_NO_MEMORY;
        }
        const size_t index = (off - start_page_offset) / PAGE_SIZE;
        status = lookup_fn(context, off, index, pa);
        if (status != ZX_OK) {
            return status;
        }
    }

    return ZX_OK;
}

zx_status_t VmObjectPaged::ReadUser(user_out_ptr<void> ptr, uint64_t offset, size_t len) {
    canary_.Assert();

    // read routine that uses copy_to_user
    auto read_routine = [ptr](const void* src, size_t offset, size_t len) -> zx_status_t {
        return ptr.byte_offset(offset).copy_array_to_user(src, len);
    };

    return ReadWriteInternal(offset, len, false, read_routine);
}

zx_status_t VmObjectPaged::WriteUser(user_in_ptr<const void> ptr, uint64_t offset, size_t len) {
    canary_.Assert();

    // write routine that uses copy_from_user
    auto write_routine = [ptr](void* dst, size_t offset, size_t len) -> zx_status_t {
        return ptr.byte_offset(offset).copy_array_from_user(dst, len);
    };

    return ReadWriteInternal(offset, len, true, write_routine);
}

zx_status_t VmObjectPaged::LookupUser(uint64_t offset, uint64_t len, user_inout_ptr<paddr_t> buffer,
                                      size_t buffer_size) {
    canary_.Assert();

    uint64_t start_page_offset = ROUNDDOWN(offset, PAGE_SIZE);
    uint64_t end_page_offset = ROUNDUP(offset + len, PAGE_SIZE);
    // compute the size of the table we'll need and make sure it fits in the user buffer
    uint64_t table_size = ((end_page_offset - start_page_offset) / PAGE_SIZE) * sizeof(paddr_t);
    if (unlikely(table_size > buffer_size)) {
        return ZX_ERR_BUFFER_TOO_SMALL;
    }

    auto copy_to_user = [](void* context, size_t offset, size_t index, paddr_t pa) -> zx_status_t {
        user_inout_ptr<paddr_t>* buffer = static_cast<user_inout_ptr<paddr_t>*>(context);
        return buffer->element_offset(index).copy_to_user(pa);
    };
    // only lookup pages that are already present
    return Lookup(offset, len, 0, copy_to_user, &buffer);
}

zx_status_t VmObjectPaged::InvalidateCache(const uint64_t offset, const uint64_t len) {
    return CacheOp(offset, len, CacheOpType::Invalidate);
}
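
// The cache maintenance entry points here (invalidate, clean, clean+invalidate,
// sync) are all thin wrappers around CacheOp(), defined below. CacheOp() walks
// the requested range one page at a time, clamping each sub-operation to
// whichever comes first: the end of the current page or the end of the range.
// Illustrative example (hypothetical values): an op starting at offset 0xff0
// with length 0x1020 is issued as three chunks of 0x10, 0x1000, and 0x10 bytes.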

zx_status_t VmObjectPaged::CleanCache(const uint64_t offset, const uint64_t len) {
    return CacheOp(offset, len, CacheOpType::Clean);
}

zx_status_t VmObjectPaged::CleanInvalidateCache(const uint64_t offset, const uint64_t len) {
    return CacheOp(offset, len, CacheOpType::CleanInvalidate);
}

zx_status_t VmObjectPaged::SyncCache(const uint64_t offset, const uint64_t len) {
    return CacheOp(offset, len, CacheOpType::Sync);
}

zx_status_t VmObjectPaged::CacheOp(const uint64_t start_offset, const uint64_t len,
                                   const CacheOpType type) {
    canary_.Assert();

    if (unlikely(len == 0)) {
        return ZX_ERR_INVALID_ARGS;
    }

    Guard<fbl::Mutex> guard{&lock_};

    if (unlikely(!InRange(start_offset, len, size_))) {
        return ZX_ERR_OUT_OF_RANGE;
    }

    const size_t end_offset = static_cast<size_t>(start_offset + len);
    size_t op_start_offset = static_cast<size_t>(start_offset);

    while (op_start_offset != end_offset) {
        // Offset at the end of the current page.
        const size_t page_end_offset = ROUNDUP(op_start_offset + 1, PAGE_SIZE);

        // This cache op will either terminate at the end of the current page or
        // at the end of the whole op range -- whichever comes first.
        const size_t op_end_offset = MIN(page_end_offset, end_offset);

        const size_t cache_op_len = op_end_offset - op_start_offset;

        const size_t page_offset = op_start_offset % PAGE_SIZE;

        // lookup the physical address of the page, careful not to fault in a new one
        paddr_t pa;
        auto status = GetPageLocked(op_start_offset, 0, nullptr, nullptr, &pa);

        if (likely(status == ZX_OK)) {
            // Convert the page address to a Kernel virtual address.
            const void* ptr = paddr_to_physmap(pa);
            const addr_t cache_op_addr = reinterpret_cast<addr_t>(ptr) + page_offset;

            LTRACEF("ptr %p op %d\n", ptr, (int)type);

            // Perform the necessary cache op against this page.
            switch (type) {
            case CacheOpType::Invalidate:
                arch_invalidate_cache_range(cache_op_addr, cache_op_len);
                break;
            case CacheOpType::Clean:
                arch_clean_cache_range(cache_op_addr, cache_op_len);
                break;
            case CacheOpType::CleanInvalidate:
                arch_clean_invalidate_cache_range(cache_op_addr, cache_op_len);
                break;
            case CacheOpType::Sync:
                arch_sync_cache_range(cache_op_addr, cache_op_len);
                break;
            }
        }

        op_start_offset += cache_op_len;
    }

    return ZX_OK;
}

zx_status_t VmObjectPaged::GetMappingCachePolicy(uint32_t* cache_policy) {
    Guard<fbl::Mutex> guard{&lock_};

    *cache_policy = cache_policy_;

    return ZX_OK;
}

zx_status_t VmObjectPaged::SetMappingCachePolicy(const uint32_t cache_policy) {
    // Is it a valid cache flag?
    if (cache_policy & ~ZX_CACHE_POLICY_MASK) {
        return ZX_ERR_INVALID_ARGS;
    }

    Guard<fbl::Mutex> guard{&lock_};

    // conditions for allowing the cache policy to be set:
    // 1) vmo has no pages committed currently
    // 2) vmo has no mappings
    // 3) vmo has no clones
    // 4) vmo is not a clone
    if (!page_list_.IsEmpty()) {
        return ZX_ERR_BAD_STATE;
    }
    if (!mapping_list_.is_empty()) {
        return ZX_ERR_BAD_STATE;
    }
    if (!children_list_.is_empty()) {
        return ZX_ERR_BAD_STATE;
    }
    if (parent_) {
        return ZX_ERR_BAD_STATE;
    }

    cache_policy_ = cache_policy;

    return ZX_OK;
}

void VmObjectPaged::RangeChangeUpdateFromParentLocked(const uint64_t offset, const uint64_t len) {
    canary_.Assert();

    LTRACEF("offset %#" PRIx64 " len %#" PRIx64 " p_offset %#" PRIx64 " size_ %#" PRIx64 "\n",
            offset, len, parent_offset_, size_);

    // our parent is notifying that a range of theirs changed, see where it intersects
    // with our offset into the parent and pass it on
    uint64_t offset_new;
    uint64_t len_new;
    if (!GetIntersect(parent_offset_, size_, offset, len,
                      &offset_new, &len_new)) {
        return;
    }

    // if they intersect with us, then by definition the new offset must be >= parent_offset_
    DEBUG_ASSERT(offset_new >= parent_offset_);

    // subtract our offset
    offset_new -= parent_offset_;

    // verify that it's still within range of us
    DEBUG_ASSERT(offset_new + len_new <= size_);

    LTRACEF("new offset %#" PRIx64 " new len %#" PRIx64 "\n",
            offset_new, len_new);

    // pass it on
    // TODO: optimize by not passing on ranges that are completely covered by pages local to this vmo
    RangeChangeUpdateLocked(offset_new, len_new);
}
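
// ---------------------------------------------------------------------------
// Illustrative usage sketch (not part of the implementation, excluded from the
// build). It strings together entry points defined in this file (Create(),
// CommitRange(), Write(), Read()) for a hypothetical in-kernel caller. The
// function name, sizes, and buffer contents below are made up for illustration.
// ---------------------------------------------------------------------------
#if 0
static zx_status_t vmo_paged_usage_example() {
    fbl::RefPtr<VmObject> vmo;

    // Create a 64 KiB paged VMO backed by any physical memory, no special options.
    zx_status_t status = VmObjectPaged::Create(PMM_ALLOC_FLAG_ANY, 0u, 64 * 1024u, &vmo);
    if (status != ZX_OK) {
        return status;
    }

    // Commit the first 16 KiB so those pages are allocated up front.
    uint64_t committed = 0;
    status = vmo->CommitRange(0, 16 * 1024u, &committed);
    if (status != ZX_OK) {
        return status;
    }

    // Copy a kernel buffer into the object, then read it back.
    static const char msg[] = "hello";
    status = vmo->Write(msg, 0, sizeof(msg));
    if (status != ZX_OK) {
        return status;
    }

    char check[sizeof(msg)];
    return vmo->Read(check, 0, sizeof(check));
}
#endif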