// Copyright 2017 The Fuchsia Authors
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT

#include <arch/x86/page_tables/page_tables.h>

#include <arch/x86/feature.h>
#include <arch/x86/page_tables/constants.h>
#include <assert.h>
#include <fbl/algorithm.h>
#include <fbl/auto_call.h>
#include <fbl/auto_lock.h>
#include <trace.h>
#include <vm/physmap.h>
#include <vm/pmm.h>

#define LOCAL_TRACE 0

namespace {

// Return the page size for this level
size_t page_size(PageTableLevel level) {
    switch (level) {
    case PT_L:
        return 1ULL << PT_SHIFT;
    case PD_L:
        return 1ULL << PD_SHIFT;
    case PDP_L:
        return 1ULL << PDP_SHIFT;
    case PML4_L:
        return 1ULL << PML4_SHIFT;
    default:
        panic("page_size: invalid level\n");
    }
}

// Whether an address is aligned to the page size of this level
bool page_aligned(PageTableLevel level, vaddr_t vaddr) {
    return (vaddr & (page_size(level) - 1)) == 0;
}

// Extract the index needed for finding |vaddr| for the given level
uint vaddr_to_index(PageTableLevel level, vaddr_t vaddr) {
    switch (level) {
    case PML4_L:
        return VADDR_TO_PML4_INDEX(vaddr);
    case PDP_L:
        return VADDR_TO_PDP_INDEX(vaddr);
    case PD_L:
        return VADDR_TO_PD_INDEX(vaddr);
    case PT_L:
        return VADDR_TO_PT_INDEX(vaddr);
    default:
        panic("vaddr_to_index: invalid level\n");
    }
}

// Convert a PTE to a physical address
paddr_t paddr_from_pte(PageTableLevel level, pt_entry_t pte) {
    DEBUG_ASSERT(IS_PAGE_PRESENT(pte));

    paddr_t pa;
    switch (level) {
    case PDP_L:
        pa = (pte & X86_HUGE_PAGE_FRAME);
        break;
    case PD_L:
        pa = (pte & X86_LARGE_PAGE_FRAME);
        break;
    case PT_L:
        pa = (pte & X86_PG_FRAME);
        break;
    default:
        panic("paddr_from_pte at unhandled level %d\n", level);
    }

    return pa;
}

PageTableLevel lower_level(PageTableLevel level) {
    DEBUG_ASSERT(level != 0);
    return (PageTableLevel)(level - 1);
}

} // namespace

void PendingTlbInvalidation::enqueue(vaddr_t v, PageTableLevel level, bool is_global_page,
                                     bool is_terminal) {
    if (is_global_page) {
        contains_global = true;
    }

    // We mark PML4_L entries as full shootdowns, since it's going to be
    // expensive one way or another.
    if (count >= fbl::count_of(item) || level == PML4_L) {
        full_shootdown = true;
        return;
    }
    item[count].set_page_level(static_cast<uint64_t>(level));
    item[count].set_is_global(is_global_page);
    item[count].set_is_terminal(is_terminal);
    item[count].set_encoded_addr(v >> PAGE_SIZE_SHIFT);
    count++;
}

void PendingTlbInvalidation::clear() {
    count = 0;
    full_shootdown = false;
    contains_global = false;
}

PendingTlbInvalidation::~PendingTlbInvalidation() {
    DEBUG_ASSERT(count == 0);
}

// Utility for coalescing cache line flushes when modifying page tables. This
// allows us to mutate adjacent page table entries without having to flush for
// each cache line multiple times.
class X86PageTableBase::CacheLineFlusher {
public:
    // If |perform_invalidations| is false, this class acts as a no-op.
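    //
    // Flushes are coalesced per cache line: FlushPtEntry() only records which
    // line is dirty, and the clflush itself is issued by ForceFlush() (or the
    // destructor) once a different line is touched.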
    explicit CacheLineFlusher(bool perform_invalidations);
    ~CacheLineFlusher();
    void FlushPtEntry(const volatile pt_entry_t* entry);

    void ForceFlush();

private:
    DISALLOW_COPY_ASSIGN_AND_MOVE(CacheLineFlusher);

    // The cache-aligned address that is currently dirty. If 0, there is no dirty line.
    uintptr_t dirty_line_;

    const uintptr_t cl_mask_;
    const bool perform_invalidations_;
};

X86PageTableBase::CacheLineFlusher::CacheLineFlusher(bool perform_invalidations)
    : dirty_line_(0), cl_mask_(~(x86_get_clflush_line_size() - 1ull)),
      perform_invalidations_(perform_invalidations) {
}

X86PageTableBase::CacheLineFlusher::~CacheLineFlusher() {
    ForceFlush();
}

void X86PageTableBase::CacheLineFlusher::ForceFlush() {
    if (dirty_line_ && perform_invalidations_) {
        __asm__ volatile("clflush %0\n"
                         :
                         : "m"(*reinterpret_cast<char*>(dirty_line_))
                         : "memory");
        dirty_line_ = 0;
    }
}

void X86PageTableBase::CacheLineFlusher::FlushPtEntry(const volatile pt_entry_t* entry) {
    uintptr_t entry_line = reinterpret_cast<uintptr_t>(entry) & cl_mask_;
    if (entry_line != dirty_line_) {
        ForceFlush();
        dirty_line_ = entry_line;
    }
}

// Utility for managing consistency of the page tables from a cache and TLB
// point-of-view. It ensures that memory is not freed while a TLB entry may
// refer to it, and that changes to the page tables have appropriate visibility
// to the hardware interpreting them. Finish MUST be called on this
// class, even if the page table change failed.
class X86PageTableBase::ConsistencyManager {
public:
    explicit ConsistencyManager(X86PageTableBase* pt);
    ~ConsistencyManager();

    // Disable thread safety analysis here because it has trouble identifying
    // that |pt_->lock_| is held here.
    void queue_free(vm_page_t* page) TA_NO_THREAD_SAFETY_ANALYSIS {
        DEBUG_ASSERT(pt_->lock_.IsHeld());

        list_add_tail(&to_free_, &page->queue_node);
        pt_->pages_--;
    }

    CacheLineFlusher* cache_line_flusher() { return &clf_; }
    PendingTlbInvalidation* pending_tlb() { return &tlb_; }

    // This function must be called while holding pt_->lock_.
    void Finish();

private:
    X86PageTableBase* pt_;

    // Cache line to flush prior to TLB invalidations
    X86PageTableBase::CacheLineFlusher clf_;

    // TLB invalidations that need to occur
    PendingTlbInvalidation tlb_;

    // vm_page_t's to release to the PMM after the TLB invalidation occurs
    list_node to_free_;
};

X86PageTableBase::ConsistencyManager::ConsistencyManager(X86PageTableBase* pt)
    : pt_(pt), clf_(pt->needs_cache_flushes()) {

    to_free_ = LIST_INITIAL_VALUE(to_free_);
}

X86PageTableBase::ConsistencyManager::~ConsistencyManager() {
    DEBUG_ASSERT(pt_ == nullptr);

    // We free the paging structures here rather than in Finish(), to support
    // deferring the pmm_free() call until after we've left the page table
    // lock.
    if (!list_is_empty(&to_free_)) {
        pmm_free(&to_free_);
    }
}

void X86PageTableBase::ConsistencyManager::Finish() {
    DEBUG_ASSERT(pt_->lock_.IsHeld());

    clf_.ForceFlush();
    if (pt_->needs_cache_flushes()) {
        // If the hardware needs cache flushes for the tables to be visible,
        // make sure we serialize the flushes before issuing the TLB
        // invalidations.
        mb();
    }
    pt_->TlbInvalidate(&tlb_);
    pt_ = nullptr;
}

struct X86PageTableBase::MappingCursor {
public:
    /**
     * @brief Update the cursor to skip over a not-present page table entry.
     */
    void SkipEntry(PageTableLevel level) {
        const size_t ps = page_size(level);
        // Calculate the amount the cursor should skip to get to the next entry at
        // this page table level.
        const size_t skipped_size = ps - (vaddr & (ps - 1));
        // If our endpoint was in the middle of this range, clamp the
        // amount we remove from the cursor
        const size_t _size = (size > skipped_size) ? skipped_size : size;

        size -= _size;
        vaddr += _size;
    }

    paddr_t paddr;
    vaddr_t vaddr;
    size_t size;
};

void X86PageTableBase::UpdateEntry(ConsistencyManager* cm, PageTableLevel level, vaddr_t vaddr,
                                   volatile pt_entry_t* pte, paddr_t paddr, PtFlags flags,
                                   bool was_terminal) {
    DEBUG_ASSERT(pte);
    DEBUG_ASSERT(IS_PAGE_ALIGNED(paddr));

    pt_entry_t olde = *pte;

    /* set the new entry */
    *pte = paddr | flags | X86_MMU_PG_P;
    cm->cache_line_flusher()->FlushPtEntry(pte);

    /* attempt to invalidate the page */
    if (IS_PAGE_PRESENT(olde)) {
        // TODO(teisenbe): the is_kernel_address should be a check for the
        // global bit
        cm->pending_tlb()->enqueue(vaddr, level, is_kernel_address(vaddr), was_terminal);
    }
}

void X86PageTableBase::UnmapEntry(ConsistencyManager* cm, PageTableLevel level, vaddr_t vaddr,
                                  volatile pt_entry_t* pte, bool was_terminal) {
    DEBUG_ASSERT(pte);

    pt_entry_t olde = *pte;

    *pte = 0;
    cm->cache_line_flusher()->FlushPtEntry(pte);

    /* attempt to invalidate the page */
    if (IS_PAGE_PRESENT(olde)) {
        // TODO(teisenbe): the is_kernel_address should be a check for the
        // global bit
        cm->pending_tlb()->enqueue(vaddr, level, is_kernel_address(vaddr), was_terminal);
    }
}

/**
 * @brief Allocate a new page table
 */
static volatile pt_entry_t* _map_alloc_page(void) {
    paddr_t pa;
    vm_page* p;
    zx_status_t status = pmm_alloc_page(0, &p, &pa);
    if (status != ZX_OK) {
        return nullptr;
    }
    p->state = VM_PAGE_STATE_MMU;

    pt_entry_t* page_ptr = static_cast<pt_entry_t*>(paddr_to_physmap(pa));
    DEBUG_ASSERT(page_ptr);

    arch_zero_page(page_ptr);

    return page_ptr;
}

/*
 * @brief Split the given large page into smaller pages
 */
zx_status_t X86PageTableBase::SplitLargePage(PageTableLevel level, vaddr_t vaddr,
                                             volatile pt_entry_t* pte, ConsistencyManager* cm) {
    DEBUG_ASSERT_MSG(level != PT_L, "tried splitting PT_L");
    LTRACEF_LEVEL(2, "splitting table %p at level %d\n", pte, level);

    DEBUG_ASSERT(IS_PAGE_PRESENT(*pte) && IS_LARGE_PAGE(*pte));
    volatile pt_entry_t* m = _map_alloc_page();
    if (m == nullptr) {
        return ZX_ERR_NO_MEMORY;
    }

    paddr_t paddr_base = paddr_from_pte(level, *pte);
    PtFlags flags = split_flags(level, *pte & X86_LARGE_FLAGS_MASK);

    DEBUG_ASSERT(page_aligned(level, vaddr));
    vaddr_t new_vaddr = vaddr;
    paddr_t new_paddr = paddr_base;
    size_t ps = page_size(lower_level(level));
    for (int i = 0; i < NO_OF_PT_ENTRIES; i++) {
        volatile pt_entry_t* e = m + i;
        // If this is a PDP_L (i.e. huge page), flags will include the
        // PS bit still, so the new PD entries will be large pages.
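        // |m| was just zero-filled, so the old entry is never present here and
        // UpdateEntry does not queue any TLB invalidations for these writes.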
        UpdateEntry(cm, lower_level(level), new_vaddr, e, new_paddr, flags,
                    false /* was_terminal */);
        new_vaddr += ps;
        new_paddr += ps;
    }
    DEBUG_ASSERT(new_vaddr == vaddr + page_size(level));

    flags = intermediate_flags();
    UpdateEntry(cm, level, vaddr, pte, X86_VIRT_TO_PHYS(m), flags, true /* was_terminal */);
    pages_++;
    return ZX_OK;
}

/*
 * @brief given a page table entry, return a pointer to the next page table one level down
 */
static inline volatile pt_entry_t* get_next_table_from_entry(pt_entry_t entry) {
    if (!IS_PAGE_PRESENT(entry) || IS_LARGE_PAGE(entry))
        return nullptr;

    return reinterpret_cast<volatile pt_entry_t*>(X86_PHYS_TO_VIRT(entry & X86_PG_FRAME));
}

/**
 * @brief Walk the page table structures returning the entry and level that maps the address.
 *
 * @param table The top-level paging structure's virtual address
 * @param vaddr The virtual address to retrieve the mapping for
 * @param ret_level The level of the table that defines the found mapping
 * @param mapping The mapping that was found
 *
 * @return ZX_OK if mapping is found
 * @return ZX_ERR_NOT_FOUND if mapping is not found
 */
zx_status_t X86PageTableBase::GetMapping(volatile pt_entry_t* table, vaddr_t vaddr,
                                         PageTableLevel level,
                                         PageTableLevel* ret_level,
                                         volatile pt_entry_t** mapping) {
    DEBUG_ASSERT(table);
    DEBUG_ASSERT(ret_level);
    DEBUG_ASSERT(mapping);

    if (level == PT_L) {
        return GetMappingL0(table, vaddr, ret_level, mapping);
    }

    LTRACEF_LEVEL(2, "table %p\n", table);

    uint index = vaddr_to_index(level, vaddr);
    volatile pt_entry_t* e = table + index;
    pt_entry_t pt_val = *e;
    if (!IS_PAGE_PRESENT(pt_val))
        return ZX_ERR_NOT_FOUND;

    /* if this is a large page, stop here */
    if (IS_LARGE_PAGE(pt_val)) {
        *mapping = e;
        *ret_level = level;
        return ZX_OK;
    }

    volatile pt_entry_t* next_table = get_next_table_from_entry(pt_val);
    return GetMapping(next_table, vaddr, lower_level(level), ret_level, mapping);
}

zx_status_t X86PageTableBase::GetMappingL0(volatile pt_entry_t* table, vaddr_t vaddr,
                                           PageTableLevel* ret_level,
                                           volatile pt_entry_t** mapping) {
    /* do the final page table lookup */
    uint index = vaddr_to_index(PT_L, vaddr);
    volatile pt_entry_t* e = table + index;
    if (!IS_PAGE_PRESENT(*e))
        return ZX_ERR_NOT_FOUND;

    *mapping = e;
    *ret_level = PT_L;
    return ZX_OK;
}

/**
 * @brief Unmaps the range specified by start_cursor.
 *
 * Level must be top_level() when invoked. The caller must, even on failure,
 * free all pages in the |to_free| list and adjust the |pages_| count.
 *
 * @param table The top-level paging structure's virtual address.
 * @param start_cursor A cursor describing the range of address space to
 * unmap within table
 * @param new_cursor A returned cursor describing how much work was not
 * completed. Must be non-null.
 *
 * @return true if at least one page was unmapped at this level
 */
bool X86PageTableBase::RemoveMapping(volatile pt_entry_t* table, PageTableLevel level,
                                     const MappingCursor& start_cursor, MappingCursor* new_cursor,
                                     ConsistencyManager* cm) {
    DEBUG_ASSERT(table);
    LTRACEF("L: %d, %016" PRIxPTR " %016zx\n", level, start_cursor.vaddr,
            start_cursor.size);
    DEBUG_ASSERT(check_vaddr(start_cursor.vaddr));

    if (level == PT_L) {
        return RemoveMappingL0(table, start_cursor, new_cursor, cm);
    }

    *new_cursor = start_cursor;

    bool unmapped = false;
    size_t ps = page_size(level);
    uint index = vaddr_to_index(level, new_cursor->vaddr);
    for (; index != NO_OF_PT_ENTRIES && new_cursor->size != 0; ++index) {
        volatile pt_entry_t* e = table + index;
        pt_entry_t pt_val = *e;
        // If the page isn't even mapped, just skip it
        if (!IS_PAGE_PRESENT(pt_val)) {
            new_cursor->SkipEntry(level);
            DEBUG_ASSERT(new_cursor->size <= start_cursor.size);
            continue;
        }

        if (IS_LARGE_PAGE(pt_val)) {
            bool vaddr_level_aligned = page_aligned(level, new_cursor->vaddr);
            // If the request covers the entire large page, just unmap it
            if (vaddr_level_aligned && new_cursor->size >= ps) {
                UnmapEntry(cm, level, new_cursor->vaddr, e, true /* was_terminal */);
                unmapped = true;

                new_cursor->vaddr += ps;
                new_cursor->size -= ps;
                DEBUG_ASSERT(new_cursor->size <= start_cursor.size);
                continue;
            }
            // Otherwise, we need to split it
            vaddr_t page_vaddr = new_cursor->vaddr & ~(ps - 1);
            zx_status_t status = SplitLargePage(level, page_vaddr, e, cm);
            if (status != ZX_OK) {
                // If split fails, just unmap the whole thing, and let a
                // subsequent page fault clean it up.
                UnmapEntry(cm, level, new_cursor->vaddr, e, true /* was_terminal */);
                unmapped = true;

                new_cursor->SkipEntry(level);
                DEBUG_ASSERT(new_cursor->size <= start_cursor.size);
                // The entry is now empty, so there is no lower table to recurse into.
                continue;
            }
            pt_val = *e;
        }

        MappingCursor cursor;
        volatile pt_entry_t* next_table = get_next_table_from_entry(pt_val);
        bool lower_unmapped = RemoveMapping(next_table, lower_level(level),
                                            *new_cursor, &cursor, cm);

        // If we were requesting to unmap everything in the lower page table,
        // we know we can unmap the lower level page table. Otherwise, if
        // we unmapped anything in the lower level, check to see if that
        // level is now empty.
        bool unmap_page_table =
            page_aligned(level, new_cursor->vaddr) && new_cursor->size >= ps;
        if (!unmap_page_table && lower_unmapped) {
            uint lower_idx;
            for (lower_idx = 0; lower_idx < NO_OF_PT_ENTRIES; ++lower_idx) {
                if (IS_PAGE_PRESENT(next_table[lower_idx])) {
                    break;
                }
            }
            if (lower_idx == NO_OF_PT_ENTRIES) {
                unmap_page_table = true;
            }
        }
        if (unmap_page_table) {
            paddr_t ptable_phys = X86_VIRT_TO_PHYS(next_table);
            LTRACEF("L: %d free pt v %#" PRIxPTR " phys %#" PRIxPTR "\n",
                    level, (uintptr_t)next_table, ptable_phys);

            UnmapEntry(cm, level, new_cursor->vaddr, e, false /* was_terminal */);
            vm_page_t* page = paddr_to_vm_page(ptable_phys);

            DEBUG_ASSERT(page);
            DEBUG_ASSERT_MSG(page->state == VM_PAGE_STATE_MMU,
                             "page %p state %u, paddr %#" PRIxPTR "\n", page, page->state,
                             X86_VIRT_TO_PHYS(next_table));
            DEBUG_ASSERT(!list_in_list(&page->queue_node));

            cm->queue_free(page);
            unmapped = true;
        }
        *new_cursor = cursor;
        DEBUG_ASSERT(new_cursor->size <= start_cursor.size);

        DEBUG_ASSERT(new_cursor->size == 0 || page_aligned(level, new_cursor->vaddr));
    }

    return unmapped;
}

// Base case of RemoveMapping for smallest page size.
bool X86PageTableBase::RemoveMappingL0(volatile pt_entry_t* table,
                                       const MappingCursor& start_cursor, MappingCursor* new_cursor,
                                       ConsistencyManager* cm) {
    LTRACEF("%016" PRIxPTR " %016zx\n", start_cursor.vaddr, start_cursor.size);
    DEBUG_ASSERT(IS_PAGE_ALIGNED(start_cursor.size));

    *new_cursor = start_cursor;

    bool unmapped = false;
    uint index = vaddr_to_index(PT_L, new_cursor->vaddr);
    for (; index != NO_OF_PT_ENTRIES && new_cursor->size != 0; ++index) {
        volatile pt_entry_t* e = table + index;
        if (IS_PAGE_PRESENT(*e)) {
            UnmapEntry(cm, PT_L, new_cursor->vaddr, e, true /* was_terminal */);
            unmapped = true;
        }

        new_cursor->vaddr += PAGE_SIZE;
        new_cursor->size -= PAGE_SIZE;
        DEBUG_ASSERT(new_cursor->size <= start_cursor.size);
    }
    return unmapped;
}

/**
 * @brief Creates mappings for the range specified by start_cursor
 *
 * Level must be top_level() when invoked.
 *
 * @param table The top-level paging structure's virtual address.
 * @param start_cursor A cursor describing the range of address space to
 * act on within table
 * @param new_cursor A returned cursor describing how much work was not
 * completed. Must be non-null.
 *
 * @return ZX_OK if successful
 * @return ZX_ERR_ALREADY_EXISTS if the range overlaps an existing mapping
 * @return ZX_ERR_NO_MEMORY if intermediate page tables could not be allocated
 */
zx_status_t X86PageTableBase::AddMapping(volatile pt_entry_t* table, uint mmu_flags,
                                         PageTableLevel level, const MappingCursor& start_cursor,
                                         MappingCursor* new_cursor,
                                         ConsistencyManager* cm) {
    DEBUG_ASSERT(table);
    DEBUG_ASSERT(check_vaddr(start_cursor.vaddr));
    DEBUG_ASSERT(check_paddr(start_cursor.paddr));

    zx_status_t ret = ZX_OK;
    *new_cursor = start_cursor;

    if (level == PT_L) {
        return AddMappingL0(table, mmu_flags, start_cursor, new_cursor, cm);
    }

    // Disable thread safety analysis, since Clang has trouble noticing that
    // lock_ is held when RemoveMapping is called.
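    //
    // If this call fails partway through, the top-level invocation tears down
    // whatever portion of the range was already mapped before returning.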
    auto abort = fbl::MakeAutoCall([&]() TA_NO_THREAD_SAFETY_ANALYSIS {
        if (level == top_level()) {
            MappingCursor cursor = start_cursor;
            MappingCursor result;
            // new_cursor->size should be how much is left to be mapped still
            cursor.size -= new_cursor->size;
            if (cursor.size > 0) {
                RemoveMapping(table, level, cursor, &result, cm);
                DEBUG_ASSERT(result.size == 0);
            }
        }
    });

    X86PageTableBase::IntermediatePtFlags interm_flags = intermediate_flags();
    X86PageTableBase::PtFlags term_flags = terminal_flags(level, mmu_flags);

    size_t ps = page_size(level);
    bool level_supports_large_pages = supports_page_size(level);
    uint index = vaddr_to_index(level, new_cursor->vaddr);
    for (; index != NO_OF_PT_ENTRIES && new_cursor->size != 0; ++index) {
        volatile pt_entry_t* e = table + index;
        pt_entry_t pt_val = *e;
        // See if there's a large page in our way
        if (IS_PAGE_PRESENT(pt_val) && IS_LARGE_PAGE(pt_val)) {
            return ZX_ERR_ALREADY_EXISTS;
        }

        // Check if this is a candidate for a new large page
        bool level_valigned = page_aligned(level, new_cursor->vaddr);
        bool level_paligned = page_aligned(level, new_cursor->paddr);
        if (level_supports_large_pages && !IS_PAGE_PRESENT(pt_val) && level_valigned &&
            level_paligned && new_cursor->size >= ps) {

            UpdateEntry(cm, level, new_cursor->vaddr, table + index,
                        new_cursor->paddr, term_flags | X86_MMU_PG_PS, false /* was_terminal */);
            new_cursor->paddr += ps;
            new_cursor->vaddr += ps;
            new_cursor->size -= ps;
            DEBUG_ASSERT(new_cursor->size <= start_cursor.size);
        } else {
            // See if we need to create a new table
            if (!IS_PAGE_PRESENT(pt_val)) {
                volatile pt_entry_t* m = _map_alloc_page();
                if (m == nullptr) {
                    return ZX_ERR_NO_MEMORY;
                }

                LTRACEF_LEVEL(2, "new table %p at level %d\n", m, level);

                UpdateEntry(cm, level, new_cursor->vaddr, e,
                            X86_VIRT_TO_PHYS(m), interm_flags, false /* was_terminal */);
                pt_val = *e;
                pages_++;
            }

            MappingCursor cursor;
            ret = AddMapping(get_next_table_from_entry(pt_val), mmu_flags,
                             lower_level(level), *new_cursor, &cursor, cm);
            *new_cursor = cursor;
            DEBUG_ASSERT(new_cursor->size <= start_cursor.size);
            if (ret != ZX_OK) {
                return ret;
            }
        }
    }
    abort.cancel();
    return ZX_OK;
}

// Base case of AddMapping for smallest page size.
zx_status_t X86PageTableBase::AddMappingL0(volatile pt_entry_t* table, uint mmu_flags,
                                           const MappingCursor& start_cursor,
                                           MappingCursor* new_cursor, ConsistencyManager* cm) {
    DEBUG_ASSERT(IS_PAGE_ALIGNED(start_cursor.size));

    *new_cursor = start_cursor;

    X86PageTableBase::PtFlags term_flags = terminal_flags(PT_L, mmu_flags);

    uint index = vaddr_to_index(PT_L, new_cursor->vaddr);
    for (; index != NO_OF_PT_ENTRIES && new_cursor->size != 0; ++index) {
        volatile pt_entry_t* e = table + index;
        if (IS_PAGE_PRESENT(*e)) {
            return ZX_ERR_ALREADY_EXISTS;
        }

        UpdateEntry(cm, PT_L, new_cursor->vaddr, e, new_cursor->paddr, term_flags,
                    false /* was_terminal */);

        new_cursor->paddr += PAGE_SIZE;
        new_cursor->vaddr += PAGE_SIZE;
        new_cursor->size -= PAGE_SIZE;
        DEBUG_ASSERT(new_cursor->size <= start_cursor.size);
    }

    return ZX_OK;
}

/**
 * @brief Changes the permissions/caching of the range specified by start_cursor
 *
 * Level must be top_level() when invoked.
 * The caller must, even on failure,
 * free all pages in the |to_free| list and adjust the |pages_| count.
 *
 * @param table The top-level paging structure's virtual address.
 * @param start_cursor A cursor describing the range of address space to
 * act on within table
 * @param new_cursor A returned cursor describing how much work was not
 * completed. Must be non-null.
 */
zx_status_t X86PageTableBase::UpdateMapping(volatile pt_entry_t* table, uint mmu_flags,
                                            PageTableLevel level, const MappingCursor& start_cursor,
                                            MappingCursor* new_cursor, ConsistencyManager* cm) {
    DEBUG_ASSERT(table);
    LTRACEF("L: %d, %016" PRIxPTR " %016zx\n", level, start_cursor.vaddr,
            start_cursor.size);
    DEBUG_ASSERT(check_vaddr(start_cursor.vaddr));

    if (level == PT_L) {
        return UpdateMappingL0(table, mmu_flags, start_cursor, new_cursor, cm);
    }

    zx_status_t ret = ZX_OK;
    *new_cursor = start_cursor;

    X86PageTableBase::PtFlags term_flags = terminal_flags(level, mmu_flags);

    size_t ps = page_size(level);
    uint index = vaddr_to_index(level, new_cursor->vaddr);
    for (; index != NO_OF_PT_ENTRIES && new_cursor->size != 0; ++index) {
        volatile pt_entry_t* e = table + index;
        pt_entry_t pt_val = *e;
        // Skip unmapped pages (we may encounter these due to demand paging)
        if (!IS_PAGE_PRESENT(pt_val)) {
            new_cursor->SkipEntry(level);
            continue;
        }

        if (IS_LARGE_PAGE(pt_val)) {
            bool vaddr_level_aligned = page_aligned(level, new_cursor->vaddr);
            // If the request covers the entire large page, just change the
            // permissions
            if (vaddr_level_aligned && new_cursor->size >= ps) {
                UpdateEntry(cm, level, new_cursor->vaddr, e,
                            paddr_from_pte(level, pt_val),
                            term_flags | X86_MMU_PG_PS, true /* was_terminal */);
                new_cursor->vaddr += ps;
                new_cursor->size -= ps;
                DEBUG_ASSERT(new_cursor->size <= start_cursor.size);
                continue;
            }
            // Otherwise, we need to split it
            vaddr_t page_vaddr = new_cursor->vaddr & ~(ps - 1);
            ret = SplitLargePage(level, page_vaddr, e, cm);
            if (ret != ZX_OK) {
                // If we failed to split the table, just unmap it. Subsequent
                // page faults will bring it back in.
                MappingCursor cursor;
                cursor.vaddr = new_cursor->vaddr;
                cursor.size = ps;

                MappingCursor tmp_cursor;
                RemoveMapping(table, level, cursor, &tmp_cursor, cm);

                new_cursor->SkipEntry(level);
                // The entry has been unmapped, so there is no lower table to recurse into.
                continue;
            }
            pt_val = *e;
        }

        MappingCursor cursor;
        volatile pt_entry_t* next_table = get_next_table_from_entry(pt_val);
        ret = UpdateMapping(next_table, mmu_flags, lower_level(level),
                            *new_cursor, &cursor, cm);
        *new_cursor = cursor;
        if (ret != ZX_OK) {
            // Currently this can't happen
            ASSERT(false);
        }
        DEBUG_ASSERT(new_cursor->size <= start_cursor.size);
        DEBUG_ASSERT(new_cursor->size == 0 || page_aligned(level, new_cursor->vaddr));
    }
    return ZX_OK;
}

// Base case of UpdateMapping for smallest page size.
zx_status_t X86PageTableBase::UpdateMappingL0(volatile pt_entry_t* table, uint mmu_flags,
                                              const MappingCursor& start_cursor,
                                              MappingCursor* new_cursor,
                                              ConsistencyManager* cm) {
    LTRACEF("%016" PRIxPTR " %016zx\n", start_cursor.vaddr, start_cursor.size);
    DEBUG_ASSERT(IS_PAGE_ALIGNED(start_cursor.size));

    *new_cursor = start_cursor;

    X86PageTableBase::PtFlags term_flags = terminal_flags(PT_L, mmu_flags);

    uint index = vaddr_to_index(PT_L, new_cursor->vaddr);
    for (; index != NO_OF_PT_ENTRIES && new_cursor->size != 0; ++index) {
        volatile pt_entry_t* e = table + index;
        pt_entry_t pt_val = *e;
        // Skip unmapped pages (we may encounter these due to demand paging)
        if (IS_PAGE_PRESENT(pt_val)) {
            UpdateEntry(cm, PT_L, new_cursor->vaddr, e, paddr_from_pte(PT_L, pt_val),
                        term_flags, true /* was_terminal */);
        }

        new_cursor->vaddr += PAGE_SIZE;
        new_cursor->size -= PAGE_SIZE;
        DEBUG_ASSERT(new_cursor->size <= start_cursor.size);
    }
    DEBUG_ASSERT(new_cursor->size == 0 || page_aligned(PT_L, new_cursor->vaddr));
    return ZX_OK;
}

zx_status_t X86PageTableBase::UnmapPages(vaddr_t vaddr, const size_t count,
                                         size_t* unmapped) {
    LTRACEF("aspace %p, vaddr %#" PRIxPTR ", count %#zx\n", this, vaddr, count);

    canary_.Assert();

    if (!check_vaddr(vaddr))
        return ZX_ERR_INVALID_ARGS;
    if (count == 0)
        return ZX_OK;

    MappingCursor start = {
        .paddr = 0, .vaddr = vaddr, .size = count * PAGE_SIZE,
    };
    MappingCursor result;

    ConsistencyManager cm(this);
    {
        fbl::AutoLock a(&lock_);
        DEBUG_ASSERT(virt_);
        RemoveMapping(virt_, top_level(), start, &result, &cm);
        cm.Finish();
    }
    DEBUG_ASSERT(result.size == 0);

    if (unmapped)
        *unmapped = count;

    return ZX_OK;
}

zx_status_t X86PageTableBase::MapPages(vaddr_t vaddr, paddr_t* phys, size_t count,
                                       uint mmu_flags, size_t* mapped) {
    canary_.Assert();

    LTRACEF("aspace %p, vaddr %#" PRIxPTR " count %#zx mmu_flags 0x%x\n",
            this, vaddr, count, mmu_flags);

    if (!check_vaddr(vaddr))
        return ZX_ERR_INVALID_ARGS;
    for (size_t i = 0; i < count; ++i) {
        if (!check_paddr(phys[i]))
            return ZX_ERR_INVALID_ARGS;
    }
    if (count == 0)
        return ZX_OK;

    if (!allowed_flags(mmu_flags))
        return ZX_ERR_INVALID_ARGS;

    PageTableLevel top = top_level();
    ConsistencyManager cm(this);
    {
        fbl::AutoLock a(&lock_);
        DEBUG_ASSERT(virt_);

        // TODO(teisenbe): Improve performance of this function by integrating deeper into
        // the algorithm (e.g. make the cursors aware of the page array).
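        //
        // For now, map one page at a time; |idx| counts the pages already
        // committed so that |undo| below can roll them back if a later
        // AddMapping call fails.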
        size_t idx = 0;
        auto undo = fbl::MakeAutoCall([&]() TA_NO_THREAD_SAFETY_ANALYSIS {
            if (idx > 0) {
                MappingCursor start = {
                    .paddr = 0, .vaddr = vaddr, .size = idx * PAGE_SIZE,
                };

                MappingCursor result;
                RemoveMapping(virt_, top, start, &result, &cm);
                DEBUG_ASSERT(result.size == 0);
            }
            cm.Finish();
        });

        vaddr_t v = vaddr;
        for (; idx < count; ++idx) {
            MappingCursor start = {
                .paddr = phys[idx], .vaddr = v, .size = PAGE_SIZE,
            };
            MappingCursor result;
            zx_status_t status = AddMapping(virt_, mmu_flags, top, start, &result, &cm);
            if (status != ZX_OK) {
                dprintf(SPEW, "Add mapping failed with err=%d\n", status);
                return status;
            }
            DEBUG_ASSERT(result.size == 0);

            v += PAGE_SIZE;
        }

        undo.cancel();
        cm.Finish();
    }

    if (mapped) {
        *mapped = count;
    }
    return ZX_OK;
}

zx_status_t X86PageTableBase::MapPagesContiguous(vaddr_t vaddr, paddr_t paddr,
                                                 const size_t count, uint mmu_flags,
                                                 size_t* mapped) {
    canary_.Assert();

    LTRACEF("aspace %p, vaddr %#" PRIxPTR " paddr %#" PRIxPTR " count %#zx mmu_flags 0x%x\n",
            this, vaddr, paddr, count, mmu_flags);

    if (!check_paddr(paddr))
        return ZX_ERR_INVALID_ARGS;
    if (!check_vaddr(vaddr))
        return ZX_ERR_INVALID_ARGS;
    if (count == 0)
        return ZX_OK;

    if (!allowed_flags(mmu_flags))
        return ZX_ERR_INVALID_ARGS;

    MappingCursor start = {
        .paddr = paddr, .vaddr = vaddr, .size = count * PAGE_SIZE,
    };
    MappingCursor result;
    ConsistencyManager cm(this);
    {
        fbl::AutoLock a(&lock_);
        DEBUG_ASSERT(virt_);
        zx_status_t status = AddMapping(virt_, mmu_flags, top_level(), start, &result, &cm);
        cm.Finish();
        if (status != ZX_OK) {
            dprintf(SPEW, "Add mapping failed with err=%d\n", status);
            return status;
        }
    }
    DEBUG_ASSERT(result.size == 0);

    if (mapped)
        *mapped = count;

    return ZX_OK;
}

zx_status_t X86PageTableBase::ProtectPages(vaddr_t vaddr, size_t count, uint mmu_flags) {
    canary_.Assert();

    LTRACEF("aspace %p, vaddr %#" PRIxPTR " count %#zx mmu_flags 0x%x\n",
            this, vaddr, count, mmu_flags);

    if (!check_vaddr(vaddr))
        return ZX_ERR_INVALID_ARGS;
    if (count == 0)
        return ZX_OK;

    if (!allowed_flags(mmu_flags))
        return ZX_ERR_INVALID_ARGS;

    MappingCursor start = {
        .paddr = 0, .vaddr = vaddr, .size = count * PAGE_SIZE,
    };
    MappingCursor result;
    ConsistencyManager cm(this);
    {
        fbl::AutoLock a(&lock_);
        zx_status_t status = UpdateMapping(virt_, mmu_flags, top_level(), start, &result, &cm);
        cm.Finish();
        if (status != ZX_OK) {
            return status;
        }
    }
    DEBUG_ASSERT(result.size == 0);
    return ZX_OK;
}

zx_status_t X86PageTableBase::QueryVaddr(vaddr_t vaddr, paddr_t* paddr, uint* mmu_flags) {
    canary_.Assert();

    PageTableLevel ret_level;

    LTRACEF("aspace %p, vaddr %#" PRIxPTR ", paddr %p, mmu_flags %p\n", this, vaddr, paddr,
            mmu_flags);

    fbl::AutoLock a(&lock_);

    volatile pt_entry_t* last_valid_entry;
    zx_status_t status = GetMapping(virt_, vaddr, top_level(), &ret_level, &last_valid_entry);
    if (status != ZX_OK)
        return status;

    DEBUG_ASSERT(last_valid_entry);
    LTRACEF("last_valid_entry (%p) 0x%" PRIxPTE ", level %d\n", last_valid_entry, *last_valid_entry,
            ret_level);

    /* based on the return level, parse the page table entry */
    if (paddr) {
        switch (ret_level) {
        case PDP_L: /* 1GB page */
            *paddr = paddr_from_pte(PDP_L, *last_valid_entry);
            *paddr |= vaddr & PAGE_OFFSET_MASK_HUGE;
            break;
        case PD_L: /* 2MB page */
            *paddr = paddr_from_pte(PD_L, *last_valid_entry);
            *paddr |= vaddr & PAGE_OFFSET_MASK_LARGE;
            break;
        case PT_L: /* 4K page */
            *paddr = paddr_from_pte(PT_L, *last_valid_entry);
            *paddr |= vaddr & PAGE_OFFSET_MASK_4KB;
            break;
        default:
            panic("arch_mmu_query: unhandled frame level\n");
        }

        LTRACEF("paddr %#" PRIxPTR "\n", *paddr);
    }

    /* converting arch-specific flags to mmu flags */
    if (mmu_flags) {
        *mmu_flags = pt_flags_to_mmu_flags(*last_valid_entry, ret_level);
    }

    return ZX_OK;
}

void X86PageTableBase::Destroy(vaddr_t base, size_t size) {
    canary_.Assert();

#if LK_DEBUGLEVEL > 1
    PageTableLevel top = top_level();
    if (virt_) {
        pt_entry_t* table = static_cast<pt_entry_t*>(virt_);
        uint start = vaddr_to_index(top, base);
        uint end = vaddr_to_index(top, base + size - 1);

        // Don't check start if that table is shared with another aspace.
        if (!page_aligned(top, base)) {
            start += 1;
        }
        // Do check the end if it fills out the table entry.
        if (page_aligned(top, base + size)) {
            end += 1;
        }

        for (uint i = start; i < end; ++i) {
            DEBUG_ASSERT(!IS_PAGE_PRESENT(table[i]));
        }
    }
#endif

    if (phys_) {
        pmm_free_page(paddr_to_vm_page(phys_));
        phys_ = 0;
    }
}