// Copyright 2016 The Fuchsia Authors
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT

#include <assert.h>
#include <err.h>
#include <string.h>
#include <trace.h>

#include <arch/arch_ops.h>
#include <arch/mmu.h>
#include <arch/x86.h>
#include <arch/x86/descriptor.h>
#include <arch/x86/feature.h>
#include <arch/x86/mmu.h>
#include <arch/x86/mmu_mem_types.h>
#include <kernel/mp.h>
#include <vm/arch_vm_aspace.h>
#include <vm/physmap.h>
#include <vm/pmm.h>
#include <vm/vm.h>
#include <zircon/types.h>
#include <zxcpp/new.h>

#define LOCAL_TRACE 0

/* Default virtual/physical address widths.
 * The actual values are read from the CPU in x86_mmu_early_init(). */
uint8_t g_vaddr_width = 48;
uint8_t g_paddr_width = 32;

/* True if the system supports 1GB pages */
static bool supports_huge_pages = false;

/* top level kernel page tables, initialized in start.S */
volatile pt_entry_t pml4[NO_OF_PT_ENTRIES] __ALIGNED(PAGE_SIZE);
volatile pt_entry_t pdp[NO_OF_PT_ENTRIES] __ALIGNED(PAGE_SIZE); /* temporary */
volatile pt_entry_t pte[NO_OF_PT_ENTRIES] __ALIGNED(PAGE_SIZE);

/* top level pdp needed to map the -512GB..0 space */
volatile pt_entry_t pdp_high[NO_OF_PT_ENTRIES] __ALIGNED(PAGE_SIZE);

/* a big pile of page tables needed to map 64GB of memory into kernel space using 2MB pages */
volatile pt_entry_t linear_map_pdp[(64ULL * GB) / (2 * MB)] __ALIGNED(PAGE_SIZE);

/* which of the above variables is the top level page table */
#define KERNEL_PT pml4

// Static relocated base to prepare for KASLR. Used at early boot and by the gdb
// script to know the target relocated address.
// TODO(thgarnie): Move to a dynamically generated base address
#if DISABLE_KASLR
uint64_t kernel_relocated_base = KERNEL_BASE - KERNEL_LOAD_OFFSET;
#else
uint64_t kernel_relocated_base = 0xffffffff00000000;
#endif

/* kernel base top level page table in physical space */
static const paddr_t kernel_pt_phys =
    (vaddr_t)KERNEL_PT - (vaddr_t)__code_start + KERNEL_LOAD_OFFSET;

// Valid EPT MMU flags.
static const uint kValidEptFlags =
    ARCH_MMU_FLAG_PERM_READ | ARCH_MMU_FLAG_PERM_WRITE | ARCH_MMU_FLAG_PERM_EXECUTE;

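// Returns the physical address of the kernel's top level page table, i.e. the
// value loaded into CR3 when running on the kernel address space.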
paddr_t x86_kernel_cr3(void) {
    return kernel_pt_phys;
}

/**
 * @brief  check if the virtual address is canonical
 */
bool x86_is_vaddr_canonical(vaddr_t vaddr) {
    uint64_t max_vaddr_lohalf, min_vaddr_hihalf;

    /* get max address in lower-half canonical addr space */
    /* e.g. if width is 48, then 0x00007FFF_FFFFFFFF */
    max_vaddr_lohalf = ((uint64_t)1ull << (g_vaddr_width - 1)) - 1;

    /* get min address in higher-half canonical addr space */
    /* e.g. if width is 48, then 0xFFFF8000_00000000 */
    min_vaddr_hihalf = ~max_vaddr_lohalf;

    /* Check to see if the address is a canonical address */
    if ((vaddr > max_vaddr_lohalf) && (vaddr < min_vaddr_hihalf))
        return false;

    return true;
}

/**
 * @brief  check if the virtual address is aligned and canonical
 */
static bool x86_mmu_check_vaddr(vaddr_t vaddr) {
    /* Check to see if the address is PAGE aligned */
    if (!IS_ALIGNED(vaddr, PAGE_SIZE))
        return false;

    return x86_is_vaddr_canonical(vaddr);
}

/**
 * @brief  check if the physical address is valid and aligned
 */
bool x86_mmu_check_paddr(paddr_t paddr) {
    uint64_t max_paddr;

    /* Check to see if the address is PAGE aligned */
    if (!IS_ALIGNED(paddr, PAGE_SIZE))
        return false;

    max_paddr = ((uint64_t)1ull << g_paddr_width) - 1;

    return paddr <= max_paddr;
}

/**
 * @brief  invalidate all TLB entries, including global entries
 */
static void x86_tlb_global_invalidate() {
    /* See Intel 3A section 4.10.4.1 */
    ulong cr4 = x86_get_cr4();
    if (likely(cr4 & X86_CR4_PGE)) {
        x86_set_cr4(cr4 & ~X86_CR4_PGE);
        x86_set_cr4(cr4);
    } else {
        x86_set_cr3(x86_get_cr3());
    }
}

/**
 * @brief  invalidate all TLB entries, excluding global entries
 */
static void x86_tlb_nonglobal_invalidate() {
    x86_set_cr3(x86_get_cr3());
}

/* Task used for invalidating a TLB entry on each CPU */
struct TlbInvalidatePage_context {
    ulong target_cr3;
    const PendingTlbInvalidation* pending;
};
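/* Invalidation task run on each targeted CPU: CPUs whose current CR3 does not
 * match the target skip non-global invalidations; otherwise either a full TLB
 * flush or a per-page invlpg is performed. */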
static void TlbInvalidatePage_task(void* raw_context) {
    DEBUG_ASSERT(arch_ints_disabled());
    TlbInvalidatePage_context* context = (TlbInvalidatePage_context*)raw_context;

    ulong cr3 = x86_get_cr3();
    if (context->target_cr3 != cr3 && !context->pending->contains_global) {
        /* This invalidation doesn't apply to this CPU, ignore it */
        return;
    }

    if (context->pending->full_shootdown) {
        if (context->pending->contains_global) {
            x86_tlb_global_invalidate();
        } else {
            x86_tlb_nonglobal_invalidate();
        }
        return;
    }

    for (uint i = 0; i < context->pending->count; ++i) {
        const auto& item = context->pending->item[i];
        switch (item.page_level()) {
            case PML4_L:
                panic("PML4_L invalidation found; should not be here\n");
            case PDP_L:
            case PD_L:
            case PT_L:
                __asm__ volatile("invlpg %0" ::"m"(*(uint8_t*)item.addr()));
                break;
        }
    }
}

/**
 * @brief Execute a queued TLB invalidation
 *
 * @param pt The page table we're invalidating for (if nullptr, assume the current one)
 * @param pending The planned invalidation
 */
static void x86_tlb_invalidate_page(const X86PageTableBase* pt, PendingTlbInvalidation* pending) {
    if (pending->count == 0) {
        return;
    }

    ulong cr3 = pt ? pt->phys() : x86_get_cr3();
    struct TlbInvalidatePage_context task_context = {
        .target_cr3 = cr3, .pending = pending,
    };

    /* Target only CPUs this aspace is active on.  It may be the case that some
     * other CPU will become active in it after this load, or will have left it
     * just before this load.  In the former case, it is becoming active after
     * the write to the page table, so it will see the change.  In the latter
     * case, it will get a spurious request to flush. */
    mp_ipi_target_t target;
    cpu_mask_t target_mask = 0;
    if (pending->contains_global || pt == nullptr) {
        target = MP_IPI_TARGET_ALL;
    } else {
        target = MP_IPI_TARGET_MASK;
        target_mask = static_cast<X86ArchVmAspace*>(pt->ctx())->active_cpus();
    }

    mp_sync_exec(target, target_mask, TlbInvalidatePage_task, &task_context);
    pending->clear();
}

bool X86PageTableMmu::check_paddr(paddr_t paddr) {
    return x86_mmu_check_paddr(paddr);
}

bool X86PageTableMmu::check_vaddr(vaddr_t vaddr) {
    return x86_mmu_check_vaddr(vaddr);
}

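// 2MB (PD_L) pages are always available on x86-64; 1GB (PDP_L) pages are only
// used when the CPU reports support for them.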
bool X86PageTableMmu::supports_page_size(PageTableLevel level) {
    DEBUG_ASSERT(level != PT_L);
    switch (level) {
    case PD_L:
        return true;
    case PDP_L:
        return supports_huge_pages;
    case PML4_L:
        return false;
    default:
        panic("Unreachable case in supports_page_size\n");
    }
}

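// Flags used for intermediate (non-terminal) page table entries. These are
// kept permissive (writable and user-accessible); the effective permissions
// come from the terminal entries.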
X86PageTableBase::IntermediatePtFlags X86PageTableMmu::intermediate_flags() {
    return X86_MMU_PG_RW | X86_MMU_PG_U;
}

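// Translate generic ARCH_MMU_FLAG_* values into x86 PTE bits for a terminal
// entry at |level|. Large pages (level > 0) use the large-page PAT bit layout.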
X86PageTableBase::PtFlags X86PageTableMmu::terminal_flags(PageTableLevel level,
                                                          uint flags) {
    X86PageTableBase::PtFlags terminal_flags = 0;

    if (flags & ARCH_MMU_FLAG_PERM_WRITE) {
        terminal_flags |= X86_MMU_PG_RW;
    }
    if (flags & ARCH_MMU_FLAG_PERM_USER) {
        terminal_flags |= X86_MMU_PG_U;
    }
    if (use_global_mappings_) {
        terminal_flags |= X86_MMU_PG_G;
    }
    if (!(flags & ARCH_MMU_FLAG_PERM_EXECUTE)) {
        terminal_flags |= X86_MMU_PG_NX;
    }

    if (level > 0) {
        switch (flags & ARCH_MMU_FLAG_CACHE_MASK) {
        case ARCH_MMU_FLAG_CACHED:
            terminal_flags |= X86_MMU_LARGE_PAT_WRITEBACK;
            break;
        case ARCH_MMU_FLAG_UNCACHED_DEVICE:
        case ARCH_MMU_FLAG_UNCACHED:
            terminal_flags |= X86_MMU_LARGE_PAT_UNCACHABLE;
            break;
        case ARCH_MMU_FLAG_WRITE_COMBINING:
            terminal_flags |= X86_MMU_LARGE_PAT_WRITE_COMBINING;
            break;
        default:
            PANIC_UNIMPLEMENTED;
        }
    } else {
        switch (flags & ARCH_MMU_FLAG_CACHE_MASK) {
        case ARCH_MMU_FLAG_CACHED:
            terminal_flags |= X86_MMU_PTE_PAT_WRITEBACK;
            break;
        case ARCH_MMU_FLAG_UNCACHED_DEVICE:
        case ARCH_MMU_FLAG_UNCACHED:
            terminal_flags |= X86_MMU_PTE_PAT_UNCACHABLE;
            break;
        case ARCH_MMU_FLAG_WRITE_COMBINING:
            terminal_flags |= X86_MMU_PTE_PAT_WRITE_COMBINING;
            break;
        default:
            PANIC_UNIMPLEMENTED;
        }
    }

    return terminal_flags;
}

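// Derive the flags for the smaller entries created when a large page at
// |level| is split.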
X86PageTableBase::PtFlags X86PageTableMmu::split_flags(PageTableLevel level,
                                                       X86PageTableBase::PtFlags flags) {
    DEBUG_ASSERT(level != PML4_L && level != PT_L);
    DEBUG_ASSERT(flags & X86_MMU_PG_PS);
    if (level == PD_L) {
        // Note: Clear PS before the check below; the PAT bit for a PTE is
        // the same as the PS bit for a higher table entry.
        flags &= ~X86_MMU_PG_PS;

        /* If the larger page had the PAT flag set, make sure it's
         * transferred to the different index for a PTE */
        if (flags & X86_MMU_PG_LARGE_PAT) {
            flags &= ~X86_MMU_PG_LARGE_PAT;
            flags |= X86_MMU_PG_PTE_PAT;
        }
    }
    return flags;
}

void X86PageTableMmu::TlbInvalidate(PendingTlbInvalidation* pending) {
    x86_tlb_invalidate_page(this, pending);
}

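// Convert x86 PTE bits at |level| back into generic ARCH_MMU_FLAG_* values.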
uint X86PageTableMmu::pt_flags_to_mmu_flags(PtFlags flags, PageTableLevel level) {
    uint mmu_flags = ARCH_MMU_FLAG_PERM_READ;

    if (flags & X86_MMU_PG_RW) {
        mmu_flags |= ARCH_MMU_FLAG_PERM_WRITE;
    }
    if (flags & X86_MMU_PG_U) {
        mmu_flags |= ARCH_MMU_FLAG_PERM_USER;
    }
    if (!(flags & X86_MMU_PG_NX)) {
        mmu_flags |= ARCH_MMU_FLAG_PERM_EXECUTE;
    }

    if (level > 0) {
        switch (flags & X86_MMU_LARGE_PAT_MASK) {
        case X86_MMU_LARGE_PAT_WRITEBACK:
            mmu_flags |= ARCH_MMU_FLAG_CACHED;
            break;
        case X86_MMU_LARGE_PAT_UNCACHABLE:
            mmu_flags |= ARCH_MMU_FLAG_UNCACHED;
            break;
        case X86_MMU_LARGE_PAT_WRITE_COMBINING:
            mmu_flags |= ARCH_MMU_FLAG_WRITE_COMBINING;
            break;
        default:
            PANIC_UNIMPLEMENTED;
        }
    } else {
        switch (flags & X86_MMU_PTE_PAT_MASK) {
        case X86_MMU_PTE_PAT_WRITEBACK:
            mmu_flags |= ARCH_MMU_FLAG_CACHED;
            break;
        case X86_MMU_PTE_PAT_UNCACHABLE:
            mmu_flags |= ARCH_MMU_FLAG_UNCACHED;
            break;
        case X86_MMU_PTE_PAT_WRITE_COMBINING:
            mmu_flags |= ARCH_MMU_FLAG_WRITE_COMBINING;
            break;
        default:
            PANIC_UNIMPLEMENTED;
        }
    }
    return mmu_flags;
}

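// EPT mappings must be at least readable, and only the R/W/X permission flags
// are accepted.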
bool X86PageTableEpt::allowed_flags(uint flags) {
    if (!(flags & ARCH_MMU_FLAG_PERM_READ)) {
        return false;
    }
    if (flags & ~kValidEptFlags) {
        return false;
    }
    return true;
}

bool X86PageTableEpt::check_paddr(paddr_t paddr) {
    return x86_mmu_check_paddr(paddr);
}

bool X86PageTableEpt::check_vaddr(vaddr_t vaddr) {
    return x86_mmu_check_vaddr(vaddr);
}

bool X86PageTableEpt::supports_page_size(PageTableLevel level) {
    DEBUG_ASSERT(level != PT_L);
    switch (level) {
    case PD_L:
        return true;
    case PDP_L:
        return supports_huge_pages;
    case PML4_L:
        return false;
    default:
        panic("Unreachable case in supports_page_size\n");
    }
}

X86PageTableBase::PtFlags X86PageTableEpt::intermediate_flags() {
    return X86_EPT_R | X86_EPT_W | X86_EPT_X;
}

X86PageTableBase::PtFlags X86PageTableEpt::terminal_flags(PageTableLevel level,
                                                          uint flags) {
    X86PageTableBase::PtFlags terminal_flags = 0;

    if (flags & ARCH_MMU_FLAG_PERM_READ) {
        terminal_flags |= X86_EPT_R;
    }
    if (flags & ARCH_MMU_FLAG_PERM_WRITE) {
        terminal_flags |= X86_EPT_W;
    }
    if (flags & ARCH_MMU_FLAG_PERM_EXECUTE) {
        terminal_flags |= X86_EPT_X;
    }

    switch (flags & ARCH_MMU_FLAG_CACHE_MASK) {
    case ARCH_MMU_FLAG_CACHED:
        terminal_flags |= X86_EPT_WB;
        break;
    case ARCH_MMU_FLAG_UNCACHED_DEVICE:
    case ARCH_MMU_FLAG_UNCACHED:
        terminal_flags |= X86_EPT_UC;
        break;
    case ARCH_MMU_FLAG_WRITE_COMBINING:
        terminal_flags |= X86_EPT_WC;
        break;
    default:
        PANIC_UNIMPLEMENTED;
    }

    return terminal_flags;
}

X86PageTableBase::PtFlags X86PageTableEpt::split_flags(PageTableLevel level,
                                                       X86PageTableBase::PtFlags flags) {
    DEBUG_ASSERT(level != PML4_L && level != PT_L);
    // We don't need to relocate any flags on split for EPT.
    return flags;
}

void X86PageTableEpt::TlbInvalidate(PendingTlbInvalidation* pending) {
    // TODO(ZX-981): Implement this.
    pending->clear();
}

uint X86PageTableEpt::pt_flags_to_mmu_flags(PtFlags flags, PageTableLevel level) {
    uint mmu_flags = 0;

    if (flags & X86_EPT_R) {
        mmu_flags |= ARCH_MMU_FLAG_PERM_READ;
    }
    if (flags & X86_EPT_W) {
        mmu_flags |= ARCH_MMU_FLAG_PERM_WRITE;
    }
    if (flags & X86_EPT_X) {
        mmu_flags |= ARCH_MMU_FLAG_PERM_EXECUTE;
    }

    switch (flags & X86_EPT_MEMORY_TYPE_MASK) {
    case X86_EPT_WB:
        mmu_flags |= ARCH_MMU_FLAG_CACHED;
        break;
    case X86_EPT_UC:
        mmu_flags |= ARCH_MMU_FLAG_UNCACHED;
        break;
    case X86_EPT_WC:
        mmu_flags |= ARCH_MMU_FLAG_WRITE_COMBINING;
        break;
    default:
        PANIC_UNIMPLEMENTED;
    }

    return mmu_flags;
}

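/* Early boot MMU initialization: configure per-cpu MMU state and memory types,
 * drop the low identity mapping set up by start.S, and read the CPU's
 * supported address widths. */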
void x86_mmu_early_init() {
    x86_mmu_percpu_init();

    x86_mmu_mem_type_init();

    // Unmap the lower identity mapping.
    pml4[0] = 0;
    PendingTlbInvalidation tlb;
    tlb.enqueue(0, PML4_L, /* global */ false, /* terminal */ false);
    x86_tlb_invalidate_page(nullptr, &tlb);

    /* get the address width from the CPU */
    uint8_t vaddr_width = x86_linear_address_width();
    uint8_t paddr_width = x86_physical_address_width();

    supports_huge_pages = x86_feature_test(X86_FEATURE_HUGE_PAGE);

    /* if we got something meaningful, override the defaults.
     * some CPU/emulator combinations report nonsense paddr widths (e.g. 1),
     * so never go below the defaults. */
    if (paddr_width > g_paddr_width)
        g_paddr_width = paddr_width;
    if (vaddr_width > g_vaddr_width)
        g_vaddr_width = vaddr_width;

    LTRACEF("paddr_width %u vaddr_width %u\n", g_paddr_width, g_vaddr_width);
}

void x86_mmu_init(void) {}

X86PageTableBase::X86PageTableBase() {
}

X86PageTableBase::~X86PageTableBase() {
    DEBUG_ASSERT_MSG(!phys_, "page table dtor called before Destroy()");
}

// We disable analysis due to the write to |pages_| tripping it up.  It is safe
// to write to |pages_| since this is part of object construction.
zx_status_t X86PageTableBase::Init(void* ctx) TA_NO_THREAD_SAFETY_ANALYSIS {
    /* allocate a top level page table for the new address space */
    vm_page* p;
    paddr_t pa;
    zx_status_t status = pmm_alloc_page(0, &p, &pa);
    if (status != ZX_OK) {
        TRACEF("error allocating top level page directory\n");
        return ZX_ERR_NO_MEMORY;
    }
    virt_ = reinterpret_cast<pt_entry_t*>(paddr_to_physmap(pa));
    phys_ = pa;
    p->state = VM_PAGE_STATE_MMU;

    // TODO(abdulla): Remove when PMM returns pre-zeroed pages.
    arch_zero_page(virt_);

    ctx_ = ctx;
    pages_ = 1;
    return ZX_OK;
}

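// The kernel page table itself is statically allocated and initialized in
// start.S; InitKernel() only records its addresses and enables global mappings.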
// We disable analysis due to the write to |pages_| tripping it up.  It is safe
// to write to |pages_| since this is part of object construction.
zx_status_t X86PageTableMmu::InitKernel(void* ctx) TA_NO_THREAD_SAFETY_ANALYSIS {
    phys_ = kernel_pt_phys;
    virt_ = (pt_entry_t*)X86_PHYS_TO_VIRT(phys_);
    ctx_ = ctx;
    pages_ = 1;
    use_global_mappings_ = true;
    return ZX_OK;
}

zx_status_t X86PageTableMmu::AliasKernelMappings() {
    // Copy the kernel half of the top level table from the master kernel page table.
    memcpy(virt_ + NO_OF_PT_ENTRIES / 2,
           const_cast<pt_entry_t*>(&KERNEL_PT[NO_OF_PT_ENTRIES / 2]),
           sizeof(pt_entry_t) * NO_OF_PT_ENTRIES / 2);
    return ZX_OK;
}

X86ArchVmAspace::X86ArchVmAspace() {}

/*
 * Fill in the high level x86 arch aspace structure and allocate a top level page table.
 */
zx_status_t X86ArchVmAspace::Init(vaddr_t base, size_t size, uint mmu_flags) {
    static_assert(sizeof(cpu_mask_t) == sizeof(active_cpus_), "err");
    canary_.Assert();

    LTRACEF("aspace %p, base %#" PRIxPTR ", size 0x%zx, mmu_flags 0x%x\n", this, base, size,
            mmu_flags);

    flags_ = mmu_flags;
    base_ = base;
    size_ = size;
    if (mmu_flags & ARCH_ASPACE_FLAG_KERNEL) {
        X86PageTableMmu* mmu = new (page_table_storage_) X86PageTableMmu();
        pt_ = mmu;

        zx_status_t status = mmu->InitKernel(this);
        if (status != ZX_OK) {
            return status;
        }
        LTRACEF("kernel aspace: pt phys %#" PRIxPTR ", virt %p\n", pt_->phys(), pt_->virt());
    } else if (mmu_flags & ARCH_ASPACE_FLAG_GUEST) {
        X86PageTableEpt* ept = new (page_table_storage_) X86PageTableEpt();
        pt_ = ept;

        zx_status_t status = ept->Init(this);
        if (status != ZX_OK) {
            return status;
        }
        LTRACEF("guest aspace: pt phys %#" PRIxPTR ", virt %p\n", pt_->phys(), pt_->virt());
    } else {
        X86PageTableMmu* mmu = new (page_table_storage_) X86PageTableMmu;
        pt_ = mmu;

        zx_status_t status = mmu->Init(this);
        if (status != ZX_OK) {
            return status;
        }

        status = mmu->AliasKernelMappings();
        if (status != ZX_OK) {
            return status;
        }

        LTRACEF("user aspace: pt phys %#" PRIxPTR ", virt %p\n", pt_->phys(), pt_->virt());
    }
    fbl::atomic_init(&active_cpus_, 0);

    return ZX_OK;
}

zx_status_t X86ArchVmAspace::Destroy() {
    canary_.Assert();
    DEBUG_ASSERT(active_cpus_.load() == 0);

    if (flags_ & ARCH_ASPACE_FLAG_GUEST) {
        static_cast<X86PageTableEpt*>(pt_)->Destroy(base_, size_);
    } else {
        static_cast<X86PageTableMmu*>(pt_)->Destroy(base_, size_);
    }
    return ZX_OK;
}

zx_status_t X86ArchVmAspace::Unmap(vaddr_t vaddr, size_t count, size_t* unmapped) {
    if (!IsValidVaddr(vaddr))
        return ZX_ERR_INVALID_ARGS;

    return pt_->UnmapPages(vaddr, count, unmapped);
}

zx_status_t X86ArchVmAspace::MapContiguous(vaddr_t vaddr, paddr_t paddr, size_t count,
                                           uint mmu_flags, size_t* mapped) {
    if (!IsValidVaddr(vaddr))
        return ZX_ERR_INVALID_ARGS;

    return pt_->MapPagesContiguous(vaddr, paddr, count, mmu_flags, mapped);
}

zx_status_t X86ArchVmAspace::Map(vaddr_t vaddr, paddr_t* phys, size_t count,
                                 uint mmu_flags, size_t* mapped) {
    if (!IsValidVaddr(vaddr))
        return ZX_ERR_INVALID_ARGS;

    return pt_->MapPages(vaddr, phys, count, mmu_flags, mapped);
}

zx_status_t X86ArchVmAspace::Protect(vaddr_t vaddr, size_t count, uint mmu_flags) {
    if (!IsValidVaddr(vaddr))
        return ZX_ERR_INVALID_ARGS;

    return pt_->ProtectPages(vaddr, count, mmu_flags);
}

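// Switch the current CPU from |old_aspace| to |aspace| (or to the kernel
// address space if |aspace| is null): load the new CR3, update each aspace's
// active-CPU mask, and swap the TSS I/O bitmap.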
void X86ArchVmAspace::ContextSwitch(X86ArchVmAspace* old_aspace, X86ArchVmAspace* aspace) {
    cpu_mask_t cpu_bit = cpu_num_to_mask(arch_curr_cpu_num());
    if (aspace != nullptr) {
        aspace->canary_.Assert();
        paddr_t phys = aspace->pt_phys();
        LTRACEF_LEVEL(3, "switching to aspace %p, pt %#" PRIXPTR "\n", aspace, phys);
        x86_set_cr3(phys);

        if (old_aspace != nullptr) {
            old_aspace->active_cpus_.fetch_and(~cpu_bit);
        }
        aspace->active_cpus_.fetch_or(cpu_bit);
    } else {
        LTRACEF_LEVEL(3, "switching to kernel aspace, pt %#" PRIxPTR "\n", kernel_pt_phys);
        x86_set_cr3(kernel_pt_phys);
        if (old_aspace != nullptr) {
            old_aspace->active_cpus_.fetch_and(~cpu_bit);
        }
    }

    // Cleanup io bitmap entries from previous thread.
    if (old_aspace)
        x86_clear_tss_io_bitmap(old_aspace->io_bitmap());

    // Set the io bitmap for this thread.
    if (aspace)
        x86_set_tss_io_bitmap(aspace->io_bitmap());
}

zx_status_t X86ArchVmAspace::Query(vaddr_t vaddr, paddr_t* paddr, uint* mmu_flags) {
    if (!IsValidVaddr(vaddr))
        return ZX_ERR_INVALID_ARGS;

    return pt_->QueryVaddr(vaddr, paddr, mmu_flags);
}

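/* Per-CPU MMU setup: enable write protection in CR0, enable SMEP/SMAP in CR4
 * where the CPU supports them, and set the NXE bit in EFER so NX mappings
 * take effect. */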
void x86_mmu_percpu_init(void) {
    ulong cr0 = x86_get_cr0();
    /* Set write protect bit in CR0 */
    cr0 |= X86_CR0_WP;
    // Clear Cache disable/not write-through bits
    cr0 &= ~(X86_CR0_NW | X86_CR0_CD);
    x86_set_cr0(cr0);

    /* Set the SMEP & SMAP bits in CR4 */
    ulong cr4 = x86_get_cr4();
    if (x86_feature_test(X86_FEATURE_SMEP))
        cr4 |= X86_CR4_SMEP;
    if (x86_feature_test(X86_FEATURE_SMAP))
        cr4 |= X86_CR4_SMAP;
    x86_set_cr4(cr4);

    // Set NXE bit in X86_MSR_IA32_EFER.
    uint64_t efer_msr = read_msr(X86_MSR_IA32_EFER);
    efer_msr |= X86_EFER_NXE;
    write_msr(X86_MSR_IA32_EFER, efer_msr);
}

X86ArchVmAspace::~X86ArchVmAspace() {
    if (pt_) {
        pt_->~X86PageTableBase();
    }
    // TODO(ZX-980): check that we've destroyed the aspace.
}

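// No architecture-specific constraints on placement; just page-align the
// proposed base.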
vaddr_t X86ArchVmAspace::PickSpot(vaddr_t base, uint prev_region_mmu_flags,
                                  vaddr_t end, uint next_region_mmu_flags,
                                  vaddr_t align, size_t size, uint mmu_flags) {
    canary_.Assert();
    return PAGE_ALIGN(base);
}
