1/* 2 * Copyright 2020, Data61, CSIRO (ABN 41 687 119 230) 3 * 4 * SPDX-License-Identifier: GPL-2.0-only 5 */ 6 7#include <config.h> 8#include <api/syscall.h> 9#include <machine/io.h> 10#include <kernel/boot.h> 11#include <model/statedata.h> 12#include <arch/kernel/vspace.h> 13#include <arch/kernel/boot.h> 14#include <arch/kernel/boot_sys.h> 15#include <arch/api/invocation.h> 16#include <mode/kernel/tlb.h> 17#include <arch/kernel/tlb_bitmap.h> 18#include <object/structures.h> 19 20/* When using the SKIM window to isolate the kernel from the user we also need to 21 * not use global mappings as having global mappings and entries in the TLB is 22 * equivalent, for the purpose of exploitation, to having the mappings in the 23 * kernel window */ 24#define KERNEL_IS_GLOBAL() (config_set(CONFIG_KERNEL_SKIM_WINDOW) ? 0 : 1) 25 26/* For the boot code we create two windows into the physical address space 27 * One is at the same location as the kernel window, and is placed up high 28 * The other is a 1-to-1 mapping of the first 512gb of memory. The purpose 29 * of this is to have a 1-to-1 mapping for the low parts of memory, so that 30 * when we switch paging on, and are still running at physical addresses, 31 * we don't explode. Then we also want the high mappings so we can start 32 * running at proper kernel virtual addresses */ 33pml4e_t boot_pml4[BIT(PML4_INDEX_BITS)] ALIGN(BIT(seL4_PageBits)) VISIBLE PHYS_BSS; 34pdpte_t boot_pdpt[BIT(PDPT_INDEX_BITS)] ALIGN(BIT(seL4_PageBits)) VISIBLE PHYS_BSS; 35 36/* 'gdt_idt_ptr' is declared globally because of a C-subset restriction. 37 * It is only used in init_drts(), which therefore is non-reentrant. 
38 */ 39gdt_idt_ptr_t gdt_idt_ptr; 40 41BOOT_CODE bool_t map_kernel_window( 42 uint32_t num_ioapic, 43 paddr_t *ioapic_paddrs, 44 uint32_t num_drhu, 45 paddr_t *drhu_list 46) 47{ 48 49 uint64_t paddr; 50 uint64_t vaddr; 51 52#ifdef CONFIG_HUGE_PAGE 53 /* using 1 GiB page size */ 54 55 /* verify that the kernel window as at the last entry of the PML4 */ 56 assert(GET_PML4_INDEX(PPTR_BASE) == BIT(PML4_INDEX_BITS) - 1); 57 /* verify that the kernel_base is located in the last entry of the PML4, 58 * the second last entry of the PDPT, is 1gb aligned and 1gb in size */ 59 assert(GET_PML4_INDEX(KERNEL_ELF_BASE) == BIT(PML4_INDEX_BITS) - 1); 60 assert(GET_PDPT_INDEX(KERNEL_ELF_BASE) == BIT(PML4_INDEX_BITS) - 2); 61 assert(GET_PDPT_INDEX(KDEV_BASE) == BIT(PML4_INDEX_BITS) - 1); 62 assert(IS_ALIGNED(KERNEL_ELF_BASE - KERNEL_ELF_PADDR_BASE, seL4_HugePageBits)); 63 assert(IS_ALIGNED(KDEV_BASE, seL4_HugePageBits)); 64 /* place the PDPT into the PML4 */ 65 x64KSKernelPML4[GET_PML4_INDEX(PPTR_BASE)] = pml4e_new( 66 0, /* xd */ 67 kpptr_to_paddr(x64KSKernelPDPT), 68 0, /* accessed */ 69 0, /* cache_disabled */ 70 0, /* write_through */ 71 0, /* super_user */ 72 1, /* read_write */ 73 1 /* present */ 74 ); 75 /* put the 1GB kernel_base mapping into the PDPT */ 76 x64KSKernelPDPT[GET_PDPT_INDEX(KERNEL_ELF_BASE)] = pdpte_pdpte_1g_new( 77 0, /* xd */ 78 PADDR_BASE, 79 0, /* PAT */ 80 KERNEL_IS_GLOBAL(), /* global */ 81 0, /* dirty */ 82 0, /* accessed */ 83 0, /* cache_disabled */ 84 0, /* write_through */ 85 0, /* super_user */ 86 1, /* read_write */ 87 1 /* present */ 88 ); 89 /* also map the physical memory into the big kernel window */ 90 paddr = 0; 91 vaddr = PPTR_BASE; 92 for (paddr = 0; paddr < PADDR_TOP; 93 paddr += BIT(seL4_HugePageBits)) { 94 95 int pdpte_index = GET_PDPT_INDEX(vaddr); 96 x64KSKernelPDPT[pdpte_index] = pdpte_pdpte_1g_new( 97 0, /* xd */ 98 paddr, /* physical address */ 99 0, /* PAT */ 100 KERNEL_IS_GLOBAL(), /* global */ 101 0, /* dirty */ 102 0, /* 
accessed */ 103 0, /* cache_disabled */ 104 0, /* write_through */ 105 0, /* super_user */ 106 1, /* read_write */ 107 1 /* present */ 108 ); 109 110 vaddr += BIT(seL4_HugePageBits); 111 } 112 113 /* put the PD into the PDPT */ 114 x64KSKernelPDPT[GET_PDPT_INDEX(KDEV_BASE)] = pdpte_pdpte_pd_new( 115 0, /* xd */ 116 kpptr_to_paddr(x64KSKernelPD), 117 0, /* accessed */ 118 0, /* cache_disabled */ 119 0, /* write_through */ 120 0, /* super_user */ 121 1, /* read_write */ 122 1 /* present */ 123 ); 124 /* put the PT into the PD */ 125 x64KSKernelPD[0] = pde_pde_pt_new( 126 0, /* xd */ 127 kpptr_to_paddr(x64KSKernelPT), 128 0, /* accessed */ 129 0, /* cache_disabled */ 130 0, /* write_through */ 131 0, /* super_user */ 132 1, /* read_write */ 133 1 /* present */ 134 ); 135#else 136 137 int pd_index = 0; 138 /* use 2 MiB page size */ 139 /* verify that the kernel window as at the last entry of the PML4 */ 140 assert(GET_PML4_INDEX(PPTR_BASE) == BIT(PML4_INDEX_BITS) - 1); 141 /* verify that the kernel_base is located in the last entry of the PML4, 142 * the second last entry of the PDPT, is 1gb aligned and 1gb in size */ 143 assert(GET_PML4_INDEX(KERNEL_ELF_BASE) == BIT(PML4_INDEX_BITS) - 1); 144 assert(GET_PDPT_INDEX(KERNEL_ELF_BASE) == BIT(PML4_INDEX_BITS) - 2); 145 assert(GET_PDPT_INDEX(KDEV_BASE) == BIT(PML4_INDEX_BITS) - 1); 146 assert(IS_ALIGNED(KERNEL_ELF_BASE - KERNEL_ELF_PADDR_BASE, seL4_HugePageBits)); 147 assert(IS_ALIGNED(KDEV_BASE, seL4_HugePageBits)); 148 149 /* place the PDPT into the PML4 */ 150 x64KSKernelPML4[GET_PML4_INDEX(PPTR_BASE)] = pml4e_new( 151 0, /* xd */ 152 kpptr_to_paddr(x64KSKernelPDPT), 153 0, /* accessed */ 154 0, /* cache_disabled */ 155 0, /* write_through */ 156 0, /* super_user */ 157 1, /* read_write */ 158 1 /* present */ 159 ); 160 161 for (pd_index = 0; pd_index < PADDR_TOP >> seL4_HugePageBits; pd_index++) { 162 /* put the 1GB kernel_base mapping into the PDPT */ 163 x64KSKernelPDPT[GET_PDPT_INDEX(PPTR_BASE) + pd_index] = 
pdpte_pdpte_pd_new( 164 0, /* xd */ 165 kpptr_to_paddr(&x64KSKernelPDs[pd_index][0]), 166 0, /* accessed */ 167 0, /* cache disabled */ 168 0, /* write through */ 169 0, /* super user */ 170 1, /* read write */ 171 1 /* present */ 172 ); 173 } 174 175 x64KSKernelPDPT[GET_PDPT_INDEX(KERNEL_ELF_BASE)] = pdpte_pdpte_pd_new( 176 0, /* xd */ 177 kpptr_to_paddr(&x64KSKernelPDs[0][0]), 178 0, /* accessed */ 179 0, /* cache disable */ 180 1, /* write through */ 181 0, /* super user */ 182 1, /* read write */ 183 1 /* present */ 184 ); 185 186 paddr = 0; 187 vaddr = PPTR_BASE; 188 189 for (paddr = 0; paddr < PADDR_TOP; 190 paddr += 0x200000) { 191 192 int pd_index = GET_PDPT_INDEX(vaddr) - GET_PDPT_INDEX(PPTR_BASE); 193 int pde_index = GET_PD_INDEX(vaddr); 194 195 x64KSKernelPDs[pd_index][pde_index] = pde_pde_large_new( 196 0, /* xd */ 197 paddr, 198 0, /* pat */ 199 KERNEL_IS_GLOBAL(), /* global */ 200 0, /* dirty */ 201 0, /* accessed */ 202 0, /* cache disabled */ 203 0, /* write through */ 204 0, /* super user */ 205 1, /* read write */ 206 1 /* present */ 207 ); 208 vaddr += 0x200000; 209 } 210 211 /* put the PD into the PDPT */ 212 x64KSKernelPDPT[GET_PDPT_INDEX(KDEV_BASE)] = pdpte_pdpte_pd_new( 213 0, /* xd */ 214 kpptr_to_paddr(&x64KSKernelPDs[BIT(PDPT_INDEX_BITS) - 1][0]), 215 0, /* accessed */ 216 0, /* cache_disabled */ 217 0, /* write_through */ 218 0, /* super_user */ 219 1, /* read_write */ 220 1 /* present */ 221 ); 222 223 /* put the PT into the PD */ 224 x64KSKernelPDs[BIT(PDPT_INDEX_BITS) - 1][0] = pde_pde_pt_new( 225 0, /* xd */ 226 kpptr_to_paddr(x64KSKernelPT), 227 0, /* accessed */ 228 0, /* cache_disabled */ 229 0, /* write_through */ 230 0, /* super_user */ 231 1, /* read_write */ 232 1 /* present */ 233 ); 234#endif 235 236#if CONFIG_MAX_NUM_TRACE_POINTS > 0 237 /* use the last PD entry as the benchmark log storage. 
238 * the actual backing physical memory will be filled 239 * later by using alloc_region */ 240 ksLog = (ks_log_entry_t *)(KDEV_BASE + 0x200000 * (BIT(PD_INDEX_BITS) - 1)); 241#endif 242 243 /* now map in the kernel devices */ 244 if (!map_kernel_window_devices(x64KSKernelPT, num_ioapic, ioapic_paddrs, num_drhu, drhu_list)) { 245 return false; 246 } 247 248#ifdef ENABLE_SMP_SUPPORT 249 /* initialize the TLB bitmap */ 250 tlb_bitmap_init(x64KSKernelPML4); 251#endif /* ENABLE_SMP_SUPPORT */ 252 253 /* In boot code, so fine to just trash everything here */ 254 invalidateLocalTranslationAll(); 255 printf("Mapping kernel window is done\n"); 256 return true; 257} 258 259#ifdef CONFIG_KERNEL_SKIM_WINDOW 260BOOT_CODE bool_t map_skim_window(vptr_t skim_start, vptr_t skim_end) 261{ 262 /* place the PDPT into the PML4 */ 263 x64KSSKIMPML4[GET_PML4_INDEX(PPTR_BASE)] = pml4e_new( 264 0, /* xd */ 265 kpptr_to_paddr(x64KSSKIMPDPT), 266 0, /* accessed */ 267 0, /* cache_disabled */ 268 0, /* write_through */ 269 0, /* super_user */ 270 1, /* read_write */ 271 1 /* present */ 272 ); 273 /* place the PD into the kernel_base slot of the PDPT */ 274 x64KSSKIMPDPT[GET_PDPT_INDEX(KERNEL_ELF_BASE)] = pdpte_pdpte_pd_new( 275 0, /* xd */ 276 kpptr_to_paddr(x64KSSKIMPD), 277 0, /* accessed */ 278 0, /* cache_disabled */ 279 0, /* write_through */ 280 0, /* super_user */ 281 1, /* read_write */ 282 1 /* present */ 283 ); 284 /* map the skim portion into the PD. 
     * we expect it to be 2M aligned */
    assert((skim_start % BIT(seL4_LargePageBits)) == 0);
    assert((skim_end % BIT(seL4_LargePageBits)) == 0);
    uint64_t paddr = kpptr_to_paddr((void *)skim_start);
    for (int i = GET_PD_INDEX(skim_start); i < GET_PD_INDEX(skim_end); i++) {
        x64KSSKIMPD[i] = pde_pde_large_new(
            0,                  /* xd */
            paddr,
            0,                  /* pat */
            KERNEL_IS_GLOBAL(), /* global */
            0,                  /* dirty */
            0,                  /* accessed */
            0,                  /* cache_disabled */
            0,                  /* write_through */
            0,                  /* super_user */
            1,                  /* read_write */
            1                   /* present */
        );
        paddr += BIT(seL4_LargePageBits);
    }
    return true;
}
#endif

/* Initialise the given TSS: point IST1 at the top of this core's IRQ stack
 * (the stack used for interrupt entry) and set the IO map base to
 * sizeof(*tss), i.e. the IO permission bitmap directly follows the TSS
 * structure (see the x86KStss.io_map memset below). */
BOOT_CODE void init_tss(tss_t *tss)
{
    word_t base = (word_t)&x64KSIRQStack[CURRENT_CPU_INDEX()][IRQ_STACK_SIZE];
    *tss = tss_new(
        sizeof(*tss), /* io map base */
        0, 0, /* ist 7 */
        0, 0,
        0, 0,
        0, 0,
        0, 0,
        0, 0,
        /* ist 1 is the stack frame we use for interrupts */
        base >> 32, base & 0xffffffff, /* ist 1 */
        0, 0, /* rsp 2 */
        0, 0, /* rsp 1 */
        0, 0  /* rsp 0 */
    );
    /* set the IO map to all 1 to block user IN/OUT instructions */
    memset(&x86KSGlobalState[CURRENT_CPU_INDEX()].x86KStss.io_map[0], 0xff,
           sizeof(x86KSGlobalState[CURRENT_CPU_INDEX()].x86KStss.io_map));
}

/* Program the MSRs used by the SYSCALL/SYSRET fast path: entry point
 * (LSTAR), RFLAGS mask (FMASK) and segment selectors (STAR). */
BOOT_CODE void init_syscall_msrs(void)
{
    x86_wrmsr(IA32_LSTAR_MSR, (uint64_t)&handle_fastsyscall);
    // mask bit 9 in the kernel (which is the interrupt enable bit)
    // also mask bit 8, which is the Trap Flag, to prevent the kernel
    // from single stepping
    x86_wrmsr(IA32_FMASK_MSR, FLAGS_TF | FLAGS_IF);
    x86_wrmsr(IA32_STAR_MSR, ((uint64_t)SEL_CS_0 << 32) | ((uint64_t)SEL_CS_3 << 48));
}

/* Populate the GDT: null descriptor, kernel (dpl 0) and user (dpl 3)
 * code/data segments, FS/GS data segments, and the 16-byte TSS descriptor
 * (which occupies two consecutive GDT slots in long mode). */
BOOT_CODE void init_gdt(gdt_entry_t *gdt, tss_t *tss)
{

    uint64_t tss_base = (uint64_t)tss;
    gdt_tss_t gdt_tss;

    gdt[GDT_NULL] = gdt_entry_gdt_null_new();

    gdt[GDT_CS_0] = gdt_entry_gdt_code_new(
        0,      /* base high */
        1,      /* granularity */
        0,      /* operation size, must be 0 when 64-bit is set */
        1,      /* long mode */
        0,      /* avl */
        0xf,    /* limit high */
        1,      /* present */
        0,      /* dpl */
        1,      /* always 1 for segment */
        0,      /* base middle */
        0,      /* base low */
        0xffff  /* limit low */
    );

    gdt[GDT_DS_0] = gdt_entry_gdt_data_new(
        0,      /* base high */
        1,      /* granularity */
        1,      /* operation size */
        0,      /* avl */
        0xf,    /* seg limit high */
        1,      /* present */
        0,      /* dpl */
        1,      /* always 1 */
        0,      /* base mid */
        0,      /* base low */
        0xffff  /* seg limit low */
    );

    gdt[GDT_CS_3] = gdt_entry_gdt_code_new(
        0,      /* base high */
        1,      /* granularity */
        0,      /* operation size, must be 0 when 64-bit is set */
        1,      /* long mode */
        0,      /* avl */
        0xf,    /* limit high */
        1,      /* present */
        3,      /* dpl */
        1,      /* always 1 */
        0,      /* base middle */
        0,      /* base low */
        0xffff  /* limit low */
    );

    /* user data segment: same layout as GDT_DS_0 but dpl 3 */
    gdt[GDT_DS_3] = gdt_entry_gdt_data_new(
        0,      /* base high */
        1,      /* granularity */
        1,      /* operation size */
        0,      /* avl */
        0xf,    /* seg limit high */
        1,      /* present */
        3,      /* dpl */
        1,      /* always 1 */
        0,      /* base mid */
        0,      /* base low */
        0xffff  /* seg limit low */
    );

    /* user-accessible FS data segment */
    gdt[GDT_FS] = gdt_entry_gdt_data_new(
        0,      /* base high */
        1,      /* granularity */
        1,      /* operation size */
        0,      /* avl */
        0xf,    /* seg limit high */
        1,      /* present */
        3,      /* dpl */
        1,      /* always 1 */
        0,      /* base mid */
        0,      /* base low */
        0xffff  /* seg limit low */
    );

    /* user-accessible GS data segment */
    gdt[GDT_GS] = gdt_entry_gdt_data_new(
        0,      /* base high */
        1,      /* granularity */
        1,      /* operation size */
        0,      /* avl */
        0xf,    /* seg limit high */
        1,      /* present */
        3,      /* dpl */
        1,      /* always 1 */
        0,      /* base mid */
        0,      /* base low */
        0xffff  /* seg limit low */
    );

    gdt_tss = gdt_tss_new(
        tss_base >> 32,                  /* base 63 - 32 */
        (tss_base & 0xff000000UL) >> 24, /* base 31 - 24 */
        1,                               /* granularity */
        0,                               /* avl */
        0,                               /* limit high */
        1,                               /* present */
        0,                               /* dpl */
        9,                               /* desc type */
        (tss_base & 0xff0000UL) >> 16,   /* base 23-16 */
        (tss_base & 0xffffUL),           /* base 15 - 0 */
        sizeof(tss_io_t) - 1
    );

    gdt[GDT_TSS].words[0] = gdt_tss.words[0];
    gdt[GDT_TSS + 1].words[0] = gdt_tss.words[1];
}

/* Install one IDT entry. Exceptions/interrupts below int_trap_min get
 * dpl 0 (not invocable from user mode via `int`), except the software
 * break request which stays user-invocable; all use IST1 (see init_tss). */
BOOT_CODE void init_idt_entry(idt_entry_t *idt, interrupt_t interrupt, void(*handler)(void))
{
    uint64_t handler_addr = (uint64_t)handler;
    uint64_t dpl = 3;

    if (interrupt < int_trap_min && interrupt != int_software_break_request) {
        dpl = 0;
    }

    idt[interrupt] = idt_entry_interrupt_gate_new(
        handler_addr >> 32,              /* offset 63 - 32 */
        ((handler_addr >> 16) & 0xffff),
        1,                               /* present */
        dpl,                             /* dpl */
        1,                               /* ist */
        SEL_CS_0,                        /* segment selector */
        (handler_addr & 0xffff)          /* offset 15 - 0 */
    );
}

/* Switch the current user vspace root to that of 'tcb'. Falls back to the
 * kernel's global vspace if the thread's vtable cap is not a mapped PML4
 * cap, or if its ASID no longer resolves to the same PML4. Only writes
 * CR3 when the value actually changes. */
void setVMRoot(tcb_t *tcb)
{
    cap_t threadRoot;
    asid_t asid;
    pml4e_t *pml4;
    findVSpaceForASID_ret_t find_ret;
    cr3_t cr3;

    threadRoot = TCB_PTR_CTE_PTR(tcb, tcbVTable)->cap;

    if (cap_get_capType(threadRoot) != cap_pml4_cap ||
        !cap_pml4_cap_get_capPML4IsMapped(threadRoot)) {
        setCurrentUserVSpaceRoot(kpptr_to_paddr(X86_GLOBAL_VSPACE_ROOT), 0);
        return;
    }

    pml4 = PML4E_PTR(cap_pml4_cap_get_capPML4BasePtr(threadRoot));
    asid = cap_pml4_cap_get_capPML4MappedASID(threadRoot);
    find_ret = findVSpaceForASID(asid);
    if (unlikely(find_ret.status != EXCEPTION_NONE || find_ret.vspace_root != pml4)) {
        setCurrentUserVSpaceRoot(kpptr_to_paddr(X86_GLOBAL_VSPACE_ROOT), 0);
        return;
    }
    cr3 = makeCR3(pptr_to_paddr(pml4), asid);
    if (getCurrentUserCR3().words[0] != cr3.words[0]) {
        SMP_COND_STATEMENT(tlb_bitmap_set(pml4, getCurrentCPUIndex());)
        setCurrentUserCR3(cr3);
    }
}


/* Load this core's descriptor table registers: GDT, IDT, a null LDT and
 * the TSS. Uses the global gdt_idt_ptr (hence non-reentrant). */
BOOT_CODE void init_dtrs(void)
{
    gdt_idt_ptr.limit = (sizeof(gdt_entry_t) * GDT_ENTRIES) - 1;
    gdt_idt_ptr.base = (uint64_t)x86KSGlobalState[CURRENT_CPU_INDEX()].x86KSgdt;

    /* When we install the gdt it will clobber any value of gs that
     * we have.
       Since we might be using it for TLS we can stash
     * and unstash any gs value using swapgs
     */
    swapgs();
    x64_install_gdt(&gdt_idt_ptr);
    swapgs();

    gdt_idt_ptr.limit = (sizeof(idt_entry_t) * (int_max + 1)) - 1;
    gdt_idt_ptr.base = (uint64_t)x86KSGlobalState[CURRENT_CPU_INDEX()].x86KSidt;
    x64_install_idt(&gdt_idt_ptr);

    x64_install_ldt(SEL_NULL);

    x64_install_tss(SEL_TSS);
}

/* Enter the initial thread's frame into the page tables. Walks
 * PML4 -> PDPT -> PD -> PT for the frame's mapped address; every
 * intermediate level must already be present (asserted), as the boot code
 * creates paging structures before mapping frames. */
BOOT_CODE void map_it_frame_cap(cap_t pd_cap, cap_t frame_cap)
{
    pml4e_t *pml4 = PML4_PTR(pptr_of_cap(pd_cap));
    pdpte_t *pdpt;
    pde_t *pd;
    pte_t *pt;
    vptr_t vptr = cap_frame_cap_get_capFMappedAddress(frame_cap);
    void *pptr = (void *)cap_frame_cap_get_capFBasePtr(frame_cap);

    assert(cap_frame_cap_get_capFMapType(frame_cap) == X86_MappingVSpace);
    assert(cap_frame_cap_get_capFMappedASID(frame_cap) != asidInvalid);
    pml4 += GET_PML4_INDEX(vptr);
    assert(pml4e_ptr_get_present(pml4));
    pdpt = paddr_to_pptr(pml4e_ptr_get_pdpt_base_address(pml4));
    pdpt += GET_PDPT_INDEX(vptr);
    assert(pdpte_pdpte_pd_ptr_get_present(pdpt));
    pd = paddr_to_pptr(pdpte_pdpte_pd_ptr_get_pd_base_address(pdpt));
    pd += GET_PD_INDEX(vptr);
    assert(pde_pde_pt_ptr_get_present(pd));
    pt = paddr_to_pptr(pde_pde_pt_ptr_get_pt_base_address(pd));
    /* user (super_user=1) read/write mapping with default cacheability */
    *(pt + GET_PT_INDEX(vptr)) = pte_new(
        0,                   /* xd */
        pptr_to_paddr(pptr), /* page_base_address */
        0,                   /* global */
        0,                   /* pat */
        0,                   /* dirty */
        0,                   /* accessed */
        0,                   /* cache_disabled */
        0,                   /* write_through */
        1,                   /* super_user */
        1,                   /* read_write */
        1                    /* present */
    );
}

/* Enter the initial thread's PDPT into its PML4 slot. */
static BOOT_CODE void map_it_pdpt_cap(cap_t vspace_cap, cap_t pdpt_cap)
{
    pml4e_t *pml4 = PML4_PTR(pptr_of_cap(vspace_cap));
    pdpte_t *pdpt = PDPT_PTR(cap_pdpt_cap_get_capPDPTBasePtr(pdpt_cap));
    vptr_t vptr = cap_pdpt_cap_get_capPDPTMappedAddress(pdpt_cap);

    assert(cap_pdpt_cap_get_capPDPTIsMapped(pdpt_cap));
    *(pml4 + GET_PML4_INDEX(vptr)) = pml4e_new(
        0,                   /* xd */
        pptr_to_paddr(pdpt), /* pdpt_base_address */
        0,                   /* accessed */
        0,                   /* cache_disabled */
        0,                   /* write_through */
        1,                   /* super_user */
        1,                   /* read_write */
        1                    /* present */
    );
}

/* Enter the initial thread's page directory into its PDPT slot; the PML4
 * entry above it must already be present (asserted). */
BOOT_CODE void map_it_pd_cap(cap_t vspace_cap, cap_t pd_cap)
{
    pml4e_t *pml4 = PML4_PTR(pptr_of_cap(vspace_cap));
    pdpte_t *pdpt;
    pde_t *pd = PD_PTR(cap_page_directory_cap_get_capPDBasePtr(pd_cap));
    vptr_t vptr = cap_page_directory_cap_get_capPDMappedAddress(pd_cap);

    assert(cap_page_directory_cap_get_capPDIsMapped(pd_cap));
    pml4 += GET_PML4_INDEX(vptr);
    assert(pml4e_ptr_get_present(pml4));
    pdpt = paddr_to_pptr(pml4e_ptr_get_pdpt_base_address(pml4));
    *(pdpt + GET_PDPT_INDEX(vptr)) = pdpte_pdpte_pd_new(
        0,                 /* xd */
        pptr_to_paddr(pd), /* pd_base_address */
        0,                 /* accessed */
        0,                 /* cache_disabled */
        0,                 /* write_through */
        1,                 /* super_user */
        1,                 /* read_write */
        1                  /* present */
    );
}

/* Enter the initial thread's page table into its PD slot; the PML4 and
 * PDPT entries above it must already be present (asserted). */
BOOT_CODE void map_it_pt_cap(cap_t vspace_cap, cap_t pt_cap)
{
    pml4e_t *pml4 = PML4_PTR(pptr_of_cap(vspace_cap));
    pdpte_t *pdpt;
    pde_t *pd;
    pte_t *pt = PT_PTR(cap_page_table_cap_get_capPTBasePtr(pt_cap));
    vptr_t vptr = cap_page_table_cap_get_capPTMappedAddress(pt_cap);

    assert(cap_page_table_cap_get_capPTIsMapped(pt_cap));
    pml4 += GET_PML4_INDEX(vptr);
    assert(pml4e_ptr_get_present(pml4));
    pdpt = paddr_to_pptr(pml4e_ptr_get_pdpt_base_address(pml4));
    pdpt += GET_PDPT_INDEX(vptr);
    assert(pdpte_pdpte_pd_ptr_get_present(pdpt));
    pd = paddr_to_pptr(pdpte_pdpte_pd_ptr_get_pd_base_address(pdpt));
    *(pd + GET_PD_INDEX(vptr)) = pde_pde_pt_new(
        0,                 /* xd */
        pptr_to_paddr(pt), /* pt_base_address */
        0,                 /* accessed */
        0,                 /* cache_disabled */
        0,                 /* write_through */
        1,                 /* super_user */
        1,                 /* read_write */
        1                  /* present */
    );
}

BOOT_CODE void *map_temp_boot_page(void *entry, uint32_t large_pages)
{
    /* this function is for legacy 32-bit systems where the ACPI tables might
     * collide with the kernel window. Here we just assert that the table is
     * in fact in the lower 4GiB region (which is already 1-to-1 mapped) and
     * continue */
    assert((word_t)entry < BIT(32));
    return entry;
}

/* Create a mapped PDPT cap for the initial thread and install it. */
static BOOT_CODE cap_t create_it_pdpt_cap(cap_t vspace_cap, pptr_t pptr, vptr_t vptr, asid_t asid)
{
    cap_t cap;
    cap = cap_pdpt_cap_new(
              asid, /* capPDPTMappedASID */
              pptr, /* capPDPTBasePtr */
              1,    /* capPDPTIsMapped */
              vptr  /* capPDPTMappedAddress */
          );
    map_it_pdpt_cap(vspace_cap, cap);
    return cap;
}

/* Create a mapped page-directory cap for the initial thread and install it. */
static BOOT_CODE cap_t create_it_pd_cap(cap_t vspace_cap, pptr_t pptr, vptr_t vptr, asid_t asid)
{
    cap_t cap;
    cap = cap_page_directory_cap_new(
              asid, /* capPDMappedASID */
              pptr, /* capPDBasePtr */
              1,    /* capPDIsMapped */
              vptr  /* capPDMappedAddress */
          );
    map_it_pd_cap(vspace_cap, cap);
    return cap;
}

/* Create a mapped page-table cap for the initial thread and install it. */
static BOOT_CODE cap_t create_it_pt_cap(cap_t vspace_cap, pptr_t pptr, vptr_t vptr, asid_t asid)
{
    cap_t cap;
    cap = cap_page_table_cap_new(
              asid, /* capPTMappedASID */
              pptr, /* capPTBasePtr */
              1,    /* capPTIsMapped */
              vptr  /* capPTMappedAddress */
          );
    map_it_pt_cap(vspace_cap, cap);
    return cap;
}


/* Number of paging structures (PTs, PDs, PDPTs, plus IOMMU structures if
 * configured) needed to back the initial thread's virtual region. */
BOOT_CODE word_t arch_get_n_paging(v_region_t it_v_reg)
{
    word_t n = get_n_paging(it_v_reg, PD_INDEX_OFFSET);
    n += get_n_paging(it_v_reg, PDPT_INDEX_OFFSET);
    n += get_n_paging(it_v_reg, PML4_INDEX_OFFSET);
#ifdef CONFIG_IOMMU
    n += vtd_get_n_paging(&boot_state.rmrr_list);
#endif
    return n;
}

/* Create the initial thread's address space: its PML4 cap (with kernel
 * mappings copied in), plus all PDPTs, PDs and PTs covering it_v_reg.
 * Records the created paging caps in the boot info slot region. Returns
 * the vspace cap, or a null cap if providing a cap slot fails. */
BOOT_CODE cap_t create_it_address_space(cap_t root_cnode_cap, v_region_t it_v_reg)
{
    cap_t vspace_cap;
    vptr_t vptr;
    seL4_SlotPos slot_pos_before;
    seL4_SlotPos slot_pos_after;

    slot_pos_before = ndks_boot.slot_pos_cur;
    copyGlobalMappings(PML4_PTR(rootserver.vspace));
    vspace_cap = cap_pml4_cap_new(
                     IT_ASID,           /* capPML4MappedASID */
                     rootserver.vspace, /* capPML4BasePtr */
                     1                  /* capPML4IsMapped */
                 );


    write_slot(SLOT_PTR(pptr_of_cap(root_cnode_cap), seL4_CapInitThreadVSpace), vspace_cap);

    /* Create any PDPTs needed for the user land image */
    for (vptr = ROUND_DOWN(it_v_reg.start, PML4_INDEX_OFFSET);
         vptr < it_v_reg.end;
         vptr += BIT(PML4_INDEX_OFFSET)) {
        if (!provide_cap(root_cnode_cap,
                         create_it_pdpt_cap(vspace_cap, it_alloc_paging(), vptr, IT_ASID))
           ) {
            return cap_null_cap_new();
        }
    }

    /* Create any PDs needed for the user land image */
    for (vptr = ROUND_DOWN(it_v_reg.start, PDPT_INDEX_OFFSET);
         vptr < it_v_reg.end;
         vptr += BIT(PDPT_INDEX_OFFSET)) {
        if (!provide_cap(root_cnode_cap,
                         create_it_pd_cap(vspace_cap, it_alloc_paging(), vptr, IT_ASID))
           ) {
            return cap_null_cap_new();
        }
    }

    /* Create any PTs needed for the user land image */
    for (vptr = ROUND_DOWN(it_v_reg.start, PD_INDEX_OFFSET);
         vptr < it_v_reg.end;
         vptr += BIT(PD_INDEX_OFFSET)) {
        if (!provide_cap(root_cnode_cap,
                         create_it_pt_cap(vspace_cap, it_alloc_paging(), vptr, IT_ASID))
           ) {
            return cap_null_cap_new();
        }
    }

    slot_pos_after = ndks_boot.slot_pos_cur;
    ndks_boot.bi_frame->userImagePaging = (seL4_SlotRegion) {
        slot_pos_before, slot_pos_after
    };
    return vspace_cap;
}

/* Copy the kernel's global PML4 entries (kernel window, and TLB bitmap
 * entries when SMP is enabled) into a newly created vspace root. */
void copyGlobalMappings(vspace_root_t *new_vspace)
{
    unsigned long i;
    pml4e_t *vspace = (pml4e_t *)new_vspace;

    /* Copy from the tlbbitmap_pptr so that we copy the default entries of the
     * tlb bitmap (if it exists). If it doesn't exist then this loop
     * will be equivalent to copying from PPTR_BASE
     */
    for (i = GET_PML4_INDEX(TLBBITMAP_PPTR); i < BIT(PML4_INDEX_BITS); i++) {
        vspace[i] = X86_GLOBAL_VSPACE_ROOT[i];
    }
}

/* Build a read/write, non-device frame cap (large or small page) for the
 * initial thread. */
static BOOT_CODE cap_t create_it_frame_cap(pptr_t pptr, vptr_t vptr, asid_t asid, bool_t use_large, seL4_Word map_type)
{
    vm_page_size_t frame_size;

    if (use_large) {
        frame_size = X86_LargePage;
    } else {
        frame_size = X86_SmallPage;
    }

    return
        cap_frame_cap_new(
            asid,                          /* capFMappedASID */
            pptr,                          /* capFBasePtr */
            frame_size,                    /* capFSize */
            map_type,                      /* capFMapType */
            vptr,                          /* capFMappedAddress */
            wordFromVMRights(VMReadWrite), /* capFVMRights */
            0                              /* capFIsDevice */
        );
}

BOOT_CODE cap_t create_unmapped_it_frame_cap(pptr_t pptr, bool_t use_large)
{
    return create_it_frame_cap(pptr, 0, asidInvalid, use_large, X86_MappingNone);
}

BOOT_CODE cap_t create_mapped_it_frame_cap(cap_t vspace_cap, pptr_t pptr, vptr_t vptr, asid_t asid, bool_t use_large,
                                           bool_t executable UNUSED)
{
    cap_t cap = create_it_frame_cap(pptr, vptr, asid, use_large, X86_MappingVSpace);
    map_it_frame_cap(vspace_cap, cap);
    return cap;
}

/* ====================== BOOT CODE FINISHES HERE ======================== */



/* Assign 'asid' to the vspace cap in vspaceCapSlot: mark the cap mapped
 * with that ASID and record the vspace root in the ASID pool. Handles both
 * native PML4 caps and (with CONFIG_VTX) EPT PML4 caps. */
exception_t performASIDPoolInvocation(asid_t asid, asid_pool_t *poolPtr, cte_t *vspaceCapSlot)
{
    asid_map_t asid_map;
#ifdef CONFIG_VTX
    if (cap_get_capType(vspaceCapSlot->cap) == cap_ept_pml4_cap) {
        cap_ept_pml4_cap_ptr_set_capPML4MappedASID(&vspaceCapSlot->cap, asid);
        cap_ept_pml4_cap_ptr_set_capPML4IsMapped(&vspaceCapSlot->cap, 1);
        asid_map = asid_map_asid_map_ept_new(cap_ept_pml4_cap_get_capPML4BasePtr(vspaceCapSlot->cap));
    } else
#endif
    {
        assert(cap_get_capType(vspaceCapSlot->cap) == cap_pml4_cap);
        cap_pml4_cap_ptr_set_capPML4MappedASID(&vspaceCapSlot->cap, asid);
        cap_pml4_cap_ptr_set_capPML4IsMapped(&vspaceCapSlot->cap, 1);
        asid_map = asid_map_asid_map_vspace_new(cap_pml4_cap_get_capPML4BasePtr(vspaceCapSlot->cap));
    }
    poolPtr->array[asid & MASK(asidLowBits)] = asid_map;
    return EXCEPTION_NONE;
}

bool_t CONST isVTableRoot(cap_t cap)
{
    return cap_get_capType(cap) == cap_pml4_cap;
}

/* A valid native vspace root is a PML4 cap that is currently mapped. */
bool_t CONST isValidNativeRoot(cap_t cap)
{
    return isVTableRoot(cap) &&
           cap_pml4_cap_get_capPML4IsMapped(cap);
}

/* User PML4 entry pointing at a PDPT; user-accessible, read/write,
 * caching per the given attributes. */
static pml4e_t CONST makeUserPML4E(paddr_t paddr, vm_attributes_t vm_attr)
{
    return pml4e_new(
        0,                                     /* xd */
        paddr,                                 /* pdpt_base_address */
        0,                                     /* accessed */
        vm_attributes_get_x86PCDBit(vm_attr),  /* cache_disabled */
        vm_attributes_get_x86PWTBit(vm_attr),  /* write_through */
        1,                                     /* super_user */
        1,                                     /* read_write */
        1                                      /* present */
    );
}

static pml4e_t CONST makeUserPML4EInvalid(void)
{
    return pml4e_new(
        0, /* xd */
        0, /* pdpt_base_addr */
        0, /* accessed */
        0, /* cache_disabled */
        0, /* write through */
        0, /* super user */
        0, /* read_write */
        0  /* present */
    );
}

/* User 1 GiB huge-page PDPT entry with the requested rights/attributes. */
static pdpte_t CONST makeUserPDPTEHugePage(paddr_t paddr, vm_attributes_t vm_attr, vm_rights_t vm_rights)
{
    return pdpte_pdpte_1g_new(
        0,                                     /* xd */
        paddr,                                 /* physical address */
        0,                                     /* PAT */
        0,                                     /* global */
        0,                                     /* dirty */
        0,                                     /* accessed */
        vm_attributes_get_x86PCDBit(vm_attr),  /* cache disabled */
        vm_attributes_get_x86PWTBit(vm_attr),  /* write through */
        SuperUserFromVMRights(vm_rights),      /* super user */
        WritableFromVMRights(vm_rights),       /* read write */
        1                                      /* present */
    );
}

/* User PDPT entry pointing at a page directory. */
static pdpte_t CONST makeUserPDPTEPageDirectory(paddr_t paddr, vm_attributes_t vm_attr)
{
    return pdpte_pdpte_pd_new(
        0,                                     /* xd */
        paddr,                                 /* paddr */
        0,                                     /* accessed */
        vm_attributes_get_x86PCDBit(vm_attr),  /* cache disabled */
        vm_attributes_get_x86PWTBit(vm_attr),  /* write through */
        1,                                     /* super user */
        1,                                     /* read write */
        1                                      /* present */
    );
}

static pdpte_t CONST makeUserPDPTEInvalid(void)
{
    return pdpte_pdpte_pd_new(
        0, /* xd */
        0, /* physical address */
        0, /* accessed */
        0, /* cache disabled */
        0, /* write through */
        0, /* super user */
        0, /* read write */
        0  /* present */
    );
}

/* User 2 MiB large-page PD entry with the requested rights/attributes. */
pde_t CONST makeUserPDELargePage(paddr_t paddr, vm_attributes_t vm_attr, vm_rights_t vm_rights)
{
    return pde_pde_large_new(
        0,                                     /* xd */
        paddr,                                 /* page_base_address */
        vm_attributes_get_x86PATBit(vm_attr),  /* pat */
        0,                                     /* global */
        0,                                     /* dirty */
        0,                                     /* accessed */
        vm_attributes_get_x86PCDBit(vm_attr),  /* cache_disabled */
        vm_attributes_get_x86PWTBit(vm_attr),  /* write_through */
        SuperUserFromVMRights(vm_rights),      /* super_user */
        WritableFromVMRights(vm_rights),       /* read_write */
        1                                      /* present */
    );
}

/* User PD entry pointing at a page table. */
pde_t CONST makeUserPDEPageTable(paddr_t paddr, vm_attributes_t vm_attr)
{

    return pde_pde_pt_new(
        0,                                     /* xd */
        paddr,                                 /* pt_base_address */
        0,                                     /* accessed */
        vm_attributes_get_x86PCDBit(vm_attr),  /* cache_disabled */
        vm_attributes_get_x86PWTBit(vm_attr),  /* write_through */
        1,                                     /* super_user */
        1,                                     /* read_write */
        1                                      /* present */
    );
}

pde_t CONST makeUserPDEInvalid(void)
{
    /* The bitfield only declares two kinds of PDE entries (page tables or large pages)
     * and an invalid entry should really be a third type, but we can simulate it by
     * creating an invalid (present bit 0) entry of either of the defined types */
    return pde_pde_pt_new(
        0, /* xd */
        0, /* pt_base_addr */
        0, /* accessed */
        0, /* cache_disabled */
        0, /* write_through */
        0, /* super_user */
        0, /* read_write */
        0  /* present */
    );
}

/* User 4 KiB page-table entry with the requested rights/attributes. */
pte_t CONST makeUserPTE(paddr_t paddr, vm_attributes_t vm_attr, vm_rights_t vm_rights)
{
    return pte_new(
        0,                                     /* xd */
        paddr,                                 /* page_base_address */
        0,                                     /* global */
        vm_attributes_get_x86PATBit(vm_attr),  /* pat */
        0,                                     /* dirty */
        0,                                     /* accessed */
        vm_attributes_get_x86PCDBit(vm_attr),  /* cache_disabled */
        vm_attributes_get_x86PWTBit(vm_attr),  /* write_through */
        SuperUserFromVMRights(vm_rights),      /* super_user */
        WritableFromVMRights(vm_rights),       /* read_write */
        1                                      /* present */
    );
}

pte_t CONST makeUserPTEInvalid(void)
{
    return pte_new(
        0, /* xd */
        0, /* page_base_address */
        0, /* global */
        0, /* pat */
        0, /* dirty */
        0, /* accessed */
        0, /* cache_disabled */
        0, /* write_through */
        0, /* super_user */
        0, /* read_write */
        0  /* present */
    );
}


/* Address of the PML4 slot covering 'vptr' (no presence check). */
static pml4e_t *lookupPML4Slot(vspace_root_t *pml4, vptr_t vptr)
{
    pml4e_t *pml4e = PML4E_PTR(pml4);
    word_t pml4Index = GET_PML4_INDEX(vptr);
    return pml4e + pml4Index;
}

/* Look up the PDPT slot covering 'vptr'. Fails with a lookup fault
 * (recorded in current_lookup_fault) if the PML4 entry is not present. */
static lookupPDPTSlot_ret_t lookupPDPTSlot(vspace_root_t *pml4, vptr_t vptr)
{
    pml4e_t *pml4Slot = lookupPML4Slot(pml4, vptr);
    lookupPDPTSlot_ret_t ret;

    if (!pml4e_ptr_get_present(pml4Slot)) {
        current_lookup_fault = lookup_fault_missing_capability_new(PML4_INDEX_OFFSET);

        ret.pdptSlot = NULL;
        ret.status = EXCEPTION_LOOKUP_FAULT;
        return ret;
    } else {
        pdpte_t *pdpt;
        pdpte_t *pdptSlot;
        word_t pdptIndex = GET_PDPT_INDEX(vptr);
        pdpt = paddr_to_pptr(pml4e_ptr_get_pdpt_base_address(pml4Slot));
        pdptSlot = pdpt + pdptIndex;

        ret.status = EXCEPTION_NONE;
        ret.pdptSlot = pdptSlot;
        return ret;
    }
}

/* Look up the PD slot covering 'vptr'. Fails with a lookup fault if the
 * PML4 or PDPT level is missing, or the PDPT entry is a 1 GiB page rather
 * than a page-directory pointer. */
lookupPDSlot_ret_t lookupPDSlot(vspace_root_t *pml4, vptr_t vptr)
{
    lookupPDPTSlot_ret_t pdptSlot;
    lookupPDSlot_ret_t ret;

    pdptSlot = lookupPDPTSlot(pml4, vptr);

    if (pdptSlot.status != EXCEPTION_NONE) {
        ret.pdSlot = NULL;
        ret.status = pdptSlot.status;
        return ret;
    }
    if ((pdpte_ptr_get_page_size(pdptSlot.pdptSlot) != pdpte_pdpte_pd) ||
        !pdpte_pdpte_pd_ptr_get_present(pdptSlot.pdptSlot)) {
        current_lookup_fault = lookup_fault_missing_capability_new(PDPT_INDEX_OFFSET);

        ret.pdSlot = NULL;
        ret.status = EXCEPTION_LOOKUP_FAULT;
        return ret;
    } else {
        pde_t *pd;
        pde_t *pdSlot;
        word_t pdIndex = GET_PD_INDEX(vptr);
        pd = paddr_to_pptr(pdpte_pdpte_pd_ptr_get_pd_base_address(pdptSlot.pdptSlot));
        pdSlot = pd + pdIndex;

        ret.status = EXCEPTION_NONE;
        ret.pdSlot = pdSlot;
        return ret;
    }
}

/* Flush translations for a page directory being removed from 'vspace';
 * implemented as a whole-ASID (PCID) invalidation rather than per-page
 * invlpg. 'vptr' and 'pd' are unused for this strategy. */
static void flushPD(vspace_root_t *vspace, word_t vptr, pde_t *pd, asid_t asid)
{
    /* clearing the entire PCID vs flushing the virtual addresses
     * one by one using invplg.
     * choose the easy way, invalidate the PCID
     */
    invalidateASID(vspace, asid, SMP_TERNARY(tlb_bitmap_get(vspace), 0));

}

/* Flush translations for a PDPT being removed; same whole-ASID strategy
 * as flushPD. */
static void flushPDPT(vspace_root_t *vspace, word_t vptr, pdpte_t *pdpt, asid_t asid)
{
    /* similar here */
    invalidateASID(vspace, asid, SMP_TERNARY(tlb_bitmap_get(vspace), 0));
    return;
}

/* Invalidate all TLB entries tagged with 'asid', on every core in the
 * vspace's TLB bitmap when SMP is enabled. */
void hwASIDInvalidate(asid_t asid, vspace_root_t *vspace)
{
    invalidateASID(vspace, asid, SMP_TERNARY(tlb_bitmap_get(vspace), 0));
}

/* Remove page directory 'pd' from the vspace for 'asid' (if it is in fact
 * mapped there at 'vaddr'), flushing the ASID and the paging-structure
 * cache. Silently does nothing if the ASID, slot or PD no longer match. */
void unmapPageDirectory(asid_t asid, vptr_t vaddr, pde_t *pd)
{
    findVSpaceForASID_ret_t find_ret;
    lookupPDPTSlot_ret_t lu_ret;

    find_ret = findVSpaceForASID(asid);
    if (find_ret.status != EXCEPTION_NONE) {
        return;
    }

    lu_ret = lookupPDPTSlot(find_ret.vspace_root, vaddr);
    if (lu_ret.status != EXCEPTION_NONE) {
        return;
    }

    /* check if the PDPT has the PD */
    if (!(pdpte_ptr_get_page_size(lu_ret.pdptSlot) == pdpte_pdpte_pd &&
          pdpte_pdpte_pd_ptr_get_present(lu_ret.pdptSlot) &&
          (pdpte_pdpte_pd_ptr_get_pd_base_address(lu_ret.pdptSlot) == pptr_to_paddr(pd)))) {
        return;
    }

    flushPD(find_ret.vspace_root, vaddr, pd, asid);

    *lu_ret.pdptSlot = makeUserPDPTEInvalid();

    invalidatePageStructureCacheASID(pptr_to_paddr(find_ret.vspace_root), asid,
                                     SMP_TERNARY(tlb_bitmap_get(find_ret.vspace_root), 0));
}


/* Unmap the PD (if mapped), zero its memory, and clear the cap's mapped
 * flag in the slot. */
static exception_t performX64PageDirectoryInvocationUnmap(cap_t cap, cte_t *ctSlot)
{

    if (cap_page_directory_cap_get_capPDIsMapped(cap)) {
        pde_t *pd = PDE_PTR(cap_page_directory_cap_get_capPDBasePtr(cap));
        unmapPageDirectory(
            cap_page_directory_cap_get_capPDMappedASID(cap),
            cap_page_directory_cap_get_capPDMappedAddress(cap),
            pd
        );
        clearMemory((void *)pd, cap_get_capSizeBits(cap));
    }

    cap_page_directory_cap_ptr_set_capPDIsMapped(&(ctSlot->cap), 0);

    return EXCEPTION_NONE;
}

/* Commit a PD mapping: update the cap, write the PDPT entry and flush the
 * paging-structure cache for the ASID. */
static exception_t performX64PageDirectoryInvocationMap(cap_t cap, cte_t *ctSlot, pdpte_t pdpte, pdpte_t *pdptSlot,
                                                        vspace_root_t *vspace)
{
    ctSlot->cap = cap;
    *pdptSlot = pdpte;
    invalidatePageStructureCacheASID(pptr_to_paddr(vspace), cap_page_directory_cap_get_capPDMappedASID(cap),
                                     SMP_TERNARY(tlb_bitmap_get(vspace), 0));
    return EXCEPTION_NONE;
}


/* Decode a Map or Unmap invocation on a page-directory cap, validating
 * the message, the target vspace cap and the mapping address before
 * delegating to the perform functions above. */
static exception_t decodeX64PageDirectoryInvocation(
    word_t label,
    word_t length,
    cte_t *cte,
    cap_t cap,
    extra_caps_t extraCaps,
    word_t *buffer
)
{
    word_t vaddr;
    vm_attributes_t vm_attr;
    cap_t vspaceCap;
    vspace_root_t *vspace;
    pdpte_t pdpte;
    paddr_t paddr;
    asid_t asid;
    lookupPDPTSlot_ret_t pdptSlot;

    if (label == X86PageDirectoryUnmap) {
        if (!isFinalCapability(cte)) {
            current_syscall_error.type = seL4_RevokeFirst;
            userError("X86PageDirectory: Cannot unmap if more than one cap exist.");
            return EXCEPTION_SYSCALL_ERROR;
        }
        setThreadState(NODE_STATE(ksCurThread), ThreadState_Restart);

        return performX64PageDirectoryInvocationUnmap(cap, cte);
    }

    if (label != X86PageDirectoryMap) {
        userError("X64Directory: Illegal operation.");
1165 current_syscall_error.type = seL4_IllegalOperation; 1166 return EXCEPTION_SYSCALL_ERROR; 1167 } 1168 1169 if (length < 2 || extraCaps.excaprefs[0] == NULL) { 1170 userError("X64PageDirectory: Truncated message."); 1171 current_syscall_error.type = seL4_TruncatedMessage; 1172 return EXCEPTION_SYSCALL_ERROR; 1173 } 1174 1175 if (cap_page_directory_cap_get_capPDIsMapped(cap)) { 1176 userError("X64PageDirectory: PD is already mapped to a PML4."); 1177 current_syscall_error.type = seL4_InvalidCapability; 1178 current_syscall_error.invalidCapNumber = 0; 1179 1180 return EXCEPTION_SYSCALL_ERROR; 1181 } 1182 1183 vaddr = getSyscallArg(0, buffer) & (~MASK(PDPT_INDEX_OFFSET)); 1184 vm_attr = vmAttributesFromWord(getSyscallArg(1, buffer)); 1185 vspaceCap = extraCaps.excaprefs[0]->cap; 1186 1187 if (!isValidNativeRoot(vspaceCap)) { 1188 current_syscall_error.type = seL4_InvalidCapability; 1189 current_syscall_error.invalidCapNumber = 1; 1190 1191 return EXCEPTION_SYSCALL_ERROR; 1192 } 1193 1194 vspace = (vspace_root_t *)pptr_of_cap(vspaceCap); 1195 asid = cap_get_capMappedASID(vspaceCap); 1196 1197 if (vaddr > USER_TOP) { 1198 userError("X64PageDirectory: Mapping address too high."); 1199 current_syscall_error.type = seL4_InvalidArgument; 1200 current_syscall_error.invalidArgumentNumber = 0; 1201 1202 return EXCEPTION_SYSCALL_ERROR; 1203 } 1204 1205 findVSpaceForASID_ret_t find_ret; 1206 1207 find_ret = findVSpaceForASID(asid); 1208 if (find_ret.status != EXCEPTION_NONE) { 1209 current_syscall_error.type = seL4_FailedLookup; 1210 current_syscall_error.failedLookupWasSource = false; 1211 1212 return EXCEPTION_SYSCALL_ERROR; 1213 } 1214 1215 if (find_ret.vspace_root != vspace) { 1216 current_syscall_error.type = seL4_InvalidCapability; 1217 current_syscall_error.invalidCapNumber = 1; 1218 1219 return EXCEPTION_SYSCALL_ERROR; 1220 } 1221 1222 pdptSlot = lookupPDPTSlot(vspace, vaddr); 1223 if (pdptSlot.status != EXCEPTION_NONE) { 1224 current_syscall_error.type = 
seL4_FailedLookup; 1225 current_syscall_error.failedLookupWasSource = false; 1226 1227 return EXCEPTION_SYSCALL_ERROR; 1228 } 1229 1230 if ((pdpte_ptr_get_page_size(pdptSlot.pdptSlot) == pdpte_pdpte_pd && 1231 pdpte_pdpte_pd_ptr_get_present(pdptSlot.pdptSlot)) || 1232 (pdpte_ptr_get_page_size(pdptSlot.pdptSlot) == pdpte_pdpte_1g 1233 && pdpte_pdpte_1g_ptr_get_present(pdptSlot.pdptSlot))) { 1234 current_syscall_error.type = seL4_DeleteFirst; 1235 1236 return EXCEPTION_SYSCALL_ERROR; 1237 } 1238 1239 paddr = pptr_to_paddr(PDE_PTR(cap_page_directory_cap_get_capPDBasePtr(cap))); 1240 pdpte = makeUserPDPTEPageDirectory(paddr, vm_attr); 1241 1242 cap = cap_page_directory_cap_set_capPDIsMapped(cap, 1); 1243 cap = cap_page_directory_cap_set_capPDMappedASID(cap, asid); 1244 cap = cap_page_directory_cap_set_capPDMappedAddress(cap, vaddr); 1245 1246 setThreadState(NODE_STATE(ksCurThread), ThreadState_Restart); 1247 return performX64PageDirectoryInvocationMap(cap, cte, pdpte, pdptSlot.pdptSlot, vspace); 1248} 1249 1250static void unmapPDPT(asid_t asid, vptr_t vaddr, pdpte_t *pdpt) 1251{ 1252 findVSpaceForASID_ret_t find_ret; 1253 pml4e_t *pml4Slot; 1254 1255 find_ret = findVSpaceForASID(asid); 1256 if (find_ret.status != EXCEPTION_NONE) { 1257 return; 1258 } 1259 1260 pml4Slot = lookupPML4Slot(find_ret.vspace_root, vaddr); 1261 1262 /* check if the PML4 has the PDPT */ 1263 if (!(pml4e_ptr_get_present(pml4Slot) && 1264 pml4e_ptr_get_pdpt_base_address(pml4Slot) == pptr_to_paddr(pdpt))) { 1265 return; 1266 } 1267 1268 flushPDPT(find_ret.vspace_root, vaddr, pdpt, asid); 1269 1270 *pml4Slot = makeUserPML4EInvalid(); 1271} 1272 1273static exception_t performX64PDPTInvocationUnmap(cap_t cap, cte_t *ctSlot) 1274{ 1275 if (cap_pdpt_cap_get_capPDPTIsMapped(cap)) { 1276 pdpte_t *pdpt = PDPTE_PTR(cap_pdpt_cap_get_capPDPTBasePtr(cap)); 1277 unmapPDPT(cap_pdpt_cap_get_capPDPTMappedASID(cap), 1278 cap_pdpt_cap_get_capPDPTMappedAddress(cap), 1279 pdpt); 1280 clearMemory((void *)pdpt, 
cap_get_capSizeBits(cap)); 1281 } 1282 1283 cap_pdpt_cap_ptr_set_capPDPTIsMapped(&(ctSlot->cap), 0); 1284 1285 return EXCEPTION_NONE; 1286} 1287 1288static exception_t performX64PDPTInvocationMap(cap_t cap, cte_t *ctSlot, pml4e_t pml4e, pml4e_t *pml4Slot, 1289 vspace_root_t *vspace) 1290{ 1291 ctSlot->cap = cap; 1292 *pml4Slot = pml4e; 1293 invalidatePageStructureCacheASID(pptr_to_paddr(vspace), cap_pdpt_cap_get_capPDPTMappedASID(cap), 1294 SMP_TERNARY(tlb_bitmap_get(vspace), 0)); 1295 1296 return EXCEPTION_NONE; 1297} 1298 1299static exception_t decodeX64PDPTInvocation( 1300 word_t label, 1301 word_t length, 1302 cte_t *cte, 1303 cap_t cap, 1304 extra_caps_t extraCaps, 1305 word_t *buffer) 1306{ 1307 word_t vaddr; 1308 vm_attributes_t attr; 1309 pml4e_t *pml4Slot; 1310 cap_t vspaceCap; 1311 vspace_root_t *vspace; 1312 pml4e_t pml4e; 1313 paddr_t paddr; 1314 asid_t asid; 1315 1316 if (label == X86PDPTUnmap) { 1317 if (!isFinalCapability(cte)) { 1318 current_syscall_error.type = seL4_RevokeFirst; 1319 userError("X86PDPT: Cannot unmap if more than one cap exist."); 1320 return EXCEPTION_SYSCALL_ERROR; 1321 } 1322 1323 setThreadState(NODE_STATE(ksCurThread), ThreadState_Restart); 1324 1325 return performX64PDPTInvocationUnmap(cap, cte); 1326 } 1327 1328 if (label != X86PDPTMap) { 1329 userError("X86PDPT: Illegal operation."); 1330 current_syscall_error.type = seL4_IllegalOperation; 1331 return EXCEPTION_SYSCALL_ERROR; 1332 } 1333 1334 if (length < 2 || extraCaps.excaprefs[0] == NULL) { 1335 userError("X64PDPT: Truncated message."); 1336 current_syscall_error.type = seL4_TruncatedMessage; 1337 return EXCEPTION_SYSCALL_ERROR; 1338 } 1339 1340 if (cap_pdpt_cap_get_capPDPTIsMapped(cap)) { 1341 userError("X64PDPT: PDPT is already mapped to a PML4."); 1342 current_syscall_error.type = seL4_InvalidCapability; 1343 current_syscall_error.invalidCapNumber = 0; 1344 1345 return EXCEPTION_SYSCALL_ERROR; 1346 } 1347 1348 vaddr = getSyscallArg(0, buffer) & 
(~MASK(PML4_INDEX_OFFSET)); 1349 attr = vmAttributesFromWord(getSyscallArg(1, buffer)); 1350 vspaceCap = extraCaps.excaprefs[0]->cap; 1351 1352 if (!isValidNativeRoot(vspaceCap)) { 1353 current_syscall_error.type = seL4_InvalidCapability; 1354 current_syscall_error.invalidCapNumber = 1; 1355 1356 return EXCEPTION_SYSCALL_ERROR; 1357 } 1358 1359 vspace = (vspace_root_t *)pptr_of_cap(vspaceCap); 1360 asid = cap_get_capMappedASID(vspaceCap); 1361 1362 if (vaddr > USER_TOP) { 1363 userError("X64PDPT: Mapping address too high."); 1364 current_syscall_error.type = seL4_InvalidArgument; 1365 current_syscall_error.invalidArgumentNumber = 0; 1366 1367 return EXCEPTION_SYSCALL_ERROR; 1368 } 1369 1370 findVSpaceForASID_ret_t find_ret; 1371 1372 find_ret = findVSpaceForASID(asid); 1373 if (find_ret.status != EXCEPTION_NONE) { 1374 current_syscall_error.type = seL4_FailedLookup; 1375 current_syscall_error.failedLookupWasSource = false; 1376 1377 return EXCEPTION_SYSCALL_ERROR; 1378 } 1379 1380 if (find_ret.vspace_root != vspace) { 1381 current_syscall_error.type = seL4_InvalidCapability; 1382 current_syscall_error.invalidCapNumber = 1; 1383 1384 return EXCEPTION_SYSCALL_ERROR; 1385 } 1386 1387 pml4Slot = lookupPML4Slot(vspace, vaddr); 1388 1389 if (pml4e_ptr_get_present(pml4Slot)) { 1390 current_syscall_error.type = seL4_DeleteFirst; 1391 1392 return EXCEPTION_SYSCALL_ERROR; 1393 } 1394 1395 paddr = pptr_to_paddr(PDPTE_PTR((cap_pdpt_cap_get_capPDPTBasePtr(cap)))); 1396 pml4e = makeUserPML4E(paddr, attr); 1397 1398 cap = cap_pdpt_cap_set_capPDPTIsMapped(cap, 1); 1399 cap = cap_pdpt_cap_set_capPDPTMappedASID(cap, asid); 1400 cap = cap_pdpt_cap_set_capPDPTMappedAddress(cap, vaddr); 1401 1402 setThreadState(NODE_STATE(ksCurThread), ThreadState_Restart); 1403 return performX64PDPTInvocationMap(cap, cte, pml4e, pml4Slot, vspace); 1404} 1405 1406exception_t decodeX86ModeMMUInvocation( 1407 word_t label, 1408 word_t length, 1409 cptr_t cptr, 1410 cte_t *cte, 1411 cap_t cap, 1412 
extra_caps_t extraCaps, 1413 word_t *buffer 1414) 1415{ 1416 switch (cap_get_capType(cap)) { 1417 1418 case cap_pml4_cap: 1419 current_syscall_error.type = seL4_IllegalOperation; 1420 return EXCEPTION_SYSCALL_ERROR; 1421 1422 case cap_pdpt_cap: 1423 return decodeX64PDPTInvocation(label, length, cte, cap, extraCaps, buffer); 1424 1425 case cap_page_directory_cap: 1426 return decodeX64PageDirectoryInvocation(label, length, cte, cap, extraCaps, buffer); 1427 1428 default: 1429 fail("Invalid arch cap type"); 1430 } 1431} 1432 1433bool_t modeUnmapPage(vm_page_size_t page_size, vspace_root_t *vroot, vptr_t vaddr, void *pptr) 1434{ 1435 if (config_set(CONFIG_HUGE_PAGE) && page_size == X64_HugePage) { 1436 pdpte_t *pdpte; 1437 lookupPDPTSlot_ret_t pdpt_ret = lookupPDPTSlot(vroot, vaddr); 1438 if (pdpt_ret.status != EXCEPTION_NONE) { 1439 return false; 1440 } 1441 pdpte = pdpt_ret.pdptSlot; 1442 1443 1444 if (!(pdpte_ptr_get_page_size(pdpte) == pdpte_pdpte_1g 1445 && pdpte_pdpte_1g_ptr_get_present(pdpte) 1446 && (pdpte_pdpte_1g_ptr_get_page_base_address(pdpte) 1447 == pptr_to_paddr(pptr)))) { 1448 return false; 1449 } 1450 1451 *pdpte = makeUserPDPTEInvalid(); 1452 return true; 1453 } 1454 fail("Invalid page type"); 1455 return false; 1456} 1457 1458static exception_t updatePDPTE(asid_t asid, pdpte_t pdpte, pdpte_t *pdptSlot, vspace_root_t *vspace) 1459{ 1460 *pdptSlot = pdpte; 1461 invalidatePageStructureCacheASID(pptr_to_paddr(vspace), asid, 1462 SMP_TERNARY(tlb_bitmap_get(vspace), 0)); 1463 return EXCEPTION_NONE; 1464} 1465 1466static exception_t performX64ModeMap(cap_t cap, cte_t *ctSlot, pdpte_t pdpte, pdpte_t *pdptSlot, vspace_root_t *vspace) 1467{ 1468 ctSlot->cap = cap; 1469 return updatePDPTE(cap_frame_cap_get_capFMappedASID(cap), pdpte, pdptSlot, vspace); 1470} 1471 1472struct create_mapping_pdpte_return { 1473 exception_t status; 1474 pdpte_t pdpte; 1475 pdpte_t *pdptSlot; 1476}; 1477typedef struct create_mapping_pdpte_return create_mapping_pdpte_return_t; 1478 
/* Build the PDPT entry for a 1GiB frame mapping at vaddr, checking that no
 * page directory is already installed in the target slot. On failure sets
 * current_syscall_error and returns EXCEPTION_SYSCALL_ERROR in .status. */
static create_mapping_pdpte_return_t createSafeMappingEntries_PDPTE(paddr_t base, word_t vaddr, vm_rights_t vmRights,
                                                                    vm_attributes_t attr,
                                                                    vspace_root_t *vspace)
{
    create_mapping_pdpte_return_t ret;
    lookupPDPTSlot_ret_t lu_ret;

    lu_ret = lookupPDPTSlot(vspace, vaddr);
    if (lu_ret.status != EXCEPTION_NONE) {
        current_syscall_error.type = seL4_FailedLookup;
        current_syscall_error.failedLookupWasSource = false;
        ret.status = EXCEPTION_SYSCALL_ERROR;
        /* current_lookup_fault will have been set by lookupPDPTSlot */
        return ret;
    }
    ret.pdptSlot = lu_ret.pdptSlot;

    /* check for existing page directory */
    if ((pdpte_ptr_get_page_size(ret.pdptSlot) == pdpte_pdpte_pd) &&
        (pdpte_pdpte_pd_ptr_get_present(ret.pdptSlot))) {
        current_syscall_error.type = seL4_DeleteFirst;
        ret.status = EXCEPTION_SYSCALL_ERROR;
        return ret;
    }

    ret.pdpte = makeUserPDPTEHugePage(base, attr, vmRights);
    ret.status = EXCEPTION_NONE;
    return ret;
}

/* Mode-specific frame map decode: only 1GiB huge pages reach this function
 * (smaller sizes are handled by generic x86 code). Builds the mapping
 * entry, then performs it for X86PageMap; any other label is illegal. */
exception_t decodeX86ModeMapPage(word_t label, vm_page_size_t page_size, cte_t *cte, cap_t cap,
                                 vspace_root_t *vroot, vptr_t vaddr, paddr_t paddr, vm_rights_t vm_rights, vm_attributes_t vm_attr)
{
    if (config_set(CONFIG_HUGE_PAGE) && page_size == X64_HugePage) {
        create_mapping_pdpte_return_t map_ret;

        map_ret = createSafeMappingEntries_PDPTE(paddr, vaddr, vm_rights, vm_attr, vroot);
        if (map_ret.status != EXCEPTION_NONE) {
            return map_ret.status;
        }

        setThreadState(NODE_STATE(ksCurThread), ThreadState_Restart);

        switch (label) {
        case X86PageMap:
            return performX64ModeMap(cap, cte, map_ret.pdpte, map_ret.pdptSlot, vroot);

        default:
            current_syscall_error.type = seL4_IllegalOperation;
            return EXCEPTION_SYSCALL_ERROR;
        }
    }
    fail("Invalid Page type");
}

#ifdef CONFIG_PRINTING
/* Result of a debug read from a user vspace: status plus the word read
 * when status is EXCEPTION_NONE. */
typedef struct readWordFromVSpace_ret {
    exception_t status;
    word_t value;
} readWordFromVSpace_ret_t;

/* Debug helper: read one word at vaddr by manually walking the page tables,
 * trying 1GiB, then 2MiB, then 4KiB mappings. Returns
 * EXCEPTION_LOOKUP_FAULT if no present mapping covers vaddr. Only used for
 * printing; performs no permission checks. */
static readWordFromVSpace_ret_t readWordFromVSpace(vspace_root_t *vspace, word_t vaddr)
{
    readWordFromVSpace_ret_t ret;
    lookupPTSlot_ret_t ptSlot;
    lookupPDSlot_ret_t pdSlot;
    lookupPDPTSlot_ret_t pdptSlot;
    paddr_t paddr;
    word_t offset;
    pptr_t kernel_vaddr;
    word_t *value;

    pdptSlot = lookupPDPTSlot(vspace, vaddr);
    if (pdptSlot.status == EXCEPTION_NONE &&
        pdpte_ptr_get_page_size(pdptSlot.pdptSlot) == pdpte_pdpte_1g &&
        pdpte_pdpte_1g_ptr_get_present(pdptSlot.pdptSlot)) {

        paddr = pdpte_pdpte_1g_ptr_get_page_base_address(pdptSlot.pdptSlot);
        offset = vaddr & MASK(seL4_HugePageBits);
    } else {
        pdSlot = lookupPDSlot(vspace, vaddr);
        if (pdSlot.status == EXCEPTION_NONE &&
            ((pde_ptr_get_page_size(pdSlot.pdSlot) == pde_pde_large) &&
             pde_pde_large_ptr_get_present(pdSlot.pdSlot))) {

            paddr = pde_pde_large_ptr_get_page_base_address(pdSlot.pdSlot);
            offset = vaddr & MASK(seL4_LargePageBits);
        } else {
            ptSlot = lookupPTSlot(vspace, vaddr);
            if (ptSlot.status == EXCEPTION_NONE && pte_ptr_get_present(ptSlot.ptSlot)) {
                paddr = pte_ptr_get_page_base_address(ptSlot.ptSlot);
                offset = vaddr & MASK(seL4_PageBits);
            } else {
                ret.status = EXCEPTION_LOOKUP_FAULT;
                return ret;
            }
        }
    }


    /* Read through the kernel's physical-memory window. */
    kernel_vaddr = (word_t)paddr_to_pptr(paddr);
    value = (word_t *)(kernel_vaddr + offset);
    ret.status = EXCEPTION_NONE;
    ret.value = *value;
    return ret;
}

/* Print a short stack trace of a user thread by dumping the words above its
 * stack pointer. Best-effort debug aid: invalid vspace, unaligned RSP, or
 * unmapped words are reported instead of faulting. */
void Arch_userStackTrace(tcb_t *tptr)
{
    cap_t threadRoot;
    vspace_root_t *vspace_root;
    word_t sp;
    int i;

    threadRoot = TCB_PTR_CTE_PTR(tptr, tcbVTable)->cap;

    /* lookup the PD */
    if (cap_get_capType(threadRoot) != cap_pml4_cap) {
        printf("Invalid vspace\n");
        return;
    }

    vspace_root = (vspace_root_t *)pptr_of_cap(threadRoot);

    sp = getRegister(tptr, RSP);
    /* check for alignment so we don't have to worry about accessing
     * words that might be on two different pages */
    if (!IS_ALIGNED(sp, seL4_WordSizeBits)) {
        printf("RSP not aligned\n");
        return;
    }

    for (i = 0; i < CONFIG_USER_STACK_TRACE_LENGTH; i++) {
        word_t address = sp + (i * sizeof(word_t));
        readWordFromVSpace_ret_t result;
        result = readWordFromVSpace(vspace_root, address);
        if (result.status == EXCEPTION_NONE) {
            printf("0x%lx: 0x%lx\n", (long)address, (long)result.value);
        } else {
            printf("0x%lx: INVALID\n", (long)address);
        }
    }
}
#endif /* CONFIG_PRINTING */

#ifdef CONFIG_KERNEL_LOG_BUFFER
/* Map the frame named by frame_cptr as the kernel log buffer. The frame
 * must be a large (2MiB) page; it is installed as a kernel large-page
 * mapping in a fixed PD slot, then all TLBs are flushed. On failure sets
 * current_fault and returns EXCEPTION_SYSCALL_ERROR. */
exception_t benchmark_arch_map_logBuffer(word_t frame_cptr)
{
    lookupCapAndSlot_ret_t lu_ret;
    vm_page_size_t frameSize;
    pptr_t frame_pptr;

    /* faulting section */
    lu_ret = lookupCapAndSlot(NODE_STATE(ksCurThread), frame_cptr);

    if (unlikely(lu_ret.status != EXCEPTION_NONE)) {
        userError("Invalid cap #%lu.", frame_cptr);
        current_fault = seL4_Fault_CapFault_new(frame_cptr, false);

        return EXCEPTION_SYSCALL_ERROR;
    }

    if (cap_get_capType(lu_ret.cap) != cap_frame_cap) {
        userError("Invalid cap. Log buffer should be of a frame cap");
        current_fault = seL4_Fault_CapFault_new(frame_cptr, false);

        return EXCEPTION_SYSCALL_ERROR;
    }

    frameSize = cap_frame_cap_get_capFSize(lu_ret.cap);

    if (frameSize != X86_LargePage) {
        userError("Invalid size for log Buffer. The kernel expects at least 1M log buffer");
        current_fault = seL4_Fault_CapFault_new(frame_cptr, false);

        return EXCEPTION_SYSCALL_ERROR;
    }

    frame_pptr = cap_frame_cap_get_capFBasePtr(lu_ret.cap);

    ksUserLogBuffer = pptr_to_paddr((void *) frame_pptr);

    /* NOTE(review): VMKernelOnly is a vm_rights value being passed as the
     * PAT bit here — looks suspicious; confirm the intended PAT setting. */
    pde_t pde = pde_pde_large_new(
        0,               /* xd */
        ksUserLogBuffer, /* page_base_address */
        VMKernelOnly,    /* pat */
        1,               /* global */
        0,               /* dirty */
        0,               /* accessed */
        0,               /* cache_disabled */
        1,               /* write_through */
        1,               /* super_user */
        1,               /* read_write */
        1                /* present */
    );

    /* Stored in the PD slot after the device page table */
#ifdef CONFIG_HUGE_PAGE
    x64KSKernelPD[1] = pde;
#else
    x64KSKernelPDs[BIT(PDPT_INDEX_BITS) - 1][1] = pde;
#endif
    invalidateTranslationAll(MASK(CONFIG_MAX_NUM_NODES));

    return EXCEPTION_NONE;
}
#endif /* CONFIG_KERNEL_LOG_BUFFER */