/**
 * \file
 * \brief pmap management
 */

/*
 * Copyright (c) 2010-2015 ETH Zurich.
 * All rights reserved.
 *
 * This file is distributed under the terms in the attached LICENSE file.
 * If you do not find this file, copies can be found by writing to:
 * ETH Zurich D-INFK, Universitaetstr. 6, CH-8092 Zurich. Attn: Systems Group.
 */

/*
 * There was some minor difficulty here with mapping the CPU's native
 * page table arrangement onto Barrelfish: the problem lies in resource
 * bootstrapping, as the bootstrap RAM allocator hands out whole 4kB pages.
 *
 * After reworking retype to be range-based, we can now create a single
 * 1kB vnode from a 4kB frame, but we still waste the remaining 3kB when
 * creating ARM L2 vnodes before we have a connection to the memory server.
 */

#include <barrelfish/barrelfish.h>
#include <barrelfish/caddr.h>
#include <barrelfish/invocations_arch.h>
#include <stdio.h>

// Location of VSpace managed by this system.
#define VSPACE_BEGIN ((lvaddr_t)1UL*1024*1024*1024) // 0x40000000

// Amount of virtual address space reserved for mapping frames
// backing refill_slabs; increased from 128 pages for the PandaBoard port.
#define META_DATA_RESERVED_SPACE (BASE_PAGE_SIZE * 1024)

static inline uintptr_t
vregion_flags_to_kpi_paging_flags(vregion_flags_t flags)
{
    STATIC_ASSERT(0x1ff == VREGION_FLAGS_MASK, "");
    STATIC_ASSERT(0x0f == KPI_PAGING_FLAGS_MASK, "");
    STATIC_ASSERT(VREGION_FLAGS_READ == KPI_PAGING_FLAGS_READ, "");
    STATIC_ASSERT(VREGION_FLAGS_WRITE == KPI_PAGING_FLAGS_WRITE, "");
    STATIC_ASSERT(VREGION_FLAGS_EXECUTE == KPI_PAGING_FLAGS_EXECUTE, "");
    STATIC_ASSERT(VREGION_FLAGS_NOCACHE == KPI_PAGING_FLAGS_NOCACHE, "");
    if ((flags & VREGION_FLAGS_MPB) != 0) {
        // XXX: ignore MPB flag on ARM, otherwise the assert below fires -AB
        flags &= ~VREGION_FLAGS_MPB;
    }
    if ((flags & VREGION_FLAGS_WRITE_COMBINING) != 0) {
        // XXX: mask out write-combining flag on ARM
        flags &= ~VREGION_FLAGS_WRITE_COMBINING;
    }
    if ((flags & VREGION_FLAGS_VTD_SNOOP) != 0) {
        // XXX: mask out VT-d snooping flag on ARM
        flags &= ~VREGION_FLAGS_VTD_SNOOP;
    }
    if ((flags & VREGION_FLAGS_GUARD) != 0) {
        // a guard mapping carries no permissions at all
        flags = 0;
    }
    assert(0 == (~KPI_PAGING_FLAGS_MASK & (uintptr_t)flags));
    return (uintptr_t)flags;
}

// debug print preprocessor flag for this file
//#define LIBBARRELFISH_DEBUG_PMAP

/**
 * \brief check whether region A = [start_a .. end_a) overlaps
 * region B = [start_b .. end_b).
 * \return true iff A overlaps B
 */
static bool is_overlapping(uint16_t start_a, uint16_t end_a,
                           uint16_t start_b, uint16_t end_b)
{
    return
        // B contained in A (except possibly the start)
        (start_a < start_b && end_a >= end_b)
        // start_a inside B
        || (start_a >= start_b && start_a < end_b)
        // end_a inside B
        || (end_a > start_b && end_a < end_b);
}

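/*
 * Illustrative example (not part of the build): the regions are half-open,
 * so adjacent regions do not overlap:
 *
 *   is_overlapping(0, 4, 4, 8)  -> false  (regions merely touch at 4)
 *   is_overlapping(0, 5, 4, 8)  -> true   (entry 4 lies in both)
 *   is_overlapping(2, 3, 0, 8)  -> true   (A lies inside B)
 */
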
/**
 * \brief Check whether vnode `root' has entries in between [entry ..
 * entry+len).
 * \param root the vnode to look at
 * \param entry first entry of the region to check
 * \param len length of the region to check
 * \param only_pages true == do not report previously allocated lower-level
 *                   page tables that are empty
 * \return true iff entries exist in region.
 */
#if defined(LIBBARRELFISH_DEBUG_PMAP)
#define DEBUG_HAS_VNODE
#endif
static bool has_vnode(struct vnode *root, uint32_t entry, size_t len,
                      bool only_pages)
{
    assert(root != NULL);
    assert(root->is_vnode);
    struct vnode *n;

    uint32_t end_entry = entry + len;
#ifdef DEBUG_HAS_VNODE
    debug_printf("%s: checking region [%"PRIu32"--%"PRIu32"], only_pages = %d\n",
            __FUNCTION__, entry, end_entry, only_pages);
#endif

    for (n = root->u.vnode.children; n; n = n->next) {
        // region to check [entry .. end_entry)
        if (n->is_vnode && n->entry >= entry && n->entry < end_entry) {
            if (only_pages) {
                // recurse: report this page table only if it maps any pages,
                // but do not stop at an empty table, as a later table in
                // the region may still map pages
                if (has_vnode(n, 0, ARM_L2_TABLE_BYTES, true)) {
                    return true;
                }
                continue;
            }
#ifdef LIBBARRELFISH_DEBUG_PMAP
            debug_printf("1: found page table inside our region\n");
#endif
            return true;
        } else if (n->is_vnode) {
            // all other vnodes do not overlap with us, so go to next
            assert(n->entry < entry || n->entry >= end_entry);
            continue;
        } else {
            // not a vnode: check the mapped frame for overlap
            uint32_t end = n->entry + n->u.frame.pte_count;
#ifdef DEBUG_HAS_VNODE
            debug_printf("%s: looking at region: [%"PRIu32"--%"PRIu32"]\n",
                    __FUNCTION__, n->entry, end);
#endif

            if (is_overlapping(entry, end_entry, n->entry, end)) {
                return true;
            }
        }
    }

    return false;
}

/**
 * \brief Starting at a given root, return the child vnode covering index
 * `entry`: either the page table at that index, or the frame whose mapped
 * range [n->entry .. n->entry + pte_count) contains it.
 * \return vnode covering index `entry` or NULL
 */
#ifdef LIBBARRELFISH_DEBUG_PMAP
#define DEBUG_FIND_VNODE
#endif
static struct vnode *find_vnode(struct vnode *root, uint16_t entry)
{
    assert(root != NULL);
    assert(root->is_vnode);
    struct vnode *n;

#ifdef DEBUG_FIND_VNODE
    debug_printf("%s: looking for %"PRIu16"\n", __FUNCTION__, entry);
#endif

    for (n = root->u.vnode.children; n != NULL; n = n->next) {
        if (n->is_vnode &&
            is_overlapping(entry, entry + 1, n->entry, n->entry + 1)) {
#ifdef DEBUG_FIND_VNODE
            debug_printf("%s: found ptable at [%"PRIu16"--%"PRIu16"]\n",
                    __FUNCTION__, n->entry, n->entry + 1);
#endif
            return n;
        }
        else if (n->is_vnode) {
            assert(!is_overlapping(entry, entry + 1, n->entry, n->entry + 1));
            // ignore all other page-table vnodes
            continue;
        }

        // not a vnode: a frame mapping
        assert(!n->is_vnode);
        uint16_t end = n->entry + n->u.frame.pte_count;
#ifdef DEBUG_FIND_VNODE
        debug_printf("%s: looking at section [%"PRIu16"--%"PRIu16"]\n",
                __FUNCTION__, n->entry, end);
#endif
        if (n->entry <= entry && entry < end) {
#ifdef DEBUG_FIND_VNODE
            debug_printf("%d \\in [%d, %d)\n", entry, n->entry, end);
#endif
            return n;
        }
    }
    return NULL;
}

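/*
 * Illustrative example (not part of the build), assuming an L2 vnode `l2`
 * whose only child is a frame mapped at entries [7 .. 9):
 *
 *   has_vnode(l2, 8, 4, false)  -> true   (frame overlaps [8 .. 12))
 *   has_vnode(l2, 9, 2, false)  -> false  (no entries in [9 .. 11))
 *   find_vnode(l2, 8)           -> the frame vnode
 *   find_vnode(l2, 9)           -> NULL
 *
 * With only_pages == true, an allocated-but-empty page table inside the
 * region is not reported, since has_vnode() recurses into it and finds
 * no pages.
 */
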
/**
 * \brief check whether region [entry, entry+npages) is contained in a child
 * of `root`.
 */
static bool inside_region(struct vnode *root, uint32_t entry, uint32_t npages)
{
    assert(root != NULL);
    assert(root->is_vnode);

    struct vnode *n;

    for (n = root->u.vnode.children; n; n = n->next) {
        if (!n->is_vnode) {
            uint16_t end = n->entry + n->u.frame.pte_count;
            if (n->entry <= entry && entry + npages <= end) {
                return true;
            }
        }
    }

    return false;
}

/**
 * \brief remove vnode `item` from linked list of children of `root`
 */
static void remove_vnode(struct vnode *root, struct vnode *item)
{
    assert(root->is_vnode);
    struct vnode *walk = root->u.vnode.children;
    struct vnode *prev = NULL;
    while (walk) {
        if (walk == item) {
            if (prev) {
                prev->next = walk->next;
            } else {
                root->u.vnode.children = walk->next;
            }
            return;
        }
        prev = walk;
        walk = walk->next;
    }
    USER_PANIC("Should not get here");
}

/**
 * \brief (recursively) remove empty page tables in region [entry ..
 * entry+len) in vnode `root`.
 */
#ifdef LIBBARRELFISH_DEBUG_PMAP
#define DEBUG_REMOVE_EMPTY_VNODES
#endif
static void remove_empty_vnodes(struct slab_allocator *vnode_alloc,
                                struct vnode *root,
                                uint32_t entry, size_t len)
{
    // precondition: root has no pages in [entry, entry+len)
    assert(!has_vnode(root, entry, len, true));

    errval_t err;
    uint32_t end_entry = entry + len;
    struct vnode *next;
    for (struct vnode *n = root->u.vnode.children; n; n = next) {
        // remember the successor now: n may be freed below
        next = n->next;

        // skip leaf entries; all vnodes we're interested in are page tables
        if (!n->is_vnode) {
            continue;
        }
        assert(n->is_vnode);

        // Unmap vnode if it is in range [entry .. entry+len)
        if (n->entry >= entry && n->entry < end_entry) {
            err = vnode_unmap(root->u.vnode.invokable, n->mapping);
            assert(err_is_ok(err));

            if (!capcmp(n->u.vnode.cap, n->u.vnode.invokable)) {
                // delete invokable pt cap if it's a real copy
                err = cap_destroy(n->u.vnode.invokable);
                assert(err_is_ok(err));
            }

            // delete last copy of pt cap
            err = cap_destroy(n->u.vnode.cap);
            assert(err_is_ok(err));

            // remove vnode from list
            remove_vnode(root, n);
            slab_free(vnode_alloc, n);
        }
    }
}

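/*
 * Shape of the metadata maintained by this pmap (descriptive note, not from
 * the original source): pmap_arm->root is the vnode for the L1 table. Its
 * children list mixes two kinds of nodes, distinguished by is_vnode:
 *
 *   - page-table vnodes (is_vnode == true), one per allocated L2 table,
 *     whose own children track the 4kB pages mapped through that table;
 *   - frame nodes (is_vnode == false), used for 1MB section mappings at
 *     the L1 level and for page mappings at the L2 level, covering entries
 *     [entry .. entry + u.frame.pte_count).
 */
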
/**
 * \brief Allocates a new VNode, adding it to the page table and our metadata
 */
static errval_t alloc_vnode(struct pmap_arm *pmap_arm, struct vnode *root,
                            enum objtype type, uint32_t entry,
                            struct vnode **retvnode)
{
    assert(root->is_vnode);
    errval_t err;

    struct vnode *newvnode = slab_alloc(&pmap_arm->slab);
    if (newvnode == NULL) {
        return LIB_ERR_SLAB_ALLOC_FAIL;
    }
    newvnode->is_vnode = true;

    // The VNode capability
    err = slot_alloc(&newvnode->u.vnode.cap);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_SLOT_ALLOC);
    }

    err = vnode_create(newvnode->u.vnode.cap, type);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_VNODE_CREATE);
    }

    // XXX: do we need to put the master copy in another cspace?
    newvnode->u.vnode.invokable = newvnode->u.vnode.cap;

    // The VNode meta data
    newvnode->entry = entry;
    newvnode->next = root->u.vnode.children;
    root->u.vnode.children = newvnode;
    newvnode->u.vnode.children = NULL;

    err = slot_alloc(&newvnode->mapping);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_SLOT_ALLOC);
    }

    err = vnode_map(root->u.vnode.invokable, newvnode->u.vnode.cap,
                    entry, KPI_PAGING_FLAGS_READ | KPI_PAGING_FLAGS_WRITE,
                    0, 1, newvnode->mapping);
    if (err_is_fail(err)) {
        // XXX: the slab/slot allocations and the child-list insertion are
        // not rolled back on the failure paths above or here
        return err_push(err, LIB_ERR_PMAP_MAP);
    }

    if (retvnode) {
        *retvnode = newvnode;
    }
    return SYS_ERR_OK;
}

/**
 * \brief Returns the vnode for the page table mapping a given vspace address
 */
#ifdef LIBBARRELFISH_DEBUG_PMAP
#define DEBUG_GET_PTABLE
#endif
static errval_t get_ptable(struct pmap_arm  *pmap,
                           genvaddr_t        vaddr,
                           struct vnode    **ptable)
{
    // NB Strictly there are 12 bits in the ARM L1, but the allocation unit
    // of L2 tables is one page of L2 entries (4 tables), so we use 10 bits
    // for the L1 index here
    uintptr_t idx = ARM_L1_OFFSET(vaddr);
    if ((*ptable = find_vnode(&pmap->root, idx)) == NULL)
    {
        // L1 table entries point to L2 tables, so allocate an L2
        // table for this L1 entry.

        struct vnode *tmp = NULL; // temporary for passing to alloc_vnode

        errval_t err = alloc_vnode(pmap, &pmap->root, ObjType_VNode_ARM_l2,
                                   idx, &tmp);
        if (err_is_fail(err)) {
            DEBUG_ERR(err, "alloc_vnode");
            return err_push(err, LIB_ERR_PMAP_ALLOC_VNODE);
        }
        assert(tmp != NULL);
        *ptable = tmp; // hand the new vnode back to the caller
    }
    assert(ptable);
    struct vnode *pt = *ptable;
    if (!pt->is_vnode) {
        debug_printf("found section @%d, trying to get ptable for %"PRIuPTR"\n",
                pt->entry, idx);
    }
    assert(pt->is_vnode);
#ifdef DEBUG_GET_PTABLE
    debug_printf("have ptable: %p\n", pt);
#endif

    return SYS_ERR_OK;
}

static struct vnode *find_ptable(struct pmap_arm *pmap,
                                 genvaddr_t vaddr)
{
    // NB Strictly there are 12 bits in the ARM L1, but the allocation unit
    // of L2 tables is one page of L2 entries (4 tables), so we use 10 bits
    // for the L1 index here as well
    uintptr_t idx = ARM_L1_OFFSET(vaddr);
    return find_vnode(&pmap->root, idx);
}

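/*
 * Worked example (illustrative, assuming the 10/10/12 address split
 * described in the comments above): for vaddr = 0x40001000,
 *
 *   ARM_L1_OFFSET(vaddr) = 0x40001000 >> 22           = 256  (4MB L1 slot)
 *   ARM_L2_OFFSET(vaddr) = (0x40001000 >> 12) & 0x3ff =   1  (4kB page slot)
 *
 * so get_ptable() returns (allocating it if needed) the L2 vnode at L1
 * index 256, and do_single_map() below installs the page at entry 1 of it.
 */
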
static errval_t do_single_map(struct pmap_arm *pmap, genvaddr_t vaddr,
                              genvaddr_t vend, struct capref frame,
                              size_t offset, size_t pte_count,
                              vregion_flags_t flags)
{
    errval_t err = SYS_ERR_OK;
    // Get the page table
    struct vnode *ptable;
    uintptr_t entry;
    bool is_large = false;

    struct frame_identity fi;
    err = frame_identify(frame, &fi);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_PMAP_FRAME_IDENTIFY);
    }

    if (flags & VREGION_FLAGS_LARGE &&
        (vaddr & LARGE_PAGE_MASK) == 0 &&
        fi.bytes >= LARGE_PAGE_SIZE &&
        (fi.base & LARGE_PAGE_MASK) == 0) {
        // section mapping (1MB), mapped in the L1 table at root
        ptable = &pmap->root;
        entry = ARM_L1_OFFSET(vaddr);
        is_large = true;
#ifdef LIBBARRELFISH_DEBUG_PMAP
        debug_printf("do_single_map: large path: entry=%zu\n", entry);
#endif
    } else {
#ifdef LIBBARRELFISH_DEBUG_PMAP
        debug_printf("%s: 4k path: mapping %"PRIxGENVADDR", %zu entries\n",
                __FUNCTION__, vaddr, pte_count);
        debug_printf("4k path: L1 entry: %zu\n", ARM_L1_OFFSET(vaddr));
#endif
        // 4k mapping
        // XXX: reassess the following note -SG
        // NOTE: strictly speaking an L2 entry only has 8 bits, while an L1
        // entry has 12 bits, but due to the way Barrelfish allocates L1 and
        // L2 tables, we use 10 bits for the entry here and in the map syscall
        err = get_ptable(pmap, vaddr, &ptable);
        if (err_is_fail(err)) {
            DEBUG_ERR(err, "get_ptable() in do_single_map");
            return err_push(err, LIB_ERR_PMAP_GET_PTABLE);
        }
        entry = ARM_L2_OFFSET(vaddr);
#ifdef LIBBARRELFISH_DEBUG_PMAP
        debug_printf("%s: 4k path: L2 entry=%zu\n", __FUNCTION__, entry);
        debug_printf("%s: ptable->is_vnode = %d\n",
                __FUNCTION__, ptable->is_vnode);
#endif
    }

    // convert flags
    flags &= ~(VREGION_FLAGS_LARGE | VREGION_FLAGS_HUGE);
    uintptr_t pmap_flags = vregion_flags_to_kpi_paging_flags(flags);

    // check if there is an overlapping mapping
    if (has_vnode(ptable, entry, pte_count, false)) {
#ifdef LIBBARRELFISH_DEBUG_PMAP
        debug_printf("has_vnode, only_pages=false returned true\n");
#endif
        if (has_vnode(ptable, entry, pte_count, true)) {
            printf("page already exists in 0x%"
                    PRIxGENVADDR"--0x%"PRIxGENVADDR"\n", vaddr, vend);
            return LIB_ERR_PMAP_EXISTING_MAPPING;
        } else {
#ifdef LIBBARRELFISH_DEBUG_PMAP
            debug_printf("has_vnode, only_pages=true returned false, "
                    "cleaning up empty ptables\n");
#endif
            // clean out empty page tables. We do this here because we benefit
            // from having the page tables in place when doing lots of small
            // mappings
            // XXX: TODO: fix this + mapping of L2 to work on single 1k
            // chunks
            remove_empty_vnodes(&pmap->slab, ptable, entry, pte_count);
        }
    }

    // Create user-level datastructure for the mapping
    struct vnode *page = slab_alloc(&pmap->slab);
    assert(page);
    page->is_vnode = false;
    page->entry = entry;
    page->next = ptable->u.vnode.children;
    ptable->u.vnode.children = page;
    page->u.frame.cap = frame;
    page->u.frame.flags = flags;
    page->u.frame.pte_count = pte_count;

    err = slot_alloc(&page->mapping);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_SLOT_ALLOC);
    }

    // Map entry into the page table
    err = vnode_map(ptable->u.vnode.invokable, frame, entry,
                    pmap_flags, offset, pte_count,
                    page->mapping);
    if (err_is_fail(err)) {
        errval_t err2 = slot_free(page->mapping);
        if (err_is_fail(err2)) {
            err = err_push(err, err2);
        }
        return err_push(err, LIB_ERR_VNODE_MAP);
    }
    return SYS_ERR_OK;
}

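/*
 * Worked example (illustrative): a mapping takes the 1MB section path only
 * if all of the following hold; otherwise it silently falls back to 4kB
 * pages:
 *
 *   flags include VREGION_FLAGS_LARGE
 *   vaddr is 1MB-aligned         -- (vaddr & LARGE_PAGE_MASK) == 0
 *   frame is at least 1MB        -- fi.bytes >= LARGE_PAGE_SIZE
 *   frame base is 1MB-aligned    -- (fi.base & LARGE_PAGE_MASK) == 0
 *
 * E.g. a 2MB frame at physical 0x80000000 mapped at vaddr 0x40100000 with
 * VREGION_FLAGS_LARGE uses two L1 section entries; the same frame at
 * vaddr 0x40101000 would be mapped with 4kB pages instead.
 */
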
static errval_t do_map(struct pmap_arm *pmap, genvaddr_t vaddr,
                       struct capref frame, size_t offset, size_t size,
                       vregion_flags_t flags, size_t *retoff, size_t *retsize)
{
    errval_t err;
    size_t page_size;
    size_t offset_level;

    // get base address and size of frame
    struct frame_identity fi;
    err = frame_identify(frame, &fi);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_PMAP_DO_MAP);
    }

    // determine mapping specific parts
    if (flags & VREGION_FLAGS_LARGE &&
        (vaddr & LARGE_PAGE_MASK) == 0 &&
        fi.bytes >= LARGE_PAGE_SIZE &&
        (fi.base & LARGE_PAGE_MASK) == 0) {
        // section mapping (1MB)
        page_size = LARGE_PAGE_SIZE;
        offset_level = ARM_L1_OFFSET(vaddr);
#ifdef LIBBARRELFISH_DEBUG_PMAP
        printf("do_map: large path\n");
        printf("page_size: %zx, size: %zx\n", page_size, size);
#endif
    } else {
        // normal 4k mapping
        page_size = BASE_PAGE_SIZE;
        offset_level = ARM_L2_OFFSET(vaddr);
    }

    size = ROUND_UP(size, page_size);
    size_t pte_count = DIVIDE_ROUND_UP(size, page_size);
#ifdef LIBBARRELFISH_DEBUG_PMAP
    if (flags & VREGION_FLAGS_LARGE) {
        printf("#pages: %zu\n", pte_count);
    }
#endif
    genvaddr_t vend = vaddr + size;

    if (fi.bytes < size) {
        return LIB_ERR_PMAP_FRAME_SIZE;
    }

#ifdef LIBBARRELFISH_DEBUG_PMAP
    printf("do_map: mapping %zu pages (size=%zx), from %zu.%zu\n",
            pte_count, page_size, ARM_L1_OFFSET(vaddr), ARM_L2_OFFSET(vaddr));
    printf("page_size: %zx, size: %zx\n", page_size, size);
#endif

    // the first condition is trivially true for section mappings
    if ((ARM_L1_OFFSET(vaddr) == ARM_L1_OFFSET(vend)) ||
        flags & VREGION_FLAGS_LARGE) {
        // fast path: the mapping fits into a single leaf page table
        err = do_single_map(pmap, vaddr, vend, frame, offset, pte_count, flags);
        if (err_is_fail(err)) {
            DEBUG_ERR(err, "[do_map] in fast path");
            return err_push(err, LIB_ERR_PMAP_DO_MAP);
        }
    } else { // multiple leaf page tables
        // first leaf
        uint32_t c = ARM_L2_MAX_ENTRIES - offset_level;
        genvaddr_t temp_end = vaddr + c * page_size;
        err = do_single_map(pmap, vaddr, temp_end, frame, offset, c, flags);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_DO_MAP);
        }

        // map full leaves
        while (ARM_L1_OFFSET(temp_end) < ARM_L1_OFFSET(vend)) {
            // update vars
            vaddr = temp_end;
            temp_end = vaddr + ARM_L2_MAX_ENTRIES * page_size;
            offset += c * page_size;
            c = ARM_L2_MAX_ENTRIES;

            // do mapping
            err = do_single_map(pmap, vaddr, temp_end, frame, offset,
                                ARM_L2_MAX_ENTRIES, flags);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_PMAP_DO_MAP);
            }
        }

        // map remaining part
        offset += c * page_size;
        c = ARM_L2_OFFSET(vend) - ARM_L2_OFFSET(temp_end);
        if (c) {
            // do mapping
            err = do_single_map(pmap, temp_end, vend, frame, offset, c, flags);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_PMAP_DO_MAP);
            }
        }
    }
    if (retoff) {
        *retoff = offset;
    }
    if (retsize) {
        *retsize = size;
    }
    return SYS_ERR_OK;
}

static size_t
max_slabs_required(size_t bytes)
{
    // Perform a slab allocation for every page (do_map -> slab_alloc)
    size_t pages = DIVIDE_ROUND_UP(bytes, BASE_PAGE_SIZE);
    // Perform a slab allocation for every L2 (get_ptable -> find_vnode)
    size_t l2entries = DIVIDE_ROUND_UP(pages, ARM_L2_MAX_ENTRIES);
    // Perform a slab allocation for every L1 (do_map -> find_vnode)
    size_t l1entries = DIVIDE_ROUND_UP(l2entries, ARM_L1_MAX_ENTRIES);
    return pages + l2entries + l1entries;
}

static size_t max_slabs_required_large(size_t bytes)
{
    // we always need only one slab, as we can represent any size section
    // mapping in a single struct vnode.
    return 1;
}

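/*
 * Worked example (illustrative, assuming ARM_L2_MAX_ENTRIES == 1024 and
 * ARM_L1_MAX_ENTRIES == 1024, the values implied by the 10-bit indices
 * used elsewhere in this file): for a 16MB mapping with 4kB pages,
 *
 *   pages     = 16MB / 4kB        = 4096
 *   l2entries = ceil(4096 / 1024) =    4
 *   l1entries = ceil(4 / 1024)    =    1
 *   max_slabs_required(16MB)      = 4101
 *
 * whereas the same region mapped as 1MB sections is tracked by a single
 * vnode, hence max_slabs_required_large() == 1.
 */
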
/**
 * \brief Refill slabs used for metadata
 *
 * \param pmap The pmap to refill in
 * \param request The number of slabs the allocator must have
 *                when the function returns
 *
 * When the current pmap is initialized, it reserves some virtual address
 * space for metadata. That reserved address space is used here.
 *
 * Can only be called for the current pmap.
 * Will recursively call into itself until it has enough slabs.
 */
static errval_t refill_slabs(struct pmap_arm *pmap, size_t request)
{
    errval_t err;

    /* Keep looping until we have #request slabs */
    while (slab_freecount(&pmap->slab) < request) {
        // Amount of bytes required for #request
        size_t bytes = SLAB_STATIC_SIZE(request - slab_freecount(&pmap->slab),
                                        sizeof(struct vnode));

        /* Get a frame of that size */
        struct capref cap;
        err = frame_alloc(&cap, bytes, &bytes);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_FRAME_ALLOC);
        }

        /* If we do not have enough slabs to map the frame in, recurse */
        size_t required_slabs_for_frame = max_slabs_required(bytes);
        if (slab_freecount(&pmap->slab) < required_slabs_for_frame) {
            // If we recurse, we require more slabs than are needed to map
            // a single page
            assert(required_slabs_for_frame > 4);

            err = refill_slabs(pmap, required_slabs_for_frame);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_SLAB_REFILL);
            }
        }

        /* Perform mapping */
        genvaddr_t genvaddr = pmap->vregion_offset;
        pmap->vregion_offset += (genvaddr_t)bytes;

        // if this assert fires, increase META_DATA_RESERVED_SPACE
        assert(pmap->vregion_offset < (vregion_get_base_addr(&pmap->vregion) +
               vregion_get_size(&pmap->vregion)));

        err = do_map(pmap, genvaddr, cap, 0, bytes,
                     VREGION_FLAGS_READ_WRITE, NULL, NULL);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_DO_MAP);
        }

        /* Grow the slab */
        lvaddr_t buf = vspace_genvaddr_to_lvaddr(genvaddr);
        slab_grow(&pmap->slab, (void*)buf, bytes);
    }

    return SYS_ERR_OK;
}

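/*
 * Note on termination (descriptive, not from the original source): the
 * recursion bottoms out because mapping the freshly allocated refill frame
 * itself needs only a handful of slabs (one per page plus one per new page
 * table; cf. the slabs_reserve margin kept by map() below), which the
 * static buffer installed by pmap_init() can satisfy.
 */
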
/**
 * \brief Create page mappings
 *
 * \param pmap     The pmap object
 * \param vaddr    The virtual address to create the mapping for
 * \param frame    The frame cap to map in
 * \param offset   Offset into the frame cap
 * \param size     Size of the mapping
 * \param flags    Flags for the mapping
 * \param retoff   If non-NULL, filled in with adjusted offset of mapped region
 * \param retsize  If non-NULL, filled in with adjusted size of mapped region
 */
static errval_t
map(struct pmap     *pmap,
    genvaddr_t       vaddr,
    struct capref    frame,
    size_t           offset,
    size_t           size,
    vregion_flags_t  flags,
    size_t          *retoff,
    size_t          *retsize)
{
    struct pmap_arm *pmap_arm = (struct pmap_arm *)pmap;

    errval_t err;
    size_t base;
    size_t page_size;
    size_t slabs_required;

    struct frame_identity fi;
    err = frame_identify(frame, &fi);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_PMAP_FRAME_IDENTIFY);
    }

    // adjust the mapping to be on page boundaries
    if (flags & VREGION_FLAGS_LARGE &&
        (vaddr & LARGE_PAGE_MASK) == 0 &&
        fi.bytes >= LARGE_PAGE_SIZE &&
        (fi.base & LARGE_PAGE_MASK) == 0) {
        // section mapping (1MB)
        base = LARGE_PAGE_OFFSET(offset);
        page_size = LARGE_PAGE_SIZE;
        slabs_required = max_slabs_required_large(size);
#ifdef LIBBARRELFISH_DEBUG_PMAP
        printf("map: large path, page_size: %zu, base: %zu, slabs: %zu, "
               "size: %zu, frame size: %zu\n",
               page_size, base, slabs_required, size, fi.bytes);
#endif
    } else {
        // 4k mapping
        base = BASE_PAGE_OFFSET(offset);
        page_size = BASE_PAGE_SIZE;
        slabs_required = max_slabs_required(size);
    }
    size += base;
    size = ROUND_UP(size, page_size);
    offset -= base;

    const size_t slabs_reserve = 3; // == max_slabs_required(1)
    uint64_t slabs_free = slab_freecount(&pmap_arm->slab);

    slabs_required += slabs_reserve;

    if (slabs_required > slabs_free) {
        if (get_current_pmap() == pmap) {
            err = refill_slabs(pmap_arm, slabs_required);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_SLAB_REFILL);
            }
        }
        else {
            // refill_slabs() only works for the current pmap, so refill
            // from the heap when manipulating another dispatcher's pmap
            size_t bytes = SLAB_STATIC_SIZE(slabs_required - slabs_free,
                                            sizeof(struct vnode));
            void *buf = malloc(bytes);
            if (!buf) {
                return LIB_ERR_MALLOC_FAIL;
            }
            slab_grow(&pmap_arm->slab, buf, bytes);
        }
    }

    return do_map(pmap_arm, vaddr, frame, offset, size, flags,
                  retoff, retsize);
}

static errval_t do_single_unmap(struct pmap_arm *pmap, genvaddr_t vaddr,
                                size_t pte_count)
{
#ifdef LIBBARRELFISH_DEBUG_PMAP
    debug_printf("%s: vaddr=0x%"PRIxGENVADDR", pte_count=%zu\n",
            __FUNCTION__, vaddr, pte_count);
#endif
    errval_t err;
    struct vnode *pt = find_ptable(pmap, vaddr);
    // pt->is_vnode == non-large mapping
    if (pt && pt->is_vnode) {
        // analogously to do_single_map, we use 10 bits for tracking pages
        // in user space -SG
        struct vnode *page = find_vnode(pt, ARM_L2_OFFSET(vaddr));
        if (page && page->u.frame.pte_count == pte_count) {
#ifdef LIBBARRELFISH_DEBUG_PMAP
            debug_printf("page unmap: pt entry: %d, entry = %d, pte_count = %hu\n",
                    pt->entry, page->entry, page->u.frame.pte_count);
#endif
            err = vnode_unmap(pt->u.vnode.cap, page->mapping);
            if (err_is_fail(err)) {
                DEBUG_ERR(err, "vnode_unmap");
                return err_push(err, LIB_ERR_VNODE_UNMAP);
            }

            // cleanup mapping cap
            err = cap_delete(page->mapping);
            if (err_is_fail(err)) {
                DEBUG_ERR(err, "cap_delete");
                return err_push(err, LIB_ERR_CAP_DELETE);
            }
            err = slot_free(page->mapping);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_SLOT_FREE);
            }

            remove_vnode(pt, page);
            slab_free(&pmap->slab, page);
        }
        else {
            return LIB_ERR_PMAP_FIND_VNODE;
        }
    } else if (pt) {
        // section mapping: pt is the frame node mapped in the L1 table
#ifdef LIBBARRELFISH_DEBUG_PMAP
        debug_printf("section unmap: entry = %d, pte_count = %hu\n",
                pt->entry, pt->u.frame.pte_count);
#endif
        err = vnode_unmap(pmap->root.u.vnode.cap, pt->mapping);
        if (err_is_fail(err)) {
            DEBUG_ERR(err, "vnode_unmap");
            return err_push(err, LIB_ERR_VNODE_UNMAP);
        }

        // cleanup mapping cap
        err = cap_delete(pt->mapping);
        if (err_is_fail(err)) {
            DEBUG_ERR(err, "cap_delete");
            return err_push(err, LIB_ERR_CAP_DELETE);
        }
        err = slot_free(pt->mapping);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_SLOT_FREE);
        }

        remove_vnode(&pmap->root, pt);
        slab_free(&pmap->slab, pt);
    } else {
        return LIB_ERR_PMAP_FIND_VNODE;
    }

    return SYS_ERR_OK;
}

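/*
 * Worked example for the chunking in unmap() below (illustrative, with
 * ARM_L2_MAX_ENTRIES == 1024): unmapping 2048 pages starting at L2 offset
 * 1000 is split into three do_single_unmap() calls of
 *
 *   1024 - 1000 =   24 pages  (rest of the first leaf)
 *                 1024 pages  (one full leaf)
 *                 1000 pages  (remainder in the last leaf)
 */
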
/**
 * \brief Remove page mappings
 *
 * \param pmap     The pmap object
 * \param vaddr    The start of the virtual address range to remove
 * \param size     The size of the virtual address range to remove
 * \param retsize  If non-NULL, filled in with the actual size removed
 */
static errval_t
unmap(struct pmap *pmap,
      genvaddr_t   vaddr,
      size_t       size,
      size_t      *retsize)
{
    errval_t err, ret = SYS_ERR_OK;
    struct pmap_arm *pmap_arm = (struct pmap_arm*)pmap;
    size = ROUND_UP(size, BASE_PAGE_SIZE);
    size_t pte_count = size / BASE_PAGE_SIZE;
    genvaddr_t vend = vaddr + size;

    if (ARM_L1_OFFSET(vaddr) == ARM_L1_OFFSET(vend-1)) {
        // fast path: the range lies within a single leaf page table
#ifdef LIBBARRELFISH_DEBUG_PMAP
        debug_printf("%s: fast path vaddr=0x%"PRIxGENVADDR", pte_count=%zu\n",
                __FUNCTION__, vaddr, pte_count);
#endif
        err = do_single_unmap(pmap_arm, vaddr, pte_count);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_UNMAP);
        }
    } else { // slow path
        // unmap first leaf
        uint32_t c = ARM_L2_MAX_ENTRIES - ARM_L2_OFFSET(vaddr);
#ifdef LIBBARRELFISH_DEBUG_PMAP
        debug_printf("%s: slow path 1st leaf vaddr=0x%"PRIxGENVADDR", pte_count=%"PRIu32"\n",
                __FUNCTION__, vaddr, c);
#endif
        err = do_single_unmap(pmap_arm, vaddr, c);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_UNMAP);
        }

        // unmap full leaves
        vaddr += c * BASE_PAGE_SIZE;
        while (ARM_L1_OFFSET(vaddr) < ARM_L1_OFFSET(vend)) {
            c = ARM_L2_MAX_ENTRIES;
#ifdef LIBBARRELFISH_DEBUG_PMAP
            debug_printf("%s: slow path full leaf vaddr=0x%"PRIxGENVADDR", pte_count=%"PRIu32"\n",
                    __FUNCTION__, vaddr, c);
#endif
            err = do_single_unmap(pmap_arm, vaddr, c);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_PMAP_UNMAP);
            }
            vaddr += c * BASE_PAGE_SIZE;
        }

        // unmap remaining part
        c = ARM_L2_OFFSET(vend) - ARM_L2_OFFSET(vaddr);
        if (c) {
#ifdef LIBBARRELFISH_DEBUG_PMAP
            debug_printf("%s: slow path last leaf vaddr=0x%"PRIxGENVADDR", pte_count=%"PRIu32"\n",
                    __FUNCTION__, vaddr, c);
#endif
            err = do_single_unmap(pmap_arm, vaddr, c);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_PMAP_UNMAP);
            }
        }
    }

    if (retsize) {
        *retsize = size;
    }

    return ret;
}

/**
 * \brief Determine a suitable address for a given memory object
 *
 * \param pmap      The pmap object
 * \param memobj    The memory object to determine the address for
 * \param alignment Minimum alignment
 * \param vaddr     Pointer to return the determined address
 *
 * Relies on vspace.c code maintaining an ordered list of vregions
 */
static errval_t
determine_addr(struct pmap   *pmap,
               struct memobj *memobj,
               size_t         alignment,
               genvaddr_t    *vaddr)
{
    assert(pmap->vspace->head);

    if (alignment == 0) {
        alignment = BASE_PAGE_SIZE;
    } else {
        alignment = ROUND_UP(alignment, BASE_PAGE_SIZE);
    }
    size_t size = ROUND_UP(memobj->size, alignment);

    struct vregion *walk = pmap->vspace->head;
    while (walk->next) { // Try to insert between existing mappings
        genvaddr_t walk_base = vregion_get_base_addr(walk);
        genvaddr_t walk_size = ROUND_UP(vregion_get_size(walk), BASE_PAGE_SIZE);
        genvaddr_t walk_end  = ROUND_UP(walk_base + walk_size, alignment);
        genvaddr_t next_base = vregion_get_base_addr(walk->next);

        if (next_base > walk_end + size &&
            walk_base + walk_size > VSPACE_BEGIN) { // only place above VSPACE_BEGIN
            *vaddr = walk_end;
            return SYS_ERR_OK;
        }
        walk = walk->next;
    }

    // no gap found: place after the last vregion
    *vaddr = ROUND_UP((vregion_get_base_addr(walk)
                       + ROUND_UP(vregion_get_size(walk), alignment)),
                      alignment);
    return SYS_ERR_OK;
}

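/*
 * Worked example (illustrative): with VSPACE_BEGIN at 0x40000000, existing
 * vregions at [0x40000000, +64kB) and [0x40100000, +4kB), and a request for
 * a 64kB object with default alignment, the walk finds the gap after the
 * first vregion (0x40010000 + 64kB still fits below 0x40100000) and
 * returns *vaddr == 0x40010000.
 */
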
/**
 * \brief Retrieve an address that can currently be used for large mappings
 */
static errval_t determine_addr_raw(struct pmap *pmap, size_t size,
                                   size_t alignment, genvaddr_t *retvaddr)
{
    struct pmap_arm *pmap_arm = (struct pmap_arm *)pmap;

    struct vnode *walk_pdir = pmap_arm->root.u.vnode.children;
    assert(walk_pdir != NULL); // assume there's always at least one existing entry

    if (alignment == 0) {
        alignment = BASE_PAGE_SIZE;
    } else {
        alignment = ROUND_UP(alignment, BASE_PAGE_SIZE);
    }
    size = ROUND_UP(size, alignment);

    size_t free_count = DIVIDE_ROUND_UP(size, LARGE_PAGE_SIZE);
    //debug_printf("need %zu contiguous free pdirs\n", free_count);

    // compile pdir free list
    // barrelfish treats the L1 as 1024 entries of 4MB each
    bool f[ARM_L1_MAX_ENTRIES];
    for (int i = 0; i < ARM_L1_MAX_ENTRIES; i++) {
        f[i] = true;
    }
    while (walk_pdir) {
        assert(walk_pdir->is_vnode);
        f[walk_pdir->entry] = false;
        walk_pdir = walk_pdir->next;
    }

    // search the L1 slots [384 .. 512), i.e. virtual addresses
    // [0x60000000 .. 0x80000000), for free_count consecutive free entries
    genvaddr_t first_free = 384;
    for (; first_free < 512; first_free++) {
        if (f[first_free]) {
            for (size_t i = 1; i < free_count; i++) {
                if (!f[first_free + i]) {
                    // advance the search position past the occupied slot
                    first_free = first_free + i;
                    goto next;
                }
            }
            break;
        }
next:
        ; // empty statement so the label has something to bind to
    }
    //printf("first free: %li\n", (uint32_t)first_free);
    if (first_free + free_count <= 512) {
        // each L1 slot covers 4MB == 1 << 22 bytes
        *retvaddr = first_free << 22;
        return SYS_ERR_OK;
    } else {
        return LIB_ERR_OUT_OF_VIRTUAL_ADDR;
    }
}

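/*
 * Worked example (illustrative): a request for 8MB gives
 * free_count == DIVIDE_ROUND_UP(8MB, LARGE_PAGE_SIZE) == 8, so the search
 * needs 8 consecutive free slots; if entries 384..391 are all free,
 * *retvaddr == 384 << 22 == 0x60000000.
 */
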
static errval_t do_single_modify_flags(struct pmap_arm *pmap, genvaddr_t vaddr,
                                       size_t pages, vregion_flags_t flags)
{
    errval_t err = SYS_ERR_OK;
    struct vnode *ptable = find_ptable(pmap, vaddr);
    uint16_t ptentry = ARM_L2_OFFSET(vaddr);
    if (ptable) {
        struct vnode *page = find_vnode(ptable, ptentry);
        if (page) {
            if (inside_region(ptable, ptentry, pages)) {
                // we're modifying part of a valid mapped region
                // arguments to invocation: invoke frame cap, first affected
                // page (as offset from first page in mapping), #affected
                // pages, new flags. Invocation should check compatibility of
                // new set of flags with cap permissions.
                size_t off = ptentry - page->entry;
                uintptr_t pmap_flags = vregion_flags_to_kpi_paging_flags(flags);
                // VA hinting NYI on ARM, so we always pass 0 for va_hint
                err = invoke_mapping_modify_flags(page->mapping,
                                                  off, pages, pmap_flags, 0);
                if (err_is_fail(err)) {
                    printf("invoke_mapping_modify_flags returned error: %s (%"PRIuERRV")\n",
                           err_getstring(err), err);
                }
                return err;
            } else {
                // overlaps some region border
                return LIB_ERR_PMAP_EXISTING_MAPPING;
            }
        }
    }
    return SYS_ERR_OK;
}

/**
 * \brief Modify page mapping
 *
 * \param pmap    The pmap object
 * \param vaddr   The virtual address of the mapping to modify
 * \param size    The size of the mapping to modify
 * \param flags   New flags for the mapping
 * \param retsize If non-NULL, filled in with the actual size modified
 */
static errval_t
modify_flags(struct pmap     *pmap,
             genvaddr_t       vaddr,
             size_t           size,
             vregion_flags_t  flags,
             size_t          *retsize)
{
    errval_t err, ret = SYS_ERR_OK;
    struct pmap_arm *pmap_arm = (struct pmap_arm*)pmap;
    size = ROUND_UP(size, BASE_PAGE_SIZE);
    size_t pte_count = size / BASE_PAGE_SIZE;
    genvaddr_t vend = vaddr + size;

    if (ARM_L1_OFFSET(vaddr) == ARM_L1_OFFSET(vend-1)) {
        // fast path: the range lies within a single leaf page table
        err = do_single_modify_flags(pmap_arm, vaddr, pte_count, flags);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_UNMAP);
        }
    }
    else { // slow path
        // modify flags in first leaf
        uint32_t c = ARM_L2_MAX_ENTRIES - ARM_L2_OFFSET(vaddr);
        err = do_single_modify_flags(pmap_arm, vaddr, c, flags);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_UNMAP);
        }

        // modify flags in full leaves
        vaddr += c * BASE_PAGE_SIZE;
        while (ARM_L1_OFFSET(vaddr) < ARM_L1_OFFSET(vend)) {
            c = ARM_L2_MAX_ENTRIES;
            err = do_single_modify_flags(pmap_arm, vaddr, c, flags);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_PMAP_UNMAP);
            }
            vaddr += c * BASE_PAGE_SIZE;
        }

        // modify flags in remaining part
        c = ARM_L2_OFFSET(vend) - ARM_L2_OFFSET(vaddr);
        if (c) {
            err = do_single_modify_flags(pmap_arm, vaddr, c, flags);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_PMAP_UNMAP);
            }
        }
    }

    if (retsize) {
        *retsize = size;
    }

    return ret;
}

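/*
 * Usage sketch (illustrative, not from the original source): revoking write
 * access to two mapped pages at `va` in the current pmap could look like
 *
 *   struct pmap *p = get_current_pmap();
 *   errval_t err = p->f.modify_flags(p, va, 2 * BASE_PAGE_SIZE,
 *                                    VREGION_FLAGS_READ, NULL);
 *
 * The range is rounded up to whole pages and must fall inside one existing
 * mapping per leaf page table; otherwise do_single_modify_flags() returns
 * LIB_ERR_PMAP_EXISTING_MAPPING.
 */
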
/**
 * \brief Query existing page mapping
 *
 * \param pmap  The pmap object
 * \param vaddr The virtual address to query
 * \param info  Filled in with the mapping info: base address, size, mapped
 *              cap, offset into the cap, and flags of the mapping
 */
static errval_t lookup(struct pmap *pmap, genvaddr_t vaddr,
                       struct pmap_mapping_info *info)
{
    USER_PANIC("NYI");
    return 0;
}

static errval_t
serialise(struct pmap *pmap, void *buf, size_t buflen)
{
    // Unimplemented: ignored
    return SYS_ERR_OK;
}

static errval_t
deserialise(struct pmap *pmap, void *buf, size_t buflen)
{
    // Unimplemented: we start with an empty pmap, and avoid the bottom of
    // the address space
    return SYS_ERR_OK;
}

static struct pmap_funcs pmap_funcs = {
    .determine_addr = determine_addr,
    .determine_addr_raw = determine_addr_raw,
    .map = map,
    .unmap = unmap,
    .modify_flags = modify_flags,
    .lookup = lookup,
    .serialise = serialise,
    .deserialise = deserialise,
};

/**
 * \brief Initialize the pmap object
 */
errval_t
pmap_init(struct pmap           *pmap,
          struct vspace         *vspace,
          struct capref          vnode,
          struct slot_allocator *opt_slot_alloc)
{
    struct pmap_arm* pmap_arm = (struct pmap_arm*)pmap;

    /* Generic portion */
    pmap->f = pmap_funcs;
    pmap->vspace = vspace;

    // Slab allocator for vnodes
    slab_init(&pmap_arm->slab, sizeof(struct vnode), NULL);
    slab_grow(&pmap_arm->slab,
              pmap_arm->slab_buffer,
              sizeof(pmap_arm->slab_buffer));

    pmap_arm->root.is_vnode = true;
    pmap_arm->root.u.vnode.cap = vnode;
    if (get_croot_addr(vnode) != CPTR_ROOTCN) {
        /* root vnode cap is not invokable from our cspace; make a copy */
        errval_t err = slot_alloc(&pmap_arm->root.u.vnode.invokable);
        assert(err_is_ok(err));
        err = cap_copy(pmap_arm->root.u.vnode.invokable, vnode);
        assert(err_is_ok(err));
    } else {
        pmap_arm->root.u.vnode.invokable = vnode;
    }
    pmap_arm->root.next = NULL;
    pmap_arm->root.u.vnode.children = NULL;

    return SYS_ERR_OK;
}

errval_t pmap_current_init(bool init_domain)
{
    struct pmap_arm *pmap_arm = (struct pmap_arm*)get_current_pmap();

    // To reserve a block of virtual address space, a vregion representing
    // the address space is required. We construct a bare-bones one here
    // and add it to the vregion list.
    struct vregion *vregion = &pmap_arm->vregion;
    assert((void*)vregion > (void*)pmap_arm);
    assert((void*)vregion < (void*)(pmap_arm + 1));
    vregion->vspace = NULL;
    vregion->memobj = NULL;
    vregion->base   = VSPACE_BEGIN;
    vregion->offset = 0;
    vregion->size   = META_DATA_RESERVED_SPACE;
    vregion->flags  = 0;
    vregion->next   = NULL;

    struct vspace *vspace = pmap_arm->p.vspace;
    assert(!vspace->head);
    vspace->head = vregion;

    pmap_arm->vregion_offset = pmap_arm->vregion.base;

    return SYS_ERR_OK;
}