1/* 2 * \brief demandpaging.c 3 * 4 * Copyright (c) 2015 ETH Zurich. 5 * All rights reserved. 6 * 7 * This file is distributed under the terms in the attached LICENSE file. 8 * If you do not find this file, copies can be found by writing to: 9 * ETH Zurich D-INFK, Universitaetsstrasse 6, CH-8092 Zurich. Attn: Systems Group. 10 */ 11#include <stdio.h> 12#include <barrelfish/barrelfish.h> 13#include <barrelfish/except.h> 14#include <barrelfish/memobj.h> 15#include "../barrelfish/vspace/vspace_internal.h" 16#include <vfs/vfs.h> 17 18#include <dp_internal.h> 19 20 21struct demand_paging_region *demand_paging_regions = NULL; 22 23 24/* 25 * =========================================================================== 26 * helper functions 27 * =========================================================================== 28 */ 29 30static bool is_dirty(struct dp_page *dpp) 31{ 32 union x86_64_ptable_entry *entry = dpp->vnode_entry; 33 switch(dpp->dpr->pagesize) { 34 case BASE_PAGE_SIZE: 35 assert(entry->base.present == 1); 36 return entry->base.dirty; 37 break; 38 case LARGE_PAGE_SIZE: 39 assert(entry->large.present == 1); 40 return entry->large.dirty; 41 break; 42 case HUGE_PAGE_SIZE: 43 assert(entry->huge.present == 1); 44 return entry->huge.dirty; 45 break; 46 default: 47 return -1; 48 break; 49 50 } 51 return 1; 52} 53 54/* 55 * =========================================================================== 56 * swapping of pages 57 * =========================================================================== 58 */ 59 60static errval_t swap_in(struct dp_page *dpp) 61{ 62 errval_t err; 63 struct demand_paging_region *dpr = dpp->dpr; 64 65 lvaddr_t offset = dpp->vaddr - vregion_get_base_addr(&dpr->vreg); 66 67 68 DP_DEBUG_SWAP("[in] page=%" PRIx64 "\n", dpp->vaddr ); 69 70 size_t read; 71 size_t totalread = 0; 72 73 err = vfs_seek(dpr->swapfile, VFS_SEEK_SET, offset); 74 if (err_is_fail(err)) { 75 return err; 76 } 77 78 while(totalread < dpr->pagesize) { 79 err = vfs_read(dpr->swapfile, (void *)dpp->vaddr + totalread, 80 dpr->pagesize - totalread, &read); 81 if (err_is_fail(err)) { 82 USER_PANIC_ERR(err, "writing to fail"); 83 } 84 85 totalread += read; 86 } 87 88 dpp->state = DEMAND_PAGING_PST_MEMORY; 89 90 return SYS_ERR_OK; 91} 92 93static errval_t swap_out(struct dp_page *dpp) 94{ 95 errval_t err; 96 97 struct demand_paging_region *dpr = dpp->dpr; 98 lvaddr_t offset = dpp->vaddr - vregion_get_base_addr(&dpr->vreg); 99 100 DP_DEBUG_SWAP("[out] page=%" PRIx64 "\n", dpp->vaddr ); 101 102 size_t written; 103 size_t totalwritten = 0; 104 105 err = vfs_seek(dpr->swapfile, VFS_SEEK_SET, offset); 106 if (err_is_fail(err)) { 107 return err; 108 } 109 110 while(totalwritten < dpr->pagesize) { 111 err = vfs_write(dpr->swapfile, (void *)dpp->vaddr + totalwritten, 112 dpr->pagesize - totalwritten, &written); 113 if (err_is_fail(err)) { 114 USER_PANIC_ERR(err, "writing to fail"); 115 } 116 totalwritten += written; 117 } 118 119 dpp->state = DEMAND_PAGING_PST_FILE; 120 121 return SYS_ERR_OK; 122} 123 124/* 125 * =========================================================================== 126 * mapping of pages 127 * =========================================================================== 128 */ 129 130static inline errval_t frame_map(struct demand_paging_region *dpr, 131 struct dp_page *dpp, struct dp_frame *dpf) 132{ 133 DP_DEBUG_MAP("[map] vaddr= 0x%" PRIx64 "\n", dpp->vaddr); 134 struct pmap *pmap = vregion_get_vspace(&dpr->vreg)->pmap; 135 dpf->page = dpp; 136 dpf->vnode_entry = dpp->vnode_entry; 137 return pmap->f.map(pmap, dpp->vaddr, dpf->frame, 0, dpr->pagesize, 138 vregion_get_flags(&dpr->vreg), NULL, NULL); 139} 140 141static inline errval_t frame_unmap(struct demand_paging_region *dpr, struct dp_page *dpp) 142{ 143 DP_DEBUG_MAP("[unmap] vaddr= 0x%" PRIx64 "\n", dpp->vaddr); 144 struct pmap *pmap = vregion_get_vspace(&dpr->vreg)->pmap; 145 return pmap->f.unmap(pmap, dpp->vaddr, dpr->pagesize, NULL); 146} 147 148 149/* 150 * =========================================================================== 151 * frame evict policy 152 * =========================================================================== 153 */ 154 155static inline errval_t frame_evict(struct demand_paging_region *dpr, 156 struct dp_frame *dpf) 157{ 158 errval_t err; 159 160 if (is_dirty(dpf->page)) { 161 /* is dirty */ 162 swap_out(dpf->page); 163 } 164 165 err = frame_unmap(dpr, dpf->page); 166 if (err_is_fail(err)) { 167 return err; 168 } 169 dpf->page = NULL; 170 dpf->vnode_entry = NULL; 171 172 return SYS_ERR_OK; 173} 174 175 176static errval_t frame_evict_any(struct demand_paging_region *dpr, 177 struct dp_frame **ret_dpf) 178{ 179 errval_t err; 180 181 DP_DEBUG_SWAP("[evict] victim=%" PRIu64 "\n", dpr->frames_victim); 182 183 struct dp_frame *dpf = dpr->frames[dpr->frames_victim]; 184 185 err = frame_evict(dpr, dpf); 186 if (err_is_fail(err)) { 187 return err; 188 } 189 190 /* set the next victim */ 191 dpr->frames_victim = (dpr->frames_victim + 1) % dpr->frames_count; 192 193 *ret_dpf = dpf; 194 195 return SYS_ERR_OK; 196} 197 198/* 199 * =========================================================================== 200 * Page-fault handler 201 * =========================================================================== 202 */ 203 204static errval_t handle_pagefault(lvaddr_t vaddr) 205{ 206 /* find demand paging regions */ 207 errval_t err; 208 209 DP_DEBUG_HANDLER("pagefault at vaddr = %" PRIx64 "\n", vaddr); 210 211 struct demand_paging_region *dpr = demand_paging_regions; 212 213 lvaddr_t base; 214 215 while (dpr) { 216 base = vregion_get_base_addr(&dpr->vreg); 217 if (base <= vaddr && vaddr < (base + vregion_get_size(&dpr->vreg))) { 218 /* found */ 219 break; 220 } 221 dpr = dpr->next; 222 } 223 224 if (dpr == NULL) { 225 DP_DEBUG_HANDLER("dpr not found\n"); 226 return -1; 227 } 228 229 if (dpr->frames_count == 0) { 230 USER_PANIC("there are o frames in the region\n"); 231 } 232 233 struct dp_frame *dpf; 234 if (dpr->frames_free) { 235 dpf = dpr->frames_free; 236 dpr->frames_free = dpf->next; 237 } else { 238 err = frame_evict_any(dpr, &dpf); 239 if (err_is_fail(err)) { 240 return err; 241 } 242 } 243 244 /* find page */ 245 vaddr = vaddr & ~(dpr->pagesize - 1); 246 247 DP_DEBUG_HANDLER("handling fault on page 0x%" PRIx64 " in dpr='%s' with " 248 "frame 0x%" PRIx64"\n", vaddr, dpr->swapname, dpf->paddr); 249 250 size_t slot = (vaddr - base) / dpr->pagesize; 251 struct dp_page *dpp = &dpr->pages[slot]; 252 assert(dpp->vaddr == vaddr); 253 254 /* install the mapping */ 255 err = frame_map(dpr, dpp, dpf); 256 if (err_is_fail(err)) { 257 return err; 258 } 259 260 /* check if we need to swap in the page, otherwise clean */ 261 if (dpp->state == DEMAND_PAGING_PST_FILE) { 262 swap_in(dpp); 263 } else { 264 memset((void *)vaddr, 0, dpr->pagesize); 265 } 266 267 return SYS_ERR_OK; 268} 269 270static void exn_handler(enum exception_type type, int subtype, 271 void *addr, arch_registers_state_t *regs) 272{ 273 errval_t err; 274 if (type == EXCEPT_PAGEFAULT) { 275 err = handle_pagefault((lvaddr_t)addr); 276 if (err_is_fail(err)) { 277 // could not handle page fault, exiting for now 278 // TODO: do something sensible here 279 exit(1); 280 } 281 } else { 282 DP_DEBUG_HANDLER("unknown exception\n"); 283 } 284 return; 285} 286 287static errval_t vspace_reserve_region(struct vregion *vregion, 288 size_t bytes, size_t pagesize, 289 vregion_flags_t flags) 290{ 291 errval_t err; 292 293 struct vspace *vspace = get_current_vspace(); 294 struct pmap *pmap = vspace_get_pmap(vspace); 295 296 struct memobj memobj; 297 memobj.size = bytes; 298 299 genvaddr_t address; 300 err = pmap->f.determine_addr(pmap, &memobj, pagesize, &address); 301 if (err_is_fail(err)) { 302 return err_push(err, LIB_ERR_PMAP_DETERMINE_ADDR); 303 } 304 305 vregion->vspace = vspace; 306 vregion->memobj = NULL; 307 vregion->base = address; 308 vregion->offset = 0; 309 vregion->size = bytes; 310 vregion->flags = flags; 311 312 err = vspace_add_vregion(vspace, vregion); 313 if (err_is_fail(err)) { 314 return err_push(err, LIB_ERR_VSPACE_ADD_REGION); 315 } 316 317 err = pmap->f.create_pts_pinned(pmap, address, bytes, flags); 318 if (err_is_fail(err)) { 319 return err_push(err, LIB_ERR_PMAP_MAP); 320 } 321 322 return err; 323} 324 325static errval_t vspace_get_vnode(struct vregion *vregion, lvaddr_t vaddr, 326 lvaddr_t *ret_vaddr) 327{ 328 struct vspace *vspace = vregion_get_vspace(vregion); 329 struct pmap *pmap = vspace_get_pmap(vspace); 330 assert(pmap->f.get_leaf_pt); 331 return pmap->f.get_leaf_pt(pmap, vaddr, ret_vaddr); 332} 333 334 335static errval_t create_swap_file(char *path, size_t bytes, vfs_handle_t *ret_handle) 336{ 337 errval_t err; 338 339 /* open the paging file */ 340 err = vfs_create(path, ret_handle); 341 if (err_is_fail(err)) { 342 DEBUG_ERR(err, "could not create the vfs handle"); 343 return err; 344 } 345 346 err = vfs_truncate(*ret_handle, bytes); 347 if (err_is_fail(err)) { 348 DEBUG_ERR(err, "could not truncate swapfile"); 349 return err; 350 } 351 352 return SYS_ERR_OK; 353} 354 355/* 356 * =========================================================================== 357 * Public interface 358 * =========================================================================== 359 */ 360 361 362errval_t demand_paging_init(void *ex_stack, size_t stack_size) 363{ 364 errval_t err; 365 366 DP_DEBUG_MGMT("[init] preparing exception handler"); 367 368 if (ex_stack != NULL && stack_size < EXCEPTION_STACK_MIN_SIZE) { 369 return -1; 370 } 371 372 if (ex_stack == NULL) { 373 if (stack_size < EXCEPTION_STACK_MIN_SIZE) { 374 stack_size = EXCEPTION_STACK_SIZE; 375 } 376 ex_stack = calloc(stack_size, sizeof(char)); 377 } 378 379 void *ex_stack_top = ex_stack + stack_size; 380 381 DP_DEBUG_MGMT("[init] stack top=%p, stackbase=%p\n", exn_handler, 382 ex_stack_top, ex_stack); 383 384 err = thread_set_exception_handler(exn_handler, NULL, ex_stack, ex_stack_top, 385 NULL, NULL); 386 if (err_is_fail(err)) { 387 DEBUG_ERR(err, "failed to set the exceptin handler"); 388 return err; 389 } 390 391 vfs_init(); 392 393 err = vfs_mkdir(DEMAND_PAGING_SWAP_FILE); 394 if (err_is_fail(err)) { 395 DEBUG_ERR(err, "err"); 396 /* can actually fail */ 397 } 398 399 return SYS_ERR_OK; 400} 401 402errval_t demand_paging_region_create(size_t bytes, size_t pagesize, size_t numframes, 403 struct demand_paging_region **ret_dpr) 404{ 405 errval_t err; 406 407 DP_DEBUG_MGMT("[create] dpr of size %" PRIu64 "\n", bytes); 408 409 /* determine basic information about the page sizes */ 410 vregion_flags_t flags = VREGION_FLAGS_READ_WRITE; 411 uint8_t pagebits; 412 switch (pagesize) { 413 case BASE_PAGE_SIZE: 414 pagebits = BASE_PAGE_BITS; 415 break; 416 case LARGE_PAGE_SIZE: 417 flags |= VREGION_FLAGS_LARGE; 418 pagebits = LARGE_PAGE_BITS; 419 break; 420 case HUGE_PAGE_SIZE: 421 pagebits = HUGE_PAGE_BITS; 422 flags |= VREGION_FLAGS_HUGE; 423 break; 424 default: 425 return -1; 426 break; 427 } 428 429 /* round up bytes and calcualte number of slots */ 430 bytes = ROUND_UP(bytes, pagesize); 431 size_t slots = bytes / pagesize; 432 size_t vnode_leaves_count = ((bytes / pagesize) + 511) / 512; 433 434 /* allocate the data structure */ 435 struct demand_paging_region *dpr = calloc(1, sizeof(*dpr) + 436 slots * sizeof(struct dp_page) + 437 (vnode_leaves_count) * sizeof(void *)); 438 if (dpr == NULL) { 439 return LIB_ERR_MALLOC_FAIL; 440 } 441 442 /* initialize fields */ 443 dpr->pagesize = pagesize; 444 dpr->pages = (struct dp_page *)(dpr + 1); 445 446 err = vspace_reserve_region(&dpr->vreg, bytes, pagesize, flags); 447 if (err_is_fail(err)) { 448 USER_PANIC_ERR(err, "reserve region in vspace for demand paging\n"); 449 return err; 450 } 451 452 snprintf(dpr->swapname, DEMAND_PAGING_SWAP_FILE_PATHLEN, "%s/0x%016lx", 453 DEMAND_PAGING_SWAP_FILE, 454 vspace_genvaddr_to_lvaddr(vregion_get_base_addr(&dpr->vreg))); 455 456 err = create_swap_file(dpr->swapname, bytes, &dpr->swapfile); 457 if (err_is_fail(err)) { 458 return err; 459 } 460 461 /* initialize pages */ 462 genvaddr_t addr = vspace_genvaddr_to_lvaddr(vregion_get_base_addr(&dpr->vreg)); 463 lvaddr_t vnode_addr; 464 465 466 err = vspace_get_vnode(&dpr->vreg, addr, &vnode_addr); 467 if (err_is_fail(err)) { 468 USER_PANIC_ERR(err, "foobar"); 469 } 470 471 dpr->vnodes = (void **)(dpr->pages + slots); 472 dpr->vnodes[0] = (void *)vnode_addr; 473 int j = 0; 474 for (size_t i = 0; i < slots; ++i) { 475 err = vspace_get_vnode(&dpr->vreg, addr, &vnode_addr); 476 if (err_is_fail(err)) { 477 USER_PANIC_ERR(err, "foobar"); 478 } 479 480 if (dpr->vnodes[j] != (void *)vnode_addr) { 481 dpr->vnodes[++j] = (void *)vnode_addr; 482 } 483 dpr->pages[i].pagenr = i; 484 dpr->pages[i].dpr = dpr; 485 dpr->pages[i].vaddr = addr; 486 dpr->pages[i].vnode = (void *)vnode_addr; 487 dpr->pages[i].vnode_entry = (void *)vnode_addr; 488 if (pagesize == HUGE_PAGE_SIZE) { 489 dpr->pages[i].vnode_entry += X86_64_PDPT_BASE(addr); 490 } else if (pagesize == LARGE_PAGE_SIZE) { 491 dpr->pages[i].vnode_entry += X86_64_PDIR_BASE(addr); 492 } else { 493 dpr->pages[i].vnode_entry += X86_64_PTABLE_BASE(addr); 494 } 495 addr += pagesize; 496 497 } 498 499 /* allocate the frames */ 500 struct capref frame; 501 size_t allocated_size; 502 err = frame_alloc(&frame, numframes * pagesize, &allocated_size); 503 if (err_is_fail(err)) { 504 USER_PANIC_ERR(err, "frame alloc\n"); 505 } 506 507 struct frame_identity id; 508 err = frame_identify(frame, &id); 509 assert(err_is_ok(err)); 510 511 struct capref cnode_cap; 512 struct capref frames; 513 err = cnode_create(&cnode_cap, &frames.cnode, allocated_size / pagesize, NULL); 514 if (err_is_fail(err)) { 515 USER_PANIC_ERR(err, "cnode create\n"); 516 } 517 518 debug_printf("FRAME BASE: %lx\n", id.base); 519 520 err = cap_retype(frames, frame, 0, ObjType_Frame, pagesize, numframes); 521 if (err_is_fail(err)) { 522 USER_PANIC_ERR(err, "cap retype\n"); 523 } 524 525 dpr->frames = calloc(numframes, sizeof(void *)); 526 if (dpr->frames == NULL) { 527 USER_PANIC("alloc frame counter\n"); 528 } 529 530 /* initialize the frames */ 531 struct dp_frame *dpf = calloc(numframes, sizeof(*dpf)); 532 if (dpf == NULL) { 533 USER_PANIC("alloc frame counter\n"); 534 } 535 dpf->first = 1; 536 for (size_t i = 0; i < numframes; ++i) { 537 dpf->frame = frames; 538 dpf->page = NULL; 539 if (i == (numframes - 1)) { 540 dpf->next = NULL; 541 } else { 542 dpf->next = (dpf+1); 543 } 544 545 dpr->frames[i] = dpf; 546 547 dpf++; 548 frames.slot++; 549 } 550 551 dpr->frames_free = dpr->frames[0]; 552 dpr->frames_count = numframes; 553 554 dpr->next = demand_paging_regions; 555 demand_paging_regions = dpr; 556 557 if (ret_dpr) { 558 *ret_dpr = dpr; 559 } 560 561 debug_printf("region created\n"); 562 563 564 return SYS_ERR_OK; 565} 566 567errval_t demand_paging_region_add_frames(struct capref *frames, size_t count, 568 struct demand_paging_region *dpr) 569{ 570 if (count == 0) { 571 return SYS_ERR_OK; 572 } 573 574 assert(dpr); 575 576 /* initialize the frames */ 577 struct dp_frame *dpf = calloc(count, sizeof(*dpf)); 578 if (dpf == NULL) { 579 return LIB_ERR_MALLOC_FAIL; 580 } 581 582 struct dp_frame **dp_frames = realloc(dpr->frames, 583 (dpr->frames_count + count) * sizeof(void *)); 584 if (dp_frames == NULL) { 585 free(dpf); 586 return LIB_ERR_MALLOC_FAIL; 587 } 588 589 dpf->first = 1; 590 for (size_t i = 0; i < count; ++i) { 591 dpf->frame = frames[i]; 592 dpf->page = NULL; 593 if (i == (count - 1)) { 594 dpf->next = NULL; 595 } else { 596 dpf->next = (dpf+1); 597 } 598 599 dpr->frames[dpr->frames_count + i] = dpf; 600 601 dpf++; 602 } 603 604 /* add it to the free list */ 605 if (dpr->frames_free) { 606 dpr->frames[dpr->frames_count + count - 1]->next = dpr->frames_free; 607 } 608 dpr->frames_free = dpr->frames[dpr->frames_count]; 609 610 /* update count */ 611 dpr->frames_count += count; 612 dpr->frames = dp_frames; 613 614 return SYS_ERR_OK; 615} 616 617errval_t demand_paging_region_remove_frames(size_t count, struct demand_paging_region *dpr, 618 struct capref *ret_frames, size_t *ret_count) 619{ 620 errval_t err; 621 622 if (count == 0) { 623 goto out; 624 } 625 626 if (count > (dpr->frames_count - 1)) { 627 count = (dpr->frames_count - 1); 628 } 629 630 struct dp_frame *dpf = dpr->frames[dpr->frames_count - 1]; 631 for (size_t i = 0; i < count; ++i) { 632 err = frame_evict(dpr, dpf); 633 if (err_is_fail(err)) { 634 count = i; 635 break; 636 } 637 ret_frames[i] = dpf->frame; 638 memset(dpf, 0, sizeof(*dpf)); 639 if (dpf->first) { 640 free(dpf); 641 } 642 dpf--; 643 } 644 645 dpr->frames_count -= count; 646 dpr->frames = realloc(dpr->frames, dpr->frames_count * sizeof(void *)); 647 assert(dpr->frames); 648 649 out: 650 if (ret_count) { 651 *ret_count = count; 652 } 653 return SYS_ERR_OK; 654} 655 656errval_t demand_paging_region_destory(struct demand_paging_region *dpr) 657{ 658 USER_PANIC("NYI"); 659 return SYS_ERR_OK; 660} 661 662void *demand_paging_get_base_address(struct demand_paging_region *dpr) 663{ 664 return (void *)vspace_genvaddr_to_lvaddr(vregion_get_base_addr(&dpr->vreg)); 665} 666