1/*
 * \file demandpaging.c
 * \brief User-level demand paging backed by a swap file.
3 *
4 * Copyright (c) 2015 ETH Zurich.
5 * All rights reserved.
6 *
7 * This file is distributed under the terms in the attached LICENSE file.
8 * If you do not find this file, copies can be found by writing to:
9 * ETH Zurich D-INFK, Universitaetsstrasse 6, CH-8092 Zurich. Attn: Systems Group.
10 */
11#include <stdio.h>
12#include <barrelfish/barrelfish.h>
13#include <barrelfish/except.h>
14#include <barrelfish/memobj.h>
15#include "../barrelfish/vspace/vspace_internal.h"
16#include <vfs/vfs.h>
17
18#include <dp_internal.h>
19
20
/* head of the global singly-linked list of all demand paging regions */
struct demand_paging_region *demand_paging_regions = NULL;
22
23
24/*
25 * ===========================================================================
26 * helper functions
27 * ===========================================================================
28 */
29
30static bool is_dirty(struct dp_page *dpp)
31{
32    union x86_64_ptable_entry *entry = dpp->vnode_entry;
33    switch(dpp->dpr->pagesize) {
34        case BASE_PAGE_SIZE:
35            assert(entry->base.present == 1);
36            return entry->base.dirty;
37            break;
38        case LARGE_PAGE_SIZE:
39            assert(entry->large.present == 1);
40            return entry->large.dirty;
41            break;
42        case HUGE_PAGE_SIZE:
43            assert(entry->huge.present == 1);
44            return entry->huge.dirty;
45            break;
46        default:
47            return -1;
48            break;
49
50    }
51    return 1;
52}
53
54/*
55 * ===========================================================================
56 * swapping of pages
57 * ===========================================================================
58 */
59
60static errval_t swap_in(struct dp_page *dpp)
61{
62    errval_t err;
63    struct demand_paging_region *dpr = dpp->dpr;
64
65    lvaddr_t offset = dpp->vaddr - vregion_get_base_addr(&dpr->vreg);
66
67
68    DP_DEBUG_SWAP("[in] page=%" PRIx64 "\n", dpp->vaddr );
69
70    size_t read;
71    size_t totalread = 0;
72
73    err = vfs_seek(dpr->swapfile, VFS_SEEK_SET, offset);
74    if (err_is_fail(err)) {
75        return err;
76    }
77
78    while(totalread < dpr->pagesize) {
79        err = vfs_read(dpr->swapfile, (void *)dpp->vaddr + totalread,
80                       dpr->pagesize - totalread, &read);
81        if (err_is_fail(err)) {
82            USER_PANIC_ERR(err, "writing to fail");
83        }
84
85        totalread += read;
86    }
87
88    dpp->state = DEMAND_PAGING_PST_MEMORY;
89
90    return SYS_ERR_OK;
91}
92
93static errval_t swap_out(struct dp_page *dpp)
94{
95    errval_t err;
96
97    struct demand_paging_region *dpr = dpp->dpr;
98    lvaddr_t offset = dpp->vaddr - vregion_get_base_addr(&dpr->vreg);
99
100    DP_DEBUG_SWAP("[out] page=%" PRIx64 "\n", dpp->vaddr );
101
102    size_t written;
103    size_t totalwritten = 0;
104
105    err = vfs_seek(dpr->swapfile, VFS_SEEK_SET, offset);
106    if (err_is_fail(err)) {
107        return err;
108    }
109
110    while(totalwritten < dpr->pagesize) {
111        err = vfs_write(dpr->swapfile, (void *)dpp->vaddr + totalwritten,
112                        dpr->pagesize - totalwritten, &written);
113        if (err_is_fail(err)) {
114            USER_PANIC_ERR(err, "writing to fail");
115        }
116        totalwritten += written;
117    }
118
119    dpp->state = DEMAND_PAGING_PST_FILE;
120
121    return SYS_ERR_OK;
122}
123
124/*
125 * ===========================================================================
126 * mapping of pages
127 * ===========================================================================
128 */
129
130static inline errval_t frame_map(struct demand_paging_region *dpr,
131                                 struct dp_page *dpp, struct dp_frame *dpf)
132{
133    DP_DEBUG_MAP("[map] vaddr= 0x%" PRIx64 "\n", dpp->vaddr);
134    struct pmap *pmap = vregion_get_vspace(&dpr->vreg)->pmap;
135    dpf->page = dpp;
136    dpf->vnode_entry = dpp->vnode_entry;
137    return pmap->f.map(pmap, dpp->vaddr, dpf->frame, 0, dpr->pagesize,
138                       vregion_get_flags(&dpr->vreg), NULL, NULL);
139}
140
141static inline errval_t frame_unmap(struct demand_paging_region *dpr, struct dp_page *dpp)
142{
143    DP_DEBUG_MAP("[unmap] vaddr= 0x%" PRIx64 "\n", dpp->vaddr);
144    struct pmap *pmap = vregion_get_vspace(&dpr->vreg)->pmap;
145    return pmap->f.unmap(pmap, dpp->vaddr, dpr->pagesize, NULL);
146}
147
148
149/*
150 * ===========================================================================
151 * frame evict policy
152 * ===========================================================================
153 */
154
155static inline errval_t frame_evict(struct demand_paging_region *dpr,
156                                   struct dp_frame *dpf)
157{
158    errval_t err;
159
160    if (is_dirty(dpf->page)) {
161        /* is dirty */
162        swap_out(dpf->page);
163    }
164
165    err = frame_unmap(dpr, dpf->page);
166    if (err_is_fail(err)) {
167        return err;
168    }
169    dpf->page = NULL;
170    dpf->vnode_entry = NULL;
171
172    return SYS_ERR_OK;
173}
174
175
176static errval_t frame_evict_any(struct demand_paging_region *dpr,
177                                struct dp_frame **ret_dpf)
178{
179    errval_t err;
180
181    DP_DEBUG_SWAP("[evict] victim=%" PRIu64 "\n",  dpr->frames_victim);
182
183    struct dp_frame *dpf = dpr->frames[dpr->frames_victim];
184
185    err = frame_evict(dpr, dpf);
186    if (err_is_fail(err)) {
187        return err;
188    }
189
190    /* set the next victim */
191    dpr->frames_victim = (dpr->frames_victim + 1) % dpr->frames_count;
192
193    *ret_dpf = dpf;
194
195    return SYS_ERR_OK;
196}
197
198/*
199 * ===========================================================================
200 * Page-fault handler
201 * ===========================================================================
202 */
203
204static errval_t handle_pagefault(lvaddr_t vaddr)
205{
206    /* find demand paging regions */
207    errval_t err;
208
209    DP_DEBUG_HANDLER("pagefault at vaddr = %" PRIx64 "\n", vaddr);
210
211    struct demand_paging_region *dpr = demand_paging_regions;
212
213    lvaddr_t base;
214
215    while (dpr) {
216        base = vregion_get_base_addr(&dpr->vreg);
217        if (base <= vaddr && vaddr < (base + vregion_get_size(&dpr->vreg))) {
218            /* found */
219            break;
220        }
221        dpr = dpr->next;
222    }
223
224    if (dpr == NULL) {
225        DP_DEBUG_HANDLER("dpr not found\n");
226        return -1;
227    }
228
229    if (dpr->frames_count == 0) {
230        USER_PANIC("there are o frames in the region\n");
231    }
232
233    struct dp_frame *dpf;
234    if (dpr->frames_free) {
235        dpf = dpr->frames_free;
236        dpr->frames_free = dpf->next;
237    } else {
238        err = frame_evict_any(dpr, &dpf);
239        if (err_is_fail(err)) {
240            return err;
241        }
242    }
243
244    /* find page */
245    vaddr = vaddr & ~(dpr->pagesize - 1);
246
247    DP_DEBUG_HANDLER("handling fault on page 0x%" PRIx64 " in dpr='%s' with "
248                     "frame 0x%" PRIx64"\n", vaddr, dpr->swapname, dpf->paddr);
249
250    size_t slot = (vaddr - base) / dpr->pagesize;
251    struct dp_page *dpp = &dpr->pages[slot];
252    assert(dpp->vaddr == vaddr);
253
254    /* install the mapping */
255    err = frame_map(dpr, dpp, dpf);
256    if (err_is_fail(err)) {
257        return err;
258    }
259
260    /* check if we need to swap in the page, otherwise clean */
261    if (dpp->state == DEMAND_PAGING_PST_FILE) {
262        swap_in(dpp);
263    } else {
264        memset((void *)vaddr, 0, dpr->pagesize);
265    }
266
267    return SYS_ERR_OK;
268}
269
270static void exn_handler(enum exception_type type, int subtype,
271                        void *addr, arch_registers_state_t *regs)
272{
273    errval_t err;
274    if (type == EXCEPT_PAGEFAULT) {
275        err = handle_pagefault((lvaddr_t)addr);
276        if (err_is_fail(err)) {
277            // could not handle page fault, exiting for now
278            // TODO: do something sensible here
279            exit(1);
280        }
281    } else {
282        DP_DEBUG_HANDLER("unknown exception\n");
283    }
284    return;
285}
286
/**
 * \brief Reserves a range of virtual address space with pinned page tables.
 *
 * \param vregion   uninitialized vregion to fill in and register
 * \param bytes     size of the reservation
 * \param pagesize  alignment used when determining the address
 * \param flags     vregion flags for the reservation
 *
 * \returns SYS_ERR_OK on success, pushed error value on failure
 *
 * The vregion is added to the current vspace without a memobj; faults in
 * the range are expected to be handled by the demand-paging handler.
 */
static errval_t vspace_reserve_region(struct vregion *vregion,
                                      size_t bytes, size_t pagesize,
                                      vregion_flags_t flags)
{
    errval_t err;

    struct vspace *vspace = get_current_vspace();
    struct pmap *pmap = vspace_get_pmap(vspace);

    /* temporary memobj used only to ask the pmap for a suitable address */
    struct memobj memobj;
    memobj.size = bytes;

    genvaddr_t address;
    err = pmap->f.determine_addr(pmap, &memobj, pagesize, &address);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_PMAP_DETERMINE_ADDR);
    }

    /* NOTE(review): memobj is NULL — assumes no other vspace code touches
     * this vregion's memobj; verify against vspace internals */
    vregion->vspace = vspace;
    vregion->memobj = NULL;
    vregion->base   = address;
    vregion->offset = 0;
    vregion->size   = bytes;
    vregion->flags  = flags;

    err = vspace_add_vregion(vspace, vregion);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_VSPACE_ADD_REGION);
    }

    /* allocate and pin all page tables for the range up front, so the
     * fault handler never has to allocate vnodes itself */
    err = pmap->f.create_pts_pinned(pmap, address, bytes, flags);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_PMAP_MAP);
    }

    return err;
}
324
325static errval_t vspace_get_vnode(struct vregion *vregion, lvaddr_t vaddr,
326                                 lvaddr_t *ret_vaddr)
327{
328    struct vspace *vspace = vregion_get_vspace(vregion);
329    struct pmap *pmap = vspace_get_pmap(vspace);
330    assert(pmap->f.get_leaf_pt);
331    return pmap->f.get_leaf_pt(pmap, vaddr, ret_vaddr);
332}
333
334
335static errval_t create_swap_file(char *path, size_t bytes, vfs_handle_t *ret_handle)
336{
337    errval_t err;
338
339    /* open the paging file */
340    err = vfs_create(path, ret_handle);
341    if (err_is_fail(err)) {
342        DEBUG_ERR(err, "could not create the vfs handle");
343        return err;
344    }
345
346    err = vfs_truncate(*ret_handle, bytes);
347    if (err_is_fail(err)) {
348        DEBUG_ERR(err, "could not truncate swapfile");
349        return err;
350    }
351
352    return SYS_ERR_OK;
353}
354
355/*
356 * ===========================================================================
357 * Public interface
358 * ===========================================================================
359 */
360
361
362errval_t demand_paging_init(void *ex_stack, size_t stack_size)
363{
364    errval_t err;
365
366    DP_DEBUG_MGMT("[init] preparing exception handler");
367
368    if (ex_stack != NULL && stack_size < EXCEPTION_STACK_MIN_SIZE) {
369        return -1;
370    }
371
372    if (ex_stack == NULL) {
373        if (stack_size < EXCEPTION_STACK_MIN_SIZE) {
374            stack_size = EXCEPTION_STACK_SIZE;
375        }
376        ex_stack = calloc(stack_size, sizeof(char));
377    }
378
379    void *ex_stack_top = ex_stack + stack_size;
380
381    DP_DEBUG_MGMT("[init] stack top=%p, stackbase=%p\n", exn_handler,
382                 ex_stack_top, ex_stack);
383
384    err = thread_set_exception_handler(exn_handler, NULL, ex_stack, ex_stack_top,
385                                       NULL, NULL);
386    if (err_is_fail(err)) {
387        DEBUG_ERR(err, "failed to set the exceptin handler");
388        return err;
389    }
390
391    vfs_init();
392
393    err = vfs_mkdir(DEMAND_PAGING_SWAP_FILE);
394    if (err_is_fail(err)) {
395        DEBUG_ERR(err, "err");
396        /* can actually fail */
397    }
398
399    return SYS_ERR_OK;
400}
401
/**
 * \brief Creates a new demand paging region backed by a swap file.
 *
 * \param bytes      requested region size; rounded up to pagesize
 * \param pagesize   BASE_PAGE_SIZE, LARGE_PAGE_SIZE or HUGE_PAGE_SIZE
 * \param numframes  number of physical frames backing the region
 * \param ret_dpr    optional out parameter for the created region
 *
 * \returns SYS_ERR_OK on success, -1 for an invalid pagesize,
 *          error value otherwise (panics on several internal failures).
 *
 * Reserves virtual address space, creates a per-region swap file named
 * after the base address, resolves the leaf page-table entry of every
 * page, and allocates + retypes the backing frames into a new cnode.
 */
errval_t demand_paging_region_create(size_t bytes, size_t pagesize, size_t numframes,
                                     struct demand_paging_region **ret_dpr)
{
    errval_t err;

    DP_DEBUG_MGMT("[create] dpr of size %" PRIu64 "\n", bytes);

    /* determine basic information about the page sizes */
    vregion_flags_t flags = VREGION_FLAGS_READ_WRITE;
    uint8_t pagebits;
    switch (pagesize) {
        case BASE_PAGE_SIZE:
            pagebits = BASE_PAGE_BITS;
            break;
        case LARGE_PAGE_SIZE:
            flags |= VREGION_FLAGS_LARGE;
            pagebits = LARGE_PAGE_BITS;
            break;
        case HUGE_PAGE_SIZE:
            pagebits = HUGE_PAGE_BITS;
            flags |= VREGION_FLAGS_HUGE;
            break;
        default:
            return -1;
            break;
    }

    /* round up bytes and calculate number of slots */
    bytes = ROUND_UP(bytes, pagesize);
    size_t slots = bytes / pagesize;
    /* each leaf page table holds 512 entries */
    size_t vnode_leaves_count = ((bytes / pagesize) + 511) / 512;

    /* allocate the data structure: region header, page array and vnode
     * array are carved out of a single allocation */
    struct demand_paging_region *dpr = calloc(1, sizeof(*dpr) +
                                              slots * sizeof(struct dp_page) +
                                              (vnode_leaves_count) * sizeof(void *));
    if (dpr == NULL) {
        return LIB_ERR_MALLOC_FAIL;
    }

    /* initialize fields */
    dpr->pagesize = pagesize;
    dpr->pages = (struct dp_page *)(dpr + 1);

    err = vspace_reserve_region(&dpr->vreg, bytes, pagesize, flags);
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "reserve region in vspace for demand paging\n");
        return err;
    }

    /* the swap file is named after the region's base address */
    snprintf(dpr->swapname, DEMAND_PAGING_SWAP_FILE_PATHLEN, "%s/0x%016lx",
             DEMAND_PAGING_SWAP_FILE,
             vspace_genvaddr_to_lvaddr(vregion_get_base_addr(&dpr->vreg)));

    err = create_swap_file(dpr->swapname, bytes, &dpr->swapfile);
    if (err_is_fail(err)) {
        return err;
    }

    /* initialize pages */
    genvaddr_t addr = vspace_genvaddr_to_lvaddr(vregion_get_base_addr(&dpr->vreg));
    lvaddr_t vnode_addr;


    err = vspace_get_vnode(&dpr->vreg, addr, &vnode_addr);
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "foobar");
    }

    /* record the leaf page table of every page; consecutive pages that
     * share a leaf table share the same vnodes[] entry */
    dpr->vnodes = (void **)(dpr->pages + slots);
    dpr->vnodes[0] = (void *)vnode_addr;
    int j = 0;
    for (size_t i = 0; i < slots; ++i) {
        err = vspace_get_vnode(&dpr->vreg, addr, &vnode_addr);
        if (err_is_fail(err)) {
            USER_PANIC_ERR(err, "foobar");
        }

        if (dpr->vnodes[j] != (void *)vnode_addr) {
            dpr->vnodes[++j] = (void *)vnode_addr;
        }
        dpr->pages[i].pagenr = i;
        dpr->pages[i].dpr = dpr;
        dpr->pages[i].vaddr = addr;
        dpr->pages[i].vnode = (void *)vnode_addr;
        dpr->pages[i].vnode_entry = (void *)vnode_addr;
        /* point vnode_entry at this page's entry within the leaf table */
        if (pagesize == HUGE_PAGE_SIZE) {
            dpr->pages[i].vnode_entry += X86_64_PDPT_BASE(addr);
        } else if (pagesize == LARGE_PAGE_SIZE) {
            dpr->pages[i].vnode_entry += X86_64_PDIR_BASE(addr);
        } else {
            dpr->pages[i].vnode_entry += X86_64_PTABLE_BASE(addr);
        }
        addr += pagesize;

    }

    /* allocate the frames: one large frame, retyped into page-sized
     * frames below */
    struct capref frame;
    size_t allocated_size;
    err = frame_alloc(&frame, numframes * pagesize, &allocated_size);
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "frame alloc\n");
    }

    struct frame_identity id;
    err = frame_identify(frame, &id);
    assert(err_is_ok(err));

    /* create a cnode to hold the retyped per-page frame capabilities */
    struct capref cnode_cap;
    struct capref frames;
    err = cnode_create(&cnode_cap, &frames.cnode, allocated_size / pagesize, NULL);
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "cnode create\n");
    }

    debug_printf("FRAME BASE: %lx\n", id.base);

    err = cap_retype(frames, frame, 0, ObjType_Frame, pagesize, numframes);
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "cap retype\n");
    }

    dpr->frames = calloc(numframes, sizeof(void *));
    if (dpr->frames == NULL) {
        USER_PANIC("alloc frame counter\n");
    }

    /* initialize the frames */
    struct dp_frame *dpf = calloc(numframes, sizeof(*dpf));
    if (dpf == NULL) {
        USER_PANIC("alloc frame counter\n");
    }
    /* mark the head of this batch so it can later be freed as one chunk */
    dpf->first = 1;
    for (size_t i = 0; i < numframes; ++i) {
        dpf->frame = frames;
        dpf->page = NULL;
        if (i == (numframes - 1)) {
            dpf->next = NULL;
        } else {
            dpf->next = (dpf+1);
        }

        dpr->frames[i] = dpf;

        dpf++;
        frames.slot++;
    }

    /* all frames start out on the free list */
    dpr->frames_free = dpr->frames[0];
    dpr->frames_count = numframes;

    /* link the new region into the global region list */
    dpr->next = demand_paging_regions;
    demand_paging_regions = dpr;

    if (ret_dpr) {
        *ret_dpr = dpr;
    }

    debug_printf("region created\n");


    return SYS_ERR_OK;
}
566
567errval_t demand_paging_region_add_frames(struct capref *frames, size_t count,
568                                         struct demand_paging_region *dpr)
569{
570    if (count == 0) {
571        return SYS_ERR_OK;
572    }
573
574    assert(dpr);
575
576    /* initialize the frames */
577    struct dp_frame *dpf = calloc(count, sizeof(*dpf));
578    if (dpf == NULL) {
579        return LIB_ERR_MALLOC_FAIL;
580    }
581
582    struct dp_frame **dp_frames = realloc(dpr->frames,
583                                          (dpr->frames_count + count) * sizeof(void *));
584    if (dp_frames == NULL) {
585        free(dpf);
586        return LIB_ERR_MALLOC_FAIL;
587    }
588
589    dpf->first = 1;
590    for (size_t i = 0; i < count; ++i) {
591        dpf->frame = frames[i];
592        dpf->page = NULL;
593        if (i == (count - 1)) {
594            dpf->next = NULL;
595        } else {
596            dpf->next = (dpf+1);
597        }
598
599        dpr->frames[dpr->frames_count + i] = dpf;
600
601        dpf++;
602    }
603
604    /* add it to the free list */
605    if (dpr->frames_free) {
606        dpr->frames[dpr->frames_count + count - 1]->next = dpr->frames_free;
607    }
608    dpr->frames_free = dpr->frames[dpr->frames_count];
609
610    /* update count */
611    dpr->frames_count += count;
612    dpr->frames = dp_frames;
613
614    return SYS_ERR_OK;
615}
616
617errval_t demand_paging_region_remove_frames(size_t count, struct demand_paging_region *dpr,
618                                             struct capref *ret_frames, size_t *ret_count)
619{
620    errval_t err;
621
622    if (count == 0) {
623        goto out;
624    }
625
626    if (count > (dpr->frames_count - 1)) {
627        count = (dpr->frames_count - 1);
628    }
629
630    struct dp_frame *dpf = dpr->frames[dpr->frames_count - 1];
631    for (size_t i = 0; i < count; ++i) {
632        err = frame_evict(dpr, dpf);
633        if (err_is_fail(err)) {
634            count = i;
635            break;
636        }
637        ret_frames[i] = dpf->frame;
638        memset(dpf, 0, sizeof(*dpf));
639        if (dpf->first) {
640            free(dpf);
641        }
642        dpf--;
643    }
644
645    dpr->frames_count -= count;
646    dpr->frames = realloc(dpr->frames, dpr->frames_count * sizeof(void *));
647    assert(dpr->frames);
648
649    out:
650    if (ret_count) {
651        *ret_count = count;
652    }
653    return SYS_ERR_OK;
654}
655
/**
 * \brief Destroys a demand paging region. Not yet implemented: panics.
 *
 * NOTE(review): the name is misspelled ("destory"); it is part of the
 * public interface, so renaming would break callers.
 */
errval_t demand_paging_region_destory(struct demand_paging_region *dpr)
{
    USER_PANIC("NYI");
    return SYS_ERR_OK;
}
661
662void *demand_paging_get_base_address(struct demand_paging_region *dpr)
663{
664    return (void *)vspace_genvaddr_to_lvaddr(vregion_get_base_addr(&dpr->vreg));
665}
666