/**
 * \file
 * \brief pmap management
 *
 * x86_32-specific management of page tables.
 *
 * Warning: This code is coupled with the code in slot_alloc/. and pinned.c
 *
 * The maximum number of slots required to map a BASE_PAGE_SIZE-sized
 * page is the number of page table levels + 1.
 * For x86_32 this sum is 3.
 *
 * Warning: Additional slots would be required to map a BASE_PAGE_SIZE-sized
 * page if we also tracked the actual frames that are mapped.
 * Currently we do not.
 */

/*
 * Copyright (c) 2010-2013 ETH Zurich.
 * Copyright (c) 2014, HP Labs.
 * All rights reserved.
 *
 * This file is distributed under the terms in the attached LICENSE file.
 * If you do not find this file, copies can be found by writing to:
 * ETH Zurich D-INFK, Universitaetstr. 6, CH-8092 Zurich. Attn: Systems Group.
 */

#include <barrelfish/barrelfish.h>
#include <barrelfish/dispatch.h>
#include <stdio.h>
#include "target/x86/pmap_x86.h"


// Location and size of virtual address space reserved for mapping
// frames backing refill_slabs
#define META_DATA_RESERVED_BASE ((lvaddr_t)1UL*1024*1024*1024)
#define META_DATA_RESERVED_SIZE (X86_32_BASE_PAGE_SIZE * 1200)
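// With X86_32_BASE_PAGE_SIZE = 4 KiB this reserves 1200 * 4 KiB = 4800 KiB
// (just under 5 MiB) of virtual address space, starting at 1 GiB, for the
// frames that back the vnode slab allocator (see refill_slabs() below).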

// flags for large pages
#define FLAGS_LARGE 0x0100

/**
 * \brief Translate generic vregion flags to architecture-specific pmap flags
 */
static paging_x86_32_flags_t vregion_to_pmap_flag(vregion_flags_t vregion_flags)
{
    paging_x86_32_flags_t pmap_flags = X86_32_PTABLE_USER_SUPERVISOR |
        X86_32_PTABLE_EXECUTE_DISABLE;

    if (!(vregion_flags & VREGION_FLAGS_GUARD)) {
        if (vregion_flags & VREGION_FLAGS_WRITE) {
            pmap_flags |= X86_32_PTABLE_READ_WRITE;
        }
        if (vregion_flags & VREGION_FLAGS_EXECUTE) {
            pmap_flags &= ~X86_32_PTABLE_EXECUTE_DISABLE;
        }
        if (vregion_flags & VREGION_FLAGS_NOCACHE) {
            pmap_flags |= X86_32_PTABLE_CACHE_DISABLED;
        }
        else if (vregion_flags & VREGION_FLAGS_WRITE_COMBINING) {
            // PAT entry PA4 is configured as write-combining
            pmap_flags |= PTABLE_ATTR_INDEX;
        }
    }

    return pmap_flags;
}
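
// For example, VREGION_FLAGS_READ_WRITE translates to
// X86_32_PTABLE_USER_SUPERVISOR | X86_32_PTABLE_EXECUTE_DISABLE |
// X86_32_PTABLE_READ_WRITE: user-accessible and writable, but not executable,
// since VREGION_FLAGS_EXECUTE was not requested.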

static inline bool is_same_pdir(genvaddr_t va1, genvaddr_t va2)
{
    return (va1>>X86_32_LARGE_PAGE_BITS) == (va2>>X86_32_LARGE_PAGE_BITS);
}
static inline bool is_same_pdpt(genvaddr_t va1, genvaddr_t va2)
{
#ifdef CONFIG_PAE
    // PDPT in PAE has 4 entries, uses the topmost two bits
    return (va1>>30) == (va2>>30);
#else
    // without PAE there is no PDPT, so this is trivially true
    return true;
#endif
}
static inline genvaddr_t get_addr_prefix(genvaddr_t va)
{
    return va >> X86_32_LARGE_PAGE_BITS;
}
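
// A page directory entry covers one "large page" worth of address space, so
// two addresses share a leaf page table exactly when they share this prefix.
// Without PAE, X86_32_LARGE_PAGE_BITS is 22 (4 MiB regions): 0x40000000 and
// 0x403FF000 share prefix 0x100, while 0x40400000 does not.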

/**
 * \brief Returns the vnode for the page directory mapping a given vspace address
 */
static errval_t get_pdir(struct pmap_x86 *pmap, genvaddr_t base,
                           struct vnode **pdir)
{
#ifdef CONFIG_PAE
    struct vnode *root = &pmap->root;
    assert(root != NULL);

    // PDPTE mapping
    if ((*pdir = find_vnode(root, X86_32_PDPTE_BASE(base))) == NULL) {
        errval_t err = alloc_vnode(pmap, root, ObjType_VNode_x86_32_pdir,
                          X86_32_PDPTE_BASE(base), pdir);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_ALLOC_VNODE);
        }
    }
#else
    *pdir = &pmap->root;
#endif

    return SYS_ERR_OK;
}

/**
 * \brief Returns the vnode for the page table mapping a given vspace address
 */
static errval_t get_ptable(struct pmap_x86 *pmap, genvaddr_t base,
                           struct vnode **ptable)
{
    errval_t err;
    struct vnode *pdir;
    err = get_pdir(pmap, base, &pdir);
    if (err_is_fail(err)) {
        return err;
    }

    // PDIR mapping
    if ((*ptable = find_vnode(pdir, X86_32_PDIR_BASE(base))) == NULL) {
        err = alloc_vnode(pmap, pdir, ObjType_VNode_x86_32_ptable,
                          X86_32_PDIR_BASE(base), ptable);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_ALLOC_VNODE);
        }
    }

    return SYS_ERR_OK;
}
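
// Note: get_pdir()/get_ptable() allocate missing intermediate page-table
// vnodes on demand via alloc_vnode(), so a single get_ptable() call can
// consume up to two vnode slabs (a pdir under PAE plus a ptable) before the
// page mapping itself is recorded.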

static struct vnode *find_pdir(struct pmap_x86 *pmap, genvaddr_t base)
{
    struct vnode *root = &pmap->root;
    assert(root != NULL);

#ifdef CONFIG_PAE
    // PDPT mapping
    return find_vnode(root, X86_32_PDPTE_BASE(base));
#else
    return root;
#endif
}

static errval_t do_single_map(struct pmap_x86 *pmap, genvaddr_t vaddr,
                              genvaddr_t vend, struct capref frame,
                              size_t offset, size_t pte_count,
                              vregion_flags_t flags)
{
    //printf("[do_single_map] vaddr = 0x%"PRIxGENVADDR"\n", vaddr);
    // translate flags
    paging_x86_32_flags_t pmap_flags = vregion_to_pmap_flag(flags);

    // Get the page table and do mapping specific alterations
    struct vnode *ptable;
    errval_t err;
    size_t base;

    if (flags & VREGION_FLAGS_LARGE) {
        // 4M/2M (PAE) mapping
        err = get_pdir(pmap, vaddr, &ptable);
        base = X86_32_PDIR_BASE(vaddr);
    } else {
        // 4K mapping
        err = get_ptable(pmap, vaddr, &ptable);
        base = X86_32_PTABLE_BASE(vaddr);
    }
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_PMAP_GET_PTABLE);
    }
    assert(ptable->is_vnode);

    // check if there is an overlapping mapping
    if (has_vnode(ptable, base, pte_count, false)) {
        if (has_vnode(ptable, base, pte_count, true)) {
            printf("page already exists in 0x%"
                    PRIxGENVADDR"--0x%"PRIxGENVADDR"\n", vaddr, vend);
            return LIB_ERR_PMAP_EXISTING_MAPPING;
        } else {
            // clean out empty page tables. We do this here because we benefit
            // from having the page tables in place when doing lots of small
            // mappings
            remove_empty_vnodes(pmap, ptable, base, pte_count);
        }
    }

    // setup userspace mapping
    struct vnode *page = slab_alloc(&pmap->slab);
    assert(page);
    page->is_vnode = false;
    page->entry = base;
    page->next  = ptable->u.vnode.children;
    ptable->u.vnode.children = page;
    page->u.frame.cap = frame;
    page->u.frame.offset = offset;
    page->u.frame.flags = flags;
    page->u.frame.pte_count = pte_count;

    err = pmap->p.slot_alloc->alloc(pmap->p.slot_alloc, &page->mapping);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_SLOT_ALLOC);
    }

    // do map
    err = vnode_map(ptable->u.vnode.cap, frame, base,
                    pmap_flags, offset, pte_count, page->mapping);
    if (err_is_fail(err)) {
        printf("error in do_single_map: vnode_map failed\n");
        return err_push(err, LIB_ERR_VNODE_MAP);
    }

    return SYS_ERR_OK;
}
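
/* Example: a 16 KiB read/write mapping whose start and end fall into the same
 * leaf page table reaches this function with pte_count == 4, so all four PTEs
 * are installed by the single vnode_map() invocation above; the `page` vnode
 * records the frame cap, offset, flags and PTE count so that unmap and
 * modify_flags can later operate on the whole range.
 */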

static errval_t do_map(struct pmap_x86 *pmap, genvaddr_t vaddr,
                       struct capref frame, size_t offset, size_t size,
                       vregion_flags_t flags, size_t *retoff, size_t *retsize)
{
    //printf("[do_map] vaddr = 0x%"PRIxGENVADDR", size = %zd\n", vaddr, size);
    errval_t err;

    // figure out mapping parameters
    size_t page_size = X86_32_BASE_PAGE_SIZE;
    size_t base = X86_32_PTABLE_BASE(vaddr);
    if (flags & VREGION_FLAGS_LARGE) {
        // 4M/2M (PAE) pages
        page_size = X86_32_LARGE_PAGE_SIZE;
        base = X86_32_PDIR_BASE(vaddr);
    }

    // TODO: needs overhaul for mixed-size mappings
    // TODO: need to make sure we can map that much
    size = ROUND_UP(size, page_size);
    size_t pte_count = DIVIDE_ROUND_UP(size, page_size);
    genvaddr_t vend = vaddr + size;

    if (is_same_pdir(vaddr, vend) ||
        (flags & VREGION_FLAGS_LARGE && is_same_pdpt(vaddr, vend))) {
        // fast path
        err = do_single_map(pmap, vaddr, vend, frame, offset, pte_count, flags);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_DO_MAP);
        }
    }
    else { // multiple leaf page tables
        // first leaf
        uint32_t c = X86_32_PTABLE_SIZE - base;
        genvaddr_t temp_end = vaddr + c * page_size;
        err = do_single_map(pmap, vaddr, temp_end, frame, offset, c, flags);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_DO_MAP);
        }

        // map full leaves
        while (get_addr_prefix(temp_end) < get_addr_prefix(vend)) {
            // update vars
            vaddr = temp_end;
            temp_end = vaddr + X86_32_PTABLE_SIZE * page_size;
            offset += c * page_size;
            c = X86_32_PTABLE_SIZE;

            // do mapping
            err = do_single_map(pmap, vaddr, temp_end, frame, offset,
                    X86_32_PTABLE_SIZE, flags);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_PMAP_DO_MAP);
            }
        }

        // map remaining part
        offset += c * page_size;
        if (flags & VREGION_FLAGS_LARGE) {
            // 4M/2M (PAE) mapping
            c = X86_32_PDIR_BASE(vend) - X86_32_PDIR_BASE(temp_end);
        } else {
            // 4K mapping
            c = X86_32_PTABLE_BASE(vend) - X86_32_PTABLE_BASE(temp_end);
        }
        if (c) {
            // do mapping
            err = do_single_map(pmap, temp_end, vend, frame, offset, c, flags);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_PMAP_DO_MAP);
            }
        }
    }

    if (retoff) {
        *retoff = offset;
    }
    if (retsize) {
        *retsize = size;
    }
    return SYS_ERR_OK;
}
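
/* Example of the slow path (4K mappings, no PAE): mapping 6 MiB starting at
 * 0x403FE000 crosses two page-directory boundaries, so the work is split into
 * a first partial leaf of 2 PTEs (up to the 4 MiB boundary at 0x40400000),
 * one full leaf of 1024 PTEs, and a final partial leaf of the remaining
 * 510 PTEs.
 */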

/// Compute upper limit on number of slabs required to perform a mapping
static size_t max_slabs_for_mapping(size_t bytes)
{
    size_t max_pages  = DIVIDE_ROUND_UP(bytes, X86_32_BASE_PAGE_SIZE);
    size_t max_ptable = DIVIDE_ROUND_UP(max_pages, X86_32_PTABLE_SIZE);
    size_t max_pdir   = DIVIDE_ROUND_UP(max_ptable, X86_32_PTABLE_SIZE) + 1;
#ifdef CONFIG_PAE
    size_t max_pdpt   = DIVIDE_ROUND_UP(max_pdir, X86_32_PTABLE_SIZE) + 1;
#else
    size_t max_pdpt   = 0;
#endif
    return max_pages + max_ptable + max_pdir + max_pdpt;
}
static size_t max_slabs_for_mapping_large(size_t bytes)
{
    size_t max_pages  = DIVIDE_ROUND_UP(bytes, X86_32_LARGE_PAGE_SIZE);
    size_t max_pdir   = DIVIDE_ROUND_UP(max_pages, X86_32_PTABLE_SIZE) + 1;
#ifdef CONFIG_PAE
    size_t max_pdpt   = DIVIDE_ROUND_UP(max_pdir, X86_32_PTABLE_SIZE) + 1;
#else
    size_t max_pdpt   = 0;
#endif
    return max_pages + max_pdir + max_pdpt;
}
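
/* Worked example (4K mappings, no PAE): for bytes = 1 MiB this gives
 * max_pages = 256, max_ptable = 1 and max_pdir = 1 + 1 = 2, i.e. at most
 * 259 slabs. Each slab holds one struct vnode, whether it describes a page
 * table or a range of mapped frames.
 */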

/**
 * \brief Refill slabs used for metadata
 *
 * \param pmap     The pmap to refill in
 * \param request  The number of slabs the allocator must have
 *                 when the function returns
 *
 * When the current pmap is initialized, it reserves some virtual address
 * space for metadata. This reserved address space is used here.
 *
 * Can only be called for the current pmap.
 * Will recursively call into itself until it has enough slabs.
 */
static errval_t refill_slabs(struct pmap_x86 *pmap, size_t request)
{
    errval_t err;

    /* Keep looping until we have #request slabs */
    while (slab_freecount(&pmap->slab) < request) {
        // Amount of bytes required for #request
        size_t bytes = SLAB_STATIC_SIZE(request - slab_freecount(&pmap->slab),
                                        sizeof(struct vnode));

        /* Get a frame of that size */
        struct capref cap;
        err = frame_alloc(&cap, bytes, &bytes);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_FRAME_ALLOC);
        }

        /* If we do not have enough slabs to map the frame in, recurse */
        size_t required_slabs_for_frame = max_slabs_for_mapping(bytes);
        if (slab_freecount(&pmap->slab) < required_slabs_for_frame) {
            // If we recurse, we require more slabs than are needed to map a
            // single page
            assert(required_slabs_for_frame > 4);

            err = refill_slabs(pmap, required_slabs_for_frame);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_SLAB_REFILL);
            }
        }

        /* Perform mapping */
        genvaddr_t genvaddr = pmap->vregion_offset;
        pmap->vregion_offset += (genvaddr_t)bytes;
        assert(pmap->vregion_offset < vregion_get_base_addr(&pmap->vregion) +
               vregion_get_size(&pmap->vregion));

        err = do_map(pmap, genvaddr, cap, 0, bytes,
                     VREGION_FLAGS_READ_WRITE, NULL, NULL);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_DO_MAP);
        }

        /* Grow the slab */
        lvaddr_t buf = vspace_genvaddr_to_lvaddr(genvaddr);
        slab_grow(&pmap->slab, (void*)buf, bytes);
    }

    return SYS_ERR_OK;
}

/// Minimally refill the slab allocator
static errval_t min_refill_slabs(struct pmap_x86 *pmap)
{
    return refill_slabs(pmap, 5);
}

/**
 * \brief Create page mappings
 *
 * \param pmap     The pmap object
 * \param vaddr    The virtual address to create the mapping for
 * \param frame    The frame cap to map in
 * \param offset   Offset into the frame cap
 * \param size     Size of the mapping
 * \param flags    Flags for the mapping
 * \param retoff   If non-NULL, filled in with adjusted offset of mapped region
 * \param retsize  If non-NULL, filled in with adjusted size of mapped region
 */
static errval_t map(struct pmap *pmap, genvaddr_t vaddr, struct capref frame,
                    size_t offset, size_t size, vregion_flags_t flags,
                    size_t *retoff, size_t *retsize)
{
    //printf("[map] vaddr = 0x%"PRIxGENVADDR", size = %zd\n", vaddr, size);
    errval_t err;
    struct pmap_x86 *x86 = (struct pmap_x86*)pmap;

    size_t max_slabs;

    // Adjust the parameters to page boundaries
    if (flags & VREGION_FLAGS_LARGE) {
        // 4M pages / 2M pages (PAE)
        size   += X86_32_LARGE_PAGE_OFFSET(offset);
        size    = ROUND_UP(size, X86_32_LARGE_PAGE_SIZE);
        offset -= X86_32_LARGE_PAGE_OFFSET(offset);
        max_slabs = max_slabs_for_mapping_large(size);
    } else {
        // 4K pages
        size   += X86_32_BASE_PAGE_OFFSET(offset);
        size    = ROUND_UP(size, X86_32_BASE_PAGE_SIZE);
        offset -= X86_32_BASE_PAGE_OFFSET(offset);
        max_slabs = max_slabs_for_mapping(size);
    }

    // Refill slab allocator if necessary
    size_t slabs_free = slab_freecount(&x86->slab);
    max_slabs += 4; // minimum amount required to map a page
    if (slabs_free < max_slabs) {
        struct pmap *mypmap = get_current_pmap();
        if (pmap == mypmap) {
            err = refill_slabs(x86, max_slabs);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_SLAB_REFILL);
            }
        } else {
            size_t bytes = SLAB_STATIC_SIZE(max_slabs - slabs_free,
                                            sizeof(struct vnode));
            void *buf = malloc(bytes);
            if (!buf) {
                return LIB_ERR_MALLOC_FAIL;
            }
            slab_grow(&x86->slab, buf, bytes);
        }
    }

    //printf("[map call do_map] vaddr = 0x%"PRIxGENVADDR", flag = %x\n", vaddr, (int)flags);
    err = do_map(x86, vaddr, frame, offset, size, flags, retoff, retsize);
    return err;
}
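
/* Usage sketch (with `frame` a freshly allocated frame capability of at least
 * one base page and `vaddr` a suitably aligned free virtual address, e.g.
 * from determine_addr_raw()): map it read/write through the generic pmap
 * interface, which dispatches to map() above.
 *
 *   struct pmap *p = get_current_pmap();
 *   errval_t err = p->f.map(p, vaddr, frame, 0, X86_32_BASE_PAGE_SIZE,
 *                           VREGION_FLAGS_READ_WRITE, NULL, NULL);
 *
 * Application code normally goes through the vspace/vregion layer rather
 * than calling the pmap directly.
 */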

/**
 * \brief Find the mapping for `vaddr` in `pmap`.
 * \arg pmap the pmap to search in
 * \arg vaddr the virtual address to search for
 * \arg outpt filled in with the last-level page table meta-data, if found
 * \arg outpage filled in with the page meta-data, if found
 * \returns `true` iff we found a mapping for `vaddr`
 */
static bool find_mapping(struct pmap_x86 *pmap, genvaddr_t vaddr,
                         struct vnode **outpt, struct vnode **outpage)
{
    struct vnode *pdir = NULL, *pt = NULL, *page = NULL;

    // find page and last-level page table (the pdir for large pages,
    // a ptable for 4K pages)
    if ((pdir = find_pdir(pmap, vaddr)) != NULL) {
        page = find_vnode(pdir, X86_32_PDIR_BASE(vaddr));
        if (page && page->is_vnode) { // not 2M/4M pages
            pt = page;
            page = find_vnode(pt, X86_32_PTABLE_BASE(vaddr));
        } else if (page) {
            pt = pdir;
        }
    }
    if (outpt) {
        *outpt = pt;
    }
    if (outpage) {
        *outpage = page;
    }
    // report a mapping only if both the page table and the page meta-data
    // were found
    return pt != NULL && page != NULL;
}

static errval_t do_single_unmap(struct pmap_x86 *pmap, genvaddr_t vaddr,
                                size_t pte_count)
{
    errval_t err;
    struct vnode *pt = NULL, *page = NULL;

    find_mapping(pmap, vaddr, &pt, &page);

    if (pt) {
        if (page && page->u.frame.pte_count == pte_count) {
            err = vnode_unmap(pt->u.vnode.cap, page->mapping);
            if (err_is_fail(err)) {
                printf("vnode_unmap returned error: %s (%d)\n",
                        err_getstring(err), err_no(err));
                return err_push(err, LIB_ERR_VNODE_UNMAP);
            }

            // delete & free page->mapping after doing vnode_unmap()
            err = cap_delete(page->mapping);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_CAP_DELETE);
            }
            err = pmap->p.slot_alloc->free(pmap->p.slot_alloc, page->mapping);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_SLOT_FREE);
            }
            remove_vnode(pt, page);
            slab_free(&pmap->slab, page);
        }
        else {
            printf("couldn't find vnode\n");
            return LIB_ERR_PMAP_FIND_VNODE;
        }
    }

    return SYS_ERR_OK;
}

static inline bool is_large_page(struct vnode *p)
{
    return !p->is_vnode && p->u.frame.flags & VREGION_FLAGS_LARGE;
}
static inline bool is_huge_page(struct vnode *p)
{
    return !p->is_vnode && p->u.frame.flags & VREGION_FLAGS_HUGE;
}

/**
 * \brief Remove page mappings
 *
 * \param pmap     The pmap object
 * \param vaddr    The start of the virtual address range to remove
 * \param size     The size of the virtual address range to remove
 * \param retsize  If non-NULL, filled in with the actual size removed
 */
static errval_t unmap(struct pmap *pmap, genvaddr_t vaddr, size_t size,
                      size_t *retsize)
{
    //printf("[unmap] 0x%"PRIxGENVADDR", %zu\n", vaddr, size);
    errval_t err, ret = SYS_ERR_OK;
    struct pmap_x86 *x86 = (struct pmap_x86*)pmap;

    // determine whether we are unmapping a large page
    struct vnode *page = NULL;

    if (!find_mapping(x86, vaddr, NULL, &page)) {
        // TODO: better error
        return LIB_ERR_PMAP_UNMAP;
    }
    assert(!page->is_vnode);

    size_t page_size = X86_32_BASE_PAGE_SIZE;
    if (is_large_page(page)) {
        // large 4M/2M (PAE) page
        page_size = X86_32_LARGE_PAGE_SIZE;
    }

    size = ROUND_UP(size, page_size);
    genvaddr_t vend = vaddr + size;

    if (is_same_pdir(vaddr, vend) ||
        (is_same_pdpt(vaddr, vend) && is_large_page(page))) {
        // fast path
        err = do_single_unmap(x86, vaddr, size / page_size);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_UNMAP);
        }
    }
    else { // slow path
        // unmap first leaf
        uint32_t c = X86_32_PTABLE_SIZE - X86_32_PTABLE_BASE(vaddr);
        err = do_single_unmap(x86, vaddr, c);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_UNMAP);
        }

        // unmap full leaves
        vaddr += c * page_size;
        while (get_addr_prefix(vaddr) < get_addr_prefix(vend)) {
            c = X86_32_PTABLE_SIZE;
            err = do_single_unmap(x86, vaddr, X86_32_PTABLE_SIZE);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_PMAP_UNMAP);
            }
            vaddr += c * page_size;
        }

        // unmap remaining part
        c = X86_32_PTABLE_BASE(vend) - X86_32_PTABLE_BASE(vaddr);
        if (c) {
            err = do_single_unmap(x86, vaddr, c);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_PMAP_UNMAP);
            }
        }
    }

    if (retsize) {
        *retsize = size;
    }

    //printf("[unmap] exiting\n");
    return ret;
}

/**
 * \brief Modify the flags of a single mapping
 *
 * \param pmap  x86 pmap
 * \param vaddr start address
 * \param pages number of pages to modify
 * \param flags the new set of flags
 */
static errval_t do_single_modify_flags(struct pmap_x86 *pmap, genvaddr_t vaddr,
                                       size_t pages, vregion_flags_t flags)
{
    errval_t err = SYS_ERR_OK;

    struct vnode *pt = NULL, *page = NULL;

    if (!find_mapping(pmap, vaddr, &pt, &page)) {
        return LIB_ERR_PMAP_FIND_VNODE;
    }

    assert(pt && pt->is_vnode && page && !page->is_vnode);

    uint16_t ptentry = X86_32_PTABLE_BASE(vaddr);
    size_t pagesize = BASE_PAGE_SIZE;
    if (is_large_page(page)) {
        // large 4M/2M (PAE) page
        ptentry = X86_32_PDIR_BASE(vaddr);
        pagesize = LARGE_PAGE_SIZE;
    }

    if (inside_region(pt, ptentry, pages)) {
        // we're modifying part of a valid mapped region
        // arguments to invocation: invoke frame cap, first affected
        // page (as offset from first page in mapping), #affected
        // pages, new flags. Invocation should check compatibility of
        // new set of flags with cap permissions.
        size_t off = ptentry - page->entry;
        paging_x86_32_flags_t pmap_flags = vregion_to_pmap_flag(flags);
        // calculate TLB flushing hint
        genvaddr_t va_hint = 0;
        if (pages == 1) {
            // do assisted selective flush for single page
            va_hint = vaddr & ~X86_32_BASE_PAGE_MASK;
        }
        err = invoke_mapping_modify_flags(page->mapping, off, pages, pmap_flags, va_hint);
        if (err_is_fail(err)) {
            printf("invoke_mapping_modify_flags returned error: %s (%"PRIuERRV")\n",
                    err_getstring(err), err);
        }
        return err;
    }
    return SYS_ERR_OK;
}

/**
 * \brief Modify the flags of an existing page mapping
 *
 * \param pmap     The pmap object
 * \param vaddr    The start of the virtual address range to modify
 * \param size     The size of the virtual address range to modify
 * \param flags    New flags for the mapping
 * \param retsize  If non-NULL, filled in with the actual size modified
 *
 * TODO: fix for large page mappings
 */
static errval_t modify_flags(struct pmap *pmap, genvaddr_t vaddr, size_t size,
                             vregion_flags_t flags, size_t *retsize)
{
    errval_t err;
    struct pmap_x86 *x86 = (struct pmap_x86 *)pmap;

    // determine whether we are modifying a large page
    struct vnode *page = NULL;

    if (!find_mapping(x86, vaddr, NULL, &page)) {
        return LIB_ERR_PMAP_NOT_MAPPED;
    }

    assert(page && !page->is_vnode);

    size_t page_size = X86_32_BASE_PAGE_SIZE;
    size_t table_base = X86_32_PTABLE_BASE(vaddr);
    uint8_t map_bits = X86_32_BASE_PAGE_BITS + X86_32_PTABLE_BITS;
    if (is_large_page(page)) {
        // large 4M/2M (PAE) page
        page_size = X86_32_LARGE_PAGE_SIZE;
        table_base = X86_32_PDIR_BASE(vaddr);
        map_bits = X86_32_LARGE_PAGE_BITS + X86_32_PTABLE_BITS;
    }

    // TODO: match new policy of map when implemented
    size = ROUND_UP(size, page_size);
    genvaddr_t vend = vaddr + size;

    size_t pages = size / page_size;

    if (is_same_pdir(vaddr, vend) ||
        (is_same_pdpt(vaddr, vend) && is_large_page(page)))
    {
        // fast path
        err = do_single_modify_flags(x86, vaddr, pages, flags);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_MODIFY_FLAGS);
        }
    } else { // slow path
        // modify flags in first leaf
        uint32_t c = X86_32_PTABLE_SIZE - X86_32_PTABLE_BASE(vaddr);
        err = do_single_modify_flags(x86, vaddr, c, flags);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_MODIFY_FLAGS);
        }

        // modify flags in full leaves
        vaddr += c * page_size;
        while (get_addr_prefix(vaddr) < get_addr_prefix(vend)) {
            c = X86_32_PTABLE_SIZE;
            err = do_single_modify_flags(x86, vaddr, X86_32_PTABLE_SIZE, flags);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_PMAP_MODIFY_FLAGS);
            }
            vaddr += c * page_size;
        }

        // modify flags in remaining part
        c = X86_32_PTABLE_BASE(vend) - X86_32_PTABLE_BASE(vaddr);
        if (c) {
            err = do_single_modify_flags(x86, vaddr, c, flags);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_PMAP_MODIFY_FLAGS);
            }
        }
    }

    if (retsize) {
        *retsize = size;
    }

    return SYS_ERR_OK;
}
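
/* Usage sketch (hypothetical `vaddr`): downgrade an existing 8 KiB
 * read/write mapping to read-only; the range must already be mapped through
 * this pmap.
 *
 *   struct pmap *p = get_current_pmap();
 *   errval_t err = p->f.modify_flags(p, vaddr, 2 * X86_32_BASE_PAGE_SIZE,
 *                                    VREGION_FLAGS_READ, NULL);
 */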


/**
 * \brief Query existing page mapping
 *
 * \param pmap      The pmap object
 * \param vaddr     The virtual address to query
 * \param retvaddr  Returns the base virtual address of the mapping
 * \param retsize   Returns the actual size of the mapping
 * \param retcap    Returns the cap mapped at this address
 * \param retoffset Returns the offset within the cap that is mapped
 * \param retflags  Returns the flags for this mapping
 *
 * All of the ret parameters are optional.
 */
static errval_t lookup(struct pmap *pmap, genvaddr_t vaddr,
                       genvaddr_t *retvaddr, size_t *retsize,
                       struct capref *retcap, genvaddr_t *retoffset,
                       vregion_flags_t *retflags)
{
    USER_PANIC("NYI");
    return 0;
}

static errval_t dump(struct pmap *pmap, struct pmap_dump_info *buf, size_t buflen, size_t *items_written)
{
    struct pmap_x86 *x86 = (struct pmap_x86 *)pmap;
    struct pmap_dump_info *buf_ = buf;

#ifdef CONFIG_PAE
    struct vnode *pdpt = &x86->root, *pdir;
    size_t pdpt_index;
    assert(pdpt != NULL);
#else
    struct vnode *pdir = &x86->root;
    assert(pdir != NULL);
#endif
    struct vnode *pt, *frame;

    *items_written = 0;

    // iterate over pdpt entries
    size_t pdir_index;
#ifdef CONFIG_PAE
    for (pdir = pdpt->u.vnode.children; pdir != NULL; pdir = pdir->next) {
        pdpt_index = pdir->entry;
        // iterate over pdir entries
#endif
        for (pt = pdir->u.vnode.children; pt != NULL; pt = pt->next) {
            pdir_index = pt->entry;
            // iterate over pt entries
            for (frame = pt->u.vnode.children; frame != NULL; frame = frame->next) {
                if (*items_written < buflen) {
#ifdef CONFIG_PAE
                    buf_->pdpt_index = pdpt_index;
#endif
                    buf_->pdir_index = pdir_index;
                    buf_->pt_index = frame->entry;
                    buf_->cap = frame->u.frame.cap;
                    buf_->offset = frame->u.frame.offset;
                    buf_->flags = frame->u.frame.flags;
                    buf_++;
                    (*items_written)++;
                }
            }
#ifdef CONFIG_PAE
        }
#endif
    }
    return SYS_ERR_OK;
}

/**
 * \brief Retrieves an address that can currently be used for large mappings
 */
static errval_t determine_addr_raw(struct pmap *pmap, size_t size,
                                   size_t alignment, genvaddr_t *retvaddr)
{
    struct pmap_x86 *x86 = (struct pmap_x86 *)pmap;

    struct vnode *walk_pdir = x86->root.u.vnode.children;
    assert(walk_pdir != NULL); // assume there's always at least one existing entry

    if (alignment == 0) {
        alignment = BASE_PAGE_SIZE;
    } else {
        alignment = ROUND_UP(alignment, BASE_PAGE_SIZE);
    }
    size = ROUND_UP(size, alignment);

    size_t free_count = DIVIDE_ROUND_UP(size, LARGE_PAGE_SIZE);
    //debug_printf("need %zu contiguous free pdirs\n", free_count);

    // compile pdir free list
    bool f[1024];
    for (int i = 0; i < 1024; i++) {
        f[i] = true;
    }
    f[walk_pdir->entry] = false;
    while (walk_pdir) {
        assert(walk_pdir->is_vnode);
        f[walk_pdir->entry] = false;
        walk_pdir = walk_pdir->next;
    }
    genvaddr_t first_free = 384;
    // XXX: breaks for PAE
    for (; first_free < 512; first_free++) {
        if (f[first_free]) {
            for (int i = 1; i < free_count; i++) {
                if (!f[first_free + i]) {
                    // advance pointer
                    first_free = first_free + i;
                    goto next;
                }
            }
            break;
        }
next:
        assert(1 == 1); // make compiler shut up about label
    }
    //printf("first free: %li\n", (uint32_t)first_free);
    if (first_free + free_count <= 512) {
        *retvaddr = first_free << 22;
        return SYS_ERR_OK;
    } else {
        return LIB_ERR_OUT_OF_VIRTUAL_ADDR;
    }
}
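
/* Worked example (no PAE): the scan considers pdir entries 384..511, i.e. the
 * 4 MiB-aligned regions from 1.5 GiB up to 2 GiB. Requesting size = 8 MiB
 * gives free_count = 2; if entries 384 and 385 are both unused, the returned
 * address is 384 << 22 = 0x60000000.
 */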


static struct pmap_funcs pmap_funcs = {
    .determine_addr = pmap_x86_determine_addr,
    .determine_addr_raw = determine_addr_raw,
    .map = map,
    .unmap = unmap,
    .modify_flags = modify_flags,
    .lookup = lookup,
    .serialise = pmap_x86_serialise,
    .deserialise = pmap_x86_deserialise,
    .dump = dump,
};

/**
 * \brief Initialize an x86 pmap object
 */
errval_t pmap_x86_32_init(struct pmap *pmap, struct vspace *vspace,
                          struct capref vnode,
                          struct slot_allocator *opt_slot_alloc)
{
    struct pmap_x86 *x86 = (struct pmap_x86*)pmap;

    /* Generic portion */
    pmap->f = pmap_funcs;
    pmap->vspace = vspace;

    if (opt_slot_alloc != NULL) {
        pmap->slot_alloc = opt_slot_alloc;
    } else { /* use default allocator for this dispatcher */
        pmap->slot_alloc = get_default_slot_allocator();
    }

    /* x86 specific portion */
    slab_init(&x86->slab, sizeof(struct vnode), NULL);
    slab_grow(&x86->slab, x86->slab_buffer,
              sizeof(x86->slab_buffer));
    x86->refill_slabs = min_refill_slabs;

    x86->root.u.vnode.cap       = vnode;
    x86->root.u.vnode.children  = NULL;
    x86->root.is_vnode  = true;
    x86->root.next      = NULL;

    // choose a minimum mappable VA for most domains; enough to catch NULL
    // pointer derefs with suitably large offsets
    x86->min_mappable_va = 64 * 1024;

    // maximum mappable VA is derived from X86_32_MEMORY_OFFSET in kernel
    x86->max_mappable_va = (genvaddr_t)2 * 1024 * 1024 * 1024;

    return SYS_ERR_OK;
}
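
// Note: the root vnode wraps the capability for the top-level page table of
// this address space: the PDPT when CONFIG_PAE is set, otherwise the page
// directory (compare get_pdir() and dump() above).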

/**
 * \brief Initialize the current pmap. Reserve space for metadata
 *
 * This code is coupled with #vspace_current_init()
 */
errval_t pmap_x86_32_current_init(bool init_domain)
{
    struct pmap_x86 *x86 = (struct pmap_x86*)get_current_pmap();

    // To reserve a block of virtual address space, a vregion representing
    // the address space is required. We construct a bare-bones one here and
    // add it to the vregion list.
    struct vregion *vregion = &x86->vregion;
    vregion->vspace = NULL;
    vregion->memobj = NULL;
    vregion->base   = META_DATA_RESERVED_BASE;
    vregion->offset = 0;
    vregion->size   = META_DATA_RESERVED_SIZE;
    vregion->flags  = 0;
    vregion->next = NULL;

    struct vspace *vspace = x86->p.vspace;
    assert(!vspace->head);
    vspace->head = vregion;

    x86->vregion_offset = x86->vregion.base;

    // We don't know the vnode layout for the first part of our address space
    // (which was set up by the kernel), so we avoid mapping there until we
    // are told about it.
    x86->min_mappable_va = META_DATA_RESERVED_BASE;

    return SYS_ERR_OK;
}