/**
 * \file
 * \brief pmap management
 */

/*
 * Copyright (c) 2010,2015, ETH Zurich.
 * Copyright (c) 2015, Hewlett Packard Enterprise Development LP.
 * All rights reserved.
 *
 * This file is distributed under the terms in the attached LICENSE file.
 * If you do not find this file, copies can be found by writing to:
 * ETH Zurich D-INFK, Universitaetstr. 6, CH-8092 Zurich. Attn: Systems Group.
 */

/*
 * There was some minor difficulty here with mapping the CPU's native
 * page table arrangement onto Barrelfish. The problem lies with
 * resource bootstrapping: the bootstrap RAM allocator allocates whole pages.
 *
 * The natural division of bits is 12/8/12, corresponding to 4096
 * entries in the L1 table and 256 entries per L2 table.
 * Unfortunately 256 entries consume 1KB rather than a
 * page (4KB), so we pretend here and in the kernel caps page
 * code that the L1 has 1024 entries and L2 tables are 4KB in
 * size. The 4KB constraint comes from ram_alloc_fixed
 * allocating single pages and the difficulty in bootstrapping
 * cap slots (alloc_node takes a single slot).
 *
 * For now this suffices, but it might need to be revisited in future.
 *
 * An earlier cut at this used the first 1KB from each
 * allocation made from ram_alloc_fixed and wasted the remaining
 * space. Aside from the space wasted, it entailed a couple of minor
 * platform ifdefs to work around the discrepancy.
 *
 * Alternative fixes discussed include:
 *
 * 1. avoid the need to create vnodes before connecting to a
 *    real allocator (probably not plausible).
 *
 * 2. somehow make ram_alloc_fixed handle sub-page allocations
 *    (it's clunky, but perhaps we can give each domain a separate
 *     cnode full of 1KB-sized RAM caps?)
 *
 * 3. handle the problem at the level of vnode_create (can't see how to
 *    do this).
 *
 * 4. waste the space -- doing this cleanly will require a new parameter
 *    to retype to prevent all 4 caps being created.
 *
 * 5. introduce a new ARM-specific version of vnode_create that creates
 *    4 1KB vnodes, and is only called from the ARM VM code.
 */
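
/*
 * Note on the AArch64 case handled by this file: with a 4kB translation
 * granule, VMSAv8-64 uses a four-level table walk (L0..L3) with 512
 * entries per table, so a virtual address decomposes as
 *
 *     L0 index = VA[47:39], L1 = VA[38:30], L2 = VA[29:21], L3 = VA[20:12],
 *     page offset = VA[11:0]
 *
 * which is what the VMSAv8_64_Lx_BASE() macros used below are expected to
 * extract. For example, VA 0x0000_0080_0040_3000 has L0 index 1, L1 index 0,
 * L2 index 2 and L3 index 3.
 */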

#include <barrelfish/barrelfish.h>
#include <barrelfish/caddr.h>
#include <barrelfish/invocations_arch.h>
#include <stdio.h>

// Location of VSpace managed by this system.
#define VSPACE_BEGIN   ((lvaddr_t)(512*512*512*BASE_PAGE_SIZE * (disp_get_core_id() + 1)))
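// Note: 512*512*512*BASE_PAGE_SIZE = 2^39 bytes = 512 GiB, the amount of
// address space covered by a single L0 entry with 4kB pages, so each core's
// managed VSpace begins at its own 512 GiB-aligned offset (core 0 at 512 GiB,
// core 1 at 1 TiB, and so on).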


// Amount of virtual address space reserved for mapping frames
// backing refill_slabs.
//#define META_DATA_RESERVED_SPACE (BASE_PAGE_SIZE * 128) // 64
#define META_DATA_RESERVED_SPACE (BASE_PAGE_SIZE * 80000)
// increased from 128 pages for the PandaBoard port

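/**
 * \brief Translate generic vregion flags into kernel (KPI) paging flags
 *
 * Flags with no representation on this architecture (MPB, VTD snoop) are
 * silently dropped, and guard regions yield an empty (no-access) flag set.
 */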
static inline uintptr_t
vregion_flags_to_kpi_paging_flags(vregion_flags_t flags)
{
    STATIC_ASSERT(0x1ff == VREGION_FLAGS_MASK, "");
    STATIC_ASSERT(0x0f == KPI_PAGING_FLAGS_MASK, "");
    STATIC_ASSERT(VREGION_FLAGS_READ    == KPI_PAGING_FLAGS_READ,    "");
    STATIC_ASSERT(VREGION_FLAGS_WRITE   == KPI_PAGING_FLAGS_WRITE,   "");
    STATIC_ASSERT(VREGION_FLAGS_EXECUTE == KPI_PAGING_FLAGS_EXECUTE, "");
    STATIC_ASSERT(VREGION_FLAGS_NOCACHE == KPI_PAGING_FLAGS_NOCACHE, "");
    if ((flags & VREGION_FLAGS_MPB) != 0) {
        // XXX: ignore MPB flag on ARM,
        //      otherwise the assert below fires -AB
        flags &= ~VREGION_FLAGS_MPB;
    }
    // XXX: Ignore VTD Snoop flag on AArch64 - this stuff really isn't
    // portable -DC
    flags &= ~VREGION_FLAGS_VTD_SNOOP;
    if ((flags & VREGION_FLAGS_GUARD) != 0) {
        flags = 0;
    }
    assert(0 == (~KPI_PAGING_FLAGS_MASK & (uintptr_t)flags));
    return (uintptr_t)flags;
}

/**
 * \brief Starting at a given root, return the vnode with entry equal to #entry
 */
static struct vnode *find_vnode(struct vnode *root, uint32_t entry)
{
    assert(root != NULL);
    assert(root->is_vnode);
    struct vnode *n;

    for(n = root->u.vnode.children; n != NULL; n = n->next) {
        if(n->entry == entry) {
            return n;
        }
    }
    return NULL;
}

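/**
 * \brief Check whether [entry, entry + npages) lies entirely within a single
 *        frame mapping that is a child of #root
 */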
static bool inside_region(struct vnode *root, uint32_t entry, uint32_t npages)
{
    assert(root != NULL);
    assert(root->is_vnode);

    struct vnode *n;

    for (n = root->u.vnode.children; n; n = n->next) {
        if (!n->is_vnode) {
            uint16_t end = n->entry + n->u.frame.pte_count;
            if (n->entry <= entry && entry + npages <= end) {
                return true;
            }
        }
    }

    return false;
}

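/**
 * \brief Check whether the entry range [entry, entry + len) in #root is
 *        already occupied by a child page table or an overlapping frame mapping
 */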
static bool has_vnode(struct vnode *root, uint32_t entry, size_t len)
{
    assert(root != NULL);
    assert(root->is_vnode);
    struct vnode *n;

    uint32_t end_entry = entry + len;

    for (n = root->u.vnode.children; n; n = n->next) {
        if (n->is_vnode) {
            if (n->entry == entry) {
                return true;
            }
            // not a frame; the frame range checks below do not apply
            continue;
        }
        // n is a frame mapping covering entries [n->entry, end)
        uint32_t end = n->entry + n->u.frame.pte_count;
        if (n->entry < entry && end > end_entry) {
            return true;
        }
        if (n->entry >= entry && n->entry < end_entry) {
            return true;
        }
    }

    return false;
}

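/**
 * \brief Unlink #item from the list of children of #root
 */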
static void remove_vnode(struct vnode *root, struct vnode *item)
{
    assert(root->is_vnode);
    struct vnode *walk = root->u.vnode.children;
    struct vnode *prev = NULL;
    while (walk) {
        if (walk == item) {
            if (prev) {
                prev->next = walk->next;
                return;
            } else {
                root->u.vnode.children = walk->next;
                return;
            }
        }
        prev = walk;
        walk = walk->next;
    }
    assert(!"Should not get here");
}

/**
 * \brief Allocates a new VNode, adding it to the page table and our metadata
 */
static errval_t alloc_vnode(struct pmap_aarch64 *pmap_aarch64, struct vnode *root,
                            enum objtype type, uint32_t entry,
                            struct vnode **retvnode)
{
    assert(root->is_vnode);
    errval_t err;

    struct vnode *newvnode = slab_alloc(&pmap_aarch64->slab);
    if (newvnode == NULL) {
        return LIB_ERR_SLAB_ALLOC_FAIL;
    }
    newvnode->is_vnode = true;

    // The VNode capability
    err = pmap_aarch64->p.slot_alloc->alloc(pmap_aarch64->p.slot_alloc, &newvnode->u.vnode.cap);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_SLOT_ALLOC);
    }

    err = vnode_create(newvnode->u.vnode.cap, type);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_VNODE_CREATE);
    }

    // The vnode cap we will invoke must be in our cspace; if it was created
    // in another domain's cspace, copy it into ours to make it invokable.
    if (get_croot_addr(newvnode->u.vnode.cap) != CPTR_ROOTCN) {
        err = slot_alloc(&newvnode->u.vnode.invokable);
        assert(err_is_ok(err));
        err = cap_copy(newvnode->u.vnode.invokable, newvnode->u.vnode.cap);
        assert(err_is_ok(err));
    } else {
        // vnode is in our cspace: the cap itself is invokable
        newvnode->u.vnode.invokable = newvnode->u.vnode.cap;
    }
    assert(!capref_is_null(newvnode->u.vnode.cap));
    assert(!capref_is_null(newvnode->u.vnode.invokable));

    // The slot for the mapping capability
    err = pmap_aarch64->p.slot_alloc->alloc(pmap_aarch64->p.slot_alloc, &newvnode->mapping);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_SLOT_ALLOC);
    }

    // Map it
    err = vnode_map(root->u.vnode.invokable, newvnode->u.vnode.cap, entry,
                    KPI_PAGING_FLAGS_READ | KPI_PAGING_FLAGS_WRITE, 0, 1, newvnode->mapping);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_VNODE_MAP);
    }

    // The VNode meta data
    newvnode->is_vnode  = true;
    newvnode->entry     = entry;
    newvnode->next      = root->u.vnode.children;
    root->u.vnode.children = newvnode;
    newvnode->u.vnode.children = NULL;

    if (retvnode) {
        *retvnode = newvnode;
    }
    return SYS_ERR_OK;
}


/**
 * \brief Returns the L3 page table vnode mapping a given vspace address,
 *        allocating any missing intermediate vnodes on the way
 */
static errval_t get_ptable(struct pmap_aarch64  *pmap,
                           genvaddr_t        vaddr,
                           struct vnode    **ptable)
{
    errval_t err;
    struct vnode *root = &pmap->root;
    struct vnode *pl1, *pl2, *pl3;
    assert(root != NULL);

    // L0 mapping
    if ((pl1 = find_vnode(root, VMSAv8_64_L0_BASE(vaddr))) == NULL) {
        err = alloc_vnode(pmap, root, ObjType_VNode_AARCH64_l1,
                            VMSAv8_64_L0_BASE(vaddr), &pl1);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_ALLOC_VNODE);
        }
    }

    // L1 mapping
    if ((pl2 = find_vnode(pl1, VMSAv8_64_L1_BASE(vaddr))) == NULL) {
        err = alloc_vnode(pmap, pl1, ObjType_VNode_AARCH64_l2,
                            VMSAv8_64_L1_BASE(vaddr), &pl2);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_ALLOC_VNODE);
        }
    }

    // L2 mapping
    if ((pl3 = find_vnode(pl2, VMSAv8_64_L2_BASE(vaddr))) == NULL) {
        err = alloc_vnode(pmap, pl2, ObjType_VNode_AARCH64_l3,
                            VMSAv8_64_L2_BASE(vaddr), &pl3);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_ALLOC_VNODE);
        }
    }

    assert(pl3 != NULL);
    *ptable = pl3;
    return SYS_ERR_OK;
}

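/**
 * \brief Return the existing L3 page table vnode covering #vaddr, or NULL if
 *        any level of the walk is not yet present (never allocates)
 */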
static struct vnode *find_ptable(struct pmap_aarch64  *pmap,
                                 genvaddr_t vaddr)
{
    struct vnode *root = &pmap->root;
    struct vnode *pl1, *pl2;
    assert(root != NULL);

    // L0 mapping
    if((pl1 = find_vnode(root, VMSAv8_64_L0_BASE(vaddr))) == NULL) {
        return NULL;
    }

    // L1 mapping
    if((pl2 = find_vnode(pl1, VMSAv8_64_L1_BASE(vaddr))) == NULL) {
        return NULL;
    }

    // L2 mapping
    return find_vnode(pl2, VMSAv8_64_L2_BASE(vaddr));
}

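/**
 * \brief Map #pte_count pages of #frame (starting at #offset) at [#vaddr, #vend)
 *
 * All pages must fall within a single L3 page table; callers split larger
 * requests (see do_map below).
 */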
static errval_t do_single_map(struct pmap_aarch64 *pmap, genvaddr_t vaddr, genvaddr_t vend,
                              struct capref frame, size_t offset, size_t pte_count,
                              vregion_flags_t flags)
{
    // Get the page table
    struct vnode *ptable = NULL;
    errval_t err = get_ptable(pmap, vaddr, &ptable);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_PMAP_GET_PTABLE);
    }
    uintptr_t pmap_flags = vregion_flags_to_kpi_paging_flags(flags);

    uintptr_t idx = VMSAv8_64_L3_BASE(vaddr);

    // Create user level datastructure for the mapping
    assert(!has_vnode(ptable, idx, pte_count));

    struct vnode *page = slab_alloc(&pmap->slab);
    assert(page);

    page->is_vnode = false;
    page->entry = idx;
    page->next  = ptable->u.vnode.children;
    ptable->u.vnode.children = page;
    page->u.frame.cap = frame;
    page->u.frame.offset = offset;
    page->u.frame.flags = flags;
    page->u.frame.pte_count = pte_count;

    err = pmap->p.slot_alloc->alloc(pmap->p.slot_alloc, &page->mapping);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_SLOT_ALLOC);
    }

    // Map entry into the page table
    assert(!capref_is_null(ptable->u.vnode.invokable));
    err = vnode_map(ptable->u.vnode.invokable, frame, idx,
                    pmap_flags, offset, pte_count, page->mapping);

    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_VNODE_MAP);
    }

    return SYS_ERR_OK;
}

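/**
 * \brief Map a frame at #vaddr, splitting the request along L3 table boundaries
 *
 * A request that crosses an L3 (leaf) page table is issued as up to three
 * kinds of do_single_map() calls: a partial first leaf, some number of full
 * 512-entry leaves, and a partial last leaf. For example, a 4MB mapping
 * starting 16kB before an L3 boundary becomes one 4-page call, one full
 * 512-page call, and one 508-page call.
 */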
static errval_t do_map(struct pmap_aarch64 *pmap, genvaddr_t vaddr,
                       struct capref frame, size_t offset, size_t size,
                       vregion_flags_t flags, size_t *retoff, size_t *retsize)
{
    errval_t err;

    size = ROUND_UP(size, BASE_PAGE_SIZE);
    size_t pte_count = DIVIDE_ROUND_UP(size, BASE_PAGE_SIZE);
    genvaddr_t vend = vaddr + size;

    if (VMSAv8_64_L012_BASE(vaddr) == VMSAv8_64_L012_BASE(vend - 1)) {
        // fast path: the whole region fits in one leaf page table
        err = do_single_map(pmap, vaddr, vend, frame, offset, pte_count, flags);
        if (err_is_fail(err)) {
            DEBUG_ERR(err, "[do_map] in fast path");
            return err_push(err, LIB_ERR_PMAP_DO_MAP);
        }
    } else { // multiple leaf page tables
        // first leaf
        uint32_t c = VMSAv8_64_PTABLE_NUM_ENTRIES - VMSAv8_64_L3_BASE(vaddr);
        genvaddr_t temp_end = vaddr + c * BASE_PAGE_SIZE;
        err = do_single_map(pmap, vaddr, temp_end, frame, offset, c, flags);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_DO_MAP);
        }

        // map full leaves
        while (VMSAv8_64_L012_BASE(temp_end) < VMSAv8_64_L012_BASE(vend)) {
            // update vars
            vaddr = temp_end;
            temp_end = vaddr + VMSAv8_64_PTABLE_NUM_ENTRIES * BASE_PAGE_SIZE;
            offset += c * BASE_PAGE_SIZE;
            c = VMSAv8_64_PTABLE_NUM_ENTRIES;

            // do mapping
            err = do_single_map(pmap, vaddr, temp_end, frame, offset,
                    VMSAv8_64_PTABLE_NUM_ENTRIES, flags);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_PMAP_DO_MAP);
            }
        }

        // map remaining part
        offset += c * BASE_PAGE_SIZE;
        c = VMSAv8_64_L3_BASE(vend) - VMSAv8_64_L3_BASE(temp_end);
        if (c) {
            // do mapping
            err = do_single_map(pmap, temp_end, vend, frame, offset, c, flags);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_PMAP_DO_MAP);
            }
        }
    }
    if (retoff) {
        *retoff = offset;
    }
    if (retsize) {
        *retsize = size;
    }
    return SYS_ERR_OK;
}

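/**
 * \brief Worst-case number of vnode slabs needed to map #bytes
 *
 * It budgets one slab per mapped page plus one per page table that may need
 * to be created at each of the four levels. E.g. mapping 4MB (1024 pages)
 * needs at most 1024 + 2 + 1 + 1 + 1 = 1029 slabs.
 */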
static size_t
max_slabs_required(size_t bytes)
{
    // Perform a slab allocation for every page (do_map -> slab_alloc)
    size_t pages     = DIVIDE_ROUND_UP(bytes, BASE_PAGE_SIZE);

    // Perform a slab allocation for every L3 page table (get_ptable -> alloc_vnode)
    size_t l3entries = DIVIDE_ROUND_UP(pages, 512);

    // Perform a slab allocation for every L2 page table (get_ptable -> alloc_vnode)
    size_t l2entries = DIVIDE_ROUND_UP(l3entries, 512);

    // Perform a slab allocation for every L1 page table (get_ptable -> alloc_vnode)
    size_t l1entries = DIVIDE_ROUND_UP(l2entries, 512);

    // Perform a slab allocation for every L0 entry used (get_ptable -> alloc_vnode)
    size_t l0entries = DIVIDE_ROUND_UP(l1entries, 512);

    return pages + l3entries + l2entries + l1entries + l0entries;
}

/**
 * \brief Refill slabs used for metadata
 *
 * \param pmap     The pmap to refill in
 * \param request  The number of slabs the allocator must have
 *                 when the function returns
 *
 * When the current pmap is initialized,
 * it reserves some virtual address space for metadata.
 * This reserved address space is used here.
 *
 * Can only be called for the current pmap.
 * Will recursively call into itself until it has enough slabs.
 */
static errval_t refill_slabs(struct pmap_aarch64 *pmap, size_t request)
{
    errval_t err;

    /* Keep looping until we have #request slabs */
    while (slab_freecount(&pmap->slab) < request) {
        // Amount of bytes required for #request
        size_t bytes = SLAB_STATIC_SIZE(request - slab_freecount(&pmap->slab),
                                        sizeof(struct vnode));

        /* Get a frame of that size */
        struct capref cap;
        err = frame_alloc(&cap, bytes, &bytes);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_FRAME_ALLOC);
        }

        /* If we do not have enough slabs to map the frame in, recurse */
        size_t required_slabs_for_frame = max_slabs_required(bytes);
        if (slab_freecount(&pmap->slab) < required_slabs_for_frame) {
            // If we recurse, we require more slabs than to map a single page
            assert(required_slabs_for_frame > 4);

            err = refill_slabs(pmap, required_slabs_for_frame);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_SLAB_REFILL);
            }
        }

        /* Perform mapping */
        genvaddr_t genvaddr = pmap->vregion_offset;
        pmap->vregion_offset += (genvaddr_t)bytes;

        // if this assert fires, increase META_DATA_RESERVED_SPACE
        assert(pmap->vregion_offset < (vregion_get_base_addr(&pmap->vregion) +
               vregion_get_size(&pmap->vregion)));

        err = do_map(pmap, genvaddr, cap, 0, bytes,
                     VREGION_FLAGS_READ_WRITE, NULL, NULL);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_DO_MAP);
        }

        /* Grow the slab */
        lvaddr_t buf = vspace_genvaddr_to_lvaddr(genvaddr);
        slab_grow(&pmap->slab, (void*)buf, bytes);
    }

    return SYS_ERR_OK;
}

/**
 * \brief Create page mappings
 *
 * \param pmap     The pmap object
 * \param vaddr    The virtual address to create the mapping for
 * \param frame    The frame cap to map in
 * \param offset   Offset into the frame cap
 * \param size     Size of the mapping
 * \param flags    Flags for the mapping
 * \param retoff   If non-NULL, filled in with adjusted offset of mapped region
 * \param retsize  If non-NULL, filled in with adjusted size of mapped region
 */
static errval_t
map(struct pmap     *pmap,
    genvaddr_t       vaddr,
    struct capref    frame,
    size_t           offset,
    size_t           size,
    vregion_flags_t  flags,
    size_t          *retoff,
    size_t          *retsize)
{
    struct pmap_aarch64 *pmap_aarch64 = (struct pmap_aarch64 *)pmap;

    size   += BASE_PAGE_OFFSET(offset);
    size    = ROUND_UP(size, BASE_PAGE_SIZE);
    offset -= BASE_PAGE_OFFSET(offset);

    const size_t slabs_reserve = 5; // == max_slabs_required(1) for a 4-level walk
    uint64_t  slabs_free       = slab_freecount(&pmap_aarch64->slab);
    size_t    slabs_required   = max_slabs_required(size) + slabs_reserve;

    if (slabs_required > slabs_free) {
        if (get_current_pmap() == pmap) {
            errval_t err = refill_slabs(pmap_aarch64, slabs_required);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_SLAB_REFILL);
            }
        }
        else {
            size_t bytes = SLAB_STATIC_SIZE(slabs_required - slabs_free,
                                            sizeof(struct vnode));
            void *buf = malloc(bytes);
            if (!buf) {
                return LIB_ERR_MALLOC_FAIL;
            }
            slab_grow(&pmap_aarch64->slab, buf, bytes);
        }
    }

    return do_map(pmap_aarch64, vaddr, frame, offset, size, flags,
                  retoff, retsize);
}

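/**
 * \brief Unmap #pte_count pages starting at #vaddr within one L3 page table
 *
 * Only succeeds if the existing mapping covers exactly #pte_count entries;
 * partial unmapping of a larger mapping is not supported here.
 */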
static errval_t do_single_unmap(struct pmap_aarch64 *pmap, genvaddr_t vaddr,
                                size_t pte_count)
{
    errval_t err;
    struct vnode *pt = find_ptable(pmap, vaddr);
    if (pt) {
        struct vnode *page = find_vnode(pt, VMSAv8_64_L3_BASE(vaddr));
        if (page && page->u.frame.pte_count == pte_count) {
            err = vnode_unmap(pt->u.vnode.cap, page->mapping);
            if (err_is_fail(err)) {
                DEBUG_ERR(err, "vnode_unmap");
                return err_push(err, LIB_ERR_VNODE_UNMAP);
            }

            err = cap_delete(page->mapping);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_CAP_DELETE);
            }
            err = slot_free(page->mapping);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_SLOT_FREE);
            }
            remove_vnode(pt, page);
            slab_free(&pmap->slab, page);
        }
        else {
            return LIB_ERR_PMAP_FIND_VNODE;
        }
    }

    return SYS_ERR_OK;
}

/**
 * \brief Remove page mappings
 *
 * \param pmap     The pmap object
 * \param vaddr    The start of the virtual address range to remove
 * \param size     The size of the virtual address range to remove
 * \param retsize  If non-NULL, filled in with the actual size removed
 */
static errval_t
unmap(struct pmap *pmap,
      genvaddr_t   vaddr,
      size_t       size,
      size_t      *retsize)
{
    errval_t err, ret = SYS_ERR_OK;
    struct pmap_aarch64 *pmap_aarch64 = (struct pmap_aarch64*)pmap;
    size = ROUND_UP(size, BASE_PAGE_SIZE);
    size_t pte_count = size / BASE_PAGE_SIZE;
    genvaddr_t vend = vaddr + size;

    if (VMSAv8_64_L012_BASE(vaddr) == VMSAv8_64_L012_BASE(vend - 1)) {
        // fast path
        err = do_single_unmap(pmap_aarch64, vaddr, pte_count);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_UNMAP);
        }
    } else { // slow path
        // unmap first leaf
        uint32_t c = VMSAv8_64_PTABLE_NUM_ENTRIES - VMSAv8_64_L3_BASE(vaddr);
        err = do_single_unmap(pmap_aarch64, vaddr, c);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_UNMAP);
        }

        // unmap full leaves
        vaddr += c * BASE_PAGE_SIZE;
        while (VMSAv8_64_L012_BASE(vaddr) < VMSAv8_64_L012_BASE(vend)) {
            c = VMSAv8_64_PTABLE_NUM_ENTRIES;
            err = do_single_unmap(pmap_aarch64, vaddr, c);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_PMAP_UNMAP);
            }
            vaddr += c * BASE_PAGE_SIZE;
        }

        // unmap remaining part
        c = VMSAv8_64_L3_BASE(vend) - VMSAv8_64_L3_BASE(vaddr);
        if (c) {
            err = do_single_unmap(pmap_aarch64, vaddr, c);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_PMAP_UNMAP);
            }
        }
    }

    if (retsize) {
        *retsize = size;
    }

    return ret;
}

/**
 * \brief Determine a suitable address for a given memory object
 *
 * \param pmap    The pmap object
 * \param memobj  The memory object to determine the address for
 * \param alignment Minimum alignment
 * \param vaddr   Pointer to return the determined address
 *
 * Relies on vspace.c code maintaining an ordered list of vregions
 */
static errval_t
determine_addr(struct pmap   *pmap,
               struct memobj *memobj,
               size_t        alignment,
               genvaddr_t    *vaddr)
{
    assert(pmap->vspace->head);

    assert(alignment <= BASE_PAGE_SIZE); // NYI

    struct vregion *walk = pmap->vspace->head;
    while (walk->next) { // Try to insert between existing mappings
        genvaddr_t walk_base = vregion_get_base_addr(walk);
        genvaddr_t walk_size = vregion_get_size(walk);
        genvaddr_t next_base = vregion_get_base_addr(walk->next);

        if (next_base > walk_base + walk_size + memobj->size &&
            walk_base + walk_size > VSPACE_BEGIN) { // Only consider gaps above VSPACE_BEGIN
            *vaddr = walk_base + walk_size;
            return SYS_ERR_OK;
        }
        walk = walk->next;
    }

    *vaddr = vregion_get_base_addr(walk) + vregion_get_size(walk);
    return SYS_ERR_OK;
}

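/**
 * \brief Change the flags on #pages already-mapped pages starting at #vaddr
 *
 * The affected range must lie within a single existing frame mapping in one
 * L3 table; ranges straddling a mapping boundary are rejected.
 */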
static errval_t do_single_modify_flags(struct pmap_aarch64 *pmap, genvaddr_t vaddr,
                                       size_t pages, vregion_flags_t flags)
{
    errval_t err = SYS_ERR_OK;
    struct vnode *ptable = find_ptable(pmap, vaddr);
    uint16_t ptentry = VMSAv8_64_L3_BASE(vaddr);
    if (ptable) {
        struct vnode *page = find_vnode(ptable, ptentry);
        if (page) {
            if (inside_region(ptable, ptentry, pages)) {
                // we're modifying part of a valid mapped region
                // arguments to invocation: invoke frame cap, first affected
                // page (as offset from first page in mapping), #affected
                // pages, new flags. Invocation should check compatibility of
                // new set of flags with cap permissions.
                size_t off = ptentry - page->entry;
                uintptr_t pmap_flags = vregion_flags_to_kpi_paging_flags(flags);
                // VA hinting NYI on ARMv8, always passing 0
                err = invoke_mapping_modify_flags(page->mapping, off, pages, pmap_flags, 0);
                if (err_is_fail(err)) {
                    DEBUG_ERR(err, "invoke_mapping_modify_flags");
                }
                return err;
            } else {
                // overlaps some region border
                return LIB_ERR_PMAP_EXISTING_MAPPING;
            }
        }
    }
    return SYS_ERR_OK;
}

/**
 * \brief Modify page mapping
 *
 * \param pmap     The pmap object
 * \param vaddr    The virtual address of the mapping to modify
 * \param size     The size of the region to modify
 * \param flags    New flags for the mapping
 * \param retsize  If non-NULL, filled in with the actual size modified
 */
static errval_t
modify_flags(struct pmap     *pmap,
             genvaddr_t       vaddr,
             size_t           size,
             vregion_flags_t  flags,
             size_t          *retsize)
{
    errval_t err, ret = SYS_ERR_OK;
    struct pmap_aarch64 *pmap_aarch64 = (struct pmap_aarch64*)pmap;
    size = ROUND_UP(size, BASE_PAGE_SIZE);
    size_t pte_count = size / BASE_PAGE_SIZE;
    genvaddr_t vend = vaddr + size;

    if (VMSAv8_64_L012_BASE(vaddr) == VMSAv8_64_L012_BASE(vend - 1)) {
        // fast path
        err = do_single_modify_flags(pmap_aarch64, vaddr, pte_count, flags);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_UNMAP);
        }
    } else { // slow path
        // modify first leaf
        uint32_t c = VMSAv8_64_PTABLE_NUM_ENTRIES - VMSAv8_64_L3_BASE(vaddr);
        err = do_single_modify_flags(pmap_aarch64, vaddr, c, flags);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_UNMAP);
        }

        // modify full leaves
        vaddr += c * BASE_PAGE_SIZE;
        while (VMSAv8_64_L012_BASE(vaddr) < VMSAv8_64_L012_BASE(vend)) {
            c = VMSAv8_64_PTABLE_NUM_ENTRIES;
            err = do_single_modify_flags(pmap_aarch64, vaddr, c, flags);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_PMAP_UNMAP);
            }
            vaddr += c * BASE_PAGE_SIZE;
        }

        // modify remaining part
        c = VMSAv8_64_L3_BASE(vend) - VMSAv8_64_L3_BASE(vaddr);
        if (c) {
            err = do_single_modify_flags(pmap_aarch64, vaddr, c, flags);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_PMAP_UNMAP);
            }
        }
    }

    if (retsize) {
        *retsize = size;
    }

    return ret;
}

/**
 * \brief Query existing page mapping
 *
 * \param pmap     The pmap object
 * \param vaddr    The virtual address to query
 * \param info     If non-NULL, filled in with the mapping information
 *                 (base address, size, cap, offset and flags)
 */
static errval_t lookup(struct pmap *pmap, genvaddr_t vaddr,
                       struct pmap_mapping_info *info)
{
    USER_PANIC("NYI");
    return 0;
}


static errval_t
serialise(struct pmap *pmap, void *buf, size_t buflen)
{
    // Unimplemented: ignored
    return SYS_ERR_OK;
}

static errval_t
deserialise(struct pmap *pmap, void *buf, size_t buflen)
{
    // Unimplemented: we start with an empty pmap, and avoid the bottom of the
    // address space
    return SYS_ERR_OK;
}

static struct pmap_funcs pmap_funcs = {
    .determine_addr = determine_addr,
    .map = map,
    .unmap = unmap,
    .modify_flags = modify_flags,
    .lookup = lookup,
    .serialise = serialise,
    .deserialise = deserialise,
};

/**
 * \brief Initialize the pmap object
 */
errval_t
pmap_init(struct pmap   *pmap,
          struct vspace *vspace,
          struct capref  vnode,
          struct slot_allocator *opt_slot_alloc)
{
    struct pmap_aarch64* pmap_aarch64 = (struct pmap_aarch64*)pmap;

    /* Generic portion */
    pmap->f = pmap_funcs;
    pmap->vspace = vspace;

    if (opt_slot_alloc != NULL) {
        pmap->slot_alloc = opt_slot_alloc;
    } else { /* use default allocator for this dispatcher */
        pmap->slot_alloc = get_default_slot_allocator();
    }

    // Slab allocator for vnodes
    slab_init(&pmap_aarch64->slab, sizeof(struct vnode), NULL);
    slab_grow(&pmap_aarch64->slab,
              pmap_aarch64->slab_buffer,
              sizeof(pmap_aarch64->slab_buffer));

    pmap_aarch64->root.is_vnode         = true;
    pmap_aarch64->root.u.vnode.cap      = vnode;
    pmap_aarch64->root.u.vnode.invokable = vnode;

    if (get_croot_addr(vnode) != CPTR_ROOTCN) {
        errval_t err = slot_alloc(&pmap_aarch64->root.u.vnode.invokable);
        assert(err_is_ok(err));
        err = cap_copy(pmap_aarch64->root.u.vnode.invokable, vnode);
        assert(err_is_ok(err));
    }
    assert(!capref_is_null(pmap_aarch64->root.u.vnode.cap));
    assert(!capref_is_null(pmap_aarch64->root.u.vnode.invokable));
    pmap_aarch64->root.u.vnode.children  = NULL;
    pmap_aarch64->root.next              = NULL;

    // choose a minimum mappable VA for most domains; enough to catch NULL
    // pointer derefs with suitably large offsets
    pmap_aarch64->min_mappable_va = 64 * 1024;

    // maximum mappable VA: keep user mappings below the kernel's portion of
    // the address space
    pmap_aarch64->max_mappable_va = (genvaddr_t)0xffffff8000000000;

    return SYS_ERR_OK;
}

errval_t pmap_current_init(bool init_domain)
{
    struct pmap_aarch64 *pmap_aarch64 = (struct pmap_aarch64*)get_current_pmap();

    // To reserve a block of virtual address space,
    // a vregion representing the address space is required.
    // We construct a placeholder one here and add it to the vregion list.
    struct vregion *vregion = &pmap_aarch64->vregion;
    assert((void*)vregion > (void*)pmap_aarch64);
    assert((void*)vregion < (void*)(pmap_aarch64 + 1));
    vregion->vspace = NULL;
    vregion->memobj = NULL;
    vregion->base   = VSPACE_BEGIN;
    vregion->offset = 0;
    vregion->size   = META_DATA_RESERVED_SPACE;
    vregion->flags  = 0;
    vregion->next = NULL;

    struct vspace *vspace = pmap_aarch64->p.vspace;
    assert(!vspace->head);
    vspace->head = vregion;

    pmap_aarch64->vregion_offset = pmap_aarch64->vregion.base;

    //pmap_aarch64->min_mappable_va = VSPACE_BEGIN;

    return SYS_ERR_OK;
}
