/**
 * \file
 * \brief pmap management
 */

/*
 * Copyright (c) 2010-2015 ETH Zurich.
 * All rights reserved.
 *
 * This file is distributed under the terms in the attached LICENSE file.
 * If you do not find this file, copies can be found by writing to:
 * ETH Zurich D-INFK, Universitaetstr. 6, CH-8092 Zurich. Attn: Systems Group.
 */

/*
 * There was some minor difficulty here with mapping the CPU's native
 * page table arrangement onto Barrelfish. The problem lies with resource
 * bootstrapping: the bootstrap RAM allocator hands out whole 4kB pages.
 *
 * After reworking retype to be range-based, we can now choose to create a
 * single 1kB vnode from a 4kB frame, so we currently waste 3kB when creating
 * ARM L2 vnodes before we have a connection to the memory server.
 */
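
/*
 * Address-split convention used throughout this file (see the notes in
 * get_ptable() and do_single_map() below): the hardware L1 table has 4096
 * entries, but L2 tables are allocated as one 4kB page holding four
 * consecutive 1kB tables, so user space tracks entries with a 10/10/12
 * split:
 *
 *   vaddr[31:22]  L1 index   (10 bits; one entry covers 4MB)
 *   vaddr[21:12]  L2 index   (10 bits, across the four 1kB tables)
 *   vaddr[11:0]   page offset
 *
 * Worked example, assuming ARM_L1_OFFSET/ARM_L2_OFFSET implement this
 * split: vaddr 0x40001000 -> L1 index 0x100, L2 index 1, offset 0.
 */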

#include <barrelfish/barrelfish.h>
#include <barrelfish/caddr.h>
#include <barrelfish/invocations_arch.h>
#include <stdio.h>

// Location of VSpace managed by this system.
#define VSPACE_BEGIN   ((lvaddr_t)1UL*1024*1024*1024)   // 0x40000000

// Amount of virtual address space reserved for mapping frames
// backing refill_slabs; increased from 128 pages for the PandaBoard port.
#define META_DATA_RESERVED_SPACE (BASE_PAGE_SIZE * 1024)

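/**
 * \brief Translate generic vregion flags into the architecture-neutral KPI
 * paging flags passed to the kernel.
 *
 * ARM has no MPB, write-combining or VTD-snoop support, so those bits are
 * silently masked out; guard mappings get no access permissions at all.
 */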
static inline uintptr_t
vregion_flags_to_kpi_paging_flags(vregion_flags_t flags)
{
    STATIC_ASSERT(0x1ff == VREGION_FLAGS_MASK, "");
    STATIC_ASSERT(0x0f == KPI_PAGING_FLAGS_MASK, "");
    STATIC_ASSERT(VREGION_FLAGS_READ    == KPI_PAGING_FLAGS_READ,    "");
    STATIC_ASSERT(VREGION_FLAGS_WRITE   == KPI_PAGING_FLAGS_WRITE,   "");
    STATIC_ASSERT(VREGION_FLAGS_EXECUTE == KPI_PAGING_FLAGS_EXECUTE, "");
    STATIC_ASSERT(VREGION_FLAGS_NOCACHE == KPI_PAGING_FLAGS_NOCACHE, "");
    if ((flags & VREGION_FLAGS_MPB) != 0) {
        // XXX: ignore MPB flag on ARM, otherwise the assert below fires -AB
        flags &= ~VREGION_FLAGS_MPB;
    }
    if ((flags & VREGION_FLAGS_WRITE_COMBINING) != 0) {
        // XXX mask out write-combining flag on ARM
        flags &= ~VREGION_FLAGS_WRITE_COMBINING;
    }
    if ((flags & VREGION_FLAGS_VTD_SNOOP) != 0) {
        // XXX mask out vtd-snooping flag on ARM
        flags &= ~VREGION_FLAGS_VTD_SNOOP;
    }
    if ((flags & VREGION_FLAGS_GUARD) != 0) {
        flags = 0;
    }
    assert(0 == (~KPI_PAGING_FLAGS_MASK & (uintptr_t)flags));
    return (uintptr_t)flags;
}
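// Example: VREGION_FLAGS_READ_WRITE translates to
// KPI_PAGING_FLAGS_READ | KPI_PAGING_FLAGS_WRITE; the static asserts above
// guarantee that the corresponding bit values coincide.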

// debug print preprocessor flag for this file
//#define LIBBARRELFISH_DEBUG_PMAP

/**
 * \brief check whether region A = [start_a .. end_a) overlaps
 * region B = [start_b .. end_b).
 * \return true iff A overlaps B
 */
static bool is_overlapping(uint16_t start_a, uint16_t end_a, uint16_t start_b, uint16_t end_b)
{
    return
        // A starts strictly before B and extends to or past B's end
        (start_a < start_b && end_a >= end_b)
        // start_a inside B
        || (start_a >= start_b && start_a < end_b)
        // end_a inside B
        || (end_a > start_b && end_a < end_b);
}
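// Intervals are half-open, so regions that merely touch do not overlap:
//   is_overlapping(2, 5, 4, 8) == true   ([2,5) and [4,8) share [4,5))
//   is_overlapping(0, 2, 2, 4) == false  ([0,2) and [2,4) only touch at 2)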

/**
 * \brief Check whether vnode `root' has entries in between [entry ..
 * entry+len).
 * \param root the vnode to look at
 * \param entry first entry of the region to check
 * \param len   length of the region to check
 * \param only_pages true == do not report previously allocated lower-level
 *                   page tables that are empty
 * \return true iff entries exist in region.
 */
#if defined(LIBBARRELFISH_DEBUG_PMAP)
#define DEBUG_HAS_VNODE
#endif
static bool has_vnode(struct vnode *root, uint32_t entry, size_t len,
               bool only_pages)
{
    assert(root != NULL);
    assert(root->is_vnode);
    struct vnode *n;

    uint32_t end_entry = entry + len;
#ifdef DEBUG_HAS_VNODE
    debug_printf("%s: checking region [%"PRIu32"--%"PRIu32"], only_pages = %d\n",
            __FUNCTION__, entry, end_entry, only_pages);
#endif

    for (n = root->u.vnode.children; n; n = n->next) {
        // region to check [entry .. end_entry)
        if (n->is_vnode && n->entry >= entry && n->entry < end_entry) {
            if (only_pages) {
                // only report this page table if it still contains pages;
                // otherwise keep scanning the remaining siblings
                if (has_vnode(n, 0, ARM_L2_TABLE_BYTES, true)) {
                    return true;
                }
                continue;
            }
#ifdef LIBBARRELFISH_DEBUG_PMAP
            debug_printf("1: found page table inside our region\n");
#endif
            return true;
        } else if (n->is_vnode) {
            // all other vnodes do not overlap with us, so go to next
            assert(n->entry < entry || n->entry >= end_entry);
            continue;
        } else {
            // not vnode
            uint32_t end = n->entry + n->u.frame.pte_count;
#ifdef DEBUG_HAS_VNODE
            debug_printf("%s: looking at region: [%"PRIu32"--%"PRIu32"]\n",
                    __FUNCTION__, n->entry, end);
#endif

            // do checks
            if (is_overlapping(entry, end_entry, n->entry, end)) {
                return true;
            }
        }
    }

    return false;
}

/**
 * \brief Starting at a given root, return the vnode with entry equal to #entry
 * \return vnode at index `entry` or NULL
 */
#ifdef LIBBARRELFISH_DEBUG_PMAP
#define DEBUG_FIND_VNODE
#endif
static struct vnode *find_vnode(struct vnode *root, uint16_t entry)
{
    assert(root != NULL);
    assert(root->is_vnode);
    struct vnode *n;

#ifdef DEBUG_FIND_VNODE
    debug_printf("%s: looking for %"PRIu16"\n", __FUNCTION__, entry);
#endif

    for(n = root->u.vnode.children; n != NULL; n = n->next) {
        if (n->is_vnode &&
            is_overlapping(entry, entry + 1, n->entry, n->entry + 1)) {
#ifdef DEBUG_FIND_VNODE
            debug_printf("%s: found ptable at [%"PRIu16"--%"PRIu16"]\n",
                    __FUNCTION__, n->entry, n->entry + 1);
#endif
            return n;
        }
        else if (n->is_vnode) {
            assert(!is_overlapping(entry, entry + 1, n->entry, n->entry + 1));
            // ignore all other vnodes;
            continue;
        }

        // not vnode
        assert(!n->is_vnode);
        uint16_t end = n->entry + n->u.frame.pte_count;
#ifdef DEBUG_FIND_VNODE
        debug_printf("%s: looking at section [%"PRIu16"--%"PRIu16"]\n", __FUNCTION__, n->entry, end);
#endif
        if (n->entry <= entry && entry < end) {
#ifdef DEBUG_FIND_VNODE
            debug_printf("%d \\in [%d, %d)\n", entry, n->entry, end);
#endif
            return n;
        }
    }
    return NULL;
}

/**
 * \brief check whether region [entry, entry+npages) is contained in a child
 * of `root`.
 */
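// Note: unlike has_vnode(), which reports any overlap, this only returns
// true when the whole region fits inside a single already-mapped frame.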
static bool inside_region(struct vnode *root, uint32_t entry, uint32_t npages)
{
    assert(root != NULL);
    assert(root->is_vnode);

    struct vnode *n;

    for (n = root->u.vnode.children; n; n = n->next) {
        if (!n->is_vnode) {
            uint16_t end = n->entry + n->u.frame.pte_count;
            if (n->entry <= entry && entry + npages <= end) {
                return true;
            }
        }
    }

    return false;
}

/**
 * \brief remove vnode `item` from linked list of children of `root`
 */
static void remove_vnode(struct vnode *root, struct vnode *item)
{
    assert(root->is_vnode);
    struct vnode *walk = root->u.vnode.children;
    struct vnode *prev = NULL;
    while (walk) {
        if (walk == item) {
            if (prev) {
                prev->next = walk->next;
                return;
            } else {
                root->u.vnode.children = walk->next;
                return;
            }
        }
        prev = walk;
        walk = walk->next;
    }
    USER_PANIC("Should not get here");
}

/**
 * \brief (recursively) remove empty page tables in region [entry ..
 * entry+len) in vnode `root`.
 */
#ifdef LIBBARRELFISH_DEBUG_PMAP
#define DEBUG_REMOVE_EMPTY_VNODES
#endif
static void remove_empty_vnodes(struct slab_allocator *vnode_alloc, struct vnode *root,
                         uint32_t entry, size_t len)
{
    // precondition: root does not have pages in [entry, entry+len)
    assert(!has_vnode(root, entry, len, true));

    errval_t err;
    uint32_t end_entry = entry + len;
    struct vnode *next;
    for (struct vnode *n = root->u.vnode.children; n; n = next) {
        // remember the successor now; `n` may be freed below
        next = n->next;
        // sanity check and skip leaf entries
        if (!n->is_vnode) {
            continue;
        }
        // here we know that all vnodes we're interested in are
        // page tables
        assert(n->is_vnode);

        // Unmap vnode if it is in range [entry .. entry+len)
        if (n->entry >= entry && n->entry < end_entry) {
            err = vnode_unmap(root->u.vnode.invokable, n->mapping);
            assert(err_is_ok(err));

            if (!capcmp(n->u.vnode.cap, n->u.vnode.invokable)) {
                // delete invokable pt cap if it's a real copy
                err = cap_destroy(n->u.vnode.invokable);
                assert(err_is_ok(err));
            }

            // delete last copy of pt cap
            err = cap_destroy(n->u.vnode.cap);
            assert(err_is_ok(err));

            // remove vnode from list
            remove_vnode(root, n);
            slab_free(vnode_alloc, n);
        }
    }
}

/**
 * \brief Allocates a new VNode, adding it to the page table and our metadata
 */
static errval_t alloc_vnode(struct pmap_arm *pmap_arm, struct vnode *root,
                            enum objtype type, uint32_t entry,
                            struct vnode **retvnode)
{
    assert(root->is_vnode);
    errval_t err;

    struct vnode *newvnode = slab_alloc(&pmap_arm->slab);
    if (newvnode == NULL) {
        return LIB_ERR_SLAB_ALLOC_FAIL;
    }
    newvnode->is_vnode = true;

    // The VNode capability
    err = slot_alloc(&newvnode->u.vnode.cap);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_SLOT_ALLOC);
    }

    err = vnode_create(newvnode->u.vnode.cap, type);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_VNODE_CREATE);
    }

    // XXX: do we need to put master copy in other cspace?
    newvnode->u.vnode.invokable = newvnode->u.vnode.cap;

    // The VNode meta data
    newvnode->entry            = entry;
    newvnode->next             = root->u.vnode.children;
    root->u.vnode.children     = newvnode;
    newvnode->u.vnode.children = NULL;

    err = slot_alloc(&newvnode->mapping);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_SLOT_ALLOC);
    }

    err = vnode_map(root->u.vnode.invokable, newvnode->u.vnode.cap,
            entry, KPI_PAGING_FLAGS_READ | KPI_PAGING_FLAGS_WRITE, 0, 1,
            newvnode->mapping);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_PMAP_MAP);
    }

    if (retvnode) {
        *retvnode = newvnode;
    }
    return SYS_ERR_OK;
}

/**
 * \brief Returns the vnode for the pagetable mapping a given vspace address
 */
#ifdef LIBBARRELFISH_DEBUG_PMAP
#define DEBUG_GET_PTABLE
#endif
static errval_t get_ptable(struct pmap_arm  *pmap,
                           genvaddr_t        vaddr,
                           struct vnode    **ptable)
{
    // NB Strictly there are 12 bits in the ARM L1, but the allocation unit
    // of L2 is 1 page of L2 entries (4 tables), so we use 10 bits for the
    // L1 idx here
    uintptr_t idx = ARM_L1_OFFSET(vaddr);
    if ((*ptable = find_vnode(&pmap->root, idx)) == NULL)
    {
        // L1 table entries point to L2 tables so allocate an L2
        // table for this L1 entry.

        struct vnode *tmp = NULL; // Tmp variable for passing to alloc_vnode

        errval_t err = alloc_vnode(pmap, &pmap->root, ObjType_VNode_ARM_l2,
                                   idx, &tmp);
        if (err_is_fail(err)) {
            DEBUG_ERR(err, "alloc_vnode");
            return err_push(err, LIB_ERR_PMAP_ALLOC_VNODE);
        }
        assert(tmp != NULL);
        *ptable = tmp; // Set argument to received value
    }
    assert(ptable);
    struct vnode *pt = *ptable;
    if (!pt->is_vnode) {
        debug_printf("found section @%d, trying to get ptable for %"PRIuPTR"\n",
                pt->entry, idx);
    }
    assert(pt->is_vnode);
#ifdef DEBUG_GET_PTABLE
    debug_printf("have ptable: %p\n", pt);
#endif

    return SYS_ERR_OK;
}

static struct vnode *find_ptable(struct pmap_arm  *pmap,
                                 genvaddr_t vaddr)
{
    // NB Strictly there are 12 bits in the ARM L1, but the allocation unit
    // of L2 is 1 page of L2 entries (4 tables), so we use 10 bits here too
    uintptr_t idx = ARM_L1_OFFSET(vaddr);
    return find_vnode(&pmap->root, idx);
}

static errval_t do_single_map(struct pmap_arm *pmap, genvaddr_t vaddr, genvaddr_t vend,
                              struct capref frame, size_t offset, size_t pte_count,
                              vregion_flags_t flags)
{
    errval_t err = SYS_ERR_OK;
    // Get the page table
    struct vnode *ptable;
    uintptr_t entry;
    bool is_large = false;

    struct frame_identity fi;
    err = frame_identify(frame, &fi);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_PMAP_FRAME_IDENTIFY);
    }

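    // Decide between the two mapping granularities supported here: a 1MB
    // section goes directly into the L1 table when the caller asked for
    // VREGION_FLAGS_LARGE and both vaddr and the frame's base are
    // 1MB-aligned with at least 1MB of frame available; everything else
    // becomes 4kB mappings in an L2 table.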
    if (flags & VREGION_FLAGS_LARGE &&
        (vaddr & LARGE_PAGE_MASK) == 0 &&
        fi.bytes >= LARGE_PAGE_SIZE &&
        (fi.base & LARGE_PAGE_MASK) == 0) {
        // section mapping (1MB), mapped in the L1 table at root
        ptable = &pmap->root;
        entry = ARM_L1_OFFSET(vaddr);
        is_large = true;
#ifdef LIBBARRELFISH_DEBUG_PMAP
        debug_printf("do_single_map: large path: entry=%zu\n", entry);
#endif
    } else {
#ifdef LIBBARRELFISH_DEBUG_PMAP
        debug_printf("%s: 4k path: mapping %"PRIxGENVADDR", %zu entries\n", __FUNCTION__, vaddr, pte_count);
        debug_printf("4k path: L1 entry: %zu\n", ARM_L1_OFFSET(vaddr));
#endif
        // 4k mapping
        // XXX: reassess the following note -SG
        // NOTE: strictly speaking an l2 entry only has 8 bits, while an l1
        // entry has 12 bits, but due to the way Barrelfish allocates l1 and
        // l2 tables, we use 10 bits for the entry here and in the map syscall
        err = get_ptable(pmap, vaddr, &ptable);
        if (err_is_fail(err)) {
            DEBUG_ERR(err, "get_ptable() in do_single_map");
            return err_push(err, LIB_ERR_PMAP_GET_PTABLE);
        }
        entry = ARM_L2_OFFSET(vaddr);
#ifdef LIBBARRELFISH_DEBUG_PMAP
        debug_printf("%s: 4k path: L2 entry=%zu\n", __FUNCTION__, entry);
        debug_printf("%s: ptable->is_vnode = %d\n",
                __FUNCTION__, ptable->is_vnode);
#endif
    }

    // convert flags
    flags &= ~(VREGION_FLAGS_LARGE | VREGION_FLAGS_HUGE);
    uintptr_t pmap_flags = vregion_flags_to_kpi_paging_flags(flags);

    // check if there is an overlapping mapping
    if (has_vnode(ptable, entry, pte_count, false)) {
#ifdef LIBBARRELFISH_DEBUG_PMAP
        debug_printf("has_vnode, only_pages=false  returned true\n");
#endif
        if (has_vnode(ptable, entry, pte_count, true)) {
            printf("page already exists in 0x%"
                    PRIxGENVADDR"--0x%"PRIxGENVADDR"\n", vaddr, vend);
            return LIB_ERR_PMAP_EXISTING_MAPPING;
        } else {
#ifdef LIBBARRELFISH_DEBUG_PMAP
            debug_printf("has_vnode, only_pages=true  returned false, cleaning up empty ptables\n");
#endif
            // clean out empty page tables. We do this here because we benefit
            // from having the page tables in place when doing lots of small
            // mappings
            // XXX: TODO: fix this + mapping of L2 to work on single 1k
            // chunks
            remove_empty_vnodes(&pmap->slab, ptable, entry, pte_count);
        }
    }

    // Create user level datastructure for the mapping
    struct vnode *page = slab_alloc(&pmap->slab);
    assert(page);
    page->is_vnode = false;
    page->entry = entry;
    page->next  = ptable->u.vnode.children;
    ptable->u.vnode.children = page;
    page->u.frame.cap = frame;
    page->u.frame.flags = flags;
    page->u.frame.pte_count = pte_count;

    err = slot_alloc(&page->mapping);
    if (err_is_fail(err)) {
        // back out the metadata we just linked in
        remove_vnode(ptable, page);
        slab_free(&pmap->slab, page);
        return err_push(err, LIB_ERR_SLOT_ALLOC);
    }

    // Map entry into the page table
    err = vnode_map(ptable->u.vnode.invokable, frame, entry,
                    pmap_flags, offset, pte_count,
                    page->mapping);
    if (err_is_fail(err)) {
        errval_t err2 = slot_free(page->mapping);
        if (err_is_fail(err2)) {
            err = err_push(err, err2);
        }
        remove_vnode(ptable, page);
        slab_free(&pmap->slab, page);
        return err_push(err, LIB_ERR_VNODE_MAP);
    }
    return SYS_ERR_OK;
}

static errval_t do_map(struct pmap_arm *pmap, genvaddr_t vaddr,
                       struct capref frame, size_t offset, size_t size,
                       vregion_flags_t flags, size_t *retoff, size_t *retsize)
{
    errval_t err;
    size_t page_size;
    size_t offset_level;

    // get base address and size of frame
    struct frame_identity fi;
    err = frame_identify(frame, &fi);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_PMAP_DO_MAP);
    }

    // determine mapping specific parts
    if (flags & VREGION_FLAGS_LARGE &&
        (vaddr & LARGE_PAGE_MASK) == 0 &&
        fi.bytes >= LARGE_PAGE_SIZE &&
        (fi.base & LARGE_PAGE_MASK) == 0) {
        // section mapping (1MB)
        page_size = LARGE_PAGE_SIZE;
        offset_level = ARM_L1_OFFSET(vaddr);
#ifdef LIBBARRELFISH_DEBUG_PMAP
        printf("do_map: large path\n");
        printf("page_size: %zx, size: %zx\n", page_size, size);
#endif
    } else {
        // normal 4k mapping
        page_size = BASE_PAGE_SIZE;
        offset_level = ARM_L2_OFFSET(vaddr);
    }

    size = ROUND_UP(size, page_size);
    size_t pte_count = DIVIDE_ROUND_UP(size, page_size);
    if (flags & VREGION_FLAGS_LARGE) {
#ifdef LIBBARRELFISH_DEBUG_PMAP
        printf("#pages: %zu\n", pte_count);
#endif
    }
    genvaddr_t vend = vaddr + size;

    if (fi.bytes < size) {
        return LIB_ERR_PMAP_FRAME_SIZE;
    }

#ifdef LIBBARRELFISH_DEBUG_PMAP
    printf("do_map: mapping %zu pages (size=%zx), from %zu.%zu\n",
            pte_count, page_size, ARM_L1_OFFSET(vaddr), ARM_L2_OFFSET(vaddr));
    printf("page_size: %zx, size: %zx\n", page_size, size);
#endif

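    // When a 4k mapping spans several L2 leaves, it is split into the tail
    // of the first leaf, a run of full leaves, and the head of the last
    // leaf. Worked example, assuming ARM_L2_MAX_ENTRIES == 1024 under the
    // 10-bit split: 12MB at vaddr 0x40100000 (L2 offset 256) is mapped as
    // 768 + 1024 + 1024 + 256 pages, each chunk via one do_single_map call.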
    // should be trivially true for section mappings
    if ((ARM_L1_OFFSET(vaddr) == ARM_L1_OFFSET(vend)) ||
        flags & VREGION_FLAGS_LARGE) {
        // fast path
        err = do_single_map(pmap, vaddr, vend, frame, offset, pte_count, flags);
        if (err_is_fail(err)) {
            DEBUG_ERR(err, "[do_map] in fast path");
            return err_push(err, LIB_ERR_PMAP_DO_MAP);
        }
    } else { // multiple leaf page tables
        // first leaf
        uint32_t c = ARM_L2_MAX_ENTRIES - offset_level;
        genvaddr_t temp_end = vaddr + c * page_size;
        err = do_single_map(pmap, vaddr, temp_end, frame, offset, c, flags);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_DO_MAP);
        }

        // map full leaves
        while (ARM_L1_OFFSET(temp_end) < ARM_L1_OFFSET(vend)) {
            // update vars
            vaddr = temp_end;
            temp_end = vaddr + ARM_L2_MAX_ENTRIES * page_size;
            offset += c * page_size;
            c = ARM_L2_MAX_ENTRIES;

            // do mapping
            err = do_single_map(pmap, vaddr, temp_end, frame, offset, ARM_L2_MAX_ENTRIES, flags);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_PMAP_DO_MAP);
            }
        }

        // map remaining part
        offset += c * page_size;
        c = ARM_L2_OFFSET(vend) - ARM_L2_OFFSET(temp_end);
        if (c) {
            // do mapping
            err = do_single_map(pmap, temp_end, vend, frame, offset, c, flags);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_PMAP_DO_MAP);
            }
        }
    }
    if (retoff) {
        *retoff = offset;
    }
    if (retsize) {
        *retsize = size;
    }
    return SYS_ERR_OK;
}

static size_t
max_slabs_required(size_t bytes)
{
    // Perform a slab allocation for every page (do_map -> slab_alloc)
    size_t pages     = DIVIDE_ROUND_UP(bytes, BASE_PAGE_SIZE);
    // Perform a slab allocation for every L2 (get_ptable -> find_vnode)
    size_t l2entries = DIVIDE_ROUND_UP(pages, ARM_L2_MAX_ENTRIES);
    // Perform a slab allocation for every L1 (do_map -> find_vnode)
    size_t l1entries = DIVIDE_ROUND_UP(l2entries, ARM_L1_MAX_ENTRIES);
    return pages + l2entries + l1entries;
}
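// Worked example for max_slabs_required(), assuming ARM_L2_MAX_ENTRIES ==
// 1024 and ARM_L1_MAX_ENTRIES == 1024: bytes = 16MB gives pages = 4096,
// l2entries = 4 and l1entries = 1, so at most 4101 vnode slabs are needed.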
static size_t max_slabs_required_large(size_t bytes)
{
    // always need only one slab, as we can represent any size section mapping
    // in a single struct vnode.
    return 1;
}

/**
 * \brief Refill slabs used for metadata
 *
 * \param pmap     The pmap to refill in
 * \param request  The number of slabs the allocator must have
 *                 when the function returns
 *
 * When the current pmap is initialized, it reserves some virtual address
 * space for metadata. This reserved address space is used here.
 *
 * Can only be called for the current pmap.
 * Will recursively call into itself until it has enough slabs.
 */
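// Bootstrap note: the very first vnode allocations are served from the
// static slab_buffer handed to slab_grow() in pmap_init(); afterwards,
// refill_slabs() maps freshly allocated frames into the vregion reserved
// by pmap_current_init() (META_DATA_RESERVED_SPACE bytes at VSPACE_BEGIN),
// recursing first whenever mapping the new frame itself needs more slabs.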
static errval_t refill_slabs(struct pmap_arm *pmap, size_t request)
{
    errval_t err;

    /* Keep looping until we have #request slabs */
    while (slab_freecount(&pmap->slab) < request) {
        // Amount of bytes required for #request
        size_t bytes = SLAB_STATIC_SIZE(request - slab_freecount(&pmap->slab),
                                        sizeof(struct vnode));

        /* Get a frame of that size */
        struct capref cap;
        err = frame_alloc(&cap, bytes, &bytes);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_FRAME_ALLOC);
        }

        /* If we do not have enough slabs to map the frame in, recurse */
        size_t required_slabs_for_frame = max_slabs_required(bytes);
        if (slab_freecount(&pmap->slab) < required_slabs_for_frame) {
            // If we recurse, we require more slabs than to map a single page
            assert(required_slabs_for_frame > 4);

            err = refill_slabs(pmap, required_slabs_for_frame);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_SLAB_REFILL);
            }
        }

        /* Perform mapping */
        genvaddr_t genvaddr = pmap->vregion_offset;
        pmap->vregion_offset += (genvaddr_t)bytes;

        // if this assert fires, increase META_DATA_RESERVED_SPACE
        assert(pmap->vregion_offset < (vregion_get_base_addr(&pmap->vregion) +
               vregion_get_size(&pmap->vregion)));

        err = do_map(pmap, genvaddr, cap, 0, bytes,
                     VREGION_FLAGS_READ_WRITE, NULL, NULL);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_DO_MAP);
        }

        /* Grow the slab */
        lvaddr_t buf = vspace_genvaddr_to_lvaddr(genvaddr);
        slab_grow(&pmap->slab, (void*)buf, bytes);
    }

    return SYS_ERR_OK;
}

/**
 * \brief Create page mappings
 *
 * \param pmap     The pmap object
 * \param vaddr    The virtual address to create the mapping for
 * \param frame    The frame cap to map in
 * \param offset   Offset into the frame cap
 * \param size     Size of the mapping
 * \param flags    Flags for the mapping
 * \param retoff   If non-NULL, filled in with adjusted offset of mapped region
 * \param retsize  If non-NULL, filled in with adjusted size of mapped region
 */
static errval_t
map(struct pmap     *pmap,
    genvaddr_t       vaddr,
    struct capref    frame,
    size_t           offset,
    size_t           size,
    vregion_flags_t  flags,
    size_t          *retoff,
    size_t          *retsize)
{
    struct pmap_arm *pmap_arm = (struct pmap_arm *)pmap;

    errval_t err;
    size_t base;
    size_t page_size;
    size_t slabs_required;

    struct frame_identity fi;
    err = frame_identify(frame, &fi);
    if (err_is_fail(err)) {
        return err_push(err, LIB_ERR_PMAP_FRAME_IDENTIFY);
    }

    // adjust the mapping to be on page boundaries
    if (flags & VREGION_FLAGS_LARGE &&
        (vaddr & LARGE_PAGE_MASK) == 0 &&
        fi.bytes >= LARGE_PAGE_SIZE &&
        (fi.base & LARGE_PAGE_MASK) == 0) {
        // section mapping (1MB)
        base = LARGE_PAGE_OFFSET(offset);
        page_size = LARGE_PAGE_SIZE;
        slabs_required = max_slabs_required_large(size);
#ifdef LIBBARRELFISH_DEBUG_PMAP
        printf("map: large path, page_size: %zu, base: %zu, slabs: %zu, size: %zu,"
                "frame size: %zu\n", page_size, base, slabs_required, size,
                (size_t)fi.bytes);
#endif
    } else {
        // 4k mapping
        base = BASE_PAGE_OFFSET(offset);
        page_size = BASE_PAGE_SIZE;
        slabs_required = max_slabs_required(size);
    }
    size   += base;
    size    = ROUND_UP(size, page_size);
    offset -= base;

    const size_t slabs_reserve = 3; // == max_slabs_required(1)
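    // (max_slabs_required(1) == 3: one page slab, one L2 slab, one L1 slab)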
    uint64_t  slabs_free       = slab_freecount(&pmap_arm->slab);

    slabs_required += slabs_reserve;

    if (slabs_required > slabs_free) {
        if (get_current_pmap() == pmap) {
            err = refill_slabs(pmap_arm, slabs_required);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_SLAB_REFILL);
            }
        }
        else {
            size_t bytes = SLAB_STATIC_SIZE(slabs_required - slabs_free,
                                            sizeof(struct vnode));
            void *buf = malloc(bytes);
            if (!buf) {
                return LIB_ERR_MALLOC_FAIL;
            }
            slab_grow(&pmap_arm->slab, buf, bytes);
        }
    }

    return do_map(pmap_arm, vaddr, frame, offset, size, flags,
                  retoff, retsize);
}

static errval_t do_single_unmap(struct pmap_arm *pmap, genvaddr_t vaddr,
                                size_t pte_count)
{
#ifdef LIBBARRELFISH_DEBUG_PMAP
    debug_printf("%s: vaddr=0x%"PRIxGENVADDR", pte_count=%zu\n",
             __FUNCTION__, vaddr, pte_count);
#endif
    errval_t err;
    struct vnode *pt = find_ptable(pmap, vaddr);
    // pt->is_vnode iff this is a non-large (4k) mapping
    if (pt && pt->is_vnode) {
        // analog to do_single_map we use 10 bits for tracking pages in user space -SG
        struct vnode *page = find_vnode(pt, ARM_L2_OFFSET(vaddr));
        if (page && page->u.frame.pte_count == pte_count) {
#ifdef LIBBARRELFISH_DEBUG_PMAP
            debug_printf("page unmap: pt entry: %zu, entry = %zu, pte_count = %zu\n",
                    (size_t)pt->entry, (size_t)page->entry,
                    (size_t)page->u.frame.pte_count);
#endif
            err = vnode_unmap(pt->u.vnode.cap, page->mapping);
            if (err_is_fail(err)) {
                DEBUG_ERR(err, "vnode_unmap");
                return err_push(err, LIB_ERR_VNODE_UNMAP);
            }

            // cleanup mapping cap
            err = cap_delete(page->mapping);
            if (err_is_fail(err)) {
                DEBUG_ERR(err, "cap_delete");
                return err_push(err, LIB_ERR_CAP_DELETE);
            }
            err = slot_free(page->mapping);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_SLOT_FREE);
            }

            remove_vnode(pt, page);
            slab_free(&pmap->slab, page);
        }
        else {
            return LIB_ERR_PMAP_FIND_VNODE;
        }
    } else if (pt) {
        // section mapping: the child of the root is itself the mapped frame
#ifdef LIBBARRELFISH_DEBUG_PMAP
        debug_printf("section unmap: entry = %zu, pte_count = %zu\n",
                (size_t)pt->entry, (size_t)pt->u.frame.kernel_pte_count);
#endif
        err = vnode_unmap(pmap->root.u.vnode.cap, pt->mapping);
        if (err_is_fail(err)) {
            DEBUG_ERR(err, "vnode_unmap");
            return err_push(err, LIB_ERR_VNODE_UNMAP);
        }

        // cleanup mapping cap
        err = cap_delete(pt->mapping);
        if (err_is_fail(err)) {
            DEBUG_ERR(err, "cap_delete");
            return err_push(err, LIB_ERR_CAP_DELETE);
        }
        err = slot_free(pt->mapping);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_SLOT_FREE);
        }

        remove_vnode(&pmap->root, pt);
        slab_free(&pmap->slab, pt);
    } else {
        return LIB_ERR_PMAP_FIND_VNODE;
    }

    return SYS_ERR_OK;
}

/**
 * \brief Remove page mappings
 *
 * \param pmap     The pmap object
 * \param vaddr    The start of the virtual address range to remove
 * \param size     The size of the virtual address range to remove
 * \param retsize  If non-NULL, filled in with the actual size removed
 */
static errval_t
unmap(struct pmap *pmap,
      genvaddr_t   vaddr,
      size_t       size,
      size_t      *retsize)
{
    errval_t err, ret = SYS_ERR_OK;
    struct pmap_arm *pmap_arm = (struct pmap_arm*)pmap;
    size = ROUND_UP(size, BASE_PAGE_SIZE);
    size_t pte_count = size / BASE_PAGE_SIZE;
    genvaddr_t vend = vaddr + size;

    if (ARM_L1_OFFSET(vaddr) == ARM_L1_OFFSET(vend-1)) {
        // fast path
#ifdef LIBBARRELFISH_DEBUG_PMAP
        debug_printf("%s: fast path vaddr=0x%"PRIxGENVADDR", pte_count=%zu\n",
                __FUNCTION__, vaddr, pte_count);
#endif
        err = do_single_unmap(pmap_arm, vaddr, pte_count);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_UNMAP);
        }
    } else { // slow path
        // unmap first leaf
        uint32_t c = ARM_L2_MAX_ENTRIES - ARM_L2_OFFSET(vaddr);
#ifdef LIBBARRELFISH_DEBUG_PMAP
        debug_printf("%s: slow path 1st leaf vaddr=0x%"PRIxGENVADDR", pte_count=%zu\n",
                __FUNCTION__, vaddr, (size_t)c);
#endif
        err = do_single_unmap(pmap_arm, vaddr, c);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_UNMAP);
        }

        // unmap full leaves
        vaddr += c * BASE_PAGE_SIZE;
        while (ARM_L1_OFFSET(vaddr) < ARM_L1_OFFSET(vend)) {
            c = ARM_L2_MAX_ENTRIES;
#ifdef LIBBARRELFISH_DEBUG_PMAP
            debug_printf("%s: slow path full leaf vaddr=0x%"PRIxGENVADDR", pte_count=%zu\n",
                    __FUNCTION__, vaddr, (size_t)c);
#endif
            err = do_single_unmap(pmap_arm, vaddr, c);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_PMAP_UNMAP);
            }
            vaddr += c * BASE_PAGE_SIZE;
        }

        // unmap remaining part
        c = ARM_L2_OFFSET(vend) - ARM_L2_OFFSET(vaddr);
        if (c) {
#ifdef LIBBARRELFISH_DEBUG_PMAP
            debug_printf("%s: slow path last leaf vaddr=0x%"PRIxGENVADDR", pte_count=%zu\n",
                    __FUNCTION__, vaddr, (size_t)c);
#endif
            err = do_single_unmap(pmap_arm, vaddr, c);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_PMAP_UNMAP);
            }
        }
    }

    if (retsize) {
        *retsize = size;
    }

    return ret;
}

/**
 * \brief Determine a suitable address for a given memory object
 *
 * \param pmap    The pmap object
 * \param memobj  The memory object to determine the address for
 * \param alignment Minimum alignment
 * \param vaddr   Pointer to return the determined address
 *
 * Relies on vspace.c code maintaining an ordered list of vregions
 */
static errval_t
determine_addr(struct pmap   *pmap,
               struct memobj *memobj,
               size_t        alignment,
               genvaddr_t    *vaddr)
{
    assert(pmap->vspace->head);

    if (alignment == 0) {
        alignment = BASE_PAGE_SIZE;
    } else {
        alignment = ROUND_UP(alignment, BASE_PAGE_SIZE);
    }
    size_t size = ROUND_UP(memobj->size, alignment);

    struct vregion *walk = pmap->vspace->head;
    while (walk->next) { // Try to insert between existing mappings
        genvaddr_t walk_base = vregion_get_base_addr(walk);
        genvaddr_t walk_size = ROUND_UP(vregion_get_size(walk), BASE_PAGE_SIZE);
        genvaddr_t walk_end  = ROUND_UP(walk_base + walk_size, alignment);
        genvaddr_t next_base = vregion_get_base_addr(walk->next);

        if (next_base > walk_end + size &&
            walk_base + walk_size > VSPACE_BEGIN) { // only consider gaps above VSPACE_BEGIN
            *vaddr = walk_end;
            return SYS_ERR_OK;
        }
        walk = walk->next;
    }

    *vaddr = ROUND_UP((vregion_get_base_addr(walk)
                       + ROUND_UP(vregion_get_size(walk), alignment)),
                       alignment);
    return SYS_ERR_OK;
}

/**
 * \brief Retrieves an address that can currently be used for large mappings
 */
static errval_t determine_addr_raw(struct pmap *pmap, size_t size,
                                   size_t alignment, genvaddr_t *retvaddr)
{
    struct pmap_arm *pmap_arm = (struct pmap_arm *)pmap;

    struct vnode *walk_pdir = pmap_arm->root.u.vnode.children;
    assert(walk_pdir != NULL); // assume there's always at least one existing entry

    if (alignment == 0) {
        alignment = BASE_PAGE_SIZE;
    } else {
        alignment = ROUND_UP(alignment, BASE_PAGE_SIZE);
    }
    size = ROUND_UP(size, alignment);

    size_t free_count = DIVIDE_ROUND_UP(size, LARGE_PAGE_SIZE);
    //debug_printf("need %zu contiguous free pdirs\n", free_count);

    // compile pdir free list
    // barrelfish treats L1 as 1024 entries
    bool f[ARM_L1_MAX_ENTRIES];
    for (int i = 0; i < ARM_L1_MAX_ENTRIES; i++) {
        f[i] = true;
    }
    while (walk_pdir) {
        assert(walk_pdir->is_vnode);
        f[walk_pdir->entry] = false;
        walk_pdir = walk_pdir->next;
    }
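    // Search the window of L1 slots [384, 512): with 4MB per slot under the
    // 10-bit split this is the address range [0x60000000, 0x80000000).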
    genvaddr_t first_free = 384;
    for (; first_free < 512; first_free++) {
        if (f[first_free]) {
            for (size_t i = 1; i < free_count; i++) {
                if (!f[first_free + i]) {
                    // advance pointer
                    first_free = first_free+i;
                    goto next;
                }
            }
            break;
        }
next:
        assert(1 == 1); // label needs a statement to attach to
    }
    //printf("first free: %li\n", (uint32_t)first_free);
    if (first_free + free_count <= 512) {
        *retvaddr = first_free << 22;
        return SYS_ERR_OK;
    } else {
        return LIB_ERR_OUT_OF_VIRTUAL_ADDR;
    }
}

static errval_t do_single_modify_flags(struct pmap_arm *pmap, genvaddr_t vaddr,
                                       size_t pages, vregion_flags_t flags)
{
    errval_t err = SYS_ERR_OK;
    struct vnode *ptable = find_ptable(pmap, vaddr);
    uint16_t ptentry = ARM_L2_OFFSET(vaddr);
    if (ptable) {
        struct vnode *page = find_vnode(ptable, ptentry);
        if (page) {
            if (inside_region(ptable, ptentry, pages)) {
                // we're modifying part of a valid mapped region
                // arguments to invocation: invoke frame cap, first affected
                // page (as offset from first page in mapping), #affected
                // pages, new flags. Invocation should check compatibility of
                // new set of flags with cap permissions.
                size_t off = ptentry - page->entry;
                uintptr_t pmap_flags = vregion_flags_to_kpi_paging_flags(flags);
                // VA hinting NYI on ARM, so we always pass 0 for va_hint
                err = invoke_mapping_modify_flags(page->mapping,
                        off, pages, pmap_flags, 0);
                if (err_is_fail(err)) {
                    printf("invoke_mapping_modify_flags returned error: %s (%"PRIuERRV")\n",
                            err_getstring(err), err);
                }
                return err;
            } else {
                // overlaps some region border
                return LIB_ERR_PMAP_EXISTING_MAPPING;
            }
        }
    }
    return SYS_ERR_OK;
}

/**
 * \brief Modify page mapping
 *
 * \param pmap     The pmap object
 * \param vaddr    The first virtual address affected
 * \param size     The size of the region whose flags to modify
 * \param flags    New flags for the mapping
 * \param retsize  If non-NULL, filled in with the actual size modified
 */
static errval_t
modify_flags(struct pmap     *pmap,
             genvaddr_t       vaddr,
             size_t           size,
             vregion_flags_t  flags,
             size_t          *retsize)
{
    errval_t err, ret = SYS_ERR_OK;
    struct pmap_arm *pmap_arm = (struct pmap_arm*)pmap;
    size = ROUND_UP(size, BASE_PAGE_SIZE);
    size_t pte_count = size / BASE_PAGE_SIZE;
    genvaddr_t vend = vaddr + size;

    if (ARM_L1_OFFSET(vaddr) == ARM_L1_OFFSET(vend-1)) {
        // fast path
        err = do_single_modify_flags(pmap_arm, vaddr, pte_count, flags);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_UNMAP);
        }
    }
    else { // slow path
        // modify flags in first leaf
        uint32_t c = ARM_L2_MAX_ENTRIES - ARM_L2_OFFSET(vaddr);
        err = do_single_modify_flags(pmap_arm, vaddr, c, flags);
        if (err_is_fail(err)) {
            return err_push(err, LIB_ERR_PMAP_UNMAP);
        }

        // modify flags in full leaves
        vaddr += c * BASE_PAGE_SIZE;
        while (ARM_L1_OFFSET(vaddr) < ARM_L1_OFFSET(vend)) {
            c = ARM_L2_MAX_ENTRIES;
            err = do_single_modify_flags(pmap_arm, vaddr, c, flags);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_PMAP_UNMAP);
            }
            vaddr += c * BASE_PAGE_SIZE;
        }

        // modify flags in remaining part
        c = ARM_L2_OFFSET(vend) - ARM_L2_OFFSET(vaddr);
        if (c) {
            err = do_single_modify_flags(pmap_arm, vaddr, c, flags);
            if (err_is_fail(err)) {
                return err_push(err, LIB_ERR_PMAP_UNMAP);
            }
        }
    }

    if (retsize) {
        *retsize = size;
    }

    return ret;
}

/**
 * \brief Query existing page mapping
 *
 * \param pmap   The pmap object
 * \param vaddr  The virtual address to query
 * \param info   If non-NULL, filled in with the mapping information
 *               (base address, size, cap, offset within the cap and flags)
 */
static errval_t lookup(struct pmap *pmap, genvaddr_t vaddr,
                       struct pmap_mapping_info *info)
{
    USER_PANIC("NYI");
    return 0;
}

static errval_t
serialise(struct pmap *pmap, void *buf, size_t buflen)
{
    // Unimplemented: ignored
    return SYS_ERR_OK;
}

static errval_t
deserialise(struct pmap *pmap, void *buf, size_t buflen)
{
    // Unimplemented: we start with an empty pmap, and avoid the bottom of
    // the address space
    return SYS_ERR_OK;
}

static struct pmap_funcs pmap_funcs = {
    .determine_addr = determine_addr,
    .determine_addr_raw = determine_addr_raw,
    .map = map,
    .unmap = unmap,
    .modify_flags = modify_flags,
    .lookup = lookup,
    .serialise = serialise,
    .deserialise = deserialise,
};

/**
 * \brief Initialize the pmap object
 */
errval_t
pmap_init(struct pmap   *pmap,
          struct vspace *vspace,
          struct capref  vnode,
          struct slot_allocator *opt_slot_alloc)
{
    struct pmap_arm* pmap_arm = (struct pmap_arm*)pmap;

    /* Generic portion */
    pmap->f = pmap_funcs;
    pmap->vspace = vspace;

    // Slab allocator for vnodes
    slab_init(&pmap_arm->slab, sizeof(struct vnode), NULL);
    slab_grow(&pmap_arm->slab,
              pmap_arm->slab_buffer,
              sizeof(pmap_arm->slab_buffer));

    pmap_arm->root.is_vnode         = true;
    pmap_arm->root.u.vnode.cap      = vnode;
    if (get_croot_addr(vnode) != CPTR_ROOTCN) {
        /* root vnode is not invokable from our cspace; make a local copy */
        errval_t err = slot_alloc(&pmap_arm->root.u.vnode.invokable);
        assert(err_is_ok(err));
        err = cap_copy(pmap_arm->root.u.vnode.invokable, vnode);
        assert(err_is_ok(err));
    } else {
        pmap_arm->root.u.vnode.invokable = vnode;
    }
    pmap_arm->root.next             = NULL;
    pmap_arm->root.u.vnode.children = NULL;

    return SYS_ERR_OK;
}

errval_t pmap_current_init(bool init_domain)
{
    struct pmap_arm *pmap_arm = (struct pmap_arm*)get_current_pmap();

    // To reserve a block of virtual address space, a vregion representing
    // the address space is required. We construct a bare one here and add
    // it to the vregion list.
    struct vregion *vregion = &pmap_arm->vregion;
    assert((void*)vregion > (void*)pmap_arm);
    assert((void*)vregion < (void*)(pmap_arm + 1));
    vregion->vspace = NULL;
    vregion->memobj = NULL;
    vregion->base   = VSPACE_BEGIN;
    vregion->offset = 0;
    vregion->size   = META_DATA_RESERVED_SPACE;
    vregion->flags  = 0;
    vregion->next = NULL;

    struct vspace *vspace = pmap_arm->p.vspace;
    assert(!vspace->head);
    vspace->head = vregion;

    pmap_arm->vregion_offset = pmap_arm->vregion.base;

    return SYS_ERR_OK;
}