/*
 * Copyright 2017, Data61
 * Commonwealth Scientific and Industrial Research Organisation (CSIRO)
 * ABN 41 687 119 230.
 *
 * This software may be distributed and modified according to the terms of
 * the BSD 2-Clause license. Note that NO WARRANTY is provided.
 * See "LICENSE_BSD2.txt" for details.
 *
 * @TAG(DATA61_BSD)
 */

/* CAmkES DMA functionality. Note that parts of this interoperate with
 * generated code to provide complete functionality.
 */

#include <assert.h>
#include <limits.h>
#include <platsupport/io.h>
#include <stdalign.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <camkes/dma.h>
#include <camkes/error.h>
#include <utils/util.h>
#include <sel4/sel4.h>
#include <vspace/page.h>

/* Check consistency of bookkeeping structures */
// #define DEBUG_DMA

/* Force the _dma_frames section to be created even if no modules are defined. */
static USED SECTION("_dma_frames") struct {} dummy_dma_frame;
/* Definitions so that we can find the exposed DMA frames */
extern dma_frame_t *__start__dma_frames[];
extern dma_frame_t *__stop__dma_frames[];

/* NOT THREAD SAFE. The code could be made thread safe relatively easily by
 * operating atomically on the free list.
 */

/* We store the free list as a linked-list. If 'head' is NULL that implies we
 * have exhausted our allocation pool.
 */
static void *head;

/* This is a helper function to query the name of the current instance */
extern const char *get_instance_name(void);

/* A node in the free list. Note that the free list is stored as a linked-list
 * of such nodes *within* the DMA pages themselves. This struct is deliberately
 * arranged to be tightly packed (the non-word-sized member at the end) so that
 * it consumes as little space as possible. The size of this struct determines
 * the minimum region we can track, and we'd like to be as permissive as
 * possible. Ordinarily this would be achievable in a straightforward way with
 * `__attribute__((packed, aligned(1)))`, but unaligned accesses to uncached
 * memory (which these will live in) are UNPREDICTABLE on some of our platforms
 * like ARMv7.
 */
typedef struct {

    /* This struct also conceptually has the following member. However, it is
     * not directly necessary because the nodes are stored in-place. The
     * virtual address of a region is available as the pointer to the node
     * itself.
     *
     *  void *vaddr;
     */

    /* The size in bytes of this region. */
    size_t size;

    /* The next node in the list. */
    void *next;

    /* The upper bits of the physical address of this region. We don't need to
     * store the lower bits (the offset into the physical frame) because we can
     * reconstruct these from the offset into the page, obtainable as described
     * above. See `extract_paddr` below.
     */
    uintptr_t paddr_upper : sizeof(uintptr_t) * 8 - PAGE_BITS_4K;

} region_t;

static void save_paddr(region_t *r, uintptr_t paddr)
{
    assert(r != NULL);
    r->paddr_upper = paddr >> PAGE_BITS_4K;
}
static uintptr_t PURE try_extract_paddr(region_t *r)
{
    assert(r != NULL);
    uintptr_t paddr = r->paddr_upper;
    if (paddr != 0) {
        uintptr_t offset = (uintptr_t)r & MASK(PAGE_BITS_4K);
        paddr = (paddr << PAGE_BITS_4K) | offset;
    }
    return paddr;
}
static uintptr_t extract_paddr(region_t *r)
{
    uintptr_t paddr = try_extract_paddr(r);
    if (paddr == 0) {
        /* We've never looked up the physical address of this region. Look it
         * up and cache it now.
         */
        paddr = camkes_dma_get_paddr(r);
        assert(paddr != 0);
        save_paddr(r, paddr);
        paddr = try_extract_paddr(r);
    }
    assert(paddr != 0);
    return paddr;
}
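
/* Worked example of the packing above (illustrative; the addresses are
 * invented): a free node living at virtual address 0x2000 that is backed by
 * physical address 0x4000 stores paddr_upper = 0x4000 >> PAGE_BITS_4K = 0x4.
 * try_extract_paddr() then rebuilds the full physical address from the node's
 * own offset within its page:
 *
 *     paddr = (0x4 << PAGE_BITS_4K) | ((uintptr_t)0x2000 & MASK(PAGE_BITS_4K))
 *           = 0x4000 | 0x0
 *           = 0x4000
 *
 * A node at 0x2100 within the same frame would similarly yield 0x4100.
 */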

/* Various helpers for dealing with the above data structure layout. */
static void prepend_node(region_t *node)
{
    assert(node != NULL);
    node->next = head;
    head = node;
}
static void remove_node(region_t *previous, region_t *node)
{
    assert(node != NULL);
    if (previous == NULL) {
        head = node->next;
    } else {
        previous->next = node->next;
    }
}
static void replace_node(region_t *previous, region_t *old, region_t *new)
{
    assert(old != NULL);
    assert(new != NULL);
    new->next = old->next;
    if (previous == NULL) {
        head = new;
    } else {
        previous->next = new;
    }
}
static void shrink_node(region_t *node, size_t by)
{
    assert(node != NULL);
    assert(by > 0 && node->size > by);
    node->size -= by;
}
static void grow_node(region_t *node, size_t by)
{
    assert(node != NULL);
    assert(by > 0);
    node->size += by;
}

#ifdef DEBUG_DMA

/* Check certain assumptions hold on the free list. This is only compiled when
 * DEBUG_DMA is defined, and its checks reduce to no-ops when NDEBUG is
 * defined.
 */
static void check_consistency(void)
{
    if (head == NULL) {
        /* Empty free list. */
        return;
    }

    /* Validate that there are no cycles in the free list using Brent's
     * algorithm.
     */
    region_t *tortoise = head, *hare = tortoise->next;
    for (int power = 1, lambda = 1; hare != NULL; lambda++) {
        assert(tortoise != hare && "cycle in free list");
        if (power == lambda) {
            tortoise = hare;
            power *= 2;
            lambda = 0;
        }
        hare = hare->next;
    }

    /* Validate invariants on individual regions. */
    for (region_t *r = head; r != NULL; r = r->next) {

        assert(r != NULL && "a region includes NULL");

        assert(extract_paddr(r) != 0 && "a region includes physical frame 0");

        assert(r->size > 0 && "a region has size 0");

        assert(r->size >= sizeof(region_t) && "a region has an invalid size");

        assert(UINTPTR_MAX - (uintptr_t)r >= r->size &&
               "a region overflows in virtual address space");

        assert(UINTPTR_MAX - extract_paddr(r) >= r->size &&
               "a region overflows in physical address space");
    }

    /* Ensure no regions overlap. */
    for (region_t *r = head; r != NULL; r = r->next) {
        for (region_t *p = head; p != r; p = p->next) {

            uintptr_t r_vaddr UNUSED = (uintptr_t)r,
                      p_vaddr UNUSED = (uintptr_t)p,
                      r_paddr UNUSED = extract_paddr(r),
                      p_paddr UNUSED = extract_paddr(p);

            assert(!((r_vaddr >= p_vaddr && r_vaddr < p_vaddr + p->size) ||
                     (p_vaddr >= r_vaddr && p_vaddr < r_vaddr + r->size)) &&
                   "two regions overlap in virtual address space");

            assert(!((r_paddr >= p_paddr && r_paddr < p_paddr + p->size) ||
                     (p_paddr >= r_paddr && p_paddr < r_paddr + r->size)) &&
                   "two regions overlap in physical address space");
        }
    }
}
#else
#define check_consistency()
#endif

#ifdef NDEBUG
#define STATS(arg) do { } while (0)
#else
/* Statistics functionality. */

#define STATS(arg) do { arg; } while (0)

static camkes_dma_stats_t stats;

static size_t total_allocation_bytes;

const camkes_dma_stats_t *camkes_dma_stats(void)
{
    if (stats.total_allocations > 0) {
        stats.average_allocation = total_allocation_bytes / stats.total_allocations;
    } else {
        stats.average_allocation = 0;
    }
    return (const camkes_dma_stats_t *)&stats;
}
#endif
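
/* Example (illustrative sketch, not part of the API contract; field types per
 * camkes/dma.h): a component that wants to monitor DMA heap usage could
 * periodically do something like
 *
 *     const camkes_dma_stats_t *s = camkes_dma_stats();
 *     printf("DMA heap: %zu bytes, %zu outstanding, %zu defrags, %zu coalesces\n",
 *            (size_t)s->heap_size, (size_t)s->current_outstanding,
 *            (size_t)s->defragmentations, (size_t)s->coalesces);
 *
 * Note that none of this bookkeeping exists when NDEBUG is defined.
 */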

/* Defragment the free list. Can safely be called at any time. The complexity
 * of this function is at least O(n²).
 *
 * Over time the free list can evolve to contain separate chunks that are
 * actually contiguous, both physically and virtually. This fragmentation can
 * result in unnecessary allocation failures, so this function is provided to
 * coalesce such chunks. For example, the free list may end up like:
 *
 *  +--------------+    +--------------+    +--------------+
 *  | vaddr: 0x4000|    | vaddr: 0x7000|    | vaddr: 0x2000|
 *  | size : 0x1000|    | size : 0x2000|    | size : 0x2000|
 *  | next :       |--->| next :       |--->| next :  NULL |
 *  | paddr: 0x6000|    | paddr: 0x8000|    | paddr: 0x4000|
 *  +--------------+    +--------------+    +--------------+
 *
 * after defragmentation, the free list will look like:
 *
 *  +--------------+    +--------------+
 *  | vaddr: 0x2000|    | vaddr: 0x7000|
 *  | size : 0x3000|    | size : 0x2000|
 *  | next :       |--->| next :  NULL |
 *  | paddr: 0x4000|    | paddr: 0x8000|
 *  +--------------+    +--------------+
 */
static void defrag(void)
{
    assert(head != NULL &&
           "attempted defragmentation of DMA free list before initialisation");

    check_consistency();

    STATS(stats.defragmentations++);

    /* For each region in the free list... */
    for (region_t *pprev = NULL, *p = head; p != NULL; pprev = p, p = p->next) {

        uintptr_t p_vstart = (uintptr_t)p,           /* start virtual address */
                  p_vend   = (uintptr_t)p + p->size, /* end virtual address */
                  p_pstart = extract_paddr(p),       /* start physical address */
                  p_pend   = p_pstart + p->size;     /* end physical address */

        /* For each region *before* this one... */
        for (region_t *qprev = NULL, *q = head; q != p; qprev = q, q = q->next) {

            uintptr_t q_vstart = (uintptr_t)q,
                      q_vend   = (uintptr_t)q + q->size,
                      q_pstart = extract_paddr(q),
                      q_pend   = q_pstart + q->size;

            /* We could not have entered this loop if 'p' was the head of the
             * free list.
             */
            assert(pprev != NULL);

            if (p_vstart == q_vend && p_pstart == q_pend) {
                /* 'p' immediately follows the region 'q'. Coalesce 'p' into
                 * 'q'.
                 */
                grow_node(q, p->size);
                remove_node(pprev, p);
                STATS(stats.coalesces++);
                /* Bump the outer scan back to the node we just modified
                 * (accounting for the fact that the next thing we will do is
                 * increment 'p' as we go round the loop). The reason for this
                 * is that our changes may have made further coalescing
                 * possible between the node we modified and where 'p' is
                 * currently pointing.
                 */
                if (qprev == NULL) {
                    /* We just coalesced 'p' into the free list head; reset the
                     * scan. Note that we'll end up skipping the head as we go
                     * round the loop, but that's fine because the body of the
                     * outer loop does nothing for the first iteration.
                     */
                    p = head;
                } else {
                    p = qprev;
                }
                break;
            }

            if (p_vend == q_vstart && p_pend == q_pstart) {
                /* 'p' immediately precedes the region 'q'. Coalesce 'q' into
                 * 'p'.
                 */
                grow_node(p, q->size);
                remove_node(qprev, q);
                STATS(stats.coalesces++);

                /* Similar to above, we bump the outer scan back so we
                 * reconsider 'p' again the next time around the loop. Now that
                 * we've expanded 'p' there may be further coalescing we can
                 * do.
                 */
                p = pprev;
                break;
            }
        }
    }

    check_consistency();
}

int camkes_dma_init(void *dma_pool, size_t dma_pool_sz, size_t page_size)
{

    /* The caller should have passed us a valid DMA pool. */
    if (page_size != 0 && (page_size <= sizeof(region_t) ||
                           (uintptr_t)dma_pool % page_size != 0)) {
        return -1;
    }

    /* Bail out if the caller gave us an insufficiently aligned pool. */
    if (dma_pool == NULL || (uintptr_t)dma_pool % alignof(region_t) != 0) {
        return -1;
    }

    /* We're going to store bookkeeping in the DMA pages, which we expect to be
     * power-of-2-sized, so the bookkeeping struct better be
     * power-of-2-aligned. Your compiler should always guarantee this.
     */
    static_assert(IS_POWER_OF_2(alignof(region_t)),
                  "region_t is not power-of-2-aligned");

    /* The page size the caller has given us should be a power of 2 and at least
     * the alignment of `region_t`.
     */
    if (page_size != 0 && (!IS_POWER_OF_2(page_size) ||
                           page_size < alignof(region_t))) {
        return -1;
    }

    STATS(stats.heap_size = dma_pool_sz);
    STATS(stats.minimum_heap_size = dma_pool_sz);
    STATS(stats.minimum_allocation = SIZE_MAX);
    STATS(stats.minimum_alignment = INT_MAX);

    if (page_size != 0) {
        /* The caller specified a page size. Excellent; we don't have to work
         * it out for ourselves.
         */
        for (void *base = dma_pool; base < dma_pool + dma_pool_sz;
             base += page_size) {
            assert((uintptr_t)base % alignof(region_t) == 0 &&
                   "we misaligned the DMA pool base address during "
                   "initialisation");
            camkes_dma_free(base, page_size);
        }
    } else {
        /* The lazy caller didn't bother giving us a page size. Manually scan
         * for breaks in physical contiguity.
         */
        for (void *base = dma_pool; base < dma_pool + dma_pool_sz;) {
            uintptr_t base_paddr = camkes_dma_get_paddr(base);
            if (base_paddr == 0) {
                /* The caller gave us a region backed by non-reversible frames. */
                return -1;
            }
            void *limit = base + 1;
            uintptr_t next_expected_paddr = base_paddr + 1;
            while (limit < dma_pool + dma_pool_sz) {
                if (limit == NULL) {
                    /* The user gave us a region that wraps virtual memory. */
                    return -1;
                }
                uintptr_t limit_paddr = camkes_dma_get_paddr(limit);
                if (limit_paddr == 0) {
                    /* The user gave us a region that wraps physical memory. */
                    return -1;
                }
                if (limit_paddr != next_expected_paddr) {
                    /* We've hit a physical contiguity break (== frame
                     * boundary).
                     */
                    break;
                }
                limit++;
                next_expected_paddr++;
            }
            /* Only add the region if it's large enough to actually contain the
             * necessary metadata.
             */
            if (base + sizeof(region_t) <= limit) {
                assert((uintptr_t)base % alignof(region_t) == 0 &&
                       "we misaligned the DMA pool base address during "
                       "initialisation");
                camkes_dma_free(base, limit - base);
            }

            /* Move to the next region. We always need to be considering a
             * region aligned for bookkeeping, so bump the address up if
             * necessary.
             */
            base = (void *)ALIGN_UP((uintptr_t)limit, alignof(region_t));
        }
    }

    check_consistency();

    return 0;
}
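
/* Illustrative sketch of initialisation (the pool symbol and size macro below
 * are assumptions; in practice the CAmkES-generated per-component code makes
 * this call with the component's real DMA pool):
 *
 *     extern char dma_pool_symbol[];   // hypothetical pool backed by 4K frames
 *     int err = camkes_dma_init(dma_pool_symbol, DMA_POOL_SIZE, PAGE_SIZE_4K);
 *     assert(!err);
 *
 * Passing a page_size of 0 instead makes camkes_dma_init() discover frame
 * boundaries itself via camkes_dma_get_paddr(), as above.
 */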

static dma_frame_t *get_frame_desc(void *ptr)
{
    for (dma_frame_t **frame = __start__dma_frames;
         frame < __stop__dma_frames; frame++) {
        uintptr_t base = (uintptr_t)ptr & ~MASK(ffs((*frame)->size) - 1);
        if (base == (*frame)->vaddr) {
            return *frame;
        }
    }
    return NULL;
}

uintptr_t camkes_dma_get_paddr(void *ptr)
{
    dma_frame_t *frame = get_frame_desc(ptr);
    if (frame) {
        /* Only dereference 'frame' once we know the lookup succeeded. */
        uintptr_t offset = (uintptr_t)ptr & MASK(ffs(frame->size) - 1);
        seL4_ARCH_Page_GetAddress_t res = seL4_ARCH_Page_GetAddress(frame->cap);
        ERR_IF(res.error != 0, camkes_error, ((camkes_error_t) {
            .type = CE_SYSCALL_FAILED,
            .instance = get_instance_name(),
            .description = "failed to reverse virtual mapping to a DMA frame",
            .syscall = ARCHPageGetAddress,
            .error = res.error,
        }), ({
            return (uintptr_t)NULL;
        }));
        return res.paddr + offset;
    } else {
        return (uintptr_t)NULL;
    }
}

seL4_CPtr camkes_dma_get_cptr(void *ptr)
{
    for (dma_frame_t **frame = __start__dma_frames;
         frame < __stop__dma_frames; frame++) {
        uintptr_t base = (uintptr_t)ptr & ~MASK(ffs((*frame)->size) - 1);
        if (base == (*frame)->vaddr) {
            return (*frame)->cap;
        }
    }
    return seL4_CapNull;
}

/* Allocate a DMA region. This is refactored out of camkes_dma_alloc simply so
 * we can more eloquently express reattempting allocations.
 */
static void *alloc(size_t size, int align, bool cached)
{

    /* Our caller should have rounded 'size' up. */
    assert(size >= sizeof(region_t));

    /* The caller should have ensured 'size' is aligned to the bookkeeping
     * struct, so that the bookkeeping we may have to write for the remainder
     * chunk of a region is aligned.
     */
    assert(size % alignof(region_t) == 0);

    /* The caller should have ensured that the alignment requirements are
     * sufficient that any chunk we ourselves allocate can later host
     * bookkeeping in its initial bytes when it is freed.
     */
    assert(align >= (int)alignof(region_t));

    /* For each region in the free list... */
    for (region_t *prev = NULL, *p = head; p != NULL; prev = p, p = p->next) {

        if (p->size >= size) {
            /* This region or a subinterval of it may satisfy this request. */

            /* Scan subintervals of 'size' bytes within this region from the
             * end. We scan the region from the end as an optimisation because
             * we can avoid relocating the region's metadata if we find a
             * satisfying allocation that doesn't involve the initial
             * sizeof(region_t) bytes.
             */
            for (void *q = (void *)ROUND_DOWN((uintptr_t)p + p->size - size, align);
                 q == (void *)p || q >= (void *)p + sizeof(region_t);
                 q -= align) {

                if (q + size == (void *)p + p->size ||
                    q + size + sizeof(region_t) <= (void *)p + p->size) {
                    /* Found something that satisfies the caller's
                     * requirements and leaves us enough room to turn the
                     * cut-off suffix into a new chunk.
                     */

                    uintptr_t base_paddr = try_extract_paddr(p);

                    /* There are four possible cases here... */

                    if (p == q) {
                        if (p->size == size) {
                            /* 1. We're giving them the whole chunk; we can
                             * just remove this node.
                             */
                            remove_node(prev, p);
                        } else {
                            /* 2. We're giving them the start of the chunk. We
                             * need to extract the end as a new node.
                             */
                            region_t *r = (void *)p + size;
                            if (base_paddr != 0) {
                                /* PERF: The original chunk had a physical
                                 * address. Save the overhead of a future
                                 * syscall by reusing this information now.
                                 */
                                save_paddr(r, base_paddr + size);
                            } else {
                                r->paddr_upper = 0;
                            }
                            r->size = p->size - size;
                            replace_node(prev, p, r);
                        }
                    } else if (q + size == (void *)p + p->size) {
                        /* 3. We're giving them the end of the chunk. We need
                         * to shrink the existing node.
                         */
                        shrink_node(p, size);
                    } else {
                        /* 4. We're giving them the middle of a chunk. We need
                         * to shrink the existing node and extract the end as a
                         * new node.
                         */
                        size_t start_size = (uintptr_t)q - (uintptr_t)p;
                        region_t *end = q + size;
                        if (base_paddr != 0) {
                            /* PERF: An optimisation as above. */
                            save_paddr(end, base_paddr + start_size + size);
                        } else {
                            end->paddr_upper = 0;
                        }
                        end->size = p->size - size - start_size;
                        prepend_node(end);
                        p->size = start_size;
                    }

                    return q;
                }
            }
        }
    }

    /* No satisfying region found. */
    return NULL;
}

void *camkes_dma_alloc(size_t size, int align, bool cached)
{

    STATS(({
        stats.total_allocations++;
        if (size < stats.minimum_allocation) {
            stats.minimum_allocation = size;
        }
        if (size > stats.maximum_allocation) {
            stats.maximum_allocation = size;
        }
        if (align < stats.minimum_alignment) {
            stats.minimum_alignment = align;
        }
        if (align > stats.maximum_alignment) {
            stats.maximum_alignment = align;
        }
        total_allocation_bytes += size;
    }));

    if (head == NULL) {
        /* Nothing in the free list. */
        STATS(stats.failed_allocations_out_of_memory++);
        return NULL;
    }

    if (align == 0) {
        /* No alignment requirements. */
        align = 1;
    }

    if (align < (int)alignof(region_t)) {
        /* Allocating something with a weaker alignment constraint than our
         * bookkeeping data may lead to us giving out a chunk of memory that is
         * not sufficiently aligned to host bookkeeping data when it is
         * returned to us. Bump it up in this case.
         */
        align = alignof(region_t);
    }

    if (size < sizeof(region_t)) {
        /* We need to bump up smaller allocations because they may be freed at
         * a point when they cannot be conjoined with another chunk in the heap
         * and therefore need to become host to region_t metadata.
         */
        size = sizeof(region_t);
    }

    if (size % alignof(region_t) != 0) {
        /* We need to ensure that 'size' is aligned to the bookkeeping
         * struct, so that the remainder chunk of a region is aligned.
         */
        size = ROUND_UP(size, alignof(region_t));
    }

    void *p = alloc(size, align, cached);

    if (p == NULL && size > sizeof(region_t)) {
        /* We failed to allocate a matching region, but we may be able to
         * satisfy this allocation by defragmenting the free list and
         * re-attempting.
         */
        defrag();
        p = alloc(size, align, cached);

        if (p != NULL) {
            STATS(stats.succeeded_allocations_on_defrag++);
        }
    }

    check_consistency();

    if (p == NULL) {
        STATS(stats.failed_allocations_other++);
    } else {
        STATS(({
            stats.current_outstanding += size;
            if (stats.heap_size - stats.current_outstanding < stats.minimum_heap_size) {
                stats.minimum_heap_size = stats.heap_size - stats.current_outstanding;
            }
        }));
    }

    return p;
}
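
/* Example usage (illustrative sketch): allocate a page-sized, page-aligned,
 * uncached DMA buffer, look up its physical address for programming into a
 * device, and release it again.
 *
 *     void *buf = camkes_dma_alloc(PAGE_SIZE_4K, PAGE_SIZE_4K, false);
 *     if (buf != NULL) {
 *         uintptr_t paddr = camkes_dma_get_paddr(buf);
 *         // ... hand 'paddr' to the device, wait for the transfer ...
 *         camkes_dma_free(buf, PAGE_SIZE_4K);
 *     }
 */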

void camkes_dma_free(void *ptr, size_t size)
{

    /* Allow the user to free NULL. */
    if (ptr == NULL) {
        return;
    }

    /* If the user allocated a region that was too small, we would have rounded
     * up the size during allocation.
     */
    if (size < sizeof(region_t)) {
        size = sizeof(region_t);
    }

    /* The 'size' of every allocated chunk is aligned to the bookkeeping
     * struct, so bump it to the actual size we allocated.
     */
    if (size % alignof(region_t) != 0) {
        size = ROUND_UP(size, alignof(region_t));
    }

    /* We should never have allocated memory that is insufficiently aligned to
     * host bookkeeping data now that it has been returned to us.
     */
    assert((uintptr_t)ptr % alignof(region_t) == 0);

    STATS(({
        if (size >= stats.current_outstanding) {
            stats.current_outstanding = 0;
        } else {
            stats.current_outstanding -= size;
        }
    }));

    region_t *p = ptr;
    p->paddr_upper = 0;
    p->size = size;
    prepend_node(p);

    check_consistency();
}

/* The remaining functions are to comply with the ps_io_ops-related interface
 * from libplatsupport. Note that many of the operations are no-ops, because
 * our case is somewhat constrained.
 */

static void *dma_alloc(void *cookie UNUSED, size_t size, int align, int cached,
                       ps_mem_flags_t flags UNUSED)
{

    /* The cached argument is forwarded, but the allocator above currently
     * ignores it and hands out pages from the single DMA pool regardless. The
     * assumption is that any caller that wants a cached page only wants it as
     * an optimisation. Their usage pattern is expected to be (1) write
     * repeatedly to the page, (2) flush the page, (3) pass it to a device. In
     * the case of an uncached frame we simply lose some performance in (1) and
     * make (2) a no-op.
     */
    return camkes_dma_alloc(size, align, cached);
}

static void dma_free(void *cookie UNUSED, void *addr, size_t size)
{
    camkes_dma_free(addr, size);
}

/* All CAmkES DMA pages are pinned for the duration of execution, so this is
 * effectively a no-op.
 */
static uintptr_t dma_pin(void *cookie UNUSED, void *addr, size_t size UNUSED)
{
    return camkes_dma_get_paddr(addr);
}

/* As above, all pages are pinned so this is also a no-op. */
static void dma_unpin(void *cookie UNUSED, void *addr UNUSED, size_t size UNUSED)
{
}

static void dma_cache_op(void *cookie UNUSED, void *addr UNUSED,
                         size_t size UNUSED, dma_cache_op_t op UNUSED)
{
    /* x86 DMA is usually cache coherent and doesn't need maintenance ops */
#ifdef CONFIG_ARCH_ARM
    dma_frame_t *frame = get_frame_desc(addr);
    if (frame == NULL) {
        ZF_LOGE("Could not perform cache op");
        return;
    }

    /* If the frame is uncached then the cache op isn't required. This assumes
     * that if there is a setup where multiple software components have
     * mappings to the same DMA memory with different cache attributes, then
     * the component with the cached mappings will be performing the cache
     * maintenance ops and not the uncached one.
     */
    if (frame->cached == 0) {
        return;
    }
    seL4_CPtr frame_cap = frame->cap;
    if (frame_cap == seL4_CapNull) {
        ZF_LOGE("Could not perform cache op");
        return;
    }

    size_t page_size_of_region = frame->size;
    size_t frame_start_offset = (uintptr_t)addr % page_size_of_region;
    if ((frame_start_offset + size) > frame->size) {
        ZF_LOGE("Specified range is outside the bounds of the dataport");
        return;
    }

    switch (op) {
    case DMA_CACHE_OP_CLEAN:
        seL4_ARM_Page_Clean_Data(frame_cap, frame_start_offset, frame_start_offset + size);
        break;
    case DMA_CACHE_OP_INVALIDATE:
        seL4_ARM_Page_Invalidate_Data(frame_cap, frame_start_offset, frame_start_offset + size);
        break;
    case DMA_CACHE_OP_CLEAN_INVALIDATE:
        seL4_ARM_Page_CleanInvalidate_Data(frame_cap, frame_start_offset, frame_start_offset + size);
        break;
    default:
        ZF_LOGF("Invalid cache_op %d", op);
        return;
    }
#endif
}

int camkes_dma_manager(ps_dma_man_t *man)
{
    if (man == NULL) {
        ZF_LOGE("man is NULL");
        return -1;
    }
    man->dma_alloc_fn = dma_alloc;
    man->dma_free_fn = dma_free;
    man->dma_pin_fn = dma_pin;
    man->dma_unpin_fn = dma_unpin;
    man->dma_cache_op_fn = dma_cache_op;
    return 0;
}
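
/* Example usage (illustrative sketch; ps_dma_alloc and friends are the inline
 * wrappers provided by libplatsupport's <platsupport/io.h>): plug the CAmkES
 * DMA allocator into a ps_dma_man_t and use it like any other DMA manager.
 *
 *     ps_dma_man_t dma_man;
 *     if (camkes_dma_manager(&dma_man) == 0) {
 *         void *buf = ps_dma_alloc(&dma_man, 0x1000, 0x1000, 0, PS_MEM_NORMAL);
 *         uintptr_t paddr = ps_dma_pin(&dma_man, buf, 0x1000);
 *         // ... program the device with 'paddr' ...
 *         ps_dma_unpin(&dma_man, buf, 0x1000);
 *         ps_dma_free(&dma_man, buf, 0x1000);
 *     }
 */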

/* Legacy functions */
void *camkes_dma_alloc_page(void)
{
    return camkes_dma_alloc(PAGE_SIZE_4K, PAGE_SIZE_4K, false);
}
void camkes_dma_free_page(void *ptr)
{
    camkes_dma_free(ptr, PAGE_SIZE_4K);
}
