1/* 2 * Copyright 2017, Data61 3 * Commonwealth Scientific and Industrial Research Organisation (CSIRO) 4 * ABN 41 687 119 230. 5 * 6 * This software may be distributed and modified according to the terms of 7 * the BSD 2-Clause license. Note that NO WARRANTY is provided. 8 * See "LICENSE_BSD2.txt" for details. 9 * 10 * @TAG(DATA61_BSD) 11 */ 12 13/* CAmkES DMA functionality. Note that parts of this interoperate with 14 * generated code to provide complete functionality. 15 */ 16 17#include <assert.h> 18#include <limits.h> 19#include <platsupport/io.h> 20#include <stdalign.h> 21#include <stdbool.h> 22#include <stdint.h> 23#include <stdlib.h> 24#include <string.h> 25#include <camkes/dma.h> 26#include <camkes/error.h> 27#include <utils/util.h> 28#include <sel4/sel4.h> 29#include <vspace/page.h> 30 31/* Check consistency of bookkeeping structures */ 32// #define DEBUG_DMA 33 34/* Force the _dma_frames section to be created even if no modules are defined. */ 35static USED SECTION("_dma_frames") struct {} dummy_dma_frame; 36/* Definitions so that we can find the exposed DMA frames */ 37extern dma_frame_t *__start__dma_frames[]; 38extern dma_frame_t *__stop__dma_frames[]; 39 40/* NOT THREAD SAFE. The code could be made thread safe relatively easily by 41 * operating atomically on the free list. 42 */ 43 44/* We store the free list as a linked-list. If 'head' is NULL that implies we 45 * have exhausted our allocation pool. 46 */ 47static void *head; 48 49/* This is a helper function to query the name of the current instance */ 50extern const char *get_instance_name(void); 51 52/* A node in the free list. Note that the free list is stored as a linked-list 53 * of such nodes *within* the DMA pages themselves. This struct is deliberately 54 * arranged to be tightly packed (the non-word sized member at the end) so that 55 * it consumes as little size as possible. The size of this struct determines 56 * the minimum region we can track, and we'd like to be as permissive as 57 * possible. Ordinarily this would be achievable in a straightforward way with 58 * `__attribute__((packed, aligned(1)))`, but unaligned accesses to uncached 59 * memory (which these will live in) are UNPREDICTABLE on some of our platforms 60 * like ARMv7. 61 */ 62typedef struct { 63 64 /* This struct also conceptually has the following member. However, it is 65 * not directly necessary because the nodes are stored in-place. The 66 * virtual address of a region is available as the pointer to the node 67 * itself. 68 * 69 * void *vaddr; 70 */ 71 72 /* The size in bytes of this region. */ 73 size_t size; 74 75 /* The next node in the list. */ 76 void *next; 77 78 /* The upper bits of the physical address of this region. We don't need to 79 * store the lower bits (the offset into the physical frame) because we can 80 * reconstruct these from the offset into the page, obtainable as described 81 * above. See `extract_paddr` below. 82 */ 83uintptr_t paddr_upper: 84 sizeof(uintptr_t) * 8 - PAGE_BITS_4K; 85 86} region_t; 87 88static void save_paddr(region_t *r, uintptr_t paddr) 89{ 90 assert(r != NULL); 91 r->paddr_upper = paddr >> PAGE_BITS_4K; 92} 93static uintptr_t PURE try_extract_paddr(region_t *r) 94{ 95 assert(r != NULL); 96 uintptr_t paddr = r->paddr_upper; 97 if (paddr != 0) { 98 uintptr_t offset = (uintptr_t)r & MASK(PAGE_BITS_4K); 99 paddr = (paddr << PAGE_BITS_4K) | offset; 100 } 101 return paddr; 102} 103static uintptr_t extract_paddr(region_t *r) 104{ 105 uintptr_t paddr = try_extract_paddr(r); 106 if (paddr == 0) { 107 /* We've never looked up the physical address of this region. Look it 108 * up and cache it now. 109 */ 110 paddr = camkes_dma_get_paddr(r); 111 assert(paddr != 0); 112 save_paddr(r, paddr); 113 paddr = try_extract_paddr(r); 114 } 115 assert(paddr != 0); 116 return paddr; 117} 118 119/* Various helpers for dealing with the above data structure layout. */ 120static void prepend_node(region_t *node) 121{ 122 assert(node != NULL); 123 node->next = head; 124 head = node; 125} 126static void remove_node(region_t *previous, region_t *node) 127{ 128 assert(node != NULL); 129 if (previous == NULL) { 130 head = node->next; 131 } else { 132 previous->next = node->next; 133 } 134} 135static void replace_node(region_t *previous, region_t *old, region_t *new) 136{ 137 assert(old != NULL); 138 assert(new != NULL); 139 new->next = old->next; 140 if (previous == NULL) { 141 head = new; 142 } else { 143 previous->next = new; 144 } 145} 146static void shrink_node(region_t *node, size_t by) 147{ 148 assert(node != NULL); 149 assert(by > 0 && node->size > by); 150 node->size -= by; 151} 152static void grow_node(region_t *node, size_t by) 153{ 154 assert(node != NULL); 155 assert(by > 0); 156 node->size += by; 157} 158 159#ifdef DEBUG_DMA 160 161/* Check certain assumptions hold on the free list. This function is intended 162 * to be a no-op when NDEBUG is defined. 163 */ 164static void check_consistency(void) 165{ 166 if (head == NULL) { 167 /* Empty free list. */ 168 return; 169 } 170 171 /* Validate that there are no cycles in the free list using Brent's 172 * algorithm. 173 */ 174 region_t *tortoise = head, *hare = tortoise->next; 175 for (int power = 1, lambda = 1; hare != NULL; lambda++) { 176 assert(tortoise != hare && "cycle in free list"); 177 if (power == lambda) { 178 tortoise = hare; 179 power *= 2; 180 lambda = 0; 181 } 182 hare = hare->next; 183 } 184 185 /* Validate invariants on individual regions. */ 186 for (region_t *r = head; r != NULL; r = r->next) { 187 188 assert(r != NULL && "a region includes NULL"); 189 190 assert(extract_paddr(r) != 0 && "a region includes physical frame 0"); 191 192 assert(r->size > 0 && "a region has size 0"); 193 194 assert(r->size >= sizeof(region_t) && "a region has an invalid size"); 195 196 assert(UINTPTR_MAX - (uintptr_t)r >= r->size && 197 "a region overflows in virtual address space"); 198 199 assert(UINTPTR_MAX - extract_paddr(r) >= r->size && 200 "a region overflows in physical address space"); 201 } 202 203 /* Ensure no regions overlap. */ 204 for (region_t *r = head; r != NULL; r = r->next) { 205 for (region_t *p = head; p != r; p = p->next) { 206 207 uintptr_t r_vaddr UNUSED = (uintptr_t)r, 208 p_vaddr UNUSED = (uintptr_t)p, 209 r_paddr UNUSED = extract_paddr(r), 210 p_paddr UNUSED = extract_paddr(p); 211 212 assert(!((r_vaddr >= p_vaddr && r_vaddr < p_vaddr + p->size) || 213 (p_vaddr >= r_vaddr && p_vaddr < r_vaddr + r->size)) && 214 "two regions overlap in virtual address space"); 215 216 assert(!((r_paddr >= p_paddr && r_paddr < p_paddr + p->size) || 217 (p_paddr >= r_paddr && p_paddr < r_paddr + r->size)) && 218 "two regions overlap in physical address space"); 219 } 220 } 221} 222#else 223#define check_consistency() 224#endif 225 226#ifdef NDEBUG 227#define STATS(arg) do { } while (0) 228#else 229/* Statistics functionality. */ 230 231#define STATS(arg) do { arg; } while (0) 232 233static camkes_dma_stats_t stats; 234 235static size_t total_allocation_bytes; 236 237const camkes_dma_stats_t *camkes_dma_stats(void) 238{ 239 if (stats.total_allocations > 0) { 240 stats.average_allocation = total_allocation_bytes / stats.total_allocations; 241 } else { 242 stats.average_allocation = 0; 243 } 244 return (const camkes_dma_stats_t *)&stats; 245} 246#endif 247 248/* Defragment the free list. Can safely be called at any time. The complexity 249 * of this function is at least O(n��). 250 * 251 * Over time the free list can evolve to contain separate chunks that are 252 * actually contiguous, both physically and virtually. This fragmentation can 253 * result in unnecessary allocation failures, so this function is provided to 254 * coalesce such chunks. For example, the free list may end up like: 255 * 256 * ��������������������������������������������� ��������������������������������������������� ��������������������������������������������� 257 * ���vaddr: 0x4000��� ���vaddr: 0x7000��� ���vaddr: 0x2000��� 258 * ���size : 0x1000��� ���size : 0x2000��� ���size : 0x2000��� 259 * ���next : ���������������next : ���������������next : NULL��� 260 * ���paddr: 0x6000��� ���paddr: 0x8000��� ���paddr: 0x4000��� 261 * ��������������������������������������������� ��������������������������������������������� ��������������������������������������������� 262 * 263 * after defragmentation, the free list will look like: 264 * 265 * ��������������������������������������������� ��������������������������������������������� 266 * ���vaddr: 0x2000��� ���vaddr: 0x7000��� 267 * ���size : 0x3000��� ���size : 0x2000��� 268 * ���next : ���������������next : NULL��� 269 * ���paddr: 0x4000��� ���paddr: 0x8000��� 270 * ��������������������������������������������� ��������������������������������������������� 271 */ 272static void defrag(void) 273{ 274 assert(head != NULL && 275 "attempted defragmentation of DMA free list before initialisation"); 276 277 check_consistency(); 278 279 STATS(stats.defragmentations++); 280 281 /* For each region in the free list... */ 282 for (region_t *pprev = NULL, *p = head; p != NULL; pprev = p, p = p->next) { 283 284 uintptr_t p_vstart = (uintptr_t)p, /* start virtual address */ 285 p_vend = (uintptr_t)p + p->size, /* end virtual address */ 286 p_pstart = extract_paddr(p), /* start physical address */ 287 p_pend = p_pstart + p->size; /* end physical address */ 288 289 /* For each region *before* this one... */ 290 for (region_t *qprev = NULL, *q = head; q != p; qprev = q, q = q->next) { 291 292 uintptr_t q_vstart = (uintptr_t)q, 293 q_vend = (uintptr_t)q + q->size, 294 q_pstart = extract_paddr(q), 295 q_pend = q_pstart + q->size; 296 297 /* We could not have entered this loop if 'p' was the head of the 298 * free list. 299 */ 300 assert(pprev != NULL); 301 302 if (p_vstart == q_vend && p_pstart == q_pend) { 303 /* 'p' immediately follows the region 'q'. Coalesce 'p' into 304 * 'q'. 305 */ 306 grow_node(q, p->size); 307 remove_node(pprev, p); 308 STATS(stats.coalesces++); 309 /* Bump the outer scan back to the node we just modified 310 * (accounting for the fact that the next thing we will do is 311 * increment 'p' as we go round the loop). The reason for this 312 * is that our changes may have made further coalescing 313 * possible between the node we modified and where 'p' is 314 * currently pointing. 315 */ 316 if (qprev == NULL) { 317 /* We just coalesced 'p' into the free list head; reset the 318 * scan. Note that we'll end up skipping the head as we go 319 * round the loop, but that's fine because the body of the 320 * outer loop does nothing for the first iteration. 321 */ 322 p = head; 323 } else { 324 p = qprev; 325 } 326 break; 327 } 328 329 if (p_vend == q_vstart && p_pend == q_pstart) { 330 /* 'p' immediately precedes the region 'q'. Coalesce 'q' into 331 * 'p'. 332 */ 333 grow_node(p, q->size); 334 remove_node(qprev, q); 335 STATS(stats.coalesces++); 336 337 /* Similar to above, we bump the outer scan back so we 338 * reconsider 'p' again the next time around the loop. Now that 339 * we've expanded 'p' there may be further coalescing we can 340 * do. 341 */ 342 p = pprev; 343 break; 344 } 345 } 346 } 347 348 check_consistency(); 349} 350 351int camkes_dma_init(void *dma_pool, size_t dma_pool_sz, size_t page_size) 352{ 353 354 /* The caller should have passed us a valid DMA pool. */ 355 if (page_size != 0 && (page_size <= sizeof(region_t) || 356 (uintptr_t)dma_pool % page_size != 0)) { 357 return -1; 358 } 359 360 /* Bail out if the caller gave us an insufficiently aligned pool. */ 361 if (dma_pool == NULL || (uintptr_t)dma_pool % alignof(region_t) != 0) { 362 return -1; 363 } 364 365 /* We're going to store bookkeeping in the DMA pages, that we expect to be 366 * power-of-2-sized, so the bookkeeping struct better be 367 * power-of-2-aligned. Your compiler should always guarantee this. 368 */ 369 static_assert(IS_POWER_OF_2(alignof(region_t)), 370 "region_t is not power-of-2-aligned"); 371 372 /* The page size the caller has given us should be a power of 2 and at least 373 * the alignment of `region_t`. 374 */ 375 if (page_size != 0 && (!IS_POWER_OF_2(page_size) || 376 page_size < alignof(region_t))) { 377 return -1; 378 } 379 380 STATS(stats.heap_size = dma_pool_sz); 381 STATS(stats.minimum_heap_size = dma_pool_sz); 382 STATS(stats.minimum_allocation = SIZE_MAX); 383 STATS(stats.minimum_alignment = INT_MAX); 384 385 if (page_size != 0) { 386 /* The caller specified a page size. Excellent; we don't have to work 387 * it out for ourselves. 388 */ 389 for (void *base = dma_pool; base < dma_pool + dma_pool_sz; 390 base += page_size) { 391 assert((uintptr_t)base % alignof(region_t) == 0 && 392 "we misaligned the DMA pool base address during " 393 "initialisation"); 394 camkes_dma_free(base, page_size); 395 } 396 } else { 397 /* The lazy caller didn't bother giving us a page size. Manually scan 398 * for breaks in physical contiguity. 399 */ 400 for (void *base = dma_pool; base < dma_pool + dma_pool_sz;) { 401 uintptr_t base_paddr = camkes_dma_get_paddr(base); 402 if (base_paddr == 0) { 403 /* The caller gave us a region backed by non-reversible frames. */ 404 return -1; 405 } 406 void *limit = base + 1; 407 uintptr_t next_expected_paddr = base_paddr + 1; 408 while (limit < dma_pool + dma_pool_sz) { 409 if (limit == NULL) { 410 /* The user gave us a region that wraps virtual memory. */ 411 return -1; 412 } 413 uintptr_t limit_paddr = camkes_dma_get_paddr(limit); 414 if (limit_paddr == 0) { 415 /* The user gave us a region that wraps physical memory. */ 416 return -1; 417 } 418 if (limit_paddr != next_expected_paddr) { 419 /* We've hit a physical contiguity break (== frame 420 * boundary). 421 */ 422 break; 423 } 424 limit++; 425 next_expected_paddr++; 426 } 427 /* Only add the region if it's large enough to actually contain the 428 * necessary metadata. 429 */ 430 if (base + sizeof(region_t) >= limit) { 431 assert((uintptr_t)base % alignof(region_t) == 0 && 432 "we misaligned the DMA pool base address during " 433 "initialisation"); 434 camkes_dma_free(base, limit - base); 435 } 436 437 /* Move to the next region. We always need to be considering a 438 * region aligned for bookkeeping, so bump the address up if 439 * necessary. 440 */ 441 base = (void *)ALIGN_UP((uintptr_t)limit, alignof(region_t)); 442 } 443 } 444 445 check_consistency(); 446 447 return 0; 448} 449 450static dma_frame_t *get_frame_desc(void *ptr) 451{ 452 for (dma_frame_t **frame = __start__dma_frames; 453 frame < __stop__dma_frames; frame++) { 454 uintptr_t base = (uintptr_t)ptr & ~MASK(ffs((*frame)->size) - 1); 455 if (base == (*frame)->vaddr) { 456 return *frame; 457 } 458 } 459 return NULL; 460 461} 462 463uintptr_t camkes_dma_get_paddr(void *ptr) 464{ 465 dma_frame_t *frame = get_frame_desc(ptr); 466 uintptr_t offset = (uintptr_t)ptr & MASK(ffs(frame->size) - 1); 467 if (frame) { 468 seL4_ARCH_Page_GetAddress_t res = seL4_ARCH_Page_GetAddress(frame->cap); 469 ERR_IF(res.error != 0, camkes_error, ((camkes_error_t) { 470 .type = CE_SYSCALL_FAILED, 471 .instance = get_instance_name(), 472 .description = "failed to reverse virtual mapping to a DMA frame", 473 .syscall = ARCHPageGetAddress, 474 .error = res.error, 475 }), ({ 476 return (uintptr_t)NULL; 477 })); 478 return res.paddr + offset; 479 480 } else { 481 return (uintptr_t)NULL; 482 } 483} 484 485seL4_CPtr camkes_dma_get_cptr(void *ptr) 486{ 487 488 for (dma_frame_t **frame = __start__dma_frames; 489 frame < __stop__dma_frames; frame++) { 490 uintptr_t base = (uintptr_t)ptr & ~MASK(ffs((*frame)->size) - 1); 491 if (base == (*frame)->vaddr) { 492 return (*frame)->cap; 493 } 494 } 495 return seL4_CapNull; 496} 497 498/* Allocate a DMA region. This is refactored out of camkes_dma_alloc simply so 499 * we can more eloquently express reattempting allocations. 500 */ 501static void *alloc(size_t size, int align, bool cached) 502{ 503 504 /* Our caller should have rounded 'size' up. */ 505 assert(size >= sizeof(region_t)); 506 507 /* The caller should have ensured 'size' is aligned to the bookkeeping 508 * struct, so that the bookkeeping we may have to write for the remainder 509 * chunk of a region is aligned. 510 */ 511 assert(size % alignof(region_t) == 0); 512 513 /* The caller should have ensured that the alignment requirements are 514 * sufficient that any chunk we ourselves allocate, can later host 515 * bookkeeping in its initial bytes when it is freed. 516 */ 517 assert(align >= (int)alignof(region_t)); 518 519 /* For each region in the free list... */ 520 for (region_t *prev = NULL, *p = head; p != NULL; prev = p, p = p->next) { 521 522 if (p->size >= size) { 523 /* This region or a subinterval of it may satisfy this request. */ 524 525 /* Scan subintervals of 'size' bytes within this region from the 526 * end. We scan the region from the end as an optimisation because 527 * we can avoid relocating the region's metadata if we find a 528 * satisfying allocation that doesn't involve the initial 529 * sizeof(region_t) bytes. 530 */ 531 for (void *q = (void *)ROUND_DOWN((uintptr_t)p + p->size - size, align); 532 q == (void *)p || q >= (void *)p + sizeof(region_t); 533 q -= align) { 534 535 if (q + size == (void *)p + p->size || 536 q + size + sizeof(region_t) <= (void *)p + p->size) { 537 /* Found something that satisfies the caller's 538 * requirements and leaves us enough room to turn the cut 539 * off suffix into a new chunk. 540 */ 541 542 uintptr_t base_paddr = try_extract_paddr(p); 543 544 /* There are four possible cases here... */ 545 546 if (p == q) { 547 if (p->size == size) { 548 /* 1. We're giving them the whole chunk; we can 549 * just remove this node. 550 */ 551 remove_node(prev, p); 552 } else { 553 /* 2. We're giving them the start of the chunk. We 554 * need to extract the end as a new node. 555 */ 556 region_t *r = (void *)p + size; 557 if (base_paddr != 0) { 558 /* PERF: The original chunk had a physical 559 * address. Save the overhead of a future 560 * syscall by reusing this information now. 561 */ 562 save_paddr(r, base_paddr + size); 563 } else { 564 r->paddr_upper = 0; 565 } 566 r->size = p->size - size; 567 replace_node(prev, p, r); 568 } 569 } else if (q + size == (void *)p + p->size) { 570 /* 3. We're giving them the end of the chunk. We need 571 * to shrink the existing node. 572 */ 573 shrink_node(p, size); 574 } else { 575 /* 4. We're giving them the middle of a chunk. We need 576 * to shrink the existing node and extract the end as a 577 * new node. 578 */ 579 size_t start_size = (uintptr_t)q - (uintptr_t)p; 580 region_t *end = q + size; 581 if (base_paddr != 0) { 582 /* PERF: An optimisation as above. */ 583 save_paddr(end, base_paddr + start_size + size); 584 } else { 585 end->paddr_upper = 0; 586 } 587 end->size = p->size - size - start_size; 588 prepend_node(end); 589 p->size = start_size; 590 } 591 592 return q; 593 } 594 } 595 } 596 } 597 598 /* No satisfying region found. */ 599 return NULL; 600} 601 602void *camkes_dma_alloc(size_t size, int align, bool cached) 603{ 604 605 STATS(({ 606 stats.total_allocations++; 607 if (size < stats.minimum_allocation) 608 { 609 stats.minimum_allocation = size; 610 } 611 if (size > stats.maximum_allocation) 612 { 613 stats.maximum_allocation = size; 614 } 615 if (align < stats.minimum_alignment) 616 { 617 stats.minimum_alignment = align; 618 } 619 if (align > stats.maximum_alignment) 620 { 621 stats.maximum_alignment = align; 622 } 623 total_allocation_bytes += size; 624 })); 625 626 if (head == NULL) { 627 /* Nothing in the free list. */ 628 STATS(stats.failed_allocations_out_of_memory++); 629 return NULL; 630 } 631 632 if (align == 0) { 633 /* No alignment requirements. */ 634 align = 1; 635 } 636 637 if (align < (int)alignof(region_t)) { 638 /* Allocating something with a weaker alignment constraint than our 639 * bookkeeping data may lead to us giving out a chunk of memory that is 640 * not sufficiently aligned to host bookkeeping data when it is 641 * returned to us. Bump it up in this case. 642 */ 643 align = alignof(region_t); 644 } 645 646 if (size < sizeof(region_t)) { 647 /* We need to bump up smaller allocations because they may be freed at 648 * a point when they cannot be conjoined with another chunk in the heap 649 * and therefore need to become host to region_t metadata. 650 */ 651 size = sizeof(region_t); 652 } 653 654 if (size % alignof(region_t) != 0) { 655 /* We need to ensure that 'size' is aligned to the bookkeeping 656 * struct, so that the remainder chunk of a region is aligned. 657 */ 658 size = ROUND_UP(size, alignof(region_t)); 659 } 660 661 void *p = alloc(size, align, cached); 662 663 if (p == NULL && size > sizeof(region_t)) { 664 /* We failed to allocate a matching region, but we may be able to 665 * satisfy this allocation by defragmenting the free list and 666 * re-attempting. 667 */ 668 defrag(); 669 p = alloc(size, align, cached); 670 671 if (p != NULL) { 672 STATS(stats.succeeded_allocations_on_defrag++); 673 } 674 } 675 676 check_consistency(); 677 678 if (p == NULL) { 679 STATS(stats.failed_allocations_other++); 680 } else { 681 STATS(({ 682 stats.current_outstanding += size; 683 if (stats.heap_size - stats.current_outstanding < stats.minimum_heap_size) 684 { 685 stats.minimum_heap_size = stats.heap_size - stats.current_outstanding; 686 } 687 })); 688 } 689 690 return p; 691} 692 693void camkes_dma_free(void *ptr, size_t size) 694{ 695 696 /* Allow the user to free NULL. */ 697 if (ptr == NULL) { 698 return; 699 } 700 701 /* If the user allocated a region that was too small, we would have rounded 702 * up the size during allocation. 703 */ 704 if (size < sizeof(region_t)) { 705 size = sizeof(region_t); 706 } 707 708 /* The 'size' of all allocated chunk should be aligned to the bookkeeping 709 * struct, so bump it to the actual size we have allocated. 710 */ 711 if (size % __alignof__(region_t) != 0) { 712 size = ROUND_UP(size, __alignof__(region_t)); 713 } 714 715 /* We should have never allocated memory that is insufficiently aligned to 716 * host bookkeeping data now that it has been returned to us. 717 */ 718 assert((uintptr_t)ptr % alignof(region_t) == 0); 719 720 STATS(({ 721 if (size >= stats.current_outstanding) 722 { 723 stats.current_outstanding = 0; 724 } else 725 { 726 stats.current_outstanding -= size; 727 } 728 })); 729 730 region_t *p = ptr; 731 p->paddr_upper = 0; 732 p->size = size; 733 prepend_node(p); 734 735 check_consistency(); 736} 737 738/* The remaining functions are to comply with the ps_io_ops-related interface 739 * from libplatsupport. Note that many of the operations are no-ops, because 740 * our case is somewhat constrained. 741 */ 742 743static void *dma_alloc(void *cookie UNUSED, size_t size, int align, int cached, 744 ps_mem_flags_t flags UNUSED) 745{ 746 747 /* Ignore the cached argument and allocate an uncached page. The assumption 748 * here is that any caller that wants a cached page only wants it so as an 749 * optimisation. Their usage pattern is expected to be (1) write repeatedly 750 * to the page, (2) flush the page, (3) pass it to a device. In the case of 751 * an uncached frame we simply lose some performance in (1) and make (2) a 752 * no-op. 753 */ 754 (void)cached; 755 756 return camkes_dma_alloc(size, align, cached); 757} 758 759static void dma_free(void *cookie UNUSED, void *addr, size_t size) 760{ 761 camkes_dma_free(addr, size); 762} 763 764/* All CAmkES DMA pages are pinned for the duration of execution, so this is 765 * effectively a no-op. 766 */ 767static uintptr_t dma_pin(void *cookie UNUSED, void *addr, size_t size UNUSED) 768{ 769 return camkes_dma_get_paddr(addr); 770} 771 772/* As above, all pages are pinned so this is also a no-op. */ 773static void dma_unpin(void *cookie UNUSED, void *addr UNUSED, size_t size UNUSED) 774{ 775} 776 777static void dma_cache_op(void *cookie UNUSED, void *addr UNUSED, 778 size_t size UNUSED, dma_cache_op_t op UNUSED) 779{ 780 /* x86 DMA is usually cache coherent and doesn't need maintenance ops */ 781#ifdef CONFIG_ARCH_ARM 782 dma_frame_t *frame = get_frame_desc(addr); 783 if (frame == NULL) { 784 ZF_LOGE("Could not perform cache op"); 785 return; 786 } 787 788 /* If the frame is uncached then the cache op isn't required. This assumes 789 that if there is a setup where multiple software components have mappings 790 to the same DMA memory with different cache attributes then the component 791 with the cached mappings will be performing the cache maintenance ops and 792 not the uncached one.*/ 793 if (frame->cached == 0) { 794 return; 795 } 796 seL4_CPtr frame_cap = frame->cap; 797 if (frame_cap == seL4_CapNull) { 798 ZF_LOGE("Could not perform cache op"); 799 return; 800 } 801 802 803 size_t page_size_of_region = frame->size; 804 size_t frame_start_offset = (uintptr_t)addr % page_size_of_region; 805 if ((frame_start_offset + size) > frame->size) { 806 ZF_LOGE("Specified range is outside the bounds of the dataport"); 807 return; 808 } 809 810 switch (op) { 811 case DMA_CACHE_OP_CLEAN: 812 seL4_ARM_Page_Clean_Data(frame_cap, frame_start_offset, frame_start_offset + size); 813 break; 814 case DMA_CACHE_OP_INVALIDATE: 815 seL4_ARM_Page_Invalidate_Data(frame_cap, frame_start_offset, frame_start_offset + size); 816 break; 817 case DMA_CACHE_OP_CLEAN_INVALIDATE: 818 seL4_ARM_Page_CleanInvalidate_Data(frame_cap, frame_start_offset, frame_start_offset + size); 819 break; 820 default: 821 ZF_LOGF("Invalid cache_op %d", op); 822 return; 823 } 824#endif 825} 826 827int camkes_dma_manager(ps_dma_man_t *man) 828{ 829 if (man == NULL) { 830 ZF_LOGE("man is NULL"); 831 return -1; 832 } 833 man->dma_alloc_fn = dma_alloc; 834 man->dma_free_fn = dma_free; 835 man->dma_pin_fn = dma_pin; 836 man->dma_unpin_fn = dma_unpin; 837 man->dma_cache_op_fn = dma_cache_op; 838 return 0; 839} 840 841/* Legacy functions */ 842void *camkes_dma_alloc_page(void) 843{ 844 return camkes_dma_alloc(PAGE_SIZE_4K, PAGE_SIZE_4K, false); 845} 846void camkes_dma_free_page(void *ptr) 847{ 848 return camkes_dma_free(ptr, PAGE_SIZE_4K); 849} 850