/**
 * \file
 * \brief General Numa functions
 *
 */

/*
 * Copyright (c) 2014, ETH Zurich.
 * All rights reserved.
 *
 * This file is distributed under the terms in the attached LICENSE file.
 * If you do not find this file, copies can be found by writing to:
 * ETH Zurich D-INFK, Universitaetstr. 6, CH-8092 Zurich. Attn: Systems Group.
 */

/* NOTE: the original include targets were stripped; the header names below
 *       are assumed, only "numa_internal.h" is given explicitly. */
#include <stdio.h>
#include <string.h>

#include <barrelfish/barrelfish.h>

#include <numa.h>
#include <bitmap.h>

#include "numa_internal.h"

///< numa interleave mask for allocations
struct bitmap *numa_alloc_interleave_mask;

///< numa bind mask for allocations
struct bitmap *numa_alloc_bind_mask;

/**
 * \brief validates the given page size and sets the flags
 *
 * \param pagesize  desired page size
 * \param flags     returns the VREGION_FLAGS_*
 *
 * \returns activated page size
 *
 * If the page size is not known or not supported, BASE_PAGE_SIZE is returned.
 */
static size_t validate_page_size(size_t pagesize, vregion_flags_t *flags)
{
#if defined(__x86_64__) || defined(__aarch64__)
    /* use huge, large or base pages on 64-bit architectures */
    switch(pagesize) {
        case LARGE_PAGE_SIZE:
            *flags = VREGION_FLAGS_READ_WRITE | VREGION_FLAGS_LARGE;
            return LARGE_PAGE_SIZE;
        case HUGE_PAGE_SIZE:
            *flags = VREGION_FLAGS_READ_WRITE | VREGION_FLAGS_HUGE;
            return HUGE_PAGE_SIZE;
        case BASE_PAGE_SIZE:
        default:
            *flags = VREGION_FLAGS_READ_WRITE;
            return BASE_PAGE_SIZE;
    }
#elif defined(__x86_32__) || (defined(__arm__) && !defined(__aarch64__))
    /* use base or large pages on 32-bit architectures */
    switch(pagesize) {
        case LARGE_PAGE_SIZE:
            *flags = VREGION_FLAGS_READ_WRITE | VREGION_FLAGS_LARGE;
            return LARGE_PAGE_SIZE;
        case BASE_PAGE_SIZE:
        default:
            *flags = VREGION_FLAGS_READ_WRITE;
            return BASE_PAGE_SIZE;
    }
#else
    /* fall back to base pages on unknown architectures */
    *flags = VREGION_FLAGS_READ_WRITE;
    return BASE_PAGE_SIZE;
#endif
}

/**
 * \brief returns the current interleave mask
 *
 * \returns bitmask representing the current interleave state
 *
 * Returns the current interleave mask if the task's memory allocation policy
 * is page interleaved. Otherwise, this function returns an empty mask.
 */
struct bitmap *numa_get_interleave_mask(void)
{
    assert(numa_alloc_interleave_mask);

    struct bitmap *im = numa_allocate_nodemask();
    if (im == NULL) {
        return NULL;
    }

    bitmap_copy(im, numa_alloc_interleave_mask);

    return im;
}

/**
 * \brief sets the memory interleave mask for the current task to nodemask
 *
 * \param nodemask  bitmask representing the nodes
 *
 * All new memory allocations are page interleaved over all nodes in the
 * interleave mask. Interleaving can be turned off again by passing an empty
 * mask.
 *
 * This bitmask is considered to be a hint. Fallback to other nodes may be
 * possible.
 */
void numa_set_interleave_mask(struct bitmap *nodemask)
{
    assert(numa_alloc_interleave_mask);

    if (!nodemask) {
        bitmap_clear_all(numa_alloc_interleave_mask);
        return;
    }

    if (bitmap_get_nbits(nodemask) < NUMA_MAX_NUMNODES) {
        NUMA_WARNING("supplied interleave mask (%p) has too few bits!", nodemask);
        return;
    }

    bitmap_copy(numa_alloc_interleave_mask, nodemask);

    /* clear out the invalid nodes */
    bitmap_clear_range(numa_alloc_interleave_mask, numa_num_configured_nodes(),
                       bitmap_get_nbits(numa_alloc_interleave_mask));

    /* clear the bind mask as we are using interleaving mode now */
    bitmap_clear_all(numa_alloc_bind_mask);
}

/**
 * \brief binds the current task and its children to the nodes specified in
 *        nodemask.
 *
 * \param nodemask  bitmap representing the nodes
 */
void numa_bind(struct bitmap *nodemask)
{
    USER_PANIC("Not yet implemented");
}
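
/*
 * Example (illustrative sketch, not part of the library): how a client might
 * build a nodemask and install it as the interleave mask using the functions
 * above. It assumes the library has been initialised and that at least two
 * nodes are configured; error handling and releasing the nodemask are omitted
 * for brevity.
 *
 *     struct bitmap *mask = numa_allocate_nodemask();
 *     if (mask != NULL) {
 *         // interleave new allocations over nodes 0 and 1
 *         bitmap_set_bit(mask, 0);
 *         bitmap_set_bit(mask, 1);
 *         numa_set_interleave_mask(mask);
 *     }
 */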
/**
 * \brief sets the memory allocation policy for the calling task to local
 *        allocation.
 */
void numa_set_localalloc(void)
{
    assert(numa_alloc_bind_mask);
    assert(numa_alloc_interleave_mask);

    /* clear interleave mode */
    bitmap_clear_all(numa_alloc_interleave_mask);

    bitmap_clear_all(numa_alloc_bind_mask);
    bitmap_set_bit(numa_alloc_bind_mask, numa_current_node());
}

/**
 * \brief sets the memory allocation mask.
 *
 * \param nodemask  bitmap representing the nodes
 *
 * The task will only allocate memory from the nodes set in nodemask.
 *
 * An empty mask, or a mask containing only nodes that are not allowed,
 * results in an error.
 */
errval_t numa_set_membind(struct bitmap *nodemask)
{
    assert(numa_alloc_bind_mask);
    assert(numa_alloc_interleave_mask);

    if (!nodemask) {
        return NUMA_ERR_BITMAP_PARSE;
    }

    if (bitmap_get_nbits(nodemask) < NUMA_MAX_NUMNODES) {
        NUMA_WARNING("supplied membind mask (%p) has too few bits!", nodemask);
        return NUMA_ERR_BITMAP_RANGE;
    }

    /* copy new membind mask and clear out invalid bits */
    bitmap_copy(numa_alloc_bind_mask, nodemask);
    bitmap_clear_range(numa_alloc_bind_mask, numa_num_configured_nodes(),
                       bitmap_get_nbits(numa_alloc_bind_mask));

    if (bitmap_get_weight(numa_alloc_bind_mask) == 0) {
        /* cannot bind to no node; restore the all-nodes mask */
        bitmap_copy(numa_alloc_bind_mask, numa_all_nodes_ptr);
        return NUMA_ERR_NUMA_MEMBIND;
    }

    /* disable interleaving mode */
    bitmap_clear_all(numa_alloc_interleave_mask);

    return SYS_ERR_OK;
}

/**
 * \brief returns the mask of nodes from which memory can currently be
 *        allocated.
 *
 * \return bitmap of nodes from which memory can be allocated
 */
struct bitmap *numa_get_membind(void)
{
    assert(numa_alloc_bind_mask);

    struct bitmap *im = numa_allocate_nodemask();
    if (im == NULL) {
        return NULL;
    }

    bitmap_copy(im, numa_alloc_bind_mask);

    return im;
}

/**
 * \brief allocates memory on a specific node.
 *
 * \param size      size of the region in bytes
 * \param node      ID of the node to allocate from
 * \param pagesize  page size to be used for the mapping
 *
 * \returns pointer to memory region
 *
 * The size argument will be rounded up to a multiple of the system page size.
 * If the specified node is externally denied to this process, this call will
 * fail. The memory must be freed with numa_free(). On errors NULL is returned.
 */
void *numa_alloc_onnode(size_t size, nodeid_t node, size_t pagesize)
{
    errval_t err;

    /*
     * TODO: keep track of the allocated numa frames
     */

    NUMA_DEBUG_ALLOC("allocate on node %" PRIuNODEID "\n", node);

    /* validate page size and round up size */
    vregion_flags_t flags;
    pagesize = validate_page_size(pagesize, &flags);
    size = (size + pagesize - 1) & ~(pagesize - 1);

    /* allocate frame */
    struct capref frame;
    size_t ret_size;
    err = numa_frame_alloc_on_node(&frame, size, node, &ret_size);
    if (err_is_fail(err)) {
        return NULL;
    }

    NUMA_DEBUG_ALLOC("mapping allocated frame\n");

    void *addr;
    err = vspace_map_one_frame_attr_aligned(&addr, size, frame, flags,
                                            pagesize, NULL, NULL);
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "vspace_map_one_frame_attr_aligned");
        err = numa_frame_free(frame);
        if (err_is_fail(err)) {
            USER_PANIC_ERR(err, "nested error while freeing frame");
        }
        return NULL;
    }

    NUMA_DEBUG_ALLOC("frame mapped @ %p\n", addr);

    return addr;
}
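
/*
 * Example (illustrative sketch, not part of the library): allocating a buffer
 * on an explicit node with numa_alloc_onnode(). BASE_PAGE_SIZE is used as the
 * mapping page size; numa_free() is the intended release path, although it is
 * still unimplemented further down in this file.
 *
 *     size_t bytes = 4 * BASE_PAGE_SIZE;
 *     void *buf = numa_alloc_onnode(bytes, 0, BASE_PAGE_SIZE);
 *     if (buf != NULL) {
 *         memset(buf, 0, bytes);      // memory is backed by node 0
 *         numa_free(buf, bytes);
 *     }
 */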
/**
 * \brief allocates size bytes of memory on the local node
 *
 * \param size      size of the memory region in bytes
 * \param pagesize  page size to be used for the mapping
 *
 * \returns pointer to memory region
 *
 * The memory must be freed with numa_free(). On errors NULL is returned.
 */
void *numa_alloc_local(size_t size, size_t pagesize)
{
    nodeid_t node = numa_current_node();

    NUMA_DEBUG_ALLOC("allocate on local node %" PRIuNODEID "\n", node);

    return numa_alloc_onnode(size, node, pagesize);
}

/**
 * \brief allocates size bytes of memory page interleaved on all nodes.
 *
 * \param size      size of the memory region in bytes
 * \param pagesize  preferred page size to be used
 *
 * \returns pointer to the mapped memory region
 *
 * Should only be used for large areas consisting of multiple pages.
 * The memory must be freed with numa_free(). On errors NULL is returned.
 */
void *numa_alloc_interleaved(size_t size, size_t pagesize)
{
    return numa_alloc_interleaved_subset(size, pagesize, numa_all_nodes_ptr);
}

/**
 * \brief allocates size bytes of memory page interleaved on the nodes
 *        specified in the nodemask.
 *
 * \param size      size of the memory region in bytes
 * \param nodemask  subset of nodes to consider for allocation
 * \param pagesize  preferred page size to be used
 *
 * \returns pointer to the mapped memory region
 *
 * Should only be used for large areas consisting of multiple pages.
 * The memory must be freed with numa_free(). On errors NULL is returned.
 */
void *numa_alloc_interleaved_subset(size_t size, size_t pagesize,
                                    struct bitmap *nodemask)
{
    errval_t err;

    /* clear out invalid bits */
    bitmap_clear_range(nodemask, numa_num_configured_nodes(),
                       bitmap_get_nbits(nodemask));

    /* get the number of nodes */
    nodeid_t nodes = bitmap_get_weight(nodemask);
    if (nodes == 0) {
        return NULL;
    }

    NUMA_DEBUG_ALLOC("allocating interleaved using %" PRIuNODEID " nodes\n", nodes);

    assert(nodes <= numa_num_configured_nodes());

    vregion_flags_t flags;
    pagesize = validate_page_size(pagesize, &flags);
    size_t stride = pagesize;

    size_t node_size = size / nodes;
    node_size = (node_size + pagesize - 1) & ~(pagesize - 1);

    /* update total size as this may change due to rounding of node sizes */
    size = nodes * node_size;

    /*
     * XXX: we may want to keep track of numa alloced frames
     */

    struct memobj_numa *memobj = calloc(1, sizeof(struct memobj_numa));
    err = memobj_create_numa(memobj, size, 0, numa_num_configured_nodes(), stride);
    if (err_is_fail(err)) {
        return NULL;
    }

    bitmap_bit_t node = bitmap_get_first(nodemask);
    nodeid_t node_idx = 0;
    while(node != BITMAP_BIT_NONE) {
        struct capref frame;
        err = numa_frame_alloc_on_node(&frame, node_size, (nodeid_t)node, NULL);
        if (err_is_fail(err)) {
            DEBUG_ERR(err, "numa_frame_alloc_on_node");
            goto out_err;
        }
        memobj->m.f.fill(&memobj->m, node_idx, frame, 0);
        ++node_idx;
        node = bitmap_get_next(nodemask, node);
    }

    struct vregion *vreg = calloc(1, sizeof(struct vregion));
    if (vreg == NULL) {
        goto out_err;
    }
    err = vregion_map_aligned(vreg, get_current_vspace(), &memobj->m, 0, size,
                              flags, pagesize);
    if (err_is_fail(err)) {
        DEBUG_ERR(err, "vregion_map_aligned");
        goto out_err;
    }

    err = memobj->m.f.pagefault(&memobj->m, vreg, 0, 0);
    if (err_is_fail(err)) {
        vregion_destroy(vreg);
        free(vreg);
        DEBUG_ERR(err, "memobj.m.f.pagefault");
        goto out_err;
    }

    // XXX - Is this right?
    return (void *)(uintptr_t)vregion_get_base_addr(vreg);

    out_err:
    /* unfill and delete the frames that have already been inserted */
    for (int i = 0; i < node_idx; ++i) {
        struct capref frame;
        memobj->m.f.unfill(&memobj->m, i, &frame, NULL);
        cap_delete(frame);
    }

    return NULL;
}
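
/*
 * Example (illustrative sketch, not part of the library): interleaving a
 * larger region over the first two configured nodes. The mask construction
 * mirrors the interleave-mask example earlier in this file; error handling
 * is omitted.
 *
 *     struct bitmap *mask = numa_allocate_nodemask();
 *     bitmap_set_bit(mask, 0);
 *     bitmap_set_bit(mask, 1);
 *
 *     size_t bytes = 64 * BASE_PAGE_SIZE;
 *     void *buf = numa_alloc_interleaved_subset(bytes, BASE_PAGE_SIZE, mask);
 *     // pages of buf alternate between node 0 and node 1 (stride = pagesize)
 */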
/**
 * \brief allocates size bytes of memory with the current NUMA policy.
 *
 * \param size      size of the memory region in bytes
 * \param pagesize  preferred page size to be used
 *
 * \returns pointer to the mapped memory region
 *
 * The memory must be freed with numa_free(). On errors NULL is returned.
 */
void *numa_alloc(size_t size, size_t pagesize)
{
    NUMA_DEBUG_ALLOC("allocate according to policy\n");

    /* check if we use interleaved mode */
    if (bitmap_get_weight(numa_alloc_interleave_mask)) {
        return numa_alloc_interleaved_subset(size, pagesize,
                                             numa_alloc_interleave_mask);
    }

    /* check membind */
    if (bitmap_get_weight(numa_alloc_bind_mask) == 1) {
        nodeid_t node = (nodeid_t) bitmap_get_first(numa_alloc_bind_mask);
        return numa_alloc_onnode(size, node, pagesize);
    }

    /* TODO:
     *  - handle the case where multiple nodes are set in membind
     */

    /* just return some memory */
    return malloc(size);
}

/**
 * \brief changes the size of the memory area.
 *
 * \param old_addr  pointer to the old memory region
 * \param old_size  size of the old memory region
 * \param new_size  new size to allocate
 */
void *numa_realloc(void *old_addr, size_t old_size, size_t new_size)
{
    assert(!"NYI");
    return 0;
}

/**
 * \brief frees size bytes of memory starting at start
 *
 * \param start  start of the memory region
 * \param size   number of bytes to free
 *
 * The memory must have been allocated previously by one of the numa_alloc*
 * functions.
 */
void numa_free(void *start, size_t size)
{
    assert(!"NYI");
}

/**
 * \brief allocates a frame on a specific node
 *
 * \param dest      capref to store the frame
 * \param size      size of the frame to allocate
 * \param node      node on which the frame should be allocated
 * \param ret_size  returned size of the frame capability
 *
 * \returns SYS_ERR_OK on SUCCESS
 *          errval on FAILURE
 */
errval_t numa_frame_alloc_on_node(struct capref *dest, size_t size,
                                  nodeid_t node, size_t *ret_size)
{
    errval_t err;

    NUMA_DEBUG_ALLOC("allocating frame on node %" PRIuNODEID "\n", node);

    uint64_t min_base, max_limit;
    ram_get_affinity(&min_base, &max_limit);

    if (node >= numa_topology.num_nodes) {
        return NUMA_ERR_NODEID_INVALID;
    }

    uint64_t node_base = numa_node_base(node);
    uint64_t node_limit = node_base + numa_node_size(node, NULL);

    NUMA_DEBUG_ALLOC("setting affinity to 0x%" PRIx64 "..0x%" PRIx64 "\n",
                     node_base, node_limit);

    ram_set_affinity(node_base, node_limit);

    err = frame_alloc(dest, size, ret_size);

    ram_set_affinity(min_base, max_limit);

    NUMA_DEBUG_ALLOC("restore affinity to 0x%" PRIx64 "..0x%" PRIx64 "\n",
                     min_base, max_limit);

    return err;
}

/**
 * \brief frees a previously allocated frame
 *
 * \param frame  capability to free
 */
errval_t numa_frame_free(struct capref frame)
{
    assert(!"NYI");
    return 0;
}

/**
 * \brief moves a list of pages in the address space of the current domain
 *
 * \param did     the domain ID
 * \param count   number of pages to move
 * \param pages   list of pages
 * \param nodes   list of nodes to which the pages can be moved
 * \param status  returns the outcome for each page
 * \param flags   flags for moving the pages
 *
 * \returns SYS_ERR_OK on SUCCESS
 */
errval_t numa_move_pages(domainid_t did, size_t count, void **pages,
                         const nodeid_t *nodes, errval_t *status, int flags)
{
    assert(!"NYI");
    return 0;
}

/**
 * \brief migrate a domain from one set of nodes to another
 *
 * \param did        the domain ID
 * \param fromnodes  bitmap representing the current nodes
 * \param tonodes    bitmap representing the destination nodes
 *
 * \returns SYS_ERR_OK on SUCCESS
 */
errval_t numa_migrate_pages(domainid_t did, struct bitmap *fromnodes,
                            struct bitmap *tonodes)
{
    assert(!"NYI");
    return 0;
}
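
/*
 * Example (illustrative sketch, not part of the library): using
 * numa_frame_alloc_on_node() directly to obtain a frame capability backed by
 * a specific node's RAM and mapping it read/write. The call temporarily
 * narrows the RAM allocation affinity to the node's address range and
 * restores it afterwards, as the implementation above shows. Error handling
 * of the mapping step is omitted.
 *
 *     struct capref frame;
 *     size_t retbytes;
 *     errval_t err = numa_frame_alloc_on_node(&frame, BASE_PAGE_SIZE, 1,
 *                                             &retbytes);
 *     if (err_is_ok(err)) {
 *         void *addr;
 *         err = vspace_map_one_frame_attr_aligned(&addr, retbytes, frame,
 *                                                 VREGION_FLAGS_READ_WRITE,
 *                                                 BASE_PAGE_SIZE, NULL, NULL);
 *     }
 */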