/**
 * \file
 * \brief General Numa functions
 *
 */

/*
 * Copyright (c) 2014, ETH Zurich.
 * All rights reserved.
 *
 * This file is distributed under the terms in the attached LICENSE file.
 * If you do not find this file, copies can be found by writing to:
 * ETH Zurich D-INFK, Universitaetstr. 6, CH-8092 Zurich. Attn: Systems Group.
 */

#include <stdio.h>
#include <string.h>

#include <barrelfish/barrelfish.h>

#include <numa.h>
#include <bitmap.h>
#include "numa_internal.h"

uint8_t numa_initialized = 0x0;

/**
 * \brief bitmask that is allocated by the library with bits representing all
 *        nodes on which the calling task may allocate memory.
 */
struct bitmap *numa_all_nodes_ptr;

/**
 * \brief points to a bitmask that is allocated by the library and left all zeroes.
 */
struct bitmap *numa_no_nodes_ptr;

/**
 * \brief points to a bitmask that is allocated by the library with bits
 *        representing all cpus on which the calling task may execute.
 */
struct bitmap *numa_all_cpus_ptr;

/**
 * \brief data structure representing the numa topology
 */
struct numa_topology numa_topology;


/**
 * \brief checks if numa support is available
 *
 * \returns NUMA_ERR_LIB_INIT if initialization failed; the behavior of all
 *                            other libnuma functions is then undefined
 *          SYS_ERR_OK        NUMA functionality is available
 *
 * This function must be called before any other function of libnuma. The
 * library is initialized during the call to numa_available().
 */
errval_t numa_available(void)
{
    errval_t err;

    if (numa_initialized) {
        return (numa_initialized == 0xff) ? NUMA_ERR_LIB_INIT : SYS_ERR_OK;
    }

    NUMA_DEBUG_INIT("Initializing libnuma...\n");

    err = numa_get_topology_from_skb(&numa_topology);
    if (err_is_fail(err)) {
        goto out_err;
    }

#if NUMA_DEBUG_ENABLED
    numa_dump_topology(&numa_topology);
#endif

    numa_all_cpus_ptr = numa_allocate_cpumask();
    if (numa_all_cpus_ptr == NULL) {
        err = LIB_ERR_MALLOC_FAIL;
        goto out_err1;
    }

    for (coreid_t i = 0; i < numa_topology.num_cores; ++i) {
        bitmap_set_bit(numa_all_cpus_ptr, numa_topology.cores[i]->id);
    }

#if NUMA_DEBUG_ENABLED
    bitmap_dump(numa_all_cpus_ptr);
#endif

    numa_all_nodes_ptr = numa_allocate_nodemask();
    if (numa_all_nodes_ptr == NULL) {
        err = LIB_ERR_MALLOC_FAIL;
        goto out_err2;
    }

    for (nodeid_t i = 0; i < numa_topology.num_nodes; ++i) {
        bitmap_set_bit(numa_all_nodes_ptr, numa_topology.nodes[i].id);
    }

#if NUMA_DEBUG_ENABLED
    bitmap_dump(numa_all_nodes_ptr);
#endif

    numa_no_nodes_ptr = numa_allocate_nodemask();
    if (numa_no_nodes_ptr == NULL) {
        err = LIB_ERR_MALLOC_FAIL;
        goto out_err3;
    }

    numa_alloc_bind_mask = numa_allocate_nodemask();
    if (numa_alloc_bind_mask == NULL) {
        err = LIB_ERR_MALLOC_FAIL;
        goto out_err4;
    }

    numa_alloc_interleave_mask = numa_allocate_nodemask();
    if (numa_alloc_interleave_mask == NULL) {
        err = LIB_ERR_MALLOC_FAIL;
        goto out_err5;
    }

#if NUMA_DEBUG_ENABLED
    bitmap_dump(numa_no_nodes_ptr);
#endif

    numa_initialized = 0x1;

    return SYS_ERR_OK;

    /* cleanup in case of error */
    out_err5:
    free(numa_alloc_bind_mask);
    out_err4:
    free(numa_no_nodes_ptr);
    out_err3:
    free(numa_all_nodes_ptr);
    out_err2:
    free(numa_all_cpus_ptr);
    out_err1:
    numa_free_topology(&numa_topology);
    out_err:
    numa_initialized = 0xff;
    return err_push(err, NUMA_ERR_LIB_INIT);
}
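
/*
 * Usage sketch (illustration only, not part of the library): a domain calls
 * numa_available() once before any other libnuma function and gives up if
 * NUMA support cannot be initialized. USER_PANIC_ERR() is assumed to be the
 * usual Barrelfish error-reporting macro.
 *
 *     errval_t err = numa_available();
 *     if (err_is_fail(err)) {
 *         USER_PANIC_ERR(err, "libnuma could not be initialized");
 *     }
 */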

/**
 * \brief returns the highest node number available on the current system.
 *
 * \returns ID of the max NUMA node
 */
nodeid_t numa_max_node(void)
{
    numa_check_init();

    // XXX: assume nodes are 0..n-1
    return numa_topology.num_nodes - 1;
}
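
/*
 * Example (sketch): since node IDs are assumed contiguous, a caller can visit
 * every node by counting up to numa_max_node(). PRIuCOREID is assumed to be
 * the core-ID format macro analogous to the PRIuNODEID macro used elsewhere
 * in this file.
 *
 *     for (nodeid_t node = 0; node <= numa_max_node(); node++) {
 *         printf("node %" PRIuNODEID ": %" PRIuCOREID " cores\n",
 *                node, numa_num_node_cpus(node));
 *     }
 */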

/**
 * \brief returns the highest ID of the present cores
 *
 * \returns ID of the highest-numbered core in the system
 */
coreid_t numa_max_core(void)
{
    numa_check_init();

    // XXX: assume the IDs are 0...n-1
    return numa_topology.num_cores - 1;
}

/**
 * \brief returns the current node the domain is running on
 *
 * \return ID of the current node
 */
nodeid_t numa_current_node(void)
{
    numa_check_init();

    // XXX: do we need disp_get_core_id() here?
    return numa_topology.cores[disp_get_current_core_id()]->node->id;
}

/**
 * \brief returns the size of the node mask
 *
 * \return size of the node mask
 */
nodeid_t numa_num_possible_nodes(void)
{
    numa_check_init();

    return NUMA_MAX_NUMNODES;
}

/**
 * \brief Obtains the number of all memory nodes in the system
 *
 * \return number of memory nodes in the system
 *
 * Returns the number of memory nodes in the system. This count includes any
 * nodes that are currently disabled.
 */
nodeid_t numa_num_configured_nodes(void)
{
    numa_check_init();

    // XXX: we have all nodes configured
    return numa_topology.num_nodes;
}

/**
 * \brief obtains the nodes the domain is allowed to allocate memory from
 *
 * \returns bitmask representing the allowed nodes
 *
 * Returns the mask of nodes from which the process is allowed to allocate
 * memory in its current cpuset context.
 */
struct bitmap *numa_get_mems_allowed(void)
{
    numa_check_init();

    /* we do not have restrictions yet. */
    return numa_all_nodes_ptr;
}

/**
 * \brief returns the total number of CPUs in the system
 *
 * \returns total number of CPUs in the system
 *
 * Returns the number of cpus in the system. This count includes any cpus that
 * are currently disabled.
 */
coreid_t numa_num_configured_cpus(void)
{
    numa_check_init();

    // XXX: we assume that we can schedule all cores
    return numa_topology.num_cores;
}

/**
 * \brief returns the number of cpus that the calling domain is allowed to use.
 *
 * \returns number of CPUs the domain is allowed to use
 */
coreid_t numa_num_task_cpus(void)
{
    numa_check_init();

    // XXX: we do not have any restrictions yet, return all cores
    return numa_topology.num_cores;
}

/**
 * \brief returns the number of nodes on which the calling domain is allowed to
 *        allocate memory
 *
 * \returns number of nodes the domain is allowed to use
 */
nodeid_t numa_num_task_nodes(void)
{
    numa_check_init();

    // XXX: We do not have any restrictions yet. just return all nodes
    return numa_topology.num_nodes;
}

/**
 * \brief obtains the size of a node
 *
 * \param node  ID of the NUMA node
 * \param freep returns the number of available bytes of the node
 *
 * \returns size of the node in bytes
 *
 * Returns the memory size of a node. If the argument freep is not NULL, it is
 * used to return the amount of free memory on the node. On error the function
 * returns NUMA_NODE_INVALID.
 */
size_t numa_node_size(nodeid_t node, uintptr_t *freep)
{
    numa_check_init();
    numa_check_node_id(node);

    if (freep) {
        // TODO: figure out how much memory is left in the node
    }

    return (numa_topology.nodes[node].mem_limit - numa_topology.nodes[node].mem_base);
}

/**
 * \brief obtains the base address of the numa node
 *
 * \param node  ID of the NUMA node
 *
 * \returns physical address of the start of the numa node
 *          NUMA_NODE_INVALID if the node does not exist
 */
lpaddr_t numa_node_base(nodeid_t node)
{
    numa_check_init();
    numa_check_node_id(node);

    return numa_topology.nodes[node].mem_base;
}
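
/*
 * Example (sketch): numa_node_base() and numa_node_size() together describe
 * the physical address range [base, base + size) of a node. PRIxLPADDR is
 * assumed to be the lpaddr_t format macro.
 *
 *     nodeid_t node = numa_current_node();
 *     lpaddr_t base = numa_node_base(node);
 *     size_t size = numa_node_size(node, NULL);
 *     printf("node %" PRIuNODEID ": 0x%" PRIxLPADDR "..0x%" PRIxLPADDR "\n",
 *            node, base, (lpaddr_t)(base + size));
 */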

/**
 * \brief returns the preferred node of the current task.
 *
 * \returns node ID where memory is preferably allocated
 */
nodeid_t numa_preferred(void)
{
    numa_check_init();
    return numa_current_node();
}

/**
 * \brief sets the preferred node for the current task to node
 *
 * \param node  ID of the node to set preferred
 *
 * The system will attempt to allocate memory from the preferred node, but will
 * fall back to other nodes if no memory is available on the preferred node.
 *
 * Passing -1 as the node specifies local allocation.
 */
void numa_set_preferred(nodeid_t node)
{
    numa_check_init();

    if (node >= numa_topology.num_nodes) {
        NUMA_WARNING("Node ID exceeds number of available nodes %" PRIuNODEID "/%"
                     PRIuNODEID, node, numa_topology.num_nodes);
        return;
    }

    numa_topology.preferred = node;
}
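
/*
 * Example (sketch): prefer allocations from the node the domain is currently
 * running on; out-of-range node IDs are rejected with a warning.
 *
 *     numa_set_preferred(numa_current_node());
 */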


/**
 * \brief runs the current domain on a specific node.
 *
 * \param node  ID of the node to run the domain on
 *
 * \returns SYS_ERR_OK on SUCCESS
 *          errval on FAILURE
 *
 * Passing -1 permits the kernel to schedule on all nodes again.
 */
errval_t numa_run_on_node(nodeid_t node)
{
    numa_check_init();

    USER_PANIC("running the domain on a specific node is not supported yet\n");
    return SYS_ERR_OK;
}


/**
 * \brief runs the current domain only on nodes specified in nodemask.
 *
 * \param nodemask bitmap representing the nodes to run the domain on
 *
 * \returns SYS_ERR_OK on SUCCESS
 *          errval on FAILURE
 */
errval_t numa_run_on_node_mask(struct bitmap *nodemask)
{
    numa_check_init();

    USER_PANIC("running the domain on a specific node is not supported yet\n");
    return SYS_ERR_OK;
}


/**
 * \brief returns a mask of nodes on which the current task is allowed to run.
 *
 * \returns bitmap representing the nodes the domain is allowed to run on
 */
struct bitmap *numa_get_run_node_mask(void)
{
    numa_check_init();

    return numa_all_nodes_ptr;
}


/**
 * \brief specify the memory bind policy
 *
 * \param strict numa policy to apply
 *
 * Specifies whether calls that bind memory to a specific node should use the
 * preferred policy or a strict policy.
 */
void numa_set_bind_policy(numa_policy_t strict)
{
    numa_check_init();

    if (strict == NUMA_POLICY_STRICT) {
        numa_topology.bind = strict;
    } else {
        numa_topology.bind = NUMA_POLICY_PREFERRED;
    }
}


/**
 * \brief enable or disable the strict allocation policy
 *
 * \param strict numa policy to apply
 *
 * Sets a flag that says whether the functions allocating on specific nodes
 * should use a strict policy. Strict means the allocation will fail if the
 * memory cannot be allocated on the target node.
 */
void numa_set_strict(numa_policy_t strict)
{
    numa_check_init();

    if (strict == NUMA_POLICY_STRICT) {
        numa_topology.strict = strict;
    } else {
        numa_topology.strict = NUMA_POLICY_PREFERRED;
    }
}
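
/*
 * Example (sketch): make node-bound allocations fail instead of falling back
 * to another node, using the same policy constant the two setters above
 * already check for.
 *
 *     numa_set_bind_policy(NUMA_POLICY_STRICT);
 *     numa_set_strict(NUMA_POLICY_STRICT);
 */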


/**
 * \brief reports the distance in the machine topology between two nodes
 *
 * \param from source node to measure the distance
 * \param to   target node to measure the distance
 *
 * \returns distance between the two nodes
 *          NUMA_NODE_INVALID if one of the nodes does not exist
 *
 * The factors are a multiple of 10. A node has distance 10 to itself.
 */
uint32_t numa_distance(nodeid_t from, nodeid_t to)
{
    numa_check_init();

    if (from >= numa_topology.num_nodes || to >= numa_topology.num_nodes) {
        return (uint32_t)NUMA_NODE_INVALID;
    }

    return numa_topology.distances[from * numa_topology.num_nodes + to];
}
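
/*
 * Example (sketch): find the remote node closest to the local one by scanning
 * the distance factors; 10 means local, larger values mean farther away.
 *
 *     nodeid_t local = numa_current_node();
 *     nodeid_t nearest = local;
 *     uint32_t best = UINT32_MAX;
 *     for (nodeid_t n = 0; n <= numa_max_node(); n++) {
 *         uint32_t d = numa_distance(local, n);
 *         if (n != local && d < best) {
 *             best = d;
 *             nearest = n;
 *         }
 *     }
 */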


/**
 * \brief retrieves a bitmask of the cpus on which a domain may run
 *
 * \param did   domain ID
 * \param mask  returned bitmask
 *
 * \returns SYS_ERR_OK on success
 *          errval on FAILURE
 */
errval_t numa_sched_getaffinity(domainid_t did, struct bitmap *mask)
{
    numa_check_init();

    assert(!"NYI");
    return SYS_ERR_OK;
}


/**
 * \brief sets a domain's allowed CPUs to those specified in mask.
 *
 * \param did   domain ID
 * \param mask  bitmap representing the CPUs
 *
 * \returns SYS_ERR_OK on success
 *          errval on FAILURE
 */
errval_t numa_sched_setaffinity(domainid_t did, struct bitmap *mask)
{
    numa_check_init();

    assert(!"NYI");
    return SYS_ERR_OK;
}


/**
 * \brief returns the page size
 *
 * \returns the number of bytes in a page
 */
size_t numa_pagesize(void)
{
    numa_check_init();

    return numa_topology.pagesize;
}


/**
 * \brief converts a node number to a bitmask of CPUs
 *
 * \param node  the ID of the node
 * \param mask  bitmap representing the CPUs of this node
 *
 * \return  SYS_ERR_OK on SUCCESS
 *          NUMA_ERR_NODEID_INVALID if the node ID is out of range
 *          NUMA_ERR_BITMAP_RANGE if the bitmap is too small
 *
 * The user must pass a bitmask structure with a mask buffer long enough to
 * represent all possible CPUs.
 */
errval_t numa_node_to_cpus(nodeid_t node, struct bitmap *mask)
{
    numa_check_init();

    if (!(node < numa_topology.num_nodes)) {
        return NUMA_ERR_NODEID_INVALID;
    }

    if (bitmap_get_nbits(mask) < numa_topology.num_cores) {
        return NUMA_ERR_BITMAP_RANGE;
    }

    bitmap_clear_all(mask);

    struct numa_node *nnode = &numa_topology.nodes[node];
    for (coreid_t i = 0; i < nnode->num_cores; ++i) {
        bitmap_set_bit(mask, nnode->cores[i].id);
    }

#if NUMA_DEBUG_ENABLED
    bitmap_dump(mask);
#endif

    return SYS_ERR_OK;
}
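
/*
 * Example (sketch): assuming numa_allocate_cpumask() sizes the bitmap for all
 * configured CPUs, as its use for numa_all_cpus_ptr above suggests, it
 * satisfies the size requirement of numa_node_to_cpus().
 *
 *     struct bitmap *cpus = numa_allocate_cpumask();
 *     errval_t err = numa_node_to_cpus(0, cpus);
 *     if (err_is_fail(err)) {
 *         // invalid node ID or bitmap too small
 *     }
 */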


/**
 * \brief returns the node that a cpu belongs to
 *
 * \param cpu   ID of the core
 *
 * \returns node ID on SUCCESS
 *          NUMA_NODE_INVALID on FAILURE
 */
nodeid_t numa_node_of_cpu(coreid_t cpu)
{
    numa_check_init();

    numa_check_core_id(cpu);

    return numa_topology.cores[cpu]->node->id;
}


/**
 * \brief gets the number of cores for the given numa node
 *
 * \param node NUMA node to get the number of cores
 *
 * \returns number of cores for the node
 */
coreid_t numa_num_node_cpus(nodeid_t node)
{
    if (node >= numa_topology.num_nodes) {
        NUMA_WARNING("Node ID exceeds number of available nodes: %" PRIuNODEID "/%"
                      PRIuNODEID, node, numa_topology.num_nodes);
        return 0;
    }

    return numa_topology.nodes[node].num_cores;
}
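
/*
 * Sanity-check sketch: the per-node core counts should add up to the total
 * number of configured CPUs.
 *
 *     coreid_t total = 0;
 *     for (nodeid_t n = 0; n <= numa_max_node(); n++) {
 *         total += numa_num_node_cpus(n);
 *     }
 *     assert(total == numa_num_configured_cpus());
 */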
583