1/**
2 * \file
3 * \brief header specifying the interface of libnuma
4 *
5 * This is derived from:
6 *
7 * Linux man pages "numa"
8 * libnuma from http://oss.sgi.com/projects/libnuma/
9 *
10 */
11
12/*
13 * Copyright (c) 2014, ETH Zurich.
14 * All rights reserved.
15 *
16 * This file is distributed under the terms in the attached LICENSE file.
17 * If you do not find this file, copies can be found by writing to:
18 * ETH Zurich D-INFK, CAB F.78, Universitaetstr. 6, CH-8092 Zurich.
19 * Attn: Systems Group.
20 */
21
22#ifndef __NUMA_H
23#define __NUMA_H 1
24
25#ifdef __cplusplus
26extern "C" {
27#endif
28
29///< the maximum number of nodes supported
30#define NUMA_MAX_NUMNODES 16
31
32#if NUMA_MAX_NUMNODES > MAX_NODEID
33#error maximum node bigger than maximum nodeid
34#endif
35
/// node ID that specifies the local node for allocation
#define NUMA_NODE_LOCAL ((nodeid_t)-1)
38
/// error value for the numa node size (see numa_node_size())
#define NUMA_NODE_INVALID ((uintptr_t)-1)
41
/// error value for invalid cores
/// (no trailing semicolon, so the macro can be used inside expressions)
#define NUMA_CORE_INVALID ((coreid_t)-1)
44
45
/**
 * \brief selects the NUMA memory policy
 */
typedef enum numa_policy {
    NUMA_POLICY_DEFAULT   = 0,  ///< default numa policy
    NUMA_POLICY_STRICT    = 1,  ///< strict numa policy
    NUMA_POLICY_PREFERRED = 2   ///< preferred memory policy
} numa_policy_t;
51
/// typedef for the nodemask
/// NOTE(review): the other declarations visible in this header use
/// `struct bitmap`; confirm that the `struct bitmask` tag is intended here.
typedef struct bitmask nodemask_t;
54
/**
 * \brief checks if numa support is available
 *
 * \returns NUMA_ERR_NOT_AVAILABLE: all other functions are undefined
 *          SYS_ERR_OK:             NUMA functionality is available
 *
 * This function must be called before any of the other functions of libnuma.
 * During the call to numa_available() the library also gets initialized.
 */
errval_t numa_available(void);

/**
 * \brief returns the highest node number available on the current system.
 *
 * \returns ID of the max NUMA node
 */
nodeid_t numa_max_node(void);

/**
 * \brief returns the highest ID of the present cores
 *
 * \returns the maximum core ID in the system
 */
coreid_t numa_max_core(void);

/**
 * \brief returns the current node the domain is running on
 *
 * \returns ID of the current node
 */
nodeid_t numa_current_node(void);

/**
 * \brief returns the size of the node mask
 *
 * \returns size of the node mask
 */
nodeid_t numa_num_possible_nodes(void);
93
94/**
95 * \brief Obtains the maximum number of nodes the system can handle
96 *
97 * \return maximum nodes supported
98 *
99 * returns the number of the highest possible node in a system. In other words,
100 * the size of a kernel type nodemask_t (in bits) minus 1
101 */
102static inline nodeid_t numa_max_possible_node(void)
103{
104    return numa_num_possible_nodes() - 1;
105}
106
/**
 * \brief Obtains the number of all memory nodes in the system
 *
 * \return number of memory nodes in the system
 *
 * Returns the number of memory nodes in the system. This count includes any
 * nodes that are currently disabled.
 */
nodeid_t numa_num_configured_nodes(void);

/**
 * \brief obtains the nodes the domain is allowed to allocate memory from
 *
 * \returns bitmask representing the allowed nodes
 *
 * Returns the mask of nodes from which the process is allowed to allocate
 * memory in its current cpuset context.
 */
struct bitmap *numa_get_mems_allowed(void);

/**
 * \brief returns the total number of CPUs in the system
 *
 * \returns total number of CPUs in the system
 *
 * Returns the number of cpus in the system. This count includes any cpus that
 * are currently disabled.
 */
coreid_t numa_num_configured_cpus(void);
136
/**
 * \brief points to a bitmask that is allocated by the library with bits
 *        representing all nodes on which the calling task may allocate memory.
 */
extern struct bitmap *numa_all_nodes_ptr;

/**
 * \brief points to a bitmask that is allocated by the library and left all zeroes.
 */
extern struct bitmap *numa_no_nodes_ptr;

/**
 * \brief points to a bitmask that is allocated by the library with bits
 *        representing all cpus on which the calling task may execute.
 */
extern struct bitmap *numa_all_cpus_ptr;

/**
 * \brief returns the number of cpus that the calling domain is allowed to use.
 *
 * \returns number of CPUs the domain is allowed to use
 */
coreid_t numa_num_task_cpus(void);

/**
 * \brief returns the number of nodes on which the calling domain is allowed to
 *        allocate memory
 *
 * \returns number of nodes the domain is allowed to use
 */
nodeid_t numa_num_task_nodes(void);
168
/**
 * \brief parses line, which is a character string, into a bitmap
 *
 * \param line  character string to parse
 * \param mask  bitmap to store the result
 *
 * \returns SYS_ERR_OK            on SUCCESS
 *          NUMA_ERR_BITMAP_PARSE on FAILURE
 *
 * The string contains the hexadecimal representation of a bit map.
 *
 * XXX according to the man pages this function is only used internally
 */
errval_t numa_parse_bitmap(char *line, struct bitmap *mask);

/**
 * \brief parses a character string list of nodes into a bit mask.
 *
 * \param string character string to parse
 *
 * \returns NUMA bitmask on SUCCESS
 *          NULL if the string is invalid
 *
 * The string is a comma-separated list of node numbers or node ranges.
 * Examples: 1-5,7,10 !4-5 +0-3
 *
 * If the string length is zero, then numa_no_nodes_ptr is returned.
 */
struct bitmap *numa_parse_nodestring(char *string);

/**
 * \brief parses a character string list of cpus into a bit mask.
 *
 * \param string character string to parse
 *
 * \returns NUMA bitmask on SUCCESS
 *          NULL if the string is invalid
 *
 * The string is a comma-separated list of cpu numbers or cpu ranges.
 * Examples: 1-5,7,10 !4-5 +0-3
 */
struct bitmap *numa_parse_cpustring(char *string);
211
/**
 * \brief obtains the size of a node
 *
 * \param node  ID of the NUMA node
 * \param freep if not NULL, used to return the amount of free memory on the node
 *
 * \returns size of the node in bytes
 *          NUMA_NODE_INVALID on error
 *
 * Returns the memory size of a node. If the argument freep is not NULL, it is
 * used to return the amount of free memory on the node.
 */
size_t numa_node_size(nodeid_t node, uintptr_t *freep);
225
/// alias for the 64bit variant of the NUMA node size; forwards both arguments
/// unchanged to numa_node_size()
#define numa_node_size64(_node, _freep) numa_node_size(_node, _freep)
228
/**
 * \brief obtains the base address of the numa node
 *
 * \param node  ID of the NUMA node
 *
 * \returns physical address of the start of the numa node
 */
lpaddr_t numa_node_base(nodeid_t node);

/**
 * \brief returns the preferred node of the current task.
 *
 * \returns node ID where memory is preferably allocated
 */
nodeid_t numa_preferred(void);

/**
 * \brief  sets the preferred node for the current task to node
 *
 * \param node  ID of the node to set preferred
 *
 * The system will attempt to allocate memory from the preferred node, but will
 * fall back to other nodes if no memory is available on the preferred node.
 *
 * Passing -1 (NUMA_NODE_LOCAL) as the node argument specifies local allocation.
 */
void numa_set_preferred(nodeid_t node);
254
/**
 * \brief   returns the current interleave mask
 *
 * \returns bitmask representing the current interleave state
 *
 * Returns the current interleave mask if the task's memory allocation policy
 * is page interleaved. Otherwise, this function returns an empty mask.
 */
struct bitmap *numa_get_interleave_mask(void);

/**
 * \brief sets the memory interleave mask for the current task to nodemask
 *
 * \param nodemask bitmask representing the nodes
 *
 * All new memory allocations are page interleaved over all nodes in the
 * interleave mask. Interleaving can be turned off again by passing an empty
 * mask.
 *
 * This bitmask is considered to be a hint. Fallback to other nodes may be
 * possible.
 */
void numa_set_interleave_mask(struct bitmap *nodemask);

/**
 * \brief binds the current task and its children to the nodes specified in nodemask.
 *
 * \param nodemask  bitmap representing the nodes
 */
void numa_bind(struct bitmap *nodemask);

/**
 * \brief sets the memory allocation policy for the calling task to local allocation.
 */
void numa_set_localalloc(void);

/**
 * \brief sets the memory allocation mask.
 *
 * \param nodemask  bitmap representing the nodes
 *
 * \returns error value; an empty mask or a mask containing nodes that are not
 *          allowed results in an error
 *
 * The task will only allocate memory from the nodes set in nodemask.
 */
errval_t numa_set_membind(struct bitmap *nodemask);

/**
 * \brief returns the mask of nodes from which memory can currently be allocated.
 *
 * \return bitmap of nodes from which memory can be allocated
 */
struct bitmap *numa_get_membind(void);
306
/**
 * \brief allocates memory on a specific node.
 *
 * \param size      size of the region in bytes
 * \param node      ID of the node to allocate from
 * \param pagesize  page size to be used for the mapping
 *
 * \returns pointer to memory region
 *          NULL on error
 *
 * The size argument will be rounded up to a multiple of the system page size.
 * If the specified node is externally denied to this process, this call will
 * fail. The memory must be freed with numa_free().
 */
void *numa_alloc_onnode(size_t size, nodeid_t node, size_t pagesize);

/**
 * \brief allocates size bytes of memory on the local node
 *
 * \param size      size of the memory region in bytes
 * \param pagesize  page size to be used for the mapping
 *
 * \returns pointer to memory region
 *          NULL on error
 *
 * The memory must be freed with numa_free().
 */
void *numa_alloc_local(size_t size, size_t pagesize);

/**
 * \brief allocates size bytes of memory page interleaved on all nodes.
 *
 * \param size      size of the memory region in bytes
 * \param pagesize  page size to be used for the mapping
 *
 * \returns pointer to the mapped memory region
 *          NULL on error
 *
 * Should only be used for large areas consisting of multiple pages.
 * The memory must be freed with numa_free().
 */
void *numa_alloc_interleaved(size_t size, size_t pagesize);

/**
 * \brief allocates size bytes of memory page interleaved over the nodes
 *        specified in the nodemask.
 *
 * \param size      size of the memory region in bytes
 * \param pagesize  page size to be used for the mapping
 * \param nodemask  subset of nodes to consider for allocation
 *
 * \returns pointer to the mapped memory region
 *          NULL on error
 *
 * Should only be used for large areas consisting of multiple pages.
 * The memory must be freed with numa_free().
 */
void *numa_alloc_interleaved_subset(size_t size, size_t pagesize,
                                    struct bitmap *nodemask);

/**
 * \brief allocates size bytes of memory with the current NUMA policy.
 *
 * \param size      size of the memory region in bytes
 * \param pagesize  page size to be used for the mapping
 *
 * \returns pointer to the mapped memory region
 *          NULL on error
 *
 * The memory must be freed with numa_free().
 */
void *numa_alloc(size_t size, size_t pagesize);

/**
 * \brief changes the size of the memory area.
 *
 * \param old_addr  pointer to the old memory region
 * \param old_size  size of the old memory region
 * \param new_size  new size to allocate
 *
 * \returns pointer to the resized memory region
 *          NOTE(review): presumably NULL on error like the numa_alloc*
 *          functions - confirm against the implementation
 */
void *numa_realloc(void *old_addr, size_t old_size, size_t new_size);

/**
 * \brief frees size bytes of memory starting at start
 *
 * \param start start of the memory region
 * \param size  number of bytes to free
 *
 * The memory must have been previously allocated by one of the numa_alloc*
 * functions.
 */
void numa_free(void *start, size_t size);
392
393
/**
 * \brief allocates a frame on a specific node
 *
 * \param dest      capref to store the frame
 * \param size      size of the frame to allocate
 * \param node      node on which the frame should be allocated
 * \param ret_size  returned size of the frame capability
 *
 * \returns SYS_ERR_OK on SUCCESS
 *          errval on FAILURE
 */
errval_t numa_frame_alloc_on_node(struct capref *dest,
                                  size_t size,
                                  nodeid_t node,
                                  size_t *ret_size);
409
410/**
411 * \brief allocates a frame on the local node
412 *
413 * \param dest      capref to store the frame
414 * \param size      size of the frame to allocated
415 * \param ret_size  returned size of the frame capability
416 *
417 * \returns SYS_ERR_OK on SUCCESS
418 *          errval on FAILURE
419 */
420static inline errval_t numa_frame_alloc_local(struct capref *dest,
421                                              size_t size,
422                                              size_t *ret_size)
423{
424    return numa_frame_alloc_on_node(dest, size, numa_current_node(), ret_size);
425}
426
/**
 * \brief frees a previously allocated frame
 *
 * \param frame capability to free
 *
 * \returns error value
 *          NOTE(review): presumably SYS_ERR_OK on success like the other
 *          frame functions - confirm against the implementation
 */
errval_t numa_frame_free(struct capref frame);
433
/**
 * \brief runs the current domain on a specific node.
 *
 * \param node  ID of the node to run the domain on
 *
 * \returns SYS_ERR_OK on SUCCESS
 *          errval on FAILURE
 *
 * Passing -1 permits the kernel to schedule on all nodes again.
 */
errval_t numa_run_on_node(nodeid_t node);

/**
 * \brief runs the current domain only on nodes specified in nodemask.
 *
 * \param nodemask bitmap representing the nodes to run the domain on
 *
 * \returns SYS_ERR_OK on SUCCESS
 *          errval on FAILURE
 */
errval_t numa_run_on_node_mask(struct bitmap *nodemask);

/**
 * \brief returns a mask of CPUs on which the current task is allowed to run.
 *
 * \returns bitmap representing the coreids the domain is allowed to run on
 *
 * NOTE(review): the function name suggests a mask of nodes while this text
 * describes CPUs/coreids - confirm which one the implementation returns.
 */
struct bitmap *numa_get_run_node_mask(void);

/**
 * \brief specify the memory bind policy
 *
 * \param strict numa policy to apply
 *
 * Specifies whether calls that bind memory to a specific node should use the
 * preferred policy or a strict policy.
 */
void numa_set_bind_policy(numa_policy_t strict);

/**
 * \brief enable or disable the strict allocation policy
 *
 * \param strict numa policy to apply
 *
 * Sets a flag that says whether the functions allocating on specific nodes
 * should use a strict policy. Strict means the allocation will fail if the
 * memory cannot be allocated on the target node.
 */
void numa_set_strict(numa_policy_t strict);
482
/**
 * \brief reports the distance in the machine topology between two nodes
 *
 * \param from source node to measure the distance
 * \param to   target node to measure the distance
 *
 * \returns distance between two nodes
 *          0 iff it cannot be determined
 *
 * The factors are a multiple of 10. A node has distance 10 to itself.
 */
uint32_t numa_distance(nodeid_t from, nodeid_t to);

/**
 * \brief retrieves a bitmask of the cpus on which a domain may run
 *
 * \param did   domain ID
 * \param mask  returned bitmask
 *
 * \returns SYS_ERR_OK on success
 *          errval on FAILURE
 */
errval_t numa_sched_getaffinity(domainid_t did, struct bitmap *mask);

/**
 * \brief sets a domain's allowed cpus to those cpus specified in mask.
 *
 * \param did   domain ID
 * \param mask  bitmap representing the CPUs
 *
 * \returns SYS_ERR_OK on success
 *          errval on FAILURE
 */
errval_t numa_sched_setaffinity(domainid_t did, struct bitmap *mask);

/**
 * \brief returns the page size
 *
 * \returns the number of bytes in a page
 */
size_t numa_pagesize(void);
524
/**
 * \brief converts a node number to a bitmask of CPUs
 *
 * \param node  the ID of the node
 * \param mask  bitmap representing the CPUs of this node
 *
 * \return  SYS_ERR_OK on SUCCESS
 *          NUMA_ERR_BITMAP_RANGE on FAILURE (too small bitmap)
 *
 * The user must pass a bitmask structure with a mask buffer long enough to
 * represent all possible cpus.
 */
errval_t numa_node_to_cpus(nodeid_t node, struct bitmap *mask);


/**
 * \brief gets the number of cores for the given numa node
 *
 * \param node NUMA node to get the number of cores
 *
 * \returns number of cores for the node
 */
coreid_t numa_num_node_cpus(nodeid_t node);

/**
 * \brief gets the system's core ID for a node/local core id configuration
 *
 * \param node           ID of the NUMA node
 * \param local_core_id  core ID local to the node
 *                       NOTE(review): presumably an index among the cores of
 *                       this node - confirm against the implementation
 *
 * \returns the system's core ID
 *          NOTE(review): presumably NUMA_CORE_INVALID on failure - confirm
 */
coreid_t numa_node_get_core(nodeid_t node, coreid_t local_core_id);


/**
 * \brief returns the node that a cpu belongs to
 *
 * \param cpu   ID of the core
 *
 * \returns node ID on SUCCESS
 *          NUMA_NODE_INVALID on FAILURE
 *
 * NOTE(review): NUMA_NODE_INVALID is defined as (uintptr_t)-1 while this
 * function returns nodeid_t; if nodeid_t is narrower, the value is converted
 * on return - confirm how callers compare against it.
 */
nodeid_t numa_node_of_cpu(coreid_t cpu);
566
/**
 * \brief allocates a bit mask to represent the cores in the system
 *
 * \returns pointer to a new bitmask
 *          NULL on failure
 */
struct bitmap *numa_allocate_cpumask(void);

/**
 * \brief frees a previously allocated CPU bitmask
 *
 * \param cpumask pointer to a previously allocated CPU bitmask
 */
void numa_free_cpumask(struct bitmap *cpumask);

/**
 * \brief allocates a bit mask to represent the nodes in the system
 *
 * \returns pointer to a new bitmask
 *          NULL on failure
 */
struct bitmap *numa_allocate_nodemask(void);

/**
 * \brief frees a previously allocated node bitmask
 *
 * \param nodemask pointer to a previously allocated node bitmask
 */
void numa_free_nodemask(struct bitmap *nodemask);
596
/**
 * \brief allocates a bitmask structure and its associated bit mask
 *
 * \param n the number of bits
 *
 * \returns pointer to the bitmask
 *          NULL on error
 */
struct bitmap *numa_bitmask_alloc(unsigned int n);

/**
 * \brief sets all bits in the bit mask to 0.
 *
 * \param bmp   pointer to the bitmap
 *
 * \returns pointer to the cleared bit map
 */
struct bitmap *numa_bitmask_clearall(struct bitmap *bmp);

/**
 * \brief clears the n-th bit of a bitmask
 *
 * \param bmp   the bitmask
 * \param n     the bit to clear
 *
 * \returns pointer to the bitmask
 */
struct bitmap *numa_bitmask_clearbit(struct bitmap *bmp, unsigned int n);

/**
 * \brief checks if two bitmasks are equal
 *
 * \param bmp1  bitmask 1
 * \param bmp2  bitmask 2
 *
 * \return TRUE if the bitmasks are equal
 *         FALSE if they are distinct
 */
bool numa_bitmask_equal(const struct bitmap *bmp1, const struct bitmap *bmp2);

/**
 * \brief frees the memory of a bitmask
 *
 * \param bmp the bitmask to be freed
 */
void numa_bitmask_free(struct bitmap *bmp);

/**
 * \brief checks if the n-th bit is set in the bitmask
 *
 * \param bmp   the bitmap
 * \param n     which bit to check
 *
 * \returns TRUE if the n-th bit is set
 *          FALSE otherwise
 */
bool numa_bitmask_isbitset(const struct bitmap *bmp, unsigned int n);

/**
 * \brief returns the size (in bytes) of the bit mask
 *
 * \param bmp   the bitmask
 *
 * \returns the size of the memory in bytes rounded up to a multiple of wordsize
 */
size_t numa_bitmask_nbytes(struct bitmap *bmp);

/**
 * \brief sets all bits of a bitmask to 1
 *
 * \param bmp the bitmask
 *
 * \returns the bitmask
 */
struct bitmap *numa_bitmask_setall(struct bitmap *bmp);

/**
 * \brief sets the n-th bit of a bitmask to 1
 *
 * \param bmp   the bitmask
 * \param n     which bit to activate
 *
 * \returns the bitmask
 */
struct bitmap *numa_bitmask_setbit(struct bitmap *bmp, unsigned int n);
682
/**
 * \brief copies the bitmask to a nodemask
 *
 * \param bmp       the bitmask to copy
 * \param nodemask  the destination nodemask
 *
 * If the two areas differ in size, the copy is truncated to the size of the
 * receiving field or zero-filled.
 */
void copy_bitmask_to_nodemask(struct bitmap *bmp, nodemask_t *nodemask);

/**
 * \brief copies the contents of a nodemask into the bitmask
 *
 * \param nodemask  node mask to copy from
 * \param bmp       bitmap to copy into
 *
 * If the two areas differ in size, the copy is truncated to the size of the
 * receiving field or zero-filled.
 */
void copy_nodemask_to_bitmask(nodemask_t *nodemask, struct bitmap *bmp);

/**
 * \brief copies one bitmask into another
 *
 * \param bmpfrom   the source bitmask
 * \param bmpto     the destination bitmask
 *
 * If the two areas differ in size, the copy is truncated to the size of the
 * receiving field or zero-filled.
 */
void copy_bitmask_to_bitmask(struct bitmap *bmpfrom, struct bitmap *bmpto);

/**
 * \brief returns a count of the bits that are set in the body of the bitmask
 *
 * \param bmp   the bitmask whose set bits are counted
 *
 * \return number of set bits in this bitmask
 */
uint32_t numa_bitmask_weight(const struct bitmap *bmp);
724
/**
 * \brief  moves a list of pages in the address space of the current domain
 *
 * \param did    the domain ID
 * \param count  number of pages to move
 * \param pages  list of pages
 * \param nodes  list of nodes to which the pages can be moved
 * \param status returns the outcome for each page
 * \param flags  flags for moving the pages
 *
 * \returns SYS_ERR_OK on SUCCESS
 */
errval_t numa_move_pages(domainid_t did,
                         size_t count,
                         void **pages,
                         const nodeid_t *nodes,
                         errval_t *status,
                         int flags);

/**
 * \brief migrate a domain from one set of nodes to another
 *
 * \param did        the domain ID
 * \param fromnodes  bitmap representing the current nodes
 * \param tonodes    bitmap representing the nodes to migrate to
 *
 * \returns SYS_ERR_OK on SUCCESS
 */
errval_t numa_migrate_pages(domainid_t did,
                            struct bitmap *fromnodes,
                            struct bitmap *tonodes);
755
/**
 * \brief error hook called when a libnuma function fails
 *
 * This is a libnuma internal function that can be overridden by the user
 * program. It is called with a char * argument when a libnuma function fails.
 * Overriding the library internal definition makes it possible to specify a
 * different error handling strategy when a libnuma function fails. It does not
 * affect numa_available(). The numa_error() function defined in libnuma prints
 * an error on stderr and terminates the program if numa_exit_on_error is set
 * to a non-zero value. The default value of numa_exit_on_error is zero.
 *
 * \param where string passed by the failing libnuma function
 *              NOTE(review): presumably names the failing call - confirm
 */
void numa_error(char *where);

/// if non-zero, numa_error() terminates the program (default: zero)
extern int numa_exit_on_error;
/// if non-zero, numa_warn() exits the program (default: zero)
extern int numa_exit_on_warn;

/**
 * \brief warning hook called when a libnuma function hits a non-fatal error
 *
 * This is a libnuma internal function that can also be overridden by the user
 * program. It is called to warn the user when a libnuma function encounters a
 * non-fatal error. The default implementation prints a warning to stderr.
 * numa_warn() exits the program when numa_exit_on_warn is set to a non-zero
 * value. The default value of numa_exit_on_warn is zero.
 *
 * \param number unique number identifying each warning
 * \param where  printf(3)-style format string, followed by a variable number
 *               of arguments
 */
void numa_warn(int number, char *where, ...);
785
786#ifdef __cplusplus
787}
788#endif
789
790#endif /* __NUMA_H */
791