1/** 2 * \file 3 * \brief header specifying the interface of libnuma 4 * 5 * This is derived from: 6 * 7 * Linux man pages "numa" 8 * libnuma from http://oss.sgi.com/projects/libnuma/ 9 * 10 */ 11 12/* 13 * Copyright (c) 2014, ETH Zurich. 14 * All rights reserved. 15 * 16 * This file is distributed under the terms in the attached LICENSE file. 17 * If you do not find this file, copies can be found by writing to: 18 * ETH Zurich D-INFK, CAB F.78, Universitaetstr. 6, CH-8092 Zurich. 19 * Attn: Systems Group. 20 */ 21 22#ifndef __NUMA_H 23#define __NUMA_H 1 24 25#ifdef __cplusplus 26extern "C" { 27#endif 28 29///< the maximum number of nodes supported 30#define NUMA_MAX_NUMNODES 16 31 32#if NUMA_MAX_NUMNODES > MAX_NODEID 33#error maximum node bigger than maximum nodeid 34#endif 35 36///< specify the local node for allocation 37#define NUMA_NODE_LOCAL ((nodeid_t)-1) 38 39///< error value for the numa node size 40#define NUMA_NODE_INVALID ((uintptr_t)-1) 41 42///< error value for invalid cores 43#define NUMA_CORE_INVALID ((coreid_t)-1); 44 45 46typedef enum numa_policy { 47 NUMA_POLICY_DEFAULT, ///< default numa policy 48 NUMA_POLICY_STRICT, ///< strict numa policy 49 NUMA_POLICY_PREFERRED ///< preferred memory policy 50} numa_policy_t; 51 52///< typedef for the nodemask 53typedef struct bitmask nodemask_t; 54 55/** 56 * \brief checks if numa support is available 57 * 58 * \returns NUMA_ERR_NOT_AVAILABLE value all other functions are undefined 59 * SYS_ERR_OK: NUMA functionality is available 60 * 61 * this function must be called before any of the other functions of libnuma. 62 * during the call to numa_available the library also gets initialized 63 */ 64errval_t numa_available(void); 65 66/** 67 * \brief returns the highest node number available on the current system. 68 * 69 * \returns ID of the max NUMA node 70 */ 71nodeid_t numa_max_node(void); 72 73/** 74 * \brief returns the highest ID of the present cores 75 * 76 * \returns the maximum coreID in the system 77 */ 78coreid_t numa_max_core(void); 79 80/** 81 * \brief returns the current node the domain is running on 82 * 83 * \return ID of the current node 84 */ 85nodeid_t numa_current_node(void); 86 87/** 88 * \brief returns the size of the node mask 89 * 90 * \return size of the node mask 91 */ 92nodeid_t numa_num_possible_nodes(void); 93 94/** 95 * \brief Obtains the maximum number of nodes the system can handle 96 * 97 * \return maximum nodes supported 98 * 99 * returns the number of the highest possible node in a system. In other words, 100 * the size of a kernel type nodemask_t (in bits) minus 1 101 */ 102static inline nodeid_t numa_max_possible_node(void) 103{ 104 return numa_num_possible_nodes() - 1; 105} 106 107/** 108 * \brief Obtains the number of all memory nodes in the system 109 * 110 * \return number of memory nodes in the system 111 * 112 * returns the number of memory nodes in the system. This count includes any nodes 113 * that are currently disabled. 114 */ 115nodeid_t numa_num_configured_nodes(void); 116 117/** 118 * \brief obtains the nodes the domain is allowed to allocate memory from 119 * 120 * \returns bitmask representing the allowing nodes 121 * 122 * returns the mask of nodes from which the process is allowed to allocate memory 123 * in it's current cpuset context. 124 */ 125struct bitmap *numa_get_mems_allowed(void); 126 127/** 128 * \brief returns the total numberof CPUs in the system 129 * 130 * \returns total number of CPUs in the system 131 * 132 * returns the number of cpus in the system. This count includes any cpus that are 133 * currently disabled. 134 */ 135coreid_t numa_num_configured_cpus(void); 136 137/** 138 * \brief bitmask that is allocated by the library with bits representing all nodes 139 * on which the calling task may allocate memory. 140 */ 141extern struct bitmap *numa_all_nodes_ptr; 142 143/** 144 * \brief points to a bitmask that is allocated by the library and left all zeroes. 145 */ 146extern struct bitmap *numa_no_nodes_ptr; 147 148/** 149 * \brief points to a bitmask that is allocated by the library with bits 150 * representing all cpus on which the calling task may execute. 151 */ 152extern struct bitmap *numa_all_cpus_ptr; 153 154/** 155 * \brief returns the number of cpus that the calling domain is allowed to use. 156 * 157 * \returns number of CPUs the domain is allowed to use 158 */ 159coreid_t numa_num_task_cpus(void); 160 161/** 162 * \brief returns the number of nodes on which the calling domain is allowed to 163 * allocate memory 164 * 165 * \returns number of nodes the domain is allowed to use 166 */ 167nodeid_t numa_num_task_nodes(void); 168 169/** 170 * \brief parses line , which is a character string 171 * 172 * \param line character string to parse 173 * \param mask bitmap to store the result 174 * 175 * \returns SYS_ERR_OK on SUCCESS 176 * NUMA_ERR_BITMAP_PARSE on FAILURE 177 * 178 * The string contains the hexadecimal representation of a bit map. 179 * 180 * XXX according to the man pages this function is only used internally 181 */ 182errval_t numa_parse_bitmap(char *line, struct bitmap *mask); 183 184/** 185 * \brief parses a character string list of nodes into a bit mask. 186 * 187 * \param string character string to parse 188 * 189 * \returns NUMA bitmask on SUCCESS 190 * NULL if the string is invalid 191 * 192 * The string is a comma-separated list of node numbers or node ranges 193 * Examples: 1-5,7,10 !4-5 +0-3 194 * 195 * If the string length is zero, then the numa_no_nodes_ptr is returned 196 */ 197struct bitmap *numa_parse_nodestring(char *string); 198 199/** 200 * \brief parses a character string list of cpus into a bit mask. 201 * 202 * \param string character string to parse 203 * 204 * \returns NUMA bitmask on SUCCESS 205 * NULL if the string is invalid 206 * 207 * The string is a comma-separated list of cpu numbers or cpu ranges 208 * Examples: 1-5,7,10 !4-5 +0-3 209 */ 210struct bitmap *numa_parse_cpustring(char *string); 211 212/** 213 * \brief obtains the size of a node 214 * 215 * \param node ID of the NUMA node 216 * \param freep 217 * 218 * \returns size of the node in bytes 219 * 220 * returns the memory size of a node. If the argument freep is not NULL, it used 221 * to return the amount of free memory on the node. On error it returns 222 * NUMA_NODE_INVALID 223 */ 224size_t numa_node_size(nodeid_t node, uintptr_t *freep); 225 226///< alias for NUMA node size 64bit variants 227#define numa_node_size64(_node, _freep) numa_node_size(_node, _freep) 228 229/** 230 * \brief obtains the base address of the numa node 231 * 232 * \returns physical address of the start of the numa node 233 */ 234lpaddr_t numa_node_base(nodeid_t node); 235 236/** 237 * \brief returns the preferred node of the current task. 238 * 239 * \returns node ID where memory is preferably allocated 240 */ 241nodeid_t numa_preferred(void); 242 243/** 244 * \brief sets the preferred node for the current task to node 245 * 246 * \param node ID of the node to set preferred 247 * 248 * The system will attempt to allocate memory from the preferred node, but will 249 * fall back to other nodes if no memory is available on the the preferred node 250 * 251 * Passing a node of -1 argument specifies local allocation 252 */ 253void numa_set_preferred(nodeid_t node); 254 255/** 256 * \brief returns the current interleave mask 257 * 258 * \returns bitmask representing the current interleave state 259 * 260 * returns the current interleave mask if the task's memory allocation policy is 261 * page interleaved. Otherwise, this function returns an empty mask. 262 */ 263struct bitmap *numa_get_interleave_mask(void); 264 265/** 266 * \brief sets the memory interleave mask for the current task to nodemask 267 * 268 * \param nodemask bitmask representing the nodes 269 * 270 * All new memory allocations are page interleaved over all nodes in the interleave 271 * mask. Interleaving can be turned off again by passing an empty mask. 272 * 273 * This bitmask is considered to be a hint. Fallback to other nodes may be possible 274 */ 275void numa_set_interleave_mask(struct bitmap *nodemask); 276 277/** 278 * \brief binds the current task and its children to the nodes specified in nodemask. 279 * 280 * \param nodemask bitmap representing the nodes 281 */ 282void numa_bind(struct bitmap *nodemask); 283 284/** 285 * \brief sets the memory allocation policy for the calling task to local allocation. 286 */ 287void numa_set_localalloc(void); 288 289/** 290 * \brief sets the memory allocation mask. 291 * 292 * \param nodemask bitmap representing the nodes 293 * 294 * The task will only allocate memory from the nodes set in nodemask. 295 * 296 * an empty mask or not allowed nodes in the mask will result in an error 297 */ 298errval_t numa_set_membind(struct bitmap *nodemask); 299 300/** 301 * \brief returns the mask of nodes from which memory can currently be allocated. 302 * 303 * \return bitmap of nodes from which can be allocated 304 */ 305struct bitmap *numa_get_membind(void); 306 307/** 308 * \brief allocates memory on a specific node. 309 * 310 * \param size size of the region in bytes 311 * \param node ID of the node to allocate from 312 * \param pagesize page size to be used for the mapping 313 * 314 * \returns pointer to memory region 315 * 316 * The size argument will be rounded up to a multiple of the system page size. 317 * if the specified node is externally denied to this process, this call will fail. 318 * The memory must be freed with numa_free(). On errors NULL is returned. 319 */ 320void *numa_alloc_onnode(size_t size, nodeid_t node, size_t pagesize); 321 322/** 323 * \brief allocates size bytes of memory on the local node 324 * 325 * \param size size of the memory region in bytes 326 * \param pagesize page size to be used for the mapping 327 * 328 * \returns pointer to memory region 329 * 330 * The memory must be freed with numa_free(). On errors NULL is returned. 331 */ 332void *numa_alloc_local(size_t size, size_t pagesize); 333 334/** 335 * \brief allocates size bytes of memory page interleaved on all nodes. 336 * 337 * \param size size of the memory region in bytes 338 * \param pagesize page size to be used for the mapping 339 * 340 * \returns pointer to the mapped memory region 341 * 342 * should only be used for large areas consisting of multiple pages. 343 * The memory must be freed with numa_free(). On errors NULL is returned. 344 */ 345void *numa_alloc_interleaved(size_t size, size_t pagesize); 346 347/** 348 * \brief allocates size bytes of memory page interleaved the nodes specified in 349 * the nodemask. 350 * 351 * \param size size of the memory region in bytes 352 * \param nodemask subset of nodes to consider for allocation 353 * \param pagesize page size to be used for the mapping 354 * 355 * \returns pointer to the mapped memory region 356 * 357 * should only be used for large areas consisting of multiple pages. 358 * The memory must be freed with numa_free(). On errors NULL is returned. 359 */ 360void *numa_alloc_interleaved_subset(size_t size, size_t pagesize, 361 struct bitmap *nodemask); 362 363/** 364 * \brief allocates size bytes of memory with the current NUMA policy. 365 * 366 * \param size size of the memory region in bytes 367 * \param pagesize page size to be used for the mapping 368 * \returns pointer to the mapped memory region 369 * 370 * The memory must be freed with numa_free(). On errors NULL is returned. 371 */ 372void *numa_alloc(size_t size, size_t pagesize); 373 374/** 375 * \brief changes the size of the memory area. 376 * 377 * \param old_addr pointer ot the old memory region 378 * \param old_size size of the old memory region 379 * \param new_size new size to allocate 380 */ 381void *numa_realloc(void *old_addr, size_t old_size, size_t new_size); 382 383/** 384 * \brief frees size bytes of memory starting at start 385 * 386 * \param start start of the memory region 387 * \param size number of bytes to free 388 * 389 * the memory must be previously allocated by one of the numa_alloc* functions 390 */ 391void numa_free(void *start, size_t size); 392 393 394/** 395 * \brief allocates a frame on a specific node 396 * 397 * \param dest capref to store the frame 398 * \param size size of the frame to allocated 399 * \param node node on which the frame should be allocated 400 * \param ret_size returned size of the frame capability 401 * 402 * \returns SYS_ERR_OK on SUCCESS 403 * errval on FAILURE 404 */ 405errval_t numa_frame_alloc_on_node(struct capref *dest, 406 size_t size, 407 nodeid_t node, 408 size_t *ret_size); 409 410/** 411 * \brief allocates a frame on the local node 412 * 413 * \param dest capref to store the frame 414 * \param size size of the frame to allocated 415 * \param ret_size returned size of the frame capability 416 * 417 * \returns SYS_ERR_OK on SUCCESS 418 * errval on FAILURE 419 */ 420static inline errval_t numa_frame_alloc_local(struct capref *dest, 421 size_t size, 422 size_t *ret_size) 423{ 424 return numa_frame_alloc_on_node(dest, size, numa_current_node(), ret_size); 425} 426 427/** 428 * \brief frees a previously allocated frame 429 * 430 * \param frame capability to free 431 */ 432errval_t numa_frame_free(struct capref frame); 433 434/** 435 * \brief runs the current domain on a specific node. 436 * 437 * \param node ID of the node to run the domain on 438 * 439 * \returns SYS_ERR_OK on SUCCESS 440 * errval on FAILURE 441 * 442 * Passing -1 permits the kernel to schedule on all nodes again 443 */ 444errval_t numa_run_on_node(nodeid_t node); 445 446/** 447 * \brief runs the current domain only on nodes specified in nodemask. 448 * 449 * \param nodemask bitmap representing the nodes to run the domain on 450 * 451 * \returns SYS_ERR_OK on SUCCESS 452 * errval on FAILURE 453 */ 454errval_t numa_run_on_node_mask(struct bitmap *nodemask); 455 456/** 457 * \brief returns a mask of CPUs on which the current task is allowed to run. 458 * 459 * \returns bitmap represening the coreids the domain is allowed to run 460 */ 461struct bitmap *numa_get_run_node_mask(void); 462 463/** 464 * \brief specify the memory bind policy 465 * 466 * \param strict numa policy to apply 467 * 468 * specifies whether calls that bind memory to a specific node should use the preferred policy or a strict policy. 469 */ 470void numa_set_bind_policy(numa_policy_t strict); 471 472/** 473 * \brief enable or disable the strict allocation policy 474 * 475 * \param strict numa policy to apply 476 * 477 * s a flag that says whether the functions allocating on specific nodes should 478 * use a strict policy. Strict means the allocation will fail if the memory cannot 479 * be allocated on the target node. 480 */ 481void numa_set_strict(numa_policy_t strict); 482 483/** 484 * \brief reports the distance in the machine topology between two nodes 485 * 486 * \param from source node to measure the distance 487 * \param to target node to measure the distance 488 * 489 * \returns distance between two nodes 490 * 0 iff cannot be deterimed 491 * 492 * The factors are a multiple of 10. A node has distance 10 to itself. 493 */ 494uint32_t numa_distance(nodeid_t from, nodeid_t to); 495 496/** 497 * \brief retrieves a bitmask of the cpus on which a domain may run 498 * 499 * \param did domain ID 500 * \param mask returned bitmask 501 * 502 * \returns SYS_ERR_OK on success 503 * errval on FAILURE 504 */ 505errval_t numa_sched_getaffinity(domainid_t did, struct bitmap *mask); 506 507/** 508 * \brief sets a domain's allowed cpu's to those cpu's specified in mask. 509 * 510 * \param did domain ID 511 * \param mask bitmap representing the CPUs 512 * 513 * \returns SYS_ERR_OK on success 514 * errval on FAILURE 515 */ 516errval_t numa_sched_setaffinity(domainid_t did, struct bitmap *mask); 517 518/** 519 * \brief returns the page size 520 * 521 * \returns the number of bytes in a page 522 */ 523size_t numa_pagesize(void); 524 525/** 526 * \brief converts a node number to a bitmask of CPUs 527 * 528 * \param node the ID of the node 529 * \param mask bitmap representing the CPUs of this node 530 * 531 * \return SYS_ERR_OK on SUCCESS 532 * NUMA_ERR_BITMAP_RANGE on FAILURE (too small bitmap) 533 * 534 * The user must pass a bitmask structure with a mask buffer long enough to 535 * represent all possible cpu's 536 */ 537errval_t numa_node_to_cpus(nodeid_t node, struct bitmap *mask); 538 539 540/** 541 * \brief gets the number of cores for the given numa node 542 * 543 * \param node NUMA node to get the number of cores 544 * 545 * \returns number of cores for the node 546 */ 547coreid_t numa_num_node_cpus(nodeid_t node); 548 549/** 550 * \brief gets the system's core ID for a node/local core id configuration 551 * 552 * \param 553 */ 554coreid_t numa_node_get_core(nodeid_t node, coreid_t local_core_id); 555 556 557/** 558 * \brief returns the node that a cpu belongs to 559 * 560 * \param cpu ID of the core 561 * 562 * \returns node ID on SUCCESS 563 * NUMA_NODE_INVALID on FAILURE 564 */ 565nodeid_t numa_node_of_cpu(coreid_t cpu); 566 567/** 568 * \brief allocates a bit mask to represent the cores in the system 569 * 570 * \returns pointer to a new bitmask 571 * NULL on failure 572 */ 573struct bitmap *numa_allocate_cpumask(void); 574 575/** 576 * \brief frees a previously allocated CPU bitmask 577 * 578 * \param cpumask pointer to a previously allocated CPU bitmask 579 */ 580void numa_free_cpumask(struct bitmap *cpumask); 581 582/** 583 * \brief allocates a bit mask to represent the nodes in the system 584 * 585 * \returns pointer to a new bitmask 586 * NULL on failure 587 */ 588struct bitmap *numa_allocate_nodemask(void); 589 590/** 591 * \brief frees a previously allocated node bitmask 592 * 593 * \param nodemask pointer to a previously allocated node bitmask 594 */ 595void numa_free_nodemask(struct bitmap *nodemask); 596 597/** 598 * \brief allocates a bitmask structure and its associated bit mask 599 * 600 * \param n the number of bits 601 * 602 * \returns pointer to the bitmask 603 * NULL on error 604 */ 605struct bitmap *numa_bitmask_alloc(unsigned int n); 606 607/** 608 * \brief sets all bits in the bit mask to 0. 609 * 610 * \param bmp pointer to the bitmap 611 * 612 * \returns pointer to the cleared bit map 613 */ 614struct bitmap *numa_bitmask_clearall(struct bitmap *bmp); 615 616/** 617 * \brief clears the n-th bit of a bitmask 618 * 619 * \param bmp the bitmask 620 * \param n the bit to clear 621 * 622 * \returns pointer to the bitmask 623 */ 624struct bitmap *numa_bitmask_clearbit(struct bitmap *bmp, unsigned int n); 625 626/** 627 * \brief checks if two bitmasks are equal 628 * 629 * \param bmp1 bitmask 1 630 * \param bmp2 bitmask 2 631 * 632 * \return TRUE if the bitmasks are equal 633 * FALSE if the are distinct 634 */ 635bool numa_bitmask_equal(const struct bitmap *bmp1, const struct bitmap *bmp2); 636 637/** 638 * \brief frees the memory of a bitmask 639 * 640 * \param bmp the bitmask to be freed 641 */ 642void numa_bitmask_free(struct bitmap *bmp); 643 644/** 645 * \brief checks if the n-th bit is set in the bitmask 646 * 647 * \param bmp the bitmap 648 * \param n which bit to check 649 * 650 * \returns TRUE if the n-th bit is set 651 * FALSE otherwise 652 */ 653bool numa_bitmask_isbitset(const struct bitmap *bmp, unsigned int n); 654 655/** 656 * \brief returns the size (in bytes) of the bit mask 657 * 658 * \param bmp the bitmask 659 * 660 * \returns the size of the memory in bytes rounded up to a multiple of wordsize 661 */ 662size_t numa_bitmask_nbytes(struct bitmap *bmp); 663 664/** 665 * \brief sets all bits of a bitmask to 1 666 * 667 * \param bmp the bitmask 668 * 669 * \returns the bitmask 670 */ 671struct bitmap *numa_bitmask_setall(struct bitmap *bmp); 672 673/** 674 * \brief sets the n-th bit of a bitmask to 1 675 * 676 * \param bmp the bitmask 677 * \param n which bit to activate 678 * 679 * \returns the bitmask 680 */ 681struct bitmap *numa_bitmask_setbit(struct bitmap *bmp, unsigned int n); 682 683/** 684 * \brief copies the bitmask to a nodemask 685 * 686 * \param bmp the bitmask to copy 687 * \param nodemask the destination nodemask 688 * 689 * If the two areas differ in size, the copy is truncated to the size of the 690 * receiving field or zero-filled. 691 */ 692void copy_bitmask_to_nodemask(struct bitmap *bmp, nodemask_t *nodemask); 693 694/** 695 * \brief copies the contents of a nodemask into the bitmask 696 * 697 * \param nodemask node mask to copy from 698 * \param bmp bitmap to copy into 699 * 700 * If the two areas differ in size, the copy is truncated to the size of the 701 * receiving field or zero-filled. 702 */ 703void copy_nodemask_to_bitmask(nodemask_t *nodemask, struct bitmap *bmp); 704 705/** 706 * \brief copies one bitmask into another 707 * 708 * \param bmpfrom the source bitmask 709 * \param bmpto the destination bitmask 710 * 711 * If the two areas differ in size, the copy is truncated to the size of the 712 * receiving field or zero-filled. 713 */ 714void copy_bitmask_to_bitmask(struct bitmap *bmpfrom, struct bitmap *bmpto); 715 716/** 717 * \brief returns a count of the bits that are set in the body of the bitmask 718 * 719 * \param bmp the bitmask to count the set bits 720 * 721 * \return number of set bits in this bitmask 722 */ 723uint32_t numa_bitmask_weight(const struct bitmap *bmp); 724 725/** 726 * \brief moves a list of pages in the address space of the current domain 727 * 728 * \param did the domain ID 729 * \param count number of pages to move 730 * \param pages list of pages 731 * \param nodes list of nodes to which the pages can be moved 732 * \param status returns the outcome for each page 733 * \param flags flags for moving the pages 734 * 735 * \returns SYS_ERR_OK on SUCCESS 736 */ 737errval_t numa_move_pages(domainid_t did, 738 size_t count, 739 void **pages, 740 const nodeid_t *nodes, 741 errval_t *status, 742 int flags); 743/** 744 * \brief migrate a domain from one set of nodes to another 745 * 746 * \param did the domain ID 747 * \param fromnodes bitmap representing the current nodes 748 * \param tonodes bitmap representing the 749 * 750 * \returns SYS_ERR_OK on SUCCESS 751 */ 752errval_t numa_migrate_pages(domainid_t did, 753 struct bitmap *fromnodes, 754 struct bitmap *tonodes); 755 756/** 757 * is a libnuma internal function that can be overridden by the user program. This 758 * function is called with a char * argument when a libnuma function fails. 759 * Overriding the library internal definition makes it possible to specify a 760 * different error handling strategy when a libnuma function fails. It does not 761 * affect numa_available(). The numa_error() function defined in libnuma prints an 762 * error on stderr and terminates the program if numa_exit_on_error is set to a 763 * non-zero value. The default value of numa_exit_on_error is zero. 764 * 765 * \param where 766 */ 767void numa_error(char *where); 768 769extern int numa_exit_on_error; 770extern int numa_exit_on_warn; 771 772/** 773 * is a libnuma internal function that can be also overridden by the user program. 774 * It is called to warn the user when a libnuma function encounters a non-fatal 775 * error. The default implementation prints a warning to stderr. The first argument 776 * is a unique number identifying each warning. After that there is a printf(3)-style 777 * format string and a variable number of arguments. numa_warn exits the program 778 * when numa_exit_on_warn is set to a non-zero value. The default value of 779 * numa_exit_on_warn is zero. 780 * 781 * \param number 782 * \param where 783 */ 784void numa_warn(int number, char *where, ...); 785 786#ifdef __cplusplus 787} 788#endif 789 790#endif /* __NUMA_H */ 791