/**
 * \file
 * \brief General Numa functions
 *
 */

/*
 * Copyright (c) 2014, ETH Zurich.
 * All rights reserved.
 *
 * This file is distributed under the terms in the attached LICENSE file.
 * If you do not find this file, copies can be found by writing to:
 * ETH Zurich D-INFK, Universitaetstr. 6, CH-8092 Zurich. Attn: Systems Group.
 */

#include <stdio.h>
#include <string.h>

#include <barrelfish/barrelfish.h>

#include <numa.h>
#include <bitmap.h>
#include "numa_internal.h"

/// library state: 0x0 = not initialized, 0x1 = initialized OK, 0xff = init failed
uint8_t numa_initialized = 0x0;

/**
 * \brief bitmask that is allocated by the library with bits representing all nodes
 *        on which the calling task may allocate memory.
 */
struct bitmap *numa_all_nodes_ptr;

/**
 * \brief points to a bitmask that is allocated by the library and left all zeroes.
 */
struct bitmap *numa_no_nodes_ptr;

/**
 * \brief points to a bitmask that is allocated by the library with bits
 *        representing all cpus on which the calling task may execute.
 */
struct bitmap *numa_all_cpus_ptr;

/**
 * \brief data structure representing the numa topology
 */
struct numa_topology numa_topology;


/**
 * \brief checks if numa support is available
 *
 * \returns NUMA_ERR_LIB_INIT if initialization failed; all other functions
 *          are then undefined
 *          SYS_ERR_OK: NUMA functionality is available
 *
 * this function must be called before any of the other functions of libnuma.
 * during the call to numa_available the library also gets initialized
 */
errval_t numa_available(void)
{
    errval_t err;

    /* fast path: initialization was already attempted; report the recorded
     * outcome (0xff marks a previous failure) */
    if (numa_initialized) {
        return (numa_initialized == 0xff) ? NUMA_ERR_LIB_INIT : SYS_ERR_OK;
    }

    NUMA_DEBUG_INIT("Initializing libnuma...\n");

    /* obtain the node/core layout from the system knowledge base */
    err = numa_get_topology_from_skb(&numa_topology);
    if (err_is_fail(err)) {
        goto out_err;
    }

#if NUMA_DEBUG_ENABLED
    numa_dump_topology(&numa_topology);
#endif

    /* cpu mask with a bit set for every core present in the topology */
    numa_all_cpus_ptr = numa_allocate_cpumask();
    if(numa_all_cpus_ptr == NULL) {
        err = LIB_ERR_MALLOC_FAIL;
        goto out_err1;
    }

    for (coreid_t i = 0; i < numa_topology.num_cores; ++i) {
        bitmap_set_bit(numa_all_cpus_ptr, numa_topology.cores[i]->id);
    }

#if NUMA_DEBUG_ENABLED
    bitmap_dump(numa_all_cpus_ptr);
#endif

    /* node mask with a bit set for every node present in the topology */
    numa_all_nodes_ptr = numa_allocate_nodemask();
    if(numa_all_nodes_ptr == NULL) {
        err = LIB_ERR_MALLOC_FAIL;
        goto out_err2;
    }

    for (nodeid_t i = 0; i < numa_topology.num_nodes; ++i) {
        bitmap_set_bit(numa_all_nodes_ptr, numa_topology.nodes[i].id);
    }

#if NUMA_DEBUG_ENABLED
    bitmap_dump(numa_all_nodes_ptr);
#endif

    /* node mask that is deliberately left all zeroes */
    numa_no_nodes_ptr = numa_allocate_nodemask();
    if(numa_no_nodes_ptr == NULL) {
        err = LIB_ERR_MALLOC_FAIL;
        goto out_err3;
    }

    numa_alloc_bind_mask = numa_allocate_nodemask();
    if(numa_alloc_bind_mask == NULL) {
        err = LIB_ERR_MALLOC_FAIL;
        goto out_err4;
    }
    numa_alloc_interleave_mask = numa_allocate_nodemask();
    if(numa_alloc_interleave_mask == NULL) {
        err = LIB_ERR_MALLOC_FAIL;
        goto out_err5;
    }

#if NUMA_DEBUG_ENABLED
    bitmap_dump(numa_no_nodes_ptr);
#endif

    numa_initialized = 0x1;

    return SYS_ERR_OK;

    /* cleanup in case of error: unwind in reverse order of allocation.
     * NOTE(review): the masks were obtained via numa_allocate_*mask() but are
     * released with plain free(); if struct bitmap carries a separately
     * allocated data buffer this leaks it — confirm against the bitmap
     * implementation whether a bitmap_free()-style call should be used. */
    out_err5:
    free(numa_alloc_bind_mask);
    out_err4:
    free(numa_no_nodes_ptr);
    out_err3:
    free(numa_all_nodes_ptr);
    out_err2:
    free(numa_all_cpus_ptr);
    out_err1:
    numa_free_topology(&numa_topology);
    out_err:
    /* record the failure so subsequent calls return NUMA_ERR_LIB_INIT */
    numa_initialized = 0xff;
    return err_push(err, NUMA_ERR_LIB_INIT);
}

/**
 * \brief returns the highest node number available on the
current system. 149 * 150 * \returns ID of the max NUMA node 151 */ 152nodeid_t numa_max_node(void) 153{ 154 numa_check_init(); 155 156 // XXX: assume nodes are 0..n-1 157 return numa_topology.num_nodes - 1; 158} 159 160/** 161 * \brief returns the highest ID of the present cores 162 * 163 * \returns the maximum number of cores in the system 164 */ 165coreid_t numa_max_core(void) 166{ 167 numa_check_init(); 168 169 // XXX: assume the IDs are 0...n-1 170 return numa_topology.num_cores - 1; 171} 172 173/** 174 * \brief returns the current node the domain is running on 175 * 176 * \return ID of the current node 177 */ 178nodeid_t numa_current_node(void) 179{ 180 numa_check_init(); 181 182 // XXX: do we need disp_get_core_id() here? 183 return numa_topology.cores[disp_get_current_core_id()]->node->id; 184} 185 186/** 187 * \brief returns the size of the node mask 188 * 189 * \return size of the node mask 190 */ 191nodeid_t numa_num_possible_nodes(void) 192{ 193 numa_check_init(); 194 195 return NUMA_MAX_NUMNODES; 196} 197 198/** 199 * \brief Obtains the number of all memory nodes in the system 200 * 201 * \return number of memory nodes in the system 202 * 203 * returns the number of memory nodes in the system. This count includes any nodes 204 * that are currently disabled. 205 */ 206nodeid_t numa_num_configured_nodes(void) 207{ 208 numa_check_init(); 209 210 // XXX: we have all nodes configures 211 return numa_topology.num_nodes; 212} 213 214/** 215 * \brief obtains the nodes the domain is allowed to allocate memory from 216 * 217 * \returns bitmask representing the allowing nodes 218 * 219 * returns the mask of nodes from which the process is allowed to allocate memory 220 * in it's current cpuset context. 221 */ 222struct bitmap *numa_get_mems_allowed(void) 223{ 224 numa_check_init(); 225 226 /* we don't have restriction yet. 
*/ 227 return numa_all_nodes_ptr; 228} 229 230/** 231 * \brief returns the total numberof CPUs in the system 232 * 233 * \returns total number of CPUs in the system 234 * 235 * returns the number of cpus in the system. This count includes any cpus that are 236 * currently disabled. 237 */ 238coreid_t numa_num_configured_cpus(void) 239{ 240 numa_check_init(); 241 242 // XXX we assume that we can schedule all cores 243 return numa_topology.num_cores; 244} 245 246/** 247 * \brief returns the number of cpus that the calling domain is allowed to use. 248 * 249 * \returns number of CPUs the domain is allowed to use 250 */ 251coreid_t numa_num_task_cpus(void) 252{ 253 numa_check_init(); 254 255 // XXX: we do not have any restrictions yet, return all cores 256 return numa_topology.num_cores; 257} 258 259/** 260 * \brief returns the number of nodes on which the calling domain is allowed to 261 * allocate memory 262 * 263 * \returns number of nodes the domain is allowed to use 264 */ 265nodeid_t numa_num_task_nodes(void) 266{ 267 numa_check_init(); 268 269 // XXX: We do not have any restrictions yet. just return all nodes 270 return numa_topology.num_nodes; 271} 272 273/** 274 * \brief obtains the size of a node 275 * 276 * \param node ID of the NUMA node 277 * \param freep returns the number of available bytes of the node 278 * 279 * \returns size of the node in bytes 280 * 281 * returns the memory size of a node. If the argument freep is not NULL, it used 282 * to return the amount of free memory on the node. 
On error it returns 283 * NUMA_NODE_INVALID 284 */ 285size_t numa_node_size(nodeid_t node, uintptr_t *freep) 286{ 287 numa_check_init(); 288 numa_check_node_id(node); 289 290 if (freep) { 291 // TODO: figure out how much memory is left in the node 292 } 293 294 return (numa_topology.nodes[node].mem_limit - numa_topology.nodes[node].mem_base); 295} 296 297/** 298 * \brief obtains the base address of the numa node 299 * 300 * \returns physical address of the start of the numa node 301 * NUMA_NODE_INVALID if the node does not exist 302 */ 303lpaddr_t numa_node_base(nodeid_t node) 304{ 305 numa_check_init(); 306 numa_check_node_id(node); 307 308 return numa_topology.nodes[node].mem_base; 309} 310 311/** 312 * \brief returns the preferred node of the current task. 313 * 314 * \returns node ID where memory is preferably allocated 315 */ 316nodeid_t numa_preferred(void) 317{ 318 numa_check_init(); 319 return numa_current_node(); 320} 321 322/** 323 * \brief sets the preferred node for the current task to node 324 * 325 * \param node ID of the node to set preferred 326 * 327 * The system will attempt to allocate memory from the preferred node, but will 328 * fall back to other nodes if no memory is available on the the preferred node 329 * 330 * Passing a node of -1 argument specifies local allocation 331 */ 332void numa_set_preferred(nodeid_t node) 333{ 334 numa_check_init(); 335 336 if (node >= numa_topology.num_nodes) { 337 NUMA_WARNING("Node ID exceeds number of available nodes %" PRIuNODEID "/%" 338 PRIuNODEID, node, numa_topology.num_nodes); 339 return; 340 } 341 342 numa_topology.preferred = node; 343} 344 345 346/** 347 * \brief runs the current domain on a specific node. 
348 * 349 * \param node ID of the node to run the domain on 350 * 351 * \returns SYS_ERR_OK on SUCCESS 352 * errval on FAILURE 353 * 354 * Passing -1 permits the kernel to schedule on all nodes again 355 */ 356errval_t numa_run_on_node(nodeid_t node) 357{ 358 numa_check_init(); 359 360 USER_PANIC("running the domain on a specific node is not supported yet\n"); 361 return 0; 362} 363 364 365/** 366 * \brief runs the current domain only on nodes specified in nodemask. 367 * 368 * \param nodemask bitmap representing the nodes to run the domain on 369 * 370 * \returns SYS_ERR_OK on SUCCESS 371 * errval on FAILURE 372 */ 373errval_t numa_run_on_node_mask(struct bitmap *nodemask) 374{ 375 numa_check_init(); 376 377 USER_PANIC("running the domain on a specific node is not supported yet\n"); 378 return 0; 379} 380 381 382/** 383 * \brief returns a mask of CPUs on which the current task is allowed to run. 384 * 385 * \returns bitmap represening the coreids the domain is allowed to run 386 */ 387struct bitmap *numa_get_run_node_mask(void) 388{ 389 numa_check_init(); 390 391 return numa_all_nodes_ptr; 392} 393 394 395/** 396 * \brief specify the memory bind policy 397 * 398 * \param strict numa policy to apply 399 * 400 * specifies whether calls that bind memory to a specific node should use the 401 * preferred policy or a strict policy. 402 */ 403void numa_set_bind_policy(numa_policy_t strict) 404{ 405 numa_check_init(); 406 407 if (strict == NUMA_POLICY_STRICT) { 408 numa_topology.bind = strict; 409 } else { 410 numa_topology.bind = NUMA_POLICY_PREFERRED; 411 } 412} 413 414 415/** 416 * \brief enable or disable the strict allocation policy 417 * 418 * \param strict numa policy to apply 419 * 420 * s a flag that says whether the functions allocating on specific nodes should 421 * use a strict policy. Strict means the allocation will fail if the memory cannot 422 * be allocated on the target node. 
423 */ 424void numa_set_strict(numa_policy_t strict) 425{ 426 numa_check_init(); 427 428 if (strict == NUMA_POLICY_STRICT) { 429 numa_topology.strict = strict; 430 } else { 431 numa_topology.strict = NUMA_POLICY_PREFERRED; 432 } 433} 434 435 436/** 437 * \brief reports the distance in the machine topology between two nodes 438 * 439 * \param from source node to measure the distance 440 * \param to target node to measure the distance 441 * 442 * \returns distance between two nodes 443 * 0 iff cannot be deterimed 444 * 445 * The factors are a multiple of 10. A node has distance 10 to itself. 446 */ 447uint32_t numa_distance(nodeid_t from, nodeid_t to) 448{ 449 numa_check_init(); 450 451 if (from >= numa_topology.num_nodes || to >= numa_topology.num_nodes) { 452 return (uint32_t)NUMA_NODE_INVALID; 453 } 454 455 return numa_topology.distances[from * numa_topology.num_nodes + to]; 456} 457 458 459/** 460 * \brief retrieves a bitmask of the cpus on which a domain may run 461 * 462 * \param did domain ID 463 * \param mask returned bitmask 464 * 465 * \returns SYS_ERR_OK on success 466 * errval on FAILURE 467 */ 468errval_t numa_sched_getaffinity(domainid_t did, struct bitmap *mask) 469{ 470 numa_check_init(); 471 472 assert(!"NYI"); 473 return 0; 474} 475 476 477/** 478 * \brief sets a domain's allowed cpu's to those cpu's specified in mask. 
479 * 480 * \param did domain ID 481 * \param mask bitmap representing the CPUs 482 * 483 * \returns SYS_ERR_OK on success 484 * errval on FAILURE 485 */ 486errval_t numa_sched_setaffinity(domainid_t did, struct bitmap *mask) 487{ 488 numa_check_init(); 489 490 assert(!"NYI"); 491 return 0; 492} 493 494 495/** 496 * \brief returns the page size 497 * 498 * \returns the number of bytes in a page 499 */ 500size_t numa_pagesize(void) 501{ 502 numa_check_init(); 503 504 return numa_topology.pagesize; 505} 506 507 508 509/** 510 * \brief converts a node number to a bitmask of CPUs 511 * 512 * \param node the ID of the node 513 * \param mask bitmap representing the CPUs of this node 514 * 515 * \return SYS_ERR_OK on SUCCESS 516 * NUMA_ERR_BITMAP_RANGE on FAILURE (too small bitmap) 517 * 518 * The user must pass a bitmask structure with a mask buffer long enough to 519 * represent all possible cpu's 520 */ 521errval_t numa_node_to_cpus(nodeid_t node, struct bitmap *mask) 522{ 523 numa_check_init(); 524 525 if (!(node < numa_topology.num_nodes)) { 526 return NUMA_ERR_NODEID_INVALID; 527 } 528 529 if (bitmap_get_nbits(mask) < numa_topology.num_cores) { 530 return NUMA_ERR_BITMAP_RANGE; 531 } 532 533 bitmap_clear_all(mask); 534 535 struct numa_node *nnode = &numa_topology.nodes[node]; 536 for (coreid_t i = 0; i < nnode->num_cores; ++i) { 537 bitmap_set_bit(mask, nnode->cores[i].id); 538 } 539 540#if NUMA_DEBUG_ENABLED 541 bitmap_dump(mask); 542#endif 543 544 return SYS_ERR_OK; 545} 546 547 548/** 549 * \brief returns the node that a cpu belongs to 550 * 551 * \param cpu ID of the core 552 * 553 * \returns node ID on SUCCESS 554 * NUMA_NODE_INVALID on FAILURE 555 */ 556nodeid_t numa_node_of_cpu(coreid_t cpu) 557{ 558 numa_check_init(); 559 560 numa_check_core_id(cpu); 561 562 return numa_topology.cores[cpu]->node->id; 563} 564 565 566/** 567 * \brief gets the number of cores for the given numa node 568 * 569 * \param node NUMA node to get the number of cores 570 * 571 * 
\returns number of cores for the node 572 */ 573coreid_t numa_num_node_cpus(nodeid_t node) 574{ 575 if (node >= numa_topology.num_nodes) { 576 NUMA_WARNING("Node ID exceeds number of available nodes: %" PRIuNODEID "/%" 577 PRIuNODEID, node, numa_topology.num_nodes); 578 return 0; 579 } 580 581 return numa_topology.nodes[node].num_cores; 582} 583