/**
 * \file
 * \brief OpenMP API implementation as defined in OpenMP Version 4.0
 *
 * Source: http://www.openmp.org/mp-documents/OpenMP4.0.0.pdf
 */

/*
 * Copyright (c)2014 ETH Zurich.
 * All rights reserved.
 *
 * This file is distributed under the terms in the attached LICENSE file.
 * If you do not find this file, copies can be found by writing to:
 * ETH Zurich D-INFK, Universitaetsstrasse 6, CH-8092 Zurich. Attn: Systems Group.
 */

#include <bomp_internal.h>


/*
 * ===========================================================================
 * OpenMP 4.0 API
 * ===========================================================================
 */

/*
 * ---------------------------------------------------------------------------
 * 3.2 Execution Environment Routines
 * ---------------------------------------------------------------------------
 *
 * Execution environment routines affect and monitor threads, processors, and
 * the parallel environment. The library routines are external functions with
 * "C" linkage.
 */

/**
 * \brief Sets the number of threads to be used for parallel regions
 *
 * \param num_threads the number of threads
 *
 * Affects the number of threads used for subsequent parallel regions not
 * specifying a num_threads clause, by setting the value of the first element of
 * the nthreads-var ICV of the current task to num_threads.
 *
 * Requests of zero or negative thread counts are silently ignored; requests
 * above the device-wide thread limit are clamped to that limit.
 */
void omp_set_num_threads(int num_threads)
{
    if (num_threads > 0) {
        /* never allow more threads than the global thread-limit-var ICV */
        if (num_threads > OMP_GET_ICV_GLOBAL(thread_limit)) {
            num_threads = OMP_GET_ICV_GLOBAL(thread_limit);
        }

        OMP_SET_ICV_TASK(nthreads, num_threads);
    }
}

/**
 * \brief returns the current number of threads used (innermost parallel region)
 *
 * \returns number of used threads
 *
 * Returns the number of threads in the current team.
The binding region for an 62 * omp_get_num_threads region is the innermost enclosing parallel region. 63 * If called from the sequential part of a program, this routine returns 1. 64 */ 65int omp_get_num_threads(void) 66{ 67 /* 68 struct gomp_team *team = gomp_thread ()->ts.team; 69 return team ? team->nthreads : 1; 70 71 XXX: we dont't have teams yet so we just return the number of threads 72 participating in working in the task 73 */ 74 75 if (bomp_icv_get()->task) { 76 if (OMP_GET_ICV_TASK(active_levels) > 1) { 77 return 1; /// if we are nested return 1 78 } 79 return OMP_GET_ICV_TASK(nthreads); 80 } 81 return 1; 82} 83 84/** 85 * \brief the maximum number of threads that can be used for a new parallel task 86 * 87 * \returns number of usable threads 88 * 89 * Returns an upper bound on the number of threads that could be used to form a 90 * new team if a parallel construct without a num_threads clause were encountered 91 * after execution returns from this routine. 92 * 93 * The value returned by omp_get_max_threads is the value of the first element of 94 * the nthreads-var ICV of the current task. This value is also an upper bound on 95 * the number of threads that could be used to form a new team if a parallel 96 * region without a num_threads clause were encountered after execution returns 97 * from this routine. 98 */ 99int omp_get_max_threads(void) 100{ 101 if (bomp_icv_get()->task) { 102 return OMP_GET_ICV_TASK(thread_limit); 103 } 104 return OMP_GET_ICV_GLOBAL(thread_limit); 105} 106 107/** 108 * \brief Returns the thread number of the calling thread within the current team. 
109 * 110 * \returns ThreadID 111 */ 112int omp_get_thread_num(void) 113{ 114 if (bomp_icv_get()->task) { 115 return ((struct bomp_tls *)thread_get_tls())->thread_id; 116 } 117 return 0; 118} 119 120/** 121 * \brief returns the number of available processors 122 * 123 * \returns available processor count 124 * 125 * Returns the number of processors that are available to the device at the time 126 * the routine is called. 127 */ 128int omp_get_num_procs(void) 129{ 130 return numa_num_configured_cpus(); 131} 132 133/** 134 * \brief checks if we are currently in a parallel region 135 * 136 * \returns TRUE active threads is greater than 1 137 * FALSE active threads is 1 (main thread) 138 * 139 * Returns true if the active-levels-var ICV is greater than zero; otherwise it 140 * returns false. The effect of the omp_in_parallel routine is to return true if 141 * the current task is enclosed by an active parallel region, and the parallel 142 * region is enclosed by the outermost initial task region on the device; 143 * otherwise it returns false. 144 */ 145int omp_in_parallel(void) 146{ 147 if (bomp_icv_get()->task) { 148 return (OMP_GET_ICV_TASK(active_levels) > 0); 149 } else { 150 return 0; 151 } 152} 153 154/** 155 * \brief enables / disables the dynamic behavior 156 * 157 * \param dynamic_threads zero to disable dynamic behavior 158 * non-zero to enable dynamic behavior 159 * 160 * Returns the value of the dyn-var ICV, which indicates if dynamic adjustment 161 * of the number of threads is enabled or disabled. 
162 */ 163void omp_set_dynamic(int dynamic_threads) 164{ 165#if OMP_SUPPORT_DYNAMIC 166 OMP_SET_ICV_TASK(dynamic, (!!dynamic_threads)); 167#endif 168} 169 170/** 171 * \brief checks if the dynamic behavior is enabled for the current task 172 * 173 * \returns TRUE if dynamic behavior enabled 174 * FALSE if disabled 175 * 176 * This routine returns the value of the dyn-var ICV, which is true if dynamic 177 * adjustment of the number of threads is enabled for the current task. 178 */ 179int omp_get_dynamic(void) 180{ 181#if OMP_SUPPORT_DYNAMIC 182 return OMP_GET_ICV_TASK(dynamic); 183#else 184 return 0; 185#endif 186} 187 188/** 189 * \brief Enables or disables nested parallelism, by setting the nest-var ICV. 190 * 191 * \param nested TRUE: enable nested behavior 192 * FALSE: disable nested behavior 193 */ 194void omp_set_nested(int nested) 195{ 196#if OMP_SUPPORT_NESTED 197 OMP_SET_ICV_TASK(nested, !!nested); 198#endif 199 200} 201 202/** 203 * \brief checks if the nested behavior is enabled 204 * 205 * \returns TRUE if nested behavior is enabled 206 * FALSE if disabled 207 * 208 * Returns the value of the nest-var ICV, which indicates if nested parallelism 209 * is enabled or disabled. 210 */ 211int omp_get_nested(void) 212{ 213#if OMP_SUPPORT_NESTED 214 return OMP_GET_ICV_TASK(nested); 215#else 216 return 0; 217#endif 218} 219 220/** 221 * \brief sets the schedule to be used 222 * 223 * \param kind which schedule to be used (one of OMP_SCHED_*) 224 * \param modifier modifier to tweak the scheduler (depends on kind) 225 * 226 * The omp_set_schedule routine affects the schedule that is applied when runtime 227 * is used as schedule kind, by setting the value of the run-sched-var ICV. 
 */
void omp_set_schedule(omp_sched_t kind,
                      int modifier)
{
    OMP_SET_ICV_TASK(run_sched, kind);
    OMP_SET_ICV_TASK(run_sched_modifier, modifier);
}

/**
 * \brief returns the current scheduler settings
 *
 * \param kind     returns the current scheduler setting (one of OMP_SCHED_*)
 * \param modifier returns the modifier of the scheduler
 *
 * Returns the value of run-sched-var ICV, which is the schedule applied when
 * runtime schedule is used. Either out-parameter may be NULL to skip it.
 */
void omp_get_schedule(omp_sched_t *kind,
                      int *modifier)
{
    if (kind) {
        *kind = OMP_GET_ICV_TASK(run_sched);
    }
    if (modifier) {
        *modifier = OMP_GET_ICV_TASK(run_sched_modifier);
    }
}

/**
 * \brief obtains the maximum number of OpenMP threads available
 *
 * \returns number of available threads
 *
 * Returns the value of the thread-limit-var ICV, which is the maximum number
 * of OpenMP threads available.
 *
 * The binding thread set for an omp_get_thread_limit region is all threads on the
 * device. The effect of executing this routine is not related to any specific
 * region corresponding to any construct or API routine.
 *
 * NOTE(review): this reads the task-level thread_limit ICV and will misbehave
 * if called before any task context exists -- verify against bomp_icv_get().
 */
int omp_get_thread_limit(void)
{
    return OMP_GET_ICV_TASK(thread_limit);
}

/**
 * \brief limits the nested depth
 *
 * \param max_active_levels maximum nested level
 *
 * Limits the number of nested active parallel regions, by setting the
 * max-active-levels-var ICV (a device-level ICV). Non-positive values are
 * silently ignored.
 */
void omp_set_max_active_levels(int max_active_levels)
{
    if (max_active_levels > 0) {
        OMP_SET_ICV_DEV(max_active_levels, max_active_levels);
    }
}

/**
 * \brief returns the maximum nested depth
 *
 * \returns maximum nested level
 *
 * Returns the value of max-active-levels-var ICV, which determines the maximum
 * number of nested active parallel regions.
 */
int omp_get_max_active_levels(void)
{
    return OMP_GET_ICV_DEV(max_active_levels);
}

/**
 * \brief returns the level the task is running at
 *
 * \returns number of enclosing nested parallel regions
 *
 * For the enclosing device region, returns the levels-vars ICV, which is the
 * number of nested parallel regions that enclose the task containing the call.
 */
int omp_get_level(void)
{
    return OMP_GET_ICV_TASK(levels);
}

/**
 * \brief returns the ancestor thread number of a thread at a given level
 *
 * \param level the level of the ancestor
 *
 * \returns thread number of ancestor thread
 *
 * The omp_get_ancestor_thread_num routine returns the thread number of the
 * ancestor at a given nest level of the current thread or the thread number of
 * the current thread. If the requested nest level is outside the range of 0 and
 * the nest level of the current thread, as returned by the omp_get_level routine,
 * the routine returns -1.
 */
int omp_get_ancestor_thread_num(int level)
{
    int my_level = omp_get_level();
    if (level > my_level || level < 0) {
        return -1;
    } else if (my_level == level) {
        /* asking about ourselves: just our own thread number */
        return omp_get_thread_num();
    } else {
        /* TODO: walking the ancestor chain is not implemented yet */
        assert(!"NYI");
        return 0;
    }
}

/**
 * \brief returns the team size of a thread at a given level
 *
 * \param level the level to consider
 *
 * \returns number of threads in the team
 *
 * The omp_get_team_size routine returns the size of the thread team to which the
 * ancestor or the current thread belongs. If the requested nested level is outside
 * the range of 0 and the nested level of the current thread, as returned by the
 * omp_get_level routine, the routine returns -1. Inactive parallel regions are
 * regarded like active parallel regions executed with one thread.
353 */ 354int omp_get_team_size(int level) 355{ 356 int my_level = omp_get_level(); 357 if (level > my_level || level < 0) { 358 return -1; 359 } else { 360 /* TODO */ 361 assert(!"NYI"); 362 return 0; 363 } 364} 365 366/** 367 * \brief returns the number of active, nested parallel regions 368 * 369 * \returns number of nested parallel regions * 370 * 371 * The effect of the omp_get_active_level routine is to return the number of nested, 372 * active parallel regions enclosing the current task such that all of the parallel 373 * regions are enclosed by the outermost initial task region on the current device. 374 */ 375int omp_get_active_level(void) 376{ 377 return OMP_GET_ICV_TASK(active_levels); 378} 379 380/** 381 * \brief checks if thread is in the final task region 382 * 383 * \returns TRUE if thread is in the final task region 384 * FALSE otherwise 385 * 386 * Returns true if the routine is executed in a final task region; otherwise, 387 * it returns false. 388 */ 389int omp_in_final(void) 390{ 391 assert(!"NYI"); 392 return 1; // TODO 393} 394 395#if OMP_VERSION >= OMP_VERSION_40 396 397/** 398 * \brief returns the cancellation value 399 * 400 * \returns cancellation value 401 * 402 * Returns the value of the cancel-var ICV, which controls the behavior of 403 * cancel construct and cancellation points. 404 */ 405int omp_get_cancellation(void) 406{ 407 return OMP_GET_ICV_DEV(cancel); 408} 409 410/** 411 * \brief returns the thread affinitiy policy 412 * 413 * \returns OpenMP thread policy value 414 * 415 * Returns the thread affinity policy to be used for the subsequent nested 416 * parallel regions that do not specify a proc_bind clause. 
417 */ 418omp_proc_bind_t omp_get_proc_bind(void) 419{ 420 return OMP_GET_ICV_TASK(bind); 421} 422 423/** 424 * \brief controls the default target device 425 * 426 * \param device_num device number of the target device 427 * 428 * The effect of this routine is to set the value of the default-device-var ICV 429 * of the current task to the value specified in the argument. When called from 430 * within a target region the effect of this routine is unspecified. 431 */ 432void omp_set_default_device(int device_num) 433{ 434 OMP_SET_ICV_TASK(default_device, device_num); 435} 436 437/** 438 * \brief Returns the default target device. 439 * 440 * \returns device number of default target device 441 * 442 * The omp_get_default_device routine returns the value of the default-device-var 443 * ICV of the current task. When called from within a target region the effect of 444 * this routine is unspecified. 445 */ 446int omp_get_default_device(void) 447{ 448 // TODO: behavior if on target 449 return OMP_GET_ICV_TASK(default_device); 450} 451 452/** 453 * \brief Returns the number of target devices. 454 * 455 * \returns number of target devices 456 * 457 * The omp_get_num_devices routine returns the number of available target devices. 458 * When called from within a target region the effect of this routine is 459 * unspecified. 460 */ 461int omp_get_num_devices(void) 462{ 463 return 0; // TODO 464} 465 466/** 467 * \brief returns the number of teams in the current region 468 * 469 * \returns number of teams 470 * 471 * The effect of this routine is to return the number of teams in the current teams 472 * region. The routine returns 1 if it is called from outside of a teams region. 473 */ 474int omp_get_num_teams(void) 475{ 476 assert(!"NYI: Teams"); 477 return 1; // TODO: team counting 478} 479 480/** 481 * \brief gets the team number of the calling thread 482 * 483 * \returns team number 484 * 485 * Returns the team number of calling thread. 
 * The team number is an integer
 * between 0 and one less than the value returned by omp_get_num_teams, inclusive.
 */
int omp_get_team_num(void)
{
    assert(!"NYI: Teams");
    return 0;
}

/**
 * \brief checks if the task is executing as the host device
 *
 * \returns TRUE if task is host device
 *          FALSE otherwise
 *
 * Returns true if the current task is executing on the host device; otherwise,
 * it returns false.
 *
 * NOTE: not implemented; asserts in debug builds and returns TRUE otherwise.
 */
int omp_is_initial_device(void)
{
    assert(!"NYI: Initial device");
    return 1;
}
#endif

/*
 * ---------------------------------------------------------------------------
 * 3.3 Lock Routines
 * ---------------------------------------------------------------------------
 * General-purpose lock routines. Two types of locks are supported: simple locks
 * and nestable locks. A nestable lock can be set multiple times by the same task
 * before being unset; a simple lock cannot be set if it is already owned by the
 * task trying to set it.
 *
 * XXX: we may have to consider something different when we are dealing with
 * non-shared address spaces such as XOMP
 */


/*
 * Simple OpenMP locks
 */

/**
 * \brief initializes and allocates a simple OpenMP lock
 *
 * \param arg returned pointer to the lock
 *
 * The effect of these routines is to initialize the lock to the unlocked state;
 * that is, no task owns the lock. The initialized flag guards the assert checks
 * in the set/unset/test routines.
 */
void omp_init_lock(omp_lock_t *arg)
{
    struct __omp_lock *lock = (struct __omp_lock *)arg;

    assert(lock != NULL);

    thread_mutex_init(&lock->mutex);
    lock->initialized = 0x1;
}

/**
 * \brief destroys a simple OpenMP lock
 *
 * \param arg OpenMP lock to destroyed (set to zero)
 *
 * The effect of these routines is to change the state of the lock to uninitialized.
 */
void omp_destroy_lock(omp_lock_t *arg)
{
    struct __omp_lock *lock = (struct __omp_lock *) arg;

    /* acquire the lock to make sure there are no other threads holding the lock */
    thread_mutex_lock(&lock->mutex);
    /* we have the lock now
     * NOTE(review): this zeroes a mutex that is currently locked; any thread
     * still blocked in thread_mutex_lock on it at this point would observe
     * wiped state -- callers must guarantee there are no waiters */
    memset(lock, 0, sizeof (*lock));
}

/**
 * \brief acquires a simple OpenMP lock
 *
 * \param arg The lock to acquire
 *
 * Each of these routines causes suspension of the task executing the routine
 * until the specified lock is available and then sets the lock.
 */
void omp_set_lock(omp_lock_t *arg)
{
    struct __omp_lock *lock = (struct __omp_lock *) arg;
    assert(lock->initialized);
    thread_mutex_lock(&lock->mutex);
}

/**
 * \brief Releases the simple OpenMP lock
 *
 * \param arg The lock to be released
 *
 * For a simple lock, the omp_unset_lock routine causes the lock to become
 * unlocked.
 */
void omp_unset_lock(omp_lock_t *arg)
{
    struct __omp_lock *lock = (struct __omp_lock *) arg;
    assert(lock->initialized);
    thread_mutex_unlock(&lock->mutex);
}

/**
 * \brief tries to acquire a simple openMP lock
 *
 * \param arg The OpenMP lock to acquire
 *
 * \returns TRUE if lock is acquired successfully
 *          FALSE if lock is already held by other thread
 *
 * These routines attempt to set a lock in the same manner as omp_set_lock and
 * omp_set_nest_lock, except that they do not suspend execution of the task
 * executing the routine.
 * For a simple lock, the omp_test_lock routine returns true if the lock is
 * successfully set; otherwise, it returns false.
 */
int omp_test_lock(omp_lock_t *arg)
{
    struct __omp_lock *lock = (struct __omp_lock *) arg;
    assert(lock->initialized);
    /* relies on thread_mutex_trylock returning non-zero on success */
    return thread_mutex_trylock(&lock->mutex);
}

/*
 * Nested OpenMP locks
 */

/**
 * \brief initializes and allocates a nested OpenMP lock
 *
 * \param arg returned pointer to the lock
 *
 * The effect of these routines is to initialize the lock to the unlocked state;
 * that is, no task owns the lock. In addition, the nesting count for a nestable
 * lock is set to zero.
 */
void omp_init_nest_lock(omp_nest_lock_t *arg)
{

    struct __omp_nested_lock *nlock = (struct __omp_nested_lock *)arg;
    assert(nlock != NULL);
    thread_mutex_init(&nlock->mutex);
    nlock->owner = NULL;   /* no owning thread yet */
    nlock->count = 0;      /* nesting depth starts at zero */
    nlock->initialized = 1;
}

/**
 * \brief destroys a Nested OpenMP lock
 *
 * \param arg OpenMP lock to destroyed (set to zero)
 *
 * The effect of these routines is to change the state of the lock to uninitialized.
 */
void omp_destroy_nest_lock(omp_nest_lock_t *arg)
{
    struct __omp_nested_lock *nlock = (struct __omp_nested_lock *) arg;

    /* acquire the lock to make sure there are no other threads holding the lock
     * NOTE(review): as with omp_destroy_lock, this zeroes a locked mutex;
     * callers must guarantee there are no waiters */
    thread_mutex_lock(&nlock->mutex);
    /* we have the lock now */
    memset(nlock, 0, sizeof (*nlock));
}

/**
 * \brief acquires a simple OpenMP lock
 *
 * \param arg The lock to acquire
 *
 * Each of these routines causes suspension of the task executing the routine
 * until the specified lock is available and then sets the lock.
 *
 * A nestable lock is available if it is unlocked or if it is already owned by
 * the task executing the routine. The task executing the routine is granted,
 * or retains, ownership of the lock, and the nesting count for the lock is
 * incremented.
666 */ 667void omp_set_nest_lock(omp_nest_lock_t *arg) 668{ 669 struct __omp_nested_lock *nlock = (struct __omp_nested_lock *) arg; 670 assert(nlock->initialized); 671 672 if (nlock->owner != thread_self()) { 673 thread_mutex_lock (&nlock->mutex); 674 nlock->owner = thread_self(); 675 } 676 nlock->count++; 677} 678 679/** 680 * \brief Releases the simple OpenMP lock 681 * 682 * \param arg The lock to be released 683 * 684 * For a nestable lock, the omp_unset_nest_lock routine decrements the nesting 685 * count, and causes the lock to become unlocked if the resulting nesting count 686 * is zero. 687 */ 688void omp_unset_nest_lock(omp_nest_lock_t *arg) 689{ 690 struct __omp_nested_lock *nlock = (struct __omp_nested_lock *) arg; 691 assert(nlock->initialized); 692 693 nlock->count--; 694 695 // if we were the last holder unlock the mutex 696 if (nlock->count == 0) { 697 thread_mutex_unlock(&nlock->mutex); 698 } 699} 700 701/** 702 * \brief tries to acquire a simple openMP lock 703 * 704 * \param arg The OpenMP lock to acquire 705 * 706 * \returns TRUE if lock is acquired successfully 707 * FALSE if lock is already held by other thread 708 * 709 * These routines attempt to set a lock in the same manner as omp_set_lock and 710 * omp_set_nest_lock, except that they do not suspend execution of the task 711 * executing the routine. 712 * For a nestable lock, the omp_test_nest_lock routine returns the new nesting 713 * count if the lock is successfully set; otherwise, it returns zero. 
 */
int omp_test_nest_lock(omp_nest_lock_t *arg)
{
    struct __omp_nested_lock *nlock = (struct __omp_nested_lock *) arg;
    assert(nlock->initialized);

    if (nlock->owner != thread_self()) {
        /* relies on thread_mutex_trylock returning non-zero on success */
        if (!thread_mutex_trylock(&nlock->mutex)) {
            return 0;
        }
        nlock->owner = thread_self();
    }

    nlock->count++;

    return nlock->count;
}

/*
 * ---------------------------------------------------------------------------
 * 3.4 Timing Routines
 * ---------------------------------------------------------------------------
 * Timing routines support a portable wall clock timer. These record elapsed
 * time per-thread and are not guaranteed to be globally consistent across all
 * the threads participating in an application.
 */

/**
 * \brief returns elapsed wall clock time in seconds.
 *
 * \returns wall clock time
 *
 * The omp_get_wtime routine returns a value equal to the elapsed wall clock time
 * in seconds since some "time in the past". The actual "time in the past" is
 * arbitrary, but it is guaranteed not to change during the execution of the
 * application program. The time returned is a "per-thread time", so it is not
 * required to be globally consistent across all the threads participating in an
 * application.
 *
 * NOTE: not implemented -- asserts in debug builds; the returned value is a
 * raw cycle-count delta, NOT seconds (cycles-to-seconds conversion missing).
 */
double omp_get_wtime(void)
{
    cycles_t t_start = OMP_GET_ICV_GLOBAL(time_start);
    cycles_t t_current = rdtsc();
    assert(!"conversion to ms");
    return (t_current - t_start);
}

/**
 * \brief returns the precision of the timer used by omp_get_wtime.
 *
 * \returns the timer precision
 *
 * The omp_get_wtick routine returns a value equal to the number of seconds
 * between successive clock ticks of the timer used by omp_get_wtime.
 *
 * NOTE(review): hard-coded to one microsecond; inconsistent with the cycle
 * counter omp_get_wtime currently reads -- confirm once wtime is finished.
 */
double omp_get_wtick(void)
{
    return 1.0 / 1e6;
}