/** * \file * \brief OpenMP API implementation as defined in OpenMP Version 4.0 * * Source: http://www.openmp.org/mp-documents/OpenMP4.0.0.pdf */ /* * Copyright (c)2014 ETH Zurich. * All rights reserved. * * This file is distributed under the terms in the attached LICENSE file. * If you do not find this file, copies can be found by writing to: * ETH Zurich D-INFK, Universitaetsstrasse 6, CH-8092 Zurich. Attn: Systems Group. */ #include /* * =========================================================================== * OpenMP 4.0 API * =========================================================================== */ /* * --------------------------------------------------------------------------- * 3.2 Execution Environment Routines * --------------------------------------------------------------------------- * * Execution environment routines affect and monitor threads, processors, and * the parallel environment. The library routines are external functions with * “C” linkage. */ /** * \brief Sets the number of threads to be used for parallel regions * * \param num_threads the number of threads * * Affects the number of threads used for subsequent parallel regions not * specifying a num_threads clause, by setting the value of the first element of * the nthreads-var ICV of the current task to num_threads. */ void omp_set_num_threads(int num_threads) { if (num_threads > 0) { if (num_threads > OMP_GET_ICV_GLOBAL(thread_limit)) { num_threads = OMP_GET_ICV_GLOBAL(thread_limit); } OMP_SET_ICV_TASK(nthreads, num_threads); } } /** * \brief returns the current number of threads used (innermost parallel region) * * \returns number of used threads * * Returns the number of threads in the current team. The binding region for an * omp_get_num_threads region is the innermost enclosing parallel region. * If called from the sequential part of a program, this routine returns 1. */ int omp_get_num_threads(void) { /* struct gomp_team *team = gomp_thread ()->ts.team; return team ? team->nthreads : 1; XXX: we dont't have teams yet so we just return the number of threads participating in working in the task */ if (bomp_icv_get()->task) { if (OMP_GET_ICV_TASK(active_levels) > 1) { return 1; /// if we are nested return 1 } return OMP_GET_ICV_TASK(nthreads); } return 1; } /** * \brief the maximum number of threads that can be used for a new parallel task * * \returns number of usable threads * * Returns an upper bound on the number of threads that could be used to form a * new team if a parallel construct without a num_threads clause were encountered * after execution returns from this routine. * * The value returned by omp_get_max_threads is the value of the first element of * the nthreads-var ICV of the current task. This value is also an upper bound on * the number of threads that could be used to form a new team if a parallel * region without a num_threads clause were encountered after execution returns * from this routine. */ int omp_get_max_threads(void) { if (bomp_icv_get()->task) { return OMP_GET_ICV_TASK(thread_limit); } return OMP_GET_ICV_GLOBAL(thread_limit); } /** * \brief Returns the thread number of the calling thread within the current team. * * \returns ThreadID */ int omp_get_thread_num(void) { if (bomp_icv_get()->task) { return ((struct bomp_tls *)thread_get_tls())->thread_id; } return 0; } /** * \brief returns the number of available processors * * \returns available processor count * * Returns the number of processors that are available to the device at the time * the routine is called. */ int omp_get_num_procs(void) { return numa_num_configured_cpus(); } /** * \brief checks if we are currently in a parallel region * * \returns TRUE active threads is greater than 1 * FALSE active threads is 1 (main thread) * * Returns true if the active-levels-var ICV is greater than zero; otherwise it * returns false. The effect of the omp_in_parallel routine is to return true if * the current task is enclosed by an active parallel region, and the parallel * region is enclosed by the outermost initial task region on the device; * otherwise it returns false. */ int omp_in_parallel(void) { if (bomp_icv_get()->task) { return (OMP_GET_ICV_TASK(active_levels) > 0); } else { return 0; } } /** * \brief enables / disables the dynamic behavior * * \param dynamic_threads zero to disable dynamic behavior * non-zero to enable dynamic behavior * * Returns the value of the dyn-var ICV, which indicates if dynamic adjustment * of the number of threads is enabled or disabled. */ void omp_set_dynamic(int dynamic_threads) { #if OMP_SUPPORT_DYNAMIC OMP_SET_ICV_TASK(dynamic, (!!dynamic_threads)); #endif } /** * \brief checks if the dynamic behavior is enabled for the current task * * \returns TRUE if dynamic behavior enabled * FALSE if disabled * * This routine returns the value of the dyn-var ICV, which is true if dynamic * adjustment of the number of threads is enabled for the current task. */ int omp_get_dynamic(void) { #if OMP_SUPPORT_DYNAMIC return OMP_GET_ICV_TASK(dynamic); #else return 0; #endif } /** * \brief Enables or disables nested parallelism, by setting the nest-var ICV. * * \param nested TRUE: enable nested behavior * FALSE: disable nested behavior */ void omp_set_nested(int nested) { #if OMP_SUPPORT_NESTED OMP_SET_ICV_TASK(nested, !!nested); #endif } /** * \brief checks if the nested behavior is enabled * * \returns TRUE if nested behavior is enabled * FALSE if disabled * * Returns the value of the nest-var ICV, which indicates if nested parallelism * is enabled or disabled. */ int omp_get_nested(void) { #if OMP_SUPPORT_NESTED return OMP_GET_ICV_TASK(nested); #else return 0; #endif } /** * \brief sets the schedule to be used * * \param kind which schedule to be used (one of OMP_SCHED_*) * \param modifier modifier to tweak the scheduler (depends on kind) * * The omp_set_schedule routine affects the schedule that is applied when runtime * is used as schedule kind, by setting the value of the run-sched-var ICV. */ void omp_set_schedule(omp_sched_t kind, int modifier) { OMP_SET_ICV_TASK(run_sched, kind); OMP_SET_ICV_TASK(run_sched_modifier, modifier); } /** * \brief returns the current scheduler settings * * \param kind returns the current scheduler setting (one of OMP_SCHED_*) * \param modifier returns the modifier of the scheduler * * Returns the value of run-sched-var ICV, which is the schedule applied when * runtime schedule is used. */ void omp_get_schedule(omp_sched_t *kind, int *modifier) { if (kind) { *kind = OMP_GET_ICV_TASK(run_sched); } if (modifier) { *modifier = OMP_GET_ICV_TASK(run_sched_modifier); } } /** * \brief obtains he maximum number of OpenMP threads available * * \returns number of available threads * * Returns the value of the thread-limit-var ICV, which is the maximum number * of OpenMP threads available. * * The binding thread set for an omp_get_thread_limit region is all threads on the * device. The effect of executing this routine is not related to any specific * region corresponding to any construct or API routine. */ int omp_get_thread_limit(void) { return OMP_GET_ICV_TASK(thread_limit); } /** * \brief limits the nested depth * * \param max_active_levels maximum nested level * * Limits the number of nested active parallel regions, by setting * max-active-levels-var ICV. */ void omp_set_max_active_levels(int max_active_levels) { if (max_active_levels > 0) { OMP_SET_ICV_DEV(max_active_levels, max_active_levels); } } /** * \brief returns the maximim nested depth * * \returns maximum nested level * * Returns the value of max-active-levels-var ICV, which determines the maximum * number of nested active parallel regions. */ int omp_get_max_active_levels(void) { return OMP_GET_ICV_DEV(max_active_levels); } /** * \brief returns the level the task is runnig at * * \param number enclosing nested parallel regions * * For the enclosing device region, returns the levels-vars ICV, which is the * number of nested parallel regions that enclose the task containing the call. */ int omp_get_level(void) { return OMP_GET_ICV_TASK(levels); } /** * \brief returns the ancestor thread number of a thread at a given level * * \param level the level of the ancestor * * \returns thread number of ancestor thread * * The omp_get_ancestor_thread_num routine returns the thread number of the * ancestor at a given nest level of the current thread or the thread number of * the current thread. If the requested nest level is outside the range of 0 and * the nest level of the current thread, as returned by the omp_get_level routine, * the routine returns -1. */ int omp_get_ancestor_thread_num(int level) { int my_level = omp_get_level(); if (level > my_level || level < 0) { return -1; } else if (my_level == level) { return omp_get_thread_num(); } else { /* TODO */ assert(!"NYI"); return 0; } } /** * \brief returns the team size of a thread at a given level * * \param level the level to consider * * \returns number of threads in the team * * * The omp_get_team_size routine returns the size of the thread team to which the * ancestor or the current thread belongs. If the requested nested level is outside * the range of 0 and the nested level of the current thread, as returned by the * omp_get_level routine, the routine returns -1. Inactive parallel regions are * regarded like active parallel regions executed with one thread. */ int omp_get_team_size(int level) { int my_level = omp_get_level(); if (level > my_level || level < 0) { return -1; } else { /* TODO */ assert(!"NYI"); return 0; } } /** * \brief returns the number of active, nested parallel regions * * \returns number of nested parallel regions * * * The effect of the omp_get_active_level routine is to return the number of nested, * active parallel regions enclosing the current task such that all of the parallel * regions are enclosed by the outermost initial task region on the current device. */ int omp_get_active_level(void) { return OMP_GET_ICV_TASK(active_levels); } /** * \brief checks if thread is in the final task region * * \returns TRUE if thread is in the final task region * FALSE otherwise * * Returns true if the routine is executed in a final task region; otherwise, * it returns false. */ int omp_in_final(void) { assert(!"NYI"); return 1; // TODO } #if OMP_VERSION >= OMP_VERSION_40 /** * \brief returns the cancellation value * * \returns cancellation value * * Returns the value of the cancel-var ICV, which controls the behavior of * cancel construct and cancellation points. */ int omp_get_cancellation(void) { return OMP_GET_ICV_DEV(cancel); } /** * \brief returns the thread affinitiy policy * * \returns OpenMP thread policy value * * Returns the thread affinity policy to be used for the subsequent nested * parallel regions that do not specify a proc_bind clause. */ omp_proc_bind_t omp_get_proc_bind(void) { return OMP_GET_ICV_TASK(bind); } /** * \brief controls the default target device * * \param device_num device number of the target device * * The effect of this routine is to set the value of the default-device-var ICV * of the current task to the value specified in the argument. When called from * within a target region the effect of this routine is unspecified. */ void omp_set_default_device(int device_num) { OMP_SET_ICV_TASK(default_device, device_num); } /** * \brief Returns the default target device. * * \returns device number of default target device * * The omp_get_default_device routine returns the value of the default-device-var * ICV of the current task. When called from within a target region the effect of * this routine is unspecified. */ int omp_get_default_device(void) { // TODO: behavior if on target return OMP_GET_ICV_TASK(default_device); } /** * \brief Returns the number of target devices. * * \returns number of target devices * * The omp_get_num_devices routine returns the number of available target devices. * When called from within a target region the effect of this routine is * unspecified. */ int omp_get_num_devices(void) { return 0; // TODO } /** * \brief returns the number of teams in the current region * * \returns number of teams * * The effect of this routine is to return the number of teams in the current teams * region. The routine returns 1 if it is called from outside of a teams region. */ int omp_get_num_teams(void) { assert(!"NYI: Teams"); return 1; // TODO: team counting } /** * \brief gets the team number of the calling thread * * \returns team number * * Returns the team number of calling thread. The team number is an integer * between 0 and one less than the value returned by omp_get_num_teams, inclusive. */ int omp_get_team_num(void) { assert(!"NYI: Teams"); return 0; } /** * \brief checks if the task is executing as the host device * * \returns TRUE if task is host device * FALSE otherwise * Returns true if the current task is executing on the host device; otherwise, * it returns false. */ int omp_is_initial_device(void) { assert(!"NYI: Initial device"); return 1; } #endif /* * --------------------------------------------------------------------------- * 3.3 Lock Routines * --------------------------------------------------------------------------- * General-purpose lock routines. Two types of locks are supported: simple locks * and nestable locks. A nestable lock can be set multiple times by the same task * before being unset; a simple lock cannot be set if it is already owned by the * task trying to set it. * * XXX: we may have to consider something different when we are dealing with * non-shared address spaces such as XOMP */ /* * Simple OpenMP locks */ /** * \brief initializes and allocates a simple OpenMP lock * * \param arg returned pointer to the lock * * The effect of these routines is to initialize the lock to the unlocked state; * that is, no task owns the lock. */ void omp_init_lock(omp_lock_t *arg) { struct __omp_lock *lock = (struct __omp_lock *)arg; assert(lock != NULL); thread_mutex_init(&lock->mutex); lock->initialized = 0x1; } /** * \brief destroys a simple OpenMP lock * * \param arg OpenMP lock to destroyed (set to zero) * * The effect of these routines is to change the state of the lock to uninitialized. */ void omp_destroy_lock(omp_lock_t *arg) { struct __omp_lock *lock = (struct __omp_lock *) arg; /* acquire the lock to make sure there are no other threads holding the lock */ thread_mutex_lock(&lock->mutex); /* we have the lock now */ memset(lock, 0, sizeof (*lock)); } /** * \brief acquires a simple OpenMP lock * * \param arg The lock to acquire * * Each of these routines causes suspension of the task executing the routine * until the specified lock is available and then sets the lock. */ void omp_set_lock(omp_lock_t *arg) { struct __omp_lock *lock = (struct __omp_lock *) arg; assert(lock->initialized); thread_mutex_lock(&lock->mutex); } /** * \brief Releases the simple OpenMP lock * * \param arg The lock to be released * * For a simple lock, the omp_unset_lock routine causes the lock to become * unlocked. */ void omp_unset_lock(omp_lock_t *arg) { struct __omp_lock *lock = (struct __omp_lock *) arg; assert(lock->initialized); thread_mutex_unlock(&lock->mutex); } /** * \brief tries to acquire a simple openMP lock * * \param arg The OpenMP lock to acquire * * \returns TRUE if lock is acquired successfully * FALSE if lock is already held by other thread * * These routines attempt to set a lock in the same manner as omp_set_lock and * omp_set_nest_lock, except that they do not suspend execution of the task * executing the routine. * For a simple lock, the omp_test_lock routine returns true if the lock is * successfully set; otherwise, it returns false. */ int omp_test_lock(omp_lock_t *arg) { struct __omp_lock *lock = (struct __omp_lock *) arg; assert(lock->initialized); return thread_mutex_trylock(&lock->mutex); } /* * Nested OpenMP locks */ /** * \brief initializes and allocates a nested OpenMP lock * * \param arg returned pointer to the lock * * The effect of these routines is to initialize the lock to the unlocked state; * that is, no task owns the lock. In addition, the nesting count for a nestable * lock is set to zero. */ void omp_init_nest_lock(omp_nest_lock_t *arg) { struct __omp_nested_lock *nlock = (struct __omp_nested_lock *)arg; assert(nlock != NULL); thread_mutex_init(&nlock->mutex); nlock->owner = NULL; nlock->count = 0; nlock->initialized = 1; } /** * \brief destroys a Nested OpenMP lock * * \param arg OpenMP lock to destroyed (set to zero) * * The effect of these routines is to change the state of the lock to uninitialized. */ void omp_destroy_nest_lock(omp_nest_lock_t *arg) { struct __omp_nested_lock *nlock = (struct __omp_nested_lock *) arg; /*acquire the lock to make sure there are no other threads holding the lock */ thread_mutex_lock(&nlock->mutex); /* we have the lock now */ memset(nlock, 0, sizeof (*nlock)); } /** * \brief acquires a simple OpenMP lock * * \param arg The lock to acquire * * Each of these routines causes suspension of the task executing the routine * until the specified lock is available and then sets the lock. * * A nestable lock is available if it is unlocked or if it is already owned by * the task executing the routine. The task executing the routine is granted, * or retains, ownership of the lock, and the nesting count for the lock is * incremented. */ void omp_set_nest_lock(omp_nest_lock_t *arg) { struct __omp_nested_lock *nlock = (struct __omp_nested_lock *) arg; assert(nlock->initialized); if (nlock->owner != thread_self()) { thread_mutex_lock (&nlock->mutex); nlock->owner = thread_self(); } nlock->count++; } /** * \brief Releases the simple OpenMP lock * * \param arg The lock to be released * * For a nestable lock, the omp_unset_nest_lock routine decrements the nesting * count, and causes the lock to become unlocked if the resulting nesting count * is zero. */ void omp_unset_nest_lock(omp_nest_lock_t *arg) { struct __omp_nested_lock *nlock = (struct __omp_nested_lock *) arg; assert(nlock->initialized); nlock->count--; // if we were the last holder unlock the mutex if (nlock->count == 0) { thread_mutex_unlock(&nlock->mutex); } } /** * \brief tries to acquire a simple openMP lock * * \param arg The OpenMP lock to acquire * * \returns TRUE if lock is acquired successfully * FALSE if lock is already held by other thread * * These routines attempt to set a lock in the same manner as omp_set_lock and * omp_set_nest_lock, except that they do not suspend execution of the task * executing the routine. * For a nestable lock, the omp_test_nest_lock routine returns the new nesting * count if the lock is successfully set; otherwise, it returns zero. */ int omp_test_nest_lock(omp_nest_lock_t *arg) { struct __omp_nested_lock *nlock = (struct __omp_nested_lock *) arg; assert(nlock->initialized); if (nlock->owner != thread_self()) { if (!thread_mutex_trylock(&nlock->mutex)) { return 0; } nlock->owner = thread_self(); } nlock->count++; return nlock->count; } /* * --------------------------------------------------------------------------- * 3.4 Timing Routines * --------------------------------------------------------------------------- * Timing routines support a portable wall clock timer. These record elapsed * time per-thread and are not guaranteed to be globally consistent across all * the threads participating in an application. */ /** * \brief returns elapsed wall clock time in seconds. * * \returns call clock time * * The omp_get_wtime routine returns a value equal to the elapsed wall clock time * in seconds since some “time in the past”. The actual “time in the past” is * arbitrary, but it is guaranteed not to change during the execution of the * application program. The time returned is a “per-thread time”, so it is not * required to be globally consistent across all the threads participating in an * application. */ double omp_get_wtime(void) { cycles_t t_start = OMP_GET_ICV_GLOBAL(time_start); cycles_t t_current = rdtsc(); assert(!"conversion to ms"); return (t_current - t_start); } /** * \brief returns the precision of the timer used by omp_get_wtime. * * \returns the timer precision * * The omp_get_wtick routine returns a value equal to the number of seconds * between successive clock ticks of the timer used by omp_get_wtime. */ double omp_get_wtick(void) { return 1.0 / 1e6; }