/**
 * \file
 * \brief OpenMP API implementation as defined in OpenMP Version 4.0
 *
 * Source: http://www.openmp.org/mp-documents/OpenMP4.0.0.pdf
 */

/*
 * Copyright (c) 2014 ETH Zurich.
 * All rights reserved.
 *
 * This file is distributed under the terms in the attached LICENSE file.
 * If you do not find this file, copies can be found by writing to:
 * ETH Zurich D-INFK, Universitaetsstrasse 6, CH-8092 Zurich. Attn: Systems Group.
 */

#include <bomp_internal.h>


/*
 * ===========================================================================
 * OpenMP 4.0 API
 * ===========================================================================
 */

/*
 * ---------------------------------------------------------------------------
 * 3.2 Execution Environment Routines
 * ---------------------------------------------------------------------------
 *
 * Execution environment routines affect and monitor threads, processors, and
 * the parallel environment. The library routines are external functions with
 * "C" linkage.
 */

/**
 * \brief Sets the number of threads to be used for parallel regions
 *
 * \param num_threads   the number of threads
 *
 * Affects the number of threads used for subsequent parallel regions not
 * specifying a num_threads clause, by setting the value of the first element of
 * the nthreads-var ICV of the current task to num_threads.
 */
void omp_set_num_threads(int num_threads)
{
    if (num_threads > 0) {
        /* clamp the request to the device-wide thread limit */
        if (num_threads > OMP_GET_ICV_GLOBAL(thread_limit)) {
            num_threads = OMP_GET_ICV_GLOBAL(thread_limit);
        }

        OMP_SET_ICV_TASK(nthreads, num_threads);
    }
}
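
/*
 * Usage sketch (illustrative only, not part of this library): requesting a
 * thread count before opening a parallel region. The request is clamped to
 * the device thread limit as shown above.
 *
 *     #include <omp.h>
 *     #include <stdio.h>
 *
 *     int main(void)
 *     {
 *         omp_set_num_threads(4);
 *         printf("max threads: %d\n", omp_get_max_threads());
 *
 *         #pragma omp parallel
 *         {
 *             #pragma omp single
 *             printf("team size: %d\n", omp_get_num_threads());
 *         }
 *         return 0;
 *     }
 */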

/**
 * \brief returns the current number of threads used (innermost parallel region)
 *
 * \returns number of used threads
 *
 * Returns the number of threads in the current team. The binding region for an
 * omp_get_num_threads region is the innermost enclosing parallel region.
 * If called from the sequential part of a program, this routine returns 1.
 */
int omp_get_num_threads(void)
{
    /*
      struct gomp_team *team = gomp_thread ()->ts.team;
      return team ? team->nthreads : 1;

      XXX: we don't have teams yet, so we just return the number of threads
           participating in the task
      */

    if (bomp_icv_get()->task) {
        if (OMP_GET_ICV_TASK(active_levels) > 1) {
            return 1; /* nested regions are executed by a single thread */
        }
        return OMP_GET_ICV_TASK(nthreads);
    }
    return 1;
}

/**
 * \brief the maximum number of threads that can be used for a new parallel task
 *
 * \returns number of usable threads
 *
 * Returns an upper bound on the number of threads that could be used to form a
 * new team if a parallel construct without a num_threads clause were encountered
 * after execution returns from this routine. According to the specification this
 * is the value of the first element of the nthreads-var ICV of the current task.
 */
int omp_get_max_threads(void)
{
    /* XXX: we return the thread limit rather than the nthreads-var ICV */
    if (bomp_icv_get()->task) {
        return OMP_GET_ICV_TASK(thread_limit);
    }
    return OMP_GET_ICV_GLOBAL(thread_limit);
}

/**
 * \brief Returns the thread number of the calling thread within the current team.
 *
 * \returns ThreadID
 */
int omp_get_thread_num(void)
{
    if (bomp_icv_get()->task) {
        return ((struct bomp_tls *)thread_get_tls())->thread_id;
    }
    return 0;
}
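
/*
 * Usage sketch (illustrative only, not part of this library): identifying the
 * calling thread inside a parallel region; outside of any parallel region the
 * routine reports thread 0.
 *
 *     #include <omp.h>
 *     #include <stdio.h>
 *
 *     void report_threads(void)
 *     {
 *         #pragma omp parallel
 *         {
 *             printf("thread %d of %d\n",
 *                    omp_get_thread_num(), omp_get_num_threads());
 *         }
 *     }
 */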

/**
 * \brief returns the number of available processors
 *
 * \returns available processor count
 *
 * Returns the number of processors that are available to the device at the time
 * the routine is called.
 */
int omp_get_num_procs(void)
{
    return numa_num_configured_cpus();
}

/**
 * \brief checks if we are currently in a parallel region
 *
 * \returns TRUE  if enclosed by an active parallel region
 *          FALSE otherwise (e.g. in the sequential part of the program)
 *
 * Returns true if the active-levels-var ICV is greater than zero; otherwise it
 * returns false. The effect of the omp_in_parallel routine is to return true if
 * the current task is enclosed by an active parallel region, and the parallel
 * region is enclosed by the outermost initial task region on the device;
 * otherwise it returns false.
 */
int omp_in_parallel(void)
{
    if (bomp_icv_get()->task) {
        return (OMP_GET_ICV_TASK(active_levels) > 0);
    } else {
        return 0;
    }
}

/**
 * \brief enables / disables the dynamic behavior
 *
 * \param dynamic_threads zero to disable dynamic behavior
 *                        non-zero to enable dynamic behavior
 *
 * Enables or disables dynamic adjustment of the number of threads by setting
 * the value of the dyn-var ICV.
 */
void omp_set_dynamic(int dynamic_threads)
{
#if OMP_SUPPORT_DYNAMIC
    OMP_SET_ICV_TASK(dynamic, (!!dynamic_threads));
#endif
}

/**
 * \brief checks if the dynamic behavior is enabled for the current task
 *
 * \returns TRUE if dynamic behavior enabled
 *          FALSE if disabled
 *
 * This routine returns the value of the dyn-var ICV, which is true if dynamic
 * adjustment of the number of threads is enabled for the current task.
 */
int omp_get_dynamic(void)
{
#if OMP_SUPPORT_DYNAMIC
    return OMP_GET_ICV_TASK(dynamic);
#else
    return 0;
#endif
}

/**
 * \brief Enables or disables nested parallelism, by setting the nest-var ICV.
 *
 * \param nested TRUE: enable nested behavior
 *               FALSE: disable nested behavior
 */
void omp_set_nested(int nested)
{
#if OMP_SUPPORT_NESTED
    OMP_SET_ICV_TASK(nested, !!nested);
#endif
}

/**
 * \brief checks if the nested behavior is enabled
 *
 * \returns TRUE if nested behavior is enabled
 *          FALSE if disabled
 *
 * Returns the value of the nest-var ICV, which indicates if nested parallelism
 * is enabled or disabled.
 */
int omp_get_nested(void)
{
#if OMP_SUPPORT_NESTED
    return OMP_GET_ICV_TASK(nested);
#else
    return 0;
#endif
}

/**
 * \brief sets the schedule to be used
 *
 * \param kind      the schedule kind to use (one of OMP_SCHED_*)
 * \param modifier  modifier to tweak the scheduler (depends on kind)
 *
 * The omp_set_schedule routine affects the schedule that is applied when runtime
 * is used as schedule kind, by setting the value of the run-sched-var ICV.
 */
void omp_set_schedule(omp_sched_t kind,
                      int modifier)
{
    OMP_SET_ICV_TASK(run_sched, kind);
    OMP_SET_ICV_TASK(run_sched_modifier, modifier);
}

/**
 * \brief returns the current scheduler settings
 *
 * \param kind      returns the current scheduler setting (one of OMP_SCHED_*)
 * \param modifier  returns the modifier of the scheduler
 *
 * Returns the value of the run-sched-var ICV, which is the schedule applied when
 * the runtime schedule is used.
 */
void omp_get_schedule(omp_sched_t *kind,
                      int *modifier)
{
    if (kind) {
        *kind = OMP_GET_ICV_TASK(run_sched);
    }
    if (modifier) {
        *modifier = OMP_GET_ICV_TASK(run_sched_modifier);
    }
}
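
/*
 * Usage sketch (illustrative only, not part of this library): selecting the
 * schedule applied to loops that use schedule(runtime). The example uses the
 * standard omp_sched_dynamic constant; the local omp.h may name these
 * OMP_SCHED_* instead.
 *
 *     #include <omp.h>
 *
 *     void scale(double *v, int n, double f)
 *     {
 *         omp_set_schedule(omp_sched_dynamic, 64);  // chunk size 64
 *
 *         #pragma omp parallel for schedule(runtime)
 *         for (int i = 0; i < n; i++) {
 *             v[i] *= f;
 *         }
 *     }
 */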

/**
 * \brief obtains the maximum number of OpenMP threads available
 *
 * \returns number of available threads
 *
 * Returns the value of the thread-limit-var ICV, which is the maximum number
 * of OpenMP threads available.
 *
 * The binding thread set for an omp_get_thread_limit region is all threads on the
 * device. The effect of executing this routine is not related to any specific
 * region corresponding to any construct or API routine.
 */
int omp_get_thread_limit(void)
{
    return OMP_GET_ICV_TASK(thread_limit);
}

/**
 * \brief limits the nesting depth
 *
 * \param max_active_levels maximum number of nested active parallel regions
 *
 * Limits the number of nested active parallel regions, by setting the
 * max-active-levels-var ICV.
 */
void omp_set_max_active_levels(int max_active_levels)
{
    if (max_active_levels > 0) {
        OMP_SET_ICV_DEV(max_active_levels, max_active_levels);
    }
}

/**
 * \brief returns the maximum nesting depth
 *
 * \returns maximum nested level
 *
 * Returns the value of the max-active-levels-var ICV, which determines the
 * maximum number of nested active parallel regions.
 */
int omp_get_max_active_levels(void)
{
    return OMP_GET_ICV_DEV(max_active_levels);
}

/**
 * \brief returns the level the task is running at
 *
 * \returns number of enclosing nested parallel regions
 *
 * For the enclosing device region, returns the levels-var ICV, which is the
 * number of nested parallel regions that enclose the task containing the call.
 */
int omp_get_level(void)
{
    return OMP_GET_ICV_TASK(levels);
}

/**
 * \brief returns the ancestor thread number of a thread at a given level
 *
 * \param level the level of the ancestor
 *
 * \returns thread number of ancestor thread
 *
 * The omp_get_ancestor_thread_num routine returns the thread number of the
 * ancestor at a given nest level of the current thread or the thread number of
 * the current thread. If the requested nest level is outside the range of 0 and
 * the nest level of the current thread, as returned by the omp_get_level routine,
 * the routine returns -1.
 */
int omp_get_ancestor_thread_num(int level)
{
    int my_level = omp_get_level();
    if (level > my_level || level < 0) {
        return -1;
    } else if (my_level == level) {
        return omp_get_thread_num();
    } else {
        /* TODO */
        assert(!"NYI");
        return 0;
    }
}

/**
 * \brief returns the team size of a thread at a given level
 *
 * \param level the level to consider
 *
 * \returns number of threads in the team
 *
 * The omp_get_team_size routine returns the size of the thread team to which the
 * ancestor or the current thread belongs. If the requested nested level is outside
 * the range of 0 and the nested level of the current thread, as returned by the
 * omp_get_level routine, the routine returns -1. Inactive parallel regions are
 * regarded like active parallel regions executed with one thread.
 */
int omp_get_team_size(int level)
{
    int my_level = omp_get_level();
    if (level > my_level || level < 0) {
        return -1;
    } else {
        /* TODO */
        assert(!"NYI");
        return 0;
    }
}

/**
 * \brief returns the number of active, nested parallel regions
 *
 * \returns number of nested parallel regions
 *
 * The effect of the omp_get_active_level routine is to return the number of nested,
 * active parallel regions enclosing the current task such that all of the parallel
 * regions are enclosed by the outermost initial task region on the current device.
 */
int omp_get_active_level(void)
{
    return OMP_GET_ICV_TASK(active_levels);
}

/**
 * \brief checks if thread is in the final task region
 *
 * \returns TRUE if thread is in the final task region
 *          FALSE otherwise
 *
 * Returns true if the routine is executed in a final task region; otherwise,
 * it returns false.
 */
int omp_in_final(void)
{
    assert(!"NYI");
    return 1;  // TODO
}

#if OMP_VERSION >= OMP_VERSION_40

/**
 * \brief returns the cancellation value
 *
 * \returns cancellation value
 *
 * Returns the value of the cancel-var ICV, which controls the behavior of the
 * cancel construct and cancellation points.
 */
int omp_get_cancellation(void)
{
    return OMP_GET_ICV_DEV(cancel);
}

/**
 * \brief returns the thread affinity policy
 *
 * \returns OpenMP thread policy value
 *
 * Returns the thread affinity policy to be used for the subsequent nested
 * parallel regions that do not specify a proc_bind clause.
 */
omp_proc_bind_t omp_get_proc_bind(void)
{
    return OMP_GET_ICV_TASK(bind);
}

/**
 * \brief controls the default target device
 *
 * \param device_num device number of the target device
 *
 * The effect of this routine is to set the value of the default-device-var ICV
 * of the current task to the value specified in the argument. When called from
 * within a target region the effect of this routine is unspecified.
 */
void omp_set_default_device(int device_num)
{
    OMP_SET_ICV_TASK(default_device, device_num);
}

/**
 * \brief Returns the default target device.
 *
 * \returns device number of default target device
 *
 * The omp_get_default_device routine returns the value of the default-device-var
 * ICV of the current task. When called from within a target region the effect of
 * this routine is unspecified.
 */
int omp_get_default_device(void)
{
    // TODO: behavior if on target
    return OMP_GET_ICV_TASK(default_device);
}

/**
 * \brief Returns the number of target devices.
 *
 * \returns number of target devices
 *
 * The omp_get_num_devices routine returns the number of available target devices.
 * When called from within a target region the effect of this routine is
 * unspecified.
 */
int omp_get_num_devices(void)
{
    return 0;  // TODO
}

/**
 * \brief returns the number of teams in the current region
 *
 * \returns number of teams
 *
 * The effect of this routine is to return the number of teams in the current teams
 * region. The routine returns 1 if it is called from outside of a teams region.
 */
int omp_get_num_teams(void)
{
    assert(!"NYI: Teams");
    return 1;  // TODO: team counting
}

/**
 * \brief gets the team number of the calling thread
 *
 * \returns team number
 *
 * Returns the team number of the calling thread. The team number is an integer
 * between 0 and one less than the value returned by omp_get_num_teams, inclusive.
 */
int omp_get_team_num(void)
{
    assert(!"NYI: Teams");
    return 0;
}

/**
 * \brief checks if the task is executing on the host device
 *
 * \returns TRUE if the task executes on the host device
 *          FALSE otherwise
 *
 * Returns true if the current task is executing on the host device; otherwise,
 * it returns false.
 */
int omp_is_initial_device(void)
{
    assert(!"NYI: Initial device");
    return 1;
}
#endif

/*
 * ---------------------------------------------------------------------------
 * 3.3 Lock Routines
 * ---------------------------------------------------------------------------
 * General-purpose lock routines. Two types of locks are supported: simple locks
 * and nestable locks. A nestable lock can be set multiple times by the same task
 * before being unset; a simple lock cannot be set if it is already owned by the
 * task trying to set it.
 *
 * XXX: we may have to consider something different when we are dealing with
 *      non-shared address spaces such as XOMP
 */


/*
 * Simple OpenMP locks
 */

/**
 * \brief initializes and allocates a simple OpenMP lock
 *
 * \param arg pointer to the lock to be initialized
 *
 * The effect of these routines is to initialize the lock to the unlocked state;
 * that is, no task owns the lock.
 */
void omp_init_lock(omp_lock_t *arg)
{
    struct __omp_lock *lock = (struct __omp_lock *)arg;

    assert(lock != NULL);

    thread_mutex_init(&lock->mutex);
    lock->initialized = 0x1;
}

/**
 * \brief destroys a simple OpenMP lock
 *
 * \param arg OpenMP lock to be destroyed (set to zero)
 *
 * The effect of these routines is to change the state of the lock to uninitialized.
 */
void omp_destroy_lock(omp_lock_t *arg)
{
    struct __omp_lock *lock = (struct __omp_lock *) arg;

    /* acquire the lock to make sure there are no other threads holding the lock */
    thread_mutex_lock(&lock->mutex);
    /* we have the lock now */
    memset(lock, 0, sizeof (*lock));
}

/**
 * \brief acquires a simple OpenMP lock
 *
 * \param arg   The lock to acquire
 *
 * Each of these routines causes suspension of the task executing the routine
 * until the specified lock is available and then sets the lock.
 */
void omp_set_lock(omp_lock_t *arg)
{
    struct __omp_lock *lock = (struct __omp_lock *) arg;
    assert(lock->initialized);
    thread_mutex_lock(&lock->mutex);
}

/**
 * \brief Releases the simple OpenMP lock
 *
 * \param arg   The lock to be released
 *
 * For a simple lock, the omp_unset_lock routine causes the lock to become
 * unlocked.
 */
void omp_unset_lock(omp_lock_t *arg)
{
    struct __omp_lock *lock = (struct __omp_lock *) arg;
    assert(lock->initialized);
    thread_mutex_unlock(&lock->mutex);
}

/**
 * \brief tries to acquire a simple OpenMP lock
 *
 * \param arg   The OpenMP lock to acquire
 *
 * \returns TRUE if lock is acquired successfully
 *          FALSE if lock is already held by another thread
 *
 * This routine attempts to set the lock in the same manner as omp_set_lock,
 * except that it does not suspend execution of the task executing the routine.
 * For a simple lock, the omp_test_lock routine returns true if the lock is
 * successfully set; otherwise, it returns false.
 */
int omp_test_lock(omp_lock_t *arg)
{
    struct __omp_lock *lock = (struct __omp_lock *) arg;
    assert(lock->initialized);
    return thread_mutex_trylock(&lock->mutex);
}
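
/*
 * Usage sketch (illustrative only, not part of this library): protecting a
 * shared counter with a simple lock.
 *
 *     #include <omp.h>
 *
 *     static omp_lock_t counter_lock;
 *     static long counter;
 *
 *     void count_hits(int hits)
 *     {
 *         omp_init_lock(&counter_lock);
 *
 *         #pragma omp parallel for
 *         for (int i = 0; i < hits; i++) {
 *             omp_set_lock(&counter_lock);
 *             counter++;
 *             omp_unset_lock(&counter_lock);
 *         }
 *
 *         omp_destroy_lock(&counter_lock);
 *     }
 */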

/*
 * Nestable OpenMP locks
 */

/**
 * \brief initializes and allocates a nestable OpenMP lock
 *
 * \param arg pointer to the lock to be initialized
 *
 * The effect of these routines is to initialize the lock to the unlocked state;
 * that is, no task owns the lock. In addition, the nesting count for a nestable
 * lock is set to zero.
 */
void omp_init_nest_lock(omp_nest_lock_t *arg)
{
    struct __omp_nested_lock *nlock = (struct __omp_nested_lock *)arg;
    assert(nlock != NULL);
    thread_mutex_init(&nlock->mutex);
    nlock->owner = NULL;
    nlock->count = 0;
    nlock->initialized = 1;
}

/**
 * \brief destroys a nestable OpenMP lock
 *
 * \param arg OpenMP lock to be destroyed (set to zero)
 *
 * The effect of these routines is to change the state of the lock to uninitialized.
 */
void omp_destroy_nest_lock(omp_nest_lock_t *arg)
{
    struct __omp_nested_lock *nlock = (struct __omp_nested_lock *) arg;

    /* acquire the lock to make sure there are no other threads holding the lock */
    thread_mutex_lock(&nlock->mutex);
    /* we have the lock now */
    memset(nlock, 0, sizeof (*nlock));
}

/**
 * \brief acquires a nestable OpenMP lock
 *
 * \param arg   The lock to acquire
 *
 * Each of these routines causes suspension of the task executing the routine
 * until the specified lock is available and then sets the lock.
 *
 * A nestable lock is available if it is unlocked or if it is already owned by
 * the task executing the routine. The task executing the routine is granted,
 * or retains, ownership of the lock, and the nesting count for the lock is
 * incremented.
 */
void omp_set_nest_lock(omp_nest_lock_t *arg)
{
    struct __omp_nested_lock *nlock = (struct __omp_nested_lock *) arg;
    assert(nlock->initialized);

    if (nlock->owner != thread_self()) {
        thread_mutex_lock(&nlock->mutex);
        nlock->owner = thread_self();
    }
    nlock->count++;
}

/**
 * \brief Releases a nestable OpenMP lock
 *
 * \param arg   The lock to be released
 *
 * For a nestable lock, the omp_unset_nest_lock routine decrements the nesting
 * count, and causes the lock to become unlocked if the resulting nesting count
 * is zero.
 */
void omp_unset_nest_lock(omp_nest_lock_t *arg)
{
    struct __omp_nested_lock *nlock = (struct __omp_nested_lock *) arg;
    assert(nlock->initialized);

    nlock->count--;

    /* release ownership and the mutex once the nesting count drops to zero */
    if (nlock->count == 0) {
        nlock->owner = NULL;
        thread_mutex_unlock(&nlock->mutex);
    }
}

/**
 * \brief tries to acquire a nestable OpenMP lock
 *
 * \param arg   The OpenMP lock to acquire
 *
 * \returns the new nesting count if the lock was acquired successfully
 *          ZERO if the lock is already held by another task
 *
 * This routine attempts to set the lock in the same manner as omp_set_nest_lock,
 * except that it does not suspend execution of the task executing the routine.
 * For a nestable lock, the omp_test_nest_lock routine returns the new nesting
 * count if the lock is successfully set; otherwise, it returns zero.
 */
int omp_test_nest_lock(omp_nest_lock_t *arg)
{
    struct __omp_nested_lock *nlock = (struct __omp_nested_lock *) arg;
    assert(nlock->initialized);

    if (nlock->owner != thread_self()) {
        if (!thread_mutex_trylock(&nlock->mutex)) {
            return 0;
        }
        nlock->owner = thread_self();
    }

    nlock->count++;

    return nlock->count;
}
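
/*
 * Usage sketch (illustrative only, not part of this library): a nestable lock
 * lets the owning task re-acquire the lock, e.g. when one locked helper calls
 * another. The lock is assumed to have been initialized with
 * omp_init_nest_lock() elsewhere.
 *
 *     #include <omp.h>
 *
 *     static omp_nest_lock_t list_lock;
 *
 *     void list_append(int v)
 *     {
 *         omp_set_nest_lock(&list_lock);
 *         // ... append v to the shared list ...
 *         omp_unset_nest_lock(&list_lock);
 *     }
 *
 *     void list_append_pair(int a, int b)
 *     {
 *         omp_set_nest_lock(&list_lock);    // nesting count becomes 1
 *         list_append(a);                   // same task: count 2, then back to 1
 *         list_append(b);
 *         omp_unset_nest_lock(&list_lock);  // count reaches 0, lock released
 *     }
 */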

/*
 * ---------------------------------------------------------------------------
 * 3.4 Timing Routines
 * ---------------------------------------------------------------------------
 * Timing routines support a portable wall clock timer. These record elapsed
 * time per-thread and are not guaranteed to be globally consistent across all
 * the threads participating in an application.
 */

/**
 * \brief returns elapsed wall clock time in seconds.
 *
 * \returns wall clock time
 *
 * The omp_get_wtime routine returns a value equal to the elapsed wall clock time
 * in seconds since some "time in the past". The actual "time in the past" is
 * arbitrary, but it is guaranteed not to change during the execution of the
 * application program. The time returned is a "per-thread time", so it is not
 * required to be globally consistent across all the threads participating in an
 * application.
 */
double omp_get_wtime(void)
{
    cycles_t t_start = OMP_GET_ICV_GLOBAL(time_start);
    cycles_t t_current = rdtsc();
    assert(!"NYI: conversion from cycles to seconds");
    return (t_current - t_start);
}
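
/*
 * Usage sketch (illustrative only, not part of this library): timing a
 * parallel region. Note that this implementation still lacks the conversion
 * from cycles to seconds (see the assertion above), so the example shows the
 * intended interface rather than current behavior.
 *
 *     #include <omp.h>
 *     #include <stdio.h>
 *
 *     void timed_work(void)
 *     {
 *         double start = omp_get_wtime();
 *
 *         #pragma omp parallel
 *         {
 *             // ... work ...
 *         }
 *
 *         printf("elapsed: %f s (tick: %g s)\n",
 *                omp_get_wtime() - start, omp_get_wtick());
 *     }
 */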

/**
 * \brief returns the precision of the timer used by omp_get_wtime.
 *
 * \returns the timer precision
 *
 * The omp_get_wtick routine returns a value equal to the number of seconds
 * between successive clock ticks of the timer used by omp_get_wtime.
 */
double omp_get_wtick(void)
{
    return 1.0 / 1e6;
}