z_Linux_util.cpp revision 355940
/*
 * z_Linux_util.cpp -- platform specific routines.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_affinity.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"
#include "kmp_str.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"

#if !KMP_OS_DRAGONFLY && !KMP_OS_FREEBSD && !KMP_OS_NETBSD && !KMP_OS_OPENBSD
#include <alloca.h>
#endif
#include <math.h> // HUGE_VAL.
#include <sys/resource.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/times.h>
#include <unistd.h>

#if KMP_OS_LINUX && !KMP_OS_CNK
#include <sys/sysinfo.h>
#if KMP_USE_FUTEX
// We should really include <futex.h>, but that causes compatibility problems
// on different Linux* OS distributions that either require that you include
// (or break when you try to include) <pci/types.h>. Since all we need is the
// two macros below (which are part of the kernel ABI, so can't change), we
// just define the constants here and don't include <futex.h>.
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif
#endif
#elif KMP_OS_DARWIN
#include <mach/mach.h>
#include <sys/sysctl.h>
#elif KMP_OS_DRAGONFLY || KMP_OS_FREEBSD
#include <pthread_np.h>
#elif KMP_OS_NETBSD
#include <sys/types.h>
#include <sys/sysctl.h>
#endif

#include <ctype.h>
#include <dirent.h>
#include <fcntl.h>

#include "tsan_annotations.h"

struct kmp_sys_timer {
  struct timespec start;
};

// Convert timespec to nanoseconds.
#define TS2NS(timespec) (((timespec).tv_sec * 1e9) + (timespec).tv_nsec)
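// For example, TS2NS applied to a timespec of {tv_sec = 2, tv_nsec = 500000000}
// yields 2.5e9 ns; note that the 1e9 constant is a double, so the whole
// expression evaluates in floating point.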

static struct kmp_sys_timer __kmp_sys_timer_data;

#if KMP_HANDLE_SIGNALS
typedef void (*sig_func_t)(int);
STATIC_EFI2_WORKAROUND struct sigaction __kmp_sighldrs[NSIG];
static sigset_t __kmp_sigset;
#endif

static int __kmp_init_runtime = FALSE;

static int __kmp_fork_count = 0;

static pthread_condattr_t __kmp_suspend_cond_attr;
static pthread_mutexattr_t __kmp_suspend_mutex_attr;

static kmp_cond_align_t __kmp_wait_cv;
static kmp_mutex_align_t __kmp_wait_mx;

kmp_uint64 __kmp_ticks_per_msec = 1000000;

#ifdef DEBUG_SUSPEND
static void __kmp_print_cond(char *buffer, kmp_cond_align_t *cond) {
  KMP_SNPRINTF(buffer, 128, "(cond (lock (%ld, %d)), (descr (%p)))",
               cond->c_cond.__c_lock.__status, cond->c_cond.__c_lock.__spinlock,
               cond->c_cond.__c_waiting);
}
#endif

#if (KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED)

/* Affinity support */

void __kmp_affinity_bind_thread(int which) {
  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
              "Illegal set affinity operation when not capable");

  kmp_affin_mask_t *mask;
  KMP_CPU_ALLOC_ON_STACK(mask);
  KMP_CPU_ZERO(mask);
  KMP_CPU_SET(which, mask);
  __kmp_set_system_affinity(mask, TRUE);
  KMP_CPU_FREE_FROM_STACK(mask);
}

/* Determine if we can access affinity functionality on this version of
 * Linux* OS by checking __NR_sched_{get,set}affinity system calls, and set
 * __kmp_affin_mask_size to the appropriate value (0 means not capable). */
void __kmp_affinity_determine_capable(const char *env_var) {
// Check and see if the OS supports thread affinity.

#define KMP_CPU_SET_SIZE_LIMIT (1024 * 1024)

  int gCode;
  int sCode;
  unsigned char *buf;
  buf = (unsigned char *)KMP_INTERNAL_MALLOC(KMP_CPU_SET_SIZE_LIMIT);

  // If Linux* OS:
  // If the syscall fails or returns a suggestion for the size,
  // then we don't have to search for an appropriate size.
  gCode = syscall(__NR_sched_getaffinity, 0, KMP_CPU_SET_SIZE_LIMIT, buf);
  KA_TRACE(30, ("__kmp_affinity_determine_capable: "
                "initial getaffinity call returned %d errno = %d\n",
                gCode, errno));

  // if ((gCode < 0) && (errno == ENOSYS))
  if (gCode < 0) {
    // System call not supported
    if (__kmp_affinity_verbose ||
        (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none) &&
         (__kmp_affinity_type != affinity_default) &&
         (__kmp_affinity_type != affinity_disabled))) {
      int error = errno;
      kmp_msg_t err_code = KMP_ERR(error);
      __kmp_msg(kmp_ms_warning, KMP_MSG(GetAffSysCallNotSupported, env_var),
                err_code, __kmp_msg_null);
      if (__kmp_generate_warnings == kmp_warnings_off) {
        __kmp_str_free(&err_code.str);
      }
    }
    KMP_AFFINITY_DISABLE();
    KMP_INTERNAL_FREE(buf);
    return;
  }
  if (gCode > 0) { // Linux* OS only
    // The optimal situation: the OS returns the size of the buffer it expects.
    //
    // A verification of correct behavior is that setaffinity on a NULL
    // buffer with the same size fails with errno set to EFAULT.
    sCode = syscall(__NR_sched_setaffinity, 0, gCode, NULL);
    KA_TRACE(30, ("__kmp_affinity_determine_capable: "
                  "setaffinity for mask size %d returned %d errno = %d\n",
                  gCode, sCode, errno));
    if (sCode < 0) {
      if (errno == ENOSYS) {
        if (__kmp_affinity_verbose ||
            (__kmp_affinity_warnings &&
             (__kmp_affinity_type != affinity_none) &&
             (__kmp_affinity_type != affinity_default) &&
             (__kmp_affinity_type != affinity_disabled))) {
          int error = errno;
          kmp_msg_t err_code = KMP_ERR(error);
          __kmp_msg(kmp_ms_warning, KMP_MSG(SetAffSysCallNotSupported, env_var),
                    err_code, __kmp_msg_null);
          if (__kmp_generate_warnings == kmp_warnings_off) {
            __kmp_str_free(&err_code.str);
          }
        }
        KMP_AFFINITY_DISABLE();
        KMP_INTERNAL_FREE(buf);
      }
      if (errno == EFAULT) {
        KMP_AFFINITY_ENABLE(gCode);
        KA_TRACE(10, ("__kmp_affinity_determine_capable: "
                      "affinity supported (mask size %d)\n",
                      (int)__kmp_affin_mask_size));
        KMP_INTERNAL_FREE(buf);
        return;
      }
    }
  }

  // Call the getaffinity system call repeatedly with increasing set sizes
  // until we succeed, or reach an upper bound on the search.
  KA_TRACE(30, ("__kmp_affinity_determine_capable: "
                "searching for proper set size\n"));
  int size;
  for (size = 1; size <= KMP_CPU_SET_SIZE_LIMIT; size *= 2) {
    gCode = syscall(__NR_sched_getaffinity, 0, size, buf);
    KA_TRACE(30, ("__kmp_affinity_determine_capable: "
                  "getaffinity for mask size %d returned %d errno = %d\n",
                  size, gCode, errno));

    if (gCode < 0) {
      if (errno == ENOSYS) {
        // We shouldn't get here
        KA_TRACE(30, ("__kmp_affinity_determine_capable: "
                      "inconsistent OS call behavior: errno == ENOSYS for mask "
                      "size %d\n",
                      size));
        if (__kmp_affinity_verbose ||
            (__kmp_affinity_warnings &&
             (__kmp_affinity_type != affinity_none) &&
             (__kmp_affinity_type != affinity_default) &&
             (__kmp_affinity_type != affinity_disabled))) {
          int error = errno;
          kmp_msg_t err_code = KMP_ERR(error);
          __kmp_msg(kmp_ms_warning, KMP_MSG(GetAffSysCallNotSupported, env_var),
                    err_code, __kmp_msg_null);
          if (__kmp_generate_warnings == kmp_warnings_off) {
            __kmp_str_free(&err_code.str);
          }
        }
        KMP_AFFINITY_DISABLE();
        KMP_INTERNAL_FREE(buf);
        return;
      }
      continue;
    }

    sCode = syscall(__NR_sched_setaffinity, 0, gCode, NULL);
    KA_TRACE(30, ("__kmp_affinity_determine_capable: "
                  "setaffinity for mask size %d returned %d errno = %d\n",
                  gCode, sCode, errno));
    if (sCode < 0) {
      if (errno == ENOSYS) { // Linux* OS only
        // We shouldn't get here
        KA_TRACE(30, ("__kmp_affinity_determine_capable: "
                      "inconsistent OS call behavior: errno == ENOSYS for mask "
                      "size %d\n",
                      size));
        if (__kmp_affinity_verbose ||
            (__kmp_affinity_warnings &&
             (__kmp_affinity_type != affinity_none) &&
             (__kmp_affinity_type != affinity_default) &&
             (__kmp_affinity_type != affinity_disabled))) {
          int error = errno;
          kmp_msg_t err_code = KMP_ERR(error);
          __kmp_msg(kmp_ms_warning, KMP_MSG(SetAffSysCallNotSupported, env_var),
                    err_code, __kmp_msg_null);
          if (__kmp_generate_warnings == kmp_warnings_off) {
            __kmp_str_free(&err_code.str);
          }
        }
        KMP_AFFINITY_DISABLE();
        KMP_INTERNAL_FREE(buf);
        return;
      }
      if (errno == EFAULT) {
        KMP_AFFINITY_ENABLE(gCode);
        KA_TRACE(10, ("__kmp_affinity_determine_capable: "
                      "affinity supported (mask size %d)\n",
                      (int)__kmp_affin_mask_size));
        KMP_INTERNAL_FREE(buf);
        return;
      }
    }
  }
  // save uncaught error code
  // int error = errno;
  KMP_INTERNAL_FREE(buf);
  // restore uncaught error code, will be printed at the next KMP_WARNING below
  // errno = error;

  // Affinity is not supported
  KMP_AFFINITY_DISABLE();
  KA_TRACE(10, ("__kmp_affinity_determine_capable: "
                "cannot determine mask size - affinity not supported\n"));
  if (__kmp_affinity_verbose ||
      (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none) &&
       (__kmp_affinity_type != affinity_default) &&
       (__kmp_affinity_type != affinity_disabled))) {
    KMP_WARNING(AffCantGetMaskSize, env_var);
  }
}

#endif // KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
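
/* An illustrative sketch (not part of the runtime) of the probing strategy
   implemented above: ask sched_getaffinity for successively larger mask sizes
   until the kernel accepts one, then confirm the size by expecting EFAULT from
   sched_setaffinity on a NULL buffer of that size:

     static size_t probe_affin_mask_size(void) {
       unsigned char buf[1024];
       for (size_t size = 1; size <= sizeof(buf); size *= 2) {
         long gc = syscall(__NR_sched_getaffinity, 0, size, buf);
         if (gc < 0)
           continue; // size rejected (or call unsupported); try the next one
         if (syscall(__NR_sched_setaffinity, 0, gc, NULL) < 0 &&
             errno == EFAULT)
           return (size_t)gc; // NULL buffer faulted as expected: size is good
       }
       return 0; // affinity not supported at any probed size
     }
*/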

#if KMP_USE_FUTEX

int __kmp_futex_determine_capable() {
  int loc = 0;
  int rc = syscall(__NR_futex, &loc, FUTEX_WAKE, 1, NULL, NULL, 0);
  int retval = (rc == 0) || (errno != ENOSYS);

  KA_TRACE(10,
           ("__kmp_futex_determine_capable: rc = %d errno = %d\n", rc, errno));
  KA_TRACE(10, ("__kmp_futex_determine_capable: futex syscall%s supported\n",
                retval ? "" : " not"));

  return retval;
}

#endif // KMP_USE_FUTEX
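
/* A minimal sketch (not part of the runtime) of how the FUTEX_WAIT/FUTEX_WAKE
   pair probed above is typically used: a waiter blocks while a shared word
   still holds the expected value, and a waker wakes it after changing it:

     static int futex_word = 0;
     // Waiter: sleeps only if futex_word still equals 0 at syscall time.
     syscall(__NR_futex, &futex_word, FUTEX_WAIT, 0, NULL, NULL, 0);
     // Waker (in another thread): publish the change, then wake one waiter.
     __atomic_store_n(&futex_word, 1, __ATOMIC_RELEASE);
     syscall(__NR_futex, &futex_word, FUTEX_WAKE, 1, NULL, NULL, 0);
*/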

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64) && (!KMP_ASM_INTRINS)
/* Only 32-bit "add-exchange" instruction on IA-32 architecture causes us to
   use compare_and_store for these routines */

kmp_int8 __kmp_test_then_or8(volatile kmp_int8 *p, kmp_int8 d) {
  kmp_int8 old_value, new_value;

  old_value = TCR_1(*p);
  new_value = old_value | d;

  while (!KMP_COMPARE_AND_STORE_REL8(p, old_value, new_value)) {
    KMP_CPU_PAUSE();
    old_value = TCR_1(*p);
    new_value = old_value | d;
  }
  return old_value;
}

kmp_int8 __kmp_test_then_and8(volatile kmp_int8 *p, kmp_int8 d) {
  kmp_int8 old_value, new_value;

  old_value = TCR_1(*p);
  new_value = old_value & d;

  while (!KMP_COMPARE_AND_STORE_REL8(p, old_value, new_value)) {
    KMP_CPU_PAUSE();
    old_value = TCR_1(*p);
    new_value = old_value & d;
  }
  return old_value;
}

kmp_uint32 __kmp_test_then_or32(volatile kmp_uint32 *p, kmp_uint32 d) {
  kmp_uint32 old_value, new_value;

  old_value = TCR_4(*p);
  new_value = old_value | d;

  while (!KMP_COMPARE_AND_STORE_REL32(p, old_value, new_value)) {
    KMP_CPU_PAUSE();
    old_value = TCR_4(*p);
    new_value = old_value | d;
  }
  return old_value;
}

kmp_uint32 __kmp_test_then_and32(volatile kmp_uint32 *p, kmp_uint32 d) {
  kmp_uint32 old_value, new_value;

  old_value = TCR_4(*p);
  new_value = old_value & d;

  while (!KMP_COMPARE_AND_STORE_REL32(p, old_value, new_value)) {
    KMP_CPU_PAUSE();
    old_value = TCR_4(*p);
    new_value = old_value & d;
  }
  return old_value;
}

#if KMP_ARCH_X86
kmp_int8 __kmp_test_then_add8(volatile kmp_int8 *p, kmp_int8 d) {
  kmp_int8 old_value, new_value;

  old_value = TCR_1(*p);
  new_value = old_value + d;

  while (!KMP_COMPARE_AND_STORE_REL8(p, old_value, new_value)) {
    KMP_CPU_PAUSE();
    old_value = TCR_1(*p);
    new_value = old_value + d;
  }
  return old_value;
}

kmp_int64 __kmp_test_then_add64(volatile kmp_int64 *p, kmp_int64 d) {
  kmp_int64 old_value, new_value;

  old_value = TCR_8(*p);
  new_value = old_value + d;

  while (!KMP_COMPARE_AND_STORE_REL64(p, old_value, new_value)) {
    KMP_CPU_PAUSE();
    old_value = TCR_8(*p);
    new_value = old_value + d;
  }
  return old_value;
}
#endif /* KMP_ARCH_X86 */

kmp_uint64 __kmp_test_then_or64(volatile kmp_uint64 *p, kmp_uint64 d) {
  kmp_uint64 old_value, new_value;

  old_value = TCR_8(*p);
  new_value = old_value | d;
  while (!KMP_COMPARE_AND_STORE_REL64(p, old_value, new_value)) {
    KMP_CPU_PAUSE();
    old_value = TCR_8(*p);
    new_value = old_value | d;
  }
  return old_value;
}

kmp_uint64 __kmp_test_then_and64(volatile kmp_uint64 *p, kmp_uint64 d) {
  kmp_uint64 old_value, new_value;

  old_value = TCR_8(*p);
  new_value = old_value & d;
  while (!KMP_COMPARE_AND_STORE_REL64(p, old_value, new_value)) {
    KMP_CPU_PAUSE();
    old_value = TCR_8(*p);
    new_value = old_value & d;
  }
  return old_value;
}

#endif /* (KMP_ARCH_X86 || KMP_ARCH_X86_64) && (! KMP_ASM_INTRINS) */
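
/* All of the routines above share the same lock-free shape: read the current
   value, compute the updated value, and retry the compare-and-store until no
   other thread has raced in between. Schematically:

     old = *p;
     while (!compare_and_store(p, old, old OP d)) {
       pause();  // back off briefly to reduce contention while spinning
       old = *p; // re-read and recompute after losing the race
     }
     return old; // like fetch-and-OP, the previous value is returned
*/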

void __kmp_terminate_thread(int gtid) {
  int status;
  kmp_info_t *th = __kmp_threads[gtid];

  if (!th)
    return;

#ifdef KMP_CANCEL_THREADS
  KA_TRACE(10, ("__kmp_terminate_thread: kill (%d)\n", gtid));
  status = pthread_cancel(th->th.th_info.ds.ds_thread);
  if (status != 0 && status != ESRCH) {
    __kmp_fatal(KMP_MSG(CantTerminateWorkerThread), KMP_ERR(status),
                __kmp_msg_null);
  }
#endif
  KMP_YIELD(TRUE);
} // __kmp_terminate_thread

/* Set thread stack info according to values returned by pthread_getattr_np().
   If values are unreasonable, assume call failed and use incremental stack
   refinement method instead. Returns TRUE if the stack parameters could be
   determined exactly, FALSE if incremental refinement is necessary. */
static kmp_int32 __kmp_set_stack_info(int gtid, kmp_info_t *th) {
  int stack_data;
#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
        KMP_OS_HURD
  pthread_attr_t attr;
  int status;
  size_t size = 0;
  void *addr = 0;

  /* Always do incremental stack refinement for ubermaster threads since the
     initial thread stack range can be reduced by sibling thread creation so
     pthread_attr_getstack may cause thread gtid aliasing */
  if (!KMP_UBER_GTID(gtid)) {

    /* Fetch the real thread attributes */
    status = pthread_attr_init(&attr);
    KMP_CHECK_SYSFAIL("pthread_attr_init", status);
#if KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD
    status = pthread_attr_get_np(pthread_self(), &attr);
    KMP_CHECK_SYSFAIL("pthread_attr_get_np", status);
#else
    status = pthread_getattr_np(pthread_self(), &attr);
    KMP_CHECK_SYSFAIL("pthread_getattr_np", status);
#endif
    status = pthread_attr_getstack(&attr, &addr, &size);
    KMP_CHECK_SYSFAIL("pthread_attr_getstack", status);
    KA_TRACE(60,
             ("__kmp_set_stack_info: T#%d pthread_attr_getstack returned size:"
              " %lu, low addr: %p\n",
              gtid, size, addr));
    status = pthread_attr_destroy(&attr);
    KMP_CHECK_SYSFAIL("pthread_attr_destroy", status);
  }

  if (size != 0 && addr != 0) { // was stack parameter determination successful?
    /* Store the correct base and size */
    TCW_PTR(th->th.th_info.ds.ds_stackbase, (((char *)addr) + size));
    TCW_PTR(th->th.th_info.ds.ds_stacksize, size);
    TCW_4(th->th.th_info.ds.ds_stackgrow, FALSE);
    return TRUE;
  }
#endif /* KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||
          KMP_OS_HURD */
  /* Use incremental refinement starting from initial conservative estimate */
  TCW_PTR(th->th.th_info.ds.ds_stacksize, 0);
  TCW_PTR(th->th.th_info.ds.ds_stackbase, &stack_data);
  TCW_4(th->th.th_info.ds.ds_stackgrow, TRUE);
  return FALSE;
}

static void *__kmp_launch_worker(void *thr) {
  int status, old_type, old_state;
#ifdef KMP_BLOCK_SIGNALS
  sigset_t new_set, old_set;
#endif /* KMP_BLOCK_SIGNALS */
  void *exit_val;
#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
        KMP_OS_OPENBSD || KMP_OS_HURD
  void *volatile padding = 0;
#endif
  int gtid;

  gtid = ((kmp_info_t *)thr)->th.th_info.ds.ds_gtid;
  __kmp_gtid_set_specific(gtid);
#ifdef KMP_TDATA_GTID
  __kmp_gtid = gtid;
#endif
#if KMP_STATS_ENABLED
  // set thread local index to point to thread-specific stats
  __kmp_stats_thread_ptr = ((kmp_info_t *)thr)->th.th_stats;
  __kmp_stats_thread_ptr->startLife();
  KMP_SET_THREAD_STATE(IDLE);
  KMP_INIT_PARTITIONED_TIMERS(OMP_idle);
#endif

#if USE_ITT_BUILD
  __kmp_itt_thread_name(gtid);
#endif /* USE_ITT_BUILD */

#if KMP_AFFINITY_SUPPORTED
  __kmp_affinity_set_init_mask(gtid, FALSE);
#endif

#ifdef KMP_CANCEL_THREADS
  status = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old_type);
  KMP_CHECK_SYSFAIL("pthread_setcanceltype", status);
  // josh todo: isn't PTHREAD_CANCEL_ENABLE default for newly-created threads?
  status = pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &old_state);
  KMP_CHECK_SYSFAIL("pthread_setcancelstate", status);
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  // Set FP control regs to be a copy of the parallel initialization thread's.
  __kmp_clear_x87_fpu_status_word();
  __kmp_load_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
  __kmp_load_mxcsr(&__kmp_init_mxcsr);
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#ifdef KMP_BLOCK_SIGNALS
  status = sigfillset(&new_set);
  KMP_CHECK_SYSFAIL_ERRNO("sigfillset", status);
  status = pthread_sigmask(SIG_BLOCK, &new_set, &old_set);
  KMP_CHECK_SYSFAIL("pthread_sigmask", status);
#endif /* KMP_BLOCK_SIGNALS */

#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
        KMP_OS_OPENBSD
  if (__kmp_stkoffset > 0 && gtid > 0) {
    padding = KMP_ALLOCA(gtid * __kmp_stkoffset);
  }
#endif
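  // Note: the alloca() above shifts this thread's stack frame by
  // gtid * __kmp_stkoffset bytes so that worker stacks do not all start at
  // the same cache/page offset; `padding` is volatile so the compiler cannot
  // optimize the allocation away.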

  KMP_MB();
  __kmp_set_stack_info(gtid, (kmp_info_t *)thr);

  __kmp_check_stack_overlap((kmp_info_t *)thr);

  exit_val = __kmp_launch_thread((kmp_info_t *)thr);

#ifdef KMP_BLOCK_SIGNALS
  status = pthread_sigmask(SIG_SETMASK, &old_set, NULL);
  KMP_CHECK_SYSFAIL("pthread_sigmask", status);
#endif /* KMP_BLOCK_SIGNALS */

  return exit_val;
}

#if KMP_USE_MONITOR
/* The monitor thread controls all of the threads in the complex */

static void *__kmp_launch_monitor(void *thr) {
  int status, old_type, old_state;
#ifdef KMP_BLOCK_SIGNALS
  sigset_t new_set;
#endif /* KMP_BLOCK_SIGNALS */
  struct timespec interval;

  KMP_MB(); /* Flush all pending memory write invalidates.  */

  KA_TRACE(10, ("__kmp_launch_monitor: #1 launched\n"));

  /* register us as the monitor thread */
  __kmp_gtid_set_specific(KMP_GTID_MONITOR);
#ifdef KMP_TDATA_GTID
  __kmp_gtid = KMP_GTID_MONITOR;
#endif

  KMP_MB();

#if USE_ITT_BUILD
  // Instruct Intel(R) Threading Tools to ignore monitor thread.
  __kmp_itt_thread_ignore();
#endif /* USE_ITT_BUILD */

  __kmp_set_stack_info(((kmp_info_t *)thr)->th.th_info.ds.ds_gtid,
                       (kmp_info_t *)thr);

  __kmp_check_stack_overlap((kmp_info_t *)thr);

#ifdef KMP_CANCEL_THREADS
  status = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old_type);
  KMP_CHECK_SYSFAIL("pthread_setcanceltype", status);
  // josh todo: isn't PTHREAD_CANCEL_ENABLE default for newly-created threads?
  status = pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &old_state);
  KMP_CHECK_SYSFAIL("pthread_setcancelstate", status);
#endif

#if KMP_REAL_TIME_FIX
  // This is a potential fix which allows applications with real-time
  // scheduling policies to work. However, a decision about the fix has not
  // been made yet, so it is disabled by default.
  { // Was the program started with a real-time scheduling policy?
    int sched = sched_getscheduler(0);
    if (sched == SCHED_FIFO || sched == SCHED_RR) {
      // Yes, we are part of a real-time application. Try to increase the
      // priority of the monitor.
      struct sched_param param;
      int max_priority = sched_get_priority_max(sched);
      int rc;
      KMP_WARNING(RealTimeSchedNotSupported);
      sched_getparam(0, &param);
      if (param.sched_priority < max_priority) {
        param.sched_priority += 1;
        rc = sched_setscheduler(0, sched, &param);
        if (rc != 0) {
          int error = errno;
          kmp_msg_t err_code = KMP_ERR(error);
          __kmp_msg(kmp_ms_warning, KMP_MSG(CantChangeMonitorPriority),
                    err_code, KMP_MSG(MonitorWillStarve), __kmp_msg_null);
          if (__kmp_generate_warnings == kmp_warnings_off) {
            __kmp_str_free(&err_code.str);
          }
        }
      } else {
        // We cannot abort here, because the number of CPUs may be enough for
        // all the threads, including the monitor thread, so the application
        // could potentially work...
        __kmp_msg(kmp_ms_warning, KMP_MSG(RunningAtMaxPriority),
                  KMP_MSG(MonitorWillStarve), KMP_HNT(RunningAtMaxPriority),
                  __kmp_msg_null);
      }
    }
    // AC: release the thread that waits for the monitor to start
    TCW_4(__kmp_global.g.g_time.dt.t_value, 0);
  }
#endif // KMP_REAL_TIME_FIX

  KMP_MB(); /* Flush all pending memory write invalidates.  */

  if (__kmp_monitor_wakeups == 1) {
    interval.tv_sec = 1;
    interval.tv_nsec = 0;
  } else {
    interval.tv_sec = 0;
    interval.tv_nsec = (KMP_NSEC_PER_SEC / __kmp_monitor_wakeups);
  }
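  // For example, with __kmp_monitor_wakeups == 10 the monitor wakes every
  // 100 ms: interval = {0 s, KMP_NSEC_PER_SEC / 10 ns}.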

  KA_TRACE(10, ("__kmp_launch_monitor: #2 monitor\n"));

  while (!TCR_4(__kmp_global.g.g_done)) {
    struct timespec now;
    struct timeval tval;

    /*  This thread monitors the state of the system */

    KA_TRACE(15, ("__kmp_launch_monitor: update\n"));

    status = gettimeofday(&tval, NULL);
    KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status);
    TIMEVAL_TO_TIMESPEC(&tval, &now);

    now.tv_sec += interval.tv_sec;
    now.tv_nsec += interval.tv_nsec;

    if (now.tv_nsec >= KMP_NSEC_PER_SEC) {
      now.tv_sec += 1;
      now.tv_nsec -= KMP_NSEC_PER_SEC;
    }

    status = pthread_mutex_lock(&__kmp_wait_mx.m_mutex);
    KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
    // AC: the monitor should not fall asleep if g_done has been set
    if (!TCR_4(__kmp_global.g.g_done)) { // check once more under mutex
      status = pthread_cond_timedwait(&__kmp_wait_cv.c_cond,
                                      &__kmp_wait_mx.m_mutex, &now);
      if (status != 0) {
        if (status != ETIMEDOUT && status != EINTR) {
          KMP_SYSFAIL("pthread_cond_timedwait", status);
        }
      }
    }
    status = pthread_mutex_unlock(&__kmp_wait_mx.m_mutex);
    KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);

    TCW_4(__kmp_global.g.g_time.dt.t_value,
          TCR_4(__kmp_global.g.g_time.dt.t_value) + 1);

    KMP_MB(); /* Flush all pending memory write invalidates.  */
  }

  KA_TRACE(10, ("__kmp_launch_monitor: #3 cleanup\n"));

#ifdef KMP_BLOCK_SIGNALS
  status = sigfillset(&new_set);
  KMP_CHECK_SYSFAIL_ERRNO("sigfillset", status);
  status = pthread_sigmask(SIG_UNBLOCK, &new_set, NULL);
  KMP_CHECK_SYSFAIL("pthread_sigmask", status);
#endif /* KMP_BLOCK_SIGNALS */

  KA_TRACE(10, ("__kmp_launch_monitor: #4 finished\n"));

  if (__kmp_global.g.g_abort != 0) {
    /* now we need to terminate the worker threads  */
    /* the value of t_abort is the signal we caught */

    int gtid;

    KA_TRACE(10, ("__kmp_launch_monitor: #5 terminate sig=%d\n",
                  __kmp_global.g.g_abort));

    /* terminate the OpenMP worker threads */
    /* TODO this is not valid for sibling threads!!
     * the uber master might not be 0 anymore.. */
    for (gtid = 1; gtid < __kmp_threads_capacity; ++gtid)
      __kmp_terminate_thread(gtid);

    __kmp_cleanup();

    KA_TRACE(10, ("__kmp_launch_monitor: #6 raise sig=%d\n",
                  __kmp_global.g.g_abort));

    if (__kmp_global.g.g_abort > 0)
      raise(__kmp_global.g.g_abort);
  }

  KA_TRACE(10, ("__kmp_launch_monitor: #7 exit\n"));

  return thr;
}
#endif // KMP_USE_MONITOR

void __kmp_create_worker(int gtid, kmp_info_t *th, size_t stack_size) {
  pthread_t handle;
  pthread_attr_t thread_attr;
  int status;

  th->th.th_info.ds.ds_gtid = gtid;

#if KMP_STATS_ENABLED
  // sets up worker thread stats
  __kmp_acquire_tas_lock(&__kmp_stats_lock, gtid);

  // th->th.th_stats is used to transfer thread-specific stats-pointer to
  // __kmp_launch_worker. So when thread is created (goes into
  // __kmp_launch_worker) it will set its thread local pointer to
  // th->th.th_stats
  if (!KMP_UBER_GTID(gtid)) {
    th->th.th_stats = __kmp_stats_list->push_back(gtid);
  } else {
    // For root threads, __kmp_stats_thread_ptr is set in __kmp_register_root(),
    // so set the th->th.th_stats field to it.
    th->th.th_stats = __kmp_stats_thread_ptr;
  }
  __kmp_release_tas_lock(&__kmp_stats_lock, gtid);

#endif // KMP_STATS_ENABLED

  if (KMP_UBER_GTID(gtid)) {
    KA_TRACE(10, ("__kmp_create_worker: uber thread (%d)\n", gtid));
    th->th.th_info.ds.ds_thread = pthread_self();
    __kmp_set_stack_info(gtid, th);
    __kmp_check_stack_overlap(th);
    return;
  }

  KA_TRACE(10, ("__kmp_create_worker: try to create thread (%d)\n", gtid));

  KMP_MB(); /* Flush all pending memory write invalidates.  */

#ifdef KMP_THREAD_ATTR
  status = pthread_attr_init(&thread_attr);
  if (status != 0) {
    __kmp_fatal(KMP_MSG(CantInitThreadAttrs), KMP_ERR(status), __kmp_msg_null);
  }
  status = pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE);
  if (status != 0) {
    __kmp_fatal(KMP_MSG(CantSetWorkerState), KMP_ERR(status), __kmp_msg_null);
  }

  /* Set stack size for this thread now.
     The multiple of 2 is there because on some machines, requesting an unusual
     stacksize causes the thread to have an offset before the dummy alloca()
     takes place to create the offset.  Since we want the user to have a
     sufficient stacksize AND support a stack offset, we alloca() twice the
     offset so that the upcoming alloca() does not eliminate any premade offset,
     and also gives the user the stack space they requested for all threads */
  stack_size += gtid * __kmp_stkoffset * 2;
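  /* For example, with a 64 KB __kmp_stkoffset the thread with gtid 4 requests
     an extra 4 * 64 KB * 2 = 512 KB here: half covers the offsetting alloca()
     in __kmp_launch_worker, the other half preserves the user's requested
     stack space. */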

  KA_TRACE(10, ("__kmp_create_worker: T#%d, default stacksize = %lu bytes, "
                "__kmp_stksize = %lu bytes, final stacksize = %lu bytes\n",
                gtid, KMP_DEFAULT_STKSIZE, __kmp_stksize, stack_size));

#ifdef _POSIX_THREAD_ATTR_STACKSIZE
  status = pthread_attr_setstacksize(&thread_attr, stack_size);
#ifdef KMP_BACKUP_STKSIZE
  if (status != 0) {
    if (!__kmp_env_stksize) {
      stack_size = KMP_BACKUP_STKSIZE + gtid * __kmp_stkoffset;
      __kmp_stksize = KMP_BACKUP_STKSIZE;
      KA_TRACE(10, ("__kmp_create_worker: T#%d, default stacksize = %lu bytes, "
                    "__kmp_stksize = %lu bytes, (backup) final stacksize = %lu "
                    "bytes\n",
                    gtid, KMP_DEFAULT_STKSIZE, __kmp_stksize, stack_size));
      status = pthread_attr_setstacksize(&thread_attr, stack_size);
    }
  }
#endif /* KMP_BACKUP_STKSIZE */
  if (status != 0) {
    __kmp_fatal(KMP_MSG(CantSetWorkerStackSize, stack_size), KMP_ERR(status),
                KMP_HNT(ChangeWorkerStackSize), __kmp_msg_null);
  }
#endif /* _POSIX_THREAD_ATTR_STACKSIZE */

#endif /* KMP_THREAD_ATTR */

  status =
      pthread_create(&handle, &thread_attr, __kmp_launch_worker, (void *)th);
  if (status != 0 || !handle) { // ??? Why do we check handle??
#ifdef _POSIX_THREAD_ATTR_STACKSIZE
    if (status == EINVAL) {
      __kmp_fatal(KMP_MSG(CantSetWorkerStackSize, stack_size), KMP_ERR(status),
                  KMP_HNT(IncreaseWorkerStackSize), __kmp_msg_null);
    }
    if (status == ENOMEM) {
      __kmp_fatal(KMP_MSG(CantSetWorkerStackSize, stack_size), KMP_ERR(status),
                  KMP_HNT(DecreaseWorkerStackSize), __kmp_msg_null);
    }
#endif /* _POSIX_THREAD_ATTR_STACKSIZE */
    if (status == EAGAIN) {
      __kmp_fatal(KMP_MSG(NoResourcesForWorkerThread), KMP_ERR(status),
                  KMP_HNT(Decrease_NUM_THREADS), __kmp_msg_null);
    }
    KMP_SYSFAIL("pthread_create", status);
  }

  th->th.th_info.ds.ds_thread = handle;

#ifdef KMP_THREAD_ATTR
  status = pthread_attr_destroy(&thread_attr);
  if (status) {
    kmp_msg_t err_code = KMP_ERR(status);
    __kmp_msg(kmp_ms_warning, KMP_MSG(CantDestroyThreadAttrs), err_code,
              __kmp_msg_null);
    if (__kmp_generate_warnings == kmp_warnings_off) {
      __kmp_str_free(&err_code.str);
    }
  }
#endif /* KMP_THREAD_ATTR */

  KMP_MB(); /* Flush all pending memory write invalidates.  */

  KA_TRACE(10, ("__kmp_create_worker: done creating thread (%d)\n", gtid));

} // __kmp_create_worker

#if KMP_USE_MONITOR
void __kmp_create_monitor(kmp_info_t *th) {
  pthread_t handle;
  pthread_attr_t thread_attr;
  size_t size;
  int status;
  int auto_adj_size = FALSE;

  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
    // We don't need monitor thread in case of MAX_BLOCKTIME
    KA_TRACE(10, ("__kmp_create_monitor: skipping monitor thread because of "
                  "MAX blocktime\n"));
    th->th.th_info.ds.ds_tid = 0; // this makes reap_monitor no-op
    th->th.th_info.ds.ds_gtid = 0;
    return;
  }
  KA_TRACE(10, ("__kmp_create_monitor: try to create monitor\n"));

  KMP_MB(); /* Flush all pending memory write invalidates.  */

  th->th.th_info.ds.ds_tid = KMP_GTID_MONITOR;
  th->th.th_info.ds.ds_gtid = KMP_GTID_MONITOR;
#if KMP_REAL_TIME_FIX
  TCW_4(__kmp_global.g.g_time.dt.t_value,
        -1); // Will use it for synchronization a bit later.
#else
  TCW_4(__kmp_global.g.g_time.dt.t_value, 0);
#endif // KMP_REAL_TIME_FIX

#ifdef KMP_THREAD_ATTR
  if (__kmp_monitor_stksize == 0) {
    __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE;
    auto_adj_size = TRUE;
  }
  status = pthread_attr_init(&thread_attr);
  if (status != 0) {
    __kmp_fatal(KMP_MSG(CantInitThreadAttrs), KMP_ERR(status), __kmp_msg_null);
  }
  status = pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE);
  if (status != 0) {
    __kmp_fatal(KMP_MSG(CantSetMonitorState), KMP_ERR(status), __kmp_msg_null);
  }

#ifdef _POSIX_THREAD_ATTR_STACKSIZE
  status = pthread_attr_getstacksize(&thread_attr, &size);
  KMP_CHECK_SYSFAIL("pthread_attr_getstacksize", status);
#else
  size = __kmp_sys_min_stksize;
#endif /* _POSIX_THREAD_ATTR_STACKSIZE */
#endif /* KMP_THREAD_ATTR */

  if (__kmp_monitor_stksize == 0) {
    __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE;
  }
  if (__kmp_monitor_stksize < __kmp_sys_min_stksize) {
    __kmp_monitor_stksize = __kmp_sys_min_stksize;
  }

  KA_TRACE(10, ("__kmp_create_monitor: default stacksize = %lu bytes, "
                "requested stacksize = %lu bytes\n",
                size, __kmp_monitor_stksize));

retry:

/* Set stack size for this thread now. */
#ifdef _POSIX_THREAD_ATTR_STACKSIZE
  KA_TRACE(10, ("__kmp_create_monitor: setting stacksize = %lu bytes,",
                __kmp_monitor_stksize));
  status = pthread_attr_setstacksize(&thread_attr, __kmp_monitor_stksize);
  if (status != 0) {
    if (auto_adj_size) {
      __kmp_monitor_stksize *= 2;
      goto retry;
    }
    kmp_msg_t err_code = KMP_ERR(status);
    __kmp_msg(kmp_ms_warning, // should this be fatal?  BB
              KMP_MSG(CantSetMonitorStackSize, (long int)__kmp_monitor_stksize),
              err_code, KMP_HNT(ChangeMonitorStackSize), __kmp_msg_null);
    if (__kmp_generate_warnings == kmp_warnings_off) {
      __kmp_str_free(&err_code.str);
    }
  }
#endif /* _POSIX_THREAD_ATTR_STACKSIZE */

  status =
      pthread_create(&handle, &thread_attr, __kmp_launch_monitor, (void *)th);

  if (status != 0) {
#ifdef _POSIX_THREAD_ATTR_STACKSIZE
    if (status == EINVAL) {
      if (auto_adj_size && (__kmp_monitor_stksize < (size_t)0x40000000)) {
        __kmp_monitor_stksize *= 2;
        goto retry;
      }
      __kmp_fatal(KMP_MSG(CantSetMonitorStackSize, __kmp_monitor_stksize),
                  KMP_ERR(status), KMP_HNT(IncreaseMonitorStackSize),
                  __kmp_msg_null);
    }
    if (status == ENOMEM) {
      __kmp_fatal(KMP_MSG(CantSetMonitorStackSize, __kmp_monitor_stksize),
                  KMP_ERR(status), KMP_HNT(DecreaseMonitorStackSize),
                  __kmp_msg_null);
    }
#endif /* _POSIX_THREAD_ATTR_STACKSIZE */
    if (status == EAGAIN) {
      __kmp_fatal(KMP_MSG(NoResourcesForMonitorThread), KMP_ERR(status),
                  KMP_HNT(DecreaseNumberOfThreadsInUse), __kmp_msg_null);
    }
    KMP_SYSFAIL("pthread_create", status);
  }

  th->th.th_info.ds.ds_thread = handle;

#if KMP_REAL_TIME_FIX
  // Wait until the monitor thread has really started and set its *priority*.
  KMP_DEBUG_ASSERT(sizeof(kmp_uint32) ==
                   sizeof(__kmp_global.g.g_time.dt.t_value));
  __kmp_wait_4((kmp_uint32 volatile *)&__kmp_global.g.g_time.dt.t_value, -1,
               &__kmp_neq_4, NULL);
#endif // KMP_REAL_TIME_FIX

#ifdef KMP_THREAD_ATTR
  status = pthread_attr_destroy(&thread_attr);
  if (status != 0) {
    kmp_msg_t err_code = KMP_ERR(status);
    __kmp_msg(kmp_ms_warning, KMP_MSG(CantDestroyThreadAttrs), err_code,
              __kmp_msg_null);
    if (__kmp_generate_warnings == kmp_warnings_off) {
      __kmp_str_free(&err_code.str);
    }
  }
#endif

  KMP_MB(); /* Flush all pending memory write invalidates.  */

  KA_TRACE(10, ("__kmp_create_monitor: monitor created %#.8lx\n",
                th->th.th_info.ds.ds_thread));

} // __kmp_create_monitor
#endif // KMP_USE_MONITOR

void __kmp_exit_thread(int exit_status) {
  pthread_exit((void *)(intptr_t)exit_status);
} // __kmp_exit_thread

#if KMP_USE_MONITOR
void __kmp_resume_monitor();

void __kmp_reap_monitor(kmp_info_t *th) {
  int status;
  void *exit_val;

  KA_TRACE(10, ("__kmp_reap_monitor: try to reap monitor thread with handle"
                " %#.8lx\n",
                th->th.th_info.ds.ds_thread));

  // If monitor has been created, its tid and gtid should be KMP_GTID_MONITOR.
  // If both tid and gtid are 0, it means the monitor did not ever start.
  // If both tid and gtid are KMP_GTID_DNE, the monitor has been shut down.
  KMP_DEBUG_ASSERT(th->th.th_info.ds.ds_tid == th->th.th_info.ds.ds_gtid);
  if (th->th.th_info.ds.ds_gtid != KMP_GTID_MONITOR) {
    KA_TRACE(10, ("__kmp_reap_monitor: monitor did not start, returning\n"));
    return;
  }

  KMP_MB(); /* Flush all pending memory write invalidates.  */

  /* First, check to see whether the monitor thread exists to wake it up. This
     is to avoid a performance problem when the monitor sleeps during a
     blocktime-sized interval */

  status = pthread_kill(th->th.th_info.ds.ds_thread, 0);
  if (status != ESRCH) {
    __kmp_resume_monitor(); // Wake up the monitor thread
  }
  KA_TRACE(10, ("__kmp_reap_monitor: try to join with monitor\n"));
  status = pthread_join(th->th.th_info.ds.ds_thread, &exit_val);
  if (exit_val != th) {
    __kmp_fatal(KMP_MSG(ReapMonitorError), KMP_ERR(status), __kmp_msg_null);
  }

  th->th.th_info.ds.ds_tid = KMP_GTID_DNE;
  th->th.th_info.ds.ds_gtid = KMP_GTID_DNE;

  KA_TRACE(10, ("__kmp_reap_monitor: done reaping monitor thread with handle"
                " %#.8lx\n",
                th->th.th_info.ds.ds_thread));

  KMP_MB(); /* Flush all pending memory write invalidates.  */
}
#endif // KMP_USE_MONITOR

void __kmp_reap_worker(kmp_info_t *th) {
  int status;
  void *exit_val;

  KMP_MB(); /* Flush all pending memory write invalidates.  */

  KA_TRACE(
      10, ("__kmp_reap_worker: try to reap T#%d\n", th->th.th_info.ds.ds_gtid));

  status = pthread_join(th->th.th_info.ds.ds_thread, &exit_val);
#ifdef KMP_DEBUG
  /* Don't expose these to the user until we understand when they trigger */
  if (status != 0) {
    __kmp_fatal(KMP_MSG(ReapWorkerError), KMP_ERR(status), __kmp_msg_null);
  }
  if (exit_val != th) {
    KA_TRACE(10, ("__kmp_reap_worker: worker T#%d did not reap properly, "
                  "exit_val = %p\n",
                  th->th.th_info.ds.ds_gtid, exit_val));
  }
#endif /* KMP_DEBUG */

  KA_TRACE(10, ("__kmp_reap_worker: done reaping T#%d\n",
                th->th.th_info.ds.ds_gtid));

  KMP_MB(); /* Flush all pending memory write invalidates.  */
}

#if KMP_HANDLE_SIGNALS

static void __kmp_null_handler(int signo) {
  //  Do nothing, for doing SIG_IGN-type actions.
} // __kmp_null_handler

static void __kmp_team_handler(int signo) {
  if (__kmp_global.g.g_abort == 0) {
/* Stage 1 signal handler, let's shut down all of the threads */
#ifdef KMP_DEBUG
    __kmp_debug_printf("__kmp_team_handler: caught signal = %d\n", signo);
#endif
    switch (signo) {
    case SIGHUP:
    case SIGINT:
    case SIGQUIT:
    case SIGILL:
    case SIGABRT:
    case SIGFPE:
    case SIGBUS:
    case SIGSEGV:
#ifdef SIGSYS
    case SIGSYS:
#endif
    case SIGTERM:
      if (__kmp_debug_buf) {
        __kmp_dump_debug_buffer();
      }
      KMP_MB(); // Flush all pending memory write invalidates.
      TCW_4(__kmp_global.g.g_abort, signo);
      KMP_MB(); // Flush all pending memory write invalidates.
      TCW_4(__kmp_global.g.g_done, TRUE);
      KMP_MB(); // Flush all pending memory write invalidates.
      break;
    default:
#ifdef KMP_DEBUG
      __kmp_debug_printf("__kmp_team_handler: unknown signal type");
#endif
      break;
    }
  }
} // __kmp_team_handler

static void __kmp_sigaction(int signum, const struct sigaction *act,
                            struct sigaction *oldact) {
  int rc = sigaction(signum, act, oldact);
  KMP_CHECK_SYSFAIL_ERRNO("sigaction", rc);
}

static void __kmp_install_one_handler(int sig, sig_func_t handler_func,
                                      int parallel_init) {
  KMP_MB(); // Flush all pending memory write invalidates.
  KB_TRACE(60,
           ("__kmp_install_one_handler( %d, ..., %d )\n", sig, parallel_init));
  if (parallel_init) {
    struct sigaction new_action;
    struct sigaction old_action;
    new_action.sa_handler = handler_func;
    new_action.sa_flags = 0;
    sigfillset(&new_action.sa_mask);
    __kmp_sigaction(sig, &new_action, &old_action);
    if (old_action.sa_handler == __kmp_sighldrs[sig].sa_handler) {
      sigaddset(&__kmp_sigset, sig);
    } else {
      // Restore/keep the user's handler if one was previously installed.
      __kmp_sigaction(sig, &old_action, NULL);
    }
  } else {
    // Save initial/system signal handlers to see if user handlers were
    // installed later.
    __kmp_sigaction(sig, NULL, &__kmp_sighldrs[sig]);
  }
  KMP_MB(); // Flush all pending memory write invalidates.
} // __kmp_install_one_handler
1165
1166static void __kmp_remove_one_handler(int sig) {
1167  KB_TRACE(60, ("__kmp_remove_one_handler( %d )\n", sig));
1168  if (sigismember(&__kmp_sigset, sig)) {
1169    struct sigaction old;
1170    KMP_MB(); // Flush all pending memory write invalidates.
1171    __kmp_sigaction(sig, &__kmp_sighldrs[sig], &old);
1172    if ((old.sa_handler != __kmp_team_handler) &&
1173        (old.sa_handler != __kmp_null_handler)) {
1174      // Restore the users signal handler.
1175      KB_TRACE(10, ("__kmp_remove_one_handler: oops, not our handler, "
1176                    "restoring: sig=%d\n",
1177                    sig));
1178      __kmp_sigaction(sig, &old, NULL);
1179    }
1180    sigdelset(&__kmp_sigset, sig);
1181    KMP_MB(); // Flush all pending memory write invalidates.
1182  }
1183} // __kmp_remove_one_handler
1184
1185void __kmp_install_signals(int parallel_init) {
1186  KB_TRACE(10, ("__kmp_install_signals( %d )\n", parallel_init));
1187  if (__kmp_handle_signals || !parallel_init) {
1188    // If ! parallel_init, we do not install handlers, just save original
1189    // handlers. Let us do it even __handle_signals is 0.
1190    sigemptyset(&__kmp_sigset);
1191    __kmp_install_one_handler(SIGHUP, __kmp_team_handler, parallel_init);
1192    __kmp_install_one_handler(SIGINT, __kmp_team_handler, parallel_init);
1193    __kmp_install_one_handler(SIGQUIT, __kmp_team_handler, parallel_init);
1194    __kmp_install_one_handler(SIGILL, __kmp_team_handler, parallel_init);
1195    __kmp_install_one_handler(SIGABRT, __kmp_team_handler, parallel_init);
1196    __kmp_install_one_handler(SIGFPE, __kmp_team_handler, parallel_init);
1197    __kmp_install_one_handler(SIGBUS, __kmp_team_handler, parallel_init);
1198    __kmp_install_one_handler(SIGSEGV, __kmp_team_handler, parallel_init);
1199#ifdef SIGSYS
1200    __kmp_install_one_handler(SIGSYS, __kmp_team_handler, parallel_init);
1201#endif // SIGSYS
1202    __kmp_install_one_handler(SIGTERM, __kmp_team_handler, parallel_init);
1203#ifdef SIGPIPE
1204    __kmp_install_one_handler(SIGPIPE, __kmp_team_handler, parallel_init);
1205#endif // SIGPIPE
1206  }
1207} // __kmp_install_signals
1208
1209void __kmp_remove_signals(void) {
1210  int sig;
1211  KB_TRACE(10, ("__kmp_remove_signals()\n"));
1212  for (sig = 1; sig < NSIG; ++sig) {
1213    __kmp_remove_one_handler(sig);
1214  }
1215} // __kmp_remove_signals
1216
1217#endif // KMP_HANDLE_SIGNALS

void __kmp_enable(int new_state) {
#ifdef KMP_CANCEL_THREADS
  int status, old_state;
  status = pthread_setcancelstate(new_state, &old_state);
  KMP_CHECK_SYSFAIL("pthread_setcancelstate", status);
  KMP_DEBUG_ASSERT(old_state == PTHREAD_CANCEL_DISABLE);
#endif
}

void __kmp_disable(int *old_state) {
#ifdef KMP_CANCEL_THREADS
  int status;
  status = pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, old_state);
  KMP_CHECK_SYSFAIL("pthread_setcancelstate", status);
#endif
}

static void __kmp_atfork_prepare(void) {
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
}

static void __kmp_atfork_parent(void) {
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
}

/* Reset the library so execution in the child starts "all over again" with
   clean data structures in initial states.  Don't worry about freeing memory
   allocated by parent, just abandon it to be safe. */
static void __kmp_atfork_child(void) {
  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
  /* TODO make sure this is done right for nested/sibling */
  // ATT:  Memory leaks are here? TODO: Check it and fix.
  /* KMP_ASSERT( 0 ); */

  ++__kmp_fork_count;

#if KMP_AFFINITY_SUPPORTED
#if KMP_OS_LINUX
  // Reset the affinity in the child to the initial thread's affinity in the
  // parent.
  kmp_set_thread_affinity_mask_initial();
#endif
  // Set default not to bind threads tightly in the child (we're expecting
  // over-subscription after the fork and this can improve things for
  // scripting languages that use OpenMP inside process-parallel code).
  __kmp_affinity_type = affinity_none;
  if (__kmp_nested_proc_bind.bind_types != NULL) {
    __kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
  }
#endif // KMP_AFFINITY_SUPPORTED

  __kmp_init_runtime = FALSE;
#if KMP_USE_MONITOR
  __kmp_init_monitor = 0;
#endif
  __kmp_init_parallel = FALSE;
  __kmp_init_middle = FALSE;
  __kmp_init_serial = FALSE;
  TCW_4(__kmp_init_gtid, FALSE);
  __kmp_init_common = FALSE;

  TCW_4(__kmp_init_user_locks, FALSE);
#if !KMP_USE_DYNAMIC_LOCK
  __kmp_user_lock_table.used = 1;
  __kmp_user_lock_table.allocated = 0;
  __kmp_user_lock_table.table = NULL;
  __kmp_lock_blocks = NULL;
#endif

  __kmp_all_nth = 0;
  TCW_4(__kmp_nth, 0);

  __kmp_thread_pool = NULL;
  __kmp_thread_pool_insert_pt = NULL;
  __kmp_team_pool = NULL;

  /* Must actually zero all the *cache arguments passed to __kmpc_threadprivate
     here so threadprivate doesn't use stale data */
  KA_TRACE(10, ("__kmp_atfork_child: checking cache address list %p\n",
                __kmp_threadpriv_cache_list));

  while (__kmp_threadpriv_cache_list != NULL) {

    if (*__kmp_threadpriv_cache_list->addr != NULL) {
      KC_TRACE(50, ("__kmp_atfork_child: zeroing cache at address %p\n",
                    &(*__kmp_threadpriv_cache_list->addr)));

      *__kmp_threadpriv_cache_list->addr = NULL;
    }
    __kmp_threadpriv_cache_list = __kmp_threadpriv_cache_list->next;
  }

  __kmp_init_runtime = FALSE;

  /* reset statically initialized locks */
  __kmp_init_bootstrap_lock(&__kmp_initz_lock);
  __kmp_init_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_init_bootstrap_lock(&__kmp_console_lock);
  __kmp_init_bootstrap_lock(&__kmp_task_team_lock);

#if USE_ITT_BUILD
  __kmp_itt_reset(); // reset ITT's global state
#endif /* USE_ITT_BUILD */

  /* This is necessary to make sure no stale data is left around */
  /* AC: customers complain that we use unsafe routines in the atfork
     handler. Mathworks: dlsym() is unsafe. We call dlsym and dlopen
     in dynamic_link when we check the presence of the shared tbbmalloc
     library. The suggestion is to make the library initialization lazier,
     similar to what is done for __kmpc_begin(). */
  // TODO: synchronize all static initializations with regular library
  //       startup; look at kmp_global.cpp and etc.
  //__kmp_internal_begin ();
}

void __kmp_register_atfork(void) {
  if (__kmp_need_register_atfork) {
    int status = pthread_atfork(__kmp_atfork_prepare, __kmp_atfork_parent,
                                __kmp_atfork_child);
    KMP_CHECK_SYSFAIL("pthread_atfork", status);
    __kmp_need_register_atfork = FALSE;
  }
}

void __kmp_suspend_initialize(void) {
  int status;
  status = pthread_mutexattr_init(&__kmp_suspend_mutex_attr);
  KMP_CHECK_SYSFAIL("pthread_mutexattr_init", status);
  status = pthread_condattr_init(&__kmp_suspend_cond_attr);
  KMP_CHECK_SYSFAIL("pthread_condattr_init", status);
}

void __kmp_suspend_initialize_thread(kmp_info_t *th) {
  ANNOTATE_HAPPENS_AFTER(&th->th.th_suspend_init_count);
  int old_value = KMP_ATOMIC_LD_RLX(&th->th.th_suspend_init_count);
  int new_value = __kmp_fork_count + 1;
  // Return if already initialized
  if (old_value == new_value)
    return;
  // Wait, then return if being initialized
  if (old_value == -1 ||
      !__kmp_atomic_compare_store(&th->th.th_suspend_init_count, old_value,
                                  -1)) {
    while (KMP_ATOMIC_LD_ACQ(&th->th.th_suspend_init_count) != new_value) {
      KMP_CPU_PAUSE();
    }
  } else {
    // Claim to be the initializer and do initializations
    int status;
    status = pthread_cond_init(&th->th.th_suspend_cv.c_cond,
                               &__kmp_suspend_cond_attr);
    KMP_CHECK_SYSFAIL("pthread_cond_init", status);
    status = pthread_mutex_init(&th->th.th_suspend_mx.m_mutex,
                                &__kmp_suspend_mutex_attr);
    KMP_CHECK_SYSFAIL("pthread_mutex_init", status);
    KMP_ATOMIC_ST_REL(&th->th.th_suspend_init_count, new_value);
    ANNOTATE_HAPPENS_BEFORE(&th->th.th_suspend_init_count);
  }
}
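
/* The routine above is effectively a per-thread "call once" protocol keyed on
   th_suspend_init_count: a value of __kmp_fork_count + 1 means already
   initialized for this fork generation, -1 means initialization is in
   progress (so spin until it finishes), and any other value is claimed via
   compare-and-store by the thread that then performs the pthread cond/mutex
   initialization. */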

void __kmp_suspend_uninitialize_thread(kmp_info_t *th) {
  if (KMP_ATOMIC_LD_ACQ(&th->th.th_suspend_init_count) > __kmp_fork_count) {
    /* this means we have initialized the suspension pthread objects for this
       thread in this instance of the process */
    int status;

    status = pthread_cond_destroy(&th->th.th_suspend_cv.c_cond);
    if (status != 0 && status != EBUSY) {
      KMP_SYSFAIL("pthread_cond_destroy", status);
    }
    status = pthread_mutex_destroy(&th->th.th_suspend_mx.m_mutex);
    if (status != 0 && status != EBUSY) {
      KMP_SYSFAIL("pthread_mutex_destroy", status);
    }
    --th->th.th_suspend_init_count;
    KMP_DEBUG_ASSERT(KMP_ATOMIC_LD_RLX(&th->th.th_suspend_init_count) ==
                     __kmp_fork_count);
  }
}

// return true if lock obtained, false otherwise
int __kmp_try_suspend_mx(kmp_info_t *th) {
  return (pthread_mutex_trylock(&th->th.th_suspend_mx.m_mutex) == 0);
}

void __kmp_lock_suspend_mx(kmp_info_t *th) {
  int status = pthread_mutex_lock(&th->th.th_suspend_mx.m_mutex);
  KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
}

void __kmp_unlock_suspend_mx(kmp_info_t *th) {
  int status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex);
  KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
}
1415
1416/* This routine puts the calling thread to sleep after setting the
1417   sleep bit for the indicated flag variable to true. */
1418template <class C>
1419static inline void __kmp_suspend_template(int th_gtid, C *flag) {
1420  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_suspend);
1421  kmp_info_t *th = __kmp_threads[th_gtid];
1422  int status;
1423  typename C::flag_t old_spin;
1424
1425  KF_TRACE(30, ("__kmp_suspend_template: T#%d enter for flag = %p\n", th_gtid,
1426                flag->get()));
1427
1428  __kmp_suspend_initialize_thread(th);
1429
1430  status = pthread_mutex_lock(&th->th.th_suspend_mx.m_mutex);
1431  KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
1432
1433  KF_TRACE(10, ("__kmp_suspend_template: T#%d setting sleep bit for spin(%p)\n",
1434                th_gtid, flag->get()));
1435
1436  /* TODO: shouldn't this use release semantics to ensure that
1437     __kmp_suspend_initialize_thread gets called first? */
1438  old_spin = flag->set_sleeping();
1439  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
1440      __kmp_pause_status != kmp_soft_paused) {
1441    flag->unset_sleeping();
1442    status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex);
1443    KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
1444    return;
1445  }
1446  KF_TRACE(5, ("__kmp_suspend_template: T#%d set sleep bit for spin(%p)==%x,"
1447               " was %x\n",
1448               th_gtid, flag->get(), flag->load(), old_spin));
1449
1450  if (flag->done_check_val(old_spin)) {
1451    old_spin = flag->unset_sleeping();
1452    KF_TRACE(5, ("__kmp_suspend_template: T#%d false alarm, reset sleep bit "
1453                 "for spin(%p)\n",
1454                 th_gtid, flag->get()));
1455  } else {
1456    /* Encapsulate in a loop as the documentation states that this may
1457       "with low probability" return when the condition variable has
1458       not been signaled or broadcast */
1459    int deactivated = FALSE;
1460    TCW_PTR(th->th.th_sleep_loc, (void *)flag);
1461
1462    while (flag->is_sleeping()) {
1463#ifdef DEBUG_SUSPEND
1464      char buffer[128];
1465      __kmp_suspend_count++;
1466      __kmp_print_cond(buffer, &th->th.th_suspend_cv);
1467      __kmp_printf("__kmp_suspend_template: suspending T#%d: %s\n", th_gtid,
1468                   buffer);
1469#endif
1470      // Mark the thread as no longer active (only in the first iteration of the
1471      // loop).
1472      if (!deactivated) {
1473        th->th.th_active = FALSE;
1474        if (th->th.th_active_in_pool) {
1475          th->th.th_active_in_pool = FALSE;
1476          KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
1477          KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
1478        }
1479        deactivated = TRUE;
1480      }
1481
1482#if USE_SUSPEND_TIMEOUT
1483      struct timespec now;
1484      struct timeval tval;
1485      int msecs;
1486
1487      status = gettimeofday(&tval, NULL);
1488      KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status);
1489      TIMEVAL_TO_TIMESPEC(&tval, &now);
1490
1491      msecs = (4 * __kmp_dflt_blocktime) + 200;
1492      now.tv_sec += msecs / 1000;
1493      now.tv_nsec += (msecs % 1000) * 1000;
1494
1495      KF_TRACE(15, ("__kmp_suspend_template: T#%d about to perform "
1496                    "pthread_cond_timedwait\n",
1497                    th_gtid));
1498      status = pthread_cond_timedwait(&th->th.th_suspend_cv.c_cond,
1499                                      &th->th.th_suspend_mx.m_mutex, &now);
1500#else
1501      KF_TRACE(15, ("__kmp_suspend_template: T#%d about to perform"
1502                    " pthread_cond_wait\n",
1503                    th_gtid));
1504      status = pthread_cond_wait(&th->th.th_suspend_cv.c_cond,
1505                                 &th->th.th_suspend_mx.m_mutex);
1506#endif
1507
1508      if ((status != 0) && (status != EINTR) && (status != ETIMEDOUT)) {
1509        KMP_SYSFAIL("pthread_cond_wait", status);
1510      }
1511#ifdef KMP_DEBUG
1512      if (status == ETIMEDOUT) {
1513        if (flag->is_sleeping()) {
1514          KF_TRACE(100,
1515                   ("__kmp_suspend_template: T#%d timeout wakeup\n", th_gtid));
1516        } else {
1517          KF_TRACE(2, ("__kmp_suspend_template: T#%d timeout wakeup, sleep bit "
1518                       "not set!\n",
1519                       th_gtid));
1520        }
1521      } else if (flag->is_sleeping()) {
1522        KF_TRACE(100,
1523                 ("__kmp_suspend_template: T#%d spurious wakeup\n", th_gtid));
1524      }
1525#endif
1526    } // while
1527
    // Mark the thread as active again (if it was previously marked inactive).
1529    if (deactivated) {
1530      th->th.th_active = TRUE;
1531      if (TCR_4(th->th.th_in_pool)) {
1532        KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
1533        th->th.th_active_in_pool = TRUE;
1534      }
1535    }
1536  }
1537#ifdef DEBUG_SUSPEND
1538  {
1539    char buffer[128];
1540    __kmp_print_cond(buffer, &th->th.th_suspend_cv);
1541    __kmp_printf("__kmp_suspend_template: T#%d has awakened: %s\n", th_gtid,
1542                 buffer);
1543  }
1544#endif
1545
1546  status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex);
1547  KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
1548  KF_TRACE(30, ("__kmp_suspend_template: T#%d exit\n", th_gtid));
1549}
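
#if 0
// Illustrative sketch (not compiled into the runtime): the canonical
// pthreads predicate-loop pattern that __kmp_suspend_template follows above.
// All names here (demo_mutex, demo_cond, demo_sleeping) are hypothetical;
// the real code keeps the predicate in the wait flag instead.
static pthread_mutex_t demo_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t demo_cond = PTHREAD_COND_INITIALIZER;
static int demo_sleeping = 0;

static void demo_suspend(void) {
  pthread_mutex_lock(&demo_mutex);
  demo_sleeping = 1;
  // pthread_cond_wait() can return spuriously, so re-check the predicate
  // in a loop before treating the wait as satisfied.
  while (demo_sleeping)
    pthread_cond_wait(&demo_cond, &demo_mutex);
  pthread_mutex_unlock(&demo_mutex);
}

static void demo_resume(void) {
  pthread_mutex_lock(&demo_mutex);
  demo_sleeping = 0; // clear the predicate before signaling
  pthread_cond_signal(&demo_cond);
  pthread_mutex_unlock(&demo_mutex);
}
#endif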
1550
1551void __kmp_suspend_32(int th_gtid, kmp_flag_32 *flag) {
1552  __kmp_suspend_template(th_gtid, flag);
1553}
1554void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag) {
1555  __kmp_suspend_template(th_gtid, flag);
1556}
1557void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) {
1558  __kmp_suspend_template(th_gtid, flag);
1559}
1560
1561/* This routine signals the thread specified by target_gtid to wake up
1562   after setting the sleep bit indicated by the flag argument to FALSE.
1563   The target thread must already have called __kmp_suspend_template() */
1564template <class C>
1565static inline void __kmp_resume_template(int target_gtid, C *flag) {
1566  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_resume);
1567  kmp_info_t *th = __kmp_threads[target_gtid];
1568  int status;
1569
1570#ifdef KMP_DEBUG
1571  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
1572#endif
1573
1574  KF_TRACE(30, ("__kmp_resume_template: T#%d wants to wakeup T#%d enter\n",
1575                gtid, target_gtid));
1576  KMP_DEBUG_ASSERT(gtid != target_gtid);
1577
1578  __kmp_suspend_initialize_thread(th);
1579
1580  status = pthread_mutex_lock(&th->th.th_suspend_mx.m_mutex);
1581  KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
1582
1583  if (!flag) { // coming from __kmp_null_resume_wrapper
1584    flag = (C *)CCAST(void *, th->th.th_sleep_loc);
1585  }
1586
1587  // First, check if the flag is null or its type has changed. If so, someone
1588  // else woke it up.
  if (!flag || flag->get_type() != flag->get_ptr_type()) {
    // get_ptr_type simply shows what type the flag was cast to.
1592    KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
1593                 "awake: flag(%p)\n",
1594                 gtid, target_gtid, NULL));
1595    status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex);
1596    KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
1597    return;
  } else {
    // If multiple threads are sleeping, the flag should internally refer to a
    // specific thread here.
1600    typename C::flag_t old_spin = flag->unset_sleeping();
1601    if (!flag->is_sleeping_val(old_spin)) {
1602      KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
1603                   "awake: flag(%p): "
1604                   "%u => %u\n",
1605                   gtid, target_gtid, flag->get(), old_spin, flag->load()));
1606      status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex);
1607      KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
1608      return;
1609    }
1610    KF_TRACE(5, ("__kmp_resume_template: T#%d about to wakeup T#%d, reset "
1611                 "sleep bit for flag's loc(%p): "
1612                 "%u => %u\n",
1613                 gtid, target_gtid, flag->get(), old_spin, flag->load()));
1614  }
1615  TCW_PTR(th->th.th_sleep_loc, NULL);
1616
1617#ifdef DEBUG_SUSPEND
1618  {
1619    char buffer[128];
1620    __kmp_print_cond(buffer, &th->th.th_suspend_cv);
1621    __kmp_printf("__kmp_resume_template: T#%d resuming T#%d: %s\n", gtid,
1622                 target_gtid, buffer);
1623  }
1624#endif
1625  status = pthread_cond_signal(&th->th.th_suspend_cv.c_cond);
1626  KMP_CHECK_SYSFAIL("pthread_cond_signal", status);
1627  status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex);
1628  KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
1629  KF_TRACE(30, ("__kmp_resume_template: T#%d exiting after signaling wake up"
1630                " for T#%d\n",
1631                gtid, target_gtid));
1632}
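
// Note: the condition variable is signaled while the mutex is still held;
// this guarantees the sleeping thread cannot miss the wakeup between its
// final predicate check and its call to pthread_cond_wait().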
1633
1634void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag) {
1635  __kmp_resume_template(target_gtid, flag);
1636}
1637void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag) {
1638  __kmp_resume_template(target_gtid, flag);
1639}
1640void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) {
1641  __kmp_resume_template(target_gtid, flag);
1642}
1643
1644#if KMP_USE_MONITOR
1645void __kmp_resume_monitor() {
1646  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_resume);
1647  int status;
1648#ifdef KMP_DEBUG
1649  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
1650  KF_TRACE(30, ("__kmp_resume_monitor: T#%d wants to wakeup T#%d enter\n", gtid,
1651                KMP_GTID_MONITOR));
1652  KMP_DEBUG_ASSERT(gtid != KMP_GTID_MONITOR);
1653#endif
1654  status = pthread_mutex_lock(&__kmp_wait_mx.m_mutex);
1655  KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
1656#ifdef DEBUG_SUSPEND
1657  {
1658    char buffer[128];
1659    __kmp_print_cond(buffer, &__kmp_wait_cv.c_cond);
1660    __kmp_printf("__kmp_resume_monitor: T#%d resuming T#%d: %s\n", gtid,
1661                 KMP_GTID_MONITOR, buffer);
1662  }
1663#endif
1664  status = pthread_cond_signal(&__kmp_wait_cv.c_cond);
1665  KMP_CHECK_SYSFAIL("pthread_cond_signal", status);
1666  status = pthread_mutex_unlock(&__kmp_wait_mx.m_mutex);
1667  KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
1668  KF_TRACE(30, ("__kmp_resume_monitor: T#%d exiting after signaling wake up"
1669                " for T#%d\n",
1670                gtid, KMP_GTID_MONITOR));
1671}
1672#endif // KMP_USE_MONITOR
1673
1674void __kmp_yield() { sched_yield(); }
1675
1676void __kmp_gtid_set_specific(int gtid) {
1677  if (__kmp_init_gtid) {
1678    int status;
1679    status = pthread_setspecific(__kmp_gtid_threadprivate_key,
1680                                 (void *)(intptr_t)(gtid + 1));
1681    KMP_CHECK_SYSFAIL("pthread_setspecific", status);
1682  } else {
1683    KA_TRACE(50, ("__kmp_gtid_set_specific: runtime shutdown, returning\n"));
1684  }
1685}
1686
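// Note the +1/-1 bias on the stored value: pthread_getspecific() returns
// NULL (0) when no value has been set, which would be indistinguishable from
// gtid 0, so the gtid is stored biased by one and unbiased on read.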
1687int __kmp_gtid_get_specific() {
1688  int gtid;
1689  if (!__kmp_init_gtid) {
1690    KA_TRACE(50, ("__kmp_gtid_get_specific: runtime shutdown, returning "
1691                  "KMP_GTID_SHUTDOWN\n"));
1692    return KMP_GTID_SHUTDOWN;
1693  }
1694  gtid = (int)(size_t)pthread_getspecific(__kmp_gtid_threadprivate_key);
1695  if (gtid == 0) {
1696    gtid = KMP_GTID_DNE;
1697  } else {
1698    gtid--;
1699  }
1700  KA_TRACE(50, ("__kmp_gtid_get_specific: key:%d gtid:%d\n",
1701                __kmp_gtid_threadprivate_key, gtid));
1702  return gtid;
1703}
1704
double __kmp_read_cpu_time(void) {
  struct tms buffer;

  times(&buffer);

  // times() reports values in units of sysconf(_SC_CLK_TCK) clock ticks,
  // not CLOCKS_PER_SEC (which is the unit used by clock()).
  return (buffer.tms_utime + buffer.tms_cutime) / (double)sysconf(_SC_CLK_TCK);
}
1713
1714int __kmp_read_system_info(struct kmp_sys_info *info) {
1715  int status;
1716  struct rusage r_usage;
1717
1718  memset(info, 0, sizeof(*info));
1719
1720  status = getrusage(RUSAGE_SELF, &r_usage);
1721  KMP_CHECK_SYSFAIL_ERRNO("getrusage", status);
1722
1723  // The maximum resident set size utilized (in kilobytes)
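  // (Caveat: units are platform-dependent; Linux reports ru_maxrss in
  // kilobytes, while Darwin reports it in bytes.)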
1724  info->maxrss = r_usage.ru_maxrss;
1725  // The number of page faults serviced without any I/O
1726  info->minflt = r_usage.ru_minflt;
1727  // The number of page faults serviced that required I/O
1728  info->majflt = r_usage.ru_majflt;
1729  // The number of times a process was "swapped" out of memory
1730  info->nswap = r_usage.ru_nswap;
1731  // The number of times the file system had to perform input
1732  info->inblock = r_usage.ru_inblock;
1733  // The number of times the file system had to perform output
1734  info->oublock = r_usage.ru_oublock;
  // The number of voluntary context switches
  info->nvcsw = r_usage.ru_nvcsw;
  // The number of involuntary (forced) context switches
  info->nivcsw = r_usage.ru_nivcsw;
1739
1740  return (status != 0);
1741}
1742
1743void __kmp_read_system_time(double *delta) {
1744  double t_ns;
1745  struct timeval tval;
1746  struct timespec stop;
1747  int status;
1748
1749  status = gettimeofday(&tval, NULL);
1750  KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status);
1751  TIMEVAL_TO_TIMESPEC(&tval, &stop);
1752  t_ns = TS2NS(stop) - TS2NS(__kmp_sys_timer_data.start);
1753  *delta = (t_ns * 1e-9);
1754}
1755
1756void __kmp_clear_system_time(void) {
1757  struct timeval tval;
1758  int status;
1759  status = gettimeofday(&tval, NULL);
1760  KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status);
1761  TIMEVAL_TO_TIMESPEC(&tval, &__kmp_sys_timer_data.start);
1762}
1763
1764static int __kmp_get_xproc(void) {
1765
1766  int r = 0;
1767
1768#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
1769        KMP_OS_OPENBSD || KMP_OS_HURD
1770
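  // _SC_NPROCESSORS_ONLN counts the CPUs currently online;
  // _SC_NPROCESSORS_CONF would also include configured-but-offline CPUs.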
1771  r = sysconf(_SC_NPROCESSORS_ONLN);
1772
1773#elif KMP_OS_DARWIN
1774
1775  // Bug C77011 High "OpenMP Threads and number of active cores".
1776
1777  // Find the number of available CPUs.
1778  kern_return_t rc;
1779  host_basic_info_data_t info;
1780  mach_msg_type_number_t num = HOST_BASIC_INFO_COUNT;
1781  rc = host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&info, &num);
1782  if (rc == 0 && num == HOST_BASIC_INFO_COUNT) {
1783    // Cannot use KA_TRACE() here because this code works before trace support
1784    // is initialized.
1785    r = info.avail_cpus;
1786  } else {
1787    KMP_WARNING(CantGetNumAvailCPU);
1788    KMP_INFORM(AssumedNumCPU);
1789  }
1790
1791#else
1792
1793#error "Unknown or unsupported OS."
1794
1795#endif
1796
1797  return r > 0 ? r : 2; /* guess value of 2 if OS told us 0 */
1798
1799} // __kmp_get_xproc
1800
int __kmp_read_from_file(char const *path, char const *format, ...) {
  int result;
  va_list args;

  va_start(args, format);
  FILE *f = fopen(path, "rb");
  if (f == NULL) {
    va_end(args); // clean up the va_list on the early-exit path too
    return 0;
  }
  result = vfscanf(f, format, args);
  va_end(args);
  fclose(f);

  return result;
}
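
// Usage sketch (illustrative only; the path and destination variable are
// hypothetical, not something this file actually reads):
//   int max_threads;
//   if (__kmp_read_from_file("/proc/sys/kernel/threads-max", "%d",
//                            &max_threads) == 1) {
//     /* max_threads now holds the kernel's thread limit */
//   }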
1814
1815void __kmp_runtime_initialize(void) {
1816  int status;
1817  pthread_mutexattr_t mutex_attr;
1818  pthread_condattr_t cond_attr;
1819
1820  if (__kmp_init_runtime) {
1821    return;
1822  }
1823
1824#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
1825  if (!__kmp_cpuinfo.initialized) {
1826    __kmp_query_cpuid(&__kmp_cpuinfo);
1827  }
1828#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1829
1830  __kmp_xproc = __kmp_get_xproc();
1831
#if !KMP_32_BIT_ARCH
1833  struct rlimit rlim;
1834  // read stack size of calling thread, save it as default for worker threads;
1835  // this should be done before reading environment variables
1836  status = getrlimit(RLIMIT_STACK, &rlim);
1837  if (status == 0) { // success?
1838    __kmp_stksize = rlim.rlim_cur;
1839    __kmp_check_stksize(&__kmp_stksize); // check value and adjust if needed
1840  }
#endif /* !KMP_32_BIT_ARCH */
1842
1843  if (sysconf(_SC_THREADS)) {
1844
1845    /* Query the maximum number of threads */
1846    __kmp_sys_max_nth = sysconf(_SC_THREAD_THREADS_MAX);
1847    if (__kmp_sys_max_nth == -1) {
1848      /* Unlimited threads for NPTL */
1849      __kmp_sys_max_nth = INT_MAX;
1850    } else if (__kmp_sys_max_nth <= 1) {
1851      /* Can't tell, just use PTHREAD_THREADS_MAX */
1852      __kmp_sys_max_nth = KMP_MAX_NTH;
1853    }
1854
1855    /* Query the minimum stack size */
1856    __kmp_sys_min_stksize = sysconf(_SC_THREAD_STACK_MIN);
1857    if (__kmp_sys_min_stksize <= 1) {
1858      __kmp_sys_min_stksize = KMP_MIN_STKSIZE;
1859    }
1860  }
1861
1862  /* Set up minimum number of threads to switch to TLS gtid */
1863  __kmp_tls_gtid_min = KMP_TLS_GTID_MIN;
1864
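  // pthread_key_create() registers __kmp_internal_end_dest as a destructor:
  // it runs at thread exit for every thread whose value for this key is
  // non-NULL.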
1865  status = pthread_key_create(&__kmp_gtid_threadprivate_key,
1866                              __kmp_internal_end_dest);
1867  KMP_CHECK_SYSFAIL("pthread_key_create", status);
1868  status = pthread_mutexattr_init(&mutex_attr);
1869  KMP_CHECK_SYSFAIL("pthread_mutexattr_init", status);
1870  status = pthread_mutex_init(&__kmp_wait_mx.m_mutex, &mutex_attr);
1871  KMP_CHECK_SYSFAIL("pthread_mutex_init", status);
1872  status = pthread_condattr_init(&cond_attr);
1873  KMP_CHECK_SYSFAIL("pthread_condattr_init", status);
1874  status = pthread_cond_init(&__kmp_wait_cv.c_cond, &cond_attr);
1875  KMP_CHECK_SYSFAIL("pthread_cond_init", status);
1876#if USE_ITT_BUILD
1877  __kmp_itt_initialize();
1878#endif /* USE_ITT_BUILD */
1879
1880  __kmp_init_runtime = TRUE;
1881}
1882
1883void __kmp_runtime_destroy(void) {
1884  int status;
1885
1886  if (!__kmp_init_runtime) {
1887    return; // Nothing to do.
1888  }
1889
1890#if USE_ITT_BUILD
1891  __kmp_itt_destroy();
1892#endif /* USE_ITT_BUILD */
1893
1894  status = pthread_key_delete(__kmp_gtid_threadprivate_key);
1895  KMP_CHECK_SYSFAIL("pthread_key_delete", status);
1896
1897  status = pthread_mutex_destroy(&__kmp_wait_mx.m_mutex);
1898  if (status != 0 && status != EBUSY) {
1899    KMP_SYSFAIL("pthread_mutex_destroy", status);
1900  }
1901  status = pthread_cond_destroy(&__kmp_wait_cv.c_cond);
1902  if (status != 0 && status != EBUSY) {
1903    KMP_SYSFAIL("pthread_cond_destroy", status);
1904  }
1905#if KMP_AFFINITY_SUPPORTED
1906  __kmp_affinity_uninitialize();
1907#endif
1908
1909  __kmp_init_runtime = FALSE;
1910}
1911
1912/* Put the thread to sleep for a time period */
1913/* NOTE: not currently used anywhere */
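/* sleep() has one-second granularity, so round millis to the nearest second */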
1914void __kmp_thread_sleep(int millis) { sleep((millis + 500) / 1000); }
1915
1916/* Calculate the elapsed wall clock time for the user */
1917void __kmp_elapsed(double *t) {
1918  int status;
1919#ifdef FIX_SGI_CLOCK
1920  struct timespec ts;
1921
1922  status = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);
1923  KMP_CHECK_SYSFAIL_ERRNO("clock_gettime", status);
1924  *t =
1925      (double)ts.tv_nsec * (1.0 / (double)KMP_NSEC_PER_SEC) + (double)ts.tv_sec;
1926#else
1927  struct timeval tv;
1928
1929  status = gettimeofday(&tv, NULL);
1930  KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status);
1931  *t =
1932      (double)tv.tv_usec * (1.0 / (double)KMP_USEC_PER_SEC) + (double)tv.tv_sec;
1933#endif
1934}
1935
1936/* Calculate the elapsed wall clock tick for the user */
1937void __kmp_elapsed_tick(double *t) { *t = 1 / (double)CLOCKS_PER_SEC; }
1938
1939/* Return the current time stamp in nsec */
1940kmp_uint64 __kmp_now_nsec() {
1941  struct timeval t;
1942  gettimeofday(&t, NULL);
1943  kmp_uint64 nsec = (kmp_uint64)KMP_NSEC_PER_SEC * (kmp_uint64)t.tv_sec +
1944                    (kmp_uint64)1000 * (kmp_uint64)t.tv_usec;
1945  return nsec;
1946}
1947
1948#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1949/* Measure clock ticks per millisecond */
1950void __kmp_initialize_system_tick() {
1951  kmp_uint64 now, nsec2, diff;
  kmp_uint64 delay = 100000; // in hardware ticks; 50~100 usec on most machines
1953  kmp_uint64 nsec = __kmp_now_nsec();
1954  kmp_uint64 goal = __kmp_hardware_timestamp() + delay;
1955  while ((now = __kmp_hardware_timestamp()) < goal)
1956    ;
1957  nsec2 = __kmp_now_nsec();
1958  diff = nsec2 - nsec;
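  // Elapsed hardware ticks ~= delay + (now - goal), where (now - goal) is the
  // busy-wait overshoot; elapsed wall time in ns is diff. Multiplying by 1e6
  // converts the ratio to ticks per millisecond.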
1959  if (diff > 0) {
1960    kmp_uint64 tpms = (kmp_uint64)(1e6 * (delay + (now - goal)) / diff);
1961    if (tpms > 0)
1962      __kmp_ticks_per_msec = tpms;
1963  }
1964}
1965#endif
1966
1967/* Determine whether the given address is mapped into the current address
1968   space. */
1969
1970int __kmp_is_address_mapped(void *addr) {
1971
1972  int found = 0;
1973  int rc;
1974
1975#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_HURD
1976
1977  /* On GNUish OSes, read the /proc/<pid>/maps pseudo-file to get all the address
1978     ranges mapped into the address space. */
1979
1980  char *name = __kmp_str_format("/proc/%d/maps", getpid());
1981  FILE *file = NULL;
1982
1983  file = fopen(name, "r");
1984  KMP_ASSERT(file != NULL);
1985
1986  for (;;) {
1987
1988    void *beginning = NULL;
1989    void *ending = NULL;
1990    char perms[5];
1991
1992    rc = fscanf(file, "%p-%p %4s %*[^\n]\n", &beginning, &ending, perms);
1993    if (rc == EOF) {
1994      break;
1995    }
1996    KMP_ASSERT(rc == 3 &&
1997               KMP_STRLEN(perms) == 4); // Make sure all fields are read.
1998
1999    // Ending address is not included in the region, but beginning is.
2000    if ((addr >= beginning) && (addr < ending)) {
      perms[2] = 0; // 3rd and 4th characters do not matter.
2002      if (strcmp(perms, "rw") == 0) {
2003        // Memory we are looking for should be readable and writable.
2004        found = 1;
2005      }
2006      break;
2007    }
2008  }
2009
2010  // Free resources.
2011  fclose(file);
2012  KMP_INTERNAL_FREE(name);
2013
2014#elif KMP_OS_DARWIN
2015
  /* On OS X*, the /proc pseudo filesystem is not available. Try to read
     memory using the vm interface. */
2018
2019  int buffer;
2020  vm_size_t count;
2021  rc = vm_read_overwrite(
2022      mach_task_self(), // Task to read memory of.
2023      (vm_address_t)(addr), // Address to read from.
2024      1, // Number of bytes to be read.
2025      (vm_address_t)(&buffer), // Address of buffer to save read bytes in.
2026      &count // Address of var to save number of read bytes in.
2027      );
2028  if (rc == 0) {
2029    // Memory successfully read.
2030    found = 1;
2031  }
2032
2033#elif KMP_OS_NETBSD
2034
2035  int mib[5];
2036  mib[0] = CTL_VM;
2037  mib[1] = VM_PROC;
2038  mib[2] = VM_PROC_MAP;
2039  mib[3] = getpid();
2040  mib[4] = sizeof(struct kinfo_vmentry);
2041
2042  size_t size;
2043  rc = sysctl(mib, __arraycount(mib), NULL, &size, NULL, 0);
2044  KMP_ASSERT(!rc);
2045  KMP_ASSERT(size);
2046
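  // Pad the buffer: new mappings may appear between the sizing sysctl() call
  // above and the data-fetching call below.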
2047  size = size * 4 / 3;
2048  struct kinfo_vmentry *kiv = (struct kinfo_vmentry *)KMP_INTERNAL_MALLOC(size);
2049  KMP_ASSERT(kiv);
2050
2051  rc = sysctl(mib, __arraycount(mib), kiv, &size, NULL, 0);
2052  KMP_ASSERT(!rc);
2053  KMP_ASSERT(size);
2054
  // After the second sysctl() call, size is the number of bytes returned;
  // walk the entries and test whether addr lies inside a mapped region
  // [kve_start, kve_end).
  for (size_t i = 0; i < size / sizeof(struct kinfo_vmentry); i++) {
    if ((uint64_t)addr >= kiv[i].kve_start &&
        (uint64_t)addr < kiv[i].kve_end) {
2058      found = 1;
2059      break;
2060    }
2061  }
2062  KMP_INTERNAL_FREE(kiv);
2063#elif KMP_OS_DRAGONFLY || KMP_OS_OPENBSD
2064
2065  // FIXME(DragonFly, OpenBSD): Implement this
2066  found = 1;
2067
2068#else
2069
2070#error "Unknown or unsupported OS"
2071
2072#endif
2073
2074  return found;
2075
2076} // __kmp_is_address_mapped
2077
2078#ifdef USE_LOAD_BALANCE
2079
2080#if KMP_OS_DARWIN || KMP_OS_NETBSD
2081
// The function returns the rounded value of the system load average over a
// recent time interval, selected by the __kmp_load_balance_interval variable
// (default is 60 sec; other sensible values are 300 sec and 900 sec, matching
// the 1-, 5-, and 15-minute averages that getloadavg() provides).
// It returns -1 in case of error.
2087int __kmp_get_load_balance(int max) {
2088  double averages[3];
2089  int ret_avg = 0;
2090
2091  int res = getloadavg(averages, 3);
2092
  // Check __kmp_load_balance_interval to determine which of the averages to
  // use. getloadavg() may return fewer samples than requested, i.e., fewer
  // than 3.
2096  if (__kmp_load_balance_interval < 180 && (res >= 1)) {
2097    ret_avg = averages[0]; // 1 min
2098  } else if ((__kmp_load_balance_interval >= 180 &&
2099              __kmp_load_balance_interval < 600) &&
2100             (res >= 2)) {
2101    ret_avg = averages[1]; // 5 min
2102  } else if ((__kmp_load_balance_interval >= 600) && (res == 3)) {
2103    ret_avg = averages[2]; // 15 min
2104  } else { // Error occurred
2105    return -1;
2106  }
2107
2108  return ret_avg;
2109}
2110
2111#else // Linux* OS
2112
// The function returns the number of running (not sleeping) threads, or -1 in
// case of error. An error can be reported if the Linux* OS kernel is too old
// (lacks "/proc" support). Counting stops once max running threads have been
// encountered.
2117int __kmp_get_load_balance(int max) {
2118  static int permanent_error = 0;
  static int glb_running_threads = 0; // Saved count of the running threads for
  // the thread balance algorithm
  static double glb_call_time = 0; /* Thread balance algorithm call time */
2122
2123  int running_threads = 0; // Number of running threads in the system.
2124
2125  DIR *proc_dir = NULL; // Handle of "/proc/" directory.
2126  struct dirent *proc_entry = NULL;
2127
2128  kmp_str_buf_t task_path; // "/proc/<pid>/task/<tid>/" path.
2129  DIR *task_dir = NULL; // Handle of "/proc/<pid>/task/<tid>/" directory.
2130  struct dirent *task_entry = NULL;
2131  int task_path_fixed_len;
2132
2133  kmp_str_buf_t stat_path; // "/proc/<pid>/task/<tid>/stat" path.
2134  int stat_file = -1;
2135  int stat_path_fixed_len;
2136
2137  int total_processes = 0; // Total number of processes in system.
2138  int total_threads = 0; // Total number of threads in system.
2139
2140  double call_time = 0.0;
2141
2142  __kmp_str_buf_init(&task_path);
2143  __kmp_str_buf_init(&stat_path);
2144
2145  __kmp_elapsed(&call_time);
2146
2147  if (glb_call_time &&
2148      (call_time - glb_call_time < __kmp_load_balance_interval)) {
2149    running_threads = glb_running_threads;
2150    goto finish;
2151  }
2152
2153  glb_call_time = call_time;
2154
2155  // Do not spend time on scanning "/proc/" if we have a permanent error.
2156  if (permanent_error) {
2157    running_threads = -1;
2158    goto finish;
2159  }
2160
2161  if (max <= 0) {
2162    max = INT_MAX;
2163  }
2164
2165  // Open "/proc/" directory.
2166  proc_dir = opendir("/proc");
2167  if (proc_dir == NULL) {
    // Cannot open "/proc/". Probably the kernel does not support it. Return
    // an error now and in subsequent calls.
2170    running_threads = -1;
2171    permanent_error = 1;
2172    goto finish;
2173  }
2174
2175  // Initialize fixed part of task_path. This part will not change.
2176  __kmp_str_buf_cat(&task_path, "/proc/", 6);
2177  task_path_fixed_len = task_path.used; // Remember number of used characters.
2178
2179  proc_entry = readdir(proc_dir);
2180  while (proc_entry != NULL) {
2181    // Proc entry is a directory and name starts with a digit. Assume it is a
2182    // process' directory.
2183    if (proc_entry->d_type == DT_DIR && isdigit(proc_entry->d_name[0])) {
2184
2185      ++total_processes;
      // Make sure the init process is the very first entry in "/proc", so
      // that we can replace strcmp(proc_entry->d_name, "1") == 0 with the
      // simpler total_processes == 1. We are going to check the implication
      // total_processes == 1 => d_name == "1"; since C++ has no implication
      // operator, we use its equivalent: a => b  ==  !a || b.
2191      KMP_DEBUG_ASSERT(total_processes != 1 ||
2192                       strcmp(proc_entry->d_name, "1") == 0);
2193
2194      // Construct task_path.
2195      task_path.used = task_path_fixed_len; // Reset task_path to "/proc/".
2196      __kmp_str_buf_cat(&task_path, proc_entry->d_name,
2197                        KMP_STRLEN(proc_entry->d_name));
2198      __kmp_str_buf_cat(&task_path, "/task", 5);
2199
2200      task_dir = opendir(task_path.str);
2201      if (task_dir == NULL) {
        // A process can finish between reading the "/proc/" directory entry
        // and opening the process' "task/" directory. So, in the general
        // case, we should not complain, but just skip this process and read
        // the next one. However, on systems with no "task/" support we would
        // spend a lot of time rescanning the "/proc/" tree again and again
        // without any benefit. The "init" process (pid 1) should always
        // exist, so if we cannot open the "/proc/1/task/" directory, it means
        // "task/" is not supported by the kernel. Report an error now and in
        // subsequent calls.
2210        if (strcmp(proc_entry->d_name, "1") == 0) {
2211          running_threads = -1;
2212          permanent_error = 1;
2213          goto finish;
2214        }
2215      } else {
2216        // Construct fixed part of stat file path.
2217        __kmp_str_buf_clear(&stat_path);
2218        __kmp_str_buf_cat(&stat_path, task_path.str, task_path.used);
2219        __kmp_str_buf_cat(&stat_path, "/", 1);
2220        stat_path_fixed_len = stat_path.used;
2221
2222        task_entry = readdir(task_dir);
2223        while (task_entry != NULL) {
2224          // It is a directory and name starts with a digit.
          if (task_entry->d_type == DT_DIR && isdigit(task_entry->d_name[0])) {
2226            ++total_threads;
2227
            // Construct the complete stat file path. The easiest way would be
            //  __kmp_str_buf_print( & stat_path, "%s/%s/stat", task_path.str,
            //  task_entry->d_name );
            // but a series of __kmp_str_buf_cat calls works a bit faster.
2232            stat_path.used =
2233                stat_path_fixed_len; // Reset stat path to its fixed part.
2234            __kmp_str_buf_cat(&stat_path, task_entry->d_name,
2235                              KMP_STRLEN(task_entry->d_name));
2236            __kmp_str_buf_cat(&stat_path, "/stat", 5);
2237
            // Note: The low-level API (open/read/close) is used. The
            // high-level API (fopen/fclose) works ~30% slower.
2240            stat_file = open(stat_path.str, O_RDONLY);
2241            if (stat_file == -1) {
2242              // We cannot report an error because task (thread) can terminate
2243              // just before reading this file.
2244            } else {
              /* Content of the "stat" file looks like:
                 24285 (program) S ...

                 It is a single line (if the program name does not include
                 funny symbols). The first number is the thread id, then the
                 name of the executable file in parentheses, then the state
                 of the thread. We need just the thread state.

                 Good news: The length of the program name is 15 characters
                 max. Longer names are truncated.

                 Thus, we need a rather short buffer: 15 chars for the program
                 name + 2 parentheses + 3 spaces + ~7 digits of pid = 37.

                 Bad news: The program name may contain special symbols like a
                 space, closing parenthesis, or even a new line. This makes
                 parsing the "stat" file not 100% reliable. In case of funny
                 program names, parsing may fail (reporting an incorrect
                 thread state).

                 Parsing the "status" file looks more promising (due to a
                 different file structure and escaping of special symbols),
                 but reading and parsing the "status" file works slower.
                  -- ln
              */
2269              char buffer[65];
2270              int len;
2271              len = read(stat_file, buffer, sizeof(buffer) - 1);
2272              if (len >= 0) {
2273                buffer[len] = 0;
2274                // Using scanf:
2275                //     sscanf( buffer, "%*d (%*s) %c ", & state );
2276                // looks very nice, but searching for a closing parenthesis
2277                // works a bit faster.
2278                char *close_parent = strstr(buffer, ") ");
2279                if (close_parent != NULL) {
2280                  char state = *(close_parent + 2);
2281                  if (state == 'R') {
2282                    ++running_threads;
2283                    if (running_threads >= max) {
2284                      goto finish;
2285                    }
2286                  }
2287                }
2288              }
2289              close(stat_file);
2290              stat_file = -1;
2291            }
2292          }
2293          task_entry = readdir(task_dir);
2294        }
2295        closedir(task_dir);
2296        task_dir = NULL;
2297      }
2298    }
2299    proc_entry = readdir(proc_dir);
2300  }
2301
2302  // There _might_ be a timing hole where the thread executing this
  // code gets skipped in the load balance, and running_threads is 0.
2304  // Assert in the debug builds only!!!
2305  KMP_DEBUG_ASSERT(running_threads > 0);
2306  if (running_threads <= 0) {
2307    running_threads = 1;
2308  }
2309
2310finish: // Clean up and exit.
2311  if (proc_dir != NULL) {
2312    closedir(proc_dir);
2313  }
2314  __kmp_str_buf_free(&task_path);
2315  if (task_dir != NULL) {
2316    closedir(task_dir);
2317  }
2318  __kmp_str_buf_free(&stat_path);
2319  if (stat_file != -1) {
2320    close(stat_file);
2321  }
2322
2323  glb_running_threads = running_threads;
2324
2325  return running_threads;
2326
2327} // __kmp_get_load_balance
2328
#endif // KMP_OS_DARWIN || KMP_OS_NETBSD
2330
2331#endif // USE_LOAD_BALANCE
2332
2333#if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC ||                            \
2334      ((KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64) || KMP_ARCH_PPC64)
2335
// We really only need the case with 1 argument, because clang always builds
// a struct of pointers to the shared variables referenced in the outlined
// function.
2338int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
2339                           void *p_argv[]
2340#if OMPT_SUPPORT
2341                           ,
2342                           void **exit_frame_ptr
2343#endif
2344                           ) {
2345#if OMPT_SUPPORT
2346  *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
2347#endif
2348
2349  switch (argc) {
2350  default:
2351    fprintf(stderr, "Too many args to microtask: %d!\n", argc);
2352    fflush(stderr);
2353    exit(-1);
2354  case 0:
2355    (*pkfn)(&gtid, &tid);
2356    break;
2357  case 1:
2358    (*pkfn)(&gtid, &tid, p_argv[0]);
2359    break;
2360  case 2:
2361    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1]);
2362    break;
2363  case 3:
2364    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2]);
2365    break;
2366  case 4:
2367    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3]);
2368    break;
2369  case 5:
2370    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4]);
2371    break;
2372  case 6:
2373    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2374            p_argv[5]);
2375    break;
2376  case 7:
2377    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2378            p_argv[5], p_argv[6]);
2379    break;
2380  case 8:
2381    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2382            p_argv[5], p_argv[6], p_argv[7]);
2383    break;
2384  case 9:
2385    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2386            p_argv[5], p_argv[6], p_argv[7], p_argv[8]);
2387    break;
2388  case 10:
2389    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2390            p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9]);
2391    break;
2392  case 11:
2393    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2394            p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10]);
2395    break;
2396  case 12:
2397    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2398            p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
2399            p_argv[11]);
2400    break;
2401  case 13:
2402    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2403            p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
2404            p_argv[11], p_argv[12]);
2405    break;
2406  case 14:
2407    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2408            p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
2409            p_argv[11], p_argv[12], p_argv[13]);
2410    break;
2411  case 15:
2412    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2413            p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
2414            p_argv[11], p_argv[12], p_argv[13], p_argv[14]);
2415    break;
2416  }
2417
2418#if OMPT_SUPPORT
2419  *exit_frame_ptr = 0;
2420#endif
2421
2422  return 1;
2423}
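
// Note: the switch above is needed because C varargs cannot be forwarded to a
// fixed-arity function pointer; each argument count requires an explicit call.
// As noted before the function, clang packs the shared variables into a single
// struct, so in practice only the argc == 1 case is exercised.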
2424
2425#endif
2426
2427// end of file //
2428