/*
 * kmp_affinity.h -- header for affinity management
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

#include "kmp.h"
#include "kmp_os.h"

#if KMP_AFFINITY_SUPPORTED
#if KMP_USE_HWLOC
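// Affinity implementation built on the hwloc library: masks are backed by
// hwloc_cpuset_t bitmaps, and binding queries/updates go through
// hwloc_get_cpubind()/hwloc_set_cpubind() on the shared __kmp_hwloc_topology.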
class KMPHwlocAffinity : public KMPAffinity {
public:
  class Mask : public KMPAffinity::Mask {
    hwloc_cpuset_t mask;

  public:
    Mask() {
      mask = hwloc_bitmap_alloc();
      this->zero();
    }
    ~Mask() { hwloc_bitmap_free(mask); }
    void set(int i) override { hwloc_bitmap_set(mask, i); }
    bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
    void clear(int i) override { hwloc_bitmap_clr(mask, i); }
    void zero() override { hwloc_bitmap_zero(mask); }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      hwloc_bitmap_copy(mask, convert->mask);
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_and(mask, mask, convert->mask);
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_or(mask, mask, convert->mask);
    }
    void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
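    // Iteration matches hwloc bitmap semantics: begin() is the first set bit,
    // next(previous) the one after it, and end() the value hwloc returns when
    // no bits remain (-1), so a traversal can be written as:
    //   for (int i = mask->begin(); i != mask->end(); i = mask->next(i)) ...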
    int begin() const override { return hwloc_bitmap_first(mask); }
    int end() const override { return -1; }
    int next(int previous) const override {
      return hwloc_bitmap_next(mask, previous);
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      int retval =
          hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      int retval =
          hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
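    // Return the Windows processor group this mask belongs to: -1 if the mask
    // is empty or spans more than one group, the group index otherwise (or 1
    // when only a single group exists). Always returns -1 on non-Windows
    // builds, where processor groups do not apply.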
    int get_proc_group() const override {
      int group = -1;
#if KMP_OS_WINDOWS
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        // On Windows, the long type is always 32 bits
        unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
        unsigned long second_32_bits =
            hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
        if (first_32_bits == 0 && second_32_bits == 0) {
          continue;
        }
        if (group >= 0) {
          return -1;
        }
        group = i;
      }
#endif /* KMP_OS_WINDOWS */
      return group;
    }
  };
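  // Probe hwloc for topology discovery and per-thread binding support and
  // enable or disable the affinity API accordingly; `var` names the
  // controlling environment variable for warning messages.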
  void determine_capable(const char *var) override {
    const hwloc_topology_support *topology_support;
    if (__kmp_hwloc_topology == NULL) {
      if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
      }
      if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
      }
    }
    topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
    // Is the system capable of setting/getting this thread's affinity?
    // Also, is topology discovery possible? (pu indicates ability to discover
    // processing units). And finally, were there no errors when calling any
    // hwloc_* API functions?
    if (topology_support &&
        topology_support->cpubind->set_thisthread_cpubind &&
        topology_support->cpubind->get_thisthread_cpubind &&
        topology_support->discovery->pu && !__kmp_hwloc_error) {
      // enables affinity according to KMP_AFFINITY_CAPABLE() macro
      KMP_AFFINITY_ENABLE(TRUE);
    } else {
      // indicate that hwloc didn't work and disable affinity
      __kmp_hwloc_error = TRUE;
      KMP_AFFINITY_DISABLE();
    }
  }
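  // Bind the calling thread to OS proc `which` by building a one-bit mask and
  // applying it as the system affinity.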
  void bind_thread(int which) override {
    KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                "Illegal set affinity operation when not capable");
    KMPAffinity::Mask *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);
    KMP_CPU_SET(which, mask);
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    delete[] hwloc_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    return &(hwloc_array[index]);
  }
  api_type get_api_type() const override { return HWLOC; }
};
#endif /* KMP_USE_HWLOC */

#if KMP_OS_LINUX || KMP_OS_FREEBSD
#if KMP_OS_LINUX
/* On some of the older OSes that we build on, these constants aren't present
   in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
   all systems of the same arch where they are defined, and they cannot
   change; they are set in stone forever. */
#include <sys/syscall.h>
#if KMP_ARCH_X86 || KMP_ARCH_ARM
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 241
#elif __NR_sched_setaffinity != 241
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 242
#elif __NR_sched_getaffinity != 242
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_AARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_X86_64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_PPC64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 222
#elif __NR_sched_setaffinity != 222
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 223
#elif __NR_sched_getaffinity != 223
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 4239
#elif __NR_sched_setaffinity != 4239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 4240
#elif __NR_sched_getaffinity != 4240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 5195
#elif __NR_sched_setaffinity != 5195
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 5196
#elif __NR_sched_getaffinity != 5196
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#else
#error Unknown or unsupported architecture
#endif /* KMP_ARCH_* */
#elif KMP_OS_FREEBSD
#include <pthread.h>
#include <pthread_np.h>
#endif
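// Affinity implementation using the native OS interfaces: raw
// sched_{get,set}affinity syscalls on Linux and
// pthread_{get,set}affinity_np() on FreeBSD. The mask is a plain byte array
// of __kmp_affin_mask_size bytes.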
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef unsigned char mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;

  public:
    mask_t *mask;
    Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_affin_mask_size * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
#if KMP_OS_LINUX
      int retval =
          syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD
      int retval =
          pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                 reinterpret_cast<cpuset_t *>(mask));
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
#if KMP_OS_LINUX
      int retval =
          syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD
      int retval =
          pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                 reinterpret_cast<cpuset_t *>(mask));
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override {
    KMPNativeAffinity::Mask *retval = new Mask();
    return retval;
  }
  void deallocate_mask(KMPAffinity::Mask *m) override {
    KMPNativeAffinity::Mask *native_mask =
        static_cast<KMPNativeAffinity::Mask *>(m);
    delete native_mask;
  }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *linux_array = static_cast<Mask *>(array);
    delete[] linux_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *linux_array = static_cast<Mask *>(array);
    return &(linux_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */

#if KMP_OS_WINDOWS
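// Affinity implementation for native Windows threads. Masks hold one
// ULONG_PTR per processor group; binding uses SetThreadAffinityMask() on
// single-group systems and {Get,Set}ThreadGroupAffinity() when multiple
// processor groups are present.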
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef ULONG_PTR mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    mask_t *mask;

  public:
    Mask() {
      mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
    }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int set_system_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups > 1) {
        // Check for a valid mask.
        GROUP_AFFINITY ga;
        int group = get_proc_group();
        if (group < 0) {
          if (abort_on_error) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
          }
          return -1;
        }
        // Transform the bit vector into a GROUP_AFFINITY struct
        // and make the system call to set affinity.
        ga.Group = group;
        ga.Mask = mask[group];
        ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

        KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
        if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      } else {
        if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int get_system_affinity(bool abort_on_error) override {
      if (__kmp_num_proc_groups > 1) {
        this->zero();
        GROUP_AFFINITY ga;
        KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
        if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
            (ga.Mask == 0)) {
          return -1;
        }
        mask[ga.Group] = ga.Mask;
      } else {
        mask_t newMask, sysMask, retval;
        if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
        if (!retval) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
        if (!newMask) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
        }
        *mask = retval;
      }
      return 0;
    }
    int get_proc_group() const override {
      int group = -1;
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        if (mask[i] == 0)
          continue;
        if (group >= 0)
          return -1;
        group = i;
      }
      return group;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *windows_array = static_cast<Mask *>(array);
    delete[] windows_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *windows_array = static_cast<Mask *>(array);
    return &(windows_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_AFFINITY_SUPPORTED */

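// An Address describes one hardware thread as the path of topology labels
// (e.g. package, core, thread ids) from the root of the machine hierarchy
// down to that thread; childNums records the child index at each level.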
class Address {
public:
  static const unsigned maxDepth = 32;
  unsigned labels[maxDepth];
  unsigned childNums[maxDepth];
  unsigned depth;
  unsigned leader;
  Address(unsigned _depth) : depth(_depth), leader(FALSE) {}
  Address &operator=(const Address &b) {
    depth = b.depth;
    for (unsigned i = 0; i < depth; i++) {
      labels[i] = b.labels[i];
      childNums[i] = b.childNums[i];
    }
    leader = FALSE;
    return *this;
  }
  bool operator==(const Address &b) const {
    if (depth != b.depth)
      return false;
    for (unsigned i = 0; i < depth; i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool isClose(const Address &b, int level) const {
    if (depth != b.depth)
      return false;
    if ((unsigned)level >= depth)
      return true;
    for (unsigned i = 0; i < (depth - level); i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool operator!=(const Address &b) const { return !operator==(b); }
  void print() const {
    unsigned i;
    printf("Depth: %u --- ", depth);
    for (i = 0; i < depth; i++) {
      printf("%u ", labels[i]);
    }
  }
};

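// Pairs a topology Address with an unsigned payload (in the affinity code,
// typically the OS processor id of that hardware thread).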
class AddrUnsPair {
public:
  Address first;
  unsigned second;
  AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {}
  AddrUnsPair &operator=(const AddrUnsPair &b) {
    first = b.first;
    second = b.second;
    return *this;
  }
  void print() const {
    printf("first = ");
    first.print();
    printf(" --- second = %u", second);
  }
  bool operator==(const AddrUnsPair &b) const {
    if (first != b.first)
      return false;
    if (second != b.second)
      return false;
    return true;
  }
  bool operator!=(const AddrUnsPair &b) const { return !operator==(b); }
};

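// qsort() comparator: orders AddrUnsPair entries lexicographically by their
// topology labels; both addresses are expected to have the same depth.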
static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) {
  const Address *aa = &(((const AddrUnsPair *)a)->first);
  const Address *bb = &(((const AddrUnsPair *)b)->first);
  unsigned depth = aa->depth;
  unsigned i;
  KMP_DEBUG_ASSERT(depth == bb->depth);
  for (i = 0; i < depth; i++) {
    if (aa->labels[i] < bb->labels[i])
      return -1;
    if (aa->labels[i] > bb->labels[i])
      return 1;
  }
  return 0;
}

/* A structure for holding machine-specific hierarchy info to be computed once
   at init. This structure represents a mapping of threads to the actual
   machine hierarchy, or to our best guess at what the hierarchy might be, for
   the purpose of performing an efficient barrier. In the worst case, when
   there is no machine hierarchy information, it produces a tree suitable for
   a barrier, similar to the tree used in the hyper barrier. */
class hierarchy_info {
public:
  /* Good default values for number of leaves and branching factor, given no
     affinity information. Behaves a bit like hyper barrier. */
  static const kmp_uint32 maxLeaves = 4;
  static const kmp_uint32 minBranch = 4;
  /** Number of levels in the hierarchy. Typical levels are threads/core,
      cores/package or socket, packages/node, nodes/machine, etc. We don't want
      to get specific with nomenclature. When the machine is oversubscribed we
      add levels to duplicate the hierarchy, doubling the thread capacity of
      the hierarchy each time we add a level. */
  kmp_uint32 maxLevels;

  /** This is specifically the depth of the machine configuration hierarchy,
      in terms of the number of levels along the longest path from root to any
      leaf. It corresponds to the number of entries in numPerLevel if we
      exclude all but one trailing 1. */
  kmp_uint32 depth;
  kmp_uint32 base_num_threads;
  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
  volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
  // 2=initialization in progress
  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

  /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children
      the parent of a node at level i has. For example, if we have a machine
      with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel =
      {2, 4, 4, 1, 1}. All empty levels are set to 1. */
  kmp_uint32 *numPerLevel;
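  /** skipPerLevel[i] is the number of leaves (hardware threads) covered by a
      subtree rooted at level i: skipPerLevel[0] is 1, and each higher level
      is the level below it multiplied by that level's fan-out (numPerLevel).
      Levels beyond depth double on each step to cover oversubscription. */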
  kmp_uint32 *skipPerLevel;

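  // Derive numPerLevel[] from the address table: walking from the leaf-most
  // label of the addresses up toward the root, record the largest observed
  // child index plus one as the fan-out at each level.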
  void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
    int hier_depth = adr2os[0].first.depth;
    int level = 0;
    for (int i = hier_depth - 1; i >= 0; --i) {
      int max = -1;
      for (int j = 0; j < num_addrs; ++j) {
        int next = adr2os[j].first.childNums[i];
        if (next > max)
          max = next;
      }
      numPerLevel[level] = max + 1;
      ++level;
    }
  }

  hierarchy_info()
      : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}

  void fini() {
    if (!uninitialized && numPerLevel) {
      __kmp_free(numPerLevel);
      numPerLevel = NULL;
      uninitialized = not_initialized;
    }
  }

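  // Build the hierarchy from an address table (or a default maxLeaves-ary
  // shape when adr2os is NULL), then balance the branching factors. Only one
  // thread performs the initialization; concurrent callers spin until the
  // structure is published as initialized.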
  void init(AddrUnsPair *adr2os, int num_addrs) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
        &uninitialized, not_initialized, initializing);
    if (bool_result == 0) { // Wait for initialization
      while (TCR_1(uninitialized) != initialized)
        KMP_CPU_PAUSE();
      return;
    }
    KMP_DEBUG_ASSERT(bool_result == 1);

    /* The data fields are initialized explicitly here to avoid using stale
       values left over when the static library is re-initialized multiple
       times (e.g. when a non-OpenMP thread repeatedly launches/joins a thread
       that uses OpenMP). */
    depth = 1;
    resizing = 0;
    maxLevels = 7;
    numPerLevel =
        (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
    skipPerLevel = &(numPerLevel[maxLevels]);
    for (kmp_uint32 i = 0; i < maxLevels;
         ++i) { // init numPerLevel[*] to 1 item per level
      numPerLevel[i] = 1;
      skipPerLevel[i] = 1;
    }

    // Sort table by physical ID
    if (adr2os) {
      qsort(adr2os, num_addrs, sizeof(*adr2os),
            __kmp_affinity_cmp_Address_labels);
      deriveLevels(adr2os, num_addrs);
    } else {
      numPerLevel[0] = maxLeaves;
      numPerLevel[1] = num_addrs / maxLeaves;
      if (num_addrs % maxLeaves)
        numPerLevel[1]++;
    }

    base_num_threads = num_addrs;
    for (int i = maxLevels - 1; i >= 0;
         --i) // count non-empty levels to get depth
      if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
        depth++;

    kmp_uint32 branch = minBranch;
    if (numPerLevel[0] == 1)
      branch = num_addrs / maxLeaves;
    if (branch < minBranch)
      branch = minBranch;
    for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
      while (numPerLevel[d] > branch ||
             (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
        if (numPerLevel[d] & 1)
          numPerLevel[d]++;
        numPerLevel[d] = numPerLevel[d] >> 1;
        if (numPerLevel[d + 1] == 1)
          depth++;
        numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
      }
      if (numPerLevel[0] == 1) {
        branch = branch >> 1;
        if (branch < 4)
          branch = minBranch;
      }
    }

    for (kmp_uint32 i = 1; i < depth; ++i)
      skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
    // Fill in hierarchy in the case of oversubscription
    for (kmp_uint32 i = depth; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    uninitialized = initialized; // One writer
  }

  // Resize the hierarchy if nproc changes to something larger than before
  void resize(kmp_uint32 nproc) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    while (bool_result == 0) { // someone else is trying to resize
      KMP_CPU_PAUSE();
      if (nproc <= base_num_threads) // happy with other thread's resize
        return;
      else // try to resize
        bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    }
    KMP_DEBUG_ASSERT(bool_result != 0);
    if (nproc <= base_num_threads)
      return; // happy with other thread's resize

    // Calculate new maxLevels
    kmp_uint32 old_sz = skipPerLevel[depth - 1];
    kmp_uint32 incs = 0, old_maxLevels = maxLevels;
    // First see if old maxLevels is enough to contain new size
    for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];
      numPerLevel[i - 1] *= 2;
      old_sz *= 2;
      depth++;
    }
    if (nproc > old_sz) { // Not enough space, need to expand hierarchy
      while (nproc > old_sz) {
        old_sz *= 2;
        incs++;
        depth++;
      }
      maxLevels += incs;

      // Resize arrays
      kmp_uint32 *old_numPerLevel = numPerLevel;
      kmp_uint32 *old_skipPerLevel = skipPerLevel;
      numPerLevel = skipPerLevel = NULL;
      numPerLevel =
          (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
      skipPerLevel = &(numPerLevel[maxLevels]);

      // Copy old elements from old arrays
      for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
        numPerLevel[i] = old_numPerLevel[i];
        skipPerLevel[i] = old_skipPerLevel[i];
      }

      // Init new elements in arrays to 1
      for (kmp_uint32 i = old_maxLevels; i < maxLevels;
           ++i) { // init numPerLevel[*] to 1 item per level
        numPerLevel[i] = 1;
        skipPerLevel[i] = 1;
      }

      // Free old arrays
      __kmp_free(old_numPerLevel);
    }

    // Fill in oversubscription levels of hierarchy
    for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    base_num_threads = nproc;
    resizing = 0; // One writer
  }
};
#endif // KMP_AFFINITY_H