/*
 * kmp_affinity.h -- header for affinity management
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

#include "kmp.h"
#include "kmp_os.h"

#if KMP_AFFINITY_SUPPORTED
#if KMP_USE_HWLOC
class KMPHwlocAffinity : public KMPAffinity {
public:
  class Mask : public KMPAffinity::Mask {
    hwloc_cpuset_t mask;

  public:
    Mask() {
      mask = hwloc_bitmap_alloc();
      this->zero();
    }
    ~Mask() { hwloc_bitmap_free(mask); }
    void set(int i) override { hwloc_bitmap_set(mask, i); }
    bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
    void clear(int i) override { hwloc_bitmap_clr(mask, i); }
    void zero() override { hwloc_bitmap_zero(mask); }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      hwloc_bitmap_copy(mask, convert->mask);
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_and(mask, mask, convert->mask);
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_or(mask, mask, convert->mask);
    }
    void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
    int begin() const override { return hwloc_bitmap_first(mask); }
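    // hwloc_bitmap_next() returns -1 once iteration is exhausted, so -1
    // doubles as the end() sentinel for this mask implementation.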
    int end() const override { return -1; }
    int next(int previous) const override {
      return hwloc_bitmap_next(mask, previous);
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      int retval =
          hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      int retval =
          hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int get_proc_group() const override {
      int group = -1;
#if KMP_OS_WINDOWS
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        // On Windows, the long type is always 32 bits
        unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
        unsigned long second_32_bits =
            hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
        if (first_32_bits == 0 && second_32_bits == 0) {
          continue;
        }
        if (group >= 0) {
          return -1;
        }
        group = i;
      }
#endif /* KMP_OS_WINDOWS */
      return group;
    }
  };
  void determine_capable(const char *var) override {
    const hwloc_topology_support *topology_support;
    if (__kmp_hwloc_topology == NULL) {
      if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
      }
      if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
      }
    }
    topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
    // Is the system capable of setting/getting this thread's affinity?
    // Also, is topology discovery possible? (pu indicates ability to discover
    // processing units). And finally, were there no errors when calling any
    // hwloc_* API functions?
    if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
        topology_support->cpubind->get_thisthread_cpubind &&
        topology_support->discovery->pu && !__kmp_hwloc_error) {
      // enables affinity according to KMP_AFFINITY_CAPABLE() macro
      KMP_AFFINITY_ENABLE(TRUE);
    } else {
      // indicate that hwloc didn't work and disable affinity
      __kmp_hwloc_error = TRUE;
      KMP_AFFINITY_DISABLE();
    }
  }
  void bind_thread(int which) override {
    KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                "Illegal set affinity operation when not capable");
    KMPAffinity::Mask *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);
    KMP_CPU_SET(which, mask);
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    delete[] hwloc_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    return &(hwloc_array[index]);
  }
  api_type get_api_type() const override { return HWLOC; }
};
#endif /* KMP_USE_HWLOC */
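
/* Illustrative sketch (not part of the header): a caller holding a
   KMPAffinity object could exercise the Mask interface roughly as follows,
   once affinity has been determined capable. The variable names here are
   hypothetical.

     KMPAffinity *affinity = ...;        // e.g. a KMPHwlocAffinity instance
     KMPAffinity::Mask *m = affinity->allocate_mask();
     m->zero();
     m->set(0);                          // request logical CPU 0
     m->set_system_affinity(TRUE);       // bind, aborting on failure
     affinity->deallocate_mask(m);
*/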

#if KMP_OS_LINUX || KMP_OS_FREEBSD
#if KMP_OS_LINUX
/* On some of the older OSes that we build on, these constants aren't present
   in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
   all systems of the same arch where they are defined, and they cannot
   change; they are set in stone forever. */
#include <sys/syscall.h>
#if KMP_ARCH_X86 || KMP_ARCH_ARM
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 241
#elif __NR_sched_setaffinity != 241
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 242
#elif __NR_sched_getaffinity != 242
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_AARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_X86_64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_PPC64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 222
#elif __NR_sched_setaffinity != 222
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 223
#elif __NR_sched_getaffinity != 223
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 4239
#elif __NR_sched_setaffinity != 4239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 4240
#elif __NR_sched_getaffinity != 4240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 5195
#elif __NR_sched_setaffinity != 5195
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 5196
#elif __NR_sched_getaffinity != 5196
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#else
#error Unknown or unsupported architecture
#endif /* KMP_ARCH_* */
#elif KMP_OS_FREEBSD
#include <pthread.h>
#include <pthread_np.h>
#endif
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef unsigned char mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;

  public:
    mask_t *mask;
    Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_affin_mask_size * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
#if KMP_OS_LINUX
      int retval =
          syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD
      int r = pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                     reinterpret_cast<cpuset_t *>(mask));
      int retval = (r == 0 ? 0 : -1);
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
#if KMP_OS_LINUX
      int retval =
          syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD
      int r = pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                     reinterpret_cast<cpuset_t *>(mask));
      int retval = (r == 0 ? 0 : -1);
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override {
    KMPNativeAffinity::Mask *retval = new Mask();
    return retval;
  }
  void deallocate_mask(KMPAffinity::Mask *m) override {
    KMPNativeAffinity::Mask *native_mask =
        static_cast<KMPNativeAffinity::Mask *>(m);
    delete native_mask;
  }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *linux_array = static_cast<Mask *>(array);
    delete[] linux_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *linux_array = static_cast<Mask *>(array);
    return &(linux_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */
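
/* Sketch of the iteration idiom the begin()/end()/next() methods support
   (hypothetical caller code; the runtime wraps this pattern in macros):

     for (int i = m->begin(); i != m->end(); i = m->next(i)) {
       // i is the index of a set bit, i.e. a usable logical CPU
     }

   Both implementations satisfy this loop: the hwloc mask uses -1 as its end
   sentinel, while the native masks use one past the highest bit index. */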

#if KMP_OS_WINDOWS
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef ULONG_PTR mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    mask_t *mask;

  public:
    Mask() {
      mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
    }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int set_system_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups > 1) {
        // Check for a valid mask.
        GROUP_AFFINITY ga;
        int group = get_proc_group();
        if (group < 0) {
          if (abort_on_error) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
          }
          return -1;
        }
        // Transform the bit vector into a GROUP_AFFINITY struct
        // and make the system call to set affinity.
        ga.Group = group;
        ga.Mask = mask[group];
        ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

        KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
        if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      } else {
        if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int get_system_affinity(bool abort_on_error) override {
      if (__kmp_num_proc_groups > 1) {
        this->zero();
        GROUP_AFFINITY ga;
        KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
        if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        if ((ga.Group < 0) || (ga.Group >= __kmp_num_proc_groups) ||
            (ga.Mask == 0)) {
          return -1;
        }
        mask[ga.Group] = ga.Mask;
      } else {
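        // There is no direct "get thread affinity" call used here; the code
        // below recovers the current mask from the previous-value return of
        // SetThreadAffinityMask(), then restores it with a second call.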
        mask_t newMask, sysMask, retval;
        if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
        if (!retval) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
        if (!newMask) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
        }
        *mask = retval;
      }
      return 0;
    }
    int get_proc_group() const override {
      int group = -1;
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        if (mask[i] == 0)
          continue;
        if (group >= 0)
          return -1;
        group = i;
      }
      return group;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *windows_array = static_cast<Mask *>(array);
    delete[] windows_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *windows_array = static_cast<Mask *>(array);
    return &(windows_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_WINDOWS */
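
/* Sketch of how a flat processor index maps to a (group, bit) pair on
   Windows, mirroring the arithmetic in set()/is_set()/clear() above
   (illustrative values; BITS_PER_MASK_T is 64 on 64-bit Windows):

     int which = 70;                       // flat index across all groups
     int group = which / BITS_PER_MASK_T;  // -> group 1
     int bit = which % BITS_PER_MASK_T;    // -> bit 6 within that group
     // set(70) effectively does: mask[group] |= (mask_t)1 << bit;
*/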
#endif /* KMP_AFFINITY_SUPPORTED */

class Address {
public:
  static const unsigned maxDepth = 32;
  unsigned labels[maxDepth];
  unsigned childNums[maxDepth];
  unsigned depth;
  unsigned leader;
  Address(unsigned _depth) : depth(_depth), leader(FALSE) {}
  Address &operator=(const Address &b) {
    depth = b.depth;
    for (unsigned i = 0; i < depth; i++) {
      labels[i] = b.labels[i];
      childNums[i] = b.childNums[i];
    }
    leader = FALSE;
    return *this;
  }
  bool operator==(const Address &b) const {
    if (depth != b.depth)
      return false;
    for (unsigned i = 0; i < depth; i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool isClose(const Address &b, int level) const {
    if (depth != b.depth)
      return false;
    if ((unsigned)level >= depth)
      return true;
    for (unsigned i = 0; i < (depth - level); i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool operator!=(const Address &b) const { return !operator==(b); }
  void print() const {
    unsigned i;
    printf("Depth: %u --- ", depth);
    for (i = 0; i < depth; i++) {
      printf("%u ", labels[i]);
    }
  }
};
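
/* Illustrative Address for a machine with 2 packages, 4 cores/package and
   2 HW threads/core (hypothetical labels):

     Address addr(3);       // depth 3: package / core / HW thread
     addr.labels[0] = 1;    // package 1
     addr.labels[1] = 3;    // core 3 within that package
     addr.labels[2] = 0;    // HW thread 0 within that core

   Two addresses compare equal only if every label matches; isClose(b, 1)
   compares all but the innermost level, so two HW threads on the same core
   are "close" at level 1. */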

class AddrUnsPair {
public:
  Address first;
  unsigned second;
  AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {}
  AddrUnsPair &operator=(const AddrUnsPair &b) {
    first = b.first;
    second = b.second;
    return *this;
  }
  void print() const {
    printf("first = ");
    first.print();
    printf(" --- second = %u", second);
  }
  bool operator==(const AddrUnsPair &b) const {
    if (first != b.first)
      return false;
    if (second != b.second)
      return false;
    return true;
  }
  bool operator!=(const AddrUnsPair &b) const { return !operator==(b); }
};

static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) {
  const Address *aa = &(((const AddrUnsPair *)a)->first);
  const Address *bb = &(((const AddrUnsPair *)b)->first);
  unsigned depth = aa->depth;
  unsigned i;
  KMP_DEBUG_ASSERT(depth == bb->depth);
  for (i = 0; i < depth; i++) {
    if (aa->labels[i] < bb->labels[i])
      return -1;
    if (aa->labels[i] > bb->labels[i])
      return 1;
  }
  return 0;
}

/* A structure for holding machine-specific hierarchy info to be computed once
   at init. This structure represents a mapping of threads to the actual
   machine hierarchy, or to our best guess at what the hierarchy might be, for
   the purpose of performing an efficient barrier. In the worst case, when
   there is no machine hierarchy information, it produces a tree suitable for
   a barrier, similar to the tree used in the hyper barrier. */
class hierarchy_info {
public:
  /* Good default values for number of leaves and branching factor, given no
     affinity information. Behaves a bit like hyper barrier. */
  static const kmp_uint32 maxLeaves = 4;
  static const kmp_uint32 minBranch = 4;
  /** Number of levels in the hierarchy. Typical levels are threads/core,
      cores/package or socket, packages/node, nodes/machine, etc. We don't
      want to get specific with nomenclature. When the machine is
      oversubscribed we add levels to duplicate the hierarchy, doubling the
      thread capacity of the hierarchy each time we add a level. */
  kmp_uint32 maxLevels;

  /** This is specifically the depth of the machine configuration hierarchy,
      in terms of the number of levels along the longest path from root to any
      leaf. It corresponds to the number of entries in numPerLevel if we
      exclude all but one trailing 1. */
  kmp_uint32 depth;
  kmp_uint32 base_num_threads;
  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
  volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
  // 2=initialization in progress
  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

  /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children
      the parent of a node at level i has. For example, if we have a machine
      with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel =
      {2, 4, 4, 1, 1}. All empty levels are set to 1. */
  kmp_uint32 *numPerLevel;
  kmp_uint32 *skipPerLevel;

  void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
    int hier_depth = adr2os[0].first.depth;
    int level = 0;
    for (int i = hier_depth - 1; i >= 0; --i) {
      int max = -1;
      for (int j = 0; j < num_addrs; ++j) {
        int next = adr2os[j].first.childNums[i];
        if (next > max)
          max = next;
      }
      numPerLevel[level] = max + 1;
      ++level;
    }
  }
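
  /* Worked example of deriveLevels() (hypothetical input): given 8 addresses
     of depth 3 describing 2 packages x 2 cores x 2 HW threads, each
     childNums[i] ranges over {0, 1}, so walking the address levels
     innermost-first yields numPerLevel = {2, 2, 2, 1, ...}: 2 threads/core at
     level 0, 2 cores/package at level 1, and 2 packages at level 2. */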

  hierarchy_info()
      : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}

  void fini() {
    if (!uninitialized && numPerLevel) {
      __kmp_free(numPerLevel);
      numPerLevel = NULL;
      uninitialized = not_initialized;
    }
  }

  void init(AddrUnsPair *adr2os, int num_addrs) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
        &uninitialized, not_initialized, initializing);
    if (bool_result == 0) { // Wait for initialization
      while (TCR_1(uninitialized) != initialized)
        KMP_CPU_PAUSE();
      return;
    }
    KMP_DEBUG_ASSERT(bool_result == 1);

    /* The data fields are explicitly initialized here to prevent use of dirty
       values observed when the static library is re-initialized multiple
       times (e.g., when a non-OpenMP thread repeatedly launches/joins a
       thread that uses OpenMP). */
    depth = 1;
    resizing = 0;
    maxLevels = 7;
    numPerLevel =
        (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
    skipPerLevel = &(numPerLevel[maxLevels]);
    for (kmp_uint32 i = 0; i < maxLevels;
         ++i) { // init numPerLevel[*] to 1 item per level
      numPerLevel[i] = 1;
      skipPerLevel[i] = 1;
    }

    // Sort table by physical ID
    if (adr2os) {
      qsort(adr2os, num_addrs, sizeof(*adr2os),
            __kmp_affinity_cmp_Address_labels);
      deriveLevels(adr2os, num_addrs);
    } else {
      numPerLevel[0] = maxLeaves;
      numPerLevel[1] = num_addrs / maxLeaves;
      if (num_addrs % maxLeaves)
        numPerLevel[1]++;
    }

    base_num_threads = num_addrs;
    for (int i = maxLevels - 1; i >= 0;
         --i) // count non-empty levels to get depth
      if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
        depth++;

    kmp_uint32 branch = minBranch;
    if (numPerLevel[0] == 1)
      branch = num_addrs / maxLeaves;
    if (branch < minBranch)
      branch = minBranch;
    for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
      while (numPerLevel[d] > branch ||
             (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
        if (numPerLevel[d] & 1)
          numPerLevel[d]++;
        numPerLevel[d] = numPerLevel[d] >> 1;
        if (numPerLevel[d + 1] == 1)
          depth++;
        numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
      }
      if (numPerLevel[0] == 1) {
        branch = branch >> 1;
        if (branch < 4)
          branch = minBranch;
      }
    }

    for (kmp_uint32 i = 1; i < depth; ++i)
      skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
    // Fill in hierarchy in the case of oversubscription
    for (kmp_uint32 i = depth; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    uninitialized = initialized; // One writer
  }
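
  /* Worked example of init() with no topology information (adr2os == NULL)
     and num_addrs == 16: the else branch above sets numPerLevel = {4, 4, 1,
     ...} (maxLeaves leaves per node, 16/4 = 4 nodes), the counting loop
     leaves depth == 3, and skipPerLevel becomes {1, 4, 16, 32, 64, 128, 256},
     doubling past depth so oversubscription can be absorbed later. */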

  // Resize the hierarchy if nproc changes to something larger than before
  void resize(kmp_uint32 nproc) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    while (bool_result == 0) { // someone else is trying to resize
      KMP_CPU_PAUSE();
      if (nproc <= base_num_threads) // happy with other thread's resize
        return;
      else // try to resize
        bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    }
    KMP_DEBUG_ASSERT(bool_result != 0);
    if (nproc <= base_num_threads)
      return; // happy with other thread's resize

    // Calculate new maxLevels
    kmp_uint32 old_sz = skipPerLevel[depth - 1];
    kmp_uint32 incs = 0, old_maxLevels = maxLevels;
    // First see if old maxLevels is enough to contain new size
    for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];
      numPerLevel[i - 1] *= 2;
      old_sz *= 2;
      depth++;
    }
    if (nproc > old_sz) { // Not enough space, need to expand hierarchy
      while (nproc > old_sz) {
        old_sz *= 2;
        incs++;
        depth++;
      }
      maxLevels += incs;

      // Resize arrays
      kmp_uint32 *old_numPerLevel = numPerLevel;
      kmp_uint32 *old_skipPerLevel = skipPerLevel;
      numPerLevel = skipPerLevel = NULL;
      numPerLevel =
          (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
      skipPerLevel = &(numPerLevel[maxLevels]);

      // Copy old elements from old arrays
      for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
        numPerLevel[i] = old_numPerLevel[i];
        skipPerLevel[i] = old_skipPerLevel[i];
      }

      // Init new elements in arrays to 1
      for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
        numPerLevel[i] = 1;
        skipPerLevel[i] = 1;
      }

      // Free old arrays
      __kmp_free(old_numPerLevel);
    }

    // Fill in oversubscription levels of hierarchy
    for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    base_num_threads = nproc;
    resizing = 0; // One writer
  }
};
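
/* Worked example of resize() (hypothetical growth): starting from the
   16-thread hierarchy sketched above (depth 3, skipPerLevel = {1, 4, 16, 32,
   64, 128, 256}, maxLevels 7), resize(40) finds old_sz = skipPerLevel[2] =
   16 < 40 and doubles the existing spare levels: one step gives old_sz 32
   (still < 40), the next gives 64 >= 40, leaving depth 5 and
   base_num_threads 40 with no reallocation of the arrays. */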
#endif // KMP_AFFINITY_H