kmp_affinity.h revision 355940
/*
 * kmp_affinity.h -- header for affinity management
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

#include "kmp.h"
#include "kmp_os.h"

#if KMP_AFFINITY_SUPPORTED
#if KMP_USE_HWLOC
class KMPHwlocAffinity : public KMPAffinity {
public:
  class Mask : public KMPAffinity::Mask {
    hwloc_cpuset_t mask;

  public:
    Mask() {
      mask = hwloc_bitmap_alloc();
      this->zero();
    }
    ~Mask() { hwloc_bitmap_free(mask); }
    void set(int i) override { hwloc_bitmap_set(mask, i); }
    bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
    void clear(int i) override { hwloc_bitmap_clr(mask, i); }
    void zero() override { hwloc_bitmap_zero(mask); }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      hwloc_bitmap_copy(mask, convert->mask);
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_and(mask, mask, convert->mask);
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_or(mask, mask, convert->mask);
    }
    void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
    int begin() const override { return hwloc_bitmap_first(mask); }
    int end() const override { return -1; }
    int next(int previous) const override {
      return hwloc_bitmap_next(mask, previous);
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      int retval =
          hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      int retval =
          hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int get_proc_group() const override {
      int group = -1;
#if KMP_OS_WINDOWS
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        // On Windows, the long type is always 32 bits
        unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
        unsigned long second_32_bits =
            hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
        if (first_32_bits == 0 && second_32_bits == 0) {
          continue;
        }
        if (group >= 0) {
          return -1;
        }
        group = i;
      }
#endif /* KMP_OS_WINDOWS */
      return group;
    }
  };
  void determine_capable(const char *var) override {
    const hwloc_topology_support *topology_support;
    if (__kmp_hwloc_topology == NULL) {
      if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
      }
      if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
      }
    }
    topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
    // Is the system capable of setting/getting this thread's affinity?
    // Also, is topology discovery possible? (pu indicates ability to discover
    // processing units). And finally, were there no errors when calling any
    // hwloc_* API functions?
    if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
        topology_support->cpubind->get_thisthread_cpubind &&
        topology_support->discovery->pu && !__kmp_hwloc_error) {
      // enables affinity according to KMP_AFFINITY_CAPABLE() macro
      KMP_AFFINITY_ENABLE(TRUE);
    } else {
      // indicate that hwloc didn't work and disable affinity
      __kmp_hwloc_error = TRUE;
      KMP_AFFINITY_DISABLE();
    }
  }
  void bind_thread(int which) override {
    KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                "Illegal set affinity operation when not capable");
    KMPAffinity::Mask *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);
    KMP_CPU_SET(which, mask);
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    delete[] hwloc_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    return &(hwloc_array[index]);
  }
  api_type get_api_type() const override { return HWLOC; }
};
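// Note: every Mask implementation in this header follows the same iteration
// protocol: begin() returns the index of the first set bit, next(i) returns
// the index of the next set bit after i, and end() is a sentinel (for hwloc
// it is -1, for the native masks it is the total number of bits in the mask).
// An illustrative (not normative) traversal over a KMPAffinity::Mask *m is:
//
//   for (int i = m->begin(); i != m->end(); i = m->next(i)) {
//     // bit i is set, i.e. OS proc i is in the mask
//   }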
#endif /* KMP_USE_HWLOC */

#if KMP_OS_LINUX
/* On some of the older OS's that we build on, these constants aren't present
   in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
   all systems of the same arch where they are defined, and they cannot change;
   they are set in stone forever. */
#include <sys/syscall.h>
#if KMP_ARCH_X86 || KMP_ARCH_ARM
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 241
#elif __NR_sched_setaffinity != 241
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 242
#elif __NR_sched_getaffinity != 242
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_AARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_X86_64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_PPC64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 222
#elif __NR_sched_setaffinity != 222
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 223
#elif __NR_sched_getaffinity != 223
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 4239
#elif __NR_sched_setaffinity != 4239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 4240
#elif __NR_sched_getaffinity != 4240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 5195
#elif __NR_sched_setaffinity != 5195
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 5196
#elif __NR_sched_getaffinity != 5196
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#else
#error Unknown or unsupported architecture
#endif /* KMP_ARCH_* */
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef unsigned char mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
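    // The mask is an array of __kmp_affin_mask_size bytes: OS proc i maps to
    // byte i / BITS_PER_MASK_T and bit i % BITS_PER_MASK_T within that byte.
    // For example (illustrative only), with BITS_PER_MASK_T == 8, proc 11
    // lives in mask[1] at bit position 3.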

  public:
    mask_t *mask;
    Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_affin_mask_size * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      int retval =
          syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      int retval =
          syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override {
    KMPNativeAffinity::Mask *retval = new Mask();
    return retval;
  }
  void deallocate_mask(KMPAffinity::Mask *m) override {
    KMPNativeAffinity::Mask *native_mask =
        static_cast<KMPNativeAffinity::Mask *>(m);
    delete native_mask;
  }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *linux_array = static_cast<Mask *>(array);
    delete[] linux_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *linux_array = static_cast<Mask *>(array);
    return &(linux_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_LINUX */

#if KMP_OS_WINDOWS
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef ULONG_PTR mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    mask_t *mask;

  public:
    Mask() {
      mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
    }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int set_system_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups > 1) {
        // Check for a valid mask.
        GROUP_AFFINITY ga;
        int group = get_proc_group();
        if (group < 0) {
          if (abort_on_error) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
          }
          return -1;
        }
        // Transform the bit vector into a GROUP_AFFINITY struct
        // and make the system call to set affinity.
        ga.Group = group;
        ga.Mask = mask[group];
        ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

        KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
        if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      } else {
        if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int get_system_affinity(bool abort_on_error) override {
      if (__kmp_num_proc_groups > 1) {
        this->zero();
        GROUP_AFFINITY ga;
        KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
        if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
            (ga.Mask == 0)) {
          return -1;
        }
        mask[ga.Group] = ga.Mask;
      } else {
        mask_t newMask, sysMask, retval;
        if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
        if (!retval) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
        if (!newMask) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
        }
        *mask = retval;
      }
      return 0;
    }
    int get_proc_group() const override {
      int group = -1;
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        if (mask[i] == 0)
          continue;
        if (group >= 0)
          return -1;
        group = i;
      }
      return group;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *windows_array = static_cast<Mask *>(array);
    delete[] windows_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *windows_array = static_cast<Mask *>(array);
    return &(windows_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
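// Note on processor groups: when __kmp_num_proc_groups > 1, the Mask above
// stores one mask_t word per Windows processor group, and get_proc_group()
// reports the single group the mask falls in (or -1 if bits are set in more
// than one group, since a thread's affinity can only target one group).
// Illustrative example (an assumption, not taken from the source): on 64-bit
// Windows with two 64-processor groups, a mask with only bit 70 set lies
// entirely in group 1 (70 / 64 == 1), so get_proc_group() returns 1 and
// set_system_affinity() calls __kmp_SetThreadGroupAffinity with ga.Group = 1
// and ga.Mask = mask[1].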
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_AFFINITY_SUPPORTED */

class Address {
public:
  static const unsigned maxDepth = 32;
  unsigned labels[maxDepth];
  unsigned childNums[maxDepth];
  unsigned depth;
  unsigned leader;
  Address(unsigned _depth) : depth(_depth), leader(FALSE) {}
  Address &operator=(const Address &b) {
    depth = b.depth;
    for (unsigned i = 0; i < depth; i++) {
      labels[i] = b.labels[i];
      childNums[i] = b.childNums[i];
    }
    leader = FALSE;
    return *this;
  }
  bool operator==(const Address &b) const {
    if (depth != b.depth)
      return false;
    for (unsigned i = 0; i < depth; i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool isClose(const Address &b, int level) const {
    if (depth != b.depth)
      return false;
    if ((unsigned)level >= depth)
      return true;
    for (unsigned i = 0; i < (depth - level); i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool operator!=(const Address &b) const { return !operator==(b); }
  void print() const {
    unsigned i;
    printf("Depth: %u --- ", depth);
    for (i = 0; i < depth; i++) {
      printf("%u ", labels[i]);
    }
  }
};
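// Illustrative reading of an Address (a sketch of a typical topology, not text
// from the original source): with depth == 3 the labels run from the outermost
// level inward, e.g. labels = {1, 2, 0} could denote package 1, core 2 within
// that package, and hardware thread 0 within that core.
// __kmp_affinity_cmp_Address_labels below compares labels[0] first, so sorting
// groups addresses by package, then core, then thread, and isClose(b, level)
// ignores the last `level` labels when comparing two addresses.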

class AddrUnsPair {
public:
  Address first;
  unsigned second;
  AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {}
  AddrUnsPair &operator=(const AddrUnsPair &b) {
    first = b.first;
    second = b.second;
    return *this;
  }
  void print() const {
    printf("first = ");
    first.print();
    printf(" --- second = %u", second);
  }
  bool operator==(const AddrUnsPair &b) const {
    if (first != b.first)
      return false;
    if (second != b.second)
      return false;
    return true;
  }
  bool operator!=(const AddrUnsPair &b) const { return !operator==(b); }
};

static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) {
  const Address *aa = &(((const AddrUnsPair *)a)->first);
  const Address *bb = &(((const AddrUnsPair *)b)->first);
  unsigned depth = aa->depth;
  unsigned i;
  KMP_DEBUG_ASSERT(depth == bb->depth);
  for (i = 0; i < depth; i++) {
    if (aa->labels[i] < bb->labels[i])
      return -1;
    if (aa->labels[i] > bb->labels[i])
      return 1;
  }
  return 0;
}

/* A structure for holding machine-specific hierarchy info to be computed once
   at init. This structure represents a mapping of threads to the actual machine
   hierarchy, or to our best guess at what the hierarchy might be, for the
   purpose of performing an efficient barrier. In the worst case, when there is
   no machine hierarchy information, it produces a tree suitable for a barrier,
   similar to the tree used in the hyper barrier. */
class hierarchy_info {
public:
  /* Good default values for number of leaves and branching factor, given no
     affinity information. Behaves a bit like hyper barrier. */
  static const kmp_uint32 maxLeaves = 4;
  static const kmp_uint32 minBranch = 4;
  /** Number of levels in the hierarchy. Typical levels are threads/core,
      cores/package or socket, packages/node, nodes/machine, etc. We don't want
      to get specific with nomenclature. When the machine is oversubscribed we
      add levels to duplicate the hierarchy, doubling the thread capacity of the
      hierarchy each time we add a level. */
  kmp_uint32 maxLevels;

  /** This is specifically the depth of the machine configuration hierarchy, in
      terms of the number of levels along the longest path from root to any
      leaf. It corresponds to the number of entries in numPerLevel if we exclude
      all but one trailing 1. */
  kmp_uint32 depth;
  kmp_uint32 base_num_threads;
  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
  volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
  // 2=initialization in progress
  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

  /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children
      the parent of a node at level i has. For example, if we have a machine
      with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel =
      {2, 4, 4, 1, 1}. All empty levels are set to 1. */
  kmp_uint32 *numPerLevel;
  kmp_uint32 *skipPerLevel;
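  // Worked example (a sketch based on the example above, not text from the
  // original source): with numPerLevel = {2, 4, 4, 1, 1, 1, 1}, init() below
  // computes skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1] for the
  // in-depth levels and doubles it for the oversubscription levels, giving
  // skipPerLevel = {1, 2, 8, 32, 64, 128, 256}: adjacent cores are 2 leaf
  // slots apart, adjacent packages 8 apart, and so on.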

  void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
    int hier_depth = adr2os[0].first.depth;
    int level = 0;
    for (int i = hier_depth - 1; i >= 0; --i) {
      int max = -1;
      for (int j = 0; j < num_addrs; ++j) {
        int next = adr2os[j].first.childNums[i];
        if (next > max)
          max = next;
      }
      numPerLevel[level] = max + 1;
      ++level;
    }
  }
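  // In other words, deriveLevels() walks the address labels from the innermost
  // hierarchy level outward and sets numPerLevel[level] to the largest child
  // number seen at that level plus one. Illustrative case (an assumption, not
  // from the source): if the hardware-thread childNums across all addresses
  // are 0 and 1 and the core childNums run 0..3, then numPerLevel[0] = 2 and
  // numPerLevel[1] = 4.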

  hierarchy_info()
      : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}

  void fini() {
    if (!uninitialized && numPerLevel) {
      __kmp_free(numPerLevel);
      numPerLevel = NULL;
      uninitialized = not_initialized;
    }
  }

  void init(AddrUnsPair *adr2os, int num_addrs) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
        &uninitialized, not_initialized, initializing);
    if (bool_result == 0) { // Wait for initialization
      while (TCR_1(uninitialized) != initialized)
        KMP_CPU_PAUSE();
      return;
    }
    KMP_DEBUG_ASSERT(bool_result == 1);

    /* Added explicit initialization of the data fields here to prevent usage of
       dirty value observed when static library is re-initialized multiple times
       (e.g. when non-OpenMP thread repeatedly launches/joins thread that uses
       OpenMP). */
    depth = 1;
    resizing = 0;
    maxLevels = 7;
    numPerLevel =
        (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
    skipPerLevel = &(numPerLevel[maxLevels]);
    for (kmp_uint32 i = 0; i < maxLevels;
         ++i) { // init numPerLevel[*] to 1 item per level
      numPerLevel[i] = 1;
      skipPerLevel[i] = 1;
    }

    // Sort table by physical ID
    if (adr2os) {
      qsort(adr2os, num_addrs, sizeof(*adr2os),
            __kmp_affinity_cmp_Address_labels);
      deriveLevels(adr2os, num_addrs);
    } else {
      numPerLevel[0] = maxLeaves;
      numPerLevel[1] = num_addrs / maxLeaves;
      if (num_addrs % maxLeaves)
        numPerLevel[1]++;
    }

    base_num_threads = num_addrs;
    for (int i = maxLevels - 1; i >= 0;
         --i) // count non-empty levels to get depth
      if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
        depth++;

    kmp_uint32 branch = minBranch;
    if (numPerLevel[0] == 1)
      branch = num_addrs / maxLeaves;
    if (branch < minBranch)
      branch = minBranch;
    for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
      while (numPerLevel[d] > branch ||
             (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
        if (numPerLevel[d] & 1)
          numPerLevel[d]++;
        numPerLevel[d] = numPerLevel[d] >> 1;
        if (numPerLevel[d + 1] == 1)
          depth++;
        numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
      }
      if (numPerLevel[0] == 1) {
        branch = branch >> 1;
        if (branch < 4)
          branch = minBranch;
      }
    }

    for (kmp_uint32 i = 1; i < depth; ++i)
      skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
    // Fill in hierarchy in the case of oversubscription
    for (kmp_uint32 i = depth; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    uninitialized = initialized; // One writer
  }

  // Resize the hierarchy if nproc changes to something larger than before
  void resize(kmp_uint32 nproc) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    while (bool_result == 0) { // someone else is trying to resize
      KMP_CPU_PAUSE();
      if (nproc <= base_num_threads) // happy with other thread's resize
        return;
      else // try to resize
        bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    }
    KMP_DEBUG_ASSERT(bool_result != 0);
    if (nproc <= base_num_threads)
      return; // happy with other thread's resize

    // Calculate new maxLevels
    kmp_uint32 old_sz = skipPerLevel[depth - 1];
    kmp_uint32 incs = 0, old_maxLevels = maxLevels;
    // First see if old maxLevels is enough to contain new size
    for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];
      numPerLevel[i - 1] *= 2;
      old_sz *= 2;
      depth++;
    }
    if (nproc > old_sz) { // Not enough space, need to expand hierarchy
      while (nproc > old_sz) {
        old_sz *= 2;
        incs++;
        depth++;
      }
      maxLevels += incs;

      // Resize arrays
      kmp_uint32 *old_numPerLevel = numPerLevel;
      kmp_uint32 *old_skipPerLevel = skipPerLevel;
      numPerLevel = skipPerLevel = NULL;
      numPerLevel =
          (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
      skipPerLevel = &(numPerLevel[maxLevels]);

      // Copy old elements from old arrays
      for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
        numPerLevel[i] = old_numPerLevel[i];
        skipPerLevel[i] = old_skipPerLevel[i];
      }

      // Init new elements in arrays to 1
      for (kmp_uint32 i = old_maxLevels; i < maxLevels;
           ++i) { // init numPerLevel[*] to 1 item per level
        numPerLevel[i] = 1;
        skipPerLevel[i] = 1;
      }

      // Free old arrays
      __kmp_free(old_numPerLevel);
    }

    // Fill in oversubscription levels of hierarchy
    for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    base_num_threads = nproc;
    resizing = 0; // One writer
  }
};
#endif // KMP_AFFINITY_H