/*
 * kmp_threadprivate.cpp -- OpenMP threadprivate support library
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"

#define USE_CHECKS_COMMON

#define KMP_INLINE_SUBR 1

void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
                                           void *data_addr, size_t pc_size);
struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
                                                void *data_addr,
                                                size_t pc_size);

struct shared_table __kmp_threadprivate_d_table;
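// Editorial note on structure (derived from the code below): this table is
// the single process-wide registry of threadprivate variables (shared_common
// nodes, keyed by the variable's global address). Each thread additionally
// keeps its own th.th_pri_common hash table of private_common nodes mapping
// a global address to that thread's private copy (par_addr).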

static
#ifdef KMP_INLINE_SUBR
    __forceinline
#endif
    struct private_common *
    __kmp_threadprivate_find_task_common(struct common_table *tbl, int gtid,
                                         void *pc_addr)

{
  struct private_common *tn;

#ifdef KMP_TASK_COMMON_DEBUG
  KC_TRACE(10, ("__kmp_threadprivate_find_task_common: thread#%d, called with "
                "address %p\n",
                gtid, pc_addr));
  dump_list();
#endif

  for (tn = tbl->data[KMP_HASH(pc_addr)]; tn; tn = tn->next) {
    if (tn->gbl_addr == pc_addr) {
#ifdef KMP_TASK_COMMON_DEBUG
      KC_TRACE(10, ("__kmp_threadprivate_find_task_common: thread#%d, found "
                    "node %p on list\n",
                    gtid, pc_addr));
#endif
      return tn;
    }
  }
  return 0;
}

static
#ifdef KMP_INLINE_SUBR
    __forceinline
#endif
    struct shared_common *
    __kmp_find_shared_task_common(struct shared_table *tbl, int gtid,
                                  void *pc_addr) {
  struct shared_common *tn;

  for (tn = tbl->data[KMP_HASH(pc_addr)]; tn; tn = tn->next) {
    if (tn->gbl_addr == pc_addr) {
#ifdef KMP_TASK_COMMON_DEBUG
      KC_TRACE(
          10,
          ("__kmp_find_shared_task_common: thread#%d, found node %p on list\n",
           gtid, pc_addr));
#endif
      return tn;
    }
  }
  return 0;
}

// Create a template for the data initialized storage. Either the template is
// NULL indicating zero fill, or the template is a copy of the original data.
static struct private_data *__kmp_init_common_data(void *pc_addr,
                                                   size_t pc_size) {
  struct private_data *d;
  size_t i;
  char *p;

  d = (struct private_data *)__kmp_allocate(sizeof(struct private_data));
  /*
      d->data = 0;  // AC: commented out because __kmp_allocate zeroes the
                    // memory
      d->next = 0;
  */
  d->size = pc_size;
  d->more = 1;

  p = (char *)pc_addr;

  for (i = pc_size; i > 0; --i) {
    if (*p++ != '\0') {
      d->data = __kmp_allocate(pc_size);
      KMP_MEMCPY(d->data, pc_addr, pc_size);
      break;
    }
  }

  return d;
}
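/* Illustrative example (a sketch, not code the runtime executes): for a
   hypothetical threadprivate "int tp_x = 5;" the byte scan above finds a
   non-zero byte, so the template keeps a private copy of the initializer
   image; for "int tp_y;" (an all-zero image) d->data stays NULL, which
   __kmp_copy_common_data() below interprets as zero fill. */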

// Initialize the data area from the template.
static void __kmp_copy_common_data(void *pc_addr, struct private_data *d) {
  char *addr = (char *)pc_addr;
  int i, offset;

  for (offset = 0; d != 0; d = d->next) {
    for (i = d->more; i > 0; --i) {
      if (d->data == 0)
        memset(&addr[offset], '\0', d->size);
      else
        KMP_MEMCPY(&addr[offset], d->data, d->size);
      offset += d->size;
    }
  }
}
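/* Worked example (hypothetical template list): {size=8, more=3, data=P} ->
   {size=4, more=1, data=NULL} expands to three 8-byte copies of pattern P
   followed by 4 zero bytes, 28 bytes in total; each node describes `more`
   consecutive copies of a `size`-byte run. */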

/* we are called from __kmp_serial_initialize() with __kmp_initz_lock held. */
void __kmp_common_initialize(void) {
  if (!TCR_4(__kmp_init_common)) {
    int q;
#ifdef KMP_DEBUG
    int gtid;
#endif

    __kmp_threadpriv_cache_list = NULL;

#ifdef KMP_DEBUG
    /* verify the uber masters were initialized */
    for (gtid = 0; gtid < __kmp_threads_capacity; gtid++)
      if (__kmp_root[gtid]) {
        KMP_DEBUG_ASSERT(__kmp_root[gtid]->r.r_uber_thread);
        for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q)
          KMP_DEBUG_ASSERT(
              !__kmp_root[gtid]->r.r_uber_thread->th.th_pri_common->data[q]);
        /*                    __kmp_root[ gtid ]-> r.r_uber_thread ->
         * th.th_pri_common -> data[ q ] = 0;*/
      }
#endif /* KMP_DEBUG */

    for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q)
      __kmp_threadprivate_d_table.data[q] = 0;

    TCW_4(__kmp_init_common, TRUE);
  }
}

/* Call all destructors for threadprivate data belonging to all threads.
   Currently unused! */
void __kmp_common_destroy(void) {
  if (TCR_4(__kmp_init_common)) {
    int q;

    TCW_4(__kmp_init_common, FALSE);

    for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) {
      int gtid;
      struct private_common *tn;
      struct shared_common *d_tn;

      /* C++ destructors need to be called once per thread before exiting.
         Don't call destructors for the master thread, though, unless we used
         the copy constructor. */

      for (d_tn = __kmp_threadprivate_d_table.data[q]; d_tn;
           d_tn = d_tn->next) {
        if (d_tn->is_vec) {
          if (d_tn->dt.dtorv != 0) {
            for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
              if (__kmp_threads[gtid]) {
                if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid))
                                       : (!KMP_UBER_GTID(gtid))) {
                  tn = __kmp_threadprivate_find_task_common(
                      __kmp_threads[gtid]->th.th_pri_common, gtid,
                      d_tn->gbl_addr);
                  if (tn) {
                    (*d_tn->dt.dtorv)(tn->par_addr, d_tn->vec_len);
                  }
                }
              }
            }
            if (d_tn->obj_init != 0) {
              (*d_tn->dt.dtorv)(d_tn->obj_init, d_tn->vec_len);
            }
          }
        } else {
          if (d_tn->dt.dtor != 0) {
            for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
              if (__kmp_threads[gtid]) {
                if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid))
                                       : (!KMP_UBER_GTID(gtid))) {
                  tn = __kmp_threadprivate_find_task_common(
                      __kmp_threads[gtid]->th.th_pri_common, gtid,
                      d_tn->gbl_addr);
                  if (tn) {
                    (*d_tn->dt.dtor)(tn->par_addr);
                  }
                }
              }
            }
            if (d_tn->obj_init != 0) {
              (*d_tn->dt.dtor)(d_tn->obj_init);
            }
          }
        }
      }
      __kmp_threadprivate_d_table.data[q] = 0;
    }
  }
}

/* Call all destructors for threadprivate data belonging to this thread */
void __kmp_common_destroy_gtid(int gtid) {
  struct private_common *tn;
  struct shared_common *d_tn;

  if (!TCR_4(__kmp_init_gtid)) {
    // This is possible when one of multiple roots initiates early library
    // termination in a sequential region while other teams are active, and its
    // child threads are about to end.
    return;
  }

  KC_TRACE(10, ("__kmp_common_destroy_gtid: T#%d called\n", gtid));
  if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid)) : (!KMP_UBER_GTID(gtid))) {

    if (TCR_4(__kmp_init_common)) {

      /* Cannot do this here since not all threads have destroyed their data */
      /* TCW_4(__kmp_init_common, FALSE); */

      for (tn = __kmp_threads[gtid]->th.th_pri_head; tn; tn = tn->link) {

        d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, gtid,
                                             tn->gbl_addr);

        KMP_DEBUG_ASSERT(d_tn);

        if (d_tn->is_vec) {
          if (d_tn->dt.dtorv != 0) {
            (void)(*d_tn->dt.dtorv)(tn->par_addr, d_tn->vec_len);
          }
          if (d_tn->obj_init != 0) {
            (void)(*d_tn->dt.dtorv)(d_tn->obj_init, d_tn->vec_len);
          }
        } else {
          if (d_tn->dt.dtor != 0) {
            (void)(*d_tn->dt.dtor)(tn->par_addr);
          }
          if (d_tn->obj_init != 0) {
            (void)(*d_tn->dt.dtor)(d_tn->obj_init);
          }
        }
      }
      KC_TRACE(30, ("__kmp_common_destroy_gtid: T#%d threadprivate destructors "
                    "complete\n",
                    gtid));
    }
  }
}
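// Note: per the comment in __kmp_cleanup_threadprivate_caches() below, the
// runtime invokes this at the end of __kmp_launch_thread for each exiting
// worker thread.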

#ifdef KMP_TASK_COMMON_DEBUG
static void dump_list(void) {
  int p, q;

  for (p = 0; p < __kmp_all_nth; ++p) {
    if (!__kmp_threads[p])
      continue;
    for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) {
      if (__kmp_threads[p]->th.th_pri_common->data[q]) {
        struct private_common *tn;

        KC_TRACE(10, ("\tdump_list: gtid:%d addresses\n", p));

        for (tn = __kmp_threads[p]->th.th_pri_common->data[q]; tn;
             tn = tn->next) {
          KC_TRACE(10,
                   ("\tdump_list: THREADPRIVATE: Serial %p -> Parallel %p\n",
                    tn->gbl_addr, tn->par_addr));
        }
      }
    }
  }
}
#endif /* KMP_TASK_COMMON_DEBUG */

// NOTE: this routine is to be called only from the serial part of the program.
void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
                                           void *data_addr, size_t pc_size) {
  struct shared_common **lnk_tn, *d_tn;
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
                   __kmp_threads[gtid]->th.th_root->r.r_active == 0);

  d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, gtid,
                                       pc_addr);

  if (d_tn == 0) {
    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));

    d_tn->gbl_addr = pc_addr;
    d_tn->pod_init = __kmp_init_common_data(data_addr, pc_size);
    /*
            d_tn->obj_init = 0;  // AC: commented out because __kmp_allocate
                                 // zeroes the memory
            d_tn->ct.ctor = 0;
            d_tn->cct.cctor = 0;
            d_tn->dt.dtor = 0;
            d_tn->is_vec = FALSE;
            d_tn->vec_len = 0L;
    */
    d_tn->cmn_size = pc_size;

    __kmp_acquire_lock(&__kmp_global_lock, gtid);

    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(pc_addr)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;

    __kmp_release_lock(&__kmp_global_lock, gtid);
  }
}
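// Note: this registers only the shared template for pc_addr; no per-thread
// copy is created here. __kmpc_threadprivate() below takes this path when the
// root is still inactive, i.e. from the serial part of the program.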

struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
                                                void *data_addr,
                                                size_t pc_size) {
  struct private_common *tn, **tt;
  struct shared_common *d_tn;

  /* +++++++++ START OF CRITICAL SECTION +++++++++ */
  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  tn = (struct private_common *)__kmp_allocate(sizeof(struct private_common));

  tn->gbl_addr = pc_addr;

  d_tn = __kmp_find_shared_task_common(
      &__kmp_threadprivate_d_table, gtid,
      pc_addr); /* Only the MASTER data table exists. */

  if (d_tn != 0) {
    /* This threadprivate variable has already been seen. */

    if (d_tn->pod_init == 0 && d_tn->obj_init == 0) {
      d_tn->cmn_size = pc_size;

      if (d_tn->is_vec) {
        if (d_tn->ct.ctorv != 0) {
          /* Construct from scratch so no prototype exists */
          d_tn->obj_init = 0;
        } else if (d_tn->cct.cctorv != 0) {
          /* Now data initialize the prototype since it was previously
           * registered */
          d_tn->obj_init = (void *)__kmp_allocate(d_tn->cmn_size);
          (void)(*d_tn->cct.cctorv)(d_tn->obj_init, pc_addr, d_tn->vec_len);
        } else {
          d_tn->pod_init = __kmp_init_common_data(data_addr, d_tn->cmn_size);
        }
      } else {
        if (d_tn->ct.ctor != 0) {
          /* Construct from scratch so no prototype exists */
          d_tn->obj_init = 0;
        } else if (d_tn->cct.cctor != 0) {
          /* Now data initialize the prototype since it was previously
             registered */
          d_tn->obj_init = (void *)__kmp_allocate(d_tn->cmn_size);
          (void)(*d_tn->cct.cctor)(d_tn->obj_init, pc_addr);
        } else {
          d_tn->pod_init = __kmp_init_common_data(data_addr, d_tn->cmn_size);
        }
      }
    }
  } else {
    struct shared_common **lnk_tn;

    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
    d_tn->gbl_addr = pc_addr;
    d_tn->cmn_size = pc_size;
    d_tn->pod_init = __kmp_init_common_data(data_addr, pc_size);
    /*
            d_tn->obj_init = 0;  // AC: commented out because __kmp_allocate
                                 // zeroes the memory
            d_tn->ct.ctor = 0;
            d_tn->cct.cctor = 0;
            d_tn->dt.dtor = 0;
            d_tn->is_vec = FALSE;
            d_tn->vec_len = 0L;
    */
    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(pc_addr)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;
  }

  tn->cmn_size = d_tn->cmn_size;

  if ((__kmp_foreign_tp) ? (KMP_INITIAL_GTID(gtid)) : (KMP_UBER_GTID(gtid))) {
    tn->par_addr = (void *)pc_addr;
  } else {
    tn->par_addr = (void *)__kmp_allocate(tn->cmn_size);
  }

  __kmp_release_lock(&__kmp_global_lock, gtid);
/* +++++++++ END OF CRITICAL SECTION +++++++++ */

#ifdef USE_CHECKS_COMMON
  if (pc_size > d_tn->cmn_size) {
    KC_TRACE(
        10, ("__kmp_threadprivate_insert: THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC
             " ,%" KMP_UINTPTR_SPEC ")\n",
             pc_addr, pc_size, d_tn->cmn_size));
    KMP_FATAL(TPCommonBlocksInconsist);
  }
#endif /* USE_CHECKS_COMMON */

  tt = &(__kmp_threads[gtid]->th.th_pri_common->data[KMP_HASH(pc_addr)]);

#ifdef KMP_TASK_COMMON_DEBUG
  if (*tt != 0) {
    KC_TRACE(
        10,
        ("__kmp_threadprivate_insert: WARNING! thread#%d: collision on %p\n",
         gtid, pc_addr));
  }
#endif
  tn->next = *tt;
  *tt = tn;

#ifdef KMP_TASK_COMMON_DEBUG
  KC_TRACE(10,
           ("__kmp_threadprivate_insert: thread#%d, inserted node %p on list\n",
            gtid, pc_addr));
  dump_list();
#endif

  /* Link the node into a simple list */

  tn->link = __kmp_threads[gtid]->th.th_pri_head;
  __kmp_threads[gtid]->th.th_pri_head = tn;

  if ((__kmp_foreign_tp) ? (KMP_INITIAL_GTID(gtid)) : (KMP_UBER_GTID(gtid)))
    return tn;

  /* if C++ object with copy constructor, use it;
   * else if C++ object with constructor, use it for the non-master copies only;
   * else use pod_init and memcpy
   *
   * C++ constructors need to be called once for each non-master thread on
   * allocate
   * C++ copy constructors need to be called once for each thread on allocate */

  /* C++ object with constructors/destructors; don't call constructors for
     master thread though */
  if (d_tn->is_vec) {
    if (d_tn->ct.ctorv != 0) {
      (void)(*d_tn->ct.ctorv)(tn->par_addr, d_tn->vec_len);
    } else if (d_tn->cct.cctorv != 0) {
      (void)(*d_tn->cct.cctorv)(tn->par_addr, d_tn->obj_init, d_tn->vec_len);
    } else if (tn->par_addr != tn->gbl_addr) {
      __kmp_copy_common_data(tn->par_addr, d_tn->pod_init);
    }
  } else {
    if (d_tn->ct.ctor != 0) {
      (void)(*d_tn->ct.ctor)(tn->par_addr);
    } else if (d_tn->cct.cctor != 0) {
      (void)(*d_tn->cct.cctor)(tn->par_addr, d_tn->obj_init);
    } else if (tn->par_addr != tn->gbl_addr) {
      __kmp_copy_common_data(tn->par_addr, d_tn->pod_init);
    }
  }
  /* !BUILD_OPENMP_C
      if (tn->par_addr != tn->gbl_addr)
          __kmp_copy_common_data( tn->par_addr, d_tn->pod_init ); */

  return tn;
}

/* ------------------------------------------------------------------------ */
/* We are currently parallel, and we know the thread id.                    */
/* ------------------------------------------------------------------------ */

/*!
 @ingroup THREADPRIVATE

 @param loc source location information
 @param data  pointer to data being privatized
 @param ctor  pointer to constructor function for data
 @param cctor  pointer to copy constructor function for data
 @param dtor  pointer to destructor function for data

 Register constructors and destructors for thread private data.
 This function is called when executing in parallel, when we know the thread id.
*/
void __kmpc_threadprivate_register(ident_t *loc, void *data, kmpc_ctor ctor,
                                   kmpc_cctor cctor, kmpc_dtor dtor) {
  struct shared_common *d_tn, **lnk_tn;

  KC_TRACE(10, ("__kmpc_threadprivate_register: called\n"));

#ifdef USE_CHECKS_COMMON
  /* copy constructor must be zero for current code gen (Nov 2002 - jph) */
  KMP_ASSERT(cctor == 0);
#endif /* USE_CHECKS_COMMON */

  /* Only the global data table exists. */
  d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, -1, data);

  if (d_tn == 0) {
    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
    d_tn->gbl_addr = data;

    d_tn->ct.ctor = ctor;
    d_tn->cct.cctor = cctor;
    d_tn->dt.dtor = dtor;
    /*
            d_tn->is_vec = FALSE;  // AC: commented out because __kmp_allocate
                                   // zeroes the memory
            d_tn->vec_len = 0L;
            d_tn->obj_init = 0;
            d_tn->pod_init = 0;
    */
    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(data)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;
  }
}
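
/* Illustrative codegen sketch (an assumption for exposition; real compilers
   may emit something different) for "static T obj; #pragma omp
   threadprivate(obj)":

     // __t_ctor and __t_dtor are hypothetical helpers matching the
     // kmpc_ctor / kmpc_dtor typedefs declared in kmp.h; the copy-
     // constructor slot is NULL, matching the USE_CHECKS_COMMON
     // assertion above.
     __kmpc_threadprivate_register(&loc, &obj, __t_ctor, NULL, __t_dtor);

   Each thread's private copy is then materialized lazily by
   __kmpc_threadprivate() / __kmpc_threadprivate_cached() below. */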

void *__kmpc_threadprivate(ident_t *loc, kmp_int32 global_tid, void *data,
                           size_t size) {
  void *ret;
  struct private_common *tn;

  KC_TRACE(10, ("__kmpc_threadprivate: T#%d called\n", global_tid));

#ifdef USE_CHECKS_COMMON
  if (!__kmp_init_serial)
    KMP_FATAL(RTLNotInitialized);
#endif /* USE_CHECKS_COMMON */

  if (!__kmp_threads[global_tid]->th.th_root->r.r_active && !__kmp_foreign_tp) {
    /* The parallel address will NEVER overlap with the data_address */
    /* dkp: 3rd arg to kmp_threadprivate_insert_private_data() is the
     * data_address; use data_address = data */

    KC_TRACE(20, ("__kmpc_threadprivate: T#%d inserting private data\n",
                  global_tid));
    kmp_threadprivate_insert_private_data(global_tid, data, data, size);

    ret = data;
  } else {
    KC_TRACE(
        50,
        ("__kmpc_threadprivate: T#%d try to find private data at address %p\n",
         global_tid, data));
    tn = __kmp_threadprivate_find_task_common(
        __kmp_threads[global_tid]->th.th_pri_common, global_tid, data);

    if (tn) {
      KC_TRACE(20, ("__kmpc_threadprivate: T#%d found data\n", global_tid));
#ifdef USE_CHECKS_COMMON
      if ((size_t)size > tn->cmn_size) {
        KC_TRACE(10, ("THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC
                      " ,%" KMP_UINTPTR_SPEC ")\n",
                      data, size, tn->cmn_size));
        KMP_FATAL(TPCommonBlocksInconsist);
      }
#endif /* USE_CHECKS_COMMON */
    } else {
      /* The parallel address will NEVER overlap with the data_address */
      /* dkp: 3rd arg to kmp_threadprivate_insert() is the data_address; use
       * data_address = data */
      KC_TRACE(20, ("__kmpc_threadprivate: T#%d inserting data\n", global_tid));
      tn = kmp_threadprivate_insert(global_tid, data, data, size);
    }

    ret = tn->par_addr;
  }
  KC_TRACE(10, ("__kmpc_threadprivate: T#%d exiting; return value = %p\n",
                global_tid, ret));

  return ret;
}

static kmp_cached_addr_t *__kmp_find_cache(void *data) {
  kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;
  while (ptr && ptr->data != data)
    ptr = ptr->next;
  return ptr;
}

/*!
 @ingroup THREADPRIVATE
 @param loc source location information
 @param global_tid  global thread number
 @param data  pointer to data to privatize
 @param size  size of data to privatize
 @param cache  pointer to cache
 @return pointer to private storage

 Allocate private storage for threadprivate data.
*/
void *
__kmpc_threadprivate_cached(ident_t *loc,
                            kmp_int32 global_tid, // gtid.
                            void *data, // Pointer to original global variable.
                            size_t size, // Size of original global variable.
                            void ***cache) {
  KC_TRACE(10, ("__kmpc_threadprivate_cached: T#%d called with cache: %p, "
                "address: %p, size: %" KMP_SIZE_T_SPEC "\n",
                global_tid, *cache, data, size));

  if (TCR_PTR(*cache) == 0) {
    __kmp_acquire_lock(&__kmp_global_lock, global_tid);

    if (TCR_PTR(*cache) == 0) {
      __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
      // Compiler often passes in NULL cache, even if it's already been created
      void **my_cache;
      kmp_cached_addr_t *tp_cache_addr;
      // Look for an existing cache
      tp_cache_addr = __kmp_find_cache(data);
      if (!tp_cache_addr) { // Cache was never created; do it now
        __kmp_tp_cached = 1;
        KMP_ITT_IGNORE(my_cache = (void **)__kmp_allocate(
                           sizeof(void *) * __kmp_tp_capacity +
                           sizeof(kmp_cached_addr_t)););
        // No need to zero the allocated memory; __kmp_allocate does that.
        KC_TRACE(50, ("__kmpc_threadprivate_cached: T#%d allocated cache at "
                      "address %p\n",
                      global_tid, my_cache));
        /* TODO: free all this memory in __kmp_common_destroy using
         * __kmp_threadpriv_cache_list */
        /* Add address of my_cache to linked list for cleanup later */
        tp_cache_addr = (kmp_cached_addr_t *)&my_cache[__kmp_tp_capacity];
        tp_cache_addr->addr = my_cache;
        tp_cache_addr->data = data;
        tp_cache_addr->compiler_cache = cache;
        tp_cache_addr->next = __kmp_threadpriv_cache_list;
        __kmp_threadpriv_cache_list = tp_cache_addr;
      } else { // A cache was already created; use it
        my_cache = tp_cache_addr->addr;
        tp_cache_addr->compiler_cache = cache;
      }
      KMP_MB();

      TCW_PTR(*cache, my_cache);
      __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);

      KMP_MB();
    }
    __kmp_release_lock(&__kmp_global_lock, global_tid);
  }

  void *ret;
  if ((ret = TCR_PTR((*cache)[global_tid])) == 0) {
    ret = __kmpc_threadprivate(loc, global_tid, data, (size_t)size);

    TCW_PTR((*cache)[global_tid], ret);
  }
  KC_TRACE(10,
           ("__kmpc_threadprivate_cached: T#%d exiting; return value = %p\n",
            global_tid, ret));
  return ret;
}
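
/* Illustrative usage sketch (assumed codegen, not prescribed by the runtime):
   the compiler keeps one cache slot per threadprivate variable and routes
   every reference through it:

     static void **obj_cache; // hypothetical compiler-generated slot
     T *p = (T *)__kmpc_threadprivate_cached(
         &loc, __kmpc_global_thread_num(&loc), &obj, sizeof(obj), &obj_cache);

   After the first call, (*obj_cache)[gtid] short-circuits the hash lookup. */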

// This function should only be called when both __kmp_tp_cached_lock and
// __kmp_forkjoin_lock are held.
void __kmp_threadprivate_resize_cache(int newCapacity) {
  KC_TRACE(10, ("__kmp_threadprivate_resize_cache: called with size: %d\n",
                newCapacity));

  kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;

  while (ptr) {
    if (ptr->data) { // this location has an active cache; resize it
      void **my_cache;
      KMP_ITT_IGNORE(my_cache =
                         (void **)__kmp_allocate(sizeof(void *) * newCapacity +
                                                 sizeof(kmp_cached_addr_t)););
      // No need to zero the allocated memory; __kmp_allocate does that.
      KC_TRACE(50, ("__kmp_threadprivate_resize_cache: allocated cache at %p\n",
                    my_cache));
      // Now copy old cache into new cache
      void **old_cache = ptr->addr;
      for (int i = 0; i < __kmp_tp_capacity; ++i) {
        my_cache[i] = old_cache[i];
      }

      // Add address of new my_cache to linked list for cleanup later
      kmp_cached_addr_t *tp_cache_addr;
      tp_cache_addr = (kmp_cached_addr_t *)&my_cache[newCapacity];
      tp_cache_addr->addr = my_cache;
      tp_cache_addr->data = ptr->data;
      tp_cache_addr->compiler_cache = ptr->compiler_cache;
      tp_cache_addr->next = __kmp_threadpriv_cache_list;
      __kmp_threadpriv_cache_list = tp_cache_addr;

      // Copy new cache to the compiler's location: we could copy directly
      // to (*compiler_cache) if the compiler guaranteed it will keep using
      // the same location for the cache. This is not yet true for some
      // compilers, so we instead check whether compiler_cache still points
      // at the old cache and, if so, point it at the new cache with an
      // atomic compare&swap operation. (The old method always works, but we
      // should shift to the new method (commented line below) once the Intel
      // and Clang compilers use it.)
      (void)KMP_COMPARE_AND_STORE_PTR(tp_cache_addr->compiler_cache, old_cache,
                                      my_cache);
      // TCW_PTR(*(tp_cache_addr->compiler_cache), my_cache);

      // If the store doesn't happen here, the compiler's old behavior will
      // inevitably call __kmpc_threadprivate_cached with a new location for
      // the cache, and that function will store the resized cache there at
      // that point.

      // Nullify old cache's data pointer so we skip it next time
      ptr->data = NULL;
    }
    ptr = ptr->next;
  }
  // After all caches are resized, update __kmp_tp_capacity to the new size
  *(volatile int *)&__kmp_tp_capacity = newCapacity;
}

/*!
 @ingroup THREADPRIVATE
 @param loc source location information
 @param data  pointer to data being privatized
 @param ctor  pointer to constructor function for data
 @param cctor  pointer to copy constructor function for data
 @param dtor  pointer to destructor function for data
 @param vector_length length of the vector (bytes or elements?)
 Register vector constructors and destructors for thread private data.
*/
void __kmpc_threadprivate_register_vec(ident_t *loc, void *data,
                                       kmpc_ctor_vec ctor, kmpc_cctor_vec cctor,
                                       kmpc_dtor_vec dtor,
                                       size_t vector_length) {
  struct shared_common *d_tn, **lnk_tn;

  KC_TRACE(10, ("__kmpc_threadprivate_register_vec: called\n"));

#ifdef USE_CHECKS_COMMON
  /* copy constructor must be zero for current code gen (Nov 2002 - jph) */
  KMP_ASSERT(cctor == 0);
#endif /* USE_CHECKS_COMMON */

  d_tn = __kmp_find_shared_task_common(
      &__kmp_threadprivate_d_table, -1,
      data); /* Only the global data table exists. */

  if (d_tn == 0) {
    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
    d_tn->gbl_addr = data;

    d_tn->ct.ctorv = ctor;
    d_tn->cct.cctorv = cctor;
    d_tn->dt.dtorv = dtor;
    d_tn->is_vec = TRUE;
    d_tn->vec_len = (size_t)vector_length;
    // d_tn->obj_init = 0;  // AC: __kmp_allocate zeroes the memory
    // d_tn->pod_init = 0;
    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(data)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;
  }
}
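
/* Illustrative sketch (assumed codegen) for an array of C++ objects,
   "static T arr[16]; #pragma omp threadprivate(arr)":

     // __t_ctor_vec / __t_dtor_vec are hypothetical helpers matching the
     // kmpc_ctor_vec / kmpc_dtor_vec typedefs from kmp.h; 16 is the element
     // count (an assumed reading of vector_length; the doc comment above
     // leaves bytes-vs-elements open).
     __kmpc_threadprivate_register_vec(&loc, arr, __t_ctor_vec, NULL,
                                       __t_dtor_vec, 16);
*/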

void __kmp_cleanup_threadprivate_caches() {
  kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;

  while (ptr) {
    void **cache = ptr->addr;
    __kmp_threadpriv_cache_list = ptr->next;
    if (*ptr->compiler_cache)
      *ptr->compiler_cache = NULL;
    ptr->compiler_cache = NULL;
    ptr->data = NULL;
    ptr->addr = NULL;
    ptr->next = NULL;
    // Threadprivate data pointed at by cache entries are destroyed at end of
    // __kmp_launch_thread with __kmp_common_destroy_gtid.
    __kmp_free(cache); // implicitly frees ptr too
    ptr = __kmp_threadpriv_cache_list;
  }
}