/*
 * kmp_threadprivate.cpp -- OpenMP threadprivate support library
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"

#define USE_CHECKS_COMMON

#define KMP_INLINE_SUBR 1

void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
                                           void *data_addr, size_t pc_size);
struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
                                                void *data_addr,
                                                size_t pc_size);

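/* The shared ("master") table of threadprivate descriptors, one entry per
   threadprivate variable seen by the process.  Entries are added under
   __kmp_global_lock and looked up by the hash of the variable's global
   address. */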
struct shared_table __kmp_threadprivate_d_table;

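/* Look up the node describing this thread's copy of the threadprivate
   variable whose global address is pc_addr in the given per-thread hash table
   (th.th_pri_common); returns NULL if the thread has no copy yet. */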
static
#ifdef KMP_INLINE_SUBR
    __forceinline
#endif
    struct private_common *
    __kmp_threadprivate_find_task_common(struct common_table *tbl, int gtid,
                                         void *pc_addr)

{
  struct private_common *tn;

#ifdef KMP_TASK_COMMON_DEBUG
  KC_TRACE(10, ("__kmp_threadprivate_find_task_common: thread#%d, called with "
                "address %p\n",
                gtid, pc_addr));
  dump_list();
#endif

  for (tn = tbl->data[KMP_HASH(pc_addr)]; tn; tn = tn->next) {
    if (tn->gbl_addr == pc_addr) {
#ifdef KMP_TASK_COMMON_DEBUG
      KC_TRACE(10, ("__kmp_threadprivate_find_task_common: thread#%d, found "
                    "node %p on list\n",
                    gtid, pc_addr));
#endif
      return tn;
    }
  }
  return 0;
}

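/* Look up the shared descriptor for the threadprivate variable whose global
   address is pc_addr in the process-wide table; returns NULL if the variable
   has not been registered or inserted yet. */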
static
#ifdef KMP_INLINE_SUBR
    __forceinline
#endif
    struct shared_common *
    __kmp_find_shared_task_common(struct shared_table *tbl, int gtid,
                                  void *pc_addr) {
  struct shared_common *tn;

  for (tn = tbl->data[KMP_HASH(pc_addr)]; tn; tn = tn->next) {
    if (tn->gbl_addr == pc_addr) {
#ifdef KMP_TASK_COMMON_DEBUG
      KC_TRACE(
          10,
          ("__kmp_find_shared_task_common: thread#%d, found node %p on list\n",
           gtid, pc_addr));
#endif
      return tn;
    }
  }
  return 0;
}

// Create a template for the initialization of threadprivate data storage.
// Either the template is NULL, indicating zero fill, or it is a copy of the
// original data.
static struct private_data *__kmp_init_common_data(void *pc_addr,
                                                   size_t pc_size) {
  struct private_data *d;
  size_t i;
  char *p;

  d = (struct private_data *)__kmp_allocate(sizeof(struct private_data));
  /*
      d->data = 0;  // AC: commented out because __kmp_allocate zeroes the memory
      d->next = 0;
  */
  d->size = pc_size;
  d->more = 1;

  p = (char *)pc_addr;

  for (i = pc_size; i > 0; --i) {
    if (*p++ != '\0') {
      d->data = __kmp_allocate(pc_size);
      KMP_MEMCPY(d->data, pc_addr, pc_size);
      break;
    }
  }

  return d;
}

// Initialize the data area from the template.
static void __kmp_copy_common_data(void *pc_addr, struct private_data *d) {
  char *addr = (char *)pc_addr;
  int i, offset;

  for (offset = 0; d != 0; d = d->next) {
    for (i = d->more; i > 0; --i) {
      if (d->data == 0)
        memset(&addr[offset], '\0', d->size);
      else
        KMP_MEMCPY(&addr[offset], d->data, d->size);
      offset += d->size;
    }
  }
}
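
/* Illustrative note: __kmp_init_common_data and __kmp_copy_common_data form a
   snapshot/replay pair.  If the variable's initial image is all zero bytes,
   the template keeps data == NULL and the copy step simply memsets the
   destination; otherwise the template holds a byte-for-byte snapshot of the
   original that is memcpy'd into each thread's private copy. */
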
128
129/* we are called from __kmp_serial_initialize() with __kmp_initz_lock held. */
130void __kmp_common_initialize(void) {
131  if (!TCR_4(__kmp_init_common)) {
132    int q;
133#ifdef KMP_DEBUG
134    int gtid;
135#endif
136
137    __kmp_threadpriv_cache_list = NULL;
138
139#ifdef KMP_DEBUG
140    /* verify the uber masters were initialized */
141    for (gtid = 0; gtid < __kmp_threads_capacity; gtid++)
142      if (__kmp_root[gtid]) {
143        KMP_DEBUG_ASSERT(__kmp_root[gtid]->r.r_uber_thread);
144        for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q)
145          KMP_DEBUG_ASSERT(
146              !__kmp_root[gtid]->r.r_uber_thread->th.th_pri_common->data[q]);
        /*                    __kmp_root[ gtid ]-> r.r_uber_thread ->
         * th.th_pri_common -> data[ q ] = 0;*/
      }
#endif /* KMP_DEBUG */

    for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q)
      __kmp_threadprivate_d_table.data[q] = 0;

    TCW_4(__kmp_init_common, TRUE);
  }
}

/* Call all destructors for threadprivate data belonging to all threads.
   Currently unused! */
void __kmp_common_destroy(void) {
  if (TCR_4(__kmp_init_common)) {
    int q;

    TCW_4(__kmp_init_common, FALSE);

    for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) {
      int gtid;
      struct private_common *tn;
      struct shared_common *d_tn;

      /* C++ destructors need to be called once per thread before exiting.
         Don't call destructors for master thread though unless we used copy
         constructor */

      for (d_tn = __kmp_threadprivate_d_table.data[q]; d_tn;
           d_tn = d_tn->next) {
        if (d_tn->is_vec) {
          if (d_tn->dt.dtorv != 0) {
            for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
              if (__kmp_threads[gtid]) {
                if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid))
                                       : (!KMP_UBER_GTID(gtid))) {
                  tn = __kmp_threadprivate_find_task_common(
                      __kmp_threads[gtid]->th.th_pri_common, gtid,
                      d_tn->gbl_addr);
                  if (tn) {
                    (*d_tn->dt.dtorv)(tn->par_addr, d_tn->vec_len);
                  }
                }
              }
            }
            if (d_tn->obj_init != 0) {
              (*d_tn->dt.dtorv)(d_tn->obj_init, d_tn->vec_len);
            }
          }
        } else {
          if (d_tn->dt.dtor != 0) {
            for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
              if (__kmp_threads[gtid]) {
                if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid))
                                       : (!KMP_UBER_GTID(gtid))) {
                  tn = __kmp_threadprivate_find_task_common(
                      __kmp_threads[gtid]->th.th_pri_common, gtid,
                      d_tn->gbl_addr);
                  if (tn) {
                    (*d_tn->dt.dtor)(tn->par_addr);
                  }
                }
              }
            }
            if (d_tn->obj_init != 0) {
              (*d_tn->dt.dtor)(d_tn->obj_init);
            }
          }
        }
      }
      __kmp_threadprivate_d_table.data[q] = 0;
    }
  }
}

/* Call all destructors for threadprivate data belonging to this thread */
void __kmp_common_destroy_gtid(int gtid) {
  struct private_common *tn;
  struct shared_common *d_tn;

  if (!TCR_4(__kmp_init_gtid)) {
    // This is possible when one of multiple roots initiates early library
    // termination in a sequential region while other teams are active, and its
    // child threads are about to end.
    return;
  }

  KC_TRACE(10, ("__kmp_common_destroy_gtid: T#%d called\n", gtid));
  if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid)) : (!KMP_UBER_GTID(gtid))) {

    if (TCR_4(__kmp_init_common)) {

      /* Cannot do this here since not all threads have destroyed their data */
      /* TCW_4(__kmp_init_common, FALSE); */

      for (tn = __kmp_threads[gtid]->th.th_pri_head; tn; tn = tn->link) {

        d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, gtid,
                                             tn->gbl_addr);

        KMP_DEBUG_ASSERT(d_tn);

        if (d_tn->is_vec) {
          if (d_tn->dt.dtorv != 0) {
            (void)(*d_tn->dt.dtorv)(tn->par_addr, d_tn->vec_len);
          }
          if (d_tn->obj_init != 0) {
            (void)(*d_tn->dt.dtorv)(d_tn->obj_init, d_tn->vec_len);
          }
        } else {
          if (d_tn->dt.dtor != 0) {
            (void)(*d_tn->dt.dtor)(tn->par_addr);
          }
          if (d_tn->obj_init != 0) {
            (void)(*d_tn->dt.dtor)(d_tn->obj_init);
          }
        }
      }
      KC_TRACE(30, ("__kmp_common_destroy_gtid: T#%d threadprivate destructors "
                    "complete\n",
                    gtid));
    }
  }
}

#ifdef KMP_TASK_COMMON_DEBUG
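/* Debugging helper: dump every (global address -> per-thread address) mapping
   currently registered in any thread's private table. */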
static void dump_list(void) {
  int p, q;

  for (p = 0; p < __kmp_all_nth; ++p) {
    if (!__kmp_threads[p])
      continue;
    for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) {
      if (__kmp_threads[p]->th.th_pri_common->data[q]) {
        struct private_common *tn;

        KC_TRACE(10, ("\tdump_list: gtid:%d addresses\n", p));

        for (tn = __kmp_threads[p]->th.th_pri_common->data[q]; tn;
             tn = tn->next) {
          KC_TRACE(10,
                   ("\tdump_list: THREADPRIVATE: Serial %p -> Parallel %p\n",
                    tn->gbl_addr, tn->par_addr));
        }
      }
    }
  }
}
#endif /* KMP_TASK_COMMON_DEBUG */

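/* Register a descriptor for the threadprivate variable at pc_addr in the
   shared table (if one does not exist yet), recording its size and a template
   of its initial data, so that later parallel references can create and
   initialize per-thread copies. */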
// NOTE: this routine is to be called only from the serial part of the program.
void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
                                           void *data_addr, size_t pc_size) {
  struct shared_common **lnk_tn, *d_tn;
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
                   __kmp_threads[gtid]->th.th_root->r.r_active == 0);

  d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, gtid,
                                       pc_addr);

  if (d_tn == 0) {
    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));

    d_tn->gbl_addr = pc_addr;
    d_tn->pod_init = __kmp_init_common_data(data_addr, pc_size);
    /*
            d_tn->obj_init = 0;  // AC: commented out because __kmp_allocate zeroes the memory
            d_tn->ct.ctor = 0;
            d_tn->cct.cctor = 0;
            d_tn->dt.dtor = 0;
            d_tn->is_vec = FALSE;
            d_tn->vec_len = 0L;
    */
    d_tn->cmn_size = pc_size;

    __kmp_acquire_lock(&__kmp_global_lock, gtid);

    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(pc_addr)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;

    __kmp_release_lock(&__kmp_global_lock, gtid);
  }
}

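/* Create the calling thread's private copy of the threadprivate variable at
   pc_addr: create or update the shared descriptor, allocate per-thread
   storage (the initial/uber thread reuses the original variable), link the
   node into the thread's hash table and destruction list, and initialize the
   copy via constructor, copy constructor, or the POD template. */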
struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
                                                void *data_addr,
                                                size_t pc_size) {
  struct private_common *tn, **tt;
  struct shared_common *d_tn;

  /* +++++++++ START OF CRITICAL SECTION +++++++++ */
  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  tn = (struct private_common *)__kmp_allocate(sizeof(struct private_common));

  tn->gbl_addr = pc_addr;

  d_tn = __kmp_find_shared_task_common(
      &__kmp_threadprivate_d_table, gtid,
      pc_addr); /* Only the MASTER data table exists. */

  if (d_tn != 0) {
    /* This threadprivate variable has already been seen. */

    if (d_tn->pod_init == 0 && d_tn->obj_init == 0) {
      d_tn->cmn_size = pc_size;

      if (d_tn->is_vec) {
        if (d_tn->ct.ctorv != 0) {
          /* Construct from scratch so no prototype exists */
          d_tn->obj_init = 0;
        } else if (d_tn->cct.cctorv != 0) {
          /* Now data initialize the prototype since it was previously
           * registered */
          d_tn->obj_init = (void *)__kmp_allocate(d_tn->cmn_size);
          (void)(*d_tn->cct.cctorv)(d_tn->obj_init, pc_addr, d_tn->vec_len);
        } else {
          d_tn->pod_init = __kmp_init_common_data(data_addr, d_tn->cmn_size);
        }
      } else {
        if (d_tn->ct.ctor != 0) {
          /* Construct from scratch so no prototype exists */
          d_tn->obj_init = 0;
        } else if (d_tn->cct.cctor != 0) {
          /* Now data initialize the prototype since it was previously
             registered */
          d_tn->obj_init = (void *)__kmp_allocate(d_tn->cmn_size);
          (void)(*d_tn->cct.cctor)(d_tn->obj_init, pc_addr);
        } else {
          d_tn->pod_init = __kmp_init_common_data(data_addr, d_tn->cmn_size);
        }
      }
    }
  } else {
    struct shared_common **lnk_tn;

    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
    d_tn->gbl_addr = pc_addr;
    d_tn->cmn_size = pc_size;
    d_tn->pod_init = __kmp_init_common_data(data_addr, pc_size);
    /*
            d_tn->obj_init = 0;  // AC: commented out because __kmp_allocate zeroes the memory
            d_tn->ct.ctor = 0;
            d_tn->cct.cctor = 0;
            d_tn->dt.dtor = 0;
            d_tn->is_vec = FALSE;
            d_tn->vec_len = 0L;
    */
    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(pc_addr)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;
  }

  tn->cmn_size = d_tn->cmn_size;

  if ((__kmp_foreign_tp) ? (KMP_INITIAL_GTID(gtid)) : (KMP_UBER_GTID(gtid))) {
    tn->par_addr = (void *)pc_addr;
  } else {
    tn->par_addr = (void *)__kmp_allocate(tn->cmn_size);
  }

  __kmp_release_lock(&__kmp_global_lock, gtid);
/* +++++++++ END OF CRITICAL SECTION +++++++++ */

#ifdef USE_CHECKS_COMMON
  if (pc_size > d_tn->cmn_size) {
    KC_TRACE(
        10, ("__kmp_threadprivate_insert: THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC
             " ,%" KMP_UINTPTR_SPEC ")\n",
             pc_addr, pc_size, d_tn->cmn_size));
    KMP_FATAL(TPCommonBlocksInconsist);
  }
#endif /* USE_CHECKS_COMMON */

  tt = &(__kmp_threads[gtid]->th.th_pri_common->data[KMP_HASH(pc_addr)]);

#ifdef KMP_TASK_COMMON_DEBUG
  if (*tt != 0) {
    KC_TRACE(
        10,
        ("__kmp_threadprivate_insert: WARNING! thread#%d: collision on %p\n",
         gtid, pc_addr));
  }
#endif
  tn->next = *tt;
  *tt = tn;

#ifdef KMP_TASK_COMMON_DEBUG
  KC_TRACE(10,
           ("__kmp_threadprivate_insert: thread#%d, inserted node %p on list\n",
            gtid, pc_addr));
  dump_list();
#endif

  /* Link the node into a simple list */

  tn->link = __kmp_threads[gtid]->th.th_pri_head;
  __kmp_threads[gtid]->th.th_pri_head = tn;

  if ((__kmp_foreign_tp) ? (KMP_INITIAL_GTID(gtid)) : (KMP_UBER_GTID(gtid)))
    return tn;

  /* if C++ object with copy constructor, use it;
   * else if C++ object with constructor, use it for the non-master copies only;
   * else use pod_init and memcpy
   *
   * C++ constructors need to be called once for each non-master thread on
   * allocate
   * C++ copy constructors need to be called once for each thread on allocate */

  /* C++ object with constructors/destructors; don't call constructors for
     master thread though */
  if (d_tn->is_vec) {
    if (d_tn->ct.ctorv != 0) {
      (void)(*d_tn->ct.ctorv)(tn->par_addr, d_tn->vec_len);
    } else if (d_tn->cct.cctorv != 0) {
      (void)(*d_tn->cct.cctorv)(tn->par_addr, d_tn->obj_init, d_tn->vec_len);
    } else if (tn->par_addr != tn->gbl_addr) {
      __kmp_copy_common_data(tn->par_addr, d_tn->pod_init);
    }
  } else {
    if (d_tn->ct.ctor != 0) {
      (void)(*d_tn->ct.ctor)(tn->par_addr);
    } else if (d_tn->cct.cctor != 0) {
      (void)(*d_tn->cct.cctor)(tn->par_addr, d_tn->obj_init);
    } else if (tn->par_addr != tn->gbl_addr) {
      __kmp_copy_common_data(tn->par_addr, d_tn->pod_init);
    }
  }
  /* !BUILD_OPENMP_C
      if (tn->par_addr != tn->gbl_addr)
          __kmp_copy_common_data( tn->par_addr, d_tn->pod_init ); */

  return tn;
}

/* ------------------------------------------------------------------------ */
/* We are currently parallel, and we know the thread id.                    */
/* ------------------------------------------------------------------------ */

/*!
 @ingroup THREADPRIVATE

 @param loc source location information
 @param data  pointer to data being privatized
 @param ctor  pointer to constructor function for data
 @param cctor  pointer to copy constructor function for data
 @param dtor  pointer to destructor function for data

 Register constructors and destructors for thread private data.
 This function is called when executing in parallel, when we know the thread id.
*/
void __kmpc_threadprivate_register(ident_t *loc, void *data, kmpc_ctor ctor,
                                   kmpc_cctor cctor, kmpc_dtor dtor) {
  struct shared_common *d_tn, **lnk_tn;

  KC_TRACE(10, ("__kmpc_threadprivate_register: called\n"));

#ifdef USE_CHECKS_COMMON
  /* copy constructor must be zero for current code gen (Nov 2002 - jph) */
  KMP_ASSERT(cctor == 0);
#endif /* USE_CHECKS_COMMON */

  /* Only the global data table exists. */
  d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, -1, data);

  if (d_tn == 0) {
    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
    d_tn->gbl_addr = data;

    d_tn->ct.ctor = ctor;
    d_tn->cct.cctor = cctor;
    d_tn->dt.dtor = dtor;
    /*
            d_tn->is_vec = FALSE;  // AC: commented out because __kmp_allocate zeroes the memory
            d_tn->vec_len = 0L;
            d_tn->obj_init = 0;
            d_tn->pod_init = 0;
    */
    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(data)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;
  }
}

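/* Illustrative sketch (hypothetical, compiler-dependent codegen): for a C++
   threadprivate variable with a constructor and destructor, e.g.
       MyClass obj;            // MyClass, obj_ctor, obj_dtor are hypothetical
       #pragma omp threadprivate(obj)
   a compiler would typically emit a one-time start-up call such as
       __kmpc_threadprivate_register(&loc, &obj, obj_ctor, NULL, obj_dtor);
   For plain POD data no registration is required by this runtime; the
   descriptor is created lazily by kmp_threadprivate_insert using pod_init. */

/* Return the calling thread's private copy of the threadprivate variable at
   address data, creating and initializing the copy (and, if needed, the
   shared descriptor) on first use. */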
void *__kmpc_threadprivate(ident_t *loc, kmp_int32 global_tid, void *data,
                           size_t size) {
  void *ret;
  struct private_common *tn;

  KC_TRACE(10, ("__kmpc_threadprivate: T#%d called\n", global_tid));

#ifdef USE_CHECKS_COMMON
  if (!__kmp_init_serial)
    KMP_FATAL(RTLNotInitialized);
#endif /* USE_CHECKS_COMMON */

  if (!__kmp_threads[global_tid]->th.th_root->r.r_active && !__kmp_foreign_tp) {
    /* The parallel address will NEVER overlap with the data_address */
    /* dkp: 3rd arg to kmp_threadprivate_insert_private_data() is the
     * data_address; use data_address = data */

    KC_TRACE(20, ("__kmpc_threadprivate: T#%d inserting private data\n",
                  global_tid));
    kmp_threadprivate_insert_private_data(global_tid, data, data, size);

    ret = data;
  } else {
    KC_TRACE(
        50,
        ("__kmpc_threadprivate: T#%d try to find private data at address %p\n",
         global_tid, data));
    tn = __kmp_threadprivate_find_task_common(
        __kmp_threads[global_tid]->th.th_pri_common, global_tid, data);

    if (tn) {
      KC_TRACE(20, ("__kmpc_threadprivate: T#%d found data\n", global_tid));
#ifdef USE_CHECKS_COMMON
      if ((size_t)size > tn->cmn_size) {
        KC_TRACE(10, ("THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC
                      " ,%" KMP_UINTPTR_SPEC ")\n",
                      data, size, tn->cmn_size));
        KMP_FATAL(TPCommonBlocksInconsist);
      }
#endif /* USE_CHECKS_COMMON */
    } else {
      /* The parallel address will NEVER overlap with the data_address */
      /* dkp: 3rd arg to kmp_threadprivate_insert() is the data_address; use
       * data_address = data */
      KC_TRACE(20, ("__kmpc_threadprivate: T#%d inserting data\n", global_tid));
      tn = kmp_threadprivate_insert(global_tid, data, data, size);
    }

    ret = tn->par_addr;
  }
  KC_TRACE(10, ("__kmpc_threadprivate: T#%d exiting; return value = %p\n",
                global_tid, ret));

  return ret;
}

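/* Return the cache-list entry whose original variable address matches data,
   or NULL if no cache has been created for that variable yet. */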
static kmp_cached_addr_t *__kmp_find_cache(void *data) {
  kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;
  while (ptr && ptr->data != data)
    ptr = ptr->next;
  return ptr;
}

/*!
 @ingroup THREADPRIVATE
 @param loc source location information
 @param global_tid  global thread number
 @param data  pointer to data to privatize
 @param size  size of data to privatize
 @param cache  pointer to cache
 @return pointer to private storage

 Allocate private storage for threadprivate data.
*/
void *
__kmpc_threadprivate_cached(ident_t *loc,
                            kmp_int32 global_tid, // gtid.
                            void *data, // Pointer to original global variable.
                            size_t size, // Size of original global variable.
                            void ***cache) {
  KC_TRACE(10, ("__kmpc_threadprivate_cached: T#%d called with cache: %p, "
                "address: %p, size: %" KMP_SIZE_T_SPEC "\n",
                global_tid, *cache, data, size));

  if (TCR_PTR(*cache) == 0) {
    __kmp_acquire_lock(&__kmp_global_lock, global_tid);

    if (TCR_PTR(*cache) == 0) {
      __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
      // Compiler often passes in NULL cache, even if it's already been created
      void **my_cache;
      kmp_cached_addr_t *tp_cache_addr;
      // Look for an existing cache
      tp_cache_addr = __kmp_find_cache(data);
      if (!tp_cache_addr) { // Cache was never created; do it now
        __kmp_tp_cached = 1;
        KMP_ITT_IGNORE(my_cache = (void **)__kmp_allocate(
                           sizeof(void *) * __kmp_tp_capacity +
                           sizeof(kmp_cached_addr_t)););
        // No need to zero the allocated memory; __kmp_allocate does that.
        KC_TRACE(50, ("__kmpc_threadprivate_cached: T#%d allocated cache at "
                      "address %p\n",
                      global_tid, my_cache));
        /* TODO: free all this memory in __kmp_common_destroy using
         * __kmp_threadpriv_cache_list */
        /* Add address of mycache to linked list for cleanup later  */
        tp_cache_addr = (kmp_cached_addr_t *)&my_cache[__kmp_tp_capacity];
        tp_cache_addr->addr = my_cache;
        tp_cache_addr->data = data;
        tp_cache_addr->compiler_cache = cache;
        tp_cache_addr->next = __kmp_threadpriv_cache_list;
        __kmp_threadpriv_cache_list = tp_cache_addr;
      } else { // A cache was already created; use it
        my_cache = tp_cache_addr->addr;
        tp_cache_addr->compiler_cache = cache;
      }
      KMP_MB();

      TCW_PTR(*cache, my_cache);
      __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);

      KMP_MB();
    }
    __kmp_release_lock(&__kmp_global_lock, global_tid);
  }

  void *ret;
  if ((ret = TCR_PTR((*cache)[global_tid])) == 0) {
    ret = __kmpc_threadprivate(loc, global_tid, data, (size_t)size);

    TCW_PTR((*cache)[global_tid], ret);
  }
  KC_TRACE(10,
           ("__kmpc_threadprivate_cached: T#%d exiting; return value = %p\n",
            global_tid, ret));
  return ret;
}

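/* Illustrative sketch (hypothetical, compiler-dependent codegen): for
       int counter;
       #pragma omp threadprivate(counter)
   a compiler typically materializes a zero-initialized cache pointer
   (e.g. "static void **counter_cache;", a hypothetical name) and lowers a
   reference to counter inside a parallel region to something like
       int *p = (int *)__kmpc_threadprivate_cached(&loc, gtid, &counter,
                                                   sizeof(counter),
                                                   &counter_cache);
   so that after the first call the lookup reduces to the (*cache)[gtid] load
   in the fast path above. */
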
// This function should only be called when both __kmp_tp_cached_lock and
// __kmp_forkjoin_lock are held.
void __kmp_threadprivate_resize_cache(int newCapacity) {
  KC_TRACE(10, ("__kmp_threadprivate_resize_cache: called with size: %d\n",
                newCapacity));

  kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;

  while (ptr) {
    if (ptr->data) { // this location has an active cache; resize it
      void **my_cache;
      KMP_ITT_IGNORE(my_cache =
                         (void **)__kmp_allocate(sizeof(void *) * newCapacity +
                                                 sizeof(kmp_cached_addr_t)););
      // No need to zero the allocated memory; __kmp_allocate does that.
      KC_TRACE(50, ("__kmp_threadprivate_resize_cache: allocated cache at %p\n",
                    my_cache));
      // Now copy old cache into new cache
      void **old_cache = ptr->addr;
      for (int i = 0; i < __kmp_tp_capacity; ++i) {
        my_cache[i] = old_cache[i];
      }

      // Add address of new my_cache to linked list for cleanup later
      kmp_cached_addr_t *tp_cache_addr;
      tp_cache_addr = (kmp_cached_addr_t *)&my_cache[newCapacity];
      tp_cache_addr->addr = my_cache;
      tp_cache_addr->data = ptr->data;
      tp_cache_addr->compiler_cache = ptr->compiler_cache;
      tp_cache_addr->next = __kmp_threadpriv_cache_list;
      __kmp_threadpriv_cache_list = tp_cache_addr;

      // Copy new cache to compiler's location: We can copy directly
      // to (*compiler_cache) if compiler guarantees it will keep
      // using the same location for the cache. This is not yet true
      // for some compilers, in which case we have to check if
      // compiler_cache is still pointing at old cache, and if so, we
      // can point it at the new cache with an atomic compare&swap
      // operation. (Old method will always work, but we should shift
      // to new method (commented line below) when Intel and Clang
      // compilers use new method.)
      (void)KMP_COMPARE_AND_STORE_PTR(tp_cache_addr->compiler_cache, old_cache,
                                      my_cache);
      // TCW_PTR(*(tp_cache_addr->compiler_cache), my_cache);

      // If the store doesn't happen here, the compiler's old behavior will
      // inevitably call __kmpc_threadprivate_cached with a new location for
      // the cache, and that function will store the resized cache there at
      // that point.

      // Nullify old cache's data pointer so we skip it next time
      ptr->data = NULL;
    }
    ptr = ptr->next;
  }
  // After all caches are resized, update __kmp_tp_capacity to the new size
  *(volatile int *)&__kmp_tp_capacity = newCapacity;
}

/*!
 @ingroup THREADPRIVATE
 @param loc source location information
 @param data  pointer to data being privatized
 @param ctor  pointer to constructor function for data
 @param cctor  pointer to copy constructor function for data
 @param dtor  pointer to destructor function for data
 @param vector_length length of the vector (bytes or elements?)
 Register vector constructors and destructors for thread private data.
*/
void __kmpc_threadprivate_register_vec(ident_t *loc, void *data,
                                       kmpc_ctor_vec ctor, kmpc_cctor_vec cctor,
                                       kmpc_dtor_vec dtor,
                                       size_t vector_length) {
  struct shared_common *d_tn, **lnk_tn;

  KC_TRACE(10, ("__kmpc_threadprivate_register_vec: called\n"));

#ifdef USE_CHECKS_COMMON
  /* copy constructor must be zero for current code gen (Nov 2002 - jph) */
  KMP_ASSERT(cctor == 0);
#endif /* USE_CHECKS_COMMON */

  d_tn = __kmp_find_shared_task_common(
      &__kmp_threadprivate_d_table, -1,
      data); /* Only the global data table exists. */

  if (d_tn == 0) {
    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
    d_tn->gbl_addr = data;

    d_tn->ct.ctorv = ctor;
    d_tn->cct.cctorv = cctor;
    d_tn->dt.dtorv = dtor;
    d_tn->is_vec = TRUE;
    d_tn->vec_len = (size_t)vector_length;
    // d_tn->obj_init = 0;  // AC: __kmp_allocate zeroes the memory
    // d_tn->pod_init = 0;
    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(data)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;
  }
}

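/* Free every threadprivate cache recorded on __kmp_threadpriv_cache_list and
   clear the compiler-visible cache pointers.  The per-thread data the caches
   point at is destroyed separately via __kmp_common_destroy_gtid. */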
void __kmp_cleanup_threadprivate_caches() {
  kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;

  while (ptr) {
    void **cache = ptr->addr;
    __kmp_threadpriv_cache_list = ptr->next;
    if (*ptr->compiler_cache)
      *ptr->compiler_cache = NULL;
    ptr->compiler_cache = NULL;
    ptr->data = NULL;
    ptr->addr = NULL;
    ptr->next = NULL;
    // Threadprivate data pointed at by cache entries are destroyed at end of
    // __kmp_launch_thread with __kmp_common_destroy_gtid.
    __kmp_free(cache); // implicitly frees ptr too
    ptr = __kmp_threadpriv_cache_list;
  }
}