/*
 * kmp_threadprivate.cpp -- OpenMP threadprivate support library
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"

#define USE_CHECKS_COMMON

#define KMP_INLINE_SUBR 1

void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
                                           void *data_addr, size_t pc_size);
struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
                                                void *data_addr,
                                                size_t pc_size);

struct shared_table __kmp_threadprivate_d_table;

static
#ifdef KMP_INLINE_SUBR
    __forceinline
#endif
    struct private_common *
    __kmp_threadprivate_find_task_common(struct common_table *tbl, int gtid,
                                         void *pc_addr)

{
  struct private_common *tn;

#ifdef KMP_TASK_COMMON_DEBUG
  KC_TRACE(10, ("__kmp_threadprivate_find_task_common: thread#%d, called with "
                "address %p\n",
                gtid, pc_addr));
  dump_list();
#endif

  for (tn = tbl->data[KMP_HASH(pc_addr)]; tn; tn = tn->next) {
    if (tn->gbl_addr == pc_addr) {
#ifdef KMP_TASK_COMMON_DEBUG
      KC_TRACE(10, ("__kmp_threadprivate_find_task_common: thread#%d, found "
                    "node %p on list\n",
                    gtid, pc_addr));
#endif
      return tn;
    }
  }
  return 0;
}

static
#ifdef KMP_INLINE_SUBR
    __forceinline
#endif
    struct shared_common *
    __kmp_find_shared_task_common(struct shared_table *tbl, int gtid,
                                  void *pc_addr) {
  struct shared_common *tn;

  for (tn = tbl->data[KMP_HASH(pc_addr)]; tn; tn = tn->next) {
    if (tn->gbl_addr == pc_addr) {
#ifdef KMP_TASK_COMMON_DEBUG
      KC_TRACE(
          10,
          ("__kmp_find_shared_task_common: thread#%d, found node %p on list\n",
           gtid, pc_addr));
#endif
      return tn;
    }
  }
  return 0;
}
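/* Two levels of bookkeeping are used for threadprivate data:
   - __kmp_threadprivate_d_table is a single global table keyed by the address
     of the original (serial) copy of a variable. Its shared_common nodes hold
     the registered constructors/destructors, the POD initialization template
     (pod_init), the constructed prototype (obj_init), and the common size.
   - Each thread keeps th.th_pri_common, a per-thread hash table that maps the
     same global address to a private_common node whose par_addr points at the
     thread's private copy. A thread's private_common nodes are also chained
     through th_pri_head so destructors can be run at thread exit (see
     __kmp_common_destroy_gtid below). */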
// Create a template for the data initialized storage. Either the template is
// NULL indicating zero fill, or the template is a copy of the original data.
static struct private_data *__kmp_init_common_data(void *pc_addr,
                                                   size_t pc_size) {
  struct private_data *d;
  size_t i;
  char *p;

  d = (struct private_data *)__kmp_allocate(sizeof(struct private_data));
  /*
      d->data = 0; // AC: commented out because __kmp_allocate zeroes the memory
      d->next = 0;
  */
  d->size = pc_size;
  d->more = 1;

  p = (char *)pc_addr;

  for (i = pc_size; i > 0; --i) {
    if (*p++ != '\0') {
      d->data = __kmp_allocate(pc_size);
      KMP_MEMCPY(d->data, pc_addr, pc_size);
      break;
    }
  }

  return d;
}

// Initialize the data area from the template.
static void __kmp_copy_common_data(void *pc_addr, struct private_data *d) {
  char *addr = (char *)pc_addr;
  int i, offset;

  for (offset = 0; d != 0; d = d->next) {
    for (i = d->more; i > 0; --i) {
      if (d->data == 0)
        memset(&addr[offset], '\0', d->size);
      else
        KMP_MEMCPY(&addr[offset], d->data, d->size);
      offset += d->size;
    }
  }
}

/* We are called from __kmp_serial_initialize() with __kmp_initz_lock held. */
void __kmp_common_initialize(void) {
  if (!TCR_4(__kmp_init_common)) {
    int q;
#ifdef KMP_DEBUG
    int gtid;
#endif

    __kmp_threadpriv_cache_list = NULL;

#ifdef KMP_DEBUG
    /* verify the uber masters were initialized */
    for (gtid = 0; gtid < __kmp_threads_capacity; gtid++)
      if (__kmp_root[gtid]) {
        KMP_DEBUG_ASSERT(__kmp_root[gtid]->r.r_uber_thread);
        for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q)
          KMP_DEBUG_ASSERT(
              !__kmp_root[gtid]->r.r_uber_thread->th.th_pri_common->data[q]);
        /* __kmp_root[ gtid ]-> r.r_uber_thread ->
         * th.th_pri_common -> data[ q ] = 0;*/
      }
#endif /* KMP_DEBUG */

    for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q)
      __kmp_threadprivate_d_table.data[q] = 0;

    TCW_4(__kmp_init_common, TRUE);
  }
}

/* Call all destructors for threadprivate data belonging to all threads.
   Currently unused! */
void __kmp_common_destroy(void) {
  if (TCR_4(__kmp_init_common)) {
    int q;

    TCW_4(__kmp_init_common, FALSE);

    for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) {
      int gtid;
      struct private_common *tn;
      struct shared_common *d_tn;

      /* C++ destructors need to be called once per thread before exiting.
         Don't call destructors for master thread though unless we used copy
         constructor */

      for (d_tn = __kmp_threadprivate_d_table.data[q]; d_tn;
           d_tn = d_tn->next) {
        if (d_tn->is_vec) {
          if (d_tn->dt.dtorv != 0) {
            for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
              if (__kmp_threads[gtid]) {
                if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid))
                                       : (!KMP_UBER_GTID(gtid))) {
                  tn = __kmp_threadprivate_find_task_common(
                      __kmp_threads[gtid]->th.th_pri_common, gtid,
                      d_tn->gbl_addr);
                  if (tn) {
                    (*d_tn->dt.dtorv)(tn->par_addr, d_tn->vec_len);
                  }
                }
              }
            }
            if (d_tn->obj_init != 0) {
              (*d_tn->dt.dtorv)(d_tn->obj_init, d_tn->vec_len);
            }
          }
        } else {
          if (d_tn->dt.dtor != 0) {
            for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
              if (__kmp_threads[gtid]) {
                if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid))
                                       : (!KMP_UBER_GTID(gtid))) {
                  tn = __kmp_threadprivate_find_task_common(
                      __kmp_threads[gtid]->th.th_pri_common, gtid,
                      d_tn->gbl_addr);
                  if (tn) {
                    (*d_tn->dt.dtor)(tn->par_addr);
                  }
                }
              }
            }
            if (d_tn->obj_init != 0) {
              (*d_tn->dt.dtor)(d_tn->obj_init);
            }
          }
        }
      }
      __kmp_threadprivate_d_table.data[q] = 0;
    }
  }
}

/* Call all destructors for threadprivate data belonging to this thread */
void __kmp_common_destroy_gtid(int gtid) {
  struct private_common *tn;
  struct shared_common *d_tn;

  if (!TCR_4(__kmp_init_gtid)) {
    // This is possible when one of multiple roots initiates early library
    // termination in a sequential region while other teams are active, and its
    // child threads are about to end.
    return;
  }

  KC_TRACE(10, ("__kmp_common_destroy_gtid: T#%d called\n", gtid));
  if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid))
                         : (!KMP_UBER_GTID(gtid))) {

    if (TCR_4(__kmp_init_common)) {

      /* Cannot do this here since not all threads have destroyed their data */
      /* TCW_4(__kmp_init_common, FALSE); */

      for (tn = __kmp_threads[gtid]->th.th_pri_head; tn; tn = tn->link) {

        d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, gtid,
                                             tn->gbl_addr);

        KMP_DEBUG_ASSERT(d_tn);

        if (d_tn->is_vec) {
          if (d_tn->dt.dtorv != 0) {
            (void)(*d_tn->dt.dtorv)(tn->par_addr, d_tn->vec_len);
          }
          if (d_tn->obj_init != 0) {
            (void)(*d_tn->dt.dtorv)(d_tn->obj_init, d_tn->vec_len);
          }
        } else {
          if (d_tn->dt.dtor != 0) {
            (void)(*d_tn->dt.dtor)(tn->par_addr);
          }
          if (d_tn->obj_init != 0) {
            (void)(*d_tn->dt.dtor)(d_tn->obj_init);
          }
        }
      }
      KC_TRACE(30, ("__kmp_common_destroy_gtid: T#%d threadprivate destructors "
                    "complete\n",
                    gtid));
    }
  }
}

#ifdef KMP_TASK_COMMON_DEBUG
static void dump_list(void) {
  int p, q;

  for (p = 0; p < __kmp_all_nth; ++p) {
    if (!__kmp_threads[p])
      continue;
    for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) {
      if (__kmp_threads[p]->th.th_pri_common->data[q]) {
        struct private_common *tn;

        KC_TRACE(10, ("\tdump_list: gtid:%d addresses\n", p));

        for (tn = __kmp_threads[p]->th.th_pri_common->data[q]; tn;
             tn = tn->next) {
          KC_TRACE(10,
                   ("\tdump_list: THREADPRIVATE: Serial %p -> Parallel %p\n",
                    tn->gbl_addr, tn->par_addr));
        }
      }
    }
  }
}
#endif /* KMP_TASK_COMMON_DEBUG */

// NOTE: this routine is to be called only from the serial part of the program.
void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
                                           void *data_addr, size_t pc_size) {
  struct shared_common **lnk_tn, *d_tn;
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
                   __kmp_threads[gtid]->th.th_root->r.r_active == 0);

  d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, gtid,
                                       pc_addr);

  if (d_tn == 0) {
    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));

    d_tn->gbl_addr = pc_addr;
    d_tn->pod_init = __kmp_init_common_data(data_addr, pc_size);
    /*
        d_tn->obj_init = 0; // AC: commented out because __kmp_allocate
                               zeroes the memory
        d_tn->ct.ctor = 0;
        d_tn->cct.cctor = 0;
        d_tn->dt.dtor = 0;
        d_tn->is_vec = FALSE;
        d_tn->vec_len = 0L;
    */
    d_tn->cmn_size = pc_size;

    __kmp_acquire_lock(&__kmp_global_lock, gtid);

    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(pc_addr)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;

    __kmp_release_lock(&__kmp_global_lock, gtid);
  }
}

struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
                                                void *data_addr,
                                                size_t pc_size) {
  struct private_common *tn, **tt;
  struct shared_common *d_tn;

  /* +++++++++ START OF CRITICAL SECTION +++++++++ */
  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  tn = (struct private_common *)__kmp_allocate(sizeof(struct private_common));

  tn->gbl_addr = pc_addr;

  d_tn = __kmp_find_shared_task_common(
      &__kmp_threadprivate_d_table, gtid,
      pc_addr); /* Only the MASTER data table exists. */

  if (d_tn != 0) {
    /* This threadprivate variable has already been seen. */

    if (d_tn->pod_init == 0 && d_tn->obj_init == 0) {
      d_tn->cmn_size = pc_size;

      if (d_tn->is_vec) {
        if (d_tn->ct.ctorv != 0) {
          /* Construct from scratch so no prototype exists */
          d_tn->obj_init = 0;
        } else if (d_tn->cct.cctorv != 0) {
          /* Now data initialize the prototype since it was previously
           * registered */
          d_tn->obj_init = (void *)__kmp_allocate(d_tn->cmn_size);
          (void)(*d_tn->cct.cctorv)(d_tn->obj_init, pc_addr, d_tn->vec_len);
        } else {
          d_tn->pod_init = __kmp_init_common_data(data_addr, d_tn->cmn_size);
        }
      } else {
        if (d_tn->ct.ctor != 0) {
          /* Construct from scratch so no prototype exists */
          d_tn->obj_init = 0;
        } else if (d_tn->cct.cctor != 0) {
          /* Now data initialize the prototype since it was previously
             registered */
          d_tn->obj_init = (void *)__kmp_allocate(d_tn->cmn_size);
          (void)(*d_tn->cct.cctor)(d_tn->obj_init, pc_addr);
        } else {
          d_tn->pod_init = __kmp_init_common_data(data_addr, d_tn->cmn_size);
        }
      }
    }
  } else {
    struct shared_common **lnk_tn;

    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
    d_tn->gbl_addr = pc_addr;
    d_tn->cmn_size = pc_size;
    d_tn->pod_init = __kmp_init_common_data(data_addr, pc_size);
    /*
        d_tn->obj_init = 0; // AC: commented out because __kmp_allocate
                               zeroes the memory
        d_tn->ct.ctor = 0;
        d_tn->cct.cctor = 0;
        d_tn->dt.dtor = 0;
        d_tn->is_vec = FALSE;
        d_tn->vec_len = 0L;
    */
    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(pc_addr)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;
  }

  tn->cmn_size = d_tn->cmn_size;

  if ((__kmp_foreign_tp) ? (KMP_INITIAL_GTID(gtid)) : (KMP_UBER_GTID(gtid))) {
    tn->par_addr = (void *)pc_addr;
  } else {
    tn->par_addr = (void *)__kmp_allocate(tn->cmn_size);
  }

  __kmp_release_lock(&__kmp_global_lock, gtid);
  /* +++++++++ END OF CRITICAL SECTION +++++++++ */

#ifdef USE_CHECKS_COMMON
  if (pc_size > d_tn->cmn_size) {
    KC_TRACE(
        10, ("__kmp_threadprivate_insert: THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC
             " ,%" KMP_UINTPTR_SPEC ")\n",
             pc_addr, pc_size, d_tn->cmn_size));
    KMP_FATAL(TPCommonBlocksInconsist);
  }
#endif /* USE_CHECKS_COMMON */

  tt = &(__kmp_threads[gtid]->th.th_pri_common->data[KMP_HASH(pc_addr)]);

#ifdef KMP_TASK_COMMON_DEBUG
  if (*tt != 0) {
    KC_TRACE(
        10,
        ("__kmp_threadprivate_insert: WARNING! thread#%d: collision on %p\n",
         gtid, pc_addr));
  }
#endif
  tn->next = *tt;
  *tt = tn;

#ifdef KMP_TASK_COMMON_DEBUG
  KC_TRACE(10,
           ("__kmp_threadprivate_insert: thread#%d, inserted node %p on list\n",
            gtid, pc_addr));
  dump_list();
#endif

  /* Link the node into a simple list */

  tn->link = __kmp_threads[gtid]->th.th_pri_head;
  __kmp_threads[gtid]->th.th_pri_head = tn;

  if ((__kmp_foreign_tp) ? (KMP_INITIAL_GTID(gtid)) : (KMP_UBER_GTID(gtid)))
    return tn;

  /* if C++ object with copy constructor, use it;
   * else if C++ object with constructor, use it for the non-master copies only;
   * else use pod_init and memcpy
   *
   * C++ constructors need to be called once for each non-master thread on
   * allocate
   * C++ copy constructors need to be called once for each thread on allocate */

  /* C++ object with constructors/destructors; don't call constructors for
     master thread though */
  if (d_tn->is_vec) {
    if (d_tn->ct.ctorv != 0) {
      (void)(*d_tn->ct.ctorv)(tn->par_addr, d_tn->vec_len);
    } else if (d_tn->cct.cctorv != 0) {
      (void)(*d_tn->cct.cctorv)(tn->par_addr, d_tn->obj_init, d_tn->vec_len);
    } else if (tn->par_addr != tn->gbl_addr) {
      __kmp_copy_common_data(tn->par_addr, d_tn->pod_init);
    }
  } else {
    if (d_tn->ct.ctor != 0) {
      (void)(*d_tn->ct.ctor)(tn->par_addr);
    } else if (d_tn->cct.cctor != 0) {
      (void)(*d_tn->cct.cctor)(tn->par_addr, d_tn->obj_init);
    } else if (tn->par_addr != tn->gbl_addr) {
      __kmp_copy_common_data(tn->par_addr, d_tn->pod_init);
    }
  }
  /* !BUILD_OPENMP_C
     if (tn->par_addr != tn->gbl_addr)
       __kmp_copy_common_data( tn->par_addr, d_tn->pod_init ); */

  return tn;
}

/* ------------------------------------------------------------------------ */
/* We are currently parallel, and we know the thread id. */
/* ------------------------------------------------------------------------ */

/*!
 @ingroup THREADPRIVATE

 @param loc source location information
 @param data pointer to data being privatized
 @param ctor pointer to constructor function for data
 @param cctor pointer to copy constructor function for data
 @param dtor pointer to destructor function for data

 Register constructors and destructors for thread private data.
 This function is called when executing in parallel, when we know the thread id.
*/
504345153Sdim*/ 505345153Sdimvoid __kmpc_threadprivate_register(ident_t *loc, void *data, kmpc_ctor ctor, 506345153Sdim kmpc_cctor cctor, kmpc_dtor dtor) { 507345153Sdim struct shared_common *d_tn, **lnk_tn; 508345153Sdim 509345153Sdim KC_TRACE(10, ("__kmpc_threadprivate_register: called\n")); 510345153Sdim 511345153Sdim#ifdef USE_CHECKS_COMMON 512345153Sdim /* copy constructor must be zero for current code gen (Nov 2002 - jph) */ 513345153Sdim KMP_ASSERT(cctor == 0); 514345153Sdim#endif /* USE_CHECKS_COMMON */ 515345153Sdim 516345153Sdim /* Only the global data table exists. */ 517345153Sdim d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, -1, data); 518345153Sdim 519345153Sdim if (d_tn == 0) { 520345153Sdim d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common)); 521345153Sdim d_tn->gbl_addr = data; 522345153Sdim 523345153Sdim d_tn->ct.ctor = ctor; 524345153Sdim d_tn->cct.cctor = cctor; 525345153Sdim d_tn->dt.dtor = dtor; 526345153Sdim /* 527345153Sdim d_tn->is_vec = FALSE; // AC: commented out because __kmp_allocate 528345153Sdim zeroes the memory 529345153Sdim d_tn->vec_len = 0L; 530345153Sdim d_tn->obj_init = 0; 531345153Sdim d_tn->pod_init = 0; 532345153Sdim */ 533345153Sdim lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(data)]); 534345153Sdim 535345153Sdim d_tn->next = *lnk_tn; 536345153Sdim *lnk_tn = d_tn; 537345153Sdim } 538345153Sdim} 539345153Sdim 540345153Sdimvoid *__kmpc_threadprivate(ident_t *loc, kmp_int32 global_tid, void *data, 541345153Sdim size_t size) { 542345153Sdim void *ret; 543345153Sdim struct private_common *tn; 544345153Sdim 545345153Sdim KC_TRACE(10, ("__kmpc_threadprivate: T#%d called\n", global_tid)); 546345153Sdim 547345153Sdim#ifdef USE_CHECKS_COMMON 548345153Sdim if (!__kmp_init_serial) 549345153Sdim KMP_FATAL(RTLNotInitialized); 550345153Sdim#endif /* USE_CHECKS_COMMON */ 551345153Sdim 552345153Sdim if (!__kmp_threads[global_tid]->th.th_root->r.r_active && !__kmp_foreign_tp) { 553345153Sdim /* The parallel address will NEVER overlap with the data_address */ 554345153Sdim /* dkp: 3rd arg to kmp_threadprivate_insert_private_data() is the 555345153Sdim * data_address; use data_address = data */ 556345153Sdim 557345153Sdim KC_TRACE(20, ("__kmpc_threadprivate: T#%d inserting private data\n", 558345153Sdim global_tid)); 559345153Sdim kmp_threadprivate_insert_private_data(global_tid, data, data, size); 560345153Sdim 561345153Sdim ret = data; 562345153Sdim } else { 563345153Sdim KC_TRACE( 564345153Sdim 50, 565345153Sdim ("__kmpc_threadprivate: T#%d try to find private data at address %p\n", 566345153Sdim global_tid, data)); 567345153Sdim tn = __kmp_threadprivate_find_task_common( 568345153Sdim __kmp_threads[global_tid]->th.th_pri_common, global_tid, data); 569345153Sdim 570345153Sdim if (tn) { 571345153Sdim KC_TRACE(20, ("__kmpc_threadprivate: T#%d found data\n", global_tid)); 572345153Sdim#ifdef USE_CHECKS_COMMON 573345153Sdim if ((size_t)size > tn->cmn_size) { 574345153Sdim KC_TRACE(10, ("THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC 575345153Sdim " ,%" KMP_UINTPTR_SPEC ")\n", 576345153Sdim data, size, tn->cmn_size)); 577345153Sdim KMP_FATAL(TPCommonBlocksInconsist); 578345153Sdim } 579345153Sdim#endif /* USE_CHECKS_COMMON */ 580345153Sdim } else { 581345153Sdim /* The parallel address will NEVER overlap with the data_address */ 582345153Sdim /* dkp: 3rd arg to kmp_threadprivate_insert() is the data_address; use 583345153Sdim * data_address = data */ 584345153Sdim KC_TRACE(20, ("__kmpc_threadprivate: T#%d inserting data\n", 

void *__kmpc_threadprivate(ident_t *loc, kmp_int32 global_tid, void *data,
                           size_t size) {
  void *ret;
  struct private_common *tn;

  KC_TRACE(10, ("__kmpc_threadprivate: T#%d called\n", global_tid));

#ifdef USE_CHECKS_COMMON
  if (!__kmp_init_serial)
    KMP_FATAL(RTLNotInitialized);
#endif /* USE_CHECKS_COMMON */

  if (!__kmp_threads[global_tid]->th.th_root->r.r_active && !__kmp_foreign_tp) {
    /* The parallel address will NEVER overlap with the data_address */
    /* dkp: 3rd arg to kmp_threadprivate_insert_private_data() is the
     * data_address; use data_address = data */

    KC_TRACE(20, ("__kmpc_threadprivate: T#%d inserting private data\n",
                  global_tid));
    kmp_threadprivate_insert_private_data(global_tid, data, data, size);

    ret = data;
  } else {
    KC_TRACE(
        50,
        ("__kmpc_threadprivate: T#%d try to find private data at address %p\n",
         global_tid, data));
    tn = __kmp_threadprivate_find_task_common(
        __kmp_threads[global_tid]->th.th_pri_common, global_tid, data);

    if (tn) {
      KC_TRACE(20, ("__kmpc_threadprivate: T#%d found data\n", global_tid));
#ifdef USE_CHECKS_COMMON
      if ((size_t)size > tn->cmn_size) {
        KC_TRACE(10, ("THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC
                      " ,%" KMP_UINTPTR_SPEC ")\n",
                      data, size, tn->cmn_size));
        KMP_FATAL(TPCommonBlocksInconsist);
      }
#endif /* USE_CHECKS_COMMON */
    } else {
      /* The parallel address will NEVER overlap with the data_address */
      /* dkp: 3rd arg to kmp_threadprivate_insert() is the data_address; use
       * data_address = data */
      KC_TRACE(20, ("__kmpc_threadprivate: T#%d inserting data\n", global_tid));
      tn = kmp_threadprivate_insert(global_tid, data, data, size);
    }

    ret = tn->par_addr;
  }
  KC_TRACE(10, ("__kmpc_threadprivate: T#%d exiting; return value = %p\n",
                global_tid, ret));

  return ret;
}

static kmp_cached_addr_t *__kmp_find_cache(void *data) {
  kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;
  while (ptr && ptr->data != data)
    ptr = ptr->next;
  return ptr;
}

/*!
 @ingroup THREADPRIVATE
 @param loc source location information
 @param global_tid global thread number
 @param data pointer to data to privatize
 @param size size of data to privatize
 @param cache pointer to cache
 @return pointer to private storage

 Allocate private storage for threadprivate data.
*/
void *
__kmpc_threadprivate_cached(ident_t *loc,
                            kmp_int32 global_tid, // gtid.
                            void *data, // Pointer to original global variable.
                            size_t size, // Size of original global variable.
                            void ***cache) {
  KC_TRACE(10, ("__kmpc_threadprivate_cached: T#%d called with cache: %p, "
                "address: %p, size: %" KMP_SIZE_T_SPEC "\n",
                global_tid, *cache, data, size));

  if (TCR_PTR(*cache) == 0) {
    __kmp_acquire_lock(&__kmp_global_lock, global_tid);

    if (TCR_PTR(*cache) == 0) {
      __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
      // Compiler often passes in NULL cache, even if it's already been created
      void **my_cache;
      kmp_cached_addr_t *tp_cache_addr;
      // Look for an existing cache
      tp_cache_addr = __kmp_find_cache(data);
      if (!tp_cache_addr) { // Cache was never created; do it now
        __kmp_tp_cached = 1;
        KMP_ITT_IGNORE(my_cache = (void **)__kmp_allocate(
                           sizeof(void *) * __kmp_tp_capacity +
                           sizeof(kmp_cached_addr_t)););
        // No need to zero the allocated memory; __kmp_allocate does that.
        KC_TRACE(50, ("__kmpc_threadprivate_cached: T#%d allocated cache at "
                      "address %p\n",
                      global_tid, my_cache));
        /* TODO: free all this memory in __kmp_common_destroy using
         * __kmp_threadpriv_cache_list */
        /* Add address of mycache to linked list for cleanup later */
        tp_cache_addr = (kmp_cached_addr_t *)&my_cache[__kmp_tp_capacity];
        tp_cache_addr->addr = my_cache;
        tp_cache_addr->data = data;
        tp_cache_addr->compiler_cache = cache;
        tp_cache_addr->next = __kmp_threadpriv_cache_list;
        __kmp_threadpriv_cache_list = tp_cache_addr;
      } else { // A cache was already created; use it
        my_cache = tp_cache_addr->addr;
        tp_cache_addr->compiler_cache = cache;
      }
      KMP_MB();

      TCW_PTR(*cache, my_cache);
      __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);

      KMP_MB();
    }
    __kmp_release_lock(&__kmp_global_lock, global_tid);
  }

  void *ret;
  if ((ret = TCR_PTR((*cache)[global_tid])) == 0) {
    ret = __kmpc_threadprivate(loc, global_tid, data, (size_t)size);

    TCW_PTR((*cache)[global_tid], ret);
  }
  KC_TRACE(10,
           ("__kmpc_threadprivate_cached: T#%d exiting; return value = %p\n",
            global_tid, ret));
  return ret;
}
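
/* Illustrative sketch of a use site (hypothetical names, not emitted by this
   file): a reference to a threadprivate variable "tp_var" of type T inside a
   parallel region is typically lowered to something like the following, where
   "tp_var_cache" is a compiler-generated, zero-initialized cache slot:

     static void **tp_var_cache;
     ...
     kmp_int32 gtid = __kmpc_global_thread_num(&my_loc);
     T *priv = (T *)__kmpc_threadprivate_cached(&my_loc, gtid, (void *)&tp_var,
                                                sizeof(tp_var), &tp_var_cache);

   The first call from each thread allocates (or finds) the private copy via
   __kmpc_threadprivate() and records it in the cache entry indexed by gtid;
   subsequent calls return the cached pointer. */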

// This function should only be called when both __kmp_tp_cached_lock and
// kmp_forkjoin_lock are held.
void __kmp_threadprivate_resize_cache(int newCapacity) {
  KC_TRACE(10, ("__kmp_threadprivate_resize_cache: called with size: %d\n",
                newCapacity));

  kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;

  while (ptr) {
    if (ptr->data) { // this location has an active cache; resize it
      void **my_cache;
      KMP_ITT_IGNORE(my_cache =
                         (void **)__kmp_allocate(sizeof(void *) * newCapacity +
                                                 sizeof(kmp_cached_addr_t)););
      // No need to zero the allocated memory; __kmp_allocate does that.
      KC_TRACE(50, ("__kmp_threadprivate_resize_cache: allocated cache at %p\n",
                    my_cache));
      // Now copy old cache into new cache
      void **old_cache = ptr->addr;
      for (int i = 0; i < __kmp_tp_capacity; ++i) {
        my_cache[i] = old_cache[i];
      }

      // Add address of new my_cache to linked list for cleanup later
      kmp_cached_addr_t *tp_cache_addr;
      tp_cache_addr = (kmp_cached_addr_t *)&my_cache[newCapacity];
      tp_cache_addr->addr = my_cache;
      tp_cache_addr->data = ptr->data;
      tp_cache_addr->compiler_cache = ptr->compiler_cache;
      tp_cache_addr->next = __kmp_threadpriv_cache_list;
      __kmp_threadpriv_cache_list = tp_cache_addr;

      // Copy new cache to compiler's location: We can copy directly to
      // (*compiler_cache) if the compiler guarantees it will keep using the
      // same location for the cache. This is not yet true for some compilers,
      // in which case we have to check if compiler_cache is still pointing at
      // the old cache, and if so, we can point it at the new cache with an
      // atomic compare&swap operation. (The old method will always work, but
      // we should shift to the new method (commented line below) when Intel
      // and Clang compilers use the new method.)
      (void)KMP_COMPARE_AND_STORE_PTR(tp_cache_addr->compiler_cache, old_cache,
                                      my_cache);
      // TCW_PTR(*(tp_cache_addr->compiler_cache), my_cache);

      // If the store doesn't happen here, the compiler's old behavior will
      // inevitably call __kmpc_threadprivate_cached with a new location for
      // the cache, and that function will store the resized cache there at
      // that point.

      // Nullify old cache's data pointer so we skip it next time
      ptr->data = NULL;
    }
    ptr = ptr->next;
  }
  // After all caches are resized, update __kmp_tp_capacity to the new size
  *(volatile int *)&__kmp_tp_capacity = newCapacity;
}

/*!
 @ingroup THREADPRIVATE
 @param loc source location information
 @param data pointer to data being privatized
 @param ctor pointer to constructor function for data
 @param cctor pointer to copy constructor function for data
 @param dtor pointer to destructor function for data
 @param vector_length length of the vector (bytes or elements?)
 Register vector constructors and destructors for thread private data.
*/
void __kmpc_threadprivate_register_vec(ident_t *loc, void *data,
                                       kmpc_ctor_vec ctor, kmpc_cctor_vec cctor,
                                       kmpc_dtor_vec dtor,
                                       size_t vector_length) {
  struct shared_common *d_tn, **lnk_tn;

  KC_TRACE(10, ("__kmpc_threadprivate_register_vec: called\n"));

#ifdef USE_CHECKS_COMMON
  /* copy constructor must be zero for current code gen (Nov 2002 - jph) */
  KMP_ASSERT(cctor == 0);
#endif /* USE_CHECKS_COMMON */

  d_tn = __kmp_find_shared_task_common(
      &__kmp_threadprivate_d_table, -1,
      data); /* Only the global data table exists. */

  if (d_tn == 0) {
    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
    d_tn->gbl_addr = data;

    d_tn->ct.ctorv = ctor;
    d_tn->cct.cctorv = cctor;
    d_tn->dt.dtorv = dtor;
    d_tn->is_vec = TRUE;
    d_tn->vec_len = (size_t)vector_length;
    // d_tn->obj_init = 0; // AC: __kmp_allocate zeroes the memory
    // d_tn->pod_init = 0;
    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(data)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;
  }
}

void __kmp_cleanup_threadprivate_caches() {
  kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;

  while (ptr) {
    void **cache = ptr->addr;
    __kmp_threadpriv_cache_list = ptr->next;
    if (*ptr->compiler_cache)
      *ptr->compiler_cache = NULL;
    ptr->compiler_cache = NULL;
    ptr->data = NULL;
    ptr->addr = NULL;
    ptr->next = NULL;
    // Threadprivate data pointed at by cache entries are destroyed at end of
    // __kmp_launch_thread with __kmp_common_destroy_gtid.
    __kmp_free(cache); // implicitly frees ptr too
    ptr = __kmp_threadpriv_cache_list;
  }
}