1//===---------- emutls.c - Implements __emutls_get_address ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include <stdint.h>
10#include <stdlib.h>
11#include <string.h>
12
13#include "int_lib.h"
14
15#ifdef __BIONIC__
16// There are 4 pthread key cleanup rounds on Bionic. Delay emutls deallocation
17// to round 2. We need to delay deallocation because:
18//  - Android versions older than M lack __cxa_thread_atexit_impl, so apps
19//    use a pthread key destructor to call C++ destructors.
20//  - Apps might use __thread/thread_local variables in pthread destructors.
21// We can't wait until the final two rounds, because jemalloc needs two rounds
22// after the final malloc/free call to free its thread-specific data (see
23// https://reviews.llvm.org/D46978#1107507).
24#define EMUTLS_SKIP_DESTRUCTOR_ROUNDS 1
25#else
26#define EMUTLS_SKIP_DESTRUCTOR_ROUNDS 0
27#endif
28
29#if defined(_MSC_VER) && !defined(__clang__)
30// MSVC raises a warning about a nonstandard extension being used for the 0
31// sized element in this array. Disable this for warn-as-error builds.
32#pragma warning(push)
33#pragma warning(disable : 4200)
34#endif
35
// Per-thread table of emulated TLS object addresses: a small header followed
// by a flexible array of object pointers.
typedef struct emutls_address_array {
  // Number of key-destructor invocations to let pass before the table and
  // its objects are actually released (see emutls_key_destructor).
  uintptr_t skip_destructor_rounds;
  uintptr_t size; // number of elements in the 'data' array
  // Object addresses; an entry stays NULL until __emutls_get_address
  // allocates the corresponding object for this thread.
  void *data[];
} emutls_address_array;
41
42#if defined(_MSC_VER) && !defined(__clang__)
43#pragma warning(pop)
44#endif
45
// Forward declaration: the platform-specific teardown code below calls
// emutls_shutdown before its definition appears.
static void emutls_shutdown(emutls_address_array *array);
47
48#ifndef _WIN32
49
50#include <pthread.h>
51
// Held by emutls_get_index while a new object index is being assigned.
static pthread_mutex_t emutls_mutex = PTHREAD_MUTEX_INITIALIZER;
// Key whose per-thread value is that thread's emutls_address_array.
static pthread_key_t emutls_pthread_key;
// True once emutls_init has successfully created emutls_pthread_key.
static bool emutls_key_created = false;

// Unsigned integer types declared with GCC's mode attribute: one as wide as
// a machine word, one as wide as a pointer.
typedef unsigned int gcc_word __attribute__((mode(word)));
typedef unsigned int gcc_pointer __attribute__((mode(pointer)));
58
59// Default is not to use posix_memalign, so systems like Android
60// can use thread local data without heavier POSIX memory allocators.
61#ifndef EMUTLS_USE_POSIX_MEMALIGN
62#define EMUTLS_USE_POSIX_MEMALIGN 0
63#endif
64
// Allocates 'size' bytes whose address is a multiple of 'align' and returns
// the aligned address. Never returns NULL: any allocation failure aborts.
//
// 'align' must be a power of two and at least sizeof(void *); the caller
// (emutls_allocate_object) enforces both. The result must be released with
// emutls_memalign_free, not with free().
static __inline void *emutls_memalign_alloc(size_t align, size_t size) {
  void *base;
#if EMUTLS_USE_POSIX_MEMALIGN
  if (posix_memalign(&base, align, size) != 0)
    abort();
#else
  // Over-allocate by enough bytes to (a) slide forward to the next aligned
  // address and (b) keep one pointer-sized slot right before it.
  const size_t extra_bytes = align - 1 + sizeof(void *);
  char *object;
  // A request so large that the padded size wraps around would yield an
  // undersized block that the caller then writes 'size' bytes into.
  if (size > SIZE_MAX - extra_bytes)
    abort();
  object = (char *)malloc(extra_bytes + size);
  if (object == NULL)
    abort();
  // Round (object + extra_bytes) down to a multiple of align; the result is
  // at least sizeof(void *) bytes past 'object'.
  base = (void *)(((uintptr_t)(object + extra_bytes)) &
                  ~(uintptr_t)(align - 1));

  // Remember the address malloc returned so emutls_memalign_free can free it.
  ((void **)base)[-1] = object;
#endif
  return base;
}
82
// Releases a block obtained from emutls_memalign_alloc.
static __inline void emutls_memalign_free(void *base) {
#if EMUTLS_USE_POSIX_MEMALIGN
  // posix_memalign memory is handed straight back to free().
  free(base);
#else
  // The address originally returned by malloc is stored in the
  // pointer-sized slot immediately before the aligned block.
  void **origin_slot = (void **)base - 1;
  free(*origin_slot);
#endif
}
91
92static __inline void emutls_setspecific(emutls_address_array *value) {
93  pthread_setspecific(emutls_pthread_key, (void *)value);
94}
95
96static __inline emutls_address_array *emutls_getspecific(void) {
97  return (emutls_address_array *)pthread_getspecific(emutls_pthread_key);
98}
99
100static void emutls_key_destructor(void *ptr) {
101  emutls_address_array *array = (emutls_address_array *)ptr;
102  if (array->skip_destructor_rounds > 0) {
103    // emutls is deallocated using a pthread key destructor. These
104    // destructors are called in several rounds to accommodate destructor
105    // functions that (re)initialize key values with pthread_setspecific.
106    // Delay the emutls deallocation to accommodate other end-of-thread
107    // cleanup tasks like calling thread_local destructors (e.g. the
108    // __cxa_thread_atexit fallback in libc++abi).
109    array->skip_destructor_rounds--;
110    emutls_setspecific(array);
111  } else {
112    emutls_shutdown(array);
113    free(ptr);
114  }
115}
116
117static __inline void emutls_init(void) {
118  if (pthread_key_create(&emutls_pthread_key, emutls_key_destructor) != 0)
119    abort();
120  emutls_key_created = true;
121}
122
123static __inline void emutls_init_once(void) {
124  static pthread_once_t once = PTHREAD_ONCE_INIT;
125  pthread_once(&once, emutls_init);
126}
127
128static __inline void emutls_lock(void) { pthread_mutex_lock(&emutls_mutex); }
129
130static __inline void emutls_unlock(void) { pthread_mutex_unlock(&emutls_mutex); }
131
132#else // _WIN32
133
134#include <assert.h>
135#include <malloc.h>
136#include <stdio.h>
137#include <windows.h>
138
// Heap-allocated critical section set up by emutls_init; NULL until then and
// again after emutls_exit has torn it down.
static LPCRITICAL_SECTION emutls_mutex;
// TLS slot holding each thread's emutls_address_array; TLS_OUT_OF_INDEXES
// while no slot is allocated.
static DWORD emutls_tls_index = TLS_OUT_OF_INDEXES;

// Windows definitions of the word/pointer types the shared code below uses.
typedef uintptr_t gcc_word;
typedef void *gcc_pointer;
144
145static void win_error(DWORD last_err, const char *hint) {
146  char *buffer = NULL;
147  if (FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER |
148                         FORMAT_MESSAGE_FROM_SYSTEM |
149                         FORMAT_MESSAGE_MAX_WIDTH_MASK,
150                     NULL, last_err, 0, (LPSTR)&buffer, 1, NULL)) {
151    fprintf(stderr, "Windows error: %s\n", buffer);
152  } else {
153    fprintf(stderr, "Unknown Windows error: %s\n", hint);
154  }
155  LocalFree(buffer);
156}
157
158static __inline void win_abort(DWORD last_err, const char *hint) {
159  win_error(last_err, hint);
160  abort();
161}
162
// Allocates 'size' bytes aligned to 'align' with _aligned_malloc. A failed
// allocation is reported and aborts via win_abort.
static __inline void *emutls_memalign_alloc(size_t align, size_t size) {
  void *block = _aligned_malloc(size, align);
  if (block == NULL)
    win_abort(GetLastError(), "_aligned_malloc");
  return block;
}
169
// Releases a block obtained from emutls_memalign_alloc (_aligned_malloc).
static __inline void emutls_memalign_free(void *base) {
  _aligned_free(base);
}
171
172static void emutls_exit(void) {
173  if (emutls_mutex) {
174    DeleteCriticalSection(emutls_mutex);
175    _aligned_free(emutls_mutex);
176    emutls_mutex = NULL;
177  }
178  if (emutls_tls_index != TLS_OUT_OF_INDEXES) {
179    emutls_shutdown((emutls_address_array *)TlsGetValue(emutls_tls_index));
180    TlsFree(emutls_tls_index);
181    emutls_tls_index = TLS_OUT_OF_INDEXES;
182  }
183}
184
185static BOOL CALLBACK emutls_init(PINIT_ONCE p0, PVOID p1, PVOID *p2) {
186  (void)p0;
187  (void)p1;
188  (void)p2;
189  emutls_mutex =
190      (LPCRITICAL_SECTION)_aligned_malloc(sizeof(CRITICAL_SECTION), 16);
191  if (!emutls_mutex) {
192    win_error(GetLastError(), "_aligned_malloc");
193    return FALSE;
194  }
195  InitializeCriticalSection(emutls_mutex);
196
197  emutls_tls_index = TlsAlloc();
198  if (emutls_tls_index == TLS_OUT_OF_INDEXES) {
199    emutls_exit();
200    win_error(GetLastError(), "TlsAlloc");
201    return FALSE;
202  }
203  atexit(&emutls_exit);
204  return TRUE;
205}
206
207static __inline void emutls_init_once(void) {
208  static INIT_ONCE once;
209  InitOnceExecuteOnce(&once, emutls_init, NULL, NULL);
210}
211
212static __inline void emutls_lock(void) { EnterCriticalSection(emutls_mutex); }
213
214static __inline void emutls_unlock(void) { LeaveCriticalSection(emutls_mutex); }
215
216static __inline void emutls_setspecific(emutls_address_array *value) {
217  if (TlsSetValue(emutls_tls_index, (LPVOID)value) == 0)
218    win_abort(GetLastError(), "TlsSetValue");
219}
220
221static __inline emutls_address_array *emutls_getspecific(void) {
222  LPVOID value = TlsGetValue(emutls_tls_index);
223  if (value == NULL) {
224    const DWORD err = GetLastError();
225    if (err != ERROR_SUCCESS)
226      win_abort(err, "TlsGetValue");
227  }
228  return (emutls_address_array *)value;
229}
230
231// Provide atomic load/store functions for emutls_get_index if built with MSVC.
232#if !defined(__ATOMIC_RELEASE)
233#include <intrin.h>
234
// Memory-order constants for the fallback __atomic_load_n/__atomic_store_n
// defined here; each fallback asserts that it receives exactly one of them.
enum { __ATOMIC_ACQUIRE = 2, __ATOMIC_RELEASE = 3 };
236
// Fallback atomic load of the pointer-sized integer at 'ptr'. Only
// __ATOMIC_ACQUIRE is supported (checked with assert).
static __inline uintptr_t __atomic_load_n(void *ptr, unsigned type) {
  uintptr_t loaded;
  assert(type == __ATOMIC_ACQUIRE);
  // An interlocked OR with 0 leaves the value untouched and returns what
  // was there, which makes it behave as a plain load.
#ifdef _WIN64
  loaded = InterlockedOr64(ptr, 0);
#else
  loaded = InterlockedOr(ptr, 0);
#endif
  return loaded;
}
247
// Fallback atomic store of 'val' into the pointer-sized location 'ptr'.
// Only __ATOMIC_RELEASE is supported (checked with assert).
static __inline void __atomic_store_n(void *ptr, uintptr_t val, unsigned type) {
  void *volatile *target = (void *volatile *)ptr;
  assert(type == __ATOMIC_RELEASE);
  InterlockedExchangePointer(target, (void *)val);
}
252
253#endif // __ATOMIC_RELEASE
254
255#endif // _WIN32
256
// Incremented by emutls_get_index, while emutls_lock is held, each time a
// control block receives its index; the new count is that index.
static size_t emutls_num_object = 0; // number of allocated TLS objects
258
259// Free the allocated TLS data
260static void emutls_shutdown(emutls_address_array *array) {
261  if (array) {
262    uintptr_t i;
263    for (i = 0; i < array->size; ++i) {
264      if (array->data[i])
265        emutls_memalign_free(array->data[i]);
266    }
267  }
268}
269
270// For every TLS variable xyz,
271// there is one __emutls_control variable named __emutls_v.xyz.
272// If xyz has non-zero initial value, __emutls_v.xyz's "value"
273// will point to __emutls_t.xyz, which has the initial value.
// Control block describing one emulated TLS variable: its size, alignment,
// assigned index, and optional initial-value image.
typedef struct __emutls_control {
  // Must use gcc_word here, instead of size_t, to match GCC.  When
  // gcc_word is larger than size_t, the upper extra bits are all
  // zeros.  We can use variables of size_t to operate on size and
  // align.
  gcc_word size;  // size of the object in bytes
  gcc_word align; // alignment of the object in bytes
  union {
    // 0 until emutls_get_index assigns an index (indices start at 1).
    uintptr_t index; // data[index-1] is the object address
    void *address;   // object address, when in single thread env
  } object;
  // When non-NULL, 'size' bytes are copied from here into each newly
  // allocated object; when NULL the object is zero-filled.
  void *value; // null or non-zero initial value for the object
} __emutls_control;
287
288// Emulated TLS objects are always allocated at run-time.
289static __inline void *emutls_allocate_object(__emutls_control *control) {
290  // Use standard C types, check with gcc's emutls.o.
291  COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(gcc_pointer));
292  COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(void *));
293
294  size_t size = control->size;
295  size_t align = control->align;
296  void *base;
297  if (align < sizeof(void *))
298    align = sizeof(void *);
299  // Make sure that align is power of 2.
300  if ((align & (align - 1)) != 0)
301    abort();
302
303  base = emutls_memalign_alloc(align, size);
304  if (control->value)
305    memcpy(base, control->value, size);
306  else
307    memset(base, 0, size);
308  return base;
309}
310
311// Returns control->object.index; set index if not allocated yet.
312static __inline uintptr_t emutls_get_index(__emutls_control *control) {
313  uintptr_t index = __atomic_load_n(&control->object.index, __ATOMIC_ACQUIRE);
314  if (!index) {
315    emutls_init_once();
316    emutls_lock();
317    index = control->object.index;
318    if (!index) {
319      index = ++emutls_num_object;
320      __atomic_store_n(&control->object.index, index, __ATOMIC_RELEASE);
321    }
322    emutls_unlock();
323  }
324  return index;
325}
326
327// Updates newly allocated thread local emutls_address_array.
328static __inline void emutls_check_array_set_size(emutls_address_array *array,
329                                                 uintptr_t size) {
330  if (array == NULL)
331    abort();
332  array->size = size;
333  emutls_setspecific(array);
334}
335
336// Returns the new 'data' array size, number of elements,
337// which must be no smaller than the given index.
338static __inline uintptr_t emutls_new_data_array_size(uintptr_t index) {
339  // Need to allocate emutls_address_array with extra slots
340  // to store the header.
341  // Round up the emutls_address_array size to multiple of 16.
342  uintptr_t header_words = sizeof(emutls_address_array) / sizeof(void *);
343  return ((index + header_words + 15) & ~((uintptr_t)15)) - header_words;
344}
345
346// Returns the size in bytes required for an emutls_address_array with
347// N number of elements for data field.
348static __inline uintptr_t emutls_asize(uintptr_t N) {
349  return N * sizeof(void *) + sizeof(emutls_address_array);
350}
351
352// Returns the thread local emutls_address_array.
353// Extends its size if necessary to hold address at index.
354static __inline emutls_address_array *
355emutls_get_address_array(uintptr_t index) {
356  emutls_address_array *array = emutls_getspecific();
357  if (array == NULL) {
358    uintptr_t new_size = emutls_new_data_array_size(index);
359    array = (emutls_address_array *)malloc(emutls_asize(new_size));
360    if (array) {
361      memset(array->data, 0, new_size * sizeof(void *));
362      array->skip_destructor_rounds = EMUTLS_SKIP_DESTRUCTOR_ROUNDS;
363    }
364    emutls_check_array_set_size(array, new_size);
365  } else if (index > array->size) {
366    uintptr_t orig_size = array->size;
367    uintptr_t new_size = emutls_new_data_array_size(index);
368    array = (emutls_address_array *)realloc(array, emutls_asize(new_size));
369    if (array)
370      memset(array->data + orig_size, 0,
371             (new_size - orig_size) * sizeof(void *));
372    emutls_check_array_set_size(array, new_size);
373  }
374  return array;
375}
376
377#ifndef _WIN32
378// Our emulated TLS implementation relies on local state (e.g. for the pthread
379// key), and if we duplicate this state across different shared libraries,
380// accesses to the same TLS variable from different shared libraries will yield
381// different results (see https://github.com/android/ndk/issues/1551 for an
382// example). __emutls_get_address is the only external entry point for emulated
383// TLS, and by making it default visibility and weak, we can rely on the dynamic
384// linker to coalesce multiple copies at runtime and ensure a single unique copy
385// of TLS state. This is a best effort; it won't work if the user is linking
386// with -Bsymbolic or -Bsymbolic-functions, and it also won't work on Windows,
387// where the dynamic linker has no notion of coalescing weak symbols at runtime.
388// A more robust solution would be to create a separate shared library for
389// emulated TLS, to ensure a single copy of its state.
390__attribute__((visibility("default"), weak))
391#endif
392void *__emutls_get_address(__emutls_control *control) {
393  uintptr_t index = emutls_get_index(control);
394  emutls_address_array *array = emutls_get_address_array(index--);
395  if (array->data[index] == NULL)
396    array->data[index] = emutls_allocate_object(control);
397  return array->data[index];
398}
399
400#ifdef __BIONIC__
401// Called by Bionic on dlclose to delete the emutls pthread key.
402__attribute__((visibility("hidden"))) void __emutls_unregister_key(void) {
403  if (emutls_key_created) {
404    pthread_key_delete(emutls_pthread_key);
405    emutls_key_created = false;
406  }
407}
408#endif
409