#include "libc.h"
#include "zircon_impl.h"
#include "threads_impl.h"

#include <zircon/process.h>
#include <zircon/syscalls.h>
#include <stddef.h>
#include <string.h>

static pthread_rwlock_t allocation_lock = PTHREAD_RWLOCK_INITIALIZER;

// Many threads could be reading the TLS state.
static void thread_allocation_acquire(void) {
    pthread_rwlock_rdlock(&allocation_lock);
}

// dlopen calls this under another lock. Only one dlopen call can be
// modifying state at a time.
void __thread_allocation_inhibit(void) {
    pthread_rwlock_wrlock(&allocation_lock);
}

void __thread_allocation_release(void) {
    pthread_rwlock_unlock(&allocation_lock);
}

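// Round a size up to a whole number of pages.  For example, with a 4K page,
// both 1 and 0x1000 round to 0x1000: adding PAGE_SIZE - 1 and then masking
// with -PAGE_SIZE (all one bits above the page-offset bits) snaps any value
// up to the next page boundary.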
static inline size_t round_up_to_page(size_t sz) {
    return (sz + PAGE_SIZE - 1) & -PAGE_SIZE;
}

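// In the variant I ("TLS above TP") layout, each module's TLS block sits at
// a positive offset from the thread pointer; in variant II the blocks sit
// below the thread pointer, so the stored offset is negated.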
static ptrdiff_t offset_for_module(const struct tls_module* module) {
#ifdef TLS_ABOVE_TP
    return module->offset;
#else
    return -module->offset;
#endif
}

__NO_SAFESTACK static thrd_t copy_tls(unsigned char* mem, size_t alloc) {
    thrd_t td;
    struct tls_module* p;
    size_t i;
    void** dtv;

#ifdef TLS_ABOVE_TP
    // *-----------------------------------------------------------------------*
    // | pthread | tcb | X | tls_1 | ... | tls_n | ... | tls_cnt | dtv[1] | ... |
    // *-----------------------------------------------------------------------*
    // ^         ^         ^             ^            ^
    // td        tp      dtv[1]       dtv[n+1]       dtv
    //
    // Note: The TCB is actually the last member of pthread.
    // See: "Addenda to, and Errata in, the ABI for the ARM Architecture"

    dtv = (void**)(mem + libc.tls_size) - (libc.tls_cnt + 1);
    // We need to make sure that the thread pointer is maximally aligned so
    // that tp + dtv[N] is aligned to align_N no matter what N is. So we need
    // 'mem' to be such that if mem == td then td->head is maximally aligned.
    // To do this we take &td->head (i.e. mem + the offset of head), align it,
    // and then subtract out the offset of ->head so that &td->head itself
    // ends up aligned.
    uintptr_t tp = (uintptr_t)mem + PTHREAD_TP_OFFSET;
    tp = (tp + libc.tls_align - 1) & -libc.tls_align;
    td = (thrd_t)(tp - PTHREAD_TP_OFFSET);
    // From here on, mem is the new (aligned) thread pointer.
    mem = (unsigned char*)tp;
#else
    // *-----------------------------------------------------------------------*
    // | tls_cnt | dtv[1] | ... | tls_n | ... | tls_1 | tcb | pthread | unused |
    // *-----------------------------------------------------------------------*
    // ^                        ^             ^       ^
    // dtv                   dtv[n+1]       dtv[1]  tp/td
    //
    // Note: The TCB is actually the first member of pthread.
    dtv = (void**)mem;

    mem += alloc - sizeof(struct pthread);
    mem -= (uintptr_t)mem & (libc.tls_align - 1);
    td = (thrd_t)mem;
#endif

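    // Install a DTV entry for each loaded module and copy in its TLS
    // initialization image.  Any space past p->len stays zero, since the
    // backing VMO pages start out zero-filled.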
    for (i = 1, p = libc.tls_head; p; i++, p = p->next) {
        dtv[i] = mem + offset_for_module(p);
        memcpy(dtv[i], p->image, p->len);
    }

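    // Slot 0 of the DTV records the number of module entries that follow.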
    dtv[0] = (void*)libc.tls_cnt;
    td->head.dtv = dtv;
    return td;
}

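// Reserves a sub-VMAR spanning |before| + |size| + |after| bytes and maps
// |size| bytes of |vmo| (starting at |vmo_offset|) at offset |before| within
// it, leaving the ranges on either side reserved but unmapped as guards.
// On success, |mapping| describes the usable pages and |region| the whole
// reservation.  Returns true on failure (note the inverted sense).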
__NO_SAFESTACK static bool map_block(zx_handle_t parent_vmar,
                                     zx_handle_t vmo, size_t vmo_offset,
                                     size_t size, size_t before, size_t after,
                                     struct iovec* mapping,
                                     struct iovec* region) {
    region->iov_len = before + size + after;
    zx_handle_t vmar;
    uintptr_t addr;
    zx_status_t status = _zx_vmar_allocate(parent_vmar,
                                           ZX_VM_CAN_MAP_READ |
                                           ZX_VM_CAN_MAP_WRITE |
                                           ZX_VM_CAN_MAP_SPECIFIC,
                                           0, region->iov_len, &vmar, &addr);
    if (status != ZX_OK)
        return true;
    region->iov_base = (void*)addr;
    status = _zx_vmar_map(vmar,
                          ZX_VM_PERM_READ |
                          ZX_VM_PERM_WRITE |
                          ZX_VM_SPECIFIC,
                          before, vmo, vmo_offset, size, &addr);
    if (status != ZX_OK)
        _zx_vmar_destroy(vmar);
    _zx_handle_close(vmar);
    mapping->iov_base = (void*)addr;
    mapping->iov_len = size;
    return status != ZX_OK;
}

// This allocates all the per-thread memory for a new thread about to
// be created, or for the initial thread at startup.  It's called
// either at startup or under thread_allocation_acquire.  Hence,
// it's serialized with any dynamic linker changes to the TLS
// bookkeeping.
//
// This conceptually allocates four things, but concretely allocates
// three separate blocks.
// 1. The safe stack (where the thread's SP will point).
// 2. The unsafe stack (where __builtin___get_unsafe_stack_ptr() will point).
// 3. The thread descriptor (struct pthread).  The thread pointer points
//    into this (where into it depends on the machine ABI).
// 4. The static TLS area.  The ELF TLS ABI for the Initial Exec model
//    mandates a fixed distance from the thread pointer to the TLS area
//    across all threads.  So effectively this must always be allocated
//    as part of the same block with the thread descriptor.
// This function also copies in the TLS initializer data.
// It initializes the basic thread descriptor fields.
// Everything else is zero-initialized.
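//
// All three blocks are carved out of a single VMO laid out as
//   [ static TLS + thread descriptor | safe stack | unsafe stack ]
// at VMO offsets 0, tcb_size, and tcb_size + stack_size respectively, with
// guard pages supplied by each block's surrounding VMAR reservation.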

__NO_SAFESTACK thrd_t __allocate_thread(
    size_t requested_guard_size,
    size_t requested_stack_size,
    const char* thread_name,
    char vmo_name[ZX_MAX_NAME_LEN]) {
    thread_allocation_acquire();

    const size_t guard_size =
        requested_guard_size == 0 ? 0 : round_up_to_page(requested_guard_size);
    const size_t stack_size = round_up_to_page(requested_stack_size);

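    // libc.tls_size already accounts for struct pthread (copy_tls places the
    // thread descriptor inside this same block), so one page-rounded
    // allocation covers both the static TLS area and the descriptor.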
    const size_t tls_size = libc.tls_size;
    const size_t tcb_size = round_up_to_page(tls_size);

    const size_t vmo_size = tcb_size + stack_size * 2;
    zx_handle_t vmo;
    zx_status_t status = _zx_vmo_create(vmo_size, 0, &vmo);
    if (status != ZX_OK) {
        __thread_allocation_release();
        return NULL;
    }
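    // Map the TLS/descriptor block with a guard page on each side, so stray
    // accesses just past either end of the thread block fault instead of
    // touching a neighboring mapping.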
    struct iovec tcb, tcb_region;
    if (map_block(_zx_vmar_root_self(), vmo, 0, tcb_size, PAGE_SIZE, PAGE_SIZE,
                  &tcb, &tcb_region)) {
        __thread_allocation_release();
        _zx_handle_close(vmo);
        return NULL;
    }

    thrd_t td = copy_tls(tcb.iov_base, tcb.iov_len);

    // At this point all our access to global TLS state is done, so we
    // can allow dlopen again.
    __thread_allocation_release();

    // For the initial thread (vmo_name == NULL), it's too early to call
    // snprintf, since snprintf is not __NO_SAFESTACK.
    if (vmo_name != NULL) {
        // For other threads, try to give the VMO a name that includes
        // the thrd_t value (and the TLS size if that fits too), but
        // don't use a truncated value since that would be confusing to
        // interpret.
        if (snprintf(vmo_name, ZX_MAX_NAME_LEN, "%s:%p/TLS=%#zx",
                     thread_name, td, tls_size) < ZX_MAX_NAME_LEN ||
            snprintf(vmo_name, ZX_MAX_NAME_LEN, "%s:%p",
                     thread_name, td) < ZX_MAX_NAME_LEN)
            thread_name = vmo_name;
    }
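    // Name the VMO for diagnostics.  This is best-effort: the returned
    // status is ignored, since the thread works fine with an unnamed VMO.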
    _zx_object_set_property(vmo, ZX_PROP_NAME,
                            thread_name, strlen(thread_name));

    if (map_block(_zx_vmar_root_self(), vmo,
                  tcb_size, stack_size, guard_size, 0,
                  &td->safe_stack, &td->safe_stack_region)) {
        _zx_vmar_unmap(_zx_vmar_root_self(),
                       (uintptr_t)tcb_region.iov_base, tcb_region.iov_len);
        _zx_handle_close(vmo);
        return NULL;
    }

    if (map_block(_zx_vmar_root_self(), vmo,
                  tcb_size + stack_size, stack_size, guard_size, 0,
                  &td->unsafe_stack, &td->unsafe_stack_region)) {
        _zx_vmar_unmap(_zx_vmar_root_self(),
                       (uintptr_t)td->safe_stack_region.iov_base,
                       td->safe_stack_region.iov_len);
        _zx_vmar_unmap(_zx_vmar_root_self(),
                       (uintptr_t)tcb_region.iov_base, tcb_region.iov_len);
        _zx_handle_close(vmo);
        return NULL;
    }

    _zx_handle_close(vmo);
    td->tcb_region = tcb_region;
    td->locale = &libc.global_locale;
    td->head.tp = (uintptr_t)pthread_to_tp(td);
    td->abi.stack_guard = __stack_chk_guard;
    td->abi.unsafe_sp =
        (uintptr_t)td->unsafe_stack.iov_base + td->unsafe_stack.iov_len;
    return td;
}