/* trampoline.c revision 1.1 */
1/* $NetBSD: trampoline.c,v 1.1 2024/02/18 20:57:51 christos Exp $ */ 2 3/* 4 * Copyright (C) Internet Systems Consortium, Inc. ("ISC") 5 * 6 * SPDX-License-Identifier: MPL-2.0 7 * 8 * This Source Code Form is subject to the terms of the Mozilla Public 9 * License, v. 2.0. If a copy of the MPL was not distributed with this 10 * file, you can obtain one at https://mozilla.org/MPL/2.0/. 11 * 12 * See the COPYRIGHT file distributed with this work for additional 13 * information regarding copyright ownership. 14 */ 15 16/*! \file */ 17 18#include <inttypes.h> 19#include <stdlib.h> 20#include <uv.h> 21 22#include <isc/mem.h> 23#include <isc/once.h> 24#include <isc/thread.h> 25#include <isc/util.h> 26 27#include "trampoline_p.h" 28 29#define ISC__TRAMPOLINE_UNUSED 0 30 31struct isc__trampoline { 32 int tid; /* const */ 33 uintptr_t self; 34 isc_threadfunc_t start; 35 isc_threadarg_t arg; 36 void *jemalloc_enforce_init; 37}; 38 39/* 40 * We can't use isc_mem API here, because it's called too 41 * early and when the isc_mem_debugging flags are changed 42 * later and ISC_MEM_DEBUGSIZE or ISC_MEM_DEBUGCTX flags are 43 * added, neither isc_mem_put() nor isc_mem_free() can be used 44 * to free up the memory allocated here because the flags were 45 * not set when calling isc_mem_get() or isc_mem_allocate() 46 * here. 47 * 48 * Since this is a single allocation at library load and deallocation at library 49 * unload, using the standard allocator without the tracking is fine for this 50 * single purpose. 51 * 52 * We can't use isc_mutex API either, because we track whether the mutexes get 53 * properly destroyed, and we intentionally leak the static mutex here without 54 * destroying it to prevent data race between library destructor running while 55 * thread is being still created. 
56 */ 57 58static uv_mutex_t isc__trampoline_lock; 59static isc__trampoline_t **trampolines; 60#if defined(HAVE_THREAD_LOCAL) 61#include <threads.h> 62thread_local size_t isc_tid_v = SIZE_MAX; 63#elif defined(HAVE___THREAD) 64__thread size_t isc_tid_v = SIZE_MAX; 65#elif HAVE___DECLSPEC_THREAD 66__declspec(thread) size_t isc_tid_v = SIZE_MAX; 67#endif /* if defined(HAVE_THREAD_LOCAL) */ 68static size_t isc__trampoline_min = 1; 69static size_t isc__trampoline_max = 65; 70 71static isc_once_t start_once = ISC_ONCE_INIT; 72static isc_once_t stop_once = ISC_ONCE_INIT; 73 74static isc__trampoline_t * 75isc__trampoline_new(int tid, isc_threadfunc_t start, isc_threadarg_t arg) { 76 isc__trampoline_t *trampoline = calloc(1, sizeof(*trampoline)); 77 RUNTIME_CHECK(trampoline != NULL); 78 79 *trampoline = (isc__trampoline_t){ 80 .tid = tid, 81 .start = start, 82 .arg = arg, 83 .self = ISC__TRAMPOLINE_UNUSED, 84 }; 85 86 return (trampoline); 87} 88 89static void 90do_init(void) { 91 uv_mutex_init(&isc__trampoline_lock); 92 93 trampolines = calloc(isc__trampoline_max, sizeof(trampolines[0])); 94 RUNTIME_CHECK(trampolines != NULL); 95 96 /* Get the trampoline slot 0 for the main thread */ 97 trampolines[0] = isc__trampoline_new(0, NULL, NULL); 98 isc_tid_v = trampolines[0]->tid; 99 trampolines[0]->self = isc_thread_self(); 100 101 /* Initialize the other trampolines */ 102 for (size_t i = 1; i < isc__trampoline_max; i++) { 103 trampolines[i] = NULL; 104 } 105 isc__trampoline_min = 1; 106} 107 108void 109isc__trampoline_initialize(void) { 110 isc_once_do(&start_once, do_init); 111} 112 113static void 114do_shutdown(void) { 115 /* 116 * When the program using the library exits abruptly and the library 117 * gets unloaded, there might be some existing trampolines from unjoined 118 * threads. We intentionally ignore those and don't check whether all 119 * trampolines have been cleared before exiting, so we leak a little bit 120 * of resources here, including the lock. 
121 */ 122 free(trampolines[0]); 123} 124 125void 126isc__trampoline_shutdown(void) { 127 isc_once_do(&stop_once, do_shutdown); 128} 129 130isc__trampoline_t * 131isc__trampoline_get(isc_threadfunc_t start, isc_threadarg_t arg) { 132 isc__trampoline_t **tmp = NULL; 133 isc__trampoline_t *trampoline = NULL; 134 uv_mutex_lock(&isc__trampoline_lock); 135again: 136 for (size_t i = isc__trampoline_min; i < isc__trampoline_max; i++) { 137 if (trampolines[i] == NULL) { 138 trampoline = isc__trampoline_new(i, start, arg); 139 trampolines[i] = trampoline; 140 isc__trampoline_min = i + 1; 141 goto done; 142 } 143 } 144 tmp = calloc(2 * isc__trampoline_max, sizeof(trampolines[0])); 145 RUNTIME_CHECK(tmp != NULL); 146 for (size_t i = 0; i < isc__trampoline_max; i++) { 147 tmp[i] = trampolines[i]; 148 } 149 for (size_t i = isc__trampoline_max; i < 2 * isc__trampoline_max; i++) { 150 tmp[i] = NULL; 151 } 152 free(trampolines); 153 trampolines = tmp; 154 isc__trampoline_max = isc__trampoline_max * 2; 155 goto again; 156done: 157 INSIST(trampoline != NULL); 158 uv_mutex_unlock(&isc__trampoline_lock); 159 160 return (trampoline); 161} 162 163void 164isc__trampoline_detach(isc__trampoline_t *trampoline) { 165 uv_mutex_lock(&isc__trampoline_lock); 166 REQUIRE(trampoline->self == isc_thread_self()); 167 REQUIRE(trampoline->tid > 0); 168 REQUIRE((size_t)trampoline->tid < isc__trampoline_max); 169 REQUIRE(trampolines[trampoline->tid] == trampoline); 170 171 trampolines[trampoline->tid] = NULL; 172 173 if (isc__trampoline_min > (size_t)trampoline->tid) { 174 isc__trampoline_min = trampoline->tid; 175 } 176 177 free(trampoline->jemalloc_enforce_init); 178 free(trampoline); 179 180 uv_mutex_unlock(&isc__trampoline_lock); 181 return; 182} 183 184void 185isc__trampoline_attach(isc__trampoline_t *trampoline) { 186 uv_mutex_lock(&isc__trampoline_lock); 187 REQUIRE(trampoline->self == ISC__TRAMPOLINE_UNUSED); 188 REQUIRE(trampoline->tid > 0); 189 REQUIRE((size_t)trampoline->tid < 
isc__trampoline_max); 190 REQUIRE(trampolines[trampoline->tid] == trampoline); 191 192 /* Initialize the trampoline */ 193 isc_tid_v = trampoline->tid; 194 trampoline->self = isc_thread_self(); 195 196 /* 197 * Ensure every thread starts with a malloc() call to prevent memory 198 * bloat caused by a jemalloc quirk. While this dummy allocation is 199 * not used for anything, free() must not be immediately called for it 200 * so that an optimizing compiler does not strip away such a pair of 201 * malloc() + free() calls altogether, as it would foil the fix. 202 */ 203 trampoline->jemalloc_enforce_init = malloc(8); 204 uv_mutex_unlock(&isc__trampoline_lock); 205} 206 207isc_threadresult_t 208isc__trampoline_run(isc_threadarg_t arg) { 209 isc__trampoline_t *trampoline = (isc__trampoline_t *)arg; 210 isc_threadresult_t result; 211 212 isc__trampoline_attach(trampoline); 213 214 /* Run the main function */ 215 result = (trampoline->start)(trampoline->arg); 216 217 isc__trampoline_detach(trampoline); 218 219 return (result); 220} 221