1/*	$NetBSD: trampoline.c,v 1.1 2024/02/18 20:57:51 christos Exp $	*/
2
3/*
4 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
5 *
6 * SPDX-License-Identifier: MPL-2.0
7 *
8 * This Source Code Form is subject to the terms of the Mozilla Public
9 * License, v. 2.0. If a copy of the MPL was not distributed with this
10 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
11 *
12 * See the COPYRIGHT file distributed with this work for additional
13 * information regarding copyright ownership.
14 */
15
16/*! \file */
17
18#include <inttypes.h>
19#include <stdlib.h>
20#include <uv.h>
21
22#include <isc/mem.h>
23#include <isc/once.h>
24#include <isc/thread.h>
25#include <isc/util.h>
26
27#include "trampoline_p.h"
28
/* Value of the 'self' member while no thread has attached to the trampoline. */
#define ISC__TRAMPOLINE_UNUSED 0

/*
 * Bookkeeping record for one thread; a pointer to it is stored in the
 * 'trampolines' table at index 'tid'.
 */
struct isc__trampoline {
	/* Index of this record in the 'trampolines' table; set at creation. */
	int tid; /* const */
	/*
	 * isc_thread_self() of the thread that attached, or
	 * ISC__TRAMPOLINE_UNUSED before isc__trampoline_attach() has run.
	 */
	uintptr_t self;
	/* Thread entry point invoked by isc__trampoline_run(). */
	isc_threadfunc_t start;
	/* Argument handed to 'start'. */
	isc_threadarg_t arg;
	/*
	 * Dummy allocation made in isc__trampoline_attach() and released in
	 * isc__trampoline_detach(); it is never read.
	 */
	void *jemalloc_enforce_init;
};
38
39/*
40 * We can't use isc_mem API here, because it's called too
41 * early and when the isc_mem_debugging flags are changed
42 * later and ISC_MEM_DEBUGSIZE or ISC_MEM_DEBUGCTX flags are
43 * added, neither isc_mem_put() nor isc_mem_free() can be used
44 * to free up the memory allocated here because the flags were
45 * not set when calling isc_mem_get() or isc_mem_allocate()
46 * here.
47 *
48 * Since this is a single allocation at library load and deallocation at library
49 * unload, using the standard allocator without the tracking is fine for this
50 * single purpose.
51 *
 * We can't use the isc_mutex API either, because we track whether mutexes get
 * properly destroyed, and we intentionally leak the static mutex here without
 * destroying it, to prevent a data race between the library destructor running
 * and a thread that is still being created.
56 */
57
/*
 * Held by isc__trampoline_get(), isc__trampoline_attach() and
 * isc__trampoline_detach() while they access the trampoline table and its
 * bounds.  Initialized in do_init().
 */
static uv_mutex_t isc__trampoline_lock;
/* Table of trampoline pointers indexed by tid; a NULL entry is a free slot. */
static isc__trampoline_t **trampolines;
/*
 * Thread-local id of the current thread.  It stays SIZE_MAX until do_init()
 * or isc__trampoline_attach() stores the thread's slot index in it.  The
 * storage-class keyword is chosen from the HAVE_* configuration macros.
 */
#if defined(HAVE_THREAD_LOCAL)
#include <threads.h>
thread_local size_t isc_tid_v = SIZE_MAX;
#elif defined(HAVE___THREAD)
__thread size_t isc_tid_v = SIZE_MAX;
#elif HAVE___DECLSPEC_THREAD
__declspec(thread) size_t isc_tid_v = SIZE_MAX;
#endif /* if defined(HAVE_THREAD_LOCAL) */
/* Index at which isc__trampoline_get() starts scanning for a free slot. */
static size_t isc__trampoline_min = 1;
/*
 * Number of slots currently allocated in 'trampolines'; doubled by
 * isc__trampoline_get() when no free slot is found.
 */
static size_t isc__trampoline_max = 65;

/* isc_once_do() controls for do_init() and do_shutdown() respectively. */
static isc_once_t start_once = ISC_ONCE_INIT;
static isc_once_t stop_once = ISC_ONCE_INIT;
73
74static isc__trampoline_t *
75isc__trampoline_new(int tid, isc_threadfunc_t start, isc_threadarg_t arg) {
76	isc__trampoline_t *trampoline = calloc(1, sizeof(*trampoline));
77	RUNTIME_CHECK(trampoline != NULL);
78
79	*trampoline = (isc__trampoline_t){
80		.tid = tid,
81		.start = start,
82		.arg = arg,
83		.self = ISC__TRAMPOLINE_UNUSED,
84	};
85
86	return (trampoline);
87}
88
89static void
90do_init(void) {
91	uv_mutex_init(&isc__trampoline_lock);
92
93	trampolines = calloc(isc__trampoline_max, sizeof(trampolines[0]));
94	RUNTIME_CHECK(trampolines != NULL);
95
96	/* Get the trampoline slot 0 for the main thread */
97	trampolines[0] = isc__trampoline_new(0, NULL, NULL);
98	isc_tid_v = trampolines[0]->tid;
99	trampolines[0]->self = isc_thread_self();
100
101	/* Initialize the other trampolines */
102	for (size_t i = 1; i < isc__trampoline_max; i++) {
103		trampolines[i] = NULL;
104	}
105	isc__trampoline_min = 1;
106}
107
/*
 * Set up the trampoline machinery by running do_init() through
 * isc_once_do() with the 'start_once' control.
 */
void
isc__trampoline_initialize(void) {
	isc_once_do(&start_once, do_init);
}
112
113static void
114do_shutdown(void) {
115	/*
116	 * When the program using the library exits abruptly and the library
117	 * gets unloaded, there might be some existing trampolines from unjoined
118	 * threads.  We intentionally ignore those and don't check whether all
119	 * trampolines have been cleared before exiting, so we leak a little bit
120	 * of resources here, including the lock.
121	 */
122	free(trampolines[0]);
123}
124
/*
 * Tear down the trampoline machinery by running do_shutdown() through
 * isc_once_do() with the 'stop_once' control.
 */
void
isc__trampoline_shutdown(void) {
	isc_once_do(&stop_once, do_shutdown);
}
129
130isc__trampoline_t *
131isc__trampoline_get(isc_threadfunc_t start, isc_threadarg_t arg) {
132	isc__trampoline_t **tmp = NULL;
133	isc__trampoline_t *trampoline = NULL;
134	uv_mutex_lock(&isc__trampoline_lock);
135again:
136	for (size_t i = isc__trampoline_min; i < isc__trampoline_max; i++) {
137		if (trampolines[i] == NULL) {
138			trampoline = isc__trampoline_new(i, start, arg);
139			trampolines[i] = trampoline;
140			isc__trampoline_min = i + 1;
141			goto done;
142		}
143	}
144	tmp = calloc(2 * isc__trampoline_max, sizeof(trampolines[0]));
145	RUNTIME_CHECK(tmp != NULL);
146	for (size_t i = 0; i < isc__trampoline_max; i++) {
147		tmp[i] = trampolines[i];
148	}
149	for (size_t i = isc__trampoline_max; i < 2 * isc__trampoline_max; i++) {
150		tmp[i] = NULL;
151	}
152	free(trampolines);
153	trampolines = tmp;
154	isc__trampoline_max = isc__trampoline_max * 2;
155	goto again;
156done:
157	INSIST(trampoline != NULL);
158	uv_mutex_unlock(&isc__trampoline_lock);
159
160	return (trampoline);
161}
162
163void
164isc__trampoline_detach(isc__trampoline_t *trampoline) {
165	uv_mutex_lock(&isc__trampoline_lock);
166	REQUIRE(trampoline->self == isc_thread_self());
167	REQUIRE(trampoline->tid > 0);
168	REQUIRE((size_t)trampoline->tid < isc__trampoline_max);
169	REQUIRE(trampolines[trampoline->tid] == trampoline);
170
171	trampolines[trampoline->tid] = NULL;
172
173	if (isc__trampoline_min > (size_t)trampoline->tid) {
174		isc__trampoline_min = trampoline->tid;
175	}
176
177	free(trampoline->jemalloc_enforce_init);
178	free(trampoline);
179
180	uv_mutex_unlock(&isc__trampoline_lock);
181	return;
182}
183
184void
185isc__trampoline_attach(isc__trampoline_t *trampoline) {
186	uv_mutex_lock(&isc__trampoline_lock);
187	REQUIRE(trampoline->self == ISC__TRAMPOLINE_UNUSED);
188	REQUIRE(trampoline->tid > 0);
189	REQUIRE((size_t)trampoline->tid < isc__trampoline_max);
190	REQUIRE(trampolines[trampoline->tid] == trampoline);
191
192	/* Initialize the trampoline */
193	isc_tid_v = trampoline->tid;
194	trampoline->self = isc_thread_self();
195
196	/*
197	 * Ensure every thread starts with a malloc() call to prevent memory
198	 * bloat caused by a jemalloc quirk.  While this dummy allocation is
199	 * not used for anything, free() must not be immediately called for it
200	 * so that an optimizing compiler does not strip away such a pair of
201	 * malloc() + free() calls altogether, as it would foil the fix.
202	 */
203	trampoline->jemalloc_enforce_init = malloc(8);
204	uv_mutex_unlock(&isc__trampoline_lock);
205}
206
207isc_threadresult_t
208isc__trampoline_run(isc_threadarg_t arg) {
209	isc__trampoline_t *trampoline = (isc__trampoline_t *)arg;
210	isc_threadresult_t result;
211
212	isc__trampoline_attach(trampoline);
213
214	/* Run the main function */
215	result = (trampoline->start)(trampoline->arg);
216
217	isc__trampoline_detach(trampoline);
218
219	return (result);
220}
221