1/*	$NetBSD: trampoline.c,v 1.3 2024/02/21 22:52:29 christos Exp $	*/
2
3/*
4 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
5 *
6 * SPDX-License-Identifier: MPL-2.0
7 *
8 * This Source Code Form is subject to the terms of the Mozilla Public
9 * License, v. 2.0. If a copy of the MPL was not distributed with this
10 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
11 *
12 * See the COPYRIGHT file distributed with this work for additional
13 * information regarding copyright ownership.
14 */
15
16/*! \file */
17
18#include <inttypes.h>
19#include <stdlib.h>
20#include <uv.h>
21
22#include <isc/mem.h>
23#include <isc/once.h>
24#include <isc/thread.h>
25#include <isc/util.h>
26
27#include "mem_p.h"
28#include "trampoline_p.h"
29
/*
 * Value held in a trampoline's 'self' field while no thread has claimed
 * the trampoline yet.
 */
#define ISC__TRAMPOLINE_UNUSED 0
31
32struct isc__trampoline {
33	int tid; /* const */
34	uintptr_t self;
35	isc_threadfunc_t start;
36	isc_threadarg_t arg;
37	void *jemalloc_enforce_init;
38};
39
/*
 * We can't use the isc_mem API here, because it's called too early and the
 * isc_mem_debugging flags can be changed later, causing a mismatch between
 * the flags used for isc_mem_get() and isc_mem_put().
 *
 * Since this is a single allocation at library load and a single deallocation
 * at library unload, using the standard allocator without the tracking is
 * fine for this single purpose.
 *
 * We can't use the isc_mutex API either, because we track whether the mutexes
 * get properly destroyed, and we intentionally leak the static mutex here
 * without destroying it to prevent a data race between the library destructor
 * running and a thread that is still being created.
 */
54
55static uv_mutex_t isc__trampoline_lock;
56static isc__trampoline_t **trampolines;
57thread_local size_t isc_tid_v = SIZE_MAX;
58static size_t isc__trampoline_min = 1;
59static size_t isc__trampoline_max = 65;
60
61static isc__trampoline_t *
62isc__trampoline_new(int tid, isc_threadfunc_t start, isc_threadarg_t arg) {
63	isc__trampoline_t *trampoline = calloc(1, sizeof(*trampoline));
64	RUNTIME_CHECK(trampoline != NULL);
65
66	*trampoline = (isc__trampoline_t){
67		.tid = tid,
68		.start = start,
69		.arg = arg,
70		.self = ISC__TRAMPOLINE_UNUSED,
71	};
72
73	return (trampoline);
74}
75
76void
77isc__trampoline_initialize(void) {
78	uv_mutex_init(&isc__trampoline_lock);
79
80	trampolines = calloc(isc__trampoline_max, sizeof(trampolines[0]));
81	RUNTIME_CHECK(trampolines != NULL);
82
83	/* Get the trampoline slot 0 for the main thread */
84	trampolines[0] = isc__trampoline_new(0, NULL, NULL);
85	isc_tid_v = trampolines[0]->tid;
86	trampolines[0]->self = isc_thread_self();
87
88	/* Initialize the other trampolines */
89	for (size_t i = 1; i < isc__trampoline_max; i++) {
90		trampolines[i] = NULL;
91	}
92	isc__trampoline_min = 1;
93}
94
95void
96isc__trampoline_shutdown(void) {
97	/*
98	 * When the program using the library exits abruptly and the library
99	 * gets unloaded, there might be some existing trampolines from unjoined
100	 * threads.  We intentionally ignore those and don't check whether all
101	 * trampolines have been cleared before exiting, so we leak a little bit
102	 * of resources here, including the lock.
103	 */
104	free(trampolines[0]);
105}
106
107isc__trampoline_t *
108isc__trampoline_get(isc_threadfunc_t start, isc_threadarg_t arg) {
109	isc__trampoline_t **tmp = NULL;
110	isc__trampoline_t *trampoline = NULL;
111	uv_mutex_lock(&isc__trampoline_lock);
112again:
113	for (size_t i = isc__trampoline_min; i < isc__trampoline_max; i++) {
114		if (trampolines[i] == NULL) {
115			trampoline = isc__trampoline_new(i, start, arg);
116			trampolines[i] = trampoline;
117			isc__trampoline_min = i + 1;
118			goto done;
119		}
120	}
121	tmp = calloc(2 * isc__trampoline_max, sizeof(trampolines[0]));
122	RUNTIME_CHECK(tmp != NULL);
123	for (size_t i = 0; i < isc__trampoline_max; i++) {
124		tmp[i] = trampolines[i];
125	}
126	for (size_t i = isc__trampoline_max; i < 2 * isc__trampoline_max; i++) {
127		tmp[i] = NULL;
128	}
129	free(trampolines);
130	trampolines = tmp;
131	isc__trampoline_max = isc__trampoline_max * 2;
132	goto again;
133done:
134	INSIST(trampoline != NULL);
135	uv_mutex_unlock(&isc__trampoline_lock);
136
137	return (trampoline);
138}
139
140void
141isc__trampoline_detach(isc__trampoline_t *trampoline) {
142	uv_mutex_lock(&isc__trampoline_lock);
143	REQUIRE(trampoline->self == isc_thread_self());
144	REQUIRE(trampoline->tid > 0);
145	REQUIRE((size_t)trampoline->tid < isc__trampoline_max);
146	REQUIRE(trampolines[trampoline->tid] == trampoline);
147
148	trampolines[trampoline->tid] = NULL;
149
150	if (isc__trampoline_min > (size_t)trampoline->tid) {
151		isc__trampoline_min = trampoline->tid;
152	}
153
154	isc__mem_free_noctx(trampoline->jemalloc_enforce_init, 8);
155	free(trampoline);
156
157	uv_mutex_unlock(&isc__trampoline_lock);
158	return;
159}
160
161void
162isc__trampoline_attach(isc__trampoline_t *trampoline) {
163	uv_mutex_lock(&isc__trampoline_lock);
164	REQUIRE(trampoline->self == ISC__TRAMPOLINE_UNUSED);
165	REQUIRE(trampoline->tid > 0);
166	REQUIRE((size_t)trampoline->tid < isc__trampoline_max);
167	REQUIRE(trampolines[trampoline->tid] == trampoline);
168
169	/* Initialize the trampoline */
170	isc_tid_v = trampoline->tid;
171	trampoline->self = isc_thread_self();
172
173	/*
174	 * Ensure every thread starts with a malloc() call to prevent memory
175	 * bloat caused by a jemalloc quirk.  While this dummy allocation is
176	 * not used for anything, free() must not be immediately called for it
177	 * so that an optimizing compiler does not strip away such a pair of
178	 * malloc() + free() calls altogether, as it would foil the fix.
179	 */
180	trampoline->jemalloc_enforce_init = isc__mem_alloc_noctx(8);
181	uv_mutex_unlock(&isc__trampoline_lock);
182}
183
184isc_threadresult_t
185isc__trampoline_run(isc_threadarg_t arg) {
186	isc__trampoline_t *trampoline = (isc__trampoline_t *)arg;
187	isc_threadresult_t result;
188
189	isc__trampoline_attach(trampoline);
190
191	/* Run the main function */
192	result = (trampoline->start)(trampoline->arg);
193
194	isc__trampoline_detach(trampoline);
195
196	return (result);
197}
198