11592Srgrimes/*
21592Srgrimes * SPDX-License-Identifier: CDDL 1.0
31592Srgrimes *
41592Srgrimes * Copyright (c) 2022 Christos Margiolis <christos@FreeBSD.org>
51592Srgrimes * Copyright (c) 2022 Mark Johnston <markj@FreeBSD.org>
61592Srgrimes * Copyright (c) 2023 The FreeBSD Foundation
71592Srgrimes *
81592Srgrimes * Portions of this software were developed by Christos Margiolis
91592Srgrimes * <christos@FreeBSD.org> under sponsorship from the FreeBSD Foundation.
101592Srgrimes */
111592Srgrimes
121592Srgrimes#include <sys/param.h>
131592Srgrimes#include <sys/bitset.h>
141592Srgrimes#include <sys/cred.h>
151592Srgrimes#include <sys/eventhandler.h>
161592Srgrimes#include <sys/kernel.h>
171592Srgrimes#include <sys/lock.h>
181592Srgrimes#include <sys/malloc.h>
191592Srgrimes#include <sys/proc.h>
201592Srgrimes#include <sys/queue.h>
211592Srgrimes#include <sys/sx.h>
221592Srgrimes
231592Srgrimes#include <vm/vm.h>
241592Srgrimes#include <vm/vm_param.h>
251592Srgrimes#include <vm/pmap.h>
261592Srgrimes#include <vm/vm_map.h>
271592Srgrimes#include <vm/vm_kern.h>
281592Srgrimes#include <vm/vm_object.h>
291592Srgrimes
301592Srgrimes#include <cddl/dev/dtrace/dtrace_cddl.h>
311592Srgrimes
321592Srgrimes#include "kinst.h"
331592Srgrimes#include "kinst_isa.h"
341592Srgrimes
/*
 * Filler for trampoline memory: a one-element array holding KINST_PATCHVAL,
 * and the size in bytes of that single element.
 */
#define KINST_TRAMP_FILL_PATTERN	((kinst_patchval_t []){KINST_PATCHVAL})
#define KINST_TRAMP_FILL_SIZE		(sizeof(kinst_patchval_t))

/* A chunk is one page, carved into KINST_TRAMP_SIZE-byte trampolines. */
#define KINST_TRAMPCHUNK_SIZE		PAGE_SIZE
#define KINST_TRAMPS_PER_CHUNK		(KINST_TRAMPCHUNK_SIZE / KINST_TRAMP_SIZE)
401592Srgrimes
411592Srgrimesstruct trampchunk {
421592Srgrimes	TAILQ_ENTRY(trampchunk) next;
431592Srgrimes	uint8_t *addr;
4431329Scharnier	/* 0 -> allocated, 1 -> free */
451592Srgrimes	BITSET_DEFINE(, KINST_TRAMPS_PER_CHUNK) free;
4631329Scharnier};
4731329Scharnier
4850476Speterstatic TAILQ_HEAD(, trampchunk)	kinst_trampchunks =
491592Srgrimes    TAILQ_HEAD_INITIALIZER(kinst_trampchunks);
501592Srgrimesstatic struct sx		kinst_tramp_sx;
511592SrgrimesSX_SYSINIT(kinst_tramp_sx, &kinst_tramp_sx, "kinst tramp");
521592Srgrimes#ifdef __amd64__
531592Srgrimesstatic eventhandler_tag		kinst_thread_ctor_handler;
541592Srgrimesstatic eventhandler_tag		kinst_thread_dtor_handler;
551592Srgrimes#endif
561592Srgrimes
571592Srgrimes/*
581592Srgrimes * Fill the trampolines with KINST_TRAMP_FILL_PATTERN so that the kernel will
591592Srgrimes * crash cleanly if things somehow go wrong.
601592Srgrimes */
6156668Sshinstatic void
621592Srgrimeskinst_trampoline_fill(uint8_t *addr, int size)
631592Srgrimes{
641592Srgrimes	int i;
651592Srgrimes
661592Srgrimes	for (i = 0; i < size; i += KINST_TRAMP_FILL_SIZE) {
671592Srgrimes		memcpy(&addr[i], KINST_TRAMP_FILL_PATTERN,
681592Srgrimes		    KINST_TRAMP_FILL_SIZE);
691592Srgrimes	}
701592Srgrimes}
7113139Speter
7275535Sphkstatic struct trampchunk *
731592Srgrimeskinst_trampchunk_alloc(void)
741592Srgrimes{
751592Srgrimes	struct trampchunk *chunk;
7656668Sshin	vm_offset_t trampaddr;
771592Srgrimes	int error __diagused;
781592Srgrimes
791592Srgrimes	sx_assert(&kinst_tramp_sx, SX_XLOCKED);
8017435Spst
811592Srgrimes#ifdef __amd64__
821592Srgrimes	/*
831592Srgrimes	 * To simplify population of trampolines, we follow the amd64 kernel's
841592Srgrimes	 * code model and allocate them above KERNBASE, i.e., in the top 2GB of
851592Srgrimes	 * the kernel's virtual address space (not the case for other
861592Srgrimes	 * platforms).
871592Srgrimes	 */
8827650Sdavidn	trampaddr = KERNBASE;
8927650Sdavidn#else
901592Srgrimes	trampaddr = VM_MIN_KERNEL_ADDRESS;
911592Srgrimes#endif
921592Srgrimes	/*
931592Srgrimes	 * Allocate virtual memory for the trampoline chunk. The returned
9470102Sphk	 * address is saved in "trampaddr". Trampolines must be executable so
9570102Sphk	 * max_prot must include VM_PROT_EXECUTE.
961592Srgrimes	 */
971592Srgrimes	error = vm_map_find(kernel_map, NULL, 0, &trampaddr,
981592Srgrimes	    KINST_TRAMPCHUNK_SIZE, 0, VMFS_ANY_SPACE, VM_PROT_ALL, VM_PROT_ALL,
991592Srgrimes	    0);
1001592Srgrimes	if (error != KERN_SUCCESS) {
1011592Srgrimes		KINST_LOG("trampoline chunk allocation failed: %d", error);
1021592Srgrimes		return (NULL);
1031592Srgrimes	}
1041592Srgrimes
10556668Sshin	error = kmem_back(kernel_object, trampaddr, KINST_TRAMPCHUNK_SIZE,
10656668Sshin	    M_WAITOK | M_EXEC);
1071592Srgrimes	KASSERT(error == KERN_SUCCESS, ("kmem_back failed: %d", error));
1081592Srgrimes
1091592Srgrimes	kinst_trampoline_fill((uint8_t *)trampaddr, KINST_TRAMPCHUNK_SIZE);
1101592Srgrimes
1111592Srgrimes	/* Allocate a tracker for this chunk. */
1121592Srgrimes	chunk = malloc(sizeof(*chunk), M_KINST, M_WAITOK);
1131592Srgrimes	chunk->addr = (void *)trampaddr;
1141592Srgrimes	BIT_FILL(KINST_TRAMPS_PER_CHUNK, &chunk->free);
1151592Srgrimes
1161592Srgrimes	TAILQ_INSERT_HEAD(&kinst_trampchunks, chunk, next);
11756668Sshin
1181592Srgrimes	return (chunk);
1191592Srgrimes}
1201592Srgrimes
1211592Srgrimesstatic void
1221592Srgrimeskinst_trampchunk_free(struct trampchunk *chunk)
1231592Srgrimes{
1241592Srgrimes	sx_assert(&kinst_tramp_sx, SX_XLOCKED);
1251592Srgrimes
1261592Srgrimes	TAILQ_REMOVE(&kinst_trampchunks, chunk, next);
1271592Srgrimes	kmem_unback(kernel_object, (vm_offset_t)chunk->addr,
12856668Sshin	    KINST_TRAMPCHUNK_SIZE);
1291592Srgrimes	(void)vm_map_remove(kernel_map, (vm_offset_t)chunk->addr,
13075535Sphk	    (vm_offset_t)(chunk->addr + KINST_TRAMPCHUNK_SIZE));
1311592Srgrimes	free(chunk, M_KINST);
1321592Srgrimes}
1331592Srgrimes
1341592Srgrimesstatic uint8_t *
1351592Srgrimeskinst_trampoline_alloc_locked(int how)
1361592Srgrimes{
1371592Srgrimes	struct trampchunk *chunk;
13870102Sphk	uint8_t *tramp;
13970102Sphk	int off;
1401592Srgrimes
14156668Sshin	sx_assert(&kinst_tramp_sx, SX_XLOCKED);
14256668Sshin
1431592Srgrimes	TAILQ_FOREACH(chunk, &kinst_trampchunks, next) {
1441592Srgrimes		/* All trampolines from this chunk are already allocated. */
1451592Srgrimes		if ((off = BIT_FFS(KINST_TRAMPS_PER_CHUNK, &chunk->free)) == 0)
1461592Srgrimes			continue;
1471592Srgrimes		/* BIT_FFS() returns indices starting at 1 instead of 0. */
1481592Srgrimes		off--;
1491592Srgrimes		break;
1501592Srgrimes	}
1511592Srgrimes	if (chunk == NULL) {
1521592Srgrimes		if ((how & M_NOWAIT) != 0)
1531592Srgrimes			return (NULL);
1541592Srgrimes
1551592Srgrimes		if ((chunk = kinst_trampchunk_alloc()) == NULL) {
1561592Srgrimes#ifdef __amd64__
1571592Srgrimes			/*
1581592Srgrimes			 * We didn't find any free trampoline in the current
1591592Srgrimes			 * list, allocate a new one.  If that fails the
1601592Srgrimes			 * provider will no longer be reliable, so try to warn
1611592Srgrimes			 * the user.
1621592Srgrimes			 */
1631592Srgrimes			static bool once = true;
1641592Srgrimes
1651592Srgrimes			if (once) {
1661592Srgrimes				once = false;
1671592Srgrimes				KINST_LOG(
1681592Srgrimes				    "kinst: failed to allocate trampoline, "
16975556Sgreen				    "probes may not fire");
17075556Sgreen			}
17175556Sgreen#endif
17275556Sgreen			return (NULL);
17317433Spst		}
1741592Srgrimes		off = 0;
17556668Sshin	}
17656668Sshin	BIT_CLR(KINST_TRAMPS_PER_CHUNK, off, &chunk->free);
17756668Sshin	tramp = chunk->addr + off * KINST_TRAMP_SIZE;
17856668Sshin	return (tramp);
17956668Sshin}
18056668Sshin
18156668Sshinuint8_t *
18256668Sshinkinst_trampoline_alloc(int how)
18356668Sshin{
18456668Sshin	uint8_t *tramp;
18556668Sshin
18656668Sshin	sx_xlock(&kinst_tramp_sx);
18756668Sshin	tramp = kinst_trampoline_alloc_locked(how);
18856668Sshin	sx_xunlock(&kinst_tramp_sx);
18956668Sshin	return (tramp);
19056668Sshin}
19156668Sshin
19256668Sshinstatic void
19356668Sshinkinst_trampoline_dealloc_locked(uint8_t *tramp, bool freechunks)
19456668Sshin{
19556668Sshin	struct trampchunk *chunk;
19656668Sshin	int off;
19756668Sshin
19856668Sshin	sx_assert(&kinst_tramp_sx, SX_XLOCKED);
19956668Sshin
20056668Sshin	if (tramp == NULL)
20156668Sshin		return;
20256668Sshin
20356668Sshin	TAILQ_FOREACH(chunk, &kinst_trampchunks, next) {
20456668Sshin		for (off = 0; off < KINST_TRAMPS_PER_CHUNK; off++) {
20556668Sshin			if (chunk->addr + off * KINST_TRAMP_SIZE == tramp) {
20656668Sshin				kinst_trampoline_fill(tramp, KINST_TRAMP_SIZE);
20756668Sshin				BIT_SET(KINST_TRAMPS_PER_CHUNK, off,
20856668Sshin				    &chunk->free);
20956668Sshin				if (freechunks &&
21056668Sshin				    BIT_ISFULLSET(KINST_TRAMPS_PER_CHUNK,
21156668Sshin				    &chunk->free))
21256668Sshin					kinst_trampchunk_free(chunk);
21356668Sshin				return;
21456668Sshin			}
21556668Sshin		}
21656668Sshin	}
21756668Sshin	panic("%s: did not find trampoline chunk for %p", __func__, tramp);
21856668Sshin}
21956668Sshin
22056668Sshinvoid
22156668Sshinkinst_trampoline_dealloc(uint8_t *tramp)
22256668Sshin{
22356668Sshin	sx_xlock(&kinst_tramp_sx);
22456668Sshin	kinst_trampoline_dealloc_locked(tramp, true);
22556668Sshin	sx_xunlock(&kinst_tramp_sx);
22656668Sshin}
22756668Sshin
22856668Sshin#ifdef __amd64__
22956668Sshinstatic void
23056668Sshinkinst_thread_ctor(void *arg __unused, struct thread *td)
23156668Sshin{
23256668Sshin	td->t_kinst_tramp = kinst_trampoline_alloc(M_WAITOK);
23356668Sshin}
23456668Sshin
23556668Sshinstatic void
23656668Sshinkinst_thread_dtor(void *arg __unused, struct thread *td)
23756668Sshin{
23856668Sshin	void *tramp;
23956668Sshin
24056668Sshin	tramp = td->t_kinst_tramp;
24156668Sshin	td->t_kinst_tramp = NULL;
24256668Sshin
24356668Sshin	/*
24456668Sshin	 * This assumes that the thread_dtor event permits sleeping, which
24556668Sshin	 * appears to be true for the time being.
24656668Sshin	 */
24756668Sshin	kinst_trampoline_dealloc(tramp);
24856668Sshin}
24956668Sshin#endif
25056668Sshin
25156668Sshinint
25256668Sshinkinst_trampoline_init(void)
25317433Spst{
25456668Sshin#ifdef __amd64__
25517433Spst	struct proc *p;
25656668Sshin	struct thread *td;
25756668Sshin	void *tramp;
25856668Sshin	int error;
25956668Sshin
26056668Sshin	kinst_thread_ctor_handler = EVENTHANDLER_REGISTER(thread_ctor,
2611592Srgrimes	    kinst_thread_ctor, NULL, EVENTHANDLER_PRI_ANY);
26256668Sshin	kinst_thread_dtor_handler = EVENTHANDLER_REGISTER(thread_dtor,
26356668Sshin	    kinst_thread_dtor, NULL, EVENTHANDLER_PRI_ANY);
26456668Sshin
26556668Sshin	error = 0;
26656668Sshin	tramp = NULL;
26756668Sshin
26856668Sshin	sx_slock(&allproc_lock);
26956668Sshin	sx_xlock(&kinst_tramp_sx);
27056668Sshin	FOREACH_PROC_IN_SYSTEM(p) {
27156668Sshinretry:
27256668Sshin		PROC_LOCK(p);
27356668Sshin		FOREACH_THREAD_IN_PROC(p, td) {
27456668Sshin			if (td->t_kinst_tramp != NULL)
27556668Sshin				continue;
27656668Sshin			if (tramp == NULL) {
27756668Sshin				/*
27856668Sshin				 * Try to allocate a trampoline without dropping
27956668Sshin				 * the process lock.  If all chunks are fully
28056668Sshin				 * utilized, we must release the lock and try
28156668Sshin				 * again.
28256668Sshin				 */
28356668Sshin				tramp = kinst_trampoline_alloc_locked(M_NOWAIT);
28456668Sshin				if (tramp == NULL) {
28556668Sshin					PROC_UNLOCK(p);
28656668Sshin					tramp = kinst_trampoline_alloc_locked(
28756668Sshin					    M_WAITOK);
28856668Sshin					if (tramp == NULL) {
28956668Sshin						/*
29056668Sshin						 * Let the unload handler clean
29156668Sshin						 * up.
29256668Sshin						 */
29356668Sshin						error = ENOMEM;
29456668Sshin						goto out;
29556668Sshin					} else
29656668Sshin						goto retry;
29756668Sshin				}
29856668Sshin			}
29956668Sshin			td->t_kinst_tramp = tramp;
30056668Sshin			tramp = NULL;
30156668Sshin		}
30256668Sshin		PROC_UNLOCK(p);
30356668Sshin	}
30456668Sshinout:
30556668Sshin	sx_xunlock(&kinst_tramp_sx);
30656668Sshin	sx_sunlock(&allproc_lock);
30756668Sshin#else
30856668Sshin	int error = 0;
30956668Sshin
31056668Sshin	sx_xlock(&kinst_tramp_sx);
31156668Sshin	TAILQ_INIT(&kinst_trampchunks);
31256668Sshin	sx_xunlock(&kinst_tramp_sx);
31356668Sshin#endif
31456668Sshin
3151592Srgrimes	return (error);
31617433Spst}
3171592Srgrimes
31856668Sshinint
31956668Sshinkinst_trampoline_deinit(void)
32056668Sshin{
32117433Spst#ifdef __amd64__
3221592Srgrimes	struct trampchunk *chunk, *tmp;
32356668Sshin	struct proc *p;
32456668Sshin	struct thread *td;
32556668Sshin
32656668Sshin	EVENTHANDLER_DEREGISTER(thread_ctor, kinst_thread_ctor_handler);
32756668Sshin	EVENTHANDLER_DEREGISTER(thread_dtor, kinst_thread_dtor_handler);
32856668Sshin
32956668Sshin	sx_slock(&allproc_lock);
33070102Sphk	sx_xlock(&kinst_tramp_sx);
33156668Sshin	FOREACH_PROC_IN_SYSTEM(p) {
33256668Sshin		PROC_LOCK(p);
33356668Sshin		FOREACH_THREAD_IN_PROC(p, td) {
33456668Sshin			kinst_trampoline_dealloc_locked(td->t_kinst_tramp,
33556668Sshin			    false);
33656668Sshin			td->t_kinst_tramp = NULL;
33756668Sshin		}
33856668Sshin		PROC_UNLOCK(p);
33956668Sshin	}
34056668Sshin	sx_sunlock(&allproc_lock);
34156668Sshin	TAILQ_FOREACH_SAFE(chunk, &kinst_trampchunks, next, tmp)
34256668Sshin		kinst_trampchunk_free(chunk);
34356668Sshin	sx_xunlock(&kinst_tramp_sx);
34456668Sshin#else
34556668Sshin	struct trampchunk *chunk, *tmp;
34656668Sshin
34756668Sshin	sx_xlock(&kinst_tramp_sx);
34856668Sshin	TAILQ_FOREACH_SAFE(chunk, &kinst_trampchunks, next, tmp)
34956668Sshin		kinst_trampchunk_free(chunk);
35070102Sphk	sx_xunlock(&kinst_tramp_sx);
35156668Sshin#endif
35256668Sshin
35356668Sshin	return (0);
35456668Sshin}
35556668Sshin