1// SPDX-License-Identifier: LGPL-2.1
2/*
3 * rseq.c
4 *
5 * Copyright (C) 2016 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; only
10 * version 2.1 of the License.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 */
17
18#define _GNU_SOURCE
19#include <errno.h>
20#include <sched.h>
21#include <stdio.h>
22#include <stdlib.h>
23#include <string.h>
24#include <unistd.h>
25#include <syscall.h>
26#include <assert.h>
27#include <signal.h>
28#include <limits.h>
29#include <dlfcn.h>
30#include <stddef.h>
31#include <sys/auxv.h>
32#include <linux/auxvec.h>
33
34#include <linux/compiler.h>
35
36#include "../kselftest.h"
37#include "rseq.h"
38
39/*
40 * Define weak versions to play nice with binaries that are statically linked
41 * against a libc that doesn't support registering its own rseq.
42 */
43__weak ptrdiff_t __rseq_offset;
44__weak unsigned int __rseq_size;
45__weak unsigned int __rseq_flags;
46
47static const ptrdiff_t *libc_rseq_offset_p = &__rseq_offset;
48static const unsigned int *libc_rseq_size_p = &__rseq_size;
49static const unsigned int *libc_rseq_flags_p = &__rseq_flags;
50
/* Distance from the thread pointer to the rseq area. */
ptrdiff_t rseq_offset;

/* Flags that were used for the rseq registration. */
unsigned int rseq_flags;

/*
 * Size of the registered rseq area.  Holds -1U until rseq_init() has run
 * and 0 if the registration was unsuccessful.
 */
unsigned int rseq_size = -1U;

/*
 * rseq feature size supported by the kernel.  Holds -1U until rseq_init()
 * has run and 0 if the registration was unsuccessful.
 */
unsigned int rseq_feature_size = -1U;

/* Non-zero when this file, rather than libc, owns the rseq registration. */
static int rseq_ownership;

/* Non-zero once at least one rseq registration has succeeded. */
static int rseq_reg_success;
71
/* Feature size of the original struct rseq, in bytes. */
#define ORIG_RSEQ_FEATURE_SIZE		20

/* Allocation size of the original struct rseq, in bytes. */
#define ORIG_RSEQ_ALLOC_SIZE		32

/* Size (and alignment) of the generously sized TLS rseq area, in bytes. */
#define RSEQ_THREAD_AREA_ALLOC_SIZE	1024
80
81static
82__thread struct rseq_abi __rseq_abi __attribute__((tls_model("initial-exec"), aligned(RSEQ_THREAD_AREA_ALLOC_SIZE))) = {
83	.cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED,
84};
85
/*
 * Invoke the raw rseq system call.
 *
 * All four arguments are forwarded unchanged to syscall(__NR_rseq, ...).
 * Returns whatever syscall() returns, narrowed to int; on failure syscall()
 * reports -1 and sets errno.
 */
static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len,
		    int flags, uint32_t sig)
{
	long ret;

	ret = syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig);
	return ret;
}
91
/*
 * Invoke the raw getcpu system call.
 *
 * @cpu and @node are passed straight through; the third syscall argument
 * is always NULL.  Returns whatever syscall() returns, narrowed to int.
 */
static int sys_getcpu(unsigned *cpu, unsigned *node)
{
	long ret;

	ret = syscall(__NR_getcpu, cpu, node, NULL);
	return ret;
}
96
/*
 * Probe whether the rseq system call is usable.
 *
 * The probe issues rseq with a NULL area, zero length, no flags and a zero
 * signature, a call that is expected to fail.  The errno it leaves behind
 * decides the outcome:
 *   ENOSYS -> returns 0
 *   EINVAL -> returns 1
 * A call that does not fail, or any other errno, aborts the process.
 */
int rseq_available(void)
{
	int err;

	if (sys_rseq(NULL, 0, 0, 0) != -1)
		abort();

	err = errno;
	if (err == ENOSYS)
		return 0;
	if (err == EINVAL)
		return 1;

	/* Any other failure mode is unexpected. */
	abort();
}
113
114int rseq_register_current_thread(void)
115{
116	int rc;
117
118	if (!rseq_ownership) {
119		/* Treat libc's ownership as a successful registration. */
120		return 0;
121	}
122	rc = sys_rseq(&__rseq_abi, rseq_size, 0, RSEQ_SIG);
123	if (rc) {
124		if (RSEQ_READ_ONCE(rseq_reg_success)) {
125			/* Incoherent success/failure within process. */
126			abort();
127		}
128		return -1;
129	}
130	assert(rseq_current_cpu_raw() >= 0);
131	RSEQ_WRITE_ONCE(rseq_reg_success, 1);
132	return 0;
133}
134
135int rseq_unregister_current_thread(void)
136{
137	int rc;
138
139	if (!rseq_ownership) {
140		/* Treat libc's ownership as a successful unregistration. */
141		return 0;
142	}
143	rc = sys_rseq(&__rseq_abi, rseq_size, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG);
144	if (rc)
145		return -1;
146	return 0;
147}
148
149static
150unsigned int get_rseq_feature_size(void)
151{
152	unsigned long auxv_rseq_feature_size, auxv_rseq_align;
153
154	auxv_rseq_align = getauxval(AT_RSEQ_ALIGN);
155	assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE);
156
157	auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE);
158	assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE);
159	if (auxv_rseq_feature_size)
160		return auxv_rseq_feature_size;
161	else
162		return ORIG_RSEQ_FEATURE_SIZE;
163}
164
165static __attribute__((constructor))
166void rseq_init(void)
167{
168	/*
169	 * If the libc's registered rseq size isn't already valid, it may be
170	 * because the binary is dynamically linked and not necessarily due to
171	 * libc not having registered a restartable sequence.  Try to find the
172	 * symbols if that's the case.
173	 */
174	if (!*libc_rseq_size_p) {
175		libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset");
176		libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size");
177		libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags");
178	}
179	if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p &&
180			*libc_rseq_size_p != 0) {
181		/* rseq registration owned by glibc */
182		rseq_offset = *libc_rseq_offset_p;
183		rseq_size = *libc_rseq_size_p;
184		rseq_flags = *libc_rseq_flags_p;
185		rseq_feature_size = get_rseq_feature_size();
186		if (rseq_feature_size > rseq_size)
187			rseq_feature_size = rseq_size;
188		return;
189	}
190	rseq_ownership = 1;
191	if (!rseq_available()) {
192		rseq_size = 0;
193		rseq_feature_size = 0;
194		return;
195	}
196	rseq_offset = (void *)&__rseq_abi - rseq_thread_pointer();
197	rseq_flags = 0;
198	rseq_feature_size = get_rseq_feature_size();
199	if (rseq_feature_size == ORIG_RSEQ_FEATURE_SIZE)
200		rseq_size = ORIG_RSEQ_ALLOC_SIZE;
201	else
202		rseq_size = RSEQ_THREAD_AREA_ALLOC_SIZE;
203}
204
205static __attribute__((destructor))
206void rseq_exit(void)
207{
208	if (!rseq_ownership)
209		return;
210	rseq_offset = 0;
211	rseq_size = -1U;
212	rseq_feature_size = -1U;
213	rseq_ownership = 0;
214}
215
/*
 * Obtain the current CPU number through sched_getcpu().
 *
 * Returns the CPU number.  If sched_getcpu() reports a negative value, the
 * error is printed with perror() and the process is aborted.
 */
int32_t rseq_fallback_current_cpu(void)
{
	int32_t cpu = sched_getcpu();

	if (cpu >= 0)
		return cpu;

	perror("sched_getcpu()");
	abort();
}
227
/*
 * Obtain the current node id through the getcpu system call.
 *
 * Returns the node id on success.  When sys_getcpu() returns non-zero, the
 * error is printed with perror() and that return value is handed back to
 * the caller.
 */
int32_t rseq_fallback_current_node(void)
{
	uint32_t cpu_id, node_id;
	int ret = sys_getcpu(&cpu_id, &node_id);

	if (ret == 0)
		return (int32_t) node_id;

	perror("sys_getcpu()");
	return ret;
}
240