1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright 2020, Gustavo Luiz Duarte, IBM Corp.
4 *
5 * This test starts a transaction and triggers a signal, forcing a pagefault to
6 * happen when the kernel signal handling code touches the user signal stack.
7 *
8 * In order to avoid pre-faulting the signal stack memory and to force the
9 * pagefault to happen precisely in the kernel signal handling code, the
10 * pagefault handling is done in userspace using the userfaultfd facility.
11 *
12 * Further pagefaults are triggered by crafting the signal handler's ucontext
13 * to point to additional memory regions managed by the userfaultfd, so using
14 * the same mechanism used to avoid pre-faulting the signal stack memory.
15 *
 * On failure (bug is present) the kernel crashes or never returns control
 * back to userspace. If the bug is not present, the test completes almost
 * immediately.
18 */
19
20#include <stdio.h>
21#include <stdlib.h>
22#include <string.h>
23#include <linux/userfaultfd.h>
24#include <poll.h>
25#include <unistd.h>
26#include <sys/ioctl.h>
27#include <sys/syscall.h>
28#include <fcntl.h>
29#include <sys/mman.h>
30#include <pthread.h>
31#include <signal.h>
32#include <errno.h>
33
34#include "tm.h"
35
36
/* Size of the userfaultfd-managed region: ten 64 KiB pages */
#define UF_MEM_SIZE (10 * 64 * 1024)

/* System page size in bytes; filled in by setup_uf_mem() */
static size_t pagesize;

/* Start of the region whose page faults are handled by userfaultfd */
static char *uf_mem;

/* Offset of the first byte of 'uf_mem' not yet handed out by get_uf_mem() */
static size_t uf_mem_offset;

/*
 * Source data for the faulting pages. When a page of 'uf_mem' is faulted in,
 * the page at the same offset in this array is copied into it (rather than
 * the page being zero-filled). The signal handler redirects pointers in its
 * ucontext to freshly reserved 'uf_mem' chunks, so those chunks need to hold
 * the expected content once they are paged in; this keeps the test reliable
 * and avoids a segfault when returning from the signal handler.
 */
static char backing_mem[UF_MEM_SIZE];
53
54/*
55 * Return a chunk of at least 'size' bytes of memory that will be handled by
56 * userfaultfd. If 'backing_data' is not NULL, its content will be save to
57 * 'backing_mem' and then copied into the faulting pages when the page fault
58 * is handled.
59 */
60void *get_uf_mem(size_t size, void *backing_data)
61{
62	void *ret;
63
64	if (uf_mem_offset + size > UF_MEM_SIZE) {
65		fprintf(stderr, "Requesting more uf_mem than expected!\n");
66		exit(EXIT_FAILURE);
67	}
68
69	ret = &uf_mem[uf_mem_offset];
70
71	/* Save the data that will be copied into the faulting page */
72	if (backing_data != NULL)
73		memcpy(&backing_mem[uf_mem_offset], backing_data, size);
74
75	/* Reserve the requested amount of uf_mem */
76	uf_mem_offset += size;
77	/* Keep uf_mem_offset aligned to the page size (round up) */
78	uf_mem_offset = (uf_mem_offset + pagesize - 1) & ~(pagesize - 1);
79
80	return ret;
81}
82
83void *fault_handler_thread(void *arg)
84{
85	struct uffd_msg msg;	/* Data read from userfaultfd */
86	long uffd;		/* userfaultfd file descriptor */
87	struct uffdio_copy uffdio_copy;
88	struct pollfd pollfd;
89	ssize_t nread, offset;
90
91	uffd = (long) arg;
92
93	for (;;) {
94		pollfd.fd = uffd;
95		pollfd.events = POLLIN;
96		if (poll(&pollfd, 1, -1) == -1) {
97			perror("poll() failed");
98			exit(EXIT_FAILURE);
99		}
100
101		nread = read(uffd, &msg, sizeof(msg));
102		if (nread == 0) {
103			fprintf(stderr, "read(): EOF on userfaultfd\n");
104			exit(EXIT_FAILURE);
105		}
106
107		if (nread == -1) {
108			perror("read() failed");
109			exit(EXIT_FAILURE);
110		}
111
112		/* We expect only one kind of event */
113		if (msg.event != UFFD_EVENT_PAGEFAULT) {
114			fprintf(stderr, "Unexpected event on userfaultfd\n");
115			exit(EXIT_FAILURE);
116		}
117
118		/*
119		 * We need to handle page faults in units of pages(!).
120		 * So, round faulting address down to page boundary.
121		 */
122		uffdio_copy.dst = msg.arg.pagefault.address & ~(pagesize-1);
123
124		offset = (char *) uffdio_copy.dst - uf_mem;
125		uffdio_copy.src = (unsigned long) &backing_mem[offset];
126
127		uffdio_copy.len = pagesize;
128		uffdio_copy.mode = 0;
129		uffdio_copy.copy = 0;
130		if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy) == -1) {
131			perror("ioctl-UFFDIO_COPY failed");
132			exit(EXIT_FAILURE);
133		}
134	}
135}
136
137void setup_uf_mem(void)
138{
139	long uffd;		/* userfaultfd file descriptor */
140	pthread_t thr;
141	struct uffdio_api uffdio_api;
142	struct uffdio_register uffdio_register;
143	int ret;
144
145	pagesize = sysconf(_SC_PAGE_SIZE);
146
147	/* Create and enable userfaultfd object */
148	uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
149	if (uffd == -1) {
150		perror("userfaultfd() failed");
151		exit(EXIT_FAILURE);
152	}
153	uffdio_api.api = UFFD_API;
154	uffdio_api.features = 0;
155	if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) {
156		perror("ioctl-UFFDIO_API failed");
157		exit(EXIT_FAILURE);
158	}
159
160	/*
161	 * Create a private anonymous mapping. The memory will be demand-zero
162	 * paged, that is, not yet allocated. When we actually touch the memory
163	 * the related page will be allocated via the userfaultfd mechanism.
164	 */
165	uf_mem = mmap(NULL, UF_MEM_SIZE, PROT_READ | PROT_WRITE,
166		      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
167	if (uf_mem == MAP_FAILED) {
168		perror("mmap() failed");
169		exit(EXIT_FAILURE);
170	}
171
172	/*
173	 * Register the memory range of the mapping we've just mapped to be
174	 * handled by the userfaultfd object. In 'mode' we request to track
175	 * missing pages (i.e. pages that have not yet been faulted-in).
176	 */
177	uffdio_register.range.start = (unsigned long) uf_mem;
178	uffdio_register.range.len = UF_MEM_SIZE;
179	uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
180	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) {
181		perror("ioctl-UFFDIO_REGISTER");
182		exit(EXIT_FAILURE);
183	}
184
185	/* Create a thread that will process the userfaultfd events */
186	ret = pthread_create(&thr, NULL, fault_handler_thread, (void *) uffd);
187	if (ret != 0) {
188		fprintf(stderr, "pthread_create(): Error. Returned %d\n", ret);
189		exit(EXIT_FAILURE);
190	}
191}
192
193/*
194 * Assumption: the signal was delivered while userspace was in transactional or
195 * suspended state, i.e. uc->uc_link != NULL.
196 */
197void signal_handler(int signo, siginfo_t *si, void *uc)
198{
199	ucontext_t *ucp = uc;
200
201	/* Skip 'trap' after returning, otherwise we get a SIGTRAP again */
202	ucp->uc_link->uc_mcontext.regs->nip += 4;
203
204	ucp->uc_mcontext.v_regs =
205		get_uf_mem(sizeof(elf_vrreg_t), ucp->uc_mcontext.v_regs);
206
207	ucp->uc_link->uc_mcontext.v_regs =
208		get_uf_mem(sizeof(elf_vrreg_t), ucp->uc_link->uc_mcontext.v_regs);
209
210	ucp->uc_link = get_uf_mem(sizeof(ucontext_t), ucp->uc_link);
211}
212
/*
 * Probe whether the userfaultfd system call is available.
 *
 * The syscall is issued with -1 as its flags argument. Only a non-zero return
 * combined with errno == ENOSYS is reported as "not available"; a zero return
 * or any other errno value counts as available.
 */
bool have_userfaultfd(void)
{
	long probe;

	errno = 0;
	probe = syscall(__NR_userfaultfd, -1);

	return !(probe != 0 && errno == ENOSYS);
}
222
223int tm_signal_pagefault(void)
224{
225	struct sigaction sa;
226	stack_t ss;
227
228	SKIP_IF(!have_htm());
229	SKIP_IF(htm_is_synthetic());
230	SKIP_IF(!have_userfaultfd());
231
232	setup_uf_mem();
233
234	/*
235	 * Set an alternative stack that will generate a page fault when the
236	 * signal is raised. The page fault will be treated via userfaultfd,
237	 * i.e. via fault_handler_thread.
238	 */
239	ss.ss_sp = get_uf_mem(SIGSTKSZ, NULL);
240	ss.ss_size = SIGSTKSZ;
241	ss.ss_flags = 0;
242	if (sigaltstack(&ss, NULL) == -1) {
243		perror("sigaltstack() failed");
244		exit(EXIT_FAILURE);
245	}
246
247	sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
248	sa.sa_sigaction = signal_handler;
249	if (sigaction(SIGTRAP, &sa, NULL) == -1) {
250		perror("sigaction() failed");
251		exit(EXIT_FAILURE);
252	}
253
254	/* Trigger a SIGTRAP in transactional state */
255	asm __volatile__(
256			"tbegin.;"
257			"beq    1f;"
258			"trap;"
259			"1: ;"
260			: : : "memory");
261
262	/* Trigger a SIGTRAP in suspended state */
263	asm __volatile__(
264			"tbegin.;"
265			"beq    1f;"
266			"tsuspend.;"
267			"trap;"
268			"tresume.;"
269			"1: ;"
270			: : : "memory");
271
272	return EXIT_SUCCESS;
273}
274
275int main(int argc, char **argv)
276{
277	/*
278	 * Depending on kernel config, the TM Bad Thing might not result in a
279	 * crash, instead the kernel never returns control back to userspace, so
280	 * set a tight timeout. If the test passes it completes almost
281	 * immediately.
282	 */
283	test_harness_set_timeout(2);
284	return test_harness(tm_signal_pagefault, "tm_signal_pagefault");
285}
286