// SPDX-License-Identifier: GPL-2.0-only
/*
 * Test handling of code that might set PTE/PMD dirty in read-only VMAs.
 * Setting a PTE/PMD dirty must not accidentally set the PTE/PMD writable.
 *
 * Copyright 2023, Red Hat, Inc.
 *
 * Author(s): David Hildenbrand <david@redhat.com>
 */
#include <fcntl.h>
#include <signal.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h>
#include <sys/mman.h>
#include <setjmp.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <linux/userfaultfd.h>
#include <linux/mempolicy.h>

#include "../kselftest.h"
#include "vm_util.h"

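/*
 * Initialized in main(): base page size, PMD-sized THP size (0 if THPs
 * are not supported), and fds for /proc/self/mem and /proc/self/pagemap.
 */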
static size_t pagesize;
static size_t thpsize;
static int mem_fd;
static int pagemap_fd;
static sigjmp_buf env;

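/*
 * Handler for the fault we expect: do_test_write_sigsegv() arms it for
 * SIGSEGV, and we report via siglongjmp() which signal actually fired
 * (1 == SIGSEGV, 2 == anything else).
 */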
static void signal_handler(int sig)
{
	if (sig == SIGSEGV)
		siglongjmp(env, 1);
	siglongjmp(env, 2);
}

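/*
 * Try writing one byte to @mem and verify that the write faults with
 * SIGSEGV and leaves the content unmodified, i.e., the preceding
 * operation did not accidentally map the page writable.
 */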
static void do_test_write_sigsegv(char *mem)
{
	char orig = *mem;
	int ret;

	if (signal(SIGSEGV, signal_handler) == SIG_ERR) {
		ksft_test_result_fail("signal() failed\n");
		return;
	}

	ret = sigsetjmp(env, 1);
	if (!ret)
		*mem = orig + 1;

	if (signal(SIGSEGV, SIG_DFL) == SIG_ERR)
		ksft_test_result_fail("signal() failed\n");

	ksft_test_result(ret == 1 && *mem == orig,
			 "SIGSEGV generated, page not modified\n");
}

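/*
 * Reserve twice the THP size so we can manually align the start to a THP
 * boundary, then advise the kernel to back the aligned range with a THP.
 */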
static char *mmap_thp_range(int prot, char **_mmap_mem, size_t *_mmap_size)
{
	const size_t mmap_size = 2 * thpsize;
	char *mem, *mmap_mem;

	mmap_mem = mmap(NULL, mmap_size, prot, MAP_PRIVATE|MAP_ANON,
			-1, 0);
	if (mmap_mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		return MAP_FAILED;
	}
	/* Round up to the next thpsize boundary within the mapping. */
	mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1));

	if (madvise(mem, thpsize, MADV_HUGEPAGE)) {
		ksft_test_result_skip("MADV_HUGEPAGE failed\n");
		munmap(mmap_mem, mmap_size);
		return MAP_FAILED;
	}

	*_mmap_mem = mmap_mem;
	*_mmap_size = mmap_size;
	return mem;
}

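/*
 * Writing through /proc/self/mem uses ptrace semantics (FOLL_FORCE): it
 * can write to a read-only VMA, replacing the shared zeropage with a
 * fresh anon page that might get mapped via a dirty PTE.
 */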
static void test_ptrace_write(void)
{
	char data = 1;
	char *mem;
	int ret;

	ksft_print_msg("[INFO] PTRACE write access\n");

	mem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE|MAP_ANON, -1, 0);
	if (mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		return;
	}

	/* Fault in the shared zeropage. */
	if (*mem != 0) {
		ksft_test_result_fail("Memory not zero\n");
		goto munmap;
	}

	/*
	 * Unshare the page in the read-only VMA using ptrace (FOLL_FORCE),
	 * populating a fresh anon page that might be set dirty in the PTE.
	 */
	if (lseek(mem_fd, (uintptr_t) mem, SEEK_SET) < 0) {
		ksft_test_result_fail("lseek() failed\n");
		goto munmap;
	}
	ret = write(mem_fd, &data, 1);
	if (ret != 1 || *mem != data) {
		ksft_test_result_fail("write() failed\n");
		goto munmap;
	}

	do_test_write_sigsegv(mem);
munmap:
	munmap(mem, pagesize);
}

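/*
 * Same as test_ptrace_write(), but expecting the write to populate a
 * fresh THP whose PMD might be set dirty.
 */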
static void test_ptrace_write_thp(void)
{
	char *mem, *mmap_mem;
	size_t mmap_size;
	char data = 1;
	int ret;

	ksft_print_msg("[INFO] PTRACE write access to THP\n");

	mem = mmap_thp_range(PROT_READ, &mmap_mem, &mmap_size);
	if (mem == MAP_FAILED)
		return;

	/*
	 * Write to the first subpage in the read-only VMA using
	 * ptrace (FOLL_FORCE), possibly placing a fresh THP that is marked
	 * dirty in the PMD.
	 */
	if (lseek(mem_fd, (uintptr_t) mem, SEEK_SET) < 0) {
		ksft_test_result_fail("lseek() failed\n");
		goto munmap;
	}
	ret = write(mem_fd, &data, 1);
	if (ret != 1 || *mem != data) {
		ksft_test_result_fail("write() failed\n");
		goto munmap;
	}

	/* MM populated a THP if we got the last subpage populated as well. */
	if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) {
		ksft_test_result_skip("Did not get a THP populated\n");
		goto munmap;
	}

	do_test_write_sigsegv(mem);
munmap:
	munmap(mmap_mem, mmap_size);
}

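/*
 * When migrating a dirty anon page, the kernel might set the PTE dirty
 * when remapping the migration target. The VMA was mprotect'ed read-only
 * in the meantime, so the PTE must not become writable.
 */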
static void test_page_migration(void)
{
	char *mem;

	ksft_print_msg("[INFO] Page migration\n");

	mem = mmap(NULL, pagesize, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON,
		   -1, 0);
	if (mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		return;
	}

	/* Populate a fresh page and dirty it. */
	memset(mem, 1, pagesize);
	if (mprotect(mem, pagesize, PROT_READ)) {
		ksft_test_result_fail("mprotect() failed\n");
		goto munmap;
	}

	/* Trigger page migration. It might not be available, or might fail. */
	if (syscall(__NR_mbind, mem, pagesize, MPOL_LOCAL, NULL, 0x7fful,
		    MPOL_MF_MOVE)) {
		ksft_test_result_skip("mbind() failed\n");
		goto munmap;
	}

	do_test_write_sigsegv(mem);
munmap:
	munmap(mem, pagesize);
}

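/*
 * Same as test_page_migration(), but with a (possibly dirty) THP mapped
 * by a PMD.
 */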
static void test_page_migration_thp(void)
{
	char *mem, *mmap_mem;
	size_t mmap_size;

	ksft_print_msg("[INFO] Page migration of THP\n");

	mem = mmap_thp_range(PROT_READ|PROT_WRITE, &mmap_mem, &mmap_size);
	if (mem == MAP_FAILED)
		return;

	/*
	 * Write to the first subpage, which might populate a fresh anon THP
	 * and dirty it.
	 */
	memset(mem, 1, pagesize);
	if (mprotect(mem, thpsize, PROT_READ)) {
		ksft_test_result_fail("mprotect() failed\n");
		goto munmap;
	}

	/* MM populated a THP if we got the last subpage populated as well. */
	if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) {
		ksft_test_result_skip("Did not get a THP populated\n");
		goto munmap;
	}

	/* Trigger page migration. It might not be available, or might fail. */
	if (syscall(__NR_mbind, mem, thpsize, MPOL_LOCAL, NULL, 0x7fful,
		    MPOL_MF_MOVE)) {
		ksft_test_result_skip("mbind() failed\n");
		goto munmap;
	}

	do_test_write_sigsegv(mem);
munmap:
	munmap(mmap_mem, mmap_size);
}

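/*
 * mprotect() on a single subpage forces the kernel to remap the THP using
 * PTEs; a dirty PMD bit might get propagated to the PTEs, which must not
 * thereby become writable.
 */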
static void test_pte_mapped_thp(void)
{
	char *mem, *mmap_mem;
	size_t mmap_size;

	ksft_print_msg("[INFO] PTE-mapping a THP\n");

	mem = mmap_thp_range(PROT_READ|PROT_WRITE, &mmap_mem, &mmap_size);
	if (mem == MAP_FAILED)
		return;

	/*
	 * Write to the first subpage, which might populate a fresh anon THP
	 * and dirty it.
	 */
	memset(mem, 1, pagesize);
	if (mprotect(mem, thpsize, PROT_READ)) {
		ksft_test_result_fail("mprotect() failed\n");
		goto munmap;
	}

	/* MM populated a THP if we got the last subpage populated as well. */
	if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) {
		ksft_test_result_skip("Did not get a THP populated\n");
		goto munmap;
	}

	/* Trigger PTE-mapping the THP by mprotect'ing the last subpage. */
	if (mprotect(mem + thpsize - pagesize, pagesize,
		     PROT_READ|PROT_WRITE)) {
		ksft_test_result_fail("mprotect() failed\n");
		goto munmap;
	}

	do_test_write_sigsegv(mem);
munmap:
	munmap(mmap_mem, mmap_size);
}

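/*
 * UFFDIO_COPY places a fresh anon page into the registered range and
 * might set the new PTE dirty; in a read-only VMA, the PTE must not
 * become writable.
 */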
#ifdef __NR_userfaultfd
static void test_uffdio_copy(void)
{
	struct uffdio_register uffdio_register;
	struct uffdio_copy uffdio_copy;
	struct uffdio_api uffdio_api;
	char *dst, *src;
	int uffd;

	ksft_print_msg("[INFO] UFFDIO_COPY\n");

	src = malloc(pagesize);
	if (!src) {
		ksft_test_result_fail("malloc() failed\n");
		return;
	}
	memset(src, 1, pagesize);
	dst = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE|MAP_ANON, -1, 0);
	if (dst == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		free(src);
		return;
	}

	uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
	if (uffd < 0) {
		ksft_test_result_skip("__NR_userfaultfd failed\n");
		goto munmap;
	}

	uffdio_api.api = UFFD_API;
	uffdio_api.features = 0;
	if (ioctl(uffd, UFFDIO_API, &uffdio_api) < 0) {
		ksft_test_result_fail("UFFDIO_API failed\n");
		goto close_uffd;
	}

	uffdio_register.range.start = (unsigned long) dst;
	uffdio_register.range.len = pagesize;
	uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
		ksft_test_result_fail("UFFDIO_REGISTER failed\n");
		goto close_uffd;
	}

	/* Place a page in the read-only VMA, which might set the PTE dirty. */
	uffdio_copy.dst = (unsigned long) dst;
	uffdio_copy.src = (unsigned long) src;
	uffdio_copy.len = pagesize;
	uffdio_copy.mode = 0;
	if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy)) {
		ksft_test_result_fail("UFFDIO_COPY failed\n");
		goto close_uffd;
	}

	do_test_write_sigsegv(dst);
close_uffd:
	close(uffd);
munmap:
	munmap(dst, pagesize);
	free(src);
}
#endif /* __NR_userfaultfd */

int main(void)
{
	int err, tests = 2;

	pagesize = getpagesize();
	thpsize = read_pmd_pagesize();
	if (thpsize) {
		ksft_print_msg("[INFO] detected THP size: %zu KiB\n",
			       thpsize / 1024);
		tests += 3;
	}
#ifdef __NR_userfaultfd
	tests += 1;
#endif /* __NR_userfaultfd */

	ksft_print_header();
	ksft_set_plan(tests);

	mem_fd = open("/proc/self/mem", O_RDWR);
	if (mem_fd < 0)
		ksft_exit_fail_msg("opening /proc/self/mem failed\n");
	pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
	if (pagemap_fd < 0)
		ksft_exit_fail_msg("opening /proc/self/pagemap failed\n");

	/*
	 * On a ptrace(FOLL_FORCE) write access via /proc/self/mem to a
	 * read-only VMA, the kernel may set the PTE/PMD dirty.
	 */
	test_ptrace_write();
	if (thpsize)
		test_ptrace_write_thp();
	/*
	 * On page migration, the kernel may set the PTE/PMD dirty when
	 * remapping the page.
	 */
	test_page_migration();
	if (thpsize)
		test_page_migration_thp();
	/* PTE-mapping a THP might propagate the dirty PMD bit to the PTEs. */
	if (thpsize)
		test_pte_mapped_thp();
	/* Placing a fresh page via userfaultfd may set the PTE dirty. */
#ifdef __NR_userfaultfd
	test_uffdio_copy();
#endif /* __NR_userfaultfd */

	err = ksft_get_fail_cnt();
	if (err)
		ksft_exit_fail_msg("%d out of %d tests failed\n",
				   err, ksft_test_num());
	return ksft_exit_pass();
}