1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * GUP long-term page pinning tests.
4 *
5 * Copyright 2023, Red Hat, Inc.
6 *
7 * Author(s): David Hildenbrand <david@redhat.com>
8 */
9#define _GNU_SOURCE
10#include <stdlib.h>
11#include <string.h>
12#include <stdbool.h>
13#include <stdint.h>
14#include <unistd.h>
15#include <errno.h>
16#include <fcntl.h>
17#include <assert.h>
18#include <sys/mman.h>
19#include <sys/ioctl.h>
20#include <sys/vfs.h>
21#include <linux/magic.h>
22#include <linux/memfd.h>
23
24#include "local_config.h"
25#ifdef LOCAL_CONFIG_HAVE_LIBURING
26#include <liburing.h>
27#endif /* LOCAL_CONFIG_HAVE_LIBURING */
28
29#include "../../../../mm/gup_test.h"
30#include "../kselftest.h"
31#include "vm_util.h"
32
/* Base page size; set once from getpagesize() in main(). */
static size_t pagesize;
/* Number of hugetlb page sizes returned by detect_hugetlb_page_sizes(). */
static int nr_hugetlbsizes;
/* Detected hugetlb page sizes in bytes; nr_hugetlbsizes entries are used. */
static size_t hugetlbsizes[10];
/* fd of /sys/kernel/debug/gup_test; negative if it could not be opened. */
static int gup_fd;
37
/*
 * Look up the filesystem magic number of the file behind @fd.
 *
 * fstatfs() is retried for as long as it fails with EINTR. Returns the
 * f_type reported by fstatfs(), or 0 if the call failed for any other reason.
 */
static __fsword_t get_fs_type(int fd)
{
	struct statfs stats;

	for (;;) {
		if (!fstatfs(fd, &stats))
			return stats.f_type;
		/* Only an interrupted call is worth retrying. */
		if (errno != EINTR)
			return 0;
	}
}
49
/*
 * Tell whether @fs_type is a filesystem this test has no expectation for.
 *
 * R/W long-term pinning is only tested on a handful of filesystems for which
 * we can be fairly sure whether they support it or not. Returns false for
 * those and true for everything else.
 */
static bool fs_is_unknown(__fsword_t fs_type)
{
	const bool known = fs_type == TMPFS_MAGIC ||
			   fs_type == HUGETLBFS_MAGIC ||
			   fs_type == BTRFS_SUPER_MAGIC ||
			   fs_type == EXT4_SUPER_MAGIC ||
			   fs_type == XFS_SUPER_MAGIC;

	return !known;
}
68
/*
 * Tell whether R/W long-term pinning of a MAP_SHARED mapping is expected to
 * succeed on @fs_type.
 *
 * Must only be called for filesystems that fs_is_unknown() reports as known
 * (asserted). Of those, only tmpfs and hugetlbfs are expected to support it.
 */
static bool fs_supports_writable_longterm_pinning(__fsword_t fs_type)
{
	assert(!fs_is_unknown(fs_type));

	return fs_type == TMPFS_MAGIC || fs_type == HUGETLBFS_MAGIC;
}
80
/* How do_test() pins the mapped range. */
enum test_type {
	/* gup_test long-term pin without the USE_WRITE and USE_FAST flags. */
	TEST_TYPE_RO,
	/* gup_test long-term pin with PIN_LONGTERM_TEST_FLAG_USE_FAST only. */
	TEST_TYPE_RO_FAST,
	/* gup_test long-term pin with PIN_LONGTERM_TEST_FLAG_USE_WRITE only. */
	TEST_TYPE_RW,
	/* gup_test long-term pin with both USE_WRITE and USE_FAST. */
	TEST_TYPE_RW_FAST,
#ifdef LOCAL_CONFIG_HAVE_LIBURING
	/* Register the range as an io_uring fixed buffer. */
	TEST_TYPE_IOURING,
#endif /* LOCAL_CONFIG_HAVE_LIBURING */
};
90
91static void do_test(int fd, size_t size, enum test_type type, bool shared)
92{
93	__fsword_t fs_type = get_fs_type(fd);
94	bool should_work;
95	char *mem;
96	int ret;
97
98	if (ftruncate(fd, size)) {
99		ksft_test_result_fail("ftruncate() failed\n");
100		return;
101	}
102
103	if (fallocate(fd, 0, 0, size)) {
104		if (size == pagesize)
105			ksft_test_result_fail("fallocate() failed\n");
106		else
107			ksft_test_result_skip("need more free huge pages\n");
108		return;
109	}
110
111	mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
112		   shared ? MAP_SHARED : MAP_PRIVATE, fd, 0);
113	if (mem == MAP_FAILED) {
114		if (size == pagesize || shared)
115			ksft_test_result_fail("mmap() failed\n");
116		else
117			ksft_test_result_skip("need more free huge pages\n");
118		return;
119	}
120
121	/*
122	 * Fault in the page writable such that GUP-fast can eventually pin
123	 * it immediately.
124	 */
125	memset(mem, 0, size);
126
127	switch (type) {
128	case TEST_TYPE_RO:
129	case TEST_TYPE_RO_FAST:
130	case TEST_TYPE_RW:
131	case TEST_TYPE_RW_FAST: {
132		struct pin_longterm_test args;
133		const bool fast = type == TEST_TYPE_RO_FAST ||
134				  type == TEST_TYPE_RW_FAST;
135		const bool rw = type == TEST_TYPE_RW ||
136				type == TEST_TYPE_RW_FAST;
137
138		if (gup_fd < 0) {
139			ksft_test_result_skip("gup_test not available\n");
140			break;
141		}
142
143		if (rw && shared && fs_is_unknown(fs_type)) {
144			ksft_test_result_skip("Unknown filesystem\n");
145			return;
146		}
147		/*
148		 * R/O pinning or pinning in a private mapping is always
149		 * expected to work. Otherwise, we expect long-term R/W pinning
150		 * to only succeed for special fielesystems.
151		 */
152		should_work = !shared || !rw ||
153			      fs_supports_writable_longterm_pinning(fs_type);
154
155		args.addr = (__u64)(uintptr_t)mem;
156		args.size = size;
157		args.flags = fast ? PIN_LONGTERM_TEST_FLAG_USE_FAST : 0;
158		args.flags |= rw ? PIN_LONGTERM_TEST_FLAG_USE_WRITE : 0;
159		ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args);
160		if (ret && errno == EINVAL) {
161			ksft_test_result_skip("PIN_LONGTERM_TEST_START failed\n");
162			break;
163		} else if (ret && errno == EFAULT) {
164			ksft_test_result(!should_work, "Should have failed\n");
165			break;
166		} else if (ret) {
167			ksft_test_result_fail("PIN_LONGTERM_TEST_START failed\n");
168			break;
169		}
170
171		if (ioctl(gup_fd, PIN_LONGTERM_TEST_STOP))
172			ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed\n");
173
174		/*
175		 * TODO: if the kernel ever supports long-term R/W pinning on
176		 * some previously unsupported filesystems, we might want to
177		 * perform some additional tests for possible data corruptions.
178		 */
179		ksft_test_result(should_work, "Should have worked\n");
180		break;
181	}
182#ifdef LOCAL_CONFIG_HAVE_LIBURING
183	case TEST_TYPE_IOURING: {
184		struct io_uring ring;
185		struct iovec iov;
186
187		/* io_uring always pins pages writable. */
188		if (shared && fs_is_unknown(fs_type)) {
189			ksft_test_result_skip("Unknown filesystem\n");
190			return;
191		}
192		should_work = !shared ||
193			      fs_supports_writable_longterm_pinning(fs_type);
194
195		/* Skip on errors, as we might just lack kernel support. */
196		ret = io_uring_queue_init(1, &ring, 0);
197		if (ret < 0) {
198			ksft_test_result_skip("io_uring_queue_init() failed\n");
199			break;
200		}
201		/*
202		 * Register the range as a fixed buffer. This will FOLL_WRITE |
203		 * FOLL_PIN | FOLL_LONGTERM the range.
204		 */
205		iov.iov_base = mem;
206		iov.iov_len = size;
207		ret = io_uring_register_buffers(&ring, &iov, 1);
208		/* Only new kernels return EFAULT. */
209		if (ret && (errno == ENOSPC || errno == EOPNOTSUPP ||
210			    errno == EFAULT)) {
211			ksft_test_result(!should_work, "Should have failed\n");
212		} else if (ret) {
213			/*
214			 * We might just lack support or have insufficient
215			 * MEMLOCK limits.
216			 */
217			ksft_test_result_skip("io_uring_register_buffers() failed\n");
218		} else {
219			ksft_test_result(should_work, "Should have worked\n");
220			io_uring_unregister_buffers(&ring);
221		}
222
223		io_uring_queue_exit(&ring);
224		break;
225	}
226#endif /* LOCAL_CONFIG_HAVE_LIBURING */
227	default:
228		assert(false);
229	}
230
231	munmap(mem, size);
232}
233
/*
 * Signature of a test body: it receives the backing file descriptor and the
 * size to use for the mapping.
 */
typedef void (*test_fn)(int fd, size_t size);
235
236static void run_with_memfd(test_fn fn, const char *desc)
237{
238	int fd;
239
240	ksft_print_msg("[RUN] %s ... with memfd\n", desc);
241
242	fd = memfd_create("test", 0);
243	if (fd < 0) {
244		ksft_test_result_fail("memfd_create() failed\n");
245		return;
246	}
247
248	fn(fd, pagesize);
249	close(fd);
250}
251
252static void run_with_tmpfile(test_fn fn, const char *desc)
253{
254	FILE *file;
255	int fd;
256
257	ksft_print_msg("[RUN] %s ... with tmpfile\n", desc);
258
259	file = tmpfile();
260	if (!file) {
261		ksft_test_result_fail("tmpfile() failed\n");
262		return;
263	}
264
265	fd = fileno(file);
266	if (fd < 0) {
267		ksft_test_result_fail("fileno() failed\n");
268		goto close;
269	}
270
271	fn(fd, pagesize);
272close:
273	fclose(file);
274}
275
276static void run_with_local_tmpfile(test_fn fn, const char *desc)
277{
278	char filename[] = __FILE__"_tmpfile_XXXXXX";
279	int fd;
280
281	ksft_print_msg("[RUN] %s ... with local tmpfile\n", desc);
282
283	fd = mkstemp(filename);
284	if (fd < 0) {
285		ksft_test_result_fail("mkstemp() failed\n");
286		return;
287	}
288
289	if (unlink(filename)) {
290		ksft_test_result_fail("unlink() failed\n");
291		goto close;
292	}
293
294	fn(fd, pagesize);
295close:
296	close(fd);
297}
298
299static void run_with_memfd_hugetlb(test_fn fn, const char *desc,
300				   size_t hugetlbsize)
301{
302	int flags = MFD_HUGETLB;
303	int fd;
304
305	ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc,
306		       hugetlbsize / 1024);
307
308	flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT;
309
310	fd = memfd_create("test", flags);
311	if (fd < 0) {
312		ksft_test_result_skip("memfd_create() failed\n");
313		return;
314	}
315
316	fn(fd, hugetlbsize);
317	close(fd);
318}
319
/* One entry of the test_cases[] table. */
struct test_case {
	/* Description printed in the "[RUN]" message. */
	const char *desc;
	/* Test body; run_test_case() runs it once per backing file type. */
	test_fn fn;
};
324
/* TEST_TYPE_RW pin of a MAP_SHARED mapping of @fd. */
static void test_shared_rw_pin(int fd, size_t size)
{
	do_test(fd, size, TEST_TYPE_RW, true);
}
329
/* TEST_TYPE_RW_FAST pin of a MAP_SHARED mapping of @fd. */
static void test_shared_rw_fast_pin(int fd, size_t size)
{
	do_test(fd, size, TEST_TYPE_RW_FAST, true);
}
334
/* TEST_TYPE_RO pin of a MAP_SHARED mapping of @fd. */
static void test_shared_ro_pin(int fd, size_t size)
{
	do_test(fd, size, TEST_TYPE_RO, true);
}
339
/* TEST_TYPE_RO_FAST pin of a MAP_SHARED mapping of @fd. */
static void test_shared_ro_fast_pin(int fd, size_t size)
{
	do_test(fd, size, TEST_TYPE_RO_FAST, true);
}
344
/* TEST_TYPE_RW pin of a MAP_PRIVATE mapping of @fd. */
static void test_private_rw_pin(int fd, size_t size)
{
	do_test(fd, size, TEST_TYPE_RW, false);
}
349
/* TEST_TYPE_RW_FAST pin of a MAP_PRIVATE mapping of @fd. */
static void test_private_rw_fast_pin(int fd, size_t size)
{
	do_test(fd, size, TEST_TYPE_RW_FAST, false);
}
354
/* TEST_TYPE_RO pin of a MAP_PRIVATE mapping of @fd. */
static void test_private_ro_pin(int fd, size_t size)
{
	do_test(fd, size, TEST_TYPE_RO, false);
}
359
/* TEST_TYPE_RO_FAST pin of a MAP_PRIVATE mapping of @fd. */
static void test_private_ro_fast_pin(int fd, size_t size)
{
	do_test(fd, size, TEST_TYPE_RO_FAST, false);
}
364
365#ifdef LOCAL_CONFIG_HAVE_LIBURING
/* TEST_TYPE_IOURING (fixed buffer) on a MAP_SHARED mapping of @fd. */
static void test_shared_iouring(int fd, size_t size)
{
	do_test(fd, size, TEST_TYPE_IOURING, true);
}
370
/* TEST_TYPE_IOURING (fixed buffer) on a MAP_PRIVATE mapping of @fd. */
static void test_private_iouring(int fd, size_t size)
{
	do_test(fd, size, TEST_TYPE_IOURING, false);
}
375#endif /* LOCAL_CONFIG_HAVE_LIBURING */
376
377static const struct test_case test_cases[] = {
378	{
379		"R/W longterm GUP pin in MAP_SHARED file mapping",
380		test_shared_rw_pin,
381	},
382	{
383		"R/W longterm GUP-fast pin in MAP_SHARED file mapping",
384		test_shared_rw_fast_pin,
385	},
386	{
387		"R/O longterm GUP pin in MAP_SHARED file mapping",
388		test_shared_ro_pin,
389	},
390	{
391		"R/O longterm GUP-fast pin in MAP_SHARED file mapping",
392		test_shared_ro_fast_pin,
393	},
394	{
395		"R/W longterm GUP pin in MAP_PRIVATE file mapping",
396		test_private_rw_pin,
397	},
398	{
399		"R/W longterm GUP-fast pin in MAP_PRIVATE file mapping",
400		test_private_rw_fast_pin,
401	},
402	{
403		"R/O longterm GUP pin in MAP_PRIVATE file mapping",
404		test_private_ro_pin,
405	},
406	{
407		"R/O longterm GUP-fast pin in MAP_PRIVATE file mapping",
408		test_private_ro_fast_pin,
409	},
410#ifdef LOCAL_CONFIG_HAVE_LIBURING
411	{
412		"io_uring fixed buffer with MAP_SHARED file mapping",
413		test_shared_iouring,
414	},
415	{
416		"io_uring fixed buffer with MAP_PRIVATE file mapping",
417		test_private_iouring,
418	},
419#endif /* LOCAL_CONFIG_HAVE_LIBURING */
420};
421
422static void run_test_case(struct test_case const *test_case)
423{
424	int i;
425
426	run_with_memfd(test_case->fn, test_case->desc);
427	run_with_tmpfile(test_case->fn, test_case->desc);
428	run_with_local_tmpfile(test_case->fn, test_case->desc);
429	for (i = 0; i < nr_hugetlbsizes; i++)
430		run_with_memfd_hugetlb(test_case->fn, test_case->desc,
431				       hugetlbsizes[i]);
432}
433
434static int tests_per_test_case(void)
435{
436	return 3 + nr_hugetlbsizes;
437}
438
439int main(int argc, char **argv)
440{
441	int i, err;
442
443	pagesize = getpagesize();
444	nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes,
445						    ARRAY_SIZE(hugetlbsizes));
446
447	ksft_print_header();
448	ksft_set_plan(ARRAY_SIZE(test_cases) * tests_per_test_case());
449
450	gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
451
452	for (i = 0; i < ARRAY_SIZE(test_cases); i++)
453		run_test_case(&test_cases[i]);
454
455	err = ksft_get_fail_cnt();
456	if (err)
457		ksft_exit_fail_msg("%d out of %d tests failed\n",
458				   err, ksft_test_num());
459	return ksft_exit_pass();
460}
461