1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * KSM functional tests
4 *
5 * Copyright 2022, Red Hat, Inc.
6 *
7 * Author(s): David Hildenbrand <david@redhat.com>
8 */
9#define _GNU_SOURCE
10#include <stdlib.h>
11#include <string.h>
12#include <stdbool.h>
13#include <stdint.h>
14#include <unistd.h>
15#include <errno.h>
16#include <fcntl.h>
17#include <sys/mman.h>
18#include <sys/prctl.h>
19#include <sys/syscall.h>
20#include <sys/ioctl.h>
21#include <sys/wait.h>
22#include <linux/userfaultfd.h>
23
24#include "../kselftest.h"
25#include "vm_util.h"
26
27#define KiB 1024u
28#define MiB (1024 * KiB)
29#define FORK_EXEC_CHILD_PRG_NAME "ksm_fork_exec_child"
30
31static int mem_fd;
32static int ksm_fd;
33static int ksm_full_scans_fd;
34static int proc_self_ksm_stat_fd;
35static int proc_self_ksm_merging_pages_fd;
36static int ksm_use_zero_pages_fd;
37static int pagemap_fd;
38static size_t pagesize;
39
40static bool range_maps_duplicates(char *addr, unsigned long size)
41{
42	unsigned long offs_a, offs_b, pfn_a, pfn_b;
43
44	/*
45	 * There is no easy way to check if there are KSM pages mapped into
46	 * this range. We only check that the range does not map the same PFN
47	 * twice by comparing each pair of mapped pages.
48	 */
49	for (offs_a = 0; offs_a < size; offs_a += pagesize) {
50		pfn_a = pagemap_get_pfn(pagemap_fd, addr + offs_a);
51		/* Page not present or PFN not exposed by the kernel. */
52		if (pfn_a == -1ul || !pfn_a)
53			continue;
54
55		for (offs_b = offs_a + pagesize; offs_b < size;
56		     offs_b += pagesize) {
57			pfn_b = pagemap_get_pfn(pagemap_fd, addr + offs_b);
58			if (pfn_b == -1ul || !pfn_b)
59				continue;
60			if (pfn_a == pfn_b)
61				return true;
62		}
63	}
64	return false;
65}
66
67static long get_my_ksm_zero_pages(void)
68{
69	char buf[200];
70	char *substr_ksm_zero;
71	size_t value_pos;
72	ssize_t read_size;
73	unsigned long my_ksm_zero_pages;
74
75	if (!proc_self_ksm_stat_fd)
76		return 0;
77
78	read_size = pread(proc_self_ksm_stat_fd, buf, sizeof(buf) - 1, 0);
79	if (read_size < 0)
80		return -errno;
81
82	buf[read_size] = 0;
83
84	substr_ksm_zero = strstr(buf, "ksm_zero_pages");
85	if (!substr_ksm_zero)
86		return 0;
87
88	value_pos = strcspn(substr_ksm_zero, "0123456789");
89	my_ksm_zero_pages = strtol(substr_ksm_zero + value_pos, NULL, 10);
90
91	return my_ksm_zero_pages;
92}
93
94static long get_my_merging_pages(void)
95{
96	char buf[10];
97	ssize_t ret;
98
99	if (proc_self_ksm_merging_pages_fd < 0)
100		return proc_self_ksm_merging_pages_fd;
101
102	ret = pread(proc_self_ksm_merging_pages_fd, buf, sizeof(buf) - 1, 0);
103	if (ret <= 0)
104		return -errno;
105	buf[ret] = 0;
106
107	return strtol(buf, NULL, 10);
108}
109
110static long ksm_get_full_scans(void)
111{
112	char buf[10];
113	ssize_t ret;
114
115	ret = pread(ksm_full_scans_fd, buf, sizeof(buf) - 1, 0);
116	if (ret <= 0)
117		return -errno;
118	buf[ret] = 0;
119
120	return strtol(buf, NULL, 10);
121}
122
123static int ksm_merge(void)
124{
125	long start_scans, end_scans;
126
127	/* Wait for two full scans such that any possible merging happened. */
128	start_scans = ksm_get_full_scans();
129	if (start_scans < 0)
130		return start_scans;
131	if (write(ksm_fd, "1", 1) != 1)
132		return -errno;
133	do {
134		end_scans = ksm_get_full_scans();
135		if (end_scans < 0)
136			return end_scans;
137	} while (end_scans < start_scans + 2);
138
139	return 0;
140}
141
142static int ksm_unmerge(void)
143{
144	if (write(ksm_fd, "2", 1) != 1)
145		return -errno;
146	return 0;
147}
148
149static char *mmap_and_merge_range(char val, unsigned long size, int prot,
150				  bool use_prctl)
151{
152	char *map;
153	int ret;
154
155	/* Stabilize accounting by disabling KSM completely. */
156	if (ksm_unmerge()) {
157		ksft_test_result_fail("Disabling (unmerging) KSM failed\n");
158		return MAP_FAILED;
159	}
160
161	if (get_my_merging_pages() > 0) {
162		ksft_test_result_fail("Still pages merged\n");
163		return MAP_FAILED;
164	}
165
166	map = mmap(NULL, size, PROT_READ|PROT_WRITE,
167		   MAP_PRIVATE|MAP_ANON, -1, 0);
168	if (map == MAP_FAILED) {
169		ksft_test_result_fail("mmap() failed\n");
170		return MAP_FAILED;
171	}
172
173	/* Don't use THP. Ignore if THP are not around on a kernel. */
174	if (madvise(map, size, MADV_NOHUGEPAGE) && errno != EINVAL) {
175		ksft_test_result_fail("MADV_NOHUGEPAGE failed\n");
176		goto unmap;
177	}
178
179	/* Make sure each page contains the same values to merge them. */
180	memset(map, val, size);
181
182	if (mprotect(map, size, prot)) {
183		ksft_test_result_skip("mprotect() failed\n");
184		goto unmap;
185	}
186
187	if (use_prctl) {
188		ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0);
189		if (ret < 0 && errno == EINVAL) {
190			ksft_test_result_skip("PR_SET_MEMORY_MERGE not supported\n");
191			goto unmap;
192		} else if (ret) {
193			ksft_test_result_fail("PR_SET_MEMORY_MERGE=1 failed\n");
194			goto unmap;
195		}
196	} else if (madvise(map, size, MADV_MERGEABLE)) {
197		ksft_test_result_fail("MADV_MERGEABLE failed\n");
198		goto unmap;
199	}
200
201	/* Run KSM to trigger merging and wait. */
202	if (ksm_merge()) {
203		ksft_test_result_fail("Running KSM failed\n");
204		goto unmap;
205	}
206
207	/*
208	 * Check if anything was merged at all. Ignore the zero page that is
209	 * accounted differently (depending on kernel support).
210	 */
211	if (val && !get_my_merging_pages()) {
212		ksft_test_result_fail("No pages got merged\n");
213		goto unmap;
214	}
215
216	return map;
217unmap:
218	munmap(map, size);
219	return MAP_FAILED;
220}
221
222static void test_unmerge(void)
223{
224	const unsigned int size = 2 * MiB;
225	char *map;
226
227	ksft_print_msg("[RUN] %s\n", __func__);
228
229	map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, false);
230	if (map == MAP_FAILED)
231		return;
232
233	if (madvise(map, size, MADV_UNMERGEABLE)) {
234		ksft_test_result_fail("MADV_UNMERGEABLE failed\n");
235		goto unmap;
236	}
237
238	ksft_test_result(!range_maps_duplicates(map, size),
239			 "Pages were unmerged\n");
240unmap:
241	munmap(map, size);
242}
243
244static void test_unmerge_zero_pages(void)
245{
246	const unsigned int size = 2 * MiB;
247	char *map;
248	unsigned int offs;
249	unsigned long pages_expected;
250
251	ksft_print_msg("[RUN] %s\n", __func__);
252
253	if (proc_self_ksm_stat_fd < 0) {
254		ksft_test_result_skip("open(\"/proc/self/ksm_stat\") failed\n");
255		return;
256	}
257	if (ksm_use_zero_pages_fd < 0) {
258		ksft_test_result_skip("open \"/sys/kernel/mm/ksm/use_zero_pages\" failed\n");
259		return;
260	}
261	if (write(ksm_use_zero_pages_fd, "1", 1) != 1) {
262		ksft_test_result_skip("write \"/sys/kernel/mm/ksm/use_zero_pages\" failed\n");
263		return;
264	}
265
266	/* Let KSM deduplicate zero pages. */
267	map = mmap_and_merge_range(0x00, size, PROT_READ | PROT_WRITE, false);
268	if (map == MAP_FAILED)
269		return;
270
271	/* Check if ksm_zero_pages is updated correctly after KSM merging */
272	pages_expected = size / pagesize;
273	if (pages_expected != get_my_ksm_zero_pages()) {
274		ksft_test_result_fail("'ksm_zero_pages' updated after merging\n");
275		goto unmap;
276	}
277
278	/* Try to unmerge half of the region */
279	if (madvise(map, size / 2, MADV_UNMERGEABLE)) {
280		ksft_test_result_fail("MADV_UNMERGEABLE failed\n");
281		goto unmap;
282	}
283
284	/* Check if ksm_zero_pages is updated correctly after unmerging */
285	pages_expected /= 2;
286	if (pages_expected != get_my_ksm_zero_pages()) {
287		ksft_test_result_fail("'ksm_zero_pages' updated after unmerging\n");
288		goto unmap;
289	}
290
291	/* Trigger unmerging of the other half by writing to the pages. */
292	for (offs = size / 2; offs < size; offs += pagesize)
293		*((unsigned int *)&map[offs]) = offs;
294
295	/* Now we should have no zeropages remaining. */
296	if (get_my_ksm_zero_pages()) {
297		ksft_test_result_fail("'ksm_zero_pages' updated after write fault\n");
298		goto unmap;
299	}
300
301	/* Check if ksm zero pages are really unmerged */
302	ksft_test_result(!range_maps_duplicates(map, size),
303			"KSM zero pages were unmerged\n");
304unmap:
305	munmap(map, size);
306}
307
308static void test_unmerge_discarded(void)
309{
310	const unsigned int size = 2 * MiB;
311	char *map;
312
313	ksft_print_msg("[RUN] %s\n", __func__);
314
315	map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, false);
316	if (map == MAP_FAILED)
317		return;
318
319	/* Discard half of all mapped pages so we have pte_none() entries. */
320	if (madvise(map, size / 2, MADV_DONTNEED)) {
321		ksft_test_result_fail("MADV_DONTNEED failed\n");
322		goto unmap;
323	}
324
325	if (madvise(map, size, MADV_UNMERGEABLE)) {
326		ksft_test_result_fail("MADV_UNMERGEABLE failed\n");
327		goto unmap;
328	}
329
330	ksft_test_result(!range_maps_duplicates(map, size),
331			 "Pages were unmerged\n");
332unmap:
333	munmap(map, size);
334}
335
336#ifdef __NR_userfaultfd
337static void test_unmerge_uffd_wp(void)
338{
339	struct uffdio_writeprotect uffd_writeprotect;
340	const unsigned int size = 2 * MiB;
341	struct uffdio_api uffdio_api;
342	char *map;
343	int uffd;
344
345	ksft_print_msg("[RUN] %s\n", __func__);
346
347	map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, false);
348	if (map == MAP_FAILED)
349		return;
350
351	/* See if UFFD is around. */
352	uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
353	if (uffd < 0) {
354		ksft_test_result_skip("__NR_userfaultfd failed\n");
355		goto unmap;
356	}
357
358	/* See if UFFD-WP is around. */
359	uffdio_api.api = UFFD_API;
360	uffdio_api.features = UFFD_FEATURE_PAGEFAULT_FLAG_WP;
361	if (ioctl(uffd, UFFDIO_API, &uffdio_api) < 0) {
362		ksft_test_result_fail("UFFDIO_API failed\n");
363		goto close_uffd;
364	}
365	if (!(uffdio_api.features & UFFD_FEATURE_PAGEFAULT_FLAG_WP)) {
366		ksft_test_result_skip("UFFD_FEATURE_PAGEFAULT_FLAG_WP not available\n");
367		goto close_uffd;
368	}
369
370	/* Register UFFD-WP, no need for an actual handler. */
371	if (uffd_register(uffd, map, size, false, true, false)) {
372		ksft_test_result_fail("UFFDIO_REGISTER_MODE_WP failed\n");
373		goto close_uffd;
374	}
375
376	/* Write-protect the range using UFFD-WP. */
377	uffd_writeprotect.range.start = (unsigned long) map;
378	uffd_writeprotect.range.len = size;
379	uffd_writeprotect.mode = UFFDIO_WRITEPROTECT_MODE_WP;
380	if (ioctl(uffd, UFFDIO_WRITEPROTECT, &uffd_writeprotect)) {
381		ksft_test_result_fail("UFFDIO_WRITEPROTECT failed\n");
382		goto close_uffd;
383	}
384
385	if (madvise(map, size, MADV_UNMERGEABLE)) {
386		ksft_test_result_fail("MADV_UNMERGEABLE failed\n");
387		goto close_uffd;
388	}
389
390	ksft_test_result(!range_maps_duplicates(map, size),
391			 "Pages were unmerged\n");
392close_uffd:
393	close(uffd);
394unmap:
395	munmap(map, size);
396}
397#endif
398
399/* Verify that KSM can be enabled / queried with prctl. */
400static void test_prctl(void)
401{
402	int ret;
403
404	ksft_print_msg("[RUN] %s\n", __func__);
405
406	ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0);
407	if (ret < 0 && errno == EINVAL) {
408		ksft_test_result_skip("PR_SET_MEMORY_MERGE not supported\n");
409		return;
410	} else if (ret) {
411		ksft_test_result_fail("PR_SET_MEMORY_MERGE=1 failed\n");
412		return;
413	}
414
415	ret = prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0);
416	if (ret < 0) {
417		ksft_test_result_fail("PR_GET_MEMORY_MERGE failed\n");
418		return;
419	} else if (ret != 1) {
420		ksft_test_result_fail("PR_SET_MEMORY_MERGE=1 not effective\n");
421		return;
422	}
423
424	ret = prctl(PR_SET_MEMORY_MERGE, 0, 0, 0, 0);
425	if (ret) {
426		ksft_test_result_fail("PR_SET_MEMORY_MERGE=0 failed\n");
427		return;
428	}
429
430	ret = prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0);
431	if (ret < 0) {
432		ksft_test_result_fail("PR_GET_MEMORY_MERGE failed\n");
433		return;
434	} else if (ret != 0) {
435		ksft_test_result_fail("PR_SET_MEMORY_MERGE=0 not effective\n");
436		return;
437	}
438
439	ksft_test_result_pass("Setting/clearing PR_SET_MEMORY_MERGE works\n");
440}
441
442/* Verify that prctl ksm flag is inherited. */
443static void test_prctl_fork(void)
444{
445	int ret, status;
446	pid_t child_pid;
447
448	ksft_print_msg("[RUN] %s\n", __func__);
449
450	ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0);
451	if (ret < 0 && errno == EINVAL) {
452		ksft_test_result_skip("PR_SET_MEMORY_MERGE not supported\n");
453		return;
454	} else if (ret) {
455		ksft_test_result_fail("PR_SET_MEMORY_MERGE=1 failed\n");
456		return;
457	}
458
459	child_pid = fork();
460	if (!child_pid) {
461		exit(prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0));
462	} else if (child_pid < 0) {
463		ksft_test_result_fail("fork() failed\n");
464		return;
465	}
466
467	if (waitpid(child_pid, &status, 0) < 0) {
468		ksft_test_result_fail("waitpid() failed\n");
469		return;
470	} else if (WEXITSTATUS(status) != 1) {
471		ksft_test_result_fail("unexpected PR_GET_MEMORY_MERGE result in child\n");
472		return;
473	}
474
475	if (prctl(PR_SET_MEMORY_MERGE, 0, 0, 0, 0)) {
476		ksft_test_result_fail("PR_SET_MEMORY_MERGE=0 failed\n");
477		return;
478	}
479
480	ksft_test_result_pass("PR_SET_MEMORY_MERGE value is inherited\n");
481}
482
483static int ksm_fork_exec_child(void)
484{
485	/* Test if KSM is enabled for the process. */
486	return prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0) == 1;
487}
488
489static void test_prctl_fork_exec(void)
490{
491	int ret, status;
492	pid_t child_pid;
493
494	ksft_print_msg("[RUN] %s\n", __func__);
495
496	ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0);
497	if (ret < 0 && errno == EINVAL) {
498		ksft_test_result_skip("PR_SET_MEMORY_MERGE not supported\n");
499		return;
500	} else if (ret) {
501		ksft_test_result_fail("PR_SET_MEMORY_MERGE=1 failed\n");
502		return;
503	}
504
505	child_pid = fork();
506	if (child_pid == -1) {
507		ksft_test_result_skip("fork() failed\n");
508		return;
509	} else if (child_pid == 0) {
510		char *prg_name = "./ksm_functional_tests";
511		char *argv_for_program[] = { prg_name, FORK_EXEC_CHILD_PRG_NAME };
512
513		execv(prg_name, argv_for_program);
514		return;
515	}
516
517	if (waitpid(child_pid, &status, 0) > 0) {
518		if (WIFEXITED(status)) {
519			status = WEXITSTATUS(status);
520			if (status) {
521				ksft_test_result_fail("KSM not enabled\n");
522				return;
523			}
524		} else {
525			ksft_test_result_fail("program didn't terminate normally\n");
526			return;
527		}
528	} else {
529		ksft_test_result_fail("waitpid() failed\n");
530		return;
531	}
532
533	if (prctl(PR_SET_MEMORY_MERGE, 0, 0, 0, 0)) {
534		ksft_test_result_fail("PR_SET_MEMORY_MERGE=0 failed\n");
535		return;
536	}
537
538	ksft_test_result_pass("PR_SET_MEMORY_MERGE value is inherited\n");
539}
540
541static void test_prctl_unmerge(void)
542{
543	const unsigned int size = 2 * MiB;
544	char *map;
545
546	ksft_print_msg("[RUN] %s\n", __func__);
547
548	map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, true);
549	if (map == MAP_FAILED)
550		return;
551
552	if (prctl(PR_SET_MEMORY_MERGE, 0, 0, 0, 0)) {
553		ksft_test_result_fail("PR_SET_MEMORY_MERGE=0 failed\n");
554		goto unmap;
555	}
556
557	ksft_test_result(!range_maps_duplicates(map, size),
558			 "Pages were unmerged\n");
559unmap:
560	munmap(map, size);
561}
562
563static void test_prot_none(void)
564{
565	const unsigned int size = 2 * MiB;
566	char *map;
567	int i;
568
569	ksft_print_msg("[RUN] %s\n", __func__);
570
571	map = mmap_and_merge_range(0x11, size, PROT_NONE, false);
572	if (map == MAP_FAILED)
573		goto unmap;
574
575	/* Store a unique value in each page on one half using ptrace */
576	for (i = 0; i < size / 2; i += pagesize) {
577		lseek(mem_fd, (uintptr_t) map + i, SEEK_SET);
578		if (write(mem_fd, &i, sizeof(i)) != sizeof(i)) {
579			ksft_test_result_fail("ptrace write failed\n");
580			goto unmap;
581		}
582	}
583
584	/* Trigger unsharing on the other half. */
585	if (madvise(map + size / 2, size / 2, MADV_UNMERGEABLE)) {
586		ksft_test_result_fail("MADV_UNMERGEABLE failed\n");
587		goto unmap;
588	}
589
590	ksft_test_result(!range_maps_duplicates(map, size),
591			 "Pages were unmerged\n");
592unmap:
593	munmap(map, size);
594}
595
596int main(int argc, char **argv)
597{
598	unsigned int tests = 8;
599	int err;
600
601	if (argc > 1 && !strcmp(argv[1], FORK_EXEC_CHILD_PRG_NAME)) {
602		exit(ksm_fork_exec_child() == 1 ? 0 : 1);
603	}
604
605#ifdef __NR_userfaultfd
606	tests++;
607#endif
608
609	ksft_print_header();
610	ksft_set_plan(tests);
611
612	pagesize = getpagesize();
613
614	mem_fd = open("/proc/self/mem", O_RDWR);
615	if (mem_fd < 0)
616		ksft_exit_fail_msg("opening /proc/self/mem failed\n");
617	ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR);
618	if (ksm_fd < 0)
619		ksft_exit_skip("open(\"/sys/kernel/mm/ksm/run\") failed\n");
620	ksm_full_scans_fd = open("/sys/kernel/mm/ksm/full_scans", O_RDONLY);
621	if (ksm_full_scans_fd < 0)
622		ksft_exit_skip("open(\"/sys/kernel/mm/ksm/full_scans\") failed\n");
623	pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
624	if (pagemap_fd < 0)
625		ksft_exit_skip("open(\"/proc/self/pagemap\") failed\n");
626	proc_self_ksm_stat_fd = open("/proc/self/ksm_stat", O_RDONLY);
627	proc_self_ksm_merging_pages_fd = open("/proc/self/ksm_merging_pages",
628					      O_RDONLY);
629	ksm_use_zero_pages_fd = open("/sys/kernel/mm/ksm/use_zero_pages", O_RDWR);
630
631	test_unmerge();
632	test_unmerge_zero_pages();
633	test_unmerge_discarded();
634#ifdef __NR_userfaultfd
635	test_unmerge_uffd_wp();
636#endif
637
638	test_prot_none();
639
640	test_prctl();
641	test_prctl_fork();
642	test_prctl_fork_exec();
643	test_prctl_unmerge();
644
645	err = ksft_get_fail_cnt();
646	if (err)
647		ksft_exit_fail_msg("%d out of %d tests failed\n",
648				   err, ksft_test_num());
649	return ksft_exit_pass();
650}
651