// SPDX-License-Identifier: GPL-2.0-only /* * Userfaultfd unit tests. * * Copyright (C) 2015-2023 Red Hat, Inc. */ #include "uffd-common.h" #include "../../../../mm/gup_test.h" #ifdef __NR_userfaultfd /* The unit test doesn't need a large or random size, make it 32MB for now */ #define UFFD_TEST_MEM_SIZE (32UL << 20) #define MEM_ANON BIT_ULL(0) #define MEM_SHMEM BIT_ULL(1) #define MEM_SHMEM_PRIVATE BIT_ULL(2) #define MEM_HUGETLB BIT_ULL(3) #define MEM_HUGETLB_PRIVATE BIT_ULL(4) #define MEM_ALL (MEM_ANON | MEM_SHMEM | MEM_SHMEM_PRIVATE | \ MEM_HUGETLB | MEM_HUGETLB_PRIVATE) #define ALIGN_UP(x, align_to) \ ((__typeof__(x))((((unsigned long)(x)) + ((align_to)-1)) & ~((align_to)-1))) struct mem_type { const char *name; unsigned int mem_flag; uffd_test_ops_t *mem_ops; bool shared; }; typedef struct mem_type mem_type_t; mem_type_t mem_types[] = { { .name = "anon", .mem_flag = MEM_ANON, .mem_ops = &anon_uffd_test_ops, .shared = false, }, { .name = "shmem", .mem_flag = MEM_SHMEM, .mem_ops = &shmem_uffd_test_ops, .shared = true, }, { .name = "shmem-private", .mem_flag = MEM_SHMEM_PRIVATE, .mem_ops = &shmem_uffd_test_ops, .shared = false, }, { .name = "hugetlb", .mem_flag = MEM_HUGETLB, .mem_ops = &hugetlb_uffd_test_ops, .shared = true, }, { .name = "hugetlb-private", .mem_flag = MEM_HUGETLB_PRIVATE, .mem_ops = &hugetlb_uffd_test_ops, .shared = false, }, }; /* Arguments to be passed over to each uffd unit test */ struct uffd_test_args { mem_type_t *mem_type; }; typedef struct uffd_test_args uffd_test_args_t; /* Returns: UFFD_TEST_* */ typedef void (*uffd_test_fn)(uffd_test_args_t *); typedef struct { const char *name; uffd_test_fn uffd_fn; unsigned int mem_targets; uint64_t uffd_feature_required; uffd_test_case_ops_t *test_case_ops; } uffd_test_case_t; static void uffd_test_report(void) { printf("Userfaults unit tests: pass=%u, skip=%u, fail=%u (total=%u)\n", ksft_get_pass_cnt(), ksft_get_xskip_cnt(), ksft_get_fail_cnt(), ksft_test_num()); } static void uffd_test_pass(void) { printf("done\n"); ksft_inc_pass_cnt(); } #define uffd_test_start(...) do { \ printf("Testing "); \ printf(__VA_ARGS__); \ printf("... "); \ fflush(stdout); \ } while (0) #define uffd_test_fail(...) do { \ printf("failed [reason: "); \ printf(__VA_ARGS__); \ printf("]\n"); \ ksft_inc_fail_cnt(); \ } while (0) static void uffd_test_skip(const char *message) { printf("skipped [reason: %s]\n", message); ksft_inc_xskip_cnt(); } /* * Returns 1 if specific userfaultfd supported, 0 otherwise. Note, we'll * return 1 even if some test failed as long as uffd supported, because in * that case we still want to proceed with the rest uffd unit tests. */ static int test_uffd_api(bool use_dev) { struct uffdio_api uffdio_api; int uffd; uffd_test_start("UFFDIO_API (with %s)", use_dev ? "/dev/userfaultfd" : "syscall"); if (use_dev) uffd = uffd_open_dev(UFFD_FLAGS); else uffd = uffd_open_sys(UFFD_FLAGS); if (uffd < 0) { uffd_test_skip("cannot open userfaultfd handle"); return 0; } /* Test wrong UFFD_API */ uffdio_api.api = 0xab; uffdio_api.features = 0; if (ioctl(uffd, UFFDIO_API, &uffdio_api) == 0) { uffd_test_fail("UFFDIO_API should fail with wrong api but didn't"); goto out; } /* Test wrong feature bit */ uffdio_api.api = UFFD_API; uffdio_api.features = BIT_ULL(63); if (ioctl(uffd, UFFDIO_API, &uffdio_api) == 0) { uffd_test_fail("UFFDIO_API should fail with wrong feature but didn't"); goto out; } /* Test normal UFFDIO_API */ uffdio_api.api = UFFD_API; uffdio_api.features = 0; if (ioctl(uffd, UFFDIO_API, &uffdio_api)) { uffd_test_fail("UFFDIO_API should succeed but failed"); goto out; } /* Test double requests of UFFDIO_API with a random feature set */ uffdio_api.features = BIT_ULL(0); if (ioctl(uffd, UFFDIO_API, &uffdio_api) == 0) { uffd_test_fail("UFFDIO_API should reject initialized uffd"); goto out; } uffd_test_pass(); out: close(uffd); /* We have a valid uffd handle */ return 1; } /* * This function initializes the global variables. TODO: remove global * vars and then remove this. */ static int uffd_setup_environment(uffd_test_args_t *args, uffd_test_case_t *test, mem_type_t *mem_type, const char **errmsg) { map_shared = mem_type->shared; uffd_test_ops = mem_type->mem_ops; uffd_test_case_ops = test->test_case_ops; if (mem_type->mem_flag & (MEM_HUGETLB_PRIVATE | MEM_HUGETLB)) page_size = default_huge_page_size(); else page_size = psize(); nr_pages = UFFD_TEST_MEM_SIZE / page_size; /* TODO: remove this global var.. it's so ugly */ nr_cpus = 1; /* Initialize test arguments */ args->mem_type = mem_type; return uffd_test_ctx_init(test->uffd_feature_required, errmsg); } static bool uffd_feature_supported(uffd_test_case_t *test) { uint64_t features; if (uffd_get_features(&features)) return false; return (features & test->uffd_feature_required) == test->uffd_feature_required; } static int pagemap_open(void) { int fd = open("/proc/self/pagemap", O_RDONLY); if (fd < 0) err("open pagemap"); return fd; } /* This macro let __LINE__ works in err() */ #define pagemap_check_wp(value, wp) do { \ if (!!(value & PM_UFFD_WP) != wp) \ err("pagemap uffd-wp bit error: 0x%"PRIx64, value); \ } while (0) typedef struct { int parent_uffd, child_uffd; } fork_event_args; static void *fork_event_consumer(void *data) { fork_event_args *args = data; struct uffd_msg msg = { 0 }; /* Read until a full msg received */ while (uffd_read_msg(args->parent_uffd, &msg)); if (msg.event != UFFD_EVENT_FORK) err("wrong message: %u\n", msg.event); /* Just to be properly freed later */ args->child_uffd = msg.arg.fork.ufd; return NULL; } typedef struct { int gup_fd; bool pinned; } pin_args; /* * Returns 0 if succeed, <0 for errors. pin_pages() needs to be paired * with unpin_pages(). Currently it needs to be RO longterm pin to satisfy * all needs of the test cases (e.g., trigger unshare, trigger fork() early * CoW, etc.). */ static int pin_pages(pin_args *args, void *buffer, size_t size) { struct pin_longterm_test test = { .addr = (uintptr_t)buffer, .size = size, /* Read-only pins */ .flags = 0, }; if (args->pinned) err("already pinned"); args->gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR); if (args->gup_fd < 0) return -errno; if (ioctl(args->gup_fd, PIN_LONGTERM_TEST_START, &test)) { /* Even if gup_test existed, can be an old gup_test / kernel */ close(args->gup_fd); return -errno; } args->pinned = true; return 0; } static void unpin_pages(pin_args *args) { if (!args->pinned) err("unpin without pin first"); if (ioctl(args->gup_fd, PIN_LONGTERM_TEST_STOP)) err("PIN_LONGTERM_TEST_STOP"); close(args->gup_fd); args->pinned = false; } static int pagemap_test_fork(int uffd, bool with_event, bool test_pin) { fork_event_args args = { .parent_uffd = uffd, .child_uffd = -1 }; pthread_t thread; pid_t child; uint64_t value; int fd, result; /* Prepare a thread to resolve EVENT_FORK */ if (with_event) { if (pthread_create(&thread, NULL, fork_event_consumer, &args)) err("pthread_create()"); } child = fork(); if (!child) { /* Open the pagemap fd of the child itself */ pin_args args = {}; fd = pagemap_open(); if (test_pin && pin_pages(&args, area_dst, page_size)) /* * Normally when reach here we have pinned in * previous tests, so shouldn't fail anymore */ err("pin page failed in child"); value = pagemap_get_entry(fd, area_dst); /* * After fork(), we should handle uffd-wp bit differently: * * (1) when with EVENT_FORK, it should persist * (2) when without EVENT_FORK, it should be dropped */ pagemap_check_wp(value, with_event); if (test_pin) unpin_pages(&args); /* Succeed */ exit(0); } waitpid(child, &result, 0); if (with_event) { if (pthread_join(thread, NULL)) err("pthread_join()"); if (args.child_uffd < 0) err("Didn't receive child uffd"); close(args.child_uffd); } return result; } static void uffd_wp_unpopulated_test(uffd_test_args_t *args) { uint64_t value; int pagemap_fd; if (uffd_register(uffd, area_dst, nr_pages * page_size, false, true, false)) err("register failed"); pagemap_fd = pagemap_open(); /* Test applying pte marker to anon unpopulated */ wp_range(uffd, (uint64_t)area_dst, page_size, true); value = pagemap_get_entry(pagemap_fd, area_dst); pagemap_check_wp(value, true); /* Test unprotect on anon pte marker */ wp_range(uffd, (uint64_t)area_dst, page_size, false); value = pagemap_get_entry(pagemap_fd, area_dst); pagemap_check_wp(value, false); /* Test zap on anon marker */ wp_range(uffd, (uint64_t)area_dst, page_size, true); if (madvise(area_dst, page_size, MADV_DONTNEED)) err("madvise(MADV_DONTNEED) failed"); value = pagemap_get_entry(pagemap_fd, area_dst); pagemap_check_wp(value, false); /* Test fault in after marker removed */ *area_dst = 1; value = pagemap_get_entry(pagemap_fd, area_dst); pagemap_check_wp(value, false); /* Drop it to make pte none again */ if (madvise(area_dst, page_size, MADV_DONTNEED)) err("madvise(MADV_DONTNEED) failed"); /* Test read-zero-page upon pte marker */ wp_range(uffd, (uint64_t)area_dst, page_size, true); *(volatile char *)area_dst; /* Drop it to make pte none again */ if (madvise(area_dst, page_size, MADV_DONTNEED)) err("madvise(MADV_DONTNEED) failed"); uffd_test_pass(); } static void uffd_wp_fork_test_common(uffd_test_args_t *args, bool with_event) { int pagemap_fd; uint64_t value; if (uffd_register(uffd, area_dst, nr_pages * page_size, false, true, false)) err("register failed"); pagemap_fd = pagemap_open(); /* Touch the page */ *area_dst = 1; wp_range(uffd, (uint64_t)area_dst, page_size, true); value = pagemap_get_entry(pagemap_fd, area_dst); pagemap_check_wp(value, true); if (pagemap_test_fork(uffd, with_event, false)) { uffd_test_fail("Detected %s uffd-wp bit in child in present pte", with_event ? "missing" : "stall"); goto out; } /* * This is an attempt for zapping the pgtable so as to test the * markers. * * For private mappings, PAGEOUT will only work on exclusive ptes * (PM_MMAP_EXCLUSIVE) which we should satisfy. * * For shared, PAGEOUT may not work. Use DONTNEED instead which * plays a similar role of zapping (rather than freeing the page) * to expose pte markers. */ if (args->mem_type->shared) { if (madvise(area_dst, page_size, MADV_DONTNEED)) err("MADV_DONTNEED"); } else { /* * NOTE: ignore retval because private-hugetlb doesn't yet * support swapping, so it could fail. */ madvise(area_dst, page_size, MADV_PAGEOUT); } /* Uffd-wp should persist even swapped out */ value = pagemap_get_entry(pagemap_fd, area_dst); pagemap_check_wp(value, true); if (pagemap_test_fork(uffd, with_event, false)) { uffd_test_fail("Detected %s uffd-wp bit in child in zapped pte", with_event ? "missing" : "stall"); goto out; } /* Unprotect; this tests swap pte modifications */ wp_range(uffd, (uint64_t)area_dst, page_size, false); value = pagemap_get_entry(pagemap_fd, area_dst); pagemap_check_wp(value, false); /* Fault in the page from disk */ *area_dst = 2; value = pagemap_get_entry(pagemap_fd, area_dst); pagemap_check_wp(value, false); uffd_test_pass(); out: if (uffd_unregister(uffd, area_dst, nr_pages * page_size)) err("unregister failed"); close(pagemap_fd); } static void uffd_wp_fork_test(uffd_test_args_t *args) { uffd_wp_fork_test_common(args, false); } static void uffd_wp_fork_with_event_test(uffd_test_args_t *args) { uffd_wp_fork_test_common(args, true); } static void uffd_wp_fork_pin_test_common(uffd_test_args_t *args, bool with_event) { int pagemap_fd; pin_args pin_args = {}; if (uffd_register(uffd, area_dst, page_size, false, true, false)) err("register failed"); pagemap_fd = pagemap_open(); /* Touch the page */ *area_dst = 1; wp_range(uffd, (uint64_t)area_dst, page_size, true); /* * 1. First pin, then fork(). This tests fork() special path when * doing early CoW if the page is private. */ if (pin_pages(&pin_args, area_dst, page_size)) { uffd_test_skip("Possibly CONFIG_GUP_TEST missing " "or unprivileged"); close(pagemap_fd); uffd_unregister(uffd, area_dst, page_size); return; } if (pagemap_test_fork(uffd, with_event, false)) { uffd_test_fail("Detected %s uffd-wp bit in early CoW of fork()", with_event ? "missing" : "stall"); unpin_pages(&pin_args); goto out; } unpin_pages(&pin_args); /* * 2. First fork(), then pin (in the child, where test_pin==true). * This tests COR, aka, page unsharing on private memories. */ if (pagemap_test_fork(uffd, with_event, true)) { uffd_test_fail("Detected %s uffd-wp bit when RO pin", with_event ? "missing" : "stall"); goto out; } uffd_test_pass(); out: if (uffd_unregister(uffd, area_dst, page_size)) err("register failed"); close(pagemap_fd); } static void uffd_wp_fork_pin_test(uffd_test_args_t *args) { uffd_wp_fork_pin_test_common(args, false); } static void uffd_wp_fork_pin_with_event_test(uffd_test_args_t *args) { uffd_wp_fork_pin_test_common(args, true); } static void check_memory_contents(char *p) { unsigned long i, j; uint8_t expected_byte; for (i = 0; i < nr_pages; ++i) { expected_byte = ~((uint8_t)(i % ((uint8_t)-1))); for (j = 0; j < page_size; j++) { uint8_t v = *(uint8_t *)(p + (i * page_size) + j); if (v != expected_byte) err("unexpected page contents"); } } } static void uffd_minor_test_common(bool test_collapse, bool test_wp) { unsigned long p; pthread_t uffd_mon; char c; struct uffd_args args = { 0 }; /* * NOTE: MADV_COLLAPSE is not yet compatible with WP, so testing * both do not make much sense. */ assert(!(test_collapse && test_wp)); if (uffd_register(uffd, area_dst_alias, nr_pages * page_size, /* NOTE! MADV_COLLAPSE may not work with uffd-wp */ false, test_wp, true)) err("register failure"); /* * After registering with UFFD, populate the non-UFFD-registered side of * the shared mapping. This should *not* trigger any UFFD minor faults. */ for (p = 0; p < nr_pages; ++p) memset(area_dst + (p * page_size), p % ((uint8_t)-1), page_size); args.apply_wp = test_wp; if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) err("uffd_poll_thread create"); /* * Read each of the pages back using the UFFD-registered mapping. We * expect that the first time we touch a page, it will result in a minor * fault. uffd_poll_thread will resolve the fault by bit-flipping the * page's contents, and then issuing a CONTINUE ioctl. */ check_memory_contents(area_dst_alias); if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) err("pipe write"); if (pthread_join(uffd_mon, NULL)) err("join() failed"); if (test_collapse) { if (madvise(area_dst_alias, nr_pages * page_size, MADV_COLLAPSE)) { /* It's fine to fail for this one... */ uffd_test_skip("MADV_COLLAPSE failed"); return; } uffd_test_ops->check_pmd_mapping(area_dst, nr_pages * page_size / read_pmd_pagesize()); /* * This won't cause uffd-fault - it purely just makes sure there * was no corruption. */ check_memory_contents(area_dst_alias); } if (args.missing_faults != 0 || args.minor_faults != nr_pages) uffd_test_fail("stats check error"); else uffd_test_pass(); } void uffd_minor_test(uffd_test_args_t *args) { uffd_minor_test_common(false, false); } void uffd_minor_wp_test(uffd_test_args_t *args) { uffd_minor_test_common(false, true); } void uffd_minor_collapse_test(uffd_test_args_t *args) { uffd_minor_test_common(true, false); } static sigjmp_buf jbuf, *sigbuf; static void sighndl(int sig, siginfo_t *siginfo, void *ptr) { if (sig == SIGBUS) { if (sigbuf) siglongjmp(*sigbuf, 1); abort(); } } /* * For non-cooperative userfaultfd test we fork() a process that will * generate pagefaults, will mremap the area monitored by the * userfaultfd and at last this process will release the monitored * area. * For the anonymous and shared memory the area is divided into two * parts, the first part is accessed before mremap, and the second * part is accessed after mremap. Since hugetlbfs does not support * mremap, the entire monitored area is accessed in a single pass for * HUGETLB_TEST. * The release of the pages currently generates event for shmem and * anonymous memory (UFFD_EVENT_REMOVE), hence it is not checked * for hugetlb. * For signal test(UFFD_FEATURE_SIGBUS), signal_test = 1, we register * monitored area, generate pagefaults and test that signal is delivered. * Use UFFDIO_COPY to allocate missing page and retry. For signal_test = 2 * test robustness use case - we release monitored area, fork a process * that will generate pagefaults and verify signal is generated. * This also tests UFFD_FEATURE_EVENT_FORK event along with the signal * feature. Using monitor thread, verify no userfault events are generated. */ static int faulting_process(int signal_test, bool wp) { unsigned long nr, i; unsigned long long count; unsigned long split_nr_pages; unsigned long lastnr; struct sigaction act; volatile unsigned long signalled = 0; split_nr_pages = (nr_pages + 1) / 2; if (signal_test) { sigbuf = &jbuf; memset(&act, 0, sizeof(act)); act.sa_sigaction = sighndl; act.sa_flags = SA_SIGINFO; if (sigaction(SIGBUS, &act, 0)) err("sigaction"); lastnr = (unsigned long)-1; } for (nr = 0; nr < split_nr_pages; nr++) { volatile int steps = 1; unsigned long offset = nr * page_size; if (signal_test) { if (sigsetjmp(*sigbuf, 1) != 0) { if (steps == 1 && nr == lastnr) err("Signal repeated"); lastnr = nr; if (signal_test == 1) { if (steps == 1) { /* This is a MISSING request */ steps++; if (copy_page(uffd, offset, wp)) signalled++; } else { /* This is a WP request */ assert(steps == 2); wp_range(uffd, (__u64)area_dst + offset, page_size, false); } } else { signalled++; continue; } } } count = *area_count(area_dst, nr); if (count != count_verify[nr]) err("nr %lu memory corruption %llu %llu\n", nr, count, count_verify[nr]); /* * Trigger write protection if there is by writing * the same value back. */ *area_count(area_dst, nr) = count; } if (signal_test) return signalled != split_nr_pages; area_dst = mremap(area_dst, nr_pages * page_size, nr_pages * page_size, MREMAP_MAYMOVE | MREMAP_FIXED, area_src); if (area_dst == MAP_FAILED) err("mremap"); /* Reset area_src since we just clobbered it */ area_src = NULL; for (; nr < nr_pages; nr++) { count = *area_count(area_dst, nr); if (count != count_verify[nr]) { err("nr %lu memory corruption %llu %llu\n", nr, count, count_verify[nr]); } /* * Trigger write protection if there is by writing * the same value back. */ *area_count(area_dst, nr) = count; } uffd_test_ops->release_pages(area_dst); for (nr = 0; nr < nr_pages; nr++) for (i = 0; i < page_size; i++) if (*(area_dst + nr * page_size + i) != 0) err("page %lu offset %lu is not zero", nr, i); return 0; } static void uffd_sigbus_test_common(bool wp) { unsigned long userfaults; pthread_t uffd_mon; pid_t pid; int err; char c; struct uffd_args args = { 0 }; ready_for_fork = false; fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); if (uffd_register(uffd, area_dst, nr_pages * page_size, true, wp, false)) err("register failure"); if (faulting_process(1, wp)) err("faulting process failed"); uffd_test_ops->release_pages(area_dst); args.apply_wp = wp; if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) err("uffd_poll_thread create"); while (!ready_for_fork) ; /* Wait for the poll_thread to start executing before forking */ pid = fork(); if (pid < 0) err("fork"); if (!pid) exit(faulting_process(2, wp)); waitpid(pid, &err, 0); if (err) err("faulting process failed"); if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) err("pipe write"); if (pthread_join(uffd_mon, (void **)&userfaults)) err("pthread_join()"); if (userfaults) uffd_test_fail("Signal test failed, userfaults: %ld", userfaults); else uffd_test_pass(); } static void uffd_sigbus_test(uffd_test_args_t *args) { uffd_sigbus_test_common(false); } static void uffd_sigbus_wp_test(uffd_test_args_t *args) { uffd_sigbus_test_common(true); } static void uffd_events_test_common(bool wp) { pthread_t uffd_mon; pid_t pid; int err; char c; struct uffd_args args = { 0 }; ready_for_fork = false; fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); if (uffd_register(uffd, area_dst, nr_pages * page_size, true, wp, false)) err("register failure"); args.apply_wp = wp; if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) err("uffd_poll_thread create"); while (!ready_for_fork) ; /* Wait for the poll_thread to start executing before forking */ pid = fork(); if (pid < 0) err("fork"); if (!pid) exit(faulting_process(0, wp)); waitpid(pid, &err, 0); if (err) err("faulting process failed"); if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) err("pipe write"); if (pthread_join(uffd_mon, NULL)) err("pthread_join()"); if (args.missing_faults != nr_pages) uffd_test_fail("Fault counts wrong"); else uffd_test_pass(); } static void uffd_events_test(uffd_test_args_t *args) { uffd_events_test_common(false); } static void uffd_events_wp_test(uffd_test_args_t *args) { uffd_events_test_common(true); } static void retry_uffdio_zeropage(int ufd, struct uffdio_zeropage *uffdio_zeropage) { uffd_test_ops->alias_mapping(&uffdio_zeropage->range.start, uffdio_zeropage->range.len, 0); if (ioctl(ufd, UFFDIO_ZEROPAGE, uffdio_zeropage)) { if (uffdio_zeropage->zeropage != -EEXIST) err("UFFDIO_ZEROPAGE error: %"PRId64, (int64_t)uffdio_zeropage->zeropage); } else { err("UFFDIO_ZEROPAGE error: %"PRId64, (int64_t)uffdio_zeropage->zeropage); } } static bool do_uffdio_zeropage(int ufd, bool has_zeropage) { struct uffdio_zeropage uffdio_zeropage = { 0 }; int ret; __s64 res; uffdio_zeropage.range.start = (unsigned long) area_dst; uffdio_zeropage.range.len = page_size; uffdio_zeropage.mode = 0; ret = ioctl(ufd, UFFDIO_ZEROPAGE, &uffdio_zeropage); res = uffdio_zeropage.zeropage; if (ret) { /* real retval in ufdio_zeropage.zeropage */ if (has_zeropage) err("UFFDIO_ZEROPAGE error: %"PRId64, (int64_t)res); else if (res != -EINVAL) err("UFFDIO_ZEROPAGE not -EINVAL"); } else if (has_zeropage) { if (res != page_size) err("UFFDIO_ZEROPAGE unexpected size"); else retry_uffdio_zeropage(ufd, &uffdio_zeropage); return true; } else err("UFFDIO_ZEROPAGE succeeded"); return false; } /* * Registers a range with MISSING mode only for zeropage test. Return true * if UFFDIO_ZEROPAGE supported, false otherwise. Can't use uffd_register() * because we want to detect .ioctls along the way. */ static bool uffd_register_detect_zeropage(int uffd, void *addr, uint64_t len) { uint64_t ioctls = 0; if (uffd_register_with_ioctls(uffd, addr, len, true, false, false, &ioctls)) err("zeropage register fail"); return ioctls & (1 << _UFFDIO_ZEROPAGE); } /* exercise UFFDIO_ZEROPAGE */ static void uffd_zeropage_test(uffd_test_args_t *args) { bool has_zeropage; int i; has_zeropage = uffd_register_detect_zeropage(uffd, area_dst, page_size); if (area_dst_alias) /* Ignore the retval; we already have it */ uffd_register_detect_zeropage(uffd, area_dst_alias, page_size); if (do_uffdio_zeropage(uffd, has_zeropage)) for (i = 0; i < page_size; i++) if (area_dst[i] != 0) err("data non-zero at offset %d\n", i); if (uffd_unregister(uffd, area_dst, page_size)) err("unregister"); if (area_dst_alias && uffd_unregister(uffd, area_dst_alias, page_size)) err("unregister"); uffd_test_pass(); } static void uffd_register_poison(int uffd, void *addr, uint64_t len) { uint64_t ioctls = 0; uint64_t expected = (1 << _UFFDIO_COPY) | (1 << _UFFDIO_POISON); if (uffd_register_with_ioctls(uffd, addr, len, true, false, false, &ioctls)) err("poison register fail"); if ((ioctls & expected) != expected) err("registered area doesn't support COPY and POISON ioctls"); } static void do_uffdio_poison(int uffd, unsigned long offset) { struct uffdio_poison uffdio_poison = { 0 }; int ret; __s64 res; uffdio_poison.range.start = (unsigned long) area_dst + offset; uffdio_poison.range.len = page_size; uffdio_poison.mode = 0; ret = ioctl(uffd, UFFDIO_POISON, &uffdio_poison); res = uffdio_poison.updated; if (ret) err("UFFDIO_POISON error: %"PRId64, (int64_t)res); else if (res != page_size) err("UFFDIO_POISON unexpected size: %"PRId64, (int64_t)res); } static void uffd_poison_handle_fault( struct uffd_msg *msg, struct uffd_args *args) { unsigned long offset; if (msg->event != UFFD_EVENT_PAGEFAULT) err("unexpected msg event %u", msg->event); if (msg->arg.pagefault.flags & (UFFD_PAGEFAULT_FLAG_WP | UFFD_PAGEFAULT_FLAG_MINOR)) err("unexpected fault type %llu", msg->arg.pagefault.flags); offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst; offset &= ~(page_size-1); /* Odd pages -> copy zeroed page; even pages -> poison. */ if (offset & page_size) copy_page(uffd, offset, false); else do_uffdio_poison(uffd, offset); } static void uffd_poison_test(uffd_test_args_t *targs) { pthread_t uffd_mon; char c; struct uffd_args args = { 0 }; struct sigaction act = { 0 }; unsigned long nr_sigbus = 0; unsigned long nr; fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); uffd_register_poison(uffd, area_dst, nr_pages * page_size); memset(area_src, 0, nr_pages * page_size); args.handle_fault = uffd_poison_handle_fault; if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) err("uffd_poll_thread create"); sigbuf = &jbuf; act.sa_sigaction = sighndl; act.sa_flags = SA_SIGINFO; if (sigaction(SIGBUS, &act, 0)) err("sigaction"); for (nr = 0; nr < nr_pages; ++nr) { unsigned long offset = nr * page_size; const char *bytes = (const char *) area_dst + offset; const char *i; if (sigsetjmp(*sigbuf, 1)) { /* * Access below triggered a SIGBUS, which was caught by * sighndl, which then jumped here. Count this SIGBUS, * and move on to next page. */ ++nr_sigbus; continue; } for (i = bytes; i < bytes + page_size; ++i) { if (*i) err("nonzero byte in area_dst (%p) at %p: %u", area_dst, i, *i); } } if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) err("pipe write"); if (pthread_join(uffd_mon, NULL)) err("pthread_join()"); if (nr_sigbus != nr_pages / 2) err("expected to receive %lu SIGBUS, actually received %lu", nr_pages / 2, nr_sigbus); uffd_test_pass(); } static void uffd_move_handle_fault_common(struct uffd_msg *msg, struct uffd_args *args, unsigned long len) { unsigned long offset; if (msg->event != UFFD_EVENT_PAGEFAULT) err("unexpected msg event %u", msg->event); if (msg->arg.pagefault.flags & (UFFD_PAGEFAULT_FLAG_WP | UFFD_PAGEFAULT_FLAG_MINOR | UFFD_PAGEFAULT_FLAG_WRITE)) err("unexpected fault type %llu", msg->arg.pagefault.flags); offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst; offset &= ~(len-1); if (move_page(uffd, offset, len)) args->missing_faults++; } static void uffd_move_handle_fault(struct uffd_msg *msg, struct uffd_args *args) { uffd_move_handle_fault_common(msg, args, page_size); } static void uffd_move_pmd_handle_fault(struct uffd_msg *msg, struct uffd_args *args) { uffd_move_handle_fault_common(msg, args, read_pmd_pagesize()); } static void uffd_move_test_common(uffd_test_args_t *targs, unsigned long chunk_size, void (*handle_fault)(struct uffd_msg *msg, struct uffd_args *args)) { unsigned long nr; pthread_t uffd_mon; char c; unsigned long long count; struct uffd_args args = { 0 }; char *orig_area_src, *orig_area_dst; unsigned long step_size, step_count; unsigned long src_offs = 0; unsigned long dst_offs = 0; /* Prevent source pages from being mapped more than once */ if (madvise(area_src, nr_pages * page_size, MADV_DONTFORK)) err("madvise(MADV_DONTFORK) failure"); if (uffd_register(uffd, area_dst, nr_pages * page_size, true, false, false)) err("register failure"); args.handle_fault = handle_fault; if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) err("uffd_poll_thread create"); step_size = chunk_size / page_size; step_count = nr_pages / step_size; if (chunk_size > page_size) { char *aligned_src = ALIGN_UP(area_src, chunk_size); char *aligned_dst = ALIGN_UP(area_dst, chunk_size); if (aligned_src != area_src || aligned_dst != area_dst) { src_offs = (aligned_src - area_src) / page_size; dst_offs = (aligned_dst - area_dst) / page_size; step_count--; } orig_area_src = area_src; orig_area_dst = area_dst; area_src = aligned_src; area_dst = aligned_dst; } /* * Read each of the pages back using the UFFD-registered mapping. We * expect that the first time we touch a page, it will result in a missing * fault. uffd_poll_thread will resolve the fault by moving source * page to destination. */ for (nr = 0; nr < step_count * step_size; nr += step_size) { unsigned long i; /* Check area_src content */ for (i = 0; i < step_size; i++) { count = *area_count(area_src, nr + i); if (count != count_verify[src_offs + nr + i]) err("nr %lu source memory invalid %llu %llu\n", nr + i, count, count_verify[src_offs + nr + i]); } /* Faulting into area_dst should move the page or the huge page */ for (i = 0; i < step_size; i++) { count = *area_count(area_dst, nr + i); if (count != count_verify[dst_offs + nr + i]) err("nr %lu memory corruption %llu %llu\n", nr, count, count_verify[dst_offs + nr + i]); } /* Re-check area_src content which should be empty */ for (i = 0; i < step_size; i++) { count = *area_count(area_src, nr + i); if (count != 0) err("nr %lu move failed %llu %llu\n", nr, count, count_verify[src_offs + nr + i]); } } if (step_size > page_size) { area_src = orig_area_src; area_dst = orig_area_dst; } if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) err("pipe write"); if (pthread_join(uffd_mon, NULL)) err("join() failed"); if (args.missing_faults != step_count || args.minor_faults != 0) uffd_test_fail("stats check error"); else uffd_test_pass(); } static void uffd_move_test(uffd_test_args_t *targs) { uffd_move_test_common(targs, page_size, uffd_move_handle_fault); } static void uffd_move_pmd_test(uffd_test_args_t *targs) { if (madvise(area_dst, nr_pages * page_size, MADV_HUGEPAGE)) err("madvise(MADV_HUGEPAGE) failure"); uffd_move_test_common(targs, read_pmd_pagesize(), uffd_move_pmd_handle_fault); } static void uffd_move_pmd_split_test(uffd_test_args_t *targs) { if (madvise(area_dst, nr_pages * page_size, MADV_NOHUGEPAGE)) err("madvise(MADV_NOHUGEPAGE) failure"); uffd_move_test_common(targs, read_pmd_pagesize(), uffd_move_pmd_handle_fault); } static int prevent_hugepages(const char **errmsg) { /* This should be done before source area is populated */ if (madvise(area_src, nr_pages * page_size, MADV_NOHUGEPAGE)) { /* Ignore only if CONFIG_TRANSPARENT_HUGEPAGE=n */ if (errno != EINVAL) { if (errmsg) *errmsg = "madvise(MADV_NOHUGEPAGE) failed"; return -errno; } } return 0; } static int request_hugepages(const char **errmsg) { /* This should be done before source area is populated */ if (madvise(area_src, nr_pages * page_size, MADV_HUGEPAGE)) { if (errmsg) { *errmsg = (errno == EINVAL) ? "CONFIG_TRANSPARENT_HUGEPAGE is not set" : "madvise(MADV_HUGEPAGE) failed"; } return -errno; } return 0; } struct uffd_test_case_ops uffd_move_test_case_ops = { .post_alloc = prevent_hugepages, }; struct uffd_test_case_ops uffd_move_test_pmd_case_ops = { .post_alloc = request_hugepages, }; /* * Test the returned uffdio_register.ioctls with different register modes. * Note that _UFFDIO_ZEROPAGE is tested separately in the zeropage test. */ static void do_register_ioctls_test(uffd_test_args_t *args, bool miss, bool wp, bool minor) { uint64_t ioctls = 0, expected = BIT_ULL(_UFFDIO_WAKE); mem_type_t *mem_type = args->mem_type; int ret; ret = uffd_register_with_ioctls(uffd, area_dst, page_size, miss, wp, minor, &ioctls); /* * Handle special cases of UFFDIO_REGISTER here where it should * just fail with -EINVAL first.. * * Case 1: register MINOR on anon * Case 2: register with no mode selected */ if ((minor && (mem_type->mem_flag == MEM_ANON)) || (!miss && !wp && !minor)) { if (ret != -EINVAL) err("register (miss=%d, wp=%d, minor=%d) failed " "with wrong errno=%d", miss, wp, minor, ret); return; } /* UFFDIO_REGISTER should succeed, then check ioctls returned */ if (miss) expected |= BIT_ULL(_UFFDIO_COPY); if (wp) expected |= BIT_ULL(_UFFDIO_WRITEPROTECT); if (minor) expected |= BIT_ULL(_UFFDIO_CONTINUE); if ((ioctls & expected) != expected) err("unexpected uffdio_register.ioctls " "(miss=%d, wp=%d, minor=%d): expected=0x%"PRIx64", " "returned=0x%"PRIx64, miss, wp, minor, expected, ioctls); if (uffd_unregister(uffd, area_dst, page_size)) err("unregister"); } static void uffd_register_ioctls_test(uffd_test_args_t *args) { int miss, wp, minor; for (miss = 0; miss <= 1; miss++) for (wp = 0; wp <= 1; wp++) for (minor = 0; minor <= 1; minor++) do_register_ioctls_test(args, miss, wp, minor); uffd_test_pass(); } uffd_test_case_t uffd_tests[] = { { /* Test returned uffdio_register.ioctls. */ .name = "register-ioctls", .uffd_fn = uffd_register_ioctls_test, .mem_targets = MEM_ALL, .uffd_feature_required = UFFD_FEATURE_MISSING_HUGETLBFS | UFFD_FEATURE_MISSING_SHMEM | UFFD_FEATURE_PAGEFAULT_FLAG_WP | UFFD_FEATURE_WP_HUGETLBFS_SHMEM | UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM, }, { .name = "zeropage", .uffd_fn = uffd_zeropage_test, .mem_targets = MEM_ALL, .uffd_feature_required = 0, }, { .name = "move", .uffd_fn = uffd_move_test, .mem_targets = MEM_ANON, .uffd_feature_required = UFFD_FEATURE_MOVE, .test_case_ops = &uffd_move_test_case_ops, }, { .name = "move-pmd", .uffd_fn = uffd_move_pmd_test, .mem_targets = MEM_ANON, .uffd_feature_required = UFFD_FEATURE_MOVE, .test_case_ops = &uffd_move_test_pmd_case_ops, }, { .name = "move-pmd-split", .uffd_fn = uffd_move_pmd_split_test, .mem_targets = MEM_ANON, .uffd_feature_required = UFFD_FEATURE_MOVE, .test_case_ops = &uffd_move_test_pmd_case_ops, }, { .name = "wp-fork", .uffd_fn = uffd_wp_fork_test, .mem_targets = MEM_ALL, .uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP | UFFD_FEATURE_WP_HUGETLBFS_SHMEM, }, { .name = "wp-fork-with-event", .uffd_fn = uffd_wp_fork_with_event_test, .mem_targets = MEM_ALL, .uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP | UFFD_FEATURE_WP_HUGETLBFS_SHMEM | /* when set, child process should inherit uffd-wp bits */ UFFD_FEATURE_EVENT_FORK, }, { .name = "wp-fork-pin", .uffd_fn = uffd_wp_fork_pin_test, .mem_targets = MEM_ALL, .uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP | UFFD_FEATURE_WP_HUGETLBFS_SHMEM, }, { .name = "wp-fork-pin-with-event", .uffd_fn = uffd_wp_fork_pin_with_event_test, .mem_targets = MEM_ALL, .uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP | UFFD_FEATURE_WP_HUGETLBFS_SHMEM | /* when set, child process should inherit uffd-wp bits */ UFFD_FEATURE_EVENT_FORK, }, { .name = "wp-unpopulated", .uffd_fn = uffd_wp_unpopulated_test, .mem_targets = MEM_ANON, .uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP | UFFD_FEATURE_WP_UNPOPULATED, }, { .name = "minor", .uffd_fn = uffd_minor_test, .mem_targets = MEM_SHMEM | MEM_HUGETLB, .uffd_feature_required = UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM, }, { .name = "minor-wp", .uffd_fn = uffd_minor_wp_test, .mem_targets = MEM_SHMEM | MEM_HUGETLB, .uffd_feature_required = UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM | UFFD_FEATURE_PAGEFAULT_FLAG_WP | /* * HACK: here we leveraged WP_UNPOPULATED to detect whether * minor mode supports wr-protect. There's no feature flag * for it so this is the best we can test against. */ UFFD_FEATURE_WP_UNPOPULATED, }, { .name = "minor-collapse", .uffd_fn = uffd_minor_collapse_test, /* MADV_COLLAPSE only works with shmem */ .mem_targets = MEM_SHMEM, /* We can't test MADV_COLLAPSE, so try our luck */ .uffd_feature_required = UFFD_FEATURE_MINOR_SHMEM, }, { .name = "sigbus", .uffd_fn = uffd_sigbus_test, .mem_targets = MEM_ALL, .uffd_feature_required = UFFD_FEATURE_SIGBUS | UFFD_FEATURE_EVENT_FORK, }, { .name = "sigbus-wp", .uffd_fn = uffd_sigbus_wp_test, .mem_targets = MEM_ALL, .uffd_feature_required = UFFD_FEATURE_SIGBUS | UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_PAGEFAULT_FLAG_WP | UFFD_FEATURE_WP_HUGETLBFS_SHMEM, }, { .name = "events", .uffd_fn = uffd_events_test, .mem_targets = MEM_ALL, .uffd_feature_required = UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_EVENT_REMAP | UFFD_FEATURE_EVENT_REMOVE, }, { .name = "events-wp", .uffd_fn = uffd_events_wp_test, .mem_targets = MEM_ALL, .uffd_feature_required = UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_EVENT_REMAP | UFFD_FEATURE_EVENT_REMOVE | UFFD_FEATURE_PAGEFAULT_FLAG_WP | UFFD_FEATURE_WP_HUGETLBFS_SHMEM, }, { .name = "poison", .uffd_fn = uffd_poison_test, .mem_targets = MEM_ALL, .uffd_feature_required = UFFD_FEATURE_POISON, }, }; static void usage(const char *prog) { printf("usage: %s [-f TESTNAME]\n", prog); puts(""); puts(" -f: test name to filter (e.g., event)"); puts(" -h: show the help msg"); puts(" -l: list tests only"); puts(""); exit(KSFT_FAIL); } int main(int argc, char *argv[]) { int n_tests = sizeof(uffd_tests) / sizeof(uffd_test_case_t); int n_mems = sizeof(mem_types) / sizeof(mem_type_t); const char *test_filter = NULL; bool list_only = false; uffd_test_case_t *test; mem_type_t *mem_type; uffd_test_args_t args; const char *errmsg; int has_uffd, opt; int i, j; while ((opt = getopt(argc, argv, "f:hl")) != -1) { switch (opt) { case 'f': test_filter = optarg; break; case 'l': list_only = true; break; case 'h': default: /* Unknown */ usage(argv[0]); break; } } if (!test_filter && !list_only) { has_uffd = test_uffd_api(false); has_uffd |= test_uffd_api(true); if (!has_uffd) { printf("Userfaultfd not supported or unprivileged, skip all tests\n"); exit(KSFT_SKIP); } } for (i = 0; i < n_tests; i++) { test = &uffd_tests[i]; if (test_filter && !strstr(test->name, test_filter)) continue; if (list_only) { printf("%s\n", test->name); continue; } for (j = 0; j < n_mems; j++) { mem_type = &mem_types[j]; if (!(test->mem_targets & mem_type->mem_flag)) continue; uffd_test_start("%s on %s", test->name, mem_type->name); if ((mem_type->mem_flag == MEM_HUGETLB || mem_type->mem_flag == MEM_HUGETLB_PRIVATE) && (default_huge_page_size() == 0)) { uffd_test_skip("huge page size is 0, feature missing?"); continue; } if (!uffd_feature_supported(test)) { uffd_test_skip("feature missing"); continue; } if (uffd_setup_environment(&args, test, mem_type, &errmsg)) { uffd_test_skip(errmsg); continue; } test->uffd_fn(&args); uffd_test_ctx_clear(); } } if (!list_only) uffd_test_report(); return ksft_get_fail_cnt() ? KSFT_FAIL : KSFT_PASS; } #else /* __NR_userfaultfd */ #warning "missing __NR_userfaultfd definition" int main(void) { printf("Skipping %s (missing __NR_userfaultfd)\n", __file__); return KSFT_SKIP; } #endif /* __NR_userfaultfd */