// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2022, Google LLC.
 */
#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <limits.h>
#include <pthread.h>
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>

#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/kvm_para.h>
#include <linux/memfd.h>
#include <linux/sizes.h>

#include <test_util.h>
#include <kvm_util.h>
#include <processor.h>

#define BASE_DATA_SLOT		10
#define BASE_DATA_GPA		((uint64_t)(1ull << 32))
#define PER_CPU_DATA_SIZE	((uint64_t)(SZ_2M + PAGE_SIZE))

/* Horrific macro so that the line info is captured accurately :-( */
#define memcmp_g(gpa, pattern, size)						\
do {										\
	uint8_t *mem = (uint8_t *)gpa;						\
	size_t i;								\
										\
	for (i = 0; i < size; i++)						\
		__GUEST_ASSERT(mem[i] == pattern,				\
			       "Guest expected 0x%x at offset %lu (gpa 0x%lx), got 0x%x", \
			       pattern, i, gpa + i, mem[i]);			\
} while (0)

static void memcmp_h(uint8_t *mem, uint64_t gpa, uint8_t pattern, size_t size)
{
	size_t i;

	for (i = 0; i < size; i++)
		TEST_ASSERT(mem[i] == pattern,
			    "Host expected 0x%x at gpa 0x%lx, got 0x%x",
			    pattern, gpa + i, mem[i]);
}

/*
 * Run memory conversion tests with explicit conversions:
 * Execute a KVM hypercall to map/unmap a gpa range, which causes a userspace
 * exit to back/unback private memory.  Subsequent accesses by the guest to
 * the gpa range do not exit to userspace.
 *
 * Test memory conversion scenarios with the following steps:
 * 1) Access private memory using private access and verify that memory
 *    contents are not visible to userspace.
 * 2) Convert memory to shared using explicit conversions and ensure that
 *    userspace is able to access the shared regions.
 * 3) Convert memory back to private using explicit conversions and ensure
 *    that userspace is again not able to access converted private regions.
 */

#define GUEST_STAGE(o, s) { .offset = o, .size = s }

enum ucall_syncs {
	SYNC_SHARED,
	SYNC_PRIVATE,
};

static void guest_sync_shared(uint64_t gpa, uint64_t size,
			      uint8_t current_pattern, uint8_t new_pattern)
{
	GUEST_SYNC5(SYNC_SHARED, gpa, size, current_pattern, new_pattern);
}

static void guest_sync_private(uint64_t gpa, uint64_t size, uint8_t pattern)
{
	GUEST_SYNC4(SYNC_PRIVATE, gpa, size, pattern);
}

/* Arbitrary values, KVM doesn't care about the attribute flags. */
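/*
 * The flags are interpreted solely by this test's own hypercall handler (see
 * handle_exit_hypercall()): SET_ATTRIBUTES flips the range between the
 * private and shared memory attributes, and DO_FALLOCATE additionally
 * fallocate()s the guest_memfd range, i.e. allocates backing when converting
 * to private and punches a hole when converting to shared.
 */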
#define MAP_GPA_SET_ATTRIBUTES	BIT(0)
#define MAP_GPA_SHARED		BIT(1)
#define MAP_GPA_DO_FALLOCATE	BIT(2)

static void guest_map_mem(uint64_t gpa, uint64_t size, bool map_shared,
			  bool do_fallocate)
{
	uint64_t flags = MAP_GPA_SET_ATTRIBUTES;

	if (map_shared)
		flags |= MAP_GPA_SHARED;
	if (do_fallocate)
		flags |= MAP_GPA_DO_FALLOCATE;
	kvm_hypercall_map_gpa_range(gpa, size, flags);
}

static void guest_map_shared(uint64_t gpa, uint64_t size, bool do_fallocate)
{
	guest_map_mem(gpa, size, true, do_fallocate);
}

static void guest_map_private(uint64_t gpa, uint64_t size, bool do_fallocate)
{
	guest_map_mem(gpa, size, false, do_fallocate);
}

struct {
	uint64_t offset;
	uint64_t size;
} static const test_ranges[] = {
	GUEST_STAGE(0, PAGE_SIZE),
	GUEST_STAGE(0, SZ_2M),
	GUEST_STAGE(PAGE_SIZE, PAGE_SIZE),
	GUEST_STAGE(PAGE_SIZE, SZ_2M),
	GUEST_STAGE(SZ_2M, PAGE_SIZE),
};

static void guest_test_explicit_conversion(uint64_t base_gpa, bool do_fallocate)
{
	const uint8_t def_p = 0xaa;
	const uint8_t init_p = 0xcc;
	uint64_t j;
	int i;

	/* Memory should be shared by default. */
	memset((void *)base_gpa, def_p, PER_CPU_DATA_SIZE);
	memcmp_g(base_gpa, def_p, PER_CPU_DATA_SIZE);
	guest_sync_shared(base_gpa, PER_CPU_DATA_SIZE, def_p, init_p);

	memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE);

	for (i = 0; i < ARRAY_SIZE(test_ranges); i++) {
		uint64_t gpa = base_gpa + test_ranges[i].offset;
		uint64_t size = test_ranges[i].size;
		uint8_t p1 = 0x11;
		uint8_t p2 = 0x22;
		uint8_t p3 = 0x33;
		uint8_t p4 = 0x44;

		/*
		 * Set the test region to pattern one to differentiate it from
		 * the data range as a whole (contains the initial pattern).
		 */
		memset((void *)gpa, p1, size);

		/*
		 * Convert to private, set and verify the private data, and
		 * then verify that the rest of the data (map shared) still
		 * holds the initial pattern, and that the host always sees the
		 * shared memory (initial pattern).  Unlike shared memory,
		 * punching a hole in private memory is destructive, i.e.
		 * previous values aren't guaranteed to be preserved.
		 */
		guest_map_private(gpa, size, do_fallocate);
		memset((void *)gpa, p2, size);
		guest_sync_private(gpa, size, p1);

		/*
		 * Verify that the private memory was set to pattern two, and
		 * that shared memory still holds the initial pattern.
		 */
		memcmp_g(gpa, p2, size);
		if (gpa > base_gpa)
			memcmp_g(base_gpa, init_p, gpa - base_gpa);
		if (gpa + size < base_gpa + PER_CPU_DATA_SIZE)
			memcmp_g(gpa + size, init_p,
				 (base_gpa + PER_CPU_DATA_SIZE) - (gpa + size));

		/*
		 * Convert odd-number page frames back to shared to verify KVM
		 * also correctly handles holes in private ranges.
		 */
		for (j = 0; j < size; j += PAGE_SIZE) {
			if ((j >> PAGE_SHIFT) & 1) {
				guest_map_shared(gpa + j, PAGE_SIZE, do_fallocate);
				guest_sync_shared(gpa + j, PAGE_SIZE, p1, p3);

				memcmp_g(gpa + j, p3, PAGE_SIZE);
			} else {
				guest_sync_private(gpa + j, PAGE_SIZE, p1);
			}
		}

		/*
		 * Convert the entire region back to shared, explicitly write
		 * pattern three to fill in the even-number frames before
		 * asking the host to verify (and write pattern four).
		 */
		guest_map_shared(gpa, size, do_fallocate);
		memset((void *)gpa, p3, size);
		guest_sync_shared(gpa, size, p3, p4);
		memcmp_g(gpa, p4, size);

		/* Reset the shared memory back to the initial pattern. */
		memset((void *)gpa, init_p, size);

		/*
		 * Free (via PUNCH_HOLE) *all* private memory so that the next
		 * iteration starts from a clean slate, e.g. with respect to
		 * whether or not there are pages/folios in guest_mem.
		 */
		guest_map_shared(base_gpa, PER_CPU_DATA_SIZE, true);
	}
}

static void guest_punch_hole(uint64_t gpa, uint64_t size)
{
	/* "Mapping" memory shared via fallocate() is done via PUNCH_HOLE. */
	uint64_t flags = MAP_GPA_SHARED | MAP_GPA_DO_FALLOCATE;

	kvm_hypercall_map_gpa_range(gpa, size, flags);
}

/*
 * Test that PUNCH_HOLE actually frees memory by punching holes without doing
 * a proper conversion.  Freeing (PUNCH_HOLE) should zap SPTEs, and
 * reallocating (subsequent fault) should zero memory.
 */
static void guest_test_punch_hole(uint64_t base_gpa, bool precise)
{
	const uint8_t init_p = 0xcc;
	int i;

	/*
	 * Convert the entire range to private, this testcase is all about
	 * punching holes in guest_memfd, i.e. shared mappings aren't needed.
	 */
	guest_map_private(base_gpa, PER_CPU_DATA_SIZE, false);

	for (i = 0; i < ARRAY_SIZE(test_ranges); i++) {
		uint64_t gpa = base_gpa + test_ranges[i].offset;
		uint64_t size = test_ranges[i].size;

		/*
		 * Free all memory before each iteration, even for the !precise
		 * case where the memory will be faulted back in.  Freeing and
		 * reallocating should obviously work, and freeing all memory
		 * minimizes the probability of cross-testcase influence.
		 */
		guest_punch_hole(base_gpa, PER_CPU_DATA_SIZE);

		/* Fault-in and initialize memory, and verify the pattern. */
		if (precise) {
			memset((void *)gpa, init_p, size);
			memcmp_g(gpa, init_p, size);
		} else {
			memset((void *)base_gpa, init_p, PER_CPU_DATA_SIZE);
			memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE);
		}

		/*
		 * Punch a hole at the target range and verify that reads from
		 * the guest succeed and return zeroes.
		 */
		guest_punch_hole(gpa, size);
		memcmp_g(gpa, 0, size);
	}
}

static void guest_code(uint64_t base_gpa)
{
	/*
	 * Run the conversion test twice, with and without doing fallocate() on
	 * the guest_memfd backing when converting between shared and private.
	 */
	guest_test_explicit_conversion(base_gpa, false);
	guest_test_explicit_conversion(base_gpa, true);

	/*
	 * Run the PUNCH_HOLE test twice too, once with the entire guest_memfd
	 * faulted in, once with only the target range faulted in.
	 */
	guest_test_punch_hole(base_gpa, false);
	guest_test_punch_hole(base_gpa, true);
	GUEST_DONE();
}
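
/*
 * Userspace's half of the conversion protocol: because the test enables
 * KVM_CAP_EXIT_HYPERCALL for KVM_HC_MAP_GPA_RANGE, the guest's hypercalls
 * exit to userspace, which converts the range by fallocate()ing the
 * guest_memfd and/or updating KVM's memory attributes per the flags.
 */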
static void handle_exit_hypercall(struct kvm_vcpu *vcpu)
{
	struct kvm_run *run = vcpu->run;
	uint64_t gpa = run->hypercall.args[0];
	uint64_t size = run->hypercall.args[1] * PAGE_SIZE;
	bool set_attributes = run->hypercall.args[2] & MAP_GPA_SET_ATTRIBUTES;
	bool map_shared = run->hypercall.args[2] & MAP_GPA_SHARED;
	bool do_fallocate = run->hypercall.args[2] & MAP_GPA_DO_FALLOCATE;
	struct kvm_vm *vm = vcpu->vm;

	TEST_ASSERT(run->hypercall.nr == KVM_HC_MAP_GPA_RANGE,
		    "Wanted MAP_GPA_RANGE (%u), got '%llu'",
		    KVM_HC_MAP_GPA_RANGE, run->hypercall.nr);

	if (do_fallocate)
		vm_guest_mem_fallocate(vm, gpa, size, map_shared);

	if (set_attributes)
		vm_set_memory_attributes(vm, gpa, size,
					 map_shared ? 0 : KVM_MEMORY_ATTRIBUTE_PRIVATE);
	run->hypercall.ret = 0;
}

static bool run_vcpus;

static void *__test_mem_conversions(void *__vcpu)
{
	struct kvm_vcpu *vcpu = __vcpu;
	struct kvm_run *run = vcpu->run;
	struct kvm_vm *vm = vcpu->vm;
	struct ucall uc;

	while (!READ_ONCE(run_vcpus))
		;

	for ( ;; ) {
		vcpu_run(vcpu);

		if (run->exit_reason == KVM_EXIT_HYPERCALL) {
			handle_exit_hypercall(vcpu);
			continue;
		}

		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
			    "Wanted KVM_EXIT_IO, got exit reason: %u (%s)",
			    run->exit_reason, exit_reason_str(run->exit_reason));

		switch (get_ucall(vcpu, &uc)) {
		case UCALL_ABORT:
			REPORT_GUEST_ASSERT(uc);
		case UCALL_SYNC: {
			uint64_t gpa = uc.args[1];
			size_t size = uc.args[2];
			size_t i;

			TEST_ASSERT(uc.args[0] == SYNC_SHARED ||
				    uc.args[0] == SYNC_PRIVATE,
				    "Unknown sync command '%ld'", uc.args[0]);

			for (i = 0; i < size; i += vm->page_size) {
				size_t nr_bytes = min_t(size_t, vm->page_size, size - i);
				uint8_t *hva = addr_gpa2hva(vm, gpa + i);

				/* In all cases, the host should observe the shared data. */
				memcmp_h(hva, gpa + i, uc.args[3], nr_bytes);

				/* For shared, write the new pattern to guest memory. */
				if (uc.args[0] == SYNC_SHARED)
					memset(hva, uc.args[4], nr_bytes);
			}
			break;
		}
		case UCALL_DONE:
			return NULL;
		default:
			TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
		}
	}
}
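
/*
 * Back all vCPUs' regions with a single guest_memfd, carve it into
 * nr_memslots GUEST_MEMFD memslots, and run each vCPU's testcases in its own
 * thread so that conversions are exercised concurrently.
 */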
static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t nr_vcpus,
				 uint32_t nr_memslots)
{
	/*
	 * Allocate enough memory so that each vCPU's chunk of memory can be
	 * naturally aligned with respect to the size of the backing store.
	 */
	const size_t alignment = max_t(size_t, SZ_2M, get_backing_src_pagesz(src_type));
	const size_t per_cpu_size = align_up(PER_CPU_DATA_SIZE, alignment);
	const size_t memfd_size = per_cpu_size * nr_vcpus;
	const size_t slot_size = memfd_size / nr_memslots;
	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
	pthread_t threads[KVM_MAX_VCPUS];
	struct kvm_vm *vm;
	int memfd, i, r;

	const struct vm_shape shape = {
		.mode = VM_MODE_DEFAULT,
		.type = KVM_X86_SW_PROTECTED_VM,
	};

	TEST_ASSERT(slot_size * nr_memslots == memfd_size,
		    "The memfd size (0x%lx) needs to be cleanly divisible by the number of memslots (%u)",
		    memfd_size, nr_memslots);
	vm = __vm_create_with_vcpus(shape, nr_vcpus, 0, guest_code, vcpus);

	vm_enable_cap(vm, KVM_CAP_EXIT_HYPERCALL, (1 << KVM_HC_MAP_GPA_RANGE));

	memfd = vm_create_guest_memfd(vm, memfd_size, 0);

	for (i = 0; i < nr_memslots; i++)
		vm_mem_add(vm, src_type, BASE_DATA_GPA + slot_size * i,
			   BASE_DATA_SLOT + i, slot_size / vm->page_size,
			   KVM_MEM_GUEST_MEMFD, memfd, slot_size * i);

	for (i = 0; i < nr_vcpus; i++) {
		uint64_t gpa = BASE_DATA_GPA + i * per_cpu_size;

		vcpu_args_set(vcpus[i], 1, gpa);

		/*
		 * Map only what is needed so that an out-of-bounds access
		 * results in a #PF => SHUTDOWN instead of data corruption.
		 */
		virt_map(vm, gpa, gpa, PER_CPU_DATA_SIZE / vm->page_size);

		pthread_create(&threads[i], NULL, __test_mem_conversions, vcpus[i]);
	}

	WRITE_ONCE(run_vcpus, true);

	for (i = 0; i < nr_vcpus; i++)
		pthread_join(threads[i], NULL);

	kvm_vm_free(vm);

	/*
	 * Allocate and free memory from the guest_memfd after closing the VM
	 * fd.  The guest_memfd is gifted a reference to its owning VM, i.e.
	 * should prevent the VM from being fully destroyed until the last
	 * reference to the guest_memfd is also put.
	 */
	r = fallocate(memfd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, memfd_size);
	TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));

	r = fallocate(memfd, FALLOC_FL_KEEP_SIZE, 0, memfd_size);
	TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));

	close(memfd);
}

static void usage(const char *cmd)
{
	puts("");
	printf("usage: %s [-h] [-m nr_memslots] [-s mem_type] [-n nr_vcpus]\n", cmd);
	puts("");
	backing_src_help("-s");
	puts("");
	puts(" -n: specify the number of vcpus (default: 1)");
	puts("");
	puts(" -m: specify the number of memslots (default: 1)");
	puts("");
}

int main(int argc, char *argv[])
{
	enum vm_mem_backing_src_type src_type = DEFAULT_VM_MEM_SRC;
	uint32_t nr_memslots = 1;
	uint32_t nr_vcpus = 1;
	int opt;

	TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));

	while ((opt = getopt(argc, argv, "hm:s:n:")) != -1) {
		switch (opt) {
		case 's':
			src_type = parse_backing_src_type(optarg);
			break;
		case 'n':
			nr_vcpus = atoi_positive("nr_vcpus", optarg);
			break;
		case 'm':
			nr_memslots = atoi_positive("nr_memslots", optarg);
			break;
		case 'h':
		default:
			usage(argv[0]);
			exit(0);
		}
	}

	test_mem_conversions(src_type, nr_vcpus, nr_memslots);

	return 0;
}