// SPDX-License-Identifier: GPL-2.0-only
/*
 * KVM_GET/SET_* tests
 *
 * Copyright (C) 2018, Red Hat, Inc.
 *
 * Tests for vCPU state save/restore, including nested guest state.
 */
#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>

#include "test_util.h"

#include "kvm_util.h"
#include "processor.h"
#include "vmx.h"
#include "svm_util.h"

#define L2_GUEST_STACK_SIZE 256

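/*
 * GUEST_SYNC() stage numbers are shared across L1 and L2: stages 1-2 run in
 * guest_code() itself, stages 3-7 (SVM) or 3-13 (VMX) are reached only when
 * nested state is exercised.
 */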
void svm_l2_guest_code(void)
{
	GUEST_SYNC(4);
	/* Exit to L1 */
	vmcall();
	GUEST_SYNC(6);
	/* Done, exit to L1 and never come back.  */
	vmcall();
}

static void svm_l1_guest_code(struct svm_test_data *svm)
{
	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
	struct vmcb *vmcb = svm->vmcb;

	GUEST_ASSERT(svm->vmcb_gpa);
	/* Prepare for L2 execution. */
	generic_svm_setup(svm, svm_l2_guest_code,
			  &l2_guest_stack[L2_GUEST_STACK_SIZE]);

	GUEST_SYNC(3);
	run_guest(vmcb, svm->vmcb_gpa);
	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
	GUEST_SYNC(5);
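	/* Skip L2's 3-byte VMMCALL so it resumes at the next instruction. */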
	vmcb->save.rip += 3;
	run_guest(vmcb, svm->vmcb_gpa);
	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
	GUEST_SYNC(7);
}

void vmx_l2_guest_code(void)
{
	GUEST_SYNC(6);

	/* Exit to L1 */
	vmcall();

	/* L1 has now set up a shadow VMCS for us.  */
	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
	GUEST_SYNC(10);
	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
	GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0fffee));
	GUEST_SYNC(11);
	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0fffee);
	GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0ffffee));
	GUEST_SYNC(12);

	/* Done, exit to L1 and never come back.  */
	vmcall();
}

static void vmx_l1_guest_code(struct vmx_pages *vmx_pages)
{
	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];

	GUEST_ASSERT(vmx_pages->vmcs_gpa);
	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
	GUEST_SYNC(3);
	GUEST_ASSERT(load_vmcs(vmx_pages));
	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);

	GUEST_SYNC(4);
	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);

	prepare_vmcs(vmx_pages, vmx_l2_guest_code,
		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);

	GUEST_SYNC(5);
	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
	GUEST_ASSERT(!vmlaunch());
	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);

	/* Check that the launched state is preserved.  */
	GUEST_ASSERT(vmlaunch());

	GUEST_ASSERT(!vmresume());
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);

	GUEST_SYNC(7);
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);

	GUEST_ASSERT(!vmresume());
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);

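	/* Skip L2's 3-byte VMCALL so the next VMRESUME continues past it. */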
	vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + 3);

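	/*
	 * Set up VMCS shadowing so that L2 can VMREAD/VMWRITE the shadow
	 * VMCS (see the 0xc0ffee checks in vmx_l2_guest_code()).
	 */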
	vmwrite(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS);
	vmwrite(VMCS_LINK_POINTER, vmx_pages->shadow_vmcs_gpa);

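	/* VM-Enter must fail while a shadow VMCS is the current VMCS. */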
	GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
	GUEST_ASSERT(vmlaunch());
	GUEST_SYNC(8);
	GUEST_ASSERT(vmlaunch());
	GUEST_ASSERT(vmresume());

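	/* Stuff a marker RIP into the shadow VMCS for L2 to consume. */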
	vmwrite(GUEST_RIP, 0xc0ffee);
	GUEST_SYNC(9);
	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);

	GUEST_ASSERT(!vmptrld(vmx_pages->vmcs_gpa));
	GUEST_ASSERT(!vmresume());
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);

	GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
	GUEST_ASSERT(vmlaunch());
	GUEST_ASSERT(vmresume());
	GUEST_SYNC(13);
	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
	GUEST_ASSERT(vmlaunch());
	GUEST_ASSERT(vmresume());
}

static void __attribute__((__flatten__)) guest_code(void *arg)
{
	GUEST_SYNC(1);

	if (this_cpu_has(X86_FEATURE_XSAVE)) {
		uint64_t supported_xcr0 = this_cpu_supported_xcr0();
		uint8_t buffer[4096];

		memset(buffer, 0xcc, sizeof(buffer));

		set_cr4(get_cr4() | X86_CR4_OSXSAVE);
		GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE));

		xsetbv(0, xgetbv(0) | supported_xcr0);

		/*
		 * Modify state for all supported xfeatures to take them out of
		 * their "init" state, i.e. to make them show up in XSTATE_BV.
		 *
		 * Note off-by-default features, e.g. AMX, are out of scope for
		 * this particular testcase as they have a different ABI.
		 */
		GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_FP);
		asm volatile ("fincstp");

		GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_SSE);
		asm volatile ("vmovdqu %0, %%xmm0" :: "m" (buffer));

		if (supported_xcr0 & XFEATURE_MASK_YMM)
			asm volatile ("vmovdqu %0, %%ymm0" :: "m" (buffer));

		if (supported_xcr0 & XFEATURE_MASK_AVX512) {
			asm volatile ("kmovq %0, %%k1" :: "r" (-1ull));
			asm volatile ("vmovupd %0, %%zmm0" :: "m" (buffer));
			asm volatile ("vmovupd %0, %%zmm16" :: "m" (buffer));
		}

		if (this_cpu_has(X86_FEATURE_MPX)) {
			uint64_t bounds[2] = { 10, 0xffffffffull };
			uint64_t output[2] = { };

			GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDREGS);
			GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDCSR);

			/*
			 * Don't bother trying to get BNDCSR into the INUSE
			 * state.  MSR_IA32_BNDCFGS doesn't count as it isn't
			 * managed via XSAVE/XRSTOR, and BNDCFGU can only be
			 * modified by XRSTOR.  Stuffing XSTATE_BV in the host
			 * is simpler than doing XRSTOR here in the guest.
			 *
			 * However, temporarily enable MPX in BNDCFGS so that
			 * BNDMOV actually loads BND1.  If MPX isn't *fully*
			 * enabled, all MPX instructions are treated as NOPs.
			 *
			 * Hand encode "bndmov (%rax),%bnd1" as support for MPX
			 * mnemonics/registers has been removed from gcc and
			 * clang (and was never fully supported by clang).
			 */
			wrmsr(MSR_IA32_BNDCFGS, BIT_ULL(0));
			asm volatile (".byte 0x66,0x0f,0x1a,0x08" :: "a" (bounds));
			/*
			 * Hand encode "bndmov %bnd1, (%rax)" to sanity check
			 * that BND1 actually got loaded.
			 */
			asm volatile (".byte 0x66,0x0f,0x1b,0x08" :: "a" (output));
			wrmsr(MSR_IA32_BNDCFGS, 0);

			GUEST_ASSERT_EQ(bounds[0], output[0]);
			GUEST_ASSERT_EQ(bounds[1], output[1]);
		}
		if (this_cpu_has(X86_FEATURE_PKU)) {
			GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_PKRU);
			set_cr4(get_cr4() | X86_CR4_PKE);
			GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSPKE));

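			/* Write a non-init PKRU so it shows up in XSTATE_BV. */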
			wrpkru(-1u);
		}
	}

	GUEST_SYNC(2);

	if (arg) {
		if (this_cpu_has(X86_FEATURE_SVM))
			svm_l1_guest_code(arg);
		else
			vmx_l1_guest_code(arg);
	}

	GUEST_DONE();
}

int main(int argc, char *argv[])
{
	uint64_t *xstate_bv, saved_xstate_bv;
	vm_vaddr_t nested_gva = 0;
	struct kvm_cpuid2 empty_cpuid = {};
	struct kvm_regs regs1, regs2;
	struct kvm_vcpu *vcpu, *vcpuN;
	struct kvm_vm *vm;
	struct kvm_x86_state *state;
	struct ucall uc;
	int stage;

	/* Create VM */
	vm = vm_create_with_one_vcpu(&vcpu, guest_code);

	vcpu_regs_get(vcpu, &regs1);

	if (kvm_has_cap(KVM_CAP_NESTED_STATE)) {
		if (kvm_cpu_has(X86_FEATURE_SVM))
			vcpu_alloc_svm(vm, &nested_gva);
		else if (kvm_cpu_has(X86_FEATURE_VMX))
			vcpu_alloc_vmx(vm, &nested_gva);
	}

	if (!nested_gva)
		pr_info("will skip nested state checks\n");

	vcpu_args_set(vcpu, 1, nested_gva);

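	/*
	 * For each sync point: run to the next GUEST_SYNC(), save vCPU
	 * state, destroy the VM, recreate it, and restore the saved state.
	 */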
	for (stage = 1;; stage++) {
		vcpu_run(vcpu);
		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);

		switch (get_ucall(vcpu, &uc)) {
		case UCALL_ABORT:
			REPORT_GUEST_ASSERT(uc);
			/* NOT REACHED */
		case UCALL_SYNC:
			break;
		case UCALL_DONE:
			goto done;
		default:
			TEST_FAIL("Unknown ucall %lu", uc.cmd);
		}

		/* UCALL_SYNC is handled here.  */
		TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
			    uc.args[1] == stage,
			    "Stage %d: Unexpected GUEST_SYNC value, got %lx",
			    stage, (ulong)uc.args[1]);

		state = vcpu_save_state(vcpu);
		memset(&regs1, 0, sizeof(regs1));
		vcpu_regs_get(vcpu, &regs1);

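		/*
		 * Tear down the KVM fds; the VM is rebuilt below and guest
		 * memory contents survive the recreate.
		 */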
		kvm_vm_release(vm);

		/* Restore state in a new VM.  */
		vcpu = vm_recreate_with_one_vcpu(vm);
		vcpu_load_state(vcpu, state);

		/*
		 * Restore XSAVE state in a dummy vCPU, first without doing
		 * KVM_SET_CPUID2, and then with an empty guest CPUID.  Except
		 * for off-by-default xfeatures, e.g. AMX, KVM is supposed to
		 * allow KVM_SET_XSAVE regardless of guest CPUID.  Manually
		 * load only XSAVE state; MSRs in particular have a much more
		 * convoluted ABI.
		 *
		 * Load two versions of XSAVE state: one with the actual guest
		 * XSAVE state, and one with all supported features forced "on"
		 * in xstate_bv, e.g. to ensure that KVM allows loading all
		 * supported features, even if something goes awry in saving
		 * the original snapshot.
		 */
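		/* XSTATE_BV is the first u64 of the XSAVE header at byte 512. */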
		xstate_bv = (void *)&((uint8_t *)state->xsave->region)[512];
		saved_xstate_bv = *xstate_bv;

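		/*
		 * The extra vCPU is never run; it exists purely to take
		 * KVM_SET_XSAVE.
		 */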
		vcpuN = __vm_vcpu_add(vm, vcpu->id + 1);
		vcpu_xsave_set(vcpuN, state->xsave);
		*xstate_bv = kvm_cpu_supported_xcr0();
		vcpu_xsave_set(vcpuN, state->xsave);

		vcpu_init_cpuid(vcpuN, &empty_cpuid);
		vcpu_xsave_set(vcpuN, state->xsave);
		*xstate_bv = saved_xstate_bv;
		vcpu_xsave_set(vcpuN, state->xsave);

		kvm_x86_state_cleanup(state);

		memset(&regs2, 0, sizeof(regs2));
		vcpu_regs_get(vcpu, &regs2);
		TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
			    "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
			    (ulong) regs2.rdi, (ulong) regs2.rsi);
	}

done:
	kvm_vm_free(vm);
}