vmcs.c revision 330897
1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2011 NetApp, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 * $FreeBSD: stable/11/sys/amd64/vmm/intel/vmcs.c 330897 2018-03-14 03:19:51Z eadler $
29 */
30
31#include "opt_ddb.h"
32
33#include <sys/cdefs.h>
34__FBSDID("$FreeBSD: stable/11/sys/amd64/vmm/intel/vmcs.c 330897 2018-03-14 03:19:51Z eadler $");
35
36#include <sys/param.h>
37#include <sys/sysctl.h>
38#include <sys/systm.h>
39#include <sys/pcpu.h>
40
41#include <vm/vm.h>
42#include <vm/pmap.h>
43
44#include <machine/segments.h>
45#include <machine/vmm.h>
46#include "vmm_host.h"
47#include "vmx_cpufunc.h"
48#include "vmcs.h"
49#include "ept.h"
50#include "vmx.h"
51
52#ifdef DDB
53#include <ddb/ddb.h>
54#endif
55
56SYSCTL_DECL(_hw_vmm_vmx);
57
58static int no_flush_rsb;
59SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, no_flush_rsb, CTLFLAG_RW,
60    &no_flush_rsb, 0, "Do not flush RSB upon vmexit");
61
62static uint64_t
63vmcs_fix_regval(uint32_t encoding, uint64_t val)
64{
65
66	switch (encoding) {
67	case VMCS_GUEST_CR0:
68		val = vmx_fix_cr0(val);
69		break;
70	case VMCS_GUEST_CR4:
71		val = vmx_fix_cr4(val);
72		break;
73	default:
74		break;
75	}
76	return (val);
77}
78
79static uint32_t
80vmcs_field_encoding(int ident)
81{
82	switch (ident) {
83	case VM_REG_GUEST_CR0:
84		return (VMCS_GUEST_CR0);
85	case VM_REG_GUEST_CR3:
86		return (VMCS_GUEST_CR3);
87	case VM_REG_GUEST_CR4:
88		return (VMCS_GUEST_CR4);
89	case VM_REG_GUEST_DR7:
90		return (VMCS_GUEST_DR7);
91	case VM_REG_GUEST_RSP:
92		return (VMCS_GUEST_RSP);
93	case VM_REG_GUEST_RIP:
94		return (VMCS_GUEST_RIP);
95	case VM_REG_GUEST_RFLAGS:
96		return (VMCS_GUEST_RFLAGS);
97	case VM_REG_GUEST_ES:
98		return (VMCS_GUEST_ES_SELECTOR);
99	case VM_REG_GUEST_CS:
100		return (VMCS_GUEST_CS_SELECTOR);
101	case VM_REG_GUEST_SS:
102		return (VMCS_GUEST_SS_SELECTOR);
103	case VM_REG_GUEST_DS:
104		return (VMCS_GUEST_DS_SELECTOR);
105	case VM_REG_GUEST_FS:
106		return (VMCS_GUEST_FS_SELECTOR);
107	case VM_REG_GUEST_GS:
108		return (VMCS_GUEST_GS_SELECTOR);
109	case VM_REG_GUEST_TR:
110		return (VMCS_GUEST_TR_SELECTOR);
111	case VM_REG_GUEST_LDTR:
112		return (VMCS_GUEST_LDTR_SELECTOR);
113	case VM_REG_GUEST_EFER:
114		return (VMCS_GUEST_IA32_EFER);
115	case VM_REG_GUEST_PDPTE0:
116		return (VMCS_GUEST_PDPTE0);
117	case VM_REG_GUEST_PDPTE1:
118		return (VMCS_GUEST_PDPTE1);
119	case VM_REG_GUEST_PDPTE2:
120		return (VMCS_GUEST_PDPTE2);
121	case VM_REG_GUEST_PDPTE3:
122		return (VMCS_GUEST_PDPTE3);
123	default:
124		return (-1);
125	}
126
127}
128
129static int
130vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc)
131{
132
133	switch (seg) {
134	case VM_REG_GUEST_ES:
135		*base = VMCS_GUEST_ES_BASE;
136		*lim = VMCS_GUEST_ES_LIMIT;
137		*acc = VMCS_GUEST_ES_ACCESS_RIGHTS;
138		break;
139	case VM_REG_GUEST_CS:
140		*base = VMCS_GUEST_CS_BASE;
141		*lim = VMCS_GUEST_CS_LIMIT;
142		*acc = VMCS_GUEST_CS_ACCESS_RIGHTS;
143		break;
144	case VM_REG_GUEST_SS:
145		*base = VMCS_GUEST_SS_BASE;
146		*lim = VMCS_GUEST_SS_LIMIT;
147		*acc = VMCS_GUEST_SS_ACCESS_RIGHTS;
148		break;
149	case VM_REG_GUEST_DS:
150		*base = VMCS_GUEST_DS_BASE;
151		*lim = VMCS_GUEST_DS_LIMIT;
152		*acc = VMCS_GUEST_DS_ACCESS_RIGHTS;
153		break;
154	case VM_REG_GUEST_FS:
155		*base = VMCS_GUEST_FS_BASE;
156		*lim = VMCS_GUEST_FS_LIMIT;
157		*acc = VMCS_GUEST_FS_ACCESS_RIGHTS;
158		break;
159	case VM_REG_GUEST_GS:
160		*base = VMCS_GUEST_GS_BASE;
161		*lim = VMCS_GUEST_GS_LIMIT;
162		*acc = VMCS_GUEST_GS_ACCESS_RIGHTS;
163		break;
164	case VM_REG_GUEST_TR:
165		*base = VMCS_GUEST_TR_BASE;
166		*lim = VMCS_GUEST_TR_LIMIT;
167		*acc = VMCS_GUEST_TR_ACCESS_RIGHTS;
168		break;
169	case VM_REG_GUEST_LDTR:
170		*base = VMCS_GUEST_LDTR_BASE;
171		*lim = VMCS_GUEST_LDTR_LIMIT;
172		*acc = VMCS_GUEST_LDTR_ACCESS_RIGHTS;
173		break;
174	case VM_REG_GUEST_IDTR:
175		*base = VMCS_GUEST_IDTR_BASE;
176		*lim = VMCS_GUEST_IDTR_LIMIT;
177		*acc = VMCS_INVALID_ENCODING;
178		break;
179	case VM_REG_GUEST_GDTR:
180		*base = VMCS_GUEST_GDTR_BASE;
181		*lim = VMCS_GUEST_GDTR_LIMIT;
182		*acc = VMCS_INVALID_ENCODING;
183		break;
184	default:
185		return (EINVAL);
186	}
187
188	return (0);
189}
190
/*
 * Read one field of 'vmcs' into '*retval'.
 *
 * 'ident' is normally a VM_REG_GUEST_* identifier that is translated with
 * vmcs_field_encoding().  Callers that need vmx-specific state can skip the
 * translation by passing a raw VMCS encoding with the sign bit set; the low
 * 31 bits are then used as the encoding.  This is possible because the upper
 * 16 bits of a VMCS encoding are reserved (i.e. zero), leaving the sign bit
 * free.
 *
 * When 'running' is zero the access is bracketed by VMPTRLD(vmcs) and
 * VMCLEAR(vmcs); otherwise the field is read without either call.
 *
 * Returns EINVAL if 'ident' has no corresponding field, otherwise the
 * result of vmread().
 */
int
vmcs_getreg(struct vmcs *vmcs, int running, int ident, uint64_t *retval)
{
	uint32_t field;
	int rc;

	field = (ident < 0) ? (uint32_t)(ident & 0x7fffffff) :
	    vmcs_field_encoding(ident);
	if (field == (uint32_t)-1)
		return (EINVAL);

	if (running)
		return (vmread(field, retval));

	VMPTRLD(vmcs);
	rc = vmread(field, retval);
	VMCLEAR(vmcs);

	return (rc);
}
222
/*
 * Write 'val' to one field of 'vmcs'.
 *
 * 'ident' is either a VM_REG_GUEST_* identifier, translated with
 * vmcs_field_encoding(), or a raw VMCS encoding with the sign bit set, in
 * which case the low 31 bits are used as the encoding.  The value is passed
 * through vmcs_fix_regval() before it is written.
 *
 * When 'running' is zero the access is bracketed by VMPTRLD(vmcs) and
 * VMCLEAR(vmcs); otherwise the field is written without either call.
 *
 * Returns EINVAL if 'ident' has no corresponding field, otherwise the
 * result of vmwrite().
 */
int
vmcs_setreg(struct vmcs *vmcs, int running, int ident, uint64_t val)
{
	uint32_t field;
	int rc;

	field = (ident < 0) ? (uint32_t)(ident & 0x7fffffff) :
	    vmcs_field_encoding(ident);
	if (field == (uint32_t)-1)
		return (EINVAL);

	val = vmcs_fix_regval(field, val);

	if (running)
		return (vmwrite(field, val));

	VMPTRLD(vmcs);
	rc = vmwrite(field, val);
	VMCLEAR(vmcs);

	return (rc);
}
249
250int
251vmcs_setdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc)
252{
253	int error;
254	uint32_t base, limit, access;
255
256	error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
257	if (error != 0)
258		panic("vmcs_setdesc: invalid segment register %d", seg);
259
260	if (!running)
261		VMPTRLD(vmcs);
262	if ((error = vmwrite(base, desc->base)) != 0)
263		goto done;
264
265	if ((error = vmwrite(limit, desc->limit)) != 0)
266		goto done;
267
268	if (access != VMCS_INVALID_ENCODING) {
269		if ((error = vmwrite(access, desc->access)) != 0)
270			goto done;
271	}
272done:
273	if (!running)
274		VMCLEAR(vmcs);
275	return (error);
276}
277
278int
279vmcs_getdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc)
280{
281	int error;
282	uint32_t base, limit, access;
283	uint64_t u64;
284
285	error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
286	if (error != 0)
287		panic("vmcs_getdesc: invalid segment register %d", seg);
288
289	if (!running)
290		VMPTRLD(vmcs);
291	if ((error = vmread(base, &u64)) != 0)
292		goto done;
293	desc->base = u64;
294
295	if ((error = vmread(limit, &u64)) != 0)
296		goto done;
297	desc->limit = u64;
298
299	if (access != VMCS_INVALID_ENCODING) {
300		if ((error = vmread(access, &u64)) != 0)
301			goto done;
302		desc->access = u64;
303	}
304done:
305	if (!running)
306		VMCLEAR(vmcs);
307	return (error);
308}
309
310int
311vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count)
312{
313	int error;
314
315	VMPTRLD(vmcs);
316
317	/*
318	 * Guest MSRs are saved in the VM-exit MSR-store area.
319	 * Guest MSRs are loaded from the VM-entry MSR-load area.
320	 * Both areas point to the same location in memory.
321	 */
322	if ((error = vmwrite(VMCS_EXIT_MSR_STORE, g_area)) != 0)
323		goto done;
324	if ((error = vmwrite(VMCS_EXIT_MSR_STORE_COUNT, g_count)) != 0)
325		goto done;
326
327	if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD, g_area)) != 0)
328		goto done;
329	if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT, g_count)) != 0)
330		goto done;
331
332	error = 0;
333done:
334	VMCLEAR(vmcs);
335	return (error);
336}
337
338int
339vmcs_init(struct vmcs *vmcs)
340{
341	int error, codesel, datasel, tsssel;
342	u_long cr0, cr4, efer;
343	uint64_t pat, fsbase, idtrbase;
344
345	codesel = vmm_get_host_codesel();
346	datasel = vmm_get_host_datasel();
347	tsssel = vmm_get_host_tsssel();
348
349	/*
350	 * Make sure we have a "current" VMCS to work with.
351	 */
352	VMPTRLD(vmcs);
353
354	/* Host state */
355
356	/* Initialize host IA32_PAT MSR */
357	pat = vmm_get_host_pat();
358	if ((error = vmwrite(VMCS_HOST_IA32_PAT, pat)) != 0)
359		goto done;
360
361	/* Load the IA32_EFER MSR */
362	efer = vmm_get_host_efer();
363	if ((error = vmwrite(VMCS_HOST_IA32_EFER, efer)) != 0)
364		goto done;
365
366	/* Load the control registers */
367
368	cr0 = vmm_get_host_cr0();
369	if ((error = vmwrite(VMCS_HOST_CR0, cr0)) != 0)
370		goto done;
371
372	cr4 = vmm_get_host_cr4() | CR4_VMXE;
373	if ((error = vmwrite(VMCS_HOST_CR4, cr4)) != 0)
374		goto done;
375
376	/* Load the segment selectors */
377	if ((error = vmwrite(VMCS_HOST_ES_SELECTOR, datasel)) != 0)
378		goto done;
379
380	if ((error = vmwrite(VMCS_HOST_CS_SELECTOR, codesel)) != 0)
381		goto done;
382
383	if ((error = vmwrite(VMCS_HOST_SS_SELECTOR, datasel)) != 0)
384		goto done;
385
386	if ((error = vmwrite(VMCS_HOST_DS_SELECTOR, datasel)) != 0)
387		goto done;
388
389	if ((error = vmwrite(VMCS_HOST_FS_SELECTOR, datasel)) != 0)
390		goto done;
391
392	if ((error = vmwrite(VMCS_HOST_GS_SELECTOR, datasel)) != 0)
393		goto done;
394
395	if ((error = vmwrite(VMCS_HOST_TR_SELECTOR, tsssel)) != 0)
396		goto done;
397
398	/*
399	 * Load the Base-Address for %fs and idtr.
400	 *
401	 * Note that we exclude %gs, tss and gdtr here because their base
402	 * address is pcpu specific.
403	 */
404	fsbase = vmm_get_host_fsbase();
405	if ((error = vmwrite(VMCS_HOST_FS_BASE, fsbase)) != 0)
406		goto done;
407
408	idtrbase = vmm_get_host_idtrbase();
409	if ((error = vmwrite(VMCS_HOST_IDTR_BASE, idtrbase)) != 0)
410		goto done;
411
412	/* instruction pointer */
413	if (no_flush_rsb) {
414		if ((error = vmwrite(VMCS_HOST_RIP,
415		    (u_long)vmx_exit_guest)) != 0)
416			goto done;
417	} else {
418		if ((error = vmwrite(VMCS_HOST_RIP,
419		    (u_long)vmx_exit_guest_flush_rsb)) != 0)
420			goto done;
421	}
422
423	/* link pointer */
424	if ((error = vmwrite(VMCS_LINK_POINTER, ~0)) != 0)
425		goto done;
426done:
427	VMCLEAR(vmcs);
428	return (error);
429}
430
#ifdef DDB
extern int vmxon_enabled[];

/*
 * DDB show command "vmcs": print a summary of the VMCS that is current on
 * the cpu running the debugger.
 *
 * Nothing is printed beyond a diagnostic if VMX is not enabled on this cpu,
 * if an address argument was supplied (only the current VMCS is supported),
 * or if vmptrst() reports VMCS_INITIAL, i.e. there is no current VMCS.
 *
 * Otherwise the VMCS pointer, VPID, guest activity state, exit (or entry
 * failure) reason, exit qualification and guest linear address are shown,
 * followed by exit-specific detail for exception/external-interrupt and EPT
 * exits, and finally the VM-instruction error.
 */
DB_SHOW_COMMAND(vmcs, db_show_vmcs)
{
	uint64_t cur_vmcs, val;
	uint32_t exit;

	if (!vmxon_enabled[curcpu]) {
		db_printf("VMX not enabled\n");
		return;
	}

	if (have_addr) {
		db_printf("Only current VMCS supported\n");
		return;
	}

	vmptrst(&cur_vmcs);
	if (cur_vmcs == VMCS_INITIAL) {
		db_printf("No current VM context\n");
		return;
	}
	db_printf("VMCS: %jx\n", cur_vmcs);
	db_printf("VPID: %lu\n", vmcs_read(VMCS_VPID));
	db_printf("Activity: ");
	val = vmcs_read(VMCS_GUEST_ACTIVITY);
	switch (val) {
	case 0:
		db_printf("Active");
		break;
	case 1:
		db_printf("HLT");
		break;
	case 2:
		db_printf("Shutdown");
		break;
	case 3:
		db_printf("Wait for SIPI");
		break;
	default:
		db_printf("Unknown: %#lx", val);
	}
	db_printf("\n");

	/* Bit 31 of the exit reason flags a VM-entry failure. */
	exit = vmcs_read(VMCS_EXIT_REASON);
	if (exit & 0x80000000)
		db_printf("Entry Failure Reason: %u\n", exit & 0xffff);
	else
		db_printf("Exit Reason: %u\n", exit & 0xffff);
	db_printf("Qualification: %#lx\n", vmcs_exit_qualification());
	db_printf("Guest Linear Address: %#lx\n",
	    vmcs_read(VMCS_GUEST_LINEAR_ADDRESS));

	/*
	 * Keep bit 31 in the switch value so that entry failures never
	 * match one of the plain exit reasons below.
	 */
	switch (exit & 0x8000ffff) {
	case EXIT_REASON_EXCEPTION:
	case EXIT_REASON_EXT_INTR:
		val = vmcs_read(VMCS_EXIT_INTR_INFO);
		db_printf("Interrupt Type: ");
		/*
		 * The interruption type occupies bits 10:8.  In the Intel
		 * encoding a software exception is type 6; type 4 is a
		 * software interrupt and type 5 a privileged software
		 * exception.
		 */
		switch ((val >> 8) & 0x7) {
		case 0:
			db_printf("external");
			break;
		case 2:
			db_printf("NMI");
			break;
		case 3:
			db_printf("HW exception");
			break;
		case 4:
			db_printf("SW interrupt");
			break;
		case 5:
			db_printf("Priv SW exception");
			break;
		case 6:
			db_printf("SW exception");
			break;
		default:
			db_printf("?? %lu", (val >> 8) & 0x7);
			break;
		}
		db_printf("  Vector: %lu", val & 0xff);
		/* Bit 11 says whether an error code was delivered. */
		if (val & 0x800)
			db_printf("  Error Code: %lx",
			    vmcs_read(VMCS_EXIT_INTR_ERRCODE));
		db_printf("\n");
		break;
	case EXIT_REASON_EPT_FAULT:
	case EXIT_REASON_EPT_MISCONFIG:
		db_printf("Guest Physical Address: %#lx\n",
		    vmcs_read(VMCS_GUEST_PHYSICAL_ADDRESS));
		break;
	}
	db_printf("VM-instruction error: %#lx\n", vmcs_instruction_error());
}
#endif
520