vmcs.c revision 242122
/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
28
29#include "opt_ddb.h"
30
31#include <sys/cdefs.h>
32__FBSDID("$FreeBSD$");
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/pcpu.h>
37
38#include <vm/vm.h>
39#include <vm/pmap.h>
40
41#include <machine/segments.h>
42#include <machine/pmap.h>
43
44#include <machine/vmm.h>
45#include "vmcs.h"
46#include "vmx_cpufunc.h"
47#include "ept.h"
48#include "vmx.h"
49
50#ifdef DDB
51#include <ddb/ddb.h>
52#endif
53
54static uint64_t
55vmcs_fix_regval(uint32_t encoding, uint64_t val)
56{
57
58	switch (encoding) {
59	case VMCS_GUEST_CR0:
60		val = vmx_fix_cr0(val);
61		break;
62	case VMCS_GUEST_CR4:
63		val = vmx_fix_cr4(val);
64		break;
65	default:
66		break;
67	}
68	return (val);
69}
70
71static uint32_t
72vmcs_field_encoding(int ident)
73{
74	switch (ident) {
75	case VM_REG_GUEST_CR0:
76		return (VMCS_GUEST_CR0);
77	case VM_REG_GUEST_CR3:
78		return (VMCS_GUEST_CR3);
79	case VM_REG_GUEST_CR4:
80		return (VMCS_GUEST_CR4);
81	case VM_REG_GUEST_DR7:
82		return (VMCS_GUEST_DR7);
83	case VM_REG_GUEST_RSP:
84		return (VMCS_GUEST_RSP);
85	case VM_REG_GUEST_RIP:
86		return (VMCS_GUEST_RIP);
87	case VM_REG_GUEST_RFLAGS:
88		return (VMCS_GUEST_RFLAGS);
89	case VM_REG_GUEST_ES:
90		return (VMCS_GUEST_ES_SELECTOR);
91	case VM_REG_GUEST_CS:
92		return (VMCS_GUEST_CS_SELECTOR);
93	case VM_REG_GUEST_SS:
94		return (VMCS_GUEST_SS_SELECTOR);
95	case VM_REG_GUEST_DS:
96		return (VMCS_GUEST_DS_SELECTOR);
97	case VM_REG_GUEST_FS:
98		return (VMCS_GUEST_FS_SELECTOR);
99	case VM_REG_GUEST_GS:
100		return (VMCS_GUEST_GS_SELECTOR);
101	case VM_REG_GUEST_TR:
102		return (VMCS_GUEST_TR_SELECTOR);
103	case VM_REG_GUEST_LDTR:
104		return (VMCS_GUEST_LDTR_SELECTOR);
105	case VM_REG_GUEST_EFER:
106		return (VMCS_GUEST_IA32_EFER);
107	default:
108		return (-1);
109	}
110
111}
112
113static int
114vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc)
115{
116
117	switch (seg) {
118	case VM_REG_GUEST_ES:
119		*base = VMCS_GUEST_ES_BASE;
120		*lim = VMCS_GUEST_ES_LIMIT;
121		*acc = VMCS_GUEST_ES_ACCESS_RIGHTS;
122		break;
123	case VM_REG_GUEST_CS:
124		*base = VMCS_GUEST_CS_BASE;
125		*lim = VMCS_GUEST_CS_LIMIT;
126		*acc = VMCS_GUEST_CS_ACCESS_RIGHTS;
127		break;
128	case VM_REG_GUEST_SS:
129		*base = VMCS_GUEST_SS_BASE;
130		*lim = VMCS_GUEST_SS_LIMIT;
131		*acc = VMCS_GUEST_SS_ACCESS_RIGHTS;
132		break;
133	case VM_REG_GUEST_DS:
134		*base = VMCS_GUEST_DS_BASE;
135		*lim = VMCS_GUEST_DS_LIMIT;
136		*acc = VMCS_GUEST_DS_ACCESS_RIGHTS;
137		break;
138	case VM_REG_GUEST_FS:
139		*base = VMCS_GUEST_FS_BASE;
140		*lim = VMCS_GUEST_FS_LIMIT;
141		*acc = VMCS_GUEST_FS_ACCESS_RIGHTS;
142		break;
143	case VM_REG_GUEST_GS:
144		*base = VMCS_GUEST_GS_BASE;
145		*lim = VMCS_GUEST_GS_LIMIT;
146		*acc = VMCS_GUEST_GS_ACCESS_RIGHTS;
147		break;
148	case VM_REG_GUEST_TR:
149		*base = VMCS_GUEST_TR_BASE;
150		*lim = VMCS_GUEST_TR_LIMIT;
151		*acc = VMCS_GUEST_TR_ACCESS_RIGHTS;
152		break;
153	case VM_REG_GUEST_LDTR:
154		*base = VMCS_GUEST_LDTR_BASE;
155		*lim = VMCS_GUEST_LDTR_LIMIT;
156		*acc = VMCS_GUEST_LDTR_ACCESS_RIGHTS;
157		break;
158	case VM_REG_GUEST_IDTR:
159		*base = VMCS_GUEST_IDTR_BASE;
160		*lim = VMCS_GUEST_IDTR_LIMIT;
161		*acc = VMCS_INVALID_ENCODING;
162		break;
163	case VM_REG_GUEST_GDTR:
164		*base = VMCS_GUEST_GDTR_BASE;
165		*lim = VMCS_GUEST_GDTR_LIMIT;
166		*acc = VMCS_INVALID_ENCODING;
167		break;
168	default:
169		return (EINVAL);
170	}
171
172	return (0);
173}
174
/*
 * Read the register named by 'ident' out of 'vmcs' into '*retval'.
 *
 * A non-negative 'ident' is a VM_REG_GUEST_* identifier and is translated
 * to a VMCS field encoding.  A negative 'ident' bypasses that translation:
 * the sign bit is stripped and the remainder is used directly as the
 * encoding, which gives access to vmx-specific state.  This works because
 * the upper 16 bits of every VMCS encoding are reserved (zero), leaving the
 * sign bit free to act as the marker.
 *
 * Returns EINVAL if no encoding could be determined, otherwise the result
 * of vmread().
 */
int
vmcs_getreg(struct vmcs *vmcs, int ident, uint64_t *retval)
{
	uint32_t field;
	int rc;

	if (ident >= 0)
		field = vmcs_field_encoding(ident);
	else
		field = ident & 0x7fffffff;

	if (field == (uint32_t)-1)
		return (EINVAL);

	/* The VMCS is loaded only for the duration of the access. */
	VMPTRLD(vmcs);
	rc = vmread(field, retval);
	VMCLEAR(vmcs);

	return (rc);
}
201
/*
 * Write 'val' to the register named by 'ident' in 'vmcs'.
 *
 * A non-negative 'ident' is translated from a VM_REG_GUEST_* identifier to
 * a VMCS field encoding; a negative 'ident' has its sign bit stripped and
 * is used directly as the encoding.  The value is passed through
 * vmcs_fix_regval() before it is written.
 *
 * Returns EINVAL if no encoding could be determined, otherwise the result
 * of vmwrite().
 */
int
vmcs_setreg(struct vmcs *vmcs, int ident, uint64_t val)
{
	uint32_t field;
	int rc;

	if (ident >= 0)
		field = vmcs_field_encoding(ident);
	else
		field = ident & 0x7fffffff;

	if (field == (uint32_t)-1)
		return (EINVAL);

	val = vmcs_fix_regval(field, val);

	/* The VMCS is loaded only for the duration of the access. */
	VMPTRLD(vmcs);
	rc = vmwrite(field, val);
	VMCLEAR(vmcs);

	return (rc);
}
223
224int
225vmcs_setdesc(struct vmcs *vmcs, int seg, struct seg_desc *desc)
226{
227	int error;
228	uint32_t base, limit, access;
229
230	error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
231	if (error != 0)
232		panic("vmcs_setdesc: invalid segment register %d", seg);
233
234	VMPTRLD(vmcs);
235	if ((error = vmwrite(base, desc->base)) != 0)
236		goto done;
237
238	if ((error = vmwrite(limit, desc->limit)) != 0)
239		goto done;
240
241	if (access != VMCS_INVALID_ENCODING) {
242		if ((error = vmwrite(access, desc->access)) != 0)
243			goto done;
244	}
245done:
246	VMCLEAR(vmcs);
247	return (error);
248}
249
250int
251vmcs_getdesc(struct vmcs *vmcs, int seg, struct seg_desc *desc)
252{
253	int error;
254	uint32_t base, limit, access;
255	uint64_t u64;
256
257	error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
258	if (error != 0)
259		panic("vmcs_getdesc: invalid segment register %d", seg);
260
261	VMPTRLD(vmcs);
262	if ((error = vmread(base, &u64)) != 0)
263		goto done;
264	desc->base = u64;
265
266	if ((error = vmread(limit, &u64)) != 0)
267		goto done;
268	desc->limit = u64;
269
270	if (access != VMCS_INVALID_ENCODING) {
271		if ((error = vmread(access, &u64)) != 0)
272			goto done;
273		desc->access = u64;
274	}
275done:
276	VMCLEAR(vmcs);
277	return (error);
278}
279
280int
281vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count)
282{
283	int error;
284
285	VMPTRLD(vmcs);
286
287	/*
288	 * Guest MSRs are saved in the VM-exit MSR-store area.
289	 * Guest MSRs are loaded from the VM-entry MSR-load area.
290	 * Both areas point to the same location in memory.
291	 */
292	if ((error = vmwrite(VMCS_EXIT_MSR_STORE, g_area)) != 0)
293		goto done;
294	if ((error = vmwrite(VMCS_EXIT_MSR_STORE_COUNT, g_count)) != 0)
295		goto done;
296
297	if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD, g_area)) != 0)
298		goto done;
299	if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT, g_count)) != 0)
300		goto done;
301
302	error = 0;
303done:
304	VMCLEAR(vmcs);
305	return (error);
306}
307
308int
309vmcs_set_defaults(struct vmcs *vmcs,
310		  u_long host_rip, u_long host_rsp, u_long ept_pml4,
311		  uint32_t pinbased_ctls, uint32_t procbased_ctls,
312		  uint32_t procbased_ctls2, uint32_t exit_ctls,
313		  uint32_t entry_ctls, u_long msr_bitmap, uint16_t vpid)
314{
315	int error, codesel, datasel, tsssel;
316	u_long cr0, cr4, efer;
317	uint64_t eptp, pat;
318	uint32_t exc_bitmap;
319
320	codesel = GSEL(GCODE_SEL, SEL_KPL);
321	datasel = GSEL(GDATA_SEL, SEL_KPL);
322	tsssel = GSEL(GPROC0_SEL, SEL_KPL);
323
324	/*
325	 * Make sure we have a "current" VMCS to work with.
326	 */
327	VMPTRLD(vmcs);
328
329	/*
330	 * Load the VMX controls
331	 */
332	if ((error = vmwrite(VMCS_PIN_BASED_CTLS, pinbased_ctls)) != 0)
333		goto done;
334	if ((error = vmwrite(VMCS_PRI_PROC_BASED_CTLS, procbased_ctls)) != 0)
335		goto done;
336	if ((error = vmwrite(VMCS_SEC_PROC_BASED_CTLS, procbased_ctls2)) != 0)
337		goto done;
338	if ((error = vmwrite(VMCS_EXIT_CTLS, exit_ctls)) != 0)
339		goto done;
340	if ((error = vmwrite(VMCS_ENTRY_CTLS, entry_ctls)) != 0)
341		goto done;
342
343	/* Guest state */
344
345	/* Initialize guest IA32_PAT MSR with the default value */
346	pat = PAT_VALUE(0, PAT_WRITE_BACK)	|
347	      PAT_VALUE(1, PAT_WRITE_THROUGH)	|
348	      PAT_VALUE(2, PAT_UNCACHED)	|
349	      PAT_VALUE(3, PAT_UNCACHEABLE)	|
350	      PAT_VALUE(4, PAT_WRITE_BACK)	|
351	      PAT_VALUE(5, PAT_WRITE_THROUGH)	|
352	      PAT_VALUE(6, PAT_UNCACHED)	|
353	      PAT_VALUE(7, PAT_UNCACHEABLE);
354	if ((error = vmwrite(VMCS_GUEST_IA32_PAT, pat)) != 0)
355		goto done;
356
357	/* Host state */
358
359	/* Initialize host IA32_PAT MSR */
360	pat = rdmsr(MSR_PAT);
361	if ((error = vmwrite(VMCS_HOST_IA32_PAT, pat)) != 0)
362		goto done;
363
364	/* Load the IA32_EFER MSR */
365	efer = rdmsr(MSR_EFER);
366	if ((error = vmwrite(VMCS_HOST_IA32_EFER, efer)) != 0)
367		goto done;
368
369	/* Load the control registers */
370
371	/*
372	 * We always want CR0.TS to be set when the processor does a VM exit.
373	 *
374	 * With emulation turned on unconditionally after a VM exit, we are
375	 * able to trap inadvertent use of the FPU until the guest FPU state
376	 * has been safely squirreled away.
377	 */
378	cr0 = rcr0() | CR0_TS;
379	if ((error = vmwrite(VMCS_HOST_CR0, cr0)) != 0)
380		goto done;
381
382	cr4 = rcr4();
383	if ((error = vmwrite(VMCS_HOST_CR4, cr4)) != 0)
384		goto done;
385
386	/* Load the segment selectors */
387	if ((error = vmwrite(VMCS_HOST_ES_SELECTOR, datasel)) != 0)
388		goto done;
389
390	if ((error = vmwrite(VMCS_HOST_CS_SELECTOR, codesel)) != 0)
391		goto done;
392
393	if ((error = vmwrite(VMCS_HOST_SS_SELECTOR, datasel)) != 0)
394		goto done;
395
396	if ((error = vmwrite(VMCS_HOST_DS_SELECTOR, datasel)) != 0)
397		goto done;
398
399	if ((error = vmwrite(VMCS_HOST_FS_SELECTOR, datasel)) != 0)
400		goto done;
401
402	if ((error = vmwrite(VMCS_HOST_GS_SELECTOR, datasel)) != 0)
403		goto done;
404
405	if ((error = vmwrite(VMCS_HOST_TR_SELECTOR, tsssel)) != 0)
406		goto done;
407
408	/*
409	 * Load the Base-Address for %fs and idtr.
410	 *
411	 * Note that we exclude %gs, tss and gdtr here because their base
412	 * address is pcpu specific.
413	 */
414	if ((error = vmwrite(VMCS_HOST_FS_BASE, 0)) != 0)
415		goto done;
416
417	if ((error = vmwrite(VMCS_HOST_IDTR_BASE, r_idt.rd_base)) != 0)
418		goto done;
419
420	/* instruction pointer */
421	if ((error = vmwrite(VMCS_HOST_RIP, host_rip)) != 0)
422		goto done;
423
424	/* stack pointer */
425	if ((error = vmwrite(VMCS_HOST_RSP, host_rsp)) != 0)
426		goto done;
427
428	/* eptp */
429	eptp = EPTP(ept_pml4);
430	if ((error = vmwrite(VMCS_EPTP, eptp)) != 0)
431		goto done;
432
433	/* vpid */
434	if ((error = vmwrite(VMCS_VPID, vpid)) != 0)
435		goto done;
436
437	/* msr bitmap */
438	if ((error = vmwrite(VMCS_MSR_BITMAP, msr_bitmap)) != 0)
439		goto done;
440
441	/* exception bitmap */
442	exc_bitmap = 1 << IDT_MC;
443	if ((error = vmwrite(VMCS_EXCEPTION_BITMAP, exc_bitmap)) != 0)
444		goto done;
445
446	/* link pointer */
447	if ((error = vmwrite(VMCS_LINK_POINTER, ~0)) != 0)
448		goto done;
449done:
450	VMCLEAR(vmcs);
451	return (error);
452}
453
/*
 * Read the VMCS field identified by 'encoding' and return its value.
 *
 * This does not load a VMCS itself; it relies on vmread() operating on
 * whatever VMCS is current.  A vmread() failure is treated as fatal and
 * results in a panic.
 */
uint64_t
vmcs_read(uint32_t encoding)
{
	uint64_t value;
	int rc;

	rc = vmread(encoding, &value);
	if (rc == 0)
		return (value);

	panic("vmcs_read(%u) error %d", encoding, rc);
}
466
467#ifdef DDB
468extern int vmxon_enabled[];
469
470DB_SHOW_COMMAND(vmcs, db_show_vmcs)
471{
472	uint64_t cur_vmcs, val;
473	uint32_t exit;
474
475	if (!vmxon_enabled[curcpu]) {
476		db_printf("VMX not enabled\n");
477		return;
478	}
479
480	if (have_addr) {
481		db_printf("Only current VMCS supported\n");
482		return;
483	}
484
485	vmptrst(&cur_vmcs);
486	if (cur_vmcs == VMCS_INITIAL) {
487		db_printf("No current VM context\n");
488		return;
489	}
490	db_printf("VMCS: %jx\n", cur_vmcs);
491	db_printf("VPID: %lu\n", vmcs_read(VMCS_VPID));
492	db_printf("Activity: ");
493	val = vmcs_read(VMCS_GUEST_ACTIVITY);
494	switch (val) {
495	case 0:
496		db_printf("Active");
497		break;
498	case 1:
499		db_printf("HLT");
500		break;
501	case 2:
502		db_printf("Shutdown");
503		break;
504	case 3:
505		db_printf("Wait for SIPI");
506		break;
507	default:
508		db_printf("Unknown: %#lx", val);
509	}
510	db_printf("\n");
511	exit = vmcs_read(VMCS_EXIT_REASON);
512	if (exit & 0x80000000)
513		db_printf("Entry Failure Reason: %u\n", exit & 0xffff);
514	else
515		db_printf("Exit Reason: %u\n", exit & 0xffff);
516	db_printf("Qualification: %#lx\n", vmcs_exit_qualification());
517	db_printf("Guest Linear Address: %#lx\n",
518	    vmcs_read(VMCS_GUEST_LINEAR_ADDRESS));
519	switch (exit & 0x8000ffff) {
520	case EXIT_REASON_EXCEPTION:
521	case EXIT_REASON_EXT_INTR:
522		val = vmcs_read(VMCS_EXIT_INTERRUPTION_INFO);
523		db_printf("Interrupt Type: ");
524		switch (val >> 8 & 0x7) {
525		case 0:
526			db_printf("external");
527			break;
528		case 2:
529			db_printf("NMI");
530			break;
531		case 3:
532			db_printf("HW exception");
533			break;
534		case 4:
535			db_printf("SW exception");
536			break;
537		default:
538			db_printf("?? %lu", val >> 8 & 0x7);
539			break;
540		}
541		db_printf("  Vector: %lu", val & 0xff);
542		if (val & 0x800)
543			db_printf("  Error Code: %lx",
544			    vmcs_read(VMCS_EXIT_INTERRUPTION_ERROR));
545		db_printf("\n");
546		break;
547	case EXIT_REASON_EPT_FAULT:
548	case EXIT_REASON_EPT_MISCONFIG:
549		db_printf("Guest Physical Address: %#lx\n",
550		    vmcs_read(VMCS_GUEST_PHYSICAL_ADDRESS));
551		break;
552	}
553	db_printf("VM-instruction error: %#lx\n", vmcs_instruction_error());
554}
555#endif
556