vmcs.c revision 249879
1/*-
2 * Copyright (c) 2011 NetApp, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: head/sys/amd64/vmm/intel/vmcs.c 249879 2013-04-25 04:56:43Z grehan $
27 */
28
29#include "opt_ddb.h"
30
31#include <sys/cdefs.h>
32__FBSDID("$FreeBSD: head/sys/amd64/vmm/intel/vmcs.c 249879 2013-04-25 04:56:43Z grehan $");
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/pcpu.h>
37
38#include <vm/vm.h>
39#include <vm/pmap.h>
40
41#include <machine/segments.h>
42#include <machine/pmap.h>
43
44#include <machine/vmm.h>
45#include "vmm_host.h"
46#include "vmcs.h"
47#include "vmx_cpufunc.h"
48#include "ept.h"
49#include "vmx.h"
50
51#ifdef DDB
52#include <ddb/ddb.h>
53#endif
54
55static uint64_t
56vmcs_fix_regval(uint32_t encoding, uint64_t val)
57{
58
59	switch (encoding) {
60	case VMCS_GUEST_CR0:
61		val = vmx_fix_cr0(val);
62		break;
63	case VMCS_GUEST_CR4:
64		val = vmx_fix_cr4(val);
65		break;
66	default:
67		break;
68	}
69	return (val);
70}
71
72static uint32_t
73vmcs_field_encoding(int ident)
74{
75	switch (ident) {
76	case VM_REG_GUEST_CR0:
77		return (VMCS_GUEST_CR0);
78	case VM_REG_GUEST_CR3:
79		return (VMCS_GUEST_CR3);
80	case VM_REG_GUEST_CR4:
81		return (VMCS_GUEST_CR4);
82	case VM_REG_GUEST_DR7:
83		return (VMCS_GUEST_DR7);
84	case VM_REG_GUEST_RSP:
85		return (VMCS_GUEST_RSP);
86	case VM_REG_GUEST_RIP:
87		return (VMCS_GUEST_RIP);
88	case VM_REG_GUEST_RFLAGS:
89		return (VMCS_GUEST_RFLAGS);
90	case VM_REG_GUEST_ES:
91		return (VMCS_GUEST_ES_SELECTOR);
92	case VM_REG_GUEST_CS:
93		return (VMCS_GUEST_CS_SELECTOR);
94	case VM_REG_GUEST_SS:
95		return (VMCS_GUEST_SS_SELECTOR);
96	case VM_REG_GUEST_DS:
97		return (VMCS_GUEST_DS_SELECTOR);
98	case VM_REG_GUEST_FS:
99		return (VMCS_GUEST_FS_SELECTOR);
100	case VM_REG_GUEST_GS:
101		return (VMCS_GUEST_GS_SELECTOR);
102	case VM_REG_GUEST_TR:
103		return (VMCS_GUEST_TR_SELECTOR);
104	case VM_REG_GUEST_LDTR:
105		return (VMCS_GUEST_LDTR_SELECTOR);
106	case VM_REG_GUEST_EFER:
107		return (VMCS_GUEST_IA32_EFER);
108	default:
109		return (-1);
110	}
111
112}
113
114static int
115vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc)
116{
117
118	switch (seg) {
119	case VM_REG_GUEST_ES:
120		*base = VMCS_GUEST_ES_BASE;
121		*lim = VMCS_GUEST_ES_LIMIT;
122		*acc = VMCS_GUEST_ES_ACCESS_RIGHTS;
123		break;
124	case VM_REG_GUEST_CS:
125		*base = VMCS_GUEST_CS_BASE;
126		*lim = VMCS_GUEST_CS_LIMIT;
127		*acc = VMCS_GUEST_CS_ACCESS_RIGHTS;
128		break;
129	case VM_REG_GUEST_SS:
130		*base = VMCS_GUEST_SS_BASE;
131		*lim = VMCS_GUEST_SS_LIMIT;
132		*acc = VMCS_GUEST_SS_ACCESS_RIGHTS;
133		break;
134	case VM_REG_GUEST_DS:
135		*base = VMCS_GUEST_DS_BASE;
136		*lim = VMCS_GUEST_DS_LIMIT;
137		*acc = VMCS_GUEST_DS_ACCESS_RIGHTS;
138		break;
139	case VM_REG_GUEST_FS:
140		*base = VMCS_GUEST_FS_BASE;
141		*lim = VMCS_GUEST_FS_LIMIT;
142		*acc = VMCS_GUEST_FS_ACCESS_RIGHTS;
143		break;
144	case VM_REG_GUEST_GS:
145		*base = VMCS_GUEST_GS_BASE;
146		*lim = VMCS_GUEST_GS_LIMIT;
147		*acc = VMCS_GUEST_GS_ACCESS_RIGHTS;
148		break;
149	case VM_REG_GUEST_TR:
150		*base = VMCS_GUEST_TR_BASE;
151		*lim = VMCS_GUEST_TR_LIMIT;
152		*acc = VMCS_GUEST_TR_ACCESS_RIGHTS;
153		break;
154	case VM_REG_GUEST_LDTR:
155		*base = VMCS_GUEST_LDTR_BASE;
156		*lim = VMCS_GUEST_LDTR_LIMIT;
157		*acc = VMCS_GUEST_LDTR_ACCESS_RIGHTS;
158		break;
159	case VM_REG_GUEST_IDTR:
160		*base = VMCS_GUEST_IDTR_BASE;
161		*lim = VMCS_GUEST_IDTR_LIMIT;
162		*acc = VMCS_INVALID_ENCODING;
163		break;
164	case VM_REG_GUEST_GDTR:
165		*base = VMCS_GUEST_GDTR_BASE;
166		*lim = VMCS_GUEST_GDTR_LIMIT;
167		*acc = VMCS_INVALID_ENCODING;
168		break;
169	default:
170		return (EINVAL);
171	}
172
173	return (0);
174}
175
/*
 * Read one register/field from 'vmcs' into '*retval'.
 *
 * 'ident' is normally a VM_REG_GUEST_* identifier that is translated with
 * vmcs_field_encoding().  A negative 'ident' bypasses the translation:
 * the sign bit is stripped and the remainder is used directly as the
 * VMCS encoding, which gives callers access to vmx-specific state.  (The
 * original author notes that the upper 16 bits of VMCS encodings are
 * reserved as zero, which is what frees up the sign bit for this.)
 *
 * When 'running' is zero the VMCS is loaded with VMPTRLD() before the
 * access and released with VMCLEAR() afterwards; when it is non-zero the
 * vmread is issued directly (presumably because the VMCS is already
 * current on this cpu -- confirm with callers).
 *
 * Returns EINVAL if 'ident' cannot be translated, otherwise the result of
 * vmread().
 */
int
vmcs_getreg(struct vmcs *vmcs, int running, int ident, uint64_t *retval)
{
	uint32_t field;
	int rc;

	field = (ident < 0) ? (ident & 0x7fffffff) : vmcs_field_encoding(ident);
	if (field == (uint32_t)-1)
		return (EINVAL);

	if (running)
		return (vmread(field, retval));

	VMPTRLD(vmcs);
	rc = vmread(field, retval);
	VMCLEAR(vmcs);

	return (rc);
}
207
/*
 * Write 'val' to one register/field of 'vmcs'.
 *
 * 'ident' is interpreted as in vmcs_getreg(): a non-negative value is a
 * VM_REG_GUEST_* identifier translated by vmcs_field_encoding(), while a
 * negative value carries a raw VMCS encoding in its low 31 bits.
 *
 * The value is passed through vmcs_fix_regval() before it is written.
 *
 * When 'running' is zero the write is bracketed by VMPTRLD()/VMCLEAR();
 * otherwise the vmwrite is issued directly.
 *
 * Returns EINVAL if 'ident' cannot be translated, otherwise the result of
 * vmwrite().
 */
int
vmcs_setreg(struct vmcs *vmcs, int running, int ident, uint64_t val)
{
	uint32_t field;
	int rc;

	field = (ident < 0) ? (ident & 0x7fffffff) : vmcs_field_encoding(ident);
	if (field == (uint32_t)-1)
		return (EINVAL);

	val = vmcs_fix_regval(field, val);

	if (running)
		return (vmwrite(field, val));

	VMPTRLD(vmcs);
	rc = vmwrite(field, val);
	VMCLEAR(vmcs);

	return (rc);
}
234
235int
236vmcs_setdesc(struct vmcs *vmcs, int seg, struct seg_desc *desc)
237{
238	int error;
239	uint32_t base, limit, access;
240
241	error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
242	if (error != 0)
243		panic("vmcs_setdesc: invalid segment register %d", seg);
244
245	VMPTRLD(vmcs);
246	if ((error = vmwrite(base, desc->base)) != 0)
247		goto done;
248
249	if ((error = vmwrite(limit, desc->limit)) != 0)
250		goto done;
251
252	if (access != VMCS_INVALID_ENCODING) {
253		if ((error = vmwrite(access, desc->access)) != 0)
254			goto done;
255	}
256done:
257	VMCLEAR(vmcs);
258	return (error);
259}
260
261int
262vmcs_getdesc(struct vmcs *vmcs, int seg, struct seg_desc *desc)
263{
264	int error;
265	uint32_t base, limit, access;
266	uint64_t u64;
267
268	error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
269	if (error != 0)
270		panic("vmcs_getdesc: invalid segment register %d", seg);
271
272	VMPTRLD(vmcs);
273	if ((error = vmread(base, &u64)) != 0)
274		goto done;
275	desc->base = u64;
276
277	if ((error = vmread(limit, &u64)) != 0)
278		goto done;
279	desc->limit = u64;
280
281	if (access != VMCS_INVALID_ENCODING) {
282		if ((error = vmread(access, &u64)) != 0)
283			goto done;
284		desc->access = u64;
285	}
286done:
287	VMCLEAR(vmcs);
288	return (error);
289}
290
291int
292vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count)
293{
294	int error;
295
296	VMPTRLD(vmcs);
297
298	/*
299	 * Guest MSRs are saved in the VM-exit MSR-store area.
300	 * Guest MSRs are loaded from the VM-entry MSR-load area.
301	 * Both areas point to the same location in memory.
302	 */
303	if ((error = vmwrite(VMCS_EXIT_MSR_STORE, g_area)) != 0)
304		goto done;
305	if ((error = vmwrite(VMCS_EXIT_MSR_STORE_COUNT, g_count)) != 0)
306		goto done;
307
308	if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD, g_area)) != 0)
309		goto done;
310	if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT, g_count)) != 0)
311		goto done;
312
313	error = 0;
314done:
315	VMCLEAR(vmcs);
316	return (error);
317}
318
319int
320vmcs_set_defaults(struct vmcs *vmcs,
321		  u_long host_rip, u_long host_rsp, u_long ept_pml4,
322		  uint32_t pinbased_ctls, uint32_t procbased_ctls,
323		  uint32_t procbased_ctls2, uint32_t exit_ctls,
324		  uint32_t entry_ctls, u_long msr_bitmap, uint16_t vpid)
325{
326	int error, codesel, datasel, tsssel;
327	u_long cr0, cr4, efer;
328	uint64_t eptp, pat, fsbase, idtrbase;
329	uint32_t exc_bitmap;
330
331	codesel = vmm_get_host_codesel();
332	datasel = vmm_get_host_datasel();
333	tsssel = vmm_get_host_tsssel();
334
335	/*
336	 * Make sure we have a "current" VMCS to work with.
337	 */
338	VMPTRLD(vmcs);
339
340	/*
341	 * Load the VMX controls
342	 */
343	if ((error = vmwrite(VMCS_PIN_BASED_CTLS, pinbased_ctls)) != 0)
344		goto done;
345	if ((error = vmwrite(VMCS_PRI_PROC_BASED_CTLS, procbased_ctls)) != 0)
346		goto done;
347	if ((error = vmwrite(VMCS_SEC_PROC_BASED_CTLS, procbased_ctls2)) != 0)
348		goto done;
349	if ((error = vmwrite(VMCS_EXIT_CTLS, exit_ctls)) != 0)
350		goto done;
351	if ((error = vmwrite(VMCS_ENTRY_CTLS, entry_ctls)) != 0)
352		goto done;
353
354	/* Guest state */
355
356	/* Initialize guest IA32_PAT MSR with the default value */
357	pat = PAT_VALUE(0, PAT_WRITE_BACK)	|
358	      PAT_VALUE(1, PAT_WRITE_THROUGH)	|
359	      PAT_VALUE(2, PAT_UNCACHED)	|
360	      PAT_VALUE(3, PAT_UNCACHEABLE)	|
361	      PAT_VALUE(4, PAT_WRITE_BACK)	|
362	      PAT_VALUE(5, PAT_WRITE_THROUGH)	|
363	      PAT_VALUE(6, PAT_UNCACHED)	|
364	      PAT_VALUE(7, PAT_UNCACHEABLE);
365	if ((error = vmwrite(VMCS_GUEST_IA32_PAT, pat)) != 0)
366		goto done;
367
368	/* Host state */
369
370	/* Initialize host IA32_PAT MSR */
371	pat = vmm_get_host_pat();
372	if ((error = vmwrite(VMCS_HOST_IA32_PAT, pat)) != 0)
373		goto done;
374
375	/* Load the IA32_EFER MSR */
376	efer = vmm_get_host_efer();
377	if ((error = vmwrite(VMCS_HOST_IA32_EFER, efer)) != 0)
378		goto done;
379
380	/* Load the control registers */
381
382	cr0 = vmm_get_host_cr0();
383	if ((error = vmwrite(VMCS_HOST_CR0, cr0)) != 0)
384		goto done;
385
386	cr4 = vmm_get_host_cr4() | CR4_VMXE;
387	if ((error = vmwrite(VMCS_HOST_CR4, cr4)) != 0)
388		goto done;
389
390	/* Load the segment selectors */
391	if ((error = vmwrite(VMCS_HOST_ES_SELECTOR, datasel)) != 0)
392		goto done;
393
394	if ((error = vmwrite(VMCS_HOST_CS_SELECTOR, codesel)) != 0)
395		goto done;
396
397	if ((error = vmwrite(VMCS_HOST_SS_SELECTOR, datasel)) != 0)
398		goto done;
399
400	if ((error = vmwrite(VMCS_HOST_DS_SELECTOR, datasel)) != 0)
401		goto done;
402
403	if ((error = vmwrite(VMCS_HOST_FS_SELECTOR, datasel)) != 0)
404		goto done;
405
406	if ((error = vmwrite(VMCS_HOST_GS_SELECTOR, datasel)) != 0)
407		goto done;
408
409	if ((error = vmwrite(VMCS_HOST_TR_SELECTOR, tsssel)) != 0)
410		goto done;
411
412	/*
413	 * Load the Base-Address for %fs and idtr.
414	 *
415	 * Note that we exclude %gs, tss and gdtr here because their base
416	 * address is pcpu specific.
417	 */
418	fsbase = vmm_get_host_fsbase();
419	if ((error = vmwrite(VMCS_HOST_FS_BASE, fsbase)) != 0)
420		goto done;
421
422	idtrbase = vmm_get_host_idtrbase();
423	if ((error = vmwrite(VMCS_HOST_IDTR_BASE, idtrbase)) != 0)
424		goto done;
425
426	/* instruction pointer */
427	if ((error = vmwrite(VMCS_HOST_RIP, host_rip)) != 0)
428		goto done;
429
430	/* stack pointer */
431	if ((error = vmwrite(VMCS_HOST_RSP, host_rsp)) != 0)
432		goto done;
433
434	/* eptp */
435	eptp = EPTP(ept_pml4);
436	if ((error = vmwrite(VMCS_EPTP, eptp)) != 0)
437		goto done;
438
439	/* vpid */
440	if ((error = vmwrite(VMCS_VPID, vpid)) != 0)
441		goto done;
442
443	/* msr bitmap */
444	if ((error = vmwrite(VMCS_MSR_BITMAP, msr_bitmap)) != 0)
445		goto done;
446
447	/* exception bitmap */
448	exc_bitmap = 1 << IDT_MC;
449	if ((error = vmwrite(VMCS_EXCEPTION_BITMAP, exc_bitmap)) != 0)
450		goto done;
451
452	/* link pointer */
453	if ((error = vmwrite(VMCS_LINK_POINTER, ~0)) != 0)
454		goto done;
455done:
456	VMCLEAR(vmcs);
457	return (error);
458}
459
/*
 * Read the VMCS field 'encoding' with vmread() and return its value.
 *
 * No VMPTRLD()/VMCLEAR() is performed here, so the read goes to whatever
 * VMCS is current when this is called.  Panics if vmread() reports an
 * error.
 */
uint64_t
vmcs_read(uint32_t encoding)
{
	uint64_t field;
	int rc;

	rc = vmread(encoding, &field);
	if (rc == 0)
		return (field);

	panic("vmcs_read(%u) error %d", encoding, rc);
}
472
473#ifdef DDB
474extern int vmxon_enabled[];
475
476DB_SHOW_COMMAND(vmcs, db_show_vmcs)
477{
478	uint64_t cur_vmcs, val;
479	uint32_t exit;
480
481	if (!vmxon_enabled[curcpu]) {
482		db_printf("VMX not enabled\n");
483		return;
484	}
485
486	if (have_addr) {
487		db_printf("Only current VMCS supported\n");
488		return;
489	}
490
491	vmptrst(&cur_vmcs);
492	if (cur_vmcs == VMCS_INITIAL) {
493		db_printf("No current VM context\n");
494		return;
495	}
496	db_printf("VMCS: %jx\n", cur_vmcs);
497	db_printf("VPID: %lu\n", vmcs_read(VMCS_VPID));
498	db_printf("Activity: ");
499	val = vmcs_read(VMCS_GUEST_ACTIVITY);
500	switch (val) {
501	case 0:
502		db_printf("Active");
503		break;
504	case 1:
505		db_printf("HLT");
506		break;
507	case 2:
508		db_printf("Shutdown");
509		break;
510	case 3:
511		db_printf("Wait for SIPI");
512		break;
513	default:
514		db_printf("Unknown: %#lx", val);
515	}
516	db_printf("\n");
517	exit = vmcs_read(VMCS_EXIT_REASON);
518	if (exit & 0x80000000)
519		db_printf("Entry Failure Reason: %u\n", exit & 0xffff);
520	else
521		db_printf("Exit Reason: %u\n", exit & 0xffff);
522	db_printf("Qualification: %#lx\n", vmcs_exit_qualification());
523	db_printf("Guest Linear Address: %#lx\n",
524	    vmcs_read(VMCS_GUEST_LINEAR_ADDRESS));
525	switch (exit & 0x8000ffff) {
526	case EXIT_REASON_EXCEPTION:
527	case EXIT_REASON_EXT_INTR:
528		val = vmcs_read(VMCS_EXIT_INTERRUPTION_INFO);
529		db_printf("Interrupt Type: ");
530		switch (val >> 8 & 0x7) {
531		case 0:
532			db_printf("external");
533			break;
534		case 2:
535			db_printf("NMI");
536			break;
537		case 3:
538			db_printf("HW exception");
539			break;
540		case 4:
541			db_printf("SW exception");
542			break;
543		default:
544			db_printf("?? %lu", val >> 8 & 0x7);
545			break;
546		}
547		db_printf("  Vector: %lu", val & 0xff);
548		if (val & 0x800)
549			db_printf("  Error Code: %lx",
550			    vmcs_read(VMCS_EXIT_INTERRUPTION_ERROR));
551		db_printf("\n");
552		break;
553	case EXIT_REASON_EPT_FAULT:
554	case EXIT_REASON_EPT_MISCONFIG:
555		db_printf("Guest Physical Address: %#lx\n",
556		    vmcs_read(VMCS_GUEST_PHYSICAL_ADDRESS));
557		break;
558	}
559	db_printf("VM-instruction error: %#lx\n", vmcs_instruction_error());
560}
561#endif
562