1/*-
2 * Copyright (c) 2011 NetApp, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: stable/11/sys/amd64/vmm/intel/vmcs.c 331722 2018-03-29 02:50:57Z eadler $
27 */
28
29#include "opt_ddb.h"
30
31#include <sys/cdefs.h>
32__FBSDID("$FreeBSD: stable/11/sys/amd64/vmm/intel/vmcs.c 331722 2018-03-29 02:50:57Z eadler $");
33
34#include <sys/param.h>
35#include <sys/sysctl.h>
36#include <sys/systm.h>
37#include <sys/pcpu.h>
38
39#include <vm/vm.h>
40#include <vm/pmap.h>
41
42#include <machine/segments.h>
43#include <machine/vmm.h>
44#include "vmm_host.h"
45#include "vmx_cpufunc.h"
46#include "vmcs.h"
47#include "ept.h"
48#include "vmx.h"
49
50#ifdef DDB
51#include <ddb/ddb.h>
52#endif
53
54SYSCTL_DECL(_hw_vmm_vmx);
55
/*
 * Read/write integer knob registered under the _hw_vmm_vmx sysctl node.
 * A non-zero value makes vmcs_init() program vmx_exit_guest as the host RIP
 * instead of vmx_exit_guest_flush_rsb.  Within this file the value is only
 * consulted when vmcs_init() runs.
 */
static int no_flush_rsb;
SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, no_flush_rsb, CTLFLAG_RW,
    &no_flush_rsb, 0, "Do not flush RSB upon vmexit");
59
60static uint64_t
61vmcs_fix_regval(uint32_t encoding, uint64_t val)
62{
63
64	switch (encoding) {
65	case VMCS_GUEST_CR0:
66		val = vmx_fix_cr0(val);
67		break;
68	case VMCS_GUEST_CR4:
69		val = vmx_fix_cr4(val);
70		break;
71	default:
72		break;
73	}
74	return (val);
75}
76
77static uint32_t
78vmcs_field_encoding(int ident)
79{
80	switch (ident) {
81	case VM_REG_GUEST_CR0:
82		return (VMCS_GUEST_CR0);
83	case VM_REG_GUEST_CR3:
84		return (VMCS_GUEST_CR3);
85	case VM_REG_GUEST_CR4:
86		return (VMCS_GUEST_CR4);
87	case VM_REG_GUEST_DR7:
88		return (VMCS_GUEST_DR7);
89	case VM_REG_GUEST_RSP:
90		return (VMCS_GUEST_RSP);
91	case VM_REG_GUEST_RIP:
92		return (VMCS_GUEST_RIP);
93	case VM_REG_GUEST_RFLAGS:
94		return (VMCS_GUEST_RFLAGS);
95	case VM_REG_GUEST_ES:
96		return (VMCS_GUEST_ES_SELECTOR);
97	case VM_REG_GUEST_CS:
98		return (VMCS_GUEST_CS_SELECTOR);
99	case VM_REG_GUEST_SS:
100		return (VMCS_GUEST_SS_SELECTOR);
101	case VM_REG_GUEST_DS:
102		return (VMCS_GUEST_DS_SELECTOR);
103	case VM_REG_GUEST_FS:
104		return (VMCS_GUEST_FS_SELECTOR);
105	case VM_REG_GUEST_GS:
106		return (VMCS_GUEST_GS_SELECTOR);
107	case VM_REG_GUEST_TR:
108		return (VMCS_GUEST_TR_SELECTOR);
109	case VM_REG_GUEST_LDTR:
110		return (VMCS_GUEST_LDTR_SELECTOR);
111	case VM_REG_GUEST_EFER:
112		return (VMCS_GUEST_IA32_EFER);
113	case VM_REG_GUEST_PDPTE0:
114		return (VMCS_GUEST_PDPTE0);
115	case VM_REG_GUEST_PDPTE1:
116		return (VMCS_GUEST_PDPTE1);
117	case VM_REG_GUEST_PDPTE2:
118		return (VMCS_GUEST_PDPTE2);
119	case VM_REG_GUEST_PDPTE3:
120		return (VMCS_GUEST_PDPTE3);
121	default:
122		return (-1);
123	}
124
125}
126
127static int
128vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc)
129{
130
131	switch (seg) {
132	case VM_REG_GUEST_ES:
133		*base = VMCS_GUEST_ES_BASE;
134		*lim = VMCS_GUEST_ES_LIMIT;
135		*acc = VMCS_GUEST_ES_ACCESS_RIGHTS;
136		break;
137	case VM_REG_GUEST_CS:
138		*base = VMCS_GUEST_CS_BASE;
139		*lim = VMCS_GUEST_CS_LIMIT;
140		*acc = VMCS_GUEST_CS_ACCESS_RIGHTS;
141		break;
142	case VM_REG_GUEST_SS:
143		*base = VMCS_GUEST_SS_BASE;
144		*lim = VMCS_GUEST_SS_LIMIT;
145		*acc = VMCS_GUEST_SS_ACCESS_RIGHTS;
146		break;
147	case VM_REG_GUEST_DS:
148		*base = VMCS_GUEST_DS_BASE;
149		*lim = VMCS_GUEST_DS_LIMIT;
150		*acc = VMCS_GUEST_DS_ACCESS_RIGHTS;
151		break;
152	case VM_REG_GUEST_FS:
153		*base = VMCS_GUEST_FS_BASE;
154		*lim = VMCS_GUEST_FS_LIMIT;
155		*acc = VMCS_GUEST_FS_ACCESS_RIGHTS;
156		break;
157	case VM_REG_GUEST_GS:
158		*base = VMCS_GUEST_GS_BASE;
159		*lim = VMCS_GUEST_GS_LIMIT;
160		*acc = VMCS_GUEST_GS_ACCESS_RIGHTS;
161		break;
162	case VM_REG_GUEST_TR:
163		*base = VMCS_GUEST_TR_BASE;
164		*lim = VMCS_GUEST_TR_LIMIT;
165		*acc = VMCS_GUEST_TR_ACCESS_RIGHTS;
166		break;
167	case VM_REG_GUEST_LDTR:
168		*base = VMCS_GUEST_LDTR_BASE;
169		*lim = VMCS_GUEST_LDTR_LIMIT;
170		*acc = VMCS_GUEST_LDTR_ACCESS_RIGHTS;
171		break;
172	case VM_REG_GUEST_IDTR:
173		*base = VMCS_GUEST_IDTR_BASE;
174		*lim = VMCS_GUEST_IDTR_LIMIT;
175		*acc = VMCS_INVALID_ENCODING;
176		break;
177	case VM_REG_GUEST_GDTR:
178		*base = VMCS_GUEST_GDTR_BASE;
179		*lim = VMCS_GUEST_GDTR_LIMIT;
180		*acc = VMCS_INVALID_ENCODING;
181		break;
182	default:
183		return (EINVAL);
184	}
185
186	return (0);
187}
188
/*
 * Read the VMCS field selected by 'ident' into '*retval'.
 *
 * A non-negative 'ident' is a VM_REG_GUEST_* identifier and is translated
 * with vmcs_field_encoding().  A negative 'ident' bypasses the translation:
 * the sign bit is stripped and the remainder is used directly as the field
 * encoding.  This works because the upper 16 bits of VMCS encodings are
 * reserved (i.e. zero), which leaves the sign bit free for this purpose.
 *
 * When 'running' is zero the VMCS is loaded with VMPTRLD before the read
 * and released with VMCLEAR afterwards; otherwise the read is issued
 * without either step.
 *
 * Returns EINVAL if 'ident' has no field encoding, else the vmread() result.
 */
int
vmcs_getreg(struct vmcs *vmcs, int running, int ident, uint64_t *retval)
{
	uint32_t field;
	int rc;

	field = (ident < 0) ? (uint32_t)(ident & 0x7fffffff) :
	    vmcs_field_encoding(ident);
	if (field == (uint32_t)-1)
		return (EINVAL);

	if (running)
		return (vmread(field, retval));

	VMPTRLD(vmcs);
	rc = vmread(field, retval);
	VMCLEAR(vmcs);

	return (rc);
}
220
/*
 * Write 'val' to the VMCS field selected by 'ident'.
 *
 * A non-negative 'ident' is translated with vmcs_field_encoding(); a
 * negative one has its sign bit stripped and is used directly as the field
 * encoding.  The value is passed through vmcs_fix_regval() before it is
 * written.
 *
 * When 'running' is zero the VMCS is loaded with VMPTRLD before the write
 * and released with VMCLEAR afterwards.
 *
 * Returns EINVAL if 'ident' has no field encoding, else the vmwrite()
 * result.
 */
int
vmcs_setreg(struct vmcs *vmcs, int running, int ident, uint64_t val)
{
	uint32_t field;
	int rc;

	field = (ident < 0) ? (uint32_t)(ident & 0x7fffffff) :
	    vmcs_field_encoding(ident);
	if (field == (uint32_t)-1)
		return (EINVAL);

	val = vmcs_fix_regval(field, val);

	if (running)
		return (vmwrite(field, val));

	VMPTRLD(vmcs);
	rc = vmwrite(field, val);
	VMCLEAR(vmcs);

	return (rc);
}
247
248int
249vmcs_setdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc)
250{
251	int error;
252	uint32_t base, limit, access;
253
254	error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
255	if (error != 0)
256		panic("vmcs_setdesc: invalid segment register %d", seg);
257
258	if (!running)
259		VMPTRLD(vmcs);
260	if ((error = vmwrite(base, desc->base)) != 0)
261		goto done;
262
263	if ((error = vmwrite(limit, desc->limit)) != 0)
264		goto done;
265
266	if (access != VMCS_INVALID_ENCODING) {
267		if ((error = vmwrite(access, desc->access)) != 0)
268			goto done;
269	}
270done:
271	if (!running)
272		VMCLEAR(vmcs);
273	return (error);
274}
275
276int
277vmcs_getdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc)
278{
279	int error;
280	uint32_t base, limit, access;
281	uint64_t u64;
282
283	error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
284	if (error != 0)
285		panic("vmcs_getdesc: invalid segment register %d", seg);
286
287	if (!running)
288		VMPTRLD(vmcs);
289	if ((error = vmread(base, &u64)) != 0)
290		goto done;
291	desc->base = u64;
292
293	if ((error = vmread(limit, &u64)) != 0)
294		goto done;
295	desc->limit = u64;
296
297	if (access != VMCS_INVALID_ENCODING) {
298		if ((error = vmread(access, &u64)) != 0)
299			goto done;
300		desc->access = u64;
301	}
302done:
303	if (!running)
304		VMCLEAR(vmcs);
305	return (error);
306}
307
308int
309vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count)
310{
311	int error;
312
313	VMPTRLD(vmcs);
314
315	/*
316	 * Guest MSRs are saved in the VM-exit MSR-store area.
317	 * Guest MSRs are loaded from the VM-entry MSR-load area.
318	 * Both areas point to the same location in memory.
319	 */
320	if ((error = vmwrite(VMCS_EXIT_MSR_STORE, g_area)) != 0)
321		goto done;
322	if ((error = vmwrite(VMCS_EXIT_MSR_STORE_COUNT, g_count)) != 0)
323		goto done;
324
325	if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD, g_area)) != 0)
326		goto done;
327	if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT, g_count)) != 0)
328		goto done;
329
330	error = 0;
331done:
332	VMCLEAR(vmcs);
333	return (error);
334}
335
336int
337vmcs_init(struct vmcs *vmcs)
338{
339	int error, codesel, datasel, tsssel;
340	u_long cr0, cr4, efer;
341	uint64_t pat, fsbase, idtrbase;
342
343	codesel = vmm_get_host_codesel();
344	datasel = vmm_get_host_datasel();
345	tsssel = vmm_get_host_tsssel();
346
347	/*
348	 * Make sure we have a "current" VMCS to work with.
349	 */
350	VMPTRLD(vmcs);
351
352	/* Host state */
353
354	/* Initialize host IA32_PAT MSR */
355	pat = vmm_get_host_pat();
356	if ((error = vmwrite(VMCS_HOST_IA32_PAT, pat)) != 0)
357		goto done;
358
359	/* Load the IA32_EFER MSR */
360	efer = vmm_get_host_efer();
361	if ((error = vmwrite(VMCS_HOST_IA32_EFER, efer)) != 0)
362		goto done;
363
364	/* Load the control registers */
365
366	cr0 = vmm_get_host_cr0();
367	if ((error = vmwrite(VMCS_HOST_CR0, cr0)) != 0)
368		goto done;
369
370	cr4 = vmm_get_host_cr4() | CR4_VMXE;
371	if ((error = vmwrite(VMCS_HOST_CR4, cr4)) != 0)
372		goto done;
373
374	/* Load the segment selectors */
375	if ((error = vmwrite(VMCS_HOST_ES_SELECTOR, datasel)) != 0)
376		goto done;
377
378	if ((error = vmwrite(VMCS_HOST_CS_SELECTOR, codesel)) != 0)
379		goto done;
380
381	if ((error = vmwrite(VMCS_HOST_SS_SELECTOR, datasel)) != 0)
382		goto done;
383
384	if ((error = vmwrite(VMCS_HOST_DS_SELECTOR, datasel)) != 0)
385		goto done;
386
387	if ((error = vmwrite(VMCS_HOST_FS_SELECTOR, datasel)) != 0)
388		goto done;
389
390	if ((error = vmwrite(VMCS_HOST_GS_SELECTOR, datasel)) != 0)
391		goto done;
392
393	if ((error = vmwrite(VMCS_HOST_TR_SELECTOR, tsssel)) != 0)
394		goto done;
395
396	/*
397	 * Load the Base-Address for %fs and idtr.
398	 *
399	 * Note that we exclude %gs, tss and gdtr here because their base
400	 * address is pcpu specific.
401	 */
402	fsbase = vmm_get_host_fsbase();
403	if ((error = vmwrite(VMCS_HOST_FS_BASE, fsbase)) != 0)
404		goto done;
405
406	idtrbase = vmm_get_host_idtrbase();
407	if ((error = vmwrite(VMCS_HOST_IDTR_BASE, idtrbase)) != 0)
408		goto done;
409
410	/* instruction pointer */
411	if (no_flush_rsb) {
412		if ((error = vmwrite(VMCS_HOST_RIP,
413		    (u_long)vmx_exit_guest)) != 0)
414			goto done;
415	} else {
416		if ((error = vmwrite(VMCS_HOST_RIP,
417		    (u_long)vmx_exit_guest_flush_rsb)) != 0)
418			goto done;
419	}
420
421	/* link pointer */
422	if ((error = vmwrite(VMCS_LINK_POINTER, ~0)) != 0)
423		goto done;
424done:
425	VMCLEAR(vmcs);
426	return (error);
427}
428
429#ifdef DDB
430extern int vmxon_enabled[];
431
432DB_SHOW_COMMAND(vmcs, db_show_vmcs)
433{
434	uint64_t cur_vmcs, val;
435	uint32_t exit;
436
437	if (!vmxon_enabled[curcpu]) {
438		db_printf("VMX not enabled\n");
439		return;
440	}
441
442	if (have_addr) {
443		db_printf("Only current VMCS supported\n");
444		return;
445	}
446
447	vmptrst(&cur_vmcs);
448	if (cur_vmcs == VMCS_INITIAL) {
449		db_printf("No current VM context\n");
450		return;
451	}
452	db_printf("VMCS: %jx\n", cur_vmcs);
453	db_printf("VPID: %lu\n", vmcs_read(VMCS_VPID));
454	db_printf("Activity: ");
455	val = vmcs_read(VMCS_GUEST_ACTIVITY);
456	switch (val) {
457	case 0:
458		db_printf("Active");
459		break;
460	case 1:
461		db_printf("HLT");
462		break;
463	case 2:
464		db_printf("Shutdown");
465		break;
466	case 3:
467		db_printf("Wait for SIPI");
468		break;
469	default:
470		db_printf("Unknown: %#lx", val);
471	}
472	db_printf("\n");
473	exit = vmcs_read(VMCS_EXIT_REASON);
474	if (exit & 0x80000000)
475		db_printf("Entry Failure Reason: %u\n", exit & 0xffff);
476	else
477		db_printf("Exit Reason: %u\n", exit & 0xffff);
478	db_printf("Qualification: %#lx\n", vmcs_exit_qualification());
479	db_printf("Guest Linear Address: %#lx\n",
480	    vmcs_read(VMCS_GUEST_LINEAR_ADDRESS));
481	switch (exit & 0x8000ffff) {
482	case EXIT_REASON_EXCEPTION:
483	case EXIT_REASON_EXT_INTR:
484		val = vmcs_read(VMCS_EXIT_INTR_INFO);
485		db_printf("Interrupt Type: ");
486		switch (val >> 8 & 0x7) {
487		case 0:
488			db_printf("external");
489			break;
490		case 2:
491			db_printf("NMI");
492			break;
493		case 3:
494			db_printf("HW exception");
495			break;
496		case 4:
497			db_printf("SW exception");
498			break;
499		default:
500			db_printf("?? %lu", val >> 8 & 0x7);
501			break;
502		}
503		db_printf("  Vector: %lu", val & 0xff);
504		if (val & 0x800)
505			db_printf("  Error Code: %lx",
506			    vmcs_read(VMCS_EXIT_INTR_ERRCODE));
507		db_printf("\n");
508		break;
509	case EXIT_REASON_EPT_FAULT:
510	case EXIT_REASON_EPT_MISCONFIG:
511		db_printf("Guest Physical Address: %#lx\n",
512		    vmcs_read(VMCS_GUEST_PHYSICAL_ADDRESS));
513		break;
514	}
515	db_printf("VM-instruction error: %#lx\n", vmcs_instruction_error());
516}
517#endif
518