/* vmcs.c revision 221914 */
/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD$");
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/pcpu.h>
35
36#include <vm/vm.h>
37#include <vm/pmap.h>
38
39#include <machine/segments.h>
40#include <machine/pmap.h>
41
42#include <machine/vmm.h>
43#include "vmcs.h"
44#include "vmx_cpufunc.h"
45#include "ept.h"
46#include "vmx.h"
47
48static uint64_t
49vmcs_fix_regval(uint32_t encoding, uint64_t val)
50{
51
52	switch (encoding) {
53	case VMCS_GUEST_CR0:
54		val = vmx_fix_cr0(val);
55		break;
56	case VMCS_GUEST_CR4:
57		val = vmx_fix_cr4(val);
58		break;
59	default:
60		break;
61	}
62	return (val);
63}
64
65static uint32_t
66vmcs_field_encoding(int ident)
67{
68	switch (ident) {
69	case VM_REG_GUEST_CR0:
70		return (VMCS_GUEST_CR0);
71	case VM_REG_GUEST_CR3:
72		return (VMCS_GUEST_CR3);
73	case VM_REG_GUEST_CR4:
74		return (VMCS_GUEST_CR4);
75	case VM_REG_GUEST_DR7:
76		return (VMCS_GUEST_DR7);
77	case VM_REG_GUEST_RSP:
78		return (VMCS_GUEST_RSP);
79	case VM_REG_GUEST_RIP:
80		return (VMCS_GUEST_RIP);
81	case VM_REG_GUEST_RFLAGS:
82		return (VMCS_GUEST_RFLAGS);
83	case VM_REG_GUEST_ES:
84		return (VMCS_GUEST_ES_SELECTOR);
85	case VM_REG_GUEST_CS:
86		return (VMCS_GUEST_CS_SELECTOR);
87	case VM_REG_GUEST_SS:
88		return (VMCS_GUEST_SS_SELECTOR);
89	case VM_REG_GUEST_DS:
90		return (VMCS_GUEST_DS_SELECTOR);
91	case VM_REG_GUEST_FS:
92		return (VMCS_GUEST_FS_SELECTOR);
93	case VM_REG_GUEST_GS:
94		return (VMCS_GUEST_GS_SELECTOR);
95	case VM_REG_GUEST_TR:
96		return (VMCS_GUEST_TR_SELECTOR);
97	case VM_REG_GUEST_LDTR:
98		return (VMCS_GUEST_LDTR_SELECTOR);
99	case VM_REG_GUEST_EFER:
100		return (VMCS_GUEST_IA32_EFER);
101	default:
102		return (-1);
103	}
104
105}
106
107static int
108vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc)
109{
110
111	switch (seg) {
112	case VM_REG_GUEST_ES:
113		*base = VMCS_GUEST_ES_BASE;
114		*lim = VMCS_GUEST_ES_LIMIT;
115		*acc = VMCS_GUEST_ES_ACCESS_RIGHTS;
116		break;
117	case VM_REG_GUEST_CS:
118		*base = VMCS_GUEST_CS_BASE;
119		*lim = VMCS_GUEST_CS_LIMIT;
120		*acc = VMCS_GUEST_CS_ACCESS_RIGHTS;
121		break;
122	case VM_REG_GUEST_SS:
123		*base = VMCS_GUEST_SS_BASE;
124		*lim = VMCS_GUEST_SS_LIMIT;
125		*acc = VMCS_GUEST_SS_ACCESS_RIGHTS;
126		break;
127	case VM_REG_GUEST_DS:
128		*base = VMCS_GUEST_DS_BASE;
129		*lim = VMCS_GUEST_DS_LIMIT;
130		*acc = VMCS_GUEST_DS_ACCESS_RIGHTS;
131		break;
132	case VM_REG_GUEST_FS:
133		*base = VMCS_GUEST_FS_BASE;
134		*lim = VMCS_GUEST_FS_LIMIT;
135		*acc = VMCS_GUEST_FS_ACCESS_RIGHTS;
136		break;
137	case VM_REG_GUEST_GS:
138		*base = VMCS_GUEST_GS_BASE;
139		*lim = VMCS_GUEST_GS_LIMIT;
140		*acc = VMCS_GUEST_GS_ACCESS_RIGHTS;
141		break;
142	case VM_REG_GUEST_TR:
143		*base = VMCS_GUEST_TR_BASE;
144		*lim = VMCS_GUEST_TR_LIMIT;
145		*acc = VMCS_GUEST_TR_ACCESS_RIGHTS;
146		break;
147	case VM_REG_GUEST_LDTR:
148		*base = VMCS_GUEST_LDTR_BASE;
149		*lim = VMCS_GUEST_LDTR_LIMIT;
150		*acc = VMCS_GUEST_LDTR_ACCESS_RIGHTS;
151		break;
152	case VM_REG_GUEST_IDTR:
153		*base = VMCS_GUEST_IDTR_BASE;
154		*lim = VMCS_GUEST_IDTR_LIMIT;
155		*acc = VMCS_INVALID_ENCODING;
156		break;
157	case VM_REG_GUEST_GDTR:
158		*base = VMCS_GUEST_GDTR_BASE;
159		*lim = VMCS_GUEST_GDTR_LIMIT;
160		*acc = VMCS_INVALID_ENCODING;
161		break;
162	default:
163		return (EINVAL);
164	}
165
166	return (0);
167}
168
/*
 * Read the guest register named by 'ident' from 'vmcs' into '*retval'.
 *
 * The VMCS is loaded with VMPTRLD() for the duration of the read and
 * released with VMCLEAR() before returning.
 *
 * Returns EINVAL if 'ident' cannot be translated to a VMCS field, otherwise
 * whatever vmread() returns.
 */
int
vmcs_getreg(struct vmcs *vmcs, int ident, uint64_t *retval)
{
	uint32_t field;
	int rc;

	/*
	 * Callers that need vmx-specific state can skip the 'ident' to
	 * encoding translation by passing a raw VMCS encoding with the sign
	 * bit set.  The upper 16 bits of a VMCS encoding are reserved (i.e.
	 * zero), which leaves the sign bit free to act as this marker; it is
	 * masked off here to recover the encoding.
	 */
	field = (ident < 0) ? (uint32_t)(ident & 0x7fffffff) :
	    vmcs_field_encoding(ident);
	if (field == (uint32_t)-1)
		return (EINVAL);

	VMPTRLD(vmcs);
	rc = vmread(field, retval);
	VMCLEAR(vmcs);
	return (rc);
}
195
/*
 * Write 'val' to the guest register named by 'ident' in 'vmcs'.
 *
 * A negative 'ident' is treated as a raw VMCS encoding with the sign bit
 * set; any other value is translated with vmcs_field_encoding().  The value
 * is passed through vmcs_fix_regval() before it is written.
 *
 * The VMCS is loaded with VMPTRLD() for the duration of the write and
 * released with VMCLEAR() before returning.
 *
 * Returns EINVAL if 'ident' cannot be translated to a VMCS field, otherwise
 * whatever vmwrite() returns.
 */
int
vmcs_setreg(struct vmcs *vmcs, int ident, uint64_t val)
{
	uint32_t field;
	int rc;

	field = (ident < 0) ? (uint32_t)(ident & 0x7fffffff) :
	    vmcs_field_encoding(ident);
	if (field == (uint32_t)-1)
		return (EINVAL);

	val = vmcs_fix_regval(field, val);

	VMPTRLD(vmcs);
	rc = vmwrite(field, val);
	VMCLEAR(vmcs);
	return (rc);
}
217
218int
219vmcs_setdesc(struct vmcs *vmcs, int seg, struct seg_desc *desc)
220{
221	int error;
222	uint32_t base, limit, access;
223
224	error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
225	if (error != 0)
226		panic("vmcs_setdesc: invalid segment register %d", seg);
227
228	VMPTRLD(vmcs);
229	if ((error = vmwrite(base, desc->base)) != 0)
230		goto done;
231
232	if ((error = vmwrite(limit, desc->limit)) != 0)
233		goto done;
234
235	if (access != VMCS_INVALID_ENCODING) {
236		if ((error = vmwrite(access, desc->access)) != 0)
237			goto done;
238	}
239done:
240	VMCLEAR(vmcs);
241	return (error);
242}
243
244int
245vmcs_getdesc(struct vmcs *vmcs, int seg, struct seg_desc *desc)
246{
247	int error;
248	uint32_t base, limit, access;
249	uint64_t u64;
250
251	error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
252	if (error != 0)
253		panic("vmcs_getdesc: invalid segment register %d", seg);
254
255	VMPTRLD(vmcs);
256	if ((error = vmread(base, &u64)) != 0)
257		goto done;
258	desc->base = u64;
259
260	if ((error = vmread(limit, &u64)) != 0)
261		goto done;
262	desc->limit = u64;
263
264	if (access != VMCS_INVALID_ENCODING) {
265		if ((error = vmread(access, &u64)) != 0)
266			goto done;
267		desc->access = u64;
268	}
269done:
270	VMCLEAR(vmcs);
271	return (error);
272}
273
274int
275vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count)
276{
277	int error;
278
279	VMPTRLD(vmcs);
280
281	/*
282	 * Guest MSRs are saved in the VM-exit MSR-store area.
283	 * Guest MSRs are loaded from the VM-entry MSR-load area.
284	 * Both areas point to the same location in memory.
285	 */
286	if ((error = vmwrite(VMCS_EXIT_MSR_STORE, g_area)) != 0)
287		goto done;
288	if ((error = vmwrite(VMCS_EXIT_MSR_STORE_COUNT, g_count)) != 0)
289		goto done;
290
291	if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD, g_area)) != 0)
292		goto done;
293	if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT, g_count)) != 0)
294		goto done;
295
296	error = 0;
297done:
298	VMCLEAR(vmcs);
299	return (error);
300}
301
302int
303vmcs_set_defaults(struct vmcs *vmcs,
304		  u_long host_rip, u_long host_rsp, u_long ept_pml4,
305		  uint32_t pinbased_ctls, uint32_t procbased_ctls,
306		  uint32_t procbased_ctls2, uint32_t exit_ctls,
307		  uint32_t entry_ctls, u_long msr_bitmap, uint16_t vpid)
308{
309	int error, codesel, datasel, tsssel;
310	u_long cr0, cr4, efer;
311	uint64_t eptp, pat;
312	uint32_t exc_bitmap;
313
314	codesel = GSEL(GCODE_SEL, SEL_KPL);
315	datasel = GSEL(GDATA_SEL, SEL_KPL);
316	tsssel = GSEL(GPROC0_SEL, SEL_KPL);
317
318	/*
319	 * Make sure we have a "current" VMCS to work with.
320	 */
321	VMPTRLD(vmcs);
322
323	/*
324	 * Load the VMX controls
325	 */
326	if ((error = vmwrite(VMCS_PIN_BASED_CTLS, pinbased_ctls)) != 0)
327		goto done;
328	if ((error = vmwrite(VMCS_PRI_PROC_BASED_CTLS, procbased_ctls)) != 0)
329		goto done;
330	if ((error = vmwrite(VMCS_SEC_PROC_BASED_CTLS, procbased_ctls2)) != 0)
331		goto done;
332	if ((error = vmwrite(VMCS_EXIT_CTLS, exit_ctls)) != 0)
333		goto done;
334	if ((error = vmwrite(VMCS_ENTRY_CTLS, entry_ctls)) != 0)
335		goto done;
336
337	/* Guest state */
338
339	/* Initialize guest IA32_PAT MSR with the default value */
340	pat = PAT_VALUE(0, PAT_WRITE_BACK)	|
341	      PAT_VALUE(1, PAT_WRITE_THROUGH)	|
342	      PAT_VALUE(2, PAT_UNCACHED)	|
343	      PAT_VALUE(3, PAT_UNCACHEABLE)	|
344	      PAT_VALUE(4, PAT_WRITE_BACK)	|
345	      PAT_VALUE(5, PAT_WRITE_THROUGH)	|
346	      PAT_VALUE(6, PAT_UNCACHED)	|
347	      PAT_VALUE(7, PAT_UNCACHEABLE);
348	if ((error = vmwrite(VMCS_GUEST_IA32_PAT, pat)) != 0)
349		goto done;
350
351	/* Host state */
352
353	/* Initialize host IA32_PAT MSR */
354	pat = rdmsr(MSR_PAT);
355	if ((error = vmwrite(VMCS_HOST_IA32_PAT, pat)) != 0)
356		goto done;
357
358	/* Load the IA32_EFER MSR */
359	efer = rdmsr(MSR_EFER);
360	if ((error = vmwrite(VMCS_HOST_IA32_EFER, efer)) != 0)
361		goto done;
362
363	/* Load the control registers */
364	cr0 = rcr0();
365	if ((error = vmwrite(VMCS_HOST_CR0, cr0)) != 0)
366		goto done;
367
368	cr4 = rcr4();
369	if ((error = vmwrite(VMCS_HOST_CR4, cr4)) != 0)
370		goto done;
371
372	/* Load the segment selectors */
373	if ((error = vmwrite(VMCS_HOST_ES_SELECTOR, datasel)) != 0)
374		goto done;
375
376	if ((error = vmwrite(VMCS_HOST_CS_SELECTOR, codesel)) != 0)
377		goto done;
378
379	if ((error = vmwrite(VMCS_HOST_SS_SELECTOR, datasel)) != 0)
380		goto done;
381
382	if ((error = vmwrite(VMCS_HOST_DS_SELECTOR, datasel)) != 0)
383		goto done;
384
385	if ((error = vmwrite(VMCS_HOST_FS_SELECTOR, datasel)) != 0)
386		goto done;
387
388	if ((error = vmwrite(VMCS_HOST_GS_SELECTOR, datasel)) != 0)
389		goto done;
390
391	if ((error = vmwrite(VMCS_HOST_TR_SELECTOR, tsssel)) != 0)
392		goto done;
393
394	/*
395	 * Load the Base-Address for %fs and idtr.
396	 *
397	 * Note that we exclude %gs, tss and gdtr here because their base
398	 * address is pcpu specific.
399	 */
400	if ((error = vmwrite(VMCS_HOST_FS_BASE, 0)) != 0)
401		goto done;
402
403	if ((error = vmwrite(VMCS_HOST_IDTR_BASE, r_idt.rd_base)) != 0)
404		goto done;
405
406	/* instruction pointer */
407	if ((error = vmwrite(VMCS_HOST_RIP, host_rip)) != 0)
408		goto done;
409
410	/* stack pointer */
411	if ((error = vmwrite(VMCS_HOST_RSP, host_rsp)) != 0)
412		goto done;
413
414	/* eptp */
415	eptp = EPTP(ept_pml4);
416	if ((error = vmwrite(VMCS_EPTP, eptp)) != 0)
417		goto done;
418
419	/* vpid */
420	if ((error = vmwrite(VMCS_VPID, vpid)) != 0)
421		goto done;
422
423	/* msr bitmap */
424	if ((error = vmwrite(VMCS_MSR_BITMAP, msr_bitmap)) != 0)
425		goto done;
426
427	/* exception bitmap */
428	exc_bitmap = 1 << IDT_MC;
429	if ((error = vmwrite(VMCS_EXCEPTION_BITMAP, exc_bitmap)) != 0)
430		goto done;
431
432	/* link pointer */
433	if ((error = vmwrite(VMCS_LINK_POINTER, ~0)) != 0)
434		goto done;
435done:
436	VMCLEAR(vmcs);
437	return (error);
438}
439
/*
 * Read the VMCS field identified by 'encoding' and return its value.
 *
 * Unlike the accessors that take a 'struct vmcs *', this routine does not
 * load a VMCS itself.
 * NOTE(review): presumably the caller must already have a current VMCS --
 * confirm against the callers.
 *
 * Panics if vmread() reports an error.
 */
uint64_t
vmcs_read(uint32_t encoding)
{
	uint64_t value;
	int rc;

	rc = vmread(encoding, &value);
	if (rc != 0)
		panic("vmcs_read(%u) error %d", encoding, rc);

	return (value);
}
452