svm.c revision 267003
1/*-
2 * Copyright (c) 2013, Anish Gupta (akgupt3@gmail.com)
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice unmodified, this list of conditions, and the following
10 *    disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: projects/bhyve_svm/sys/amd64/vmm/amd/svm.c 267003 2014-06-03 06:56:54Z grehan $");
29
30#include <sys/param.h>
31#include <sys/systm.h>
32#include <sys/smp.h>
33#include <sys/kernel.h>
34#include <sys/malloc.h>
35#include <sys/pcpu.h>
36#include <sys/proc.h>
37
38#include <vm/vm.h>
39#include <vm/pmap.h>
40
41#include <machine/cpufunc.h>
42#include <machine/psl.h>
43#include <machine/pmap.h>
44#include <machine/md_var.h>
45#include <machine/vmparam.h>
46#include <machine/specialreg.h>
47#include <machine/segments.h>
48#include <machine/vmm.h>
49#include <machine/vmm_dev.h>
50#include <machine/vmm_instruction_emul.h>
51
52#include <x86/apicreg.h>
53
54#include "vmm_lapic.h"
55#include "vmm_msr.h"
56#include "vmm_stat.h"
57#include "vmm_ktr.h"
58#include "vmm_ioport.h"
59#include "vlapic.h"
60#include "vlapic_priv.h"
61
62#include "x86.h"
63#include "vmcb.h"
64#include "svm.h"
65#include "svm_softc.h"
66#include "npt.h"
67
68/*
69 * SVM CPUID function 0x8000_000A, edx bit decoding.
70 */
71#define AMD_CPUID_SVM_NP		BIT(0)  /* Nested paging or RVI */
72#define AMD_CPUID_SVM_LBR		BIT(1)  /* Last branch virtualization */
73#define AMD_CPUID_SVM_SVML		BIT(2)  /* SVM lock */
74#define AMD_CPUID_SVM_NRIP_SAVE		BIT(3)  /* Next RIP is saved */
75#define AMD_CPUID_SVM_TSC_RATE		BIT(4)  /* TSC rate control. */
76#define AMD_CPUID_SVM_VMCB_CLEAN	BIT(5)  /* VMCB state caching */
77#define AMD_CPUID_SVM_ASID_FLUSH	BIT(6)  /* Flush by ASID */
78#define AMD_CPUID_SVM_DECODE_ASSIST	BIT(7)  /* Decode assist */
79#define AMD_CPUID_SVM_PAUSE_INC		BIT(10) /* Pause intercept filter. */
80#define AMD_CPUID_SVM_PAUSE_FTH		BIT(12) /* Pause filter threshold */
81
82MALLOC_DEFINE(M_SVM, "svm", "svm");
83MALLOC_DEFINE(M_SVM_VLAPIC, "svm-vlapic", "svm-vlapic");
84
85/* Per-CPU context area. */
86extern struct pcpu __pcpu[];
87
88static bool svm_vmexit(struct svm_softc *svm_sc, int vcpu,
89			struct vm_exit *vmexit);
90static int svm_msr_rw_ok(uint8_t *btmap, uint64_t msr);
91static int svm_msr_rd_ok(uint8_t *btmap, uint64_t msr);
92static int svm_msr_index(uint64_t msr, int *index, int *bit);
93
94static uint32_t svm_feature; /* AMD SVM features. */
95
96/*
97 * Starting guest ASID, 0 is reserved for host.
98 * Each guest will have its own unique ASID.
99 */
100static uint32_t guest_asid = 1;
101
102/*
103 * Maximum ASID the processor can support.
104 * This limits the maximum number of virtual machines that can be created.
105 */
106static int max_asid;
107
108/*
109 * SVM host state saved area of size 4KB for each core.
110 */
111static uint8_t hsave[MAXCPU][PAGE_SIZE] __aligned(PAGE_SIZE);
112
113/*
114 * S/w saved host context.
115 */
116static struct svm_regctx host_ctx[MAXCPU];
117
118static VMM_STAT_AMD(VCPU_EXITINTINFO, "Valid EXITINTINFO");
119
120/*
121 * Common function to enable or disable SVM for a CPU.
122 */
123static int
124cpu_svm_enable_disable(boolean_t enable)
125{
126	uint64_t efer_msr;
127
128	efer_msr = rdmsr(MSR_EFER);
129
130	if (enable)
131		efer_msr |= EFER_SVM;
132	else
133		efer_msr &= ~EFER_SVM;
134
135	wrmsr(MSR_EFER, efer_msr);
136
137	return(0);
138}
139
140/*
141 * Disable SVM on a CPU.
142 */
143static void
144svm_disable(void *arg __unused)
145{
146
147	(void)cpu_svm_enable_disable(FALSE);
148}
149
150/*
151 * Disable SVM for all CPUs.
152 */
153static int
154svm_cleanup(void)
155{
156
157	smp_rendezvous(NULL, svm_disable, NULL, NULL);
158	return (0);
159}
160
161/*
162 * Check for the SVM features required by bhyve in a CPU.
163 */
164static int
165svm_cpuid_features(void)
166{
167	u_int regs[4];
168
169	/* CPUID Fn8000_000A is for SVM */
170	do_cpuid(0x8000000A, regs);
171	svm_feature = regs[3];
172
173	printf("SVM rev: 0x%x NASID:0x%x\n", regs[0] & 0xFF, regs[1]);
174	max_asid = regs[1];
175
176	printf("SVM Features:0x%b\n", svm_feature,
177		"\020"
178		"\001NP"		/* Nested paging */
179		"\002LbrVirt"		/* LBR virtualization */
180		"\003SVML"		/* SVM lock */
181		"\004NRIPS"		/* NRIP save */
182		"\005TscRateMsr"	/* MSR based TSC rate control */
183		"\006VmcbClean"		/* VMCB clean bits */
184		"\007FlushByAsid"	/* Flush by ASID */
185		"\010DecodeAssist"	/* Decode assist */
186		"\011<b8>"
187		"\012<b9>"
188		"\013PauseFilter"
189		"\014<b11>"
190		"\015PauseFilterThreshold"
191		"\016AVIC"
192		);
193
194	/* SVM Lock */
195	if (!(svm_feature & AMD_CPUID_SVM_SVML)) {
196		printf("SVM is disabled by BIOS, please enable in BIOS.\n");
197		return (ENXIO);
198	}
199
200	/*
201	 * bhyve needs RVI to work.
202	 */
203	if (!(svm_feature & AMD_CPUID_SVM_NP)) {
204		printf("Missing Nested paging or RVI SVM support in processor.\n");
205		return (EIO);
206	}
207
208	if (svm_feature & AMD_CPUID_SVM_NRIP_SAVE)
209		return (0);
210
211	return (EIO);
212}
213
214/*
215 * Enable SVM for a CPU.
216 */
217static void
218svm_enable(void *arg __unused)
219{
220	uint64_t hsave_pa;
221
222	(void)cpu_svm_enable_disable(TRUE);
223
224	hsave_pa = vtophys(hsave[curcpu]);
225	wrmsr(MSR_VM_HSAVE_PA, hsave_pa);
226
227	if (rdmsr(MSR_VM_HSAVE_PA) != hsave_pa) {
228		panic("VM_HSAVE_PA is wrong on CPU%d\n", curcpu);
229	}
230}
231
232/*
233 * Check if the processor supports SVM.
234 */
235static int
236is_svm_enabled(void)
237{
238	uint64_t msr;
239
240	 /* Section 15.4 Enabling SVM from APM2. */
241	if ((amd_feature2 & AMDID2_SVM) == 0) {
242		printf("SVM is not supported on this processor.\n");
243		return (ENXIO);
244	}
245
246	msr = rdmsr(MSR_VM_CR);
247	/* Make sure SVM is not disabled by BIOS. */
248	if ((msr & VM_CR_SVMDIS) == 0) {
249		return svm_cpuid_features();
250	}
251
252	printf("SVM disabled by Key, consult TPM/BIOS manual.\n");
253	return (ENXIO);
254}
255
256/*
257 * Enable SVM on CPU and initialize nested page table h/w.
258 */
259static int
260svm_init(int ipinum)
261{
262	int err;
263
264	err = is_svm_enabled();
265	if (err)
266		return (err);
267
268
269	svm_npt_init(ipinum);
270
271	/* Start SVM on all CPUs */
272	smp_rendezvous(NULL, svm_enable, NULL, NULL);
273
274	return (0);
275}
276
277static void
278svm_restore(void)
279{
280	svm_enable(NULL);
281}
282/*
283 * Get index and bit position for a MSR in MSR permission
284 * bitmap. Two bits are used for each MSR, lower bit is
285 * for read and higher bit is for write.
286 */
287static int
288svm_msr_index(uint64_t msr, int *index, int *bit)
289{
290	uint32_t base, off;
291
292/* Pentium compatible MSRs */
293#define MSR_PENTIUM_START 	0
294#define MSR_PENTIUM_END 	0x1FFF
295/* AMD 6th generation and Intel compatible MSRs */
296#define MSR_AMD6TH_START 	0xC0000000UL
297#define MSR_AMD6TH_END 		0xC0001FFFUL
298/* AMD 7th and 8th generation compatible MSRs */
299#define MSR_AMD7TH_START 	0xC0010000UL
300#define MSR_AMD7TH_END 		0xC0011FFFUL
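/*
 * Illustrative note on the MSR permissions map (MSRPM) layout, assuming the
 * packing described in APM vol. 2: the three ranges above are placed back to
 * back, 2 bits per MSR, i.e. four MSRs per byte.  For example, MSR_STAR
 * (0xC0000081) falls in the 6th-generation range, so off = 0x81,
 * base = 0x2000 and index = (0x81 + 0x2000) / 4 = 0x820, with the read bit
 * at (0x81 % 4) * 2 = 2 and the write bit at 3 of that byte.
 */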
301
302	*index = -1;
303	*bit = (msr % 4) * 2;
304	base = 0;
305
306	if (msr >= MSR_PENTIUM_START && msr <= MSR_PENTIUM_END) {
307		*index = msr / 4;
308		return (0);
309	}
310
311	base += (MSR_PENTIUM_END - MSR_PENTIUM_START + 1);
312	if (msr >= MSR_AMD6TH_START && msr <= MSR_AMD6TH_END) {
313		off = (msr - MSR_AMD6TH_START);
314		*index = (off + base) / 4;
315		return (0);
316	}
317
318	base += (MSR_AMD6TH_END - MSR_AMD6TH_START + 1);
319	if (msr >= MSR_AMD7TH_START && msr <= MSR_AMD7TH_END) {
320		off = (msr - MSR_AMD7TH_START);
321		*index = (off + base) / 4;
322		return (0);
323	}
324
325	return (EIO);
326}
327
328/*
329 * Allow the virtual CPU direct access to an MSR (read and/or write).
330 */
331static int
332svm_msr_perm(uint8_t *perm_bitmap, uint64_t msr, bool read, bool write)
333{
334	int index, bit, err;
335
336	err = svm_msr_index(msr, &index, &bit);
337	if (err) {
338		ERR("MSR 0x%lx is not writeable by guest.\n", msr);
339		return (err);
340	}
341
342	if (index < 0 || index >= SVM_MSR_BITMAP_SIZE) {
343		ERR("MSR 0x%lx index out of range(%d).\n", msr, index);
344		return (EINVAL);
345	}
346	if (bit < 0 || bit > 8) {
347		ERR("MSR 0x%lx bit out of range(%d).\n", msr, bit);
348		return (EINVAL);
349	}
350
351	/* Disable intercept for read and write. */
352	if (read)
353		perm_bitmap[index] &= ~(1UL << bit);
354	if (write)
355		perm_bitmap[index] &= ~(2UL << bit);
356	CTR2(KTR_VMM, "Guest has control:0x%x on SVM:MSR(0x%lx).\n",
357		(perm_bitmap[index] >> bit) & 0x3, msr);
358
359	return (0);
360}
361
362static int
363svm_msr_rw_ok(uint8_t *perm_bitmap, uint64_t msr)
364{
365	return svm_msr_perm(perm_bitmap, msr, true, true);
366}
367
368static int
369svm_msr_rd_ok(uint8_t *perm_bitmap, uint64_t msr)
370{
371	return svm_msr_perm(perm_bitmap, msr, true, false);
372}
373/*
374 * Initialise VCPU.
375 */
376static int
377svm_init_vcpu(struct svm_vcpu *vcpu, vm_paddr_t iopm_pa, vm_paddr_t msrpm_pa,
378		vm_paddr_t pml4_pa, uint8_t asid)
379{
380
381	vcpu->lastcpu = NOCPU;
382	vcpu->vmcb_pa = vtophys(&vcpu->vmcb);
383
384	/*
385	 * Initialise the VMCB persistent area of the vcpu.
386	 * 1. Permission bitmap for MSR and IO space.
387	 * 2. Nested paging.
388	 * 3. ASID of virtual machine.
389	 */
390	if (svm_init_vmcb(&vcpu->vmcb, iopm_pa, msrpm_pa, pml4_pa)) {
391		return (EIO);
392	}
393
394	return (0);
395}
396/*
397 * Initialise a virtual machine.
398 */
399static void *
400svm_vminit(struct vm *vm, pmap_t pmap)
401{
402	struct svm_softc *svm_sc;
403	vm_paddr_t msrpm_pa, iopm_pa, pml4_pa;
404	int i;
405
406	if (guest_asid >= max_asid) {
407		ERR("Host supports max ASID:%d, can't create more guests.\n",
408			max_asid);
409		return (NULL);
410	}
411
412	svm_sc = (struct svm_softc *)malloc(sizeof (struct svm_softc),
413			M_SVM, M_WAITOK | M_ZERO);
414
415	svm_sc->vm = vm;
416	svm_sc->svm_feature = svm_feature;
417	svm_sc->vcpu_cnt = VM_MAXCPU;
418	svm_sc->nptp = (vm_offset_t)vtophys(pmap->pm_pml4);
419	/*
420	 * Each guest has its own unique ASID.
421	 * The ASID (Address Space Identifier) is used to tag the guest's TLB entries.
422	 */
423	svm_sc->asid = guest_asid++;
424
425	/*
426	 * Intercept accesses to all MSRs except those explicitly allowed below.
427	 */
428	memset(svm_sc->msr_bitmap, 0xFF, sizeof(svm_sc->msr_bitmap));
429
430	/*
431	 * The following MSRs can be controlled directly by the virtual machine
432	 * since accesses to them are handled through the VMCB save area.
433	 */
434	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_GSBASE);
435	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_FSBASE);
436	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_KGSBASE);
437
438	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_STAR);
439	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_LSTAR);
440	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_CSTAR);
441	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_SF_MASK);
442	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_SYSENTER_CS_MSR);
443	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_SYSENTER_ESP_MSR);
444	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_SYSENTER_EIP_MSR);
445
446	/* For Nested Paging/RVI only. */
447	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_PAT);
448
449	 /* Intercept access to all I/O ports. */
450	memset(svm_sc->iopm_bitmap, 0xFF, sizeof(svm_sc->iopm_bitmap));
451
452	/* Cache physical address for multiple vcpus. */
453	iopm_pa = vtophys(svm_sc->iopm_bitmap);
454	msrpm_pa = vtophys(svm_sc->msr_bitmap);
455	pml4_pa = svm_sc->nptp;
456
457	for (i = 0; i < svm_sc->vcpu_cnt; i++) {
458		if (svm_init_vcpu(svm_get_vcpu(svm_sc, i), iopm_pa, msrpm_pa,
459				pml4_pa, svm_sc->asid)) {
460			ERR("SVM couldn't initialise VCPU%d\n", i);
461			goto cleanup;
462		}
463	}
464
465	return (svm_sc);
466
467cleanup:
468	free(svm_sc, M_SVM);
469	return (NULL);
470}
471
472/*
473 * Handle guest I/O intercept.
474 */
475static bool
476svm_handle_io(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
477{
478	struct vmcb_ctrl *ctrl;
479	struct vmcb_state *state;
480	uint64_t info1;
481
482	state = svm_get_vmcb_state(svm_sc, vcpu);
483	ctrl  = svm_get_vmcb_ctrl(svm_sc, vcpu);
484	info1 = ctrl->exitinfo1;
485
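	/*
	 * Decode the IOIO intercept information (EXITINFO1 for IN/OUT, per
	 * APM vol. 2): bit 0 is the direction (1 = IN), bit 2 the string
	 * flag, bit 3 the REP prefix, bits 6:4 the one-hot operand size and
	 * bits 31:16 the port number.
	 */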
486	vmexit->exitcode 	= VM_EXITCODE_INOUT;
487	vmexit->u.inout.in 	= (info1 & BIT(0)) ? 1 : 0;
488	vmexit->u.inout.string 	= (info1 & BIT(2)) ? 1 : 0;
489	vmexit->u.inout.rep 	= (info1 & BIT(3)) ? 1 : 0;
490	vmexit->u.inout.bytes 	= (info1 >> 4) & 0x7;
491	vmexit->u.inout.port 	= (uint16_t)(info1 >> 16);
492	vmexit->u.inout.eax 	= (uint32_t)(state->rax);
493
494	return (false);
495}
496
497static int
498svm_npf_paging(uint64_t exitinfo1)
499{
500
501	if (exitinfo1 & VMCB_NPF_INFO1_W)
502		return (VM_PROT_WRITE);
503
504	return (VM_PROT_READ);
505}
506
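/*
 * A nested page fault is a candidate for instruction emulation (MMIO) only
 * when it was not an instruction fetch, did not happen while walking the
 * guest page tables and reported a valid final guest physical address.
 */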
507static bool
508svm_npf_emul_fault(uint64_t exitinfo1)
509{
510
511	if (exitinfo1 & VMCB_NPF_INFO1_ID) {
512		return (false);
513	}
514
515	if (exitinfo1 & VMCB_NPF_INFO1_GPT) {
516		return (false);
517	}
518
519	if ((exitinfo1 & VMCB_NPF_INFO1_GPA) == 0) {
520		return (false);
521	}
522
523	return (true);
524}
525
526/*
527 * Special handling of EFER MSR.
528 * An SVM guest must have the SVM bit set in EFER; prohibit the guest from
529 * clearing this enable bit.
530 */
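/*
 * MSR values are split across EDX:EAX by rdmsr/wrmsr, so on a write the new
 * EFER is assembled from the guest's %rdx (high 32 bits) and %rax (low 32
 * bits) with EFER.SVME forced on, and a read hands it back the same way.
 * For example (illustrative value), a guest EFER of 0x1d01 is returned as
 * %rdx = 0 and %rax = 0x1d01.
 */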
531static void
532svm_efer(struct svm_softc *svm_sc, int vcpu, boolean_t write)
533{
534	struct svm_regctx *swctx;
535	struct vmcb_state *state;
536
537	state = svm_get_vmcb_state(svm_sc, vcpu);
538	swctx = svm_get_guest_regctx(svm_sc, vcpu);
539
540	if (write) {
541		state->efer = ((swctx->e.g.sctx_rdx & (uint32_t)~0) << 32) |
542				((uint32_t)state->rax) | EFER_SVM;
543	} else {
544		state->rax = (uint32_t)state->efer;
545		swctx->e.g.sctx_rdx = (uint32_t)(state->efer >> 32);
546	}
547}
548
549static enum vm_cpu_mode
550svm_vcpu_mode(uint64_t efer)
551{
552
553	if (efer & EFER_LMA)
554		return (CPU_MODE_64BIT);
555	else
556		return (CPU_MODE_COMPATIBILITY);
557}
558
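/*
 * Derive the guest paging mode from CR0, CR4 and EFER: no CR0.PG means flat
 * (unpaged), CR0.PG without CR4.PAE means legacy 32-bit paging, CR4.PAE with
 * EFER.LME means 4-level long-mode paging, and CR4.PAE alone means PAE
 * paging.  For example, CR0.PG=1, CR4.PAE=1, EFER.LME=0 yields PAGING_MODE_PAE.
 */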
559static enum vm_paging_mode
560svm_paging_mode(uint64_t cr0, uint64_t cr4, uint64_t efer)
561{
562
563	if ((cr0 & CR0_PG) == 0)
564		return (PAGING_MODE_FLAT);
565	if ((cr4 & CR4_PAE) == 0)
566		return (PAGING_MODE_32);
567	if (efer & EFER_LME)
568		return (PAGING_MODE_64);
569	else
570		return (PAGING_MODE_PAE);
571}
572
573/*
574 * Determine the cause of virtual cpu exit and handle VMEXIT.
575 * Return: false - Break vcpu execution loop and handle vmexit
576 *		   in kernel or user space.
577 *	   true  - Continue vcpu run.
578 */
579static bool
580svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
581{
582	struct vmcb_state *state;
583	struct vmcb_ctrl *ctrl;
584	struct svm_regctx *ctx;
585	uint64_t code, info1, info2, val;
586	uint32_t eax, ecx, edx;
587	bool update_rip, loop, retu;
588
589	KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu));
590
591	state = svm_get_vmcb_state(svm_sc, vcpu);
592	ctrl  = svm_get_vmcb_ctrl(svm_sc, vcpu);
593	ctx   = svm_get_guest_regctx(svm_sc, vcpu);
594	code  = ctrl->exitcode;
595	info1 = ctrl->exitinfo1;
596	info2 = ctrl->exitinfo2;
597
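	/*
	 * EXITCODE identifies the intercept that fired and EXITINFO1/2 carry
	 * intercept-specific data (see APM vol. 2, Appendix C, "SVM Intercept
	 * Exit Codes"); the cases below translate these into bhyve's
	 * VM_EXITCODE_* values.
	 */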
598	update_rip = true;
599	loop = true;
600	vmexit->exitcode = VM_EXITCODE_VMX;
601	vmexit->u.vmx.status = 0;
602
603	switch (code) {
604		case	VMCB_EXIT_MC: /* Machine Check. */
605			vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_MTRAP, 1);
606			vmexit->exitcode = VM_EXITCODE_MTRAP;
607			loop = false;
608			break;
609
610		case	VMCB_EXIT_MSR:	/* MSR access. */
611			eax = state->rax;
612			ecx = ctx->sctx_rcx;
613			edx = ctx->e.g.sctx_rdx;
614
615			if (ecx == MSR_EFER) {
616				VCPU_CTR0(svm_sc->vm, vcpu,"VMEXIT EFER\n");
617				svm_efer(svm_sc, vcpu, info1);
618				break;
619			}
620
621			retu = false;
622			if (info1) {
623				/* VM exited because of write MSR */
624				vmm_stat_incr(svm_sc->vm, vcpu,
625					VMEXIT_WRMSR, 1);
626				vmexit->exitcode = VM_EXITCODE_WRMSR;
627				vmexit->u.msr.code = ecx;
628				val = (uint64_t)edx << 32 | eax;
629				if (emulate_wrmsr(svm_sc->vm, vcpu, ecx, val,
630					&retu)) {
631					vmexit->u.msr.wval = val;
632					loop = false;
633				} else
634					loop = retu ? false : true;
635
636				VCPU_CTR3(svm_sc->vm, vcpu,
637					"VMEXIT WRMSR(%s handling) 0x%lx @0x%x",
638					loop ? "kernel" : "user", val, ecx);
639			} else {
640				vmm_stat_incr(svm_sc->vm, vcpu,
641					VMEXIT_RDMSR, 1);
642				vmexit->exitcode = VM_EXITCODE_RDMSR;
643				vmexit->u.msr.code = ecx;
644				if (emulate_rdmsr(svm_sc->vm, vcpu, ecx,
645					&retu)) {
646					loop = false;
647				} else
648					loop = retu ? false : true;
649				VCPU_CTR3(svm_sc->vm, vcpu, "SVM:VMEXIT RDMSR"
650					" MSB=0x%08x, LSB=%08x @0x%x",
651					ctx->e.g.sctx_rdx, state->rax, ecx);
652			}
653
654#define MSR_AMDK8_IPM           0xc0010055
655			/*
656			 * We can't hide the AMD C1E idle capability since it is
657			 * based on CPU generation; for now, ignore accesses to
658			 * this MSR by vcpus.
659			 * XXX: special handling of AMD C1E - Ignore.
660			 */
661			 if (ecx == MSR_AMDK8_IPM)
662				loop = true;
663			break;
664
665		case VMCB_EXIT_INTR:
666			/*
667			 * Exit on an external interrupt.
668			 * Let the host interrupt handler run; if it is a guest
669			 * interrupt, the local APIC will inject the event into the guest.
670			 */
671			update_rip = false;
672			VCPU_CTR1(svm_sc->vm, vcpu, "SVM:VMEXIT ExtInt"
673				" RIP:0x%lx.\n", state->rip);
674			vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_EXTINT, 1);
675			break;
676
677		case VMCB_EXIT_IO:
678			loop = svm_handle_io(svm_sc, vcpu, vmexit);
679			vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_INOUT, 1);
680			update_rip = true;
681			break;
682
683		case VMCB_EXIT_CPUID:
684			vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_CPUID, 1);
685			(void)x86_emulate_cpuid(svm_sc->vm, vcpu,
686					(uint32_t *)&state->rax,
687					(uint32_t *)&ctx->sctx_rbx,
688					(uint32_t *)&ctx->sctx_rcx,
689					(uint32_t *)&ctx->e.g.sctx_rdx);
690			VCPU_CTR0(svm_sc->vm, vcpu, "SVM:VMEXIT CPUID\n");
691			break;
692
693		case VMCB_EXIT_HLT:
694			vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_HLT, 1);
695 			if (ctrl->v_irq) {
696				 /* Interrupt is pending, can't halt guest. */
697				vmm_stat_incr(svm_sc->vm, vcpu,
698					VMEXIT_HLT_IGNORED, 1);
699				VCPU_CTR0(svm_sc->vm, vcpu,
700					"VMEXIT halt ignored.");
701			} else {
702				VCPU_CTR0(svm_sc->vm, vcpu,
703					"VMEXIT halted CPU.");
704				vmexit->exitcode = VM_EXITCODE_HLT;
705				vmexit->u.hlt.rflags = state->rflags;
706				loop = false;
707
708			}
709			break;
710
711		case VMCB_EXIT_PAUSE:
712			VCPU_CTR0(svm_sc->vm, vcpu, "SVM:VMEXIT pause");
713			vmexit->exitcode = VM_EXITCODE_PAUSE;
714			vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_PAUSE, 1);
715
716			break;
717
718		case VMCB_EXIT_NPF:
719			loop = false;
720			update_rip = false;
721
722			if (info1 & VMCB_NPF_INFO1_RSV) {
723				VCPU_CTR2(svm_sc->vm, vcpu, "SVM_ERR:NPT"
724					" reserved bit is set,"
725					" INFO1:0x%lx INFO2:0x%lx.\n",
726					info1, info2);
727				break;
728			}
729
730			 /* EXITINFO2 has the physical fault address (GPA). */
731			if (vm_mem_allocated(svm_sc->vm, info2)) {
732 				VCPU_CTR3(svm_sc->vm, vcpu, "SVM:NPF-paging,"
733					"RIP:0x%lx INFO1:0x%lx INFO2:0x%lx .\n",
734				 	state->rip, info1, info2);
735				vmexit->exitcode = VM_EXITCODE_PAGING;
736				vmexit->u.paging.gpa = info2;
737				vmexit->u.paging.fault_type =
738					svm_npf_paging(info1);
739				vmm_stat_incr(svm_sc->vm, vcpu,
740					VMEXIT_NESTED_FAULT, 1);
741			} else if (svm_npf_emul_fault(info1)) {
742 				VCPU_CTR3(svm_sc->vm, vcpu, "SVM:NPF inst_emul,"
743					"RIP:0x%lx INFO1:0x%lx INFO2:0x%lx .\n",
744					state->rip, info1, info2);
745				vmexit->exitcode = VM_EXITCODE_INST_EMUL;
746				vmexit->u.inst_emul.gpa = info2;
747				vmexit->u.inst_emul.gla = VIE_INVALID_GLA;
748				vmexit->u.inst_emul.paging.cr3 = state->cr3;
749				vmexit->u.inst_emul.paging.cpu_mode =
750					svm_vcpu_mode(state->efer);
751				vmexit->u.inst_emul.paging.paging_mode =
752					svm_paging_mode(state->cr0, state->cr4,
753                                                 state->efer);
754				/* XXX: get CPL from SS */
755				vmexit->u.inst_emul.paging.cpl = 0;
756				/*
757				 * If the DecodeAssist SVM feature is not present,
758				 * we don't have the faulting instruction's length.
759				 * The new RIP will be calculated based on software
760				 * instruction emulation.
761				 */
762				vmexit->inst_length = VIE_INST_SIZE;
763				vmm_stat_incr(svm_sc->vm, vcpu,
764					VMEXIT_INST_EMUL, 1);
765			}
766
767			break;
768
769		case VMCB_EXIT_SHUTDOWN:
770			VCPU_CTR0(svm_sc->vm, vcpu, "SVM:VMEXIT shutdown.");
771			loop = false;
772			break;
773
774		case VMCB_EXIT_INVALID:
775			VCPU_CTR0(svm_sc->vm, vcpu, "SVM:VMEXIT INVALID.");
776			loop = false;
777			break;
778
779		default:
780			 /* Return to user space. */
781			loop = false;
782			update_rip = false;
783			VCPU_CTR3(svm_sc->vm, vcpu, "VMEXIT=0x%lx"
784				" EXITINFO1: 0x%lx EXITINFO2:0x%lx\n",
785		 		ctrl->exitcode, info1, info2);
786			VCPU_CTR3(svm_sc->vm, vcpu, "SVM:RIP: 0x%lx nRIP:0x%lx"
787				" Inst decoder len:%d\n", state->rip,
788				ctrl->nrip, ctrl->inst_decode_size);
789			vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_UNKNOWN, 1);
790			break;
791	}
792
793	vmexit->rip = state->rip;
794	if (update_rip) {
795		if (ctrl->nrip == 0) {
796 			VCPU_CTR1(svm_sc->vm, vcpu, "SVM_ERR:nRIP is not set "
797				 "for RIP0x%lx.\n", state->rip);
798			vmexit->exitcode = VM_EXITCODE_VMX;
799		} else
800			vmexit->rip = ctrl->nrip;
801	}
802
803	/* If vcpu execution is continued, update RIP. */
804	if (loop) {
805		state->rip = vmexit->rip;
806	}
807
808	if (state->rip == 0) {
809		VCPU_CTR0(svm_sc->vm, vcpu, "SVM_ERR:RIP is NULL\n");
810		vmexit->exitcode = VM_EXITCODE_VMX;
811	}
812
813	return (loop);
814}
815
816/*
817 * Inject NMI to virtual cpu.
818 */
819static int
820svm_inject_nmi(struct svm_softc *svm_sc, int vcpu)
821{
822	struct vmcb_ctrl *ctrl;
823
824	KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu));
825
826	ctrl  = svm_get_vmcb_ctrl(svm_sc, vcpu);
827	/* Nothing to do if no NMI is pending. */
828	if (!vm_nmi_pending(svm_sc->vm, vcpu))
829		return (0);
830
831	/* Inject NMI; the vector number is not used. */
832	if (vmcb_eventinject(ctrl, VMCB_EVENTINJ_TYPE_NMI, IDT_NMI, 0, false)) {
833		VCPU_CTR0(svm_sc->vm, vcpu, "SVM:NMI injection failed.\n");
834		return (EIO);
835	}
836
837	/* Acknowledge the request is accepted.*/
838	vm_nmi_clear(svm_sc->vm, vcpu);
839
840	VCPU_CTR0(svm_sc->vm, vcpu, "SVM:Injected NMI.\n");
841
842	return (1);
843}
844
845/*
846 * Inject event to virtual cpu.
847 */
848static void
849svm_inj_interrupts(struct svm_softc *svm_sc, int vcpu, struct vlapic *vlapic)
850{
851	struct vmcb_ctrl *ctrl;
852	struct vmcb_state *state;
853	struct vm_exception exc;
854	int vector;
855
856	KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu));
857
858	state = svm_get_vmcb_state(svm_sc, vcpu);
859	ctrl  = svm_get_vmcb_ctrl(svm_sc, vcpu);
860
861	/* Can't inject multiple events at once. */
862	if (ctrl->eventinj & VMCB_EVENTINJ_VALID) {
863		VCPU_CTR1(svm_sc->vm, vcpu,
864			"SVM:Last event(0x%lx) is pending.\n", ctrl->eventinj);
865		return ;
866	}
867
868	/* Wait for guest to come out of interrupt shadow. */
869	if (ctrl->intr_shadow) {
870		VCPU_CTR0(svm_sc->vm, vcpu, "SVM:Guest in interrupt shadow.\n");
871		return;
872	}
873
874	if (vm_exception_pending(svm_sc->vm, vcpu, &exc)) {
875		KASSERT(exc.vector >= 0 && exc.vector < 32,
876			("Exception vector %d invalid", exc.vector));
877		if (vmcb_eventinject(ctrl, VMCB_EVENTINJ_TYPE_EXCEPTION,
878			exc.vector, exc.error_code,
879			exc.error_code_valid)) {
880			VCPU_CTR1(svm_sc->vm, vcpu, "SVM:Exception%d injection"
881				" failed.\n", exc.vector);
882			return;
883		}
884	}
885	/* NMI event has priority over interrupts.*/
886	if (svm_inject_nmi(svm_sc, vcpu)) {
887		return;
888	}
889
890	/* Ask the local apic for a vector to inject. */
891	if (!vlapic_pending_intr(vlapic, &vector))
892		return;
893
894	if (vector < 32 || vector > 255) {
895		VCPU_CTR1(svm_sc->vm, vcpu, "SVM_ERR:Event injection"
896			" invalid vector=%d.\n", vector);
897		ERR("SVM_ERR:Event injection invalid vector=%d.\n", vector);
898		return;
899	}
900
901	if ((state->rflags & PSL_I) == 0) {
902		VCPU_CTR0(svm_sc->vm, vcpu, "SVM:Interrupt is disabled\n");
903		return;
904	}
905
906	if (vmcb_eventinject(ctrl, VMCB_EVENTINJ_TYPE_INTR, vector, 0, false)) {
907		VCPU_CTR1(svm_sc->vm, vcpu, "SVM:Event injection failed to"
908			" vector=%d.\n", vector);
909		return;
910	}
911
912	/* Acknowledge that event is accepted.*/
913	vlapic_intr_accepted(vlapic, vector);
914	VCPU_CTR1(svm_sc->vm, vcpu, "SVM:event injected,vector=%d.\n", vector);
915}
916
917/*
918 * Restore host Task Register selector type after every vcpu exit.
919 */
920static void
921setup_tss_type(void)
922{
923	struct system_segment_descriptor *desc;
924
925	desc = (struct system_segment_descriptor *)&gdt[curcpu * NGDT +
926		GPROC0_SEL];
927	/*
928	 * The host TSS descriptor type must be restored to 64-bit available (9);
929	 * after running the guest it reads back as busy (0xb). See the System
930	 * Descriptors table in APM vol. 2, rev. 3.21, section 4.8.3.
931	 */
932	desc->sd_type = 9;
933}
934
935static void
936svm_handle_exitintinfo(struct svm_softc *svm_sc, int vcpu)
937{
938	struct vmcb_ctrl *ctrl;
939	uint64_t intinfo;
940
941	ctrl  	= svm_get_vmcb_ctrl(svm_sc, vcpu);
942
943	/*
944	 * If the VMEXIT happened while delivering an exception or interrupt,
945	 * re-inject the pending event into the guest.
946	 * See Section 15.7.2, "Intercepts During IDT Interrupt Delivery".
947	 */
948	intinfo = ctrl->exitintinfo;
949
950	if (intinfo & VMCB_EXITINTINFO_VALID) {
951		vmm_stat_incr(svm_sc->vm, vcpu, VCPU_EXITINTINFO, 1);
952		VCPU_CTR1(svm_sc->vm, vcpu, "SVM:EXITINTINFO:0x%lx is valid\n",
953			intinfo);
954		if (vmcb_eventinject(ctrl, VMCB_EXITINTINFO_TYPE(intinfo),
955			VMCB_EXITINTINFO_VECTOR(intinfo),
956			VMCB_EXITINTINFO_EC(intinfo),
957			VMCB_EXITINTINFO_EC_VALID & intinfo)) {
958			VCPU_CTR1(svm_sc->vm, vcpu, "SVM:couldn't inject pending"
959				" interrupt, exitintinfo:0x%lx\n", intinfo);
960		}
961	}
962}
963/*
964 * Start vcpu with specified RIP.
965 */
966static int
967svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap,
968	void *rend_cookie, void *suspended_cookie)
969{
970	struct svm_regctx *hctx, *gctx;
971	struct svm_softc *svm_sc;
972	struct svm_vcpu *vcpustate;
973	struct vmcb_state *state;
974	struct vmcb_ctrl *ctrl;
975	struct vm_exit *vmexit;
976	struct vlapic *vlapic;
977	struct vm *vm;
978	uint64_t vmcb_pa;
979	static uint64_t host_cr2;
980	bool loop;	/* Continue vcpu execution loop. */
981
982	loop = true;
983	svm_sc = arg;
984	vm = svm_sc->vm;
985
986	vcpustate = svm_get_vcpu(svm_sc, vcpu);
987	state = svm_get_vmcb_state(svm_sc, vcpu);
988	ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu);
989	vmexit = vm_exitinfo(vm, vcpu);
990	vlapic = vm_lapic(vm, vcpu);
991
992	gctx = svm_get_guest_regctx(svm_sc, vcpu);
993	hctx = &host_ctx[curcpu];
994	vmcb_pa = svm_sc->vcpu[vcpu].vmcb_pa;
995
996	if (vcpustate->lastcpu != curcpu) {
997		/* The virtual CPU is now running on a different host CPU. */
998		vmm_stat_incr(vm, vcpu, VCPU_MIGRATIONS, 1);
999
1000		/*
1001		 * Flush all TLB mapping for this guest on this CPU,
1002		 * it might have stale entries.
1003		 * Flush all TLB mappings for this guest on this CPU;
1004		 * it might have stale entries.
1005
1006		/* Can't use any VMCB state cached by this CPU. */
1007		ctrl->vmcb_clean = VMCB_CACHE_NONE;
1008	} else {
1009		/*
1010		 * XXX: Using the same ASID for all vcpus of a VM will cause TLB
1011		 * corruption. This can easily be reproduced by multiplexing two
1012		 * vcpus on the same core.
1013		 * For now, flush the guest TLB on every vmrun.
1014		 */
1015		ctrl->tlb_ctrl = VMCB_TLB_FLUSH_GUEST;
1016
1017		/*
1018		 * This is the same cpu on which the vcpu last ran, so we
1019		 * don't need to reload all VMCB state.
1020		 * ASID is unique for a guest.
1021		 * IOPM is unchanged.
1022		 * RVI/EPT is unchanged.
1023		 *
1024		 */
1025		ctrl->vmcb_clean = VMCB_CACHE_ASID |
1026				VMCB_CACHE_IOPM |
1027				VMCB_CACHE_NP;
1028	}
1029
1030	vcpustate->lastcpu = curcpu;
1031	VCPU_CTR3(vm, vcpu, "SVM:Enter vmrun RIP:0x%lx"
1032		" inst len=%d/%d\n",
1033		rip, vmexit->inst_length,
1034		vmexit->u.inst_emul.vie.num_valid);
1035	/* Update Guest RIP */
1036	state->rip = rip;
1037
1038	do {
1039		vmexit->inst_length = 0;
1040		/* The scheduler has asked us to yield the CPU. */
1041		if (curthread->td_flags & (TDF_ASTPENDING | TDF_NEEDRESCHED)) {
1042			vmexit->exitcode = VM_EXITCODE_BOGUS;
1043			vmm_stat_incr(vm, vcpu, VMEXIT_ASTPENDING, 1);
1044			VCPU_CTR1(vm, vcpu,
1045				"SVM: ASTPENDING, RIP:0x%lx\n", state->rip);
1046			vmexit->rip = state->rip;
1047			break;
1048		}
1049
1050		if (vcpu_suspended(suspended_cookie)) {
1051			vmexit->exitcode = VM_EXITCODE_SUSPENDED;
1052			vmexit->rip = state->rip;
1053			break;
1054		}
1055
1056		if (vcpu_rendezvous_pending(rend_cookie)) {
1057			vmexit->exitcode = VM_EXITCODE_RENDEZVOUS;
1058			vmm_stat_incr(vm, vcpu, VMEXIT_RENDEZVOUS, 1);
1059			VCPU_CTR1(vm, vcpu,
1060				"SVM: VCPU rendezvous, RIP:0x%lx\n",
1061				state->rip);
1062			vmexit->rip = state->rip;
1063			break;
1064		}
1065
1066		(void)svm_set_vmcb(svm_get_vmcb(svm_sc, vcpu), svm_sc->asid);
1067
1068		svm_handle_exitintinfo(svm_sc, vcpu);
1069
1070		(void)svm_inj_interrupts(svm_sc, vcpu, vlapic);
1071
1072		/* Change TSS type to available.*/
1073		setup_tss_type();
1074
1075		/*
1076		 * Disable global interrupts to guarantee atomicity while
1077		 * loading guest state.
1078		 * See 15.5.1 "Loading guest state" APM2.
1079		 */
1080		disable_gintr();
1081
1082		save_cr2(&host_cr2);
1083		load_cr2(&state->cr2);
1084
1085
1086		/* Launch Virtual Machine. */
1087		svm_launch(vmcb_pa, gctx, hctx);
1088
1089		save_cr2(&state->cr2);
1090		load_cr2(&host_cr2);
1091
1092		/*
1093		 * Only the host GDTR and IDTR are saved and restored by SVM;
1094		 * LDTR and TR need to be restored by the VMM.
1095		 * XXX: kernel doesn't use LDT, only user space.
1096		 */
1097		ltr(GSEL(GPROC0_SEL, SEL_KPL));
1098
1099		/*
1100		 * The guest FS and GS selectors are stashed by vmload and vmsave.
1101		 * The host FS and GS selectors are stashed by svm_launch().
1102		 * The host GS base, which holds the per-cpu data, needs to be
1103		 * restored before enabling global interrupts.
1104		 * FS is not used by the FreeBSD kernel; the kernel restores the
1105		 * user FS selector and base before returning to
1106		 * userland.
1107		 *
1108		 * Note: 'curcpu' can't be used here; it relies on the GS-based pcpu.
1109		 */
1110		wrmsr(MSR_GSBASE, (uint64_t)&__pcpu[vcpustate->lastcpu]);
1111		wrmsr(MSR_KGSBASE, (uint64_t)&__pcpu[vcpustate->lastcpu]);
1112
1113		/* The vcpu exited with global interrupts disabled; re-enable them. */
1114		enable_gintr();
1115
1116		/* Handle #VMEXIT and if required return to user space. */
1117		loop = svm_vmexit(svm_sc, vcpu, vmexit);
1118		vcpustate->loop++;
1119		vmm_stat_incr(vm, vcpu, VMEXIT_COUNT, 1);
1120
1121	} while (loop);
1122
1123	return (0);
1124}
1125
1126/*
1127 * Cleanup for virtual machine.
1128 */
1129static void
1130svm_vmcleanup(void *arg)
1131{
1132	struct svm_softc *svm_sc;
1133
1134	svm_sc = arg;
1135
1136	VCPU_CTR0(svm_sc->vm, 0, "SVM:cleanup\n");
1137
1138	free(svm_sc, M_SVM);
1139}
1140
1141/*
1142 * Return pointer to hypervisor saved register state.
1143 */
1144static register_t *
1145swctx_regptr(struct svm_regctx *regctx, int reg)
1146{
1147
1148	switch (reg) {
1149		case VM_REG_GUEST_RBX:
1150			return (&regctx->sctx_rbx);
1151		case VM_REG_GUEST_RCX:
1152			return (&regctx->sctx_rcx);
1153		case VM_REG_GUEST_RDX:
1154			return (&regctx->e.g.sctx_rdx);
1155		case VM_REG_GUEST_RDI:
1156			return (&regctx->e.g.sctx_rdi);
1157		case VM_REG_GUEST_RSI:
1158			return (&regctx->e.g.sctx_rsi);
1159		case VM_REG_GUEST_RBP:
1160			return (&regctx->sctx_rbp);
1161		case VM_REG_GUEST_R8:
1162			return (&regctx->sctx_r8);
1163		case VM_REG_GUEST_R9:
1164			return (&regctx->sctx_r9);
1165		case VM_REG_GUEST_R10:
1166			return (&regctx->sctx_r10);
1167		case VM_REG_GUEST_R11:
1168			return (&regctx->sctx_r11);
1169		case VM_REG_GUEST_R12:
1170			return (&regctx->sctx_r12);
1171		case VM_REG_GUEST_R13:
1172			return (&regctx->sctx_r13);
1173		case VM_REG_GUEST_R14:
1174			return (&regctx->sctx_r14);
1175		case VM_REG_GUEST_R15:
1176			return (&regctx->sctx_r15);
1177		default:
1178			ERR("Unknown register requested, reg=%d.\n", reg);
1179			break;
1180	}
1181
1182	return (NULL);
1183}
1184
1185/*
1186 * Interface to read guest registers.
1187 * This can be SVM h/w saved or hypervisor saved register.
1188 */
1189static int
1190svm_getreg(void *arg, int vcpu, int ident, uint64_t *val)
1191{
1192	struct svm_softc *svm_sc;
1193	struct vmcb *vmcb;
1194	register_t *reg;
1195
1196	svm_sc = arg;
1197	KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu));
1198
1199	vmcb = svm_get_vmcb(svm_sc, vcpu);
1200
1201	if (vmcb_read(vmcb, ident, val) == 0) {
1202		return (0);
1203	}
1204
1205	reg = swctx_regptr(svm_get_guest_regctx(svm_sc, vcpu), ident);
1206
1207	if (reg != NULL) {
1208		*val = *reg;
1209		return (0);
1210	}
1211
1212 	ERR("SVM_ERR:reg type %x is not saved in VMCB.\n", ident);
1213	return (EINVAL);
1214}
1215
1216/*
1217 * Interface to write to guest registers.
1218 * This can be SVM h/w saved or hypervisor saved register.
1219 */
1220static int
1221svm_setreg(void *arg, int vcpu, int ident, uint64_t val)
1222{
1223	struct svm_softc *svm_sc;
1224	struct vmcb *vmcb;
1225	register_t *reg;
1226
1227	svm_sc = arg;
1228	KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu));
1229
1230	vmcb = svm_get_vmcb(svm_sc, vcpu);
1231	if (vmcb_write(vmcb, ident, val) == 0) {
1232		return (0);
1233	}
1234
1235	reg = swctx_regptr(svm_get_guest_regctx(svm_sc, vcpu), ident);
1236
1237	if (reg != NULL) {
1238		*reg = val;
1239		return (0);
1240	}
1241
1242 	ERR("SVM_ERR:reg type %x is not saved in VMCB.\n", ident);
1243	return (EINVAL);
1244}
1245
1246
1247/*
1248 * Interface to set various descriptors.
1249 */
1250static int
1251svm_setdesc(void *arg, int vcpu, int type, struct seg_desc *desc)
1252{
1253	struct svm_softc *svm_sc;
1254	struct vmcb *vmcb;
1255	struct vmcb_segment *seg;
1256	uint16_t attrib;
1257
1258	svm_sc = arg;
1259	KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu));
1260
1261	vmcb = svm_get_vmcb(svm_sc, vcpu);
1262
1263	VCPU_CTR1(svm_sc->vm, vcpu, "SVM:set_desc: Type%d\n", type);
1264
1265	seg = vmcb_seg(vmcb, type);
1266	if (seg == NULL) {
1267		ERR("SVM_ERR:Unsupported segment type%d\n", type);
1268		return (EINVAL);
1269	}
1270
1271	/* Map seg_desc access to VMCB attribute format.*/
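	/*
	 * The VMCB keeps the access byte in bits 7:0 and the G/D/L/AVL flags
	 * in bits 11:8, whereas seg_desc keeps those flags in bits 15:12.
	 * For example (illustrative value), an access of 0xa09b (present
	 * long-mode code segment with G set) becomes a VMCB attribute of 0xa9b.
	 */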
1272	attrib = ((desc->access & 0xF000) >> 4) | (desc->access & 0xFF);
1273	VCPU_CTR3(svm_sc->vm, vcpu, "SVM:[sel %d attribute 0x%x limit:0x%x]\n",
1274		type, desc->access, desc->limit);
1275	seg->attrib = attrib;
1276	seg->base = desc->base;
1277	seg->limit = desc->limit;
1278
1279	return (0);
1280}
1281
1282/*
1283 * Interface to get guest descriptor.
1284 */
1285static int
1286svm_getdesc(void *arg, int vcpu, int type, struct seg_desc *desc)
1287{
1288	struct svm_softc *svm_sc;
1289	struct vmcb_segment	*seg;
1290
1291	svm_sc = arg;
1292	KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu));
1293
1294	VCPU_CTR1(svm_sc->vm, vcpu, "SVM:get_desc: Type%d\n", type);
1295
1296	seg = vmcb_seg(svm_get_vmcb(svm_sc, vcpu), type);
1297	if (!seg) {
1298		ERR("SVM_ERR:Unsupported segment type%d\n", type);
1299		return (EINVAL);
1300	}
1301
1302	/* Map VMCB segment attributes back to the seg_desc access format. */
1303	desc->access = ((seg->attrib & 0xF00) << 4) | (seg->attrib & 0xFF);
1304	desc->base = seg->base;
1305	desc->limit = seg->limit;
1306
1307	return (0);
1308}
1309
1310static int
1311svm_setcap(void *arg, int vcpu, int type, int val)
1312{
1313	struct svm_softc *svm_sc;
1314	struct vmcb_ctrl *ctrl;
1315	int ret = ENOENT;
1316
1317	svm_sc = arg;
1318	KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu));
1319
1320	ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu);
1321
1322	switch (type) {
1323		case VM_CAP_HALT_EXIT:
1324			if (val)
1325				ctrl->ctrl1 |= VMCB_INTCPT_HLT;
1326			else
1327				ctrl->ctrl1 &= ~VMCB_INTCPT_HLT;
1328			ret = 0;
1329			VCPU_CTR1(svm_sc->vm, vcpu, "SVM:set_cap:Halt exit %s.\n",
1330				val ? "enabled": "disabled");
1331			break;
1332
1333		case VM_CAP_PAUSE_EXIT:
1334			if (val)
1335				ctrl->ctrl1 |= VMCB_INTCPT_PAUSE;
1336			else
1337				ctrl->ctrl1 &= ~VMCB_INTCPT_PAUSE;
1338			ret = 0;
1339			VCPU_CTR1(svm_sc->vm, vcpu, "SVM:set_cap:Pause exit %s.\n",
1340				val ? "enabled": "disabled");
1341			break;
1342
1343		case VM_CAP_MTRAP_EXIT:
1344			if (val)
1345				ctrl->exception |= BIT(IDT_MC);
1346			else
1347				ctrl->exception &= ~BIT(IDT_MC);
1348			ret = 0;
1349			VCPU_CTR1(svm_sc->vm, vcpu, "SVM:set_cap:MC exit %s.\n",
1350				val ? "enabled": "disabled");
1351			break;
1352
1353		case VM_CAP_UNRESTRICTED_GUEST:
1354			/* Unrestricted guest execution is always enabled under SVM. */
1355			VCPU_CTR0(svm_sc->vm, vcpu, "SVM:set_cap:Unrestricted "
1356			"always enabled.\n");
1357			ret = 0;
1358			break;
1359
1360		default:
1361			break;
1362		}
1363
1364	return (ret);
1365}
1366
1367static int
1368svm_getcap(void *arg, int vcpu, int type, int *retval)
1369{
1370	struct svm_softc *svm_sc;
1371	struct vmcb_ctrl *ctrl;
1372
1373	svm_sc = arg;
1374	KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu));
1375
1376	ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu);
1377
1378	switch (type) {
1379		case VM_CAP_HALT_EXIT:
1380			*retval = (ctrl->ctrl1 & VMCB_INTCPT_HLT) ? 1 : 0;
1381			VCPU_CTR1(svm_sc->vm, vcpu, "SVM:get_cap:Halt exit %s.\n",
1382				*retval ? "enabled": "disabled");
1383			break;
1384
1385		case VM_CAP_PAUSE_EXIT:
1386			*retval = (ctrl->ctrl1 & VMCB_INTCPT_PAUSE) ? 1 : 0;
1387			VCPU_CTR1(svm_sc->vm, vcpu, "SVM:get_cap:Pause exit %s.\n",
1388				*retval ? "enabled": "disabled");
1389			break;
1390
1391		case VM_CAP_MTRAP_EXIT:
1392			*retval = (ctrl->exception & BIT(IDT_MC)) ? 1 : 0;
1393			VCPU_CTR1(svm_sc->vm, vcpu, "SVM:get_cap:MC exit %s.\n",
1394				*retval ? "enabled": "disabled");
1395			break;
1396
1397		case VM_CAP_UNRESTRICTED_GUEST:
1398			VCPU_CTR0(svm_sc->vm, vcpu, "SVM:get_cap:Unrestricted.\n");
1399			*retval = 1;
1400			break;
1401		default:
1402			break;
1403	}
1404
1405	return (0);
1406}
1407
1408static struct vlapic *
1409svm_vlapic_init(void *arg, int vcpuid)
1410{
1411	struct svm_softc *svm_sc;
1412	struct vlapic *vlapic;
1413
1414	svm_sc = arg;
1415	vlapic = malloc(sizeof(struct vlapic), M_SVM_VLAPIC, M_WAITOK | M_ZERO);
1416	vlapic->vm = svm_sc->vm;
1417	vlapic->vcpuid = vcpuid;
1418	vlapic->apic_page = (struct LAPIC *)&svm_sc->apic_page[vcpuid];
1419
1420	vlapic_init(vlapic);
1421
1422	return (vlapic);
1423}
1424
1425static void
1426svm_vlapic_cleanup(void *arg, struct vlapic *vlapic)
1427{
1428
1429	vlapic_cleanup(vlapic);
1430	free(vlapic, M_SVM_VLAPIC);
1431}
1432
1433struct vmm_ops vmm_ops_amd = {
1434	svm_init,
1435	svm_cleanup,
1436	svm_restore,
1437	svm_vminit,
1438	svm_vmrun,
1439	svm_vmcleanup,
1440	svm_getreg,
1441	svm_setreg,
1442	svm_getdesc,
1443	svm_setdesc,
1444	svm_getcap,
1445	svm_setcap,
1446	svm_npt_alloc,
1447	svm_npt_free,
1448	svm_vlapic_init,
1449	svm_vlapic_cleanup
1450};
1451