svm.c revision 256588
1/*-
2 * Copyright (c) 2013, Anish Gupta (akgupt3@gmail.com)
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice unmodified, this list of conditions, and the following
10 *    disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: projects/bhyve_svm/sys/amd64/vmm/amd/svm.c 256588 2013-10-16 05:43:03Z grehan $");
29
30#include <sys/param.h>
31#include <sys/systm.h>
32#include <sys/smp.h>
33#include <sys/kernel.h>
34#include <sys/malloc.h>
35#include <sys/pcpu.h>
36#include <sys/proc.h>
37
38#include <vm/vm.h>
39#include <vm/pmap.h>
40
41#include <machine/cpufunc.h>
42#include <machine/psl.h>
43#include <machine/pmap.h>
44#include <machine/md_var.h>
45#include <machine/vmparam.h>
46#include <machine/specialreg.h>
47#include <machine/segments.h>
48#include <machine/vmm.h>
49
50#include <x86/apicreg.h>
51
52#include "vmm_lapic.h"
53#include "vmm_msr.h"
54#include "vmm_stat.h"
55#include "vmm_ktr.h"
56
57#include "x86.h"
58#include "vmcb.h"
59#include "svm.h"
60#include "svm_softc.h"
61#include "npt.h"
62
63/*
64 * SVM CPUID function 0x8000_000A, edx bit decoding.
65 */
66#define AMD_CPUID_SVM_NP		BIT(0)  /* Nested paging or RVI */
67#define AMD_CPUID_SVM_LBR		BIT(1)  /* Last branch virtualization */
68#define AMD_CPUID_SVM_SVML		BIT(2)  /* SVM lock */
69#define AMD_CPUID_SVM_NRIP_SAVE		BIT(3)  /* Next RIP is saved */
70#define AMD_CPUID_SVM_TSC_RATE		BIT(4)  /* TSC rate control. */
71#define AMD_CPUID_SVM_VMCB_CLEAN	BIT(5)  /* VMCB state caching */
72#define AMD_CPUID_SVM_ASID_FLUSH	BIT(6)  /* Flush by ASID */
73#define AMD_CPUID_SVM_DECODE_ASSIST	BIT(7)  /* Decode assist */
74#define AMD_CPUID_SVM_PAUSE_INC		BIT(10) /* Pause intercept filter. */
75#define AMD_CPUID_SVM_PAUSE_FTH		BIT(12) /* Pause filter threshold */
76
77MALLOC_DEFINE(M_SVM, "svm", "svm");
78
79/* Per-CPU context area. */
80extern struct pcpu __pcpu[];
81
82static int svm_vmexit(struct svm_softc *svm_sc, int vcpu,
83			struct vm_exit *vmexit);
84static int svm_msr_rw_ok(uint8_t *btmap, uint64_t msr);
85static int svm_msr_index(uint64_t msr, int *index, int *bit);
86
87static uint32_t svm_feature; /* AMD SVM features. */
88
89/*
90 * Starting guest ASID, 0 is reserved for host.
91 * Each guest will have its own unique ASID.
92 */
93static uint32_t guest_asid = 1;
94
95/*
96 * Maximum ASID the processor can support.
97 * This limits the maximum number of virtual machines that can be created.
98 */
99static int max_asid;
100
101/*
102 * Statistics
103 */
104static VMM_STAT_AMD(VMEXIT_NPF_LAPIC, "vm exits due to Local APIC access");
105
106/*
107 * SVM host state saved area of size 4KB for each core.
108 */
109static uint8_t hsave[MAXCPU][PAGE_SIZE] __aligned(PAGE_SIZE);
110
111/*
112 * S/w saved host context.
113 */
114static struct svm_regctx host_ctx[MAXCPU];
115
116/*
117 * Common function to enable or disable SVM for a CPU.
118 */
119static int
120cpu_svm_enable_disable(boolean_t enable)
121{
122	uint64_t efer_msr;
123
124	efer_msr = rdmsr(MSR_EFER);
125
126	if (enable) {
127		efer_msr |= EFER_SVM;
128	} else {
129		efer_msr &= ~EFER_SVM;
130	}
131
132	wrmsr(MSR_EFER, efer_msr);
133
134	if (rdmsr(MSR_EFER) != efer_msr) {
135		ERR("SVM couldn't be %s on CPU%d.\n", enable ? "enabled" : "disabled", curcpu);
136		return (EIO);
137	}
138
139	return(0);
140}
141
142/*
143 * Disable SVM on a CPU.
144 */
145static void
146svm_disable(void *arg __unused)
147{
148
149	(void)cpu_svm_enable_disable(FALSE);
150}
151
152/*
153 * Disable SVM for all CPUs.
154 */
155static int
156svm_cleanup(void)
157{
158
159	smp_rendezvous(NULL, svm_disable, NULL, NULL);
160	return (0);
161}
162
163/*
164 * Check for the SVM features required by bhyve in a CPU.
165 */
166static int
167svm_cpuid_features(void)
168{
169	u_int regs[4];
170
171	/* CPUID Fn8000_000A is for SVM */
172	do_cpuid(0x8000000A, regs);
173	svm_feature = regs[3];
174
175	printf("SVM rev: 0x%x NASID:0x%x\n", regs[0] & 0xFF, regs[1]);
176	max_asid = regs[1];
177
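	/*
	 * printf(9) "%b" bit decoding: the leading "\020" selects hexadecimal
	 * output and each "\0NN" prefix names the (1-based) bit that follows.
	 */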
178	printf("SVM Features:0x%b\n", svm_feature,
179		"\020"
180		"\001NP"		/* Nested paging */
181		"\002LbrVirt"		/* LBR virtualization */
182		"\003SVML"		/* SVM lock */
183		"\004NRIPS"		/* NRIP save */
184		"\005TscRateMsr"	/* MSR based TSC rate control */
185		"\006VmcbClean"		/* VMCB clean bits */
186		"\007FlushByAsid"	/* Flush by ASID */
187		"\010DecodeAssist"	/* Decode assist */
188		"\011<b8>"
189		"\012<b9>"
190		"\013PauseFilter"
191		"\014<b11>"
192		"\015PauseFilterThreshold"
193		);
194
195	/* SVM Lock */
196	if (!(svm_feature & AMD_CPUID_SVM_SVML)) {
197		printf("SVM is disabled by BIOS, please enable in BIOS.\n");
198		return (ENXIO);
199	}
200
201	/*
202	 * XXX: bhyve needs RVI (nested paging) to work.
203	 */
204	if (!(svm_feature & AMD_CPUID_SVM_NP)) {
205		printf("Missing Nested paging or RVI SVM support in processor.\n");
206		return (EIO);
207	}
208
209	if (svm_feature & (AMD_CPUID_SVM_NRIP_SAVE |
210			AMD_CPUID_SVM_DECODE_ASSIST)) {
211		return (0);
212	}
213	/* XXX: We should never get here. */
214	printf("Processor doesn't support nRIP or decode assist, can't "
215		"run bhyve.\n");
216	return (EIO);
217}
218
219/*
220 * Enable SVM for a CPU.
221 */
222static void
223svm_enable(void *arg __unused)
224{
225	uint64_t hsave_pa;
226
227	(void)cpu_svm_enable_disable(TRUE);
228
229	hsave_pa = vtophys(hsave[curcpu]);
230	wrmsr(MSR_VM_HSAVE_PA, hsave_pa);
231
232	if (rdmsr(MSR_VM_HSAVE_PA) != hsave_pa) {
233		panic("VM_HSAVE_PA is wrong on CPU%d\n", curcpu);
234	}
235}
236
237/*
238 * Check if the processor supports SVM.
239 */
240static int
241is_svm_enabled(void)
242{
243	uint64_t msr;
244
245	 /* Section 15.4 Enabling SVM from APM2. */
246	if ((amd_feature2 & AMDID2_SVM) == 0) {
247		printf("SVM is not supported on this processor.\n");
248		return (ENXIO);
249	}
250
251	msr = rdmsr(MSR_VM_CR);
252	/* Make sure SVM is not disabled by BIOS. */
253	if ((msr & VM_CR_SVMDIS) == 0) {
254		return svm_cpuid_features();
255	}
256
257	printf("SVM disabled by Key, consult TPM/BIOS manual.\n");
258	return (ENXIO);
259}
260
261/*
262 * Enable SVM on CPU and initialize nested page table h/w.
263 */
264static int
265svm_init(void)
266{
267	int err;
268
269	err = is_svm_enabled();
270	if (err) {
271		return (err);
272	}
273
274	svm_npt_init();
275
276	/* Start SVM on all CPUs */
277	smp_rendezvous(NULL, svm_enable, NULL, NULL);
278
279	return(0);
280}
281
282/*
283 * Get the index and bit position for an MSR in the MSR permission
284 * bitmap. Two bits are used for each MSR: the lower bit is for read
285 * and the higher bit is for write.
286 */
287static int
288svm_msr_index(uint64_t msr, int *index, int *bit)
289{
290	uint32_t base, off;
291
292/* Pentium compatible MSRs */
293#define MSR_PENTIUM_START 	0
294#define MSR_PENTIUM_END 	0x1FFF
295/* AMD 6th generation and Intel compatible MSRs */
296#define MSR_AMD6TH_START 	0xC0000000UL
297#define MSR_AMD6TH_END 		0xC0001FFFUL
298/* AMD 7th and 8th generation compatible MSRs */
299#define MSR_AMD7TH_START 	0xC0010000UL
300#define MSR_AMD7TH_END 		0xC0011FFFUL
301
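	/*
	 * Worked example: MSR_LSTAR (0xC0000082) falls in the AMD 6th
	 * generation range, so off = 0x82 and base = 0x2000, giving byte
	 * index (0x82 + 0x2000) / 4 = 0x820 with the read bit at
	 * (msr % 4) * 2 = 4 and the write bit at 5.
	 */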
302	*index = -1;
303	*bit = (msr % 4) * 2;
304	base = 0;
305
306	if (msr >= MSR_PENTIUM_START && msr <= MSR_PENTIUM_END) {
307		*index = msr / 4;
308		return (0);
309	}
310
311	base += (MSR_PENTIUM_END - MSR_PENTIUM_START + 1);
312	if (msr >= MSR_AMD6TH_START && msr <= MSR_AMD6TH_END) {
313		off = (msr - MSR_AMD6TH_START);
314		*index = (off + base) / 4;
315		return (0);
316	}
317
318	base += (MSR_AMD6TH_END - MSR_AMD6TH_START + 1);
319	if (msr >= MSR_AMD7TH_START && msr <= MSR_AMD7TH_END) {
320		off = (msr - MSR_AMD7TH_START);
321		*index = (off + base) / 4;
322		return (0);
323	}
324
325	return (EIO);
326}
327
328/*
329 * Give the virtual CPU complete access to an MSR (read & write).
330 */
331static int
332svm_msr_rw_ok(uint8_t *perm_bitmap, uint64_t msr)
333{
334	int index, bit, err;
335
336	err = svm_msr_index(msr, &index, &bit);
337	if (err) {
338		ERR("MSR 0x%lx is not writeable by guest.\n", msr);
339		return (err);
340	}
341
342	if (index < 0 || index > (SVM_MSR_BITMAP_SIZE)) {
343		ERR("MSR 0x%lx index out of range(%d).\n", msr, index);
344		return (EINVAL);
345	}
346	if (bit < 0 || bit > 8) {
347		ERR("MSR 0x%lx bit out of range(%d).\n", msr, bit);
348		return (EINVAL);
349	}
350
351	/* Disable intercept for read and write. */
352	perm_bitmap[index] &= ~(3 << bit);
353	CTR1(KTR_VMM, "Guest has full control on SVM:MSR(0x%lx).\n", msr);
354
355	return (0);
356}
357
358/*
359 * Initialise VCPU.
360 */
361static int
362svm_init_vcpu(struct svm_vcpu *vcpu, vm_paddr_t iopm_pa, vm_paddr_t msrpm_pa,
363		vm_paddr_t pml4_pa, uint8_t asid)
364{
365
366	vcpu->lastcpu = NOCPU;
367	vcpu->vmcb_pa = vtophys(&vcpu->vmcb);
368
369	/*
370	 * Initialise the VMCB persistent area of the vcpu.
371	 * 1. Permission bitmap for MSR and IO space.
372	 * 2. Nested paging.
373	 * 3. ASID of virtual machine.
374	 */
375	if (svm_init_vmcb(&vcpu->vmcb, iopm_pa, msrpm_pa, pml4_pa)) {
376		return (EIO);
377	}
378
379	return (0);
380}
381
382/*
383 * Initialise a virtual machine.
384 */
385static void *
386svm_vminit(struct vm *vm)
387{
388	struct svm_softc *svm_sc;
389	vm_paddr_t msrpm_pa, iopm_pa, pml4_pa;
390	int i;
391
392	if (guest_asid >= max_asid) {
393		ERR("Host supports max ASID:%d, can't create more guests.\n",
394			max_asid);
395		return (NULL);
396	}
397
398	svm_sc = (struct svm_softc *)malloc(sizeof (struct svm_softc),
399			M_SVM, M_WAITOK | M_ZERO);
400
401	svm_sc->vm = vm;
402	svm_sc->svm_feature = svm_feature;
403	svm_sc->vcpu_cnt = VM_MAXCPU;
404
405	/*
406	 * Each guest has its own unique ASID.
407	 * ASIDs (Address Space Identifiers) are used by TLB entries.
408	 */
409	svm_sc->asid = guest_asid++;
410
411	/*
412	 * Intercept MSR access to all MSRs except GSBASE, FSBASE,... etc.
413	 */
414	 memset(svm_sc->msr_bitmap, 0xFF, sizeof(svm_sc->msr_bitmap));
415
416	/*
417	 * The following MSRs can be completely controlled by the virtual
418	 * machine since accesses to them are translated to VMCB accesses.
419	 */
420	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_GSBASE);
421	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_FSBASE);
422	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_KGSBASE);
423
424	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_STAR);
425	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_LSTAR);
426	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_CSTAR);
427	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_SF_MASK);
428	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_SYSENTER_CS_MSR);
429	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_SYSENTER_ESP_MSR);
430	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_SYSENTER_EIP_MSR);
431
432	/* For Nested Paging/RVI only. */
433	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_PAT);
434
435	 /* Intercept access to all I/O ports. */
436	memset(svm_sc->iopm_bitmap, 0xFF, sizeof(svm_sc->iopm_bitmap));
437
438	/* Cache physical address for multiple vcpus. */
439	iopm_pa = vtophys(svm_sc->iopm_bitmap);
440	msrpm_pa = vtophys(svm_sc->msr_bitmap);
441	pml4_pa = vtophys(svm_sc->np_pml4);
442
443	for (i = 0; i < svm_sc->vcpu_cnt; i++) {
444		if (svm_init_vcpu(svm_get_vcpu(svm_sc, i), iopm_pa, msrpm_pa,
445				pml4_pa, svm_sc->asid)) {
446			ERR("SVM couldn't initialise VCPU%d\n", i);
447			goto cleanup;
448		}
449	}
450
451	return (svm_sc);
452
453cleanup:
454	free(svm_sc, M_SVM);
455	return (NULL);
456}
457
458/*
459 * Handle guest I/O intercept.
460 */
461static int
462svm_handle_io(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
463{
464	struct vmcb_ctrl *ctrl;
465	struct vmcb_state *state;
466	uint64_t info1;
467
468	state = svm_get_vmcb_state(svm_sc, vcpu);
469	ctrl  = svm_get_vmcb_ctrl(svm_sc, vcpu);
470	info1 = ctrl->exitinfo1;
471
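	/*
	 * EXITINFO1 layout for IOIO intercepts (APM vol. 2): bit 0 is the
	 * direction (1 = IN), bit 2 the string flag, bit 3 the REP prefix,
	 * bits 6:4 the operand size (SZ8/SZ16/SZ32) and bits 31:16 the port.
	 */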
472	vmexit->exitcode 	= VM_EXITCODE_INOUT;
473	vmexit->u.inout.in 	= (info1 & BIT(0)) ? 1 : 0;
474	vmexit->u.inout.string 	= (info1 & BIT(2)) ? 1 : 0;
475	vmexit->u.inout.rep 	= (info1 & BIT(3)) ? 1 : 0;
476	vmexit->u.inout.bytes 	= (info1 >> 4) & 0x7;
477	vmexit->u.inout.port 	= (uint16_t)(info1 >> 16);
478	vmexit->u.inout.eax 	= (uint32_t)(state->rax);
479
480	return (1);
481}
482
483/*
484 * SVM nested page (RVI) fault handler.
485 * Nested page fault handler, used for local APIC emulation.
486 */
487static int
488svm_handle_npf(struct vm *vm, int vcpu, uint64_t gpa, uint64_t rip,
489		uint64_t exitinfo1, uint64_t cr3, struct vie *vie)
490{
491	int err;
492
493	if (exitinfo1 & VMCB_NPF_INFO1_ID) {
494 		VMM_CTR0(vm, vcpu, "SVM:NPF for code access.");
495		return (0);
496	}
497
498	if (exitinfo1 & VMCB_NPF_INFO1_RSV) {
499 		VMM_CTR0(vm, vcpu, "SVM:NPF reserved bits are set.");
500		return (0);
501	}
502
503	if (exitinfo1 & VMCB_NPF_INFO1_GPT) {
504 		VMM_CTR0(vm, vcpu, "SVM:NPF during guest page table walk.");
505		return (0);
506	}
507
508	/*
509	 * nRIP is NULL for an NPF, so we don't have the instruction length;
510	 * we rely on the instruction decode s/w to determine the size of
511	 * the instruction.
512	 *
513	 * XXX: DecodeAssist can use instruction from buffer.
514	 */
515	if (vmm_fetch_instruction(vm, vcpu, rip, VIE_INST_SIZE,
516				cr3, vie) != 0) {
517 		ERR("SVM:NPF instruction fetch failed, RIP:0x%lx\n", rip);
518		return (EINVAL);
519	}
520
521	KASSERT(vie->num_valid, ("No instruction to emulate."));
522	/*
523	 * Unlike Intel VT-x, SVM doesn't provide the GLA. VIE_INVALID_GLA,
524	 * which is a non-canonical address, indicates that the GLA is not
525	 * available to the instruction emulation.
526	 *
527	 * XXX: Which SVM capability can provide the GLA?
528	 */
529	if(vmm_decode_instruction(vm, vcpu, VIE_INVALID_GLA, vie)) {
530		ERR("SVM: Couldn't decode instruction.\n");
531		return (0);
532	}
533
534	/*
535	 * XXX: Decoding for user space(IOAPIC) should be done in
536	 * user space.
537	 */
538	if (gpa < DEFAULT_APIC_BASE || gpa >= (DEFAULT_APIC_BASE + PAGE_SIZE)) {
539		VMM_CTR2(vm, vcpu, "SVM:NPF GPA(0x%lx) outside of local APIC"
540			" range(0x%x)\n", gpa, DEFAULT_APIC_BASE);
541		return (0);
542	}
543
544	err = vmm_emulate_instruction(vm, vcpu, gpa, vie, lapic_mmio_read,
545		lapic_mmio_write, 0);
546
547	return (err ? 0 : 1);
548}
549
550/*
551 * Special handling of EFER MSR.
552 * An SVM guest must keep the EFER.SVME bit set; prohibit the guest from
553 * clearing the SVM enable bit in EFER.
554 */
555static void
556svm_efer(struct svm_softc *svm_sc, int vcpu, boolean_t write)
557{
558	struct svm_regctx *swctx;
559	struct vmcb_state *state;
560
561	state = svm_get_vmcb_state(svm_sc, vcpu);
562	swctx = svm_get_guest_regctx(svm_sc, vcpu);
563
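	/*
	 * On a WRMSR the new value arrives in EDX:EAX; EFER_SVM is forced on
	 * so the guest cannot disable SVM.  On a RDMSR the current EFER is
	 * returned in EDX:EAX.
	 */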
564	if (write) {
565		state->efer = ((swctx->e.g.sctx_rdx & (uint32_t)~0) << 32) |
566				((uint32_t)state->rax) | EFER_SVM;
567	} else {
568		state->rax = (uint32_t)state->efer;
569		swctx->e.g.sctx_rdx = (uint32_t)(state->efer >> 32);
570	}
571}
572
573/*
574 * Determine the cause of the virtual cpu exit and return to user space
575 * if the exit demands so.
576 * Return: 1 - Return to user space.
577 *	   0 - Continue vcpu run.
578 */
579static int
580svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
581{
582	struct vmcb_state *state;
583	struct vmcb_ctrl *ctrl;
584	struct svm_regctx *ctx;
585	uint64_t code, info1, info2, val;
586	uint32_t eax, ecx, edx;
587	int user;		/* Flag for user mode */
588	int update_rip;		/* Flag for updating RIP */
589	int inst_len;
590
591	KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu));
592
593	state = svm_get_vmcb_state(svm_sc, vcpu);
594	ctrl  = svm_get_vmcb_ctrl(svm_sc, vcpu);
595	ctx   = svm_get_guest_regctx(svm_sc, vcpu);
596	update_rip = 1;
597	user = 0;
598
599	vmexit->exitcode = VM_EXITCODE_VMX;
600	vmexit->u.vmx.error = 0;
601	code = ctrl->exitcode;
602	info1 = ctrl->exitinfo1;
603	info2 = ctrl->exitinfo2;
604
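	/*
	 * If the CPU saved the next RIP, the instruction length is simply
	 * nRIP - RIP; otherwise fall back to the length recorded in the
	 * VMCB (inst_decode_size).
	 */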
605	if (ctrl->nrip) {
606		inst_len = ctrl->nrip - state->rip;
607	} else {
608		inst_len = ctrl->inst_decode_size;
609	}
610
611	switch (code) {
612		case	VMCB_EXIT_MC: /* Machine Check. */
613			vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_MTRAP, 1);
614			vmexit->exitcode = VM_EXITCODE_MTRAP;
615			user = 1;
616			break;
617
618		case	VMCB_EXIT_MSR:	/* MSR access. */
619			eax = state->rax;
620			ecx = ctx->sctx_rcx;
621			edx = ctx->e.g.sctx_rdx;
622
623			if (ecx == MSR_EFER) {
624				VMM_CTR0(svm_sc->vm, vcpu,"VMEXIT EFER\n");
625				svm_efer(svm_sc, vcpu, info1);
626				break;
627			}
628
629			if (info1) {
630				/* VM exited because of write MSR */
631				vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_WRMSR, 1);
632				vmexit->exitcode = VM_EXITCODE_WRMSR;
633				vmexit->u.msr.code = ecx;
634				val = (uint64_t)edx << 32 | eax;
635				if (emulate_wrmsr(svm_sc->vm, vcpu, ecx, val)) {
636					vmexit->u.msr.wval = val;
637					user = 1;
638				}
639				VMM_CTR3(svm_sc->vm, vcpu,
640					"VMEXIT WRMSR(%s handling) 0x%lx @0x%x",
641					user ? "user" : "kernel", val, ecx);
642			} else {
643				vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_RDMSR, 1);
644				vmexit->exitcode = VM_EXITCODE_RDMSR;
645				vmexit->u.msr.code = ecx;
646				if (emulate_rdmsr(svm_sc->vm, vcpu, ecx)) {
647					user = 1;
648				}
649				VMM_CTR3(svm_sc->vm, vcpu, "SVM:VMEXIT RDMSR"
650					" 0x%lx,%lx @0x%x", ctx->e.g.sctx_rdx,
651					state->rax, ecx);
652			}
653
654#define MSR_AMDK8_IPM           0xc0010055
655			/*
656			 * We can't hide the AMD C1E idle capability since it's
657			 * based on CPU generation; for now, ignore accesses to
658			 * this MSR by vcpus.
659			 * XXX: special handling of AMD C1E - Ignore.
660			 */
661			 if (ecx == MSR_AMDK8_IPM)
662				user = 0;
663			break;
664
665		case 	VMCB_EXIT_INTR:
666			/*
667			 * Exit on external interrupt.
668			 * Let the host interrupt handler run; if it is a guest
669			 * interrupt, the local APIC will inject the event into the guest.
670			 */
671			user = 0;
672			update_rip = 0;
673			VMM_CTR1(svm_sc->vm, vcpu, "SVM:VMEXIT ExtInt"
674				" RIP:0x%lx.\n", state->rip);
675			vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_EXTINT, 1);
676			break;
677
678		case VMCB_EXIT_IO:
679			vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_INOUT, 1);
680			user = svm_handle_io(svm_sc, vcpu, vmexit);
681			VMM_CTR1(svm_sc->vm, vcpu, "SVM:I/O VMEXIT RIP:0x%lx\n",
682				state->rip);
683			break;
684
685		case VMCB_EXIT_CPUID:
686			vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_CPUID, 1);
687			(void)x86_emulate_cpuid(svm_sc->vm, vcpu,
688					(uint32_t *)&state->rax,
689					(uint32_t *)&ctx->sctx_rbx,
690					(uint32_t *)&ctx->sctx_rcx,
691					(uint32_t *)&ctx->e.g.sctx_rdx);
692			VMM_CTR0(svm_sc->vm, vcpu, "SVM:VMEXIT CPUID\n");
693			user = 0;
694			break;
695
696		case VMCB_EXIT_HLT:
697			vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_HLT, 1);
698 			if (ctrl->v_irq) {
699				 /* Interrupt is pending, can't halt guest. */
700				user = 0;
701				vmm_stat_incr(svm_sc->vm, vcpu,
702					VMEXIT_HLT_IGNORED, 1);
703				VMM_CTR0(svm_sc->vm, vcpu,
704					"VMEXIT halt ignored.");
705			} else {
706				VMM_CTR0(svm_sc->vm, vcpu,
707					"VMEXIT halted CPU.");
708				vmexit->exitcode = VM_EXITCODE_HLT;
709				user = 1;
710
711			}
712			break;
713
714		case VMCB_EXIT_PAUSE:
715			VMM_CTR0(svm_sc->vm, vcpu, "SVM:VMEXIT pause");
716			vmexit->exitcode = VM_EXITCODE_PAUSE;
717			vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_PAUSE, 1);
718
719			break;
720
721		case VMCB_EXIT_NPF:
722			 /* EXITINFO2 has the physical fault address (GPA). */
723			if (!svm_handle_npf(svm_sc->vm, vcpu, info2,
724					state->rip, info1, state->cr3,
725					&vmexit->u.paging.vie)) {
726				/* I/O APIC for MSI/X. */
727				vmexit->exitcode = VM_EXITCODE_PAGING;
728				user = 1;
729				vmexit->u.paging.gpa = info2;
730			} else {
731				/* Local APIC NPF */
732				update_rip = 1;
733				vmm_stat_incr(svm_sc->vm, vcpu,
734						VMEXIT_NPF_LAPIC, 1);
735			}
736
737			vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_EPT_FAULT, 1);
738			inst_len = vmexit->u.paging.vie.num_processed;
739 			VMM_CTR3(svm_sc->vm, vcpu, "VMEXIT NPF, GPA:0x%lx "
740				"user=%d instr len=%d.\n", info2, user,
741				inst_len);
742			break;
743
744		case VMCB_EXIT_SHUTDOWN:
745			VMM_CTR0(svm_sc->vm, vcpu, "SVM:VMEXIT guest shutdown.");
746			user = 1;
747			vmexit->exitcode = VM_EXITCODE_VMX;
748			break;
749
750		case VMCB_EXIT_INVALID:
751			VMM_CTR0(svm_sc->vm, vcpu, "SVM:VMEXIT INVALID.");
752			user = 1;
753			vmexit->exitcode = VM_EXITCODE_VMX;
754			break;
755
756		default:
757			 /* Return to user space. */
758			user = 1;
759			update_rip = 0;
760			VMM_CTR3(svm_sc->vm, vcpu, "VMEXIT=0x%lx"
761				" EXITINFO1: 0x%lx EXITINFO2:0x%lx\n",
762		 		ctrl->exitcode, info1, info2);
763			VMM_CTR3(svm_sc->vm, vcpu, "SVM:RIP: 0x%lx nRIP:0x%lx"
764				" Inst decoder len:%d\n", state->rip,
765				ctrl->nrip, ctrl->inst_decode_size);
766			vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_UNKNOWN, 1);
767			break;
768	}
769
770	if (ctrl->v_irq) {
771		VMM_CTR2(svm_sc->vm, vcpu, "SVM:SVM intr pending vector:0x%x"
772		" priority:0x%x", ctrl->v_intr_vector, ctrl->v_intr_prio);
773	}
774
775	vmexit->rip = state->rip;
776	if (update_rip) {
777		vmexit->rip += inst_len;
778	}
779
780	/* Return to userland for APs to start. */
781	if (vmexit->exitcode == VM_EXITCODE_SPINUP_AP) {
782 		VMM_CTR1(svm_sc->vm, vcpu, "SVM:Starting APs, RIP0x%lx.\n",
783			vmexit->rip);
784		user = 1;
785	}
786
787	 /* XXX: Set next RIP before restarting virtual cpus. */
788	if (ctrl->nrip == 0) {
789		ctrl->nrip = state->rip;
790	}
791
792	return (user);
793}
794
795/*
796 * Inject NMI to virtual cpu.
797 */
798static int
799svm_inject_nmi(struct svm_softc *svm_sc, int vcpu)
800{
801	struct vmcb_ctrl *ctrl;
802
803	KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu));
804
805	ctrl  = svm_get_vmcb_ctrl(svm_sc, vcpu);
806	/* Nothing to do if there is no pending NMI to inject. */
807	if (!vm_nmi_pending(svm_sc->vm, vcpu))
808		return (0);
809
810	 /* Inject NMI, vector number is not used.*/
811	if (vmcb_eventinject(ctrl, VM_NMI, IDT_NMI, 0, FALSE)) {
812		VMM_CTR0(svm_sc->vm, vcpu, "SVM:NMI injection failed.\n");
813		return (EIO);
814	}
815
816	/* Acknowledge the request is accepted.*/
817	vm_nmi_clear(svm_sc->vm, vcpu);
818
819	VMM_CTR0(svm_sc->vm, vcpu, "SVM:Injected NMI.\n");
820
821	return (1);
822}
823
824/*
825 * Inject event to virtual cpu.
826 */
827static void
828svm_inj_interrupts(struct svm_softc *svm_sc, int vcpu)
829{
830	struct vmcb_ctrl *ctrl;
831	struct vmcb_state *state;
832	int vector;
833
834	KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu));
835
836	state = svm_get_vmcb_state(svm_sc, vcpu);
837	ctrl  = svm_get_vmcb_ctrl(svm_sc, vcpu);
838
839	/* Can't inject multiple events at once. */
840	if (ctrl->eventinj & VMCB_EVENTINJ_VALID) {
841		VMM_CTR1(svm_sc->vm, vcpu,
842			"SVM:Last event(0x%lx) is pending.\n", ctrl->eventinj);
843		return ;
844	}
845
846	/* Wait for guest to come out of interrupt shadow. */
847	if (ctrl->intr_shadow) {
848		VMM_CTR0(svm_sc->vm, vcpu, "SVM:Guest in interrupt shadow.\n");
849		goto inject_failed;
850	}
851
852	/* Make sure no interrupt is pending.*/
853	if (ctrl->v_irq) {
854		VMM_CTR0(svm_sc->vm, vcpu,
855			"SVM:virtual interrupt is pending.\n");
856		goto inject_failed;
857	}
858
859	/* NMI event has priority over interrupts.*/
860	if (svm_inject_nmi(svm_sc, vcpu)) {
861		return;
862	}
863
864	vector = lapic_pending_intr(svm_sc->vm, vcpu);
865	if (vector < 0) {
866		return;
867	}
868
869	if (vector < 32 || vector > 255) {
870		ERR("Invalid vector number:%d\n", vector);
871		return;
872	}
873
874	if ((state->rflags & PSL_I) == 0) {
875		VMM_CTR0(svm_sc->vm, vcpu, "SVM:Interrupt is disabled\n");
876		goto inject_failed;
877	}
878
879	if(vmcb_eventinject(ctrl, VM_HW_INTR, vector, 0, FALSE)) {
880		VMM_CTR2(svm_sc->vm, vcpu, "SVM:Event injection failed to"
881			" VCPU%d,vector=%d.\n", vcpu, vector);
882		return;
883	}
884
885	/* Acknowledge that event is accepted.*/
886	lapic_intr_accepted(svm_sc->vm, vcpu, vector);
887	VMM_CTR1(svm_sc->vm, vcpu, "SVM:event injected,vector=%d.\n", vector);
888
889inject_failed:
890	return;
891}
892
893/*
894 * Restore host Task Register selector type after every vcpu exit.
895 */
896static void
897setup_tss_type(void)
898{
899	struct system_segment_descriptor *desc;
900
901	desc = (struct system_segment_descriptor *)&gdt[curcpu * NGDT +
902		GPROC0_SEL];
903	/*
904	 * The TSS type that should be restored in the host is
905	 * 64-bit available (9), not busy (0xb) as it currently reads; see
906	 * the System Descriptors table in APM vol. 2, Rev 3.21, section 4.8.3.
907	 */
908	desc->sd_type = 9;
909}
910
911/*
912 * Start vcpu with specified RIP.
913 */
914static int
915svm_vmrun(void *arg, int vcpu, register_t rip)
916{
917	struct svm_regctx *hctx, *gctx;
918	struct svm_softc *svm_sc;
919	struct svm_vcpu *vcpustate;
920	struct vmcb_state *state;
921	struct vmcb_ctrl *ctrl;
922	struct vm_exit *vmexit;
923	int user;
924	uint64_t vmcb_pa;
925	uint64_t host_cr2;
926
927	user = 0;
928	svm_sc = arg;
929
930	KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu));
931
932	vcpustate = svm_get_vcpu(svm_sc, vcpu);
933	state	= svm_get_vmcb_state(svm_sc, vcpu);
934	ctrl  	= svm_get_vmcb_ctrl(svm_sc, vcpu);
935	vmexit  = vm_exitinfo(svm_sc->vm , vcpu);
936	if (vmexit->exitcode == VM_EXITCODE_VMX) {
937		ERR("vcpu%d shouldn't run again.\n", vcpu);
938		return(EIO);
939	}
940
941	gctx = svm_get_guest_regctx(svm_sc, vcpu);
942	hctx = &host_ctx[curcpu];
943	vmcb_pa = svm_sc->vcpu[vcpu].vmcb_pa;
944
945	if (vcpustate->lastcpu != curcpu) {
946		/* Virtual CPU is running on a different CPU now. */
947		vmm_stat_incr(svm_sc->vm, vcpu, VCPU_MIGRATIONS, 1);
948
949		/*
950		 * Flush all TLB mapping for this guest on this CPU,
951		 * it might have stale entries.
952		 */
953		ctrl->tlb_ctrl = VMCB_TLB_FLUSH_GUEST;
954
955		/* Can't use any cached VMCB state by cpu.*/
956		ctrl->vmcb_clean = VMCB_CACHE_NONE;
957	} else {
958		/* Don't flush TLB since guest ASID is unchanged. */
959		ctrl->tlb_ctrl = VMCB_TLB_FLUSH_NOTHING;
960
961		/*
962		 * This is the same cpu on which vcpu last ran so don't
963		 * need to reload all VMCB state.
964		 * ASID is unique for a guest.
965		 * IOPM is unchanged.
966		 * RVI/EPT is unchanged.
967		 *
968		 */
969		ctrl->vmcb_clean = VMCB_CACHE_ASID |
970				VMCB_CACHE_IOPM |
971				VMCB_CACHE_NP;
972
973	}
974
975	vcpustate->lastcpu = curcpu;
976
977	/* Update Guest RIP */
978	state->rip = rip;
979
980	VMM_CTR1(svm_sc->vm, vcpu, "SVM:entered with RIP:0x%lx\n",
981		state->rip);
982	do {
983		/* The scheduler has asked us to give up the cpu. */
984		if (curthread->td_flags & (TDF_ASTPENDING | TDF_NEEDRESCHED)) {
985			vmexit->exitcode = VM_EXITCODE_BOGUS;
986			vmexit->inst_length = 0;
987			vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_ASTPENDING, 1);
988			VMM_CTR1(svm_sc->vm, vcpu, "SVM:gave up cpu, RIP:0x%lx\n",
989				state->rip);
990			break;
991		}
992
993		lapic_timer_tick(svm_sc->vm, vcpu);
994
995		(void)svm_set_vmcb(svm_get_vmcb(svm_sc, vcpu), svm_sc->asid);
996
997		(void)svm_inj_interrupts(svm_sc, vcpu);
998
999		/* Change TSS type to available.*/
1000		setup_tss_type();
1001
1002		/*
1003		 * Disable global interrupt to guarantee atomicity
1004		 * during loading of guest state.
1005		 * See 15.5.1 "Loading guest state" APM2.
1006		 */
1007		disable_gintr();
1008
1009		save_cr2(&host_cr2);
1010		load_cr2(&state->cr2);
1011
1012		/* Launch Virtual Machine. */
1013		svm_launch(vmcb_pa, gctx, hctx);
1014
1015		save_cr2(&state->cr2);
1016		load_cr2(&host_cr2);
1017
1018		/*
1019		 * Only the host GDTR and IDTR are saved and restored by SVM;
1020		 * LDTR and TR need to be restored by the VMM.
1021		 * XXX: kernel doesn't use LDT, only user space.
1022		 */
1023		ltr(GSEL(GPROC0_SEL, SEL_KPL));
1024
1025		/*
1026		 * Guest FS and GS selectors are stashed by VMLOAD and VMSAVE.
1027		 * Host FS and GS selectors are stashed by svm_launch().
1028		 * The host GS base, which holds the per-cpu area, needs to be
1029		 * restored before enabling global interrupts.
1030		 * FS is not used by the FreeBSD kernel; the kernel restores the
1031		 * user FS selector and base before returning to
1032		 * userland.
1033		 *
1034		 * Note: 'curcpu' can't be used here since it relies on pcpu.
1035		 */
1036		wrmsr(MSR_GSBASE, (uint64_t)&__pcpu[vcpustate->lastcpu]);
1037		wrmsr(MSR_KGSBASE, (uint64_t)&__pcpu[vcpustate->lastcpu]);
1038
1039		/* The vcpu exited with global interrupts disabled. */
1040		enable_gintr();
1041
1042		/* Handle #VMEXIT and if required return to user space. */
1043		user = svm_vmexit(svm_sc, vcpu, vmexit);
1044		vcpustate->loop++;
1045		vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_COUNT, 1);
1046
1047		/* Update RIP since we are continuing vcpu execution.*/
1048		state->rip = vmexit->rip;
1049
1050		VMM_CTR1(svm_sc->vm, vcpu, "SVM:loop RIP:0x%lx\n", state->rip);
1051	} while (!user);
1052	VMM_CTR1(svm_sc->vm, vcpu, "SVM:exited with RIP:0x%lx\n",
1053		state->rip);
1054
1055	return (0);
1056}
1057
1058/*
1059 * Cleanup for virtual machine.
1060 */
1061static void
1062svm_vmcleanup(void *arg)
1063{
1064	struct svm_softc *svm_sc;
1065
1066	svm_sc = arg;
1067
1068	VMM_CTR0(svm_sc->vm, 0, "SVM:cleanup\n");
1069
1070	svm_npt_cleanup(svm_sc);
1071	free(svm_sc, M_SVM);
1072}
1073
1074/*
1075 * Return pointer to hypervisor saved register state.
1076 */
1077static register_t *
1078swctx_regptr(struct svm_regctx *regctx, int reg)
1079{
1080
1081	switch (reg) {
1082		case VM_REG_GUEST_RBX:
1083			return (&regctx->sctx_rbx);
1084		case VM_REG_GUEST_RCX:
1085			return (&regctx->sctx_rcx);
1086		case VM_REG_GUEST_RDX:
1087			return (&regctx->e.g.sctx_rdx);
1088		case VM_REG_GUEST_RDI:
1089			return (&regctx->e.g.sctx_rdi);
1090		case VM_REG_GUEST_RSI:
1091			return (&regctx->e.g.sctx_rsi);
1092		case VM_REG_GUEST_RBP:
1093			return (&regctx->sctx_rbp);
1094		case VM_REG_GUEST_R8:
1095			return (&regctx->sctx_r8);
1096		case VM_REG_GUEST_R9:
1097			return (&regctx->sctx_r9);
1098		case VM_REG_GUEST_R10:
1099			return (&regctx->sctx_r10);
1100		case VM_REG_GUEST_R11:
1101			return (&regctx->sctx_r11);
1102		case VM_REG_GUEST_R12:
1103			return (&regctx->sctx_r12);
1104		case VM_REG_GUEST_R13:
1105			return (&regctx->sctx_r13);
1106		case VM_REG_GUEST_R14:
1107			return (&regctx->sctx_r14);
1108		case VM_REG_GUEST_R15:
1109			return (&regctx->sctx_r15);
1110		default:
1111			ERR("Unknown register requested.\n");
1112			break;
1113	}
1114
1115	return (NULL);
1116}
1117
1118/*
1119 * Interface to read guest registers.
1120 * This can be SVM h/w saved or hypervisor saved register.
1121 */
1122static int
1123svm_getreg(void *arg, int vcpu, int ident, uint64_t *val)
1124{
1125	struct svm_softc *svm_sc;
1126	struct vmcb *vmcb;
1127	register_t *reg;
1128
1129	svm_sc = arg;
1130	KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu));
1131
1132	vmcb = svm_get_vmcb(svm_sc, vcpu);
1133
1134	if (vmcb_read(vmcb, ident, val) == 0) {
1135		return (0);
1136	}
1137
1138	reg = swctx_regptr(svm_get_guest_regctx(svm_sc, vcpu), ident);
1139	if (reg != NULL) {
1140		*val = *reg;
1141		return (0);
1142	}
1143
1144	ERR("reg type %x is not saved in VMCB\n", ident);
1145	return (EINVAL);
1146}
1147
1148/*
1149 * Interface to write to guest registers.
1150 * This can be SVM h/w saved or hypervisor saved register.
1151 */
1152static int
1153svm_setreg(void *arg, int vcpu, int ident, uint64_t val)
1154{
1155	struct svm_softc *svm_sc;
1156	struct vmcb *vmcb;
1157	register_t *reg;
1158
1159	svm_sc = arg;
1160	KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu));
1161
1162	vmcb = svm_get_vmcb(svm_sc, vcpu);
1163	if (vmcb_write(vmcb, ident, val) == 0) {
1164		return (0);
1165	}
1166
1167	reg = swctx_regptr(svm_get_guest_regctx(svm_sc, vcpu), ident);
1168
1169	if (reg != NULL) {
1170		*reg = val;
1171		return (0);
1172	}
1173
1174	ERR("reg type %x is not saved in VMCB\n", ident);
1175	return (EINVAL);
1176}
1177
1178
1179/*
1180 * Interface to set various descriptors.
1181 */
1182static int
1183svm_setdesc(void *arg, int vcpu, int type, struct seg_desc *desc)
1184{
1185	struct svm_softc *svm_sc;
1186	struct vmcb *vmcb;
1187	struct vmcb_segment *seg;
1188	uint16_t attrib;
1189
1190	svm_sc = arg;
1191	KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu));
1192
1193	vmcb = svm_get_vmcb(svm_sc, vcpu);
1194
1195	VMM_CTR1(svm_sc->vm, vcpu, "SVM:set_desc: Type%d\n", type);
1196
1197	seg = vmcb_seg(vmcb, type);
1198	if (seg == NULL) {
1199		ERR("Unsupported seg type %d\n", type);
1200		return (EINVAL);
1201	}
1202
1203	/* Map seg_desc access to VMCB attribute format.*/
1204	attrib = ((desc->access & 0xF000) >> 4) | (desc->access & 0xFF);
1205	VMM_CTR3(svm_sc->vm, vcpu, "SVM:[sel %d attribute 0x%x limit:0x%x]\n",
1206		type, desc->access, desc->limit);
1207	seg->attrib = attrib;
1208	seg->base = desc->base;
1209	seg->limit = desc->limit;
1210
1211	return (0);
1212}
1213
1214/*
1215 * Interface to get guest descriptor.
1216 */
1217static int
1218svm_getdesc(void *arg, int vcpu, int type, struct seg_desc *desc)
1219{
1220	struct svm_softc *svm_sc;
1221	struct vmcb_segment	*seg;
1222
1223	svm_sc = arg;
1224	KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu));
1225
1226	VMM_CTR1(svm_sc->vm, vcpu, "SVM:get_desc: Type%d\n", type);
1227
1228	seg = vmcb_seg(svm_get_vmcb(svm_sc, vcpu), type);
1229	if (!seg) {
1230		ERR("Unsupported seg type %d\n", type);
1231		return (EINVAL);
1232	}
1233
1234	/* Map seg_desc access to VMCB attribute format.*/
1235	desc->access = ((seg->attrib & 0xF00) << 4) | (seg->attrib & 0xFF);
1236	desc->base = seg->base;
1237	desc->limit = seg->limit;
1238
1239	return (0);
1240}
1241
1242static int
1243svm_inject_event(void *arg, int vcpu, int type, int vector,
1244		  uint32_t error, int ec_valid)
1245{
1246	struct svm_softc *svm_sc;
1247	struct vmcb_ctrl *ctrl;
1248
1249	svm_sc = arg;
1250	KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu));
1251
1252	ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu);
1253	VMM_CTR3(svm_sc->vm, vcpu, "Injecting event type:0x%x vector:0x%x "
1254		"error:0x%x\n", type, vector, error);
1255
1256	return (vmcb_eventinject(ctrl, type, vector, error,
1257		ec_valid ? TRUE : FALSE));
1258}
1259
1260static int
1261svm_setcap(void *arg, int vcpu, int type, int val)
1262{
1263	struct svm_softc *svm_sc;
1264	struct vmcb_ctrl *ctrl;
1265	int ret = ENOENT;
1266
1267	svm_sc = arg;
1268	KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu));
1269
1270	ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu);
1271
1272	switch (type) {
1273		case VM_CAP_HALT_EXIT:
1274			if (val)
1275				ctrl->ctrl1 |= VMCB_INTCPT_HLT;
1276			else
1277				ctrl->ctrl1 &= ~VMCB_INTCPT_HLT;
1278			ret = 0;
1279			VMM_CTR1(svm_sc->vm, vcpu, "SVM:set_cap:Halt exit %s.\n",
1280				val ? "enabled": "disabled");
1281			break;
1282
1283		case VM_CAP_PAUSE_EXIT:
1284			if (val)
1285				ctrl->ctrl1 |= VMCB_INTCPT_PAUSE;
1286			else
1287				ctrl->ctrl1 &= ~VMCB_INTCPT_PAUSE;
1288			ret = 0;
1289			VMM_CTR1(svm_sc->vm, vcpu, "SVM:set_cap:Pause exit %s.\n",
1290				val ? "enabled": "disabled");
1291			break;
1292
1293		case VM_CAP_MTRAP_EXIT:
1294			if (val)
1295				ctrl->exception |= BIT(IDT_MC);
1296			else
1297				ctrl->exception &= ~BIT(IDT_MC);
1298			ret = 0;
1299			VMM_CTR1(svm_sc->vm, vcpu, "SVM:set_cap:MC exit %s.\n",
1300				val ? "enabled": "disabled");
1301			break;
1302
1303		case VM_CAP_UNRESTRICTED_GUEST:
1304			/* SVM doesn't need special capability for SMP.*/
1305			VMM_CTR0(svm_sc->vm, vcpu, "SVM:set_cap:Unrestricted "
1306			"always enabled.\n");
1307			ret = 0;
1308			break;
1309
1310		default:
1311			break;
1312		}
1313
1314	return (ret);
1315}
1316
1317static int
1318svm_getcap(void *arg, int vcpu, int type, int *retval)
1319{
1320	struct svm_softc *svm_sc;
1321	struct vmcb_ctrl *ctrl;
1322
1323	svm_sc = arg;
1324	KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu));
1325
1326	ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu);
1327
1328	switch (type) {
1329		case VM_CAP_HALT_EXIT:
1330			*retval = (ctrl->ctrl1 & VMCB_INTCPT_HLT) ? 1 : 0;
1331			VMM_CTR1(svm_sc->vm, vcpu, "SVM:get_cap:Halt exit %s.\n",
1332				*retval ? "enabled": "disabled");
1333			break;
1334
1335		case VM_CAP_PAUSE_EXIT:
1336			*retval = (ctrl->ctrl1 & VMCB_INTCPT_PAUSE) ? 1 : 0;
1337			VMM_CTR1(svm_sc->vm, vcpu, "SVM:get_cap:Pause exit %s.\n",
1338				*retval ? "enabled": "disabled");
1339			break;
1340
1341		case VM_CAP_MTRAP_EXIT:
1342			*retval = (ctrl->exception & BIT(IDT_MC)) ? 1 : 0;
1343			VMM_CTR1(svm_sc->vm, vcpu, "SVM:get_cap:MC exit %s.\n",
1344				*retval ? "enabled": "disabled");
1345			break;
1346
1347		case VM_CAP_UNRESTRICTED_GUEST:
1348			VMM_CTR0(svm_sc->vm, vcpu, "SVM:get_cap:Unrestricted.\n");
1349			*retval = 1;
1350			break;
1351
1352		default:
1353			break;
1354
1355	return (0);
1356}
1357
1358struct vmm_ops vmm_ops_amd = {
1359	svm_init,
1360	svm_cleanup,
1361	svm_vminit,
1362	svm_vmrun,
1363	svm_vmcleanup,
1364	svm_npt_vmmap_set,
1365	svm_npt_vmmap_get,
1366	svm_getreg,
1367	svm_setreg,
1368	svm_getdesc,
1369	svm_setdesc,
1370	svm_inject_event,
1371	svm_getcap,
1372	svm_setcap
1373};
1374