1/*-
2 * Copyright (c) 2013, Anish Gupta (akgupt3@gmail.com)
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice unmodified, this list of conditions, and the following
10 *    disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: projects/bhyve_svm/sys/amd64/vmm/amd/svm.c 267144 2014-06-06 02:55:18Z grehan $");
29
30#include <sys/param.h>
31#include <sys/systm.h>
32#include <sys/smp.h>
33#include <sys/kernel.h>
34#include <sys/malloc.h>
35#include <sys/pcpu.h>
36#include <sys/proc.h>
37
38#include <vm/vm.h>
39#include <vm/pmap.h>
40
41#include <machine/cpufunc.h>
42#include <machine/psl.h>
43#include <machine/pmap.h>
44#include <machine/md_var.h>
45#include <machine/vmparam.h>
46#include <machine/specialreg.h>
47#include <machine/segments.h>
48#include <machine/vmm.h>
49#include <machine/vmm_dev.h>
50#include <machine/vmm_instruction_emul.h>
51
52#include <x86/apicreg.h>
53
54#include "vmm_lapic.h"
55#include "vmm_msr.h"
56#include "vmm_stat.h"
57#include "vmm_ktr.h"
58#include "vmm_ioport.h"
59#include "vlapic.h"
60#include "vlapic_priv.h"
61
62#include "x86.h"
63#include "vmcb.h"
64#include "svm.h"
65#include "svm_softc.h"
66#include "npt.h"
67
68/*
69 * SVM CPUID function 0x8000_000A, edx bit decoding.
70 */
71#define AMD_CPUID_SVM_NP		BIT(0)  /* Nested paging or RVI */
72#define AMD_CPUID_SVM_LBR		BIT(1)  /* Last branch virtualization */
73#define AMD_CPUID_SVM_SVML		BIT(2)  /* SVM lock */
74#define AMD_CPUID_SVM_NRIP_SAVE		BIT(3)  /* Next RIP is saved */
75#define AMD_CPUID_SVM_TSC_RATE		BIT(4)  /* TSC rate control. */
76#define AMD_CPUID_SVM_VMCB_CLEAN	BIT(5)  /* VMCB state caching */
77#define AMD_CPUID_SVM_ASID_FLUSH	BIT(6)  /* Flush by ASID */
78#define AMD_CPUID_SVM_DECODE_ASSIST	BIT(7)  /* Decode assist */
79#define AMD_CPUID_SVM_PAUSE_INC		BIT(10) /* Pause intercept filter. */
80#define AMD_CPUID_SVM_PAUSE_FTH		BIT(12) /* Pause filter threshold */
81
82MALLOC_DEFINE(M_SVM, "svm", "svm");
83MALLOC_DEFINE(M_SVM_VLAPIC, "svm-vlapic", "svm-vlapic");
84
85/* Per-CPU context area. */
86extern struct pcpu __pcpu[];
87
88static bool svm_vmexit(struct svm_softc *svm_sc, int vcpu,
89			struct vm_exit *vmexit);
90static int svm_msr_rw_ok(uint8_t *btmap, uint64_t msr);
91static int svm_msr_rd_ok(uint8_t *btmap, uint64_t msr);
92static int svm_msr_index(uint64_t msr, int *index, int *bit);
93static int svm_getdesc(void *arg, int vcpu, int type, struct seg_desc *desc);
94
95static uint32_t svm_feature; /* AMD SVM features. */
96
97/*
98 * Starting guest ASID, 0 is reserved for host.
99 * Each guest will have its own unique ASID.
100 */
101static uint32_t guest_asid = 1;
102
103/*
104 * Maximum ASID the processor can support.
105 * This limits the maximum number of virtual machines that can be created.
106 */
107static int max_asid;
108
109/*
110 * SVM host state saved area of size 4KB for each core.
111 */
112static uint8_t hsave[MAXCPU][PAGE_SIZE] __aligned(PAGE_SIZE);
113
114/*
115 * S/w saved host context.
116 */
117static struct svm_regctx host_ctx[MAXCPU];
118
119static VMM_STAT_AMD(VCPU_EXITINTINFO, "Valid EXITINTINFO");
120
121/*
122 * Common function to enable or disable SVM for a CPU.
123 */
124static int
125cpu_svm_enable_disable(boolean_t enable)
126{
127	uint64_t efer_msr;
128
129	efer_msr = rdmsr(MSR_EFER);
130
131	if (enable)
132		efer_msr |= EFER_SVM;
133	else
134		efer_msr &= ~EFER_SVM;
135
136	wrmsr(MSR_EFER, efer_msr);
137
138	return(0);
139}
140
141/*
142 * Disable SVM on a CPU.
143 */
144static void
145svm_disable(void *arg __unused)
146{
147
148	(void)cpu_svm_enable_disable(FALSE);
149}
150
151/*
152 * Disable SVM for all CPUs.
153 */
154static int
155svm_cleanup(void)
156{
157
158	smp_rendezvous(NULL, svm_disable, NULL, NULL);
159	return (0);
160}
161
162/*
163 * Check for the SVM features required by bhyve in a CPU.
164 */
165static int
166svm_cpuid_features(void)
167{
168	u_int regs[4];
169
170	/* CPUID Fn8000_000A is for SVM */
171	do_cpuid(0x8000000A, regs);
172	svm_feature = regs[3];
173
174	printf("SVM rev: 0x%x NASID:0x%x\n", regs[0] & 0xFF, regs[1]);
175	max_asid = regs[1];
176
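	/*
	 * %b format note: the leading "\020" selects hexadecimal output and
	 * each "\0NN" prefix below names bit (NN - 1) of the value.
	 */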
177	printf("SVM Features:0x%b\n", svm_feature,
178		"\020"
179		"\001NP"		/* Nested paging */
180		"\002LbrVirt"		/* LBR virtualization */
181		"\003SVML"		/* SVM lock */
182		"\004NRIPS"		/* NRIP save */
183		"\005TscRateMsr"	/* MSR based TSC rate control */
184		"\006VmcbClean"		/* VMCB clean bits */
185		"\007FlushByAsid"	/* Flush by ASID */
186		"\010DecodeAssist"	/* Decode assist */
187		"\011<b8>"
188		"\012<b9>"
189		"\013PauseFilter"
190		"\014<b11>"
191		"\015PauseFilterThreshold"
192		"\016AVIC"
193		);
194
195	/* SVM Lock */
196	if (!(svm_feature & AMD_CPUID_SVM_SVML)) {
197		printf("SVM is disabled by BIOS, please enable in BIOS.\n");
198		return (ENXIO);
199	}
200
201	/*
202	 * bhyve needs RVI to work.
203	 */
204	if (!(svm_feature & AMD_CPUID_SVM_NP)) {
205		printf("Missing Nested paging or RVI SVM support in processor.\n");
206		return (EIO);
207	}
208
209	if (svm_feature & AMD_CPUID_SVM_NRIP_SAVE)
210		return (0);
211
212	return (EIO);
213}
214
215/*
216 * Enable SVM for a CPU.
217 */
218static void
219svm_enable(void *arg __unused)
220{
221	uint64_t hsave_pa;
222
223	(void)cpu_svm_enable_disable(TRUE);
224
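	/*
	 * Point the CPU at this core's 4KB host state-save area; VMRUN
	 * saves host state there and #VMEXIT restores it from there.
	 */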
225	hsave_pa = vtophys(hsave[curcpu]);
226	wrmsr(MSR_VM_HSAVE_PA, hsave_pa);
227
228	if (rdmsr(MSR_VM_HSAVE_PA) != hsave_pa) {
229		panic("VM_HSAVE_PA is wrong on CPU%d\n", curcpu);
230	}
231}
232
233/*
234 * Check if the processor supports SVM.
235 */
236static int
237is_svm_enabled(void)
238{
239	uint64_t msr;
240
241	 /* Section 15.4 Enabling SVM from APM2. */
242	if ((amd_feature2 & AMDID2_SVM) == 0) {
243		printf("SVM is not supported on this processor.\n");
244		return (ENXIO);
245	}
246
247	msr = rdmsr(MSR_VM_CR);
248	/* Make sure SVM is not disabled by BIOS. */
249	if ((msr & VM_CR_SVMDIS) == 0) {
250		return svm_cpuid_features();
251	}
252
253	printf("SVM disabled by Key, consult TPM/BIOS manual.\n");
254	return (ENXIO);
255}
256
257/*
258 * Enable SVM on CPU and initialize nested page table h/w.
259 */
260static int
261svm_init(int ipinum)
262{
263	int err;
264
265	err = is_svm_enabled();
266	if (err)
267		return (err);
268
269
270	svm_npt_init(ipinum);
271
272	/* Start SVM on all CPUs */
273	smp_rendezvous(NULL, svm_enable, NULL, NULL);
274
275	return (0);
276}
277
278static void
279svm_restore(void)
280{
281	svm_enable(NULL);
282}
283/*
284 * Get the index and bit position for an MSR in the MSR permission
285 * bitmap. Two bits are used for each MSR: the lower bit is
286 * for read and the higher bit is for write.
287 */
288static int
289svm_msr_index(uint64_t msr, int *index, int *bit)
290{
291	uint32_t base, off;
292
293/* Pentium compatible MSRs */
294#define MSR_PENTIUM_START 	0
295#define MSR_PENTIUM_END 	0x1FFF
296/* AMD 6th generation and Intel compatible MSRs */
297#define MSR_AMD6TH_START 	0xC0000000UL
298#define MSR_AMD6TH_END 		0xC0001FFFUL
299/* AMD 7th and 8th generation compatible MSRs */
300#define MSR_AMD7TH_START 	0xC0010000UL
301#define MSR_AMD7TH_END 		0xC0011FFFUL
302
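	/*
	 * For example, MSR_LSTAR (0xC0000082) falls in the AMD 6th
	 * generation range: with base = 0x2000 and off = 0x82 the byte
	 * index is (0x2000 + 0x82) / 4 = 0x820, and its read/write
	 * intercept bits sit at bit positions 4 and 5 of that byte.
	 */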
303	*index = -1;
304	*bit = (msr % 4) * 2;
305	base = 0;
306
307	if (msr >= MSR_PENTIUM_START && msr <= MSR_PENTIUM_END) {
308		*index = msr / 4;
309		return (0);
310	}
311
312	base += (MSR_PENTIUM_END - MSR_PENTIUM_START + 1);
313	if (msr >= MSR_AMD6TH_START && msr <= MSR_AMD6TH_END) {
314		off = (msr - MSR_AMD6TH_START);
315		*index = (off + base) / 4;
316		return (0);
317	}
318
319	base += (MSR_AMD6TH_END - MSR_AMD6TH_START + 1);
320	if (msr >= MSR_AMD7TH_START && msr <= MSR_AMD7TH_END) {
321		off = (msr - MSR_AMD7TH_START);
322		*index = (off + base) / 4;
323		return (0);
324	}
325
326	return (EIO);
327}
328
329/*
330 * Give the virtual cpu direct access to an MSR (read and/or write).
331 */
332static int
333svm_msr_perm(uint8_t *perm_bitmap, uint64_t msr, bool read, bool write)
334{
335	int index, bit, err;
336
337	err = svm_msr_index(msr, &index, &bit);
338	if (err) {
339		ERR("MSR 0x%lx is not covered by the permission bitmap.\n", msr);
340		return (err);
341	}
342
343	if (index < 0 || index >= SVM_MSR_BITMAP_SIZE) {
344		ERR("MSR 0x%lx index out of range(%d).\n", msr, index);
345		return (EINVAL);
346	}
347	if (bit < 0 || bit > 8) {
348		ERR("MSR 0x%lx bit out of range(%d).\n", msr, bit);
349		return (EINVAL);
350	}
351
352	/* Disable intercepts: the read bit is at 'bit', the write bit at 'bit + 1'. */
353	if (read)
354		perm_bitmap[index] &= ~(1UL << bit);
355	if (write)
356		perm_bitmap[index] &= ~(2UL << bit);
357	CTR2(KTR_VMM, "Guest has control:0x%x on SVM:MSR(0x%lx).\n",
358		(perm_bitmap[index] >> bit) & 0x3, msr);
359
360	return (0);
361}
362
363static int
364svm_msr_rw_ok(uint8_t *perm_bitmap, uint64_t msr)
365{
366	return svm_msr_perm(perm_bitmap, msr, true, true);
367}
368
369static int
370svm_msr_rd_ok(uint8_t *perm_bitmap, uint64_t msr)
371{
372	return svm_msr_perm(perm_bitmap, msr, true, false);
373}
374/*
375 * Initialise VCPU.
376 */
377static int
378svm_init_vcpu(struct svm_vcpu *vcpu, vm_paddr_t iopm_pa, vm_paddr_t msrpm_pa,
379		vm_paddr_t pml4_pa, uint8_t asid)
380{
381
382	vcpu->lastcpu = NOCPU;
383	vcpu->vmcb_pa = vtophys(&vcpu->vmcb);
384
385	/*
386	 * Initialise the VMCB persistent area of the vcpu.
387	 * 1. Permission bitmap for MSR and IO space.
388	 * 2. Nested paging.
389	 * 3. ASID of virtual machine.
390	 */
391	if (svm_init_vmcb(&vcpu->vmcb, iopm_pa, msrpm_pa, pml4_pa)) {
392			return (EIO);
393	}
394
395	return (0);
396}
397/*
398 * Initialise a virtual machine.
399 */
400static void *
401svm_vminit(struct vm *vm, pmap_t pmap)
402{
403	struct svm_softc *svm_sc;
404	vm_paddr_t msrpm_pa, iopm_pa, pml4_pa;
405	int i;
406
407	if (guest_asid >= max_asid) {
408		ERR("Host supports max ASID:%d, can't create more guests.\n",
409			max_asid);
410		return (NULL);
411	}
412
413	svm_sc = (struct svm_softc *)malloc(sizeof (struct svm_softc),
414			M_SVM, M_WAITOK | M_ZERO);
415
416	svm_sc->vm = vm;
417	svm_sc->svm_feature = svm_feature;
418	svm_sc->vcpu_cnt = VM_MAXCPU;
419	svm_sc->nptp = (vm_offset_t)vtophys(pmap->pm_pml4);
420	/*
421	 * Each guest has its own unique ASID.
422	 * The ASID (Address Space Identifier) tags the guest's TLB entries.
423	 */
424	svm_sc->asid = guest_asid++;
425
426	/*
427	 * Intercept MSR access to all MSRs except GSBASE, FSBASE,... etc.
428	 */
429	 memset(svm_sc->msr_bitmap, 0xFF, sizeof(svm_sc->msr_bitmap));
430
431	/*
432	 * The following MSRs can be completely controlled by the virtual
433	 * machine since accesses to them are serviced from the VMCB.
434	 */
435	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_GSBASE);
436	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_FSBASE);
437	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_KGSBASE);
438
439	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_STAR);
440	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_LSTAR);
441	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_CSTAR);
442	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_SF_MASK);
443	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_SYSENTER_CS_MSR);
444	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_SYSENTER_ESP_MSR);
445	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_SYSENTER_EIP_MSR);
446
447	/* For Nested Paging/RVI only. */
448	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_PAT);
449
450	 /* Intercept access to all I/O ports. */
451	memset(svm_sc->iopm_bitmap, 0xFF, sizeof(svm_sc->iopm_bitmap));
452
453	/* Cache physical address for multiple vcpus. */
454	iopm_pa = vtophys(svm_sc->iopm_bitmap);
455	msrpm_pa = vtophys(svm_sc->msr_bitmap);
456	pml4_pa = svm_sc->nptp;
457
458	for (i = 0; i < svm_sc->vcpu_cnt; i++) {
459		if (svm_init_vcpu(svm_get_vcpu(svm_sc, i), iopm_pa, msrpm_pa,
460				pml4_pa, svm_sc->asid)) {
461			ERR("SVM couldn't initialise VCPU%d\n", i);
462			goto cleanup;
463		}
464	}
465
466	return (svm_sc);
467
468cleanup:
469	free(svm_sc, M_SVM);
470	return (NULL);
471}
472
473static int
474svm_cpl(struct vmcb_state *state)
475{
476
477	/*
478	 * From APMv2:
479	 *   "Retrieve the CPL from the CPL field in the VMCB, not
480	 *    from any segment DPL"
481	 */
482	return (state->cpl);
483}
484
485static enum vm_cpu_mode
486svm_vcpu_mode(uint64_t efer)
487{
488
489	if (efer & EFER_LMA)
490		return (CPU_MODE_64BIT);
491	else
492		return (CPU_MODE_COMPATIBILITY);
493}
494
495static enum vm_paging_mode
496svm_paging_mode(uint64_t cr0, uint64_t cr4, uint64_t efer)
497{
498
499	if ((cr0 & CR0_PG) == 0)
500		return (PAGING_MODE_FLAT);
501	if ((cr4 & CR4_PAE) == 0)
502		return (PAGING_MODE_32);
503	if (efer & EFER_LME)
504		return (PAGING_MODE_64);
505	else
506		return (PAGING_MODE_PAE);
507}
508
509/*
510 * ins/outs utility routines
511 */
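/*
 * INS writes to ES:(E)DI and OUTS reads from the source segment at (E)SI,
 * so the index register depends on the direction; a REP prefix keeps the
 * iteration count in (E)CX.
 */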
512static uint64_t
513svm_inout_str_index(struct svm_regctx *regs, int in)
514{
515	uint64_t val;
516
517	val = in ? regs->e.g.sctx_rdi : regs->e.g.sctx_rsi;
518
519	return (val);
520}
521
522static uint64_t
523svm_inout_str_count(struct svm_regctx *regs, int rep)
524{
525	uint64_t val;
526
527	val = rep ? regs->sctx_rcx : 1;
528
529	return (val);
530}
531
532static void
533svm_inout_str_seginfo(struct svm_softc *svm_sc, int vcpu, int64_t info1,
534    int in, struct vm_inout_str *vis)
535{
536	int error, s;
537
538	if (in) {
539		vis->seg_name = VM_REG_GUEST_ES;
540	} else {
541		/* The segment field has standard encoding */
542		s = (info1 >> 10) & 0x7;
543		vis->seg_name = vm_segment_name(s);
544	}
545
546	error = svm_getdesc(svm_sc, vcpu, vis->seg_name, &vis->seg_desc);
547	KASSERT(error == 0, ("%s: svm_getdesc error %d", __func__, error));
548}
549
550static int
551svm_inout_str_addrsize(uint64_t info1)
552{
553        uint32_t size;
554
555        size = (info1 >> 7) & 0x7;
556        switch (size) {
557        case 1:
558                return (2);     /* 16 bit */
559        case 2:
560                return (4);     /* 32 bit */
561        case 4:
562                return (8);     /* 64 bit */
563        default:
564                panic("%s: invalid size encoding %d", __func__, size);
565        }
566}
567
568static void
569svm_paging_info(struct vmcb_state *state, struct vm_guest_paging *paging)
570{
571
572	paging->cr3 = state->cr3;
573	paging->cpl = svm_cpl(state);
574	paging->cpu_mode = svm_vcpu_mode(state->efer);
575	paging->paging_mode = svm_paging_mode(state->cr0, state->cr4,
576		   	          state->efer);
577}
578
579/*
580 * Handle guest I/O intercept.
581 */
582static bool
583svm_handle_io(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
584{
585	struct vmcb_ctrl *ctrl;
586	struct vmcb_state *state;
587	struct svm_regctx *regs;
588	struct vm_inout_str *vis;
589	uint64_t info1;
590
591	state = svm_get_vmcb_state(svm_sc, vcpu);
592	ctrl  = svm_get_vmcb_ctrl(svm_sc, vcpu);
593	regs  = svm_get_guest_regctx(svm_sc, vcpu);
594	info1 = ctrl->exitinfo1;
595
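	/*
	 * Decode EXITINFO1 for IN/OUT intercepts: bit 0 is the direction
	 * (IN), bit 2 the string flag, bit 3 the REP prefix, bits 6:4 the
	 * operand size in bytes and bits 31:16 the port number.
	 */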
596	vmexit->exitcode 	= VM_EXITCODE_INOUT;
597	vmexit->u.inout.in 	= (info1 & BIT(0)) ? 1 : 0;
598	vmexit->u.inout.string 	= (info1 & BIT(2)) ? 1 : 0;
599	vmexit->u.inout.rep 	= (info1 & BIT(3)) ? 1 : 0;
600	vmexit->u.inout.bytes 	= (info1 >> 4) & 0x7;
601	vmexit->u.inout.port 	= (uint16_t)(info1 >> 16);
602	vmexit->u.inout.eax 	= (uint32_t)(state->rax);
603
604	if (vmexit->u.inout.string) {
605		vmexit->exitcode = VM_EXITCODE_INOUT_STR;
606		vis = &vmexit->u.inout_str;
607		svm_paging_info(state, &vis->paging);
608		vis->rflags = state->rflags;
609		vis->cr0 = state->cr0;
610		vis->index = svm_inout_str_index(regs, vmexit->u.inout.in);
611		vis->count = svm_inout_str_count(regs, vmexit->u.inout.rep);
612		vis->addrsize = svm_inout_str_addrsize(info1);
613		svm_inout_str_seginfo(svm_sc, vcpu, info1,
614		    vmexit->u.inout.in, vis);
615	}
616
617	return (false);
618}
619
620static int
621svm_npf_paging(uint64_t exitinfo1)
622{
623
624	if (exitinfo1 & VMCB_NPF_INFO1_W)
625		return (VM_PROT_WRITE);
626
627	return (VM_PROT_READ);
628}
629
630static bool
631svm_npf_emul_fault(uint64_t exitinfo1)
632{
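	/*
	 * Only attempt instruction emulation when the nested fault was not
	 * an instruction fetch (ID), did not happen while walking the guest
	 * page tables (GPT), and a valid guest physical address was
	 * recorded (GPA).
	 */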
633
634	if (exitinfo1 & VMCB_NPF_INFO1_ID) {
635		return (false);
636	}
637
638	if (exitinfo1 & VMCB_NPF_INFO1_GPT) {
639		return (false);
640	}
641
642	if ((exitinfo1 & VMCB_NPF_INFO1_GPA) == 0) {
643		return (false);
644	}
645
646	return (true);
647}
648
649/*
650 * Special handling of EFER MSR.
651 * An SVM guest must keep the SVM enable bit set in EFER, so prohibit the
652 * guest from clearing it.
653 */
654static void
655svm_efer(struct svm_softc *svm_sc, int vcpu, boolean_t write)
656{
657	struct svm_regctx *swctx;
658	struct vmcb_state *state;
659
660	state = svm_get_vmcb_state(svm_sc, vcpu);
661	swctx = svm_get_guest_regctx(svm_sc, vcpu);
662
663	if (write) {
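		/* WRMSR supplies the new value in EDX:EAX; keep SVME set. */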
664		state->efer = ((swctx->e.g.sctx_rdx & (uint32_t)~0) << 32) |
665				((uint32_t)state->rax) | EFER_SVM;
666	} else {
667		state->rax = (uint32_t)state->efer;
668		swctx->e.g.sctx_rdx = (uint32_t)(state->efer >> 32);
669	}
670}
671
672/*
673 * Determine the cause of virtual cpu exit and handle VMEXIT.
674 * Return: false - Break vcpu execution loop and handle vmexit
675 *		   in kernel or user space.
676 *	   true  - Continue vcpu run.
677 */
678static bool
679svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
680{
681	struct vmcb_state *state;
682	struct vmcb_ctrl *ctrl;
683	struct svm_regctx *ctx;
684	uint64_t code, info1, info2, val;
685	uint32_t eax, ecx, edx;
686	bool update_rip, loop, retu;
687
688	KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu));
689
690	state = svm_get_vmcb_state(svm_sc, vcpu);
691	ctrl  = svm_get_vmcb_ctrl(svm_sc, vcpu);
692	ctx   = svm_get_guest_regctx(svm_sc, vcpu);
693	code  = ctrl->exitcode;
694	info1 = ctrl->exitinfo1;
695	info2 = ctrl->exitinfo2;
696
697	update_rip = true;
698	loop = true;
699	vmexit->exitcode = VM_EXITCODE_VMX;
700	vmexit->u.vmx.status = 0;
701
702	switch (code) {
703		case	VMCB_EXIT_MC: /* Machine Check. */
704			vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_MTRAP, 1);
705			vmexit->exitcode = VM_EXITCODE_MTRAP;
706			loop = false;
707			break;
708
709		case	VMCB_EXIT_MSR:	/* MSR access. */
710			eax = state->rax;
711			ecx = ctx->sctx_rcx;
712			edx = ctx->e.g.sctx_rdx;
713
714			if (ecx == MSR_EFER) {
715				VCPU_CTR0(svm_sc->vm, vcpu,"VMEXIT EFER\n");
716				svm_efer(svm_sc, vcpu, info1);
717				break;
718			}
719
720			retu = false;
721			if (info1) {
722				/* VM exited because of write MSR */
723				vmm_stat_incr(svm_sc->vm, vcpu,
724					VMEXIT_WRMSR, 1);
725				vmexit->exitcode = VM_EXITCODE_WRMSR;
726				vmexit->u.msr.code = ecx;
727				val = (uint64_t)edx << 32 | eax;
728				if (emulate_wrmsr(svm_sc->vm, vcpu, ecx, val,
729					&retu)) {
730					vmexit->u.msr.wval = val;
731					loop = false;
732				} else
733					loop = retu ? false : true;
734
735				VCPU_CTR3(svm_sc->vm, vcpu,
736					"VMEXIT WRMSR(%s handling) 0x%lx @0x%x",
737					loop ? "kernel" : "user", val, ecx);
738			} else {
739				vmm_stat_incr(svm_sc->vm, vcpu,
740					VMEXIT_RDMSR, 1);
741				vmexit->exitcode = VM_EXITCODE_RDMSR;
742				vmexit->u.msr.code = ecx;
743				if (emulate_rdmsr(svm_sc->vm, vcpu, ecx,
744					&retu)) {
745					loop = false;
746				} else
747					loop = retu ? false : true;
748				VCPU_CTR3(svm_sc->vm, vcpu, "SVM:VMEXIT RDMSR"
749					" MSB=0x%08x, LSB=%08x @0x%x",
750					ctx->e.g.sctx_rdx, state->rax, ecx);
751			}
752
753#define MSR_AMDK8_IPM           0xc0010055
754			/*
755			 * We can't hide the AMD C1E idle capability since it is
756			 * based on CPU generation; for now, ignore accesses to
757			 * this MSR by vcpus.
758			 * XXX: special handling of AMD C1E - Ignore.
759			 */
760			 if (ecx == MSR_AMDK8_IPM)
761				loop = true;
762			break;
763
764		case VMCB_EXIT_INTR:
765			/*
766			 * Exit on an external interrupt.
767			 * Let the host interrupt handler run; if it is a guest
768			 * interrupt, the local APIC will inject the event into the guest.
769			 */
770			update_rip = false;
771			VCPU_CTR1(svm_sc->vm, vcpu, "SVM:VMEXIT ExtInt"
772				" RIP:0x%lx.\n", state->rip);
773			vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_EXTINT, 1);
774			break;
775
776		case VMCB_EXIT_IO:
777			loop = svm_handle_io(svm_sc, vcpu, vmexit);
778			vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_INOUT, 1);
779			update_rip = true;
780			break;
781
782		case VMCB_EXIT_CPUID:
783			vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_CPUID, 1);
784			(void)x86_emulate_cpuid(svm_sc->vm, vcpu,
785					(uint32_t *)&state->rax,
786					(uint32_t *)&ctx->sctx_rbx,
787					(uint32_t *)&ctx->sctx_rcx,
788					(uint32_t *)&ctx->e.g.sctx_rdx);
789			VCPU_CTR0(svm_sc->vm, vcpu, "SVM:VMEXIT CPUID\n");
790			break;
791
792		case VMCB_EXIT_HLT:
793			vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_HLT, 1);
794 			if (ctrl->v_irq) {
795				 /* Interrupt is pending, can't halt guest. */
796				vmm_stat_incr(svm_sc->vm, vcpu,
797					VMEXIT_HLT_IGNORED, 1);
798				VCPU_CTR0(svm_sc->vm, vcpu,
799					"VMEXIT halt ignored.");
800			} else {
801				VCPU_CTR0(svm_sc->vm, vcpu,
802					"VMEXIT halted CPU.");
803				vmexit->exitcode = VM_EXITCODE_HLT;
804				vmexit->u.hlt.rflags = state->rflags;
805				loop = false;
806
807			}
808			break;
809
810		case VMCB_EXIT_PAUSE:
811			VCPU_CTR0(svm_sc->vm, vcpu, "SVM:VMEXIT pause");
812			vmexit->exitcode = VM_EXITCODE_PAUSE;
813			vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_PAUSE, 1);
814
815			break;
816
817		case VMCB_EXIT_NPF:
818			loop = false;
819			update_rip = false;
820
821        		if (info1 & VMCB_NPF_INFO1_RSV) {
822 				VCPU_CTR2(svm_sc->vm, vcpu, "SVM_ERR:NPT"
823					" reserved bit is set,"
824					"INFO1:0x%lx INFO2:0x%lx .\n",
825					info1, info2);
826        			break;
827			}
828
829			 /* EXITINFO2 has the physical fault address (GPA). */
830			if (vm_mem_allocated(svm_sc->vm, info2)) {
831 				VCPU_CTR3(svm_sc->vm, vcpu, "SVM:NPF-paging,"
832					"RIP:0x%lx INFO1:0x%lx INFO2:0x%lx .\n",
833				 	state->rip, info1, info2);
834				vmexit->exitcode = VM_EXITCODE_PAGING;
835				vmexit->u.paging.gpa = info2;
836				vmexit->u.paging.fault_type =
837					svm_npf_paging(info1);
838				vmm_stat_incr(svm_sc->vm, vcpu,
839					VMEXIT_NESTED_FAULT, 1);
840			} else if (svm_npf_emul_fault(info1)) {
841 				VCPU_CTR3(svm_sc->vm, vcpu, "SVM:NPF inst_emul,"
842					"RIP:0x%lx INFO1:0x%lx INFO2:0x%lx .\n",
843					state->rip, info1, info2);
844				vmexit->exitcode = VM_EXITCODE_INST_EMUL;
845				vmexit->u.inst_emul.gpa = info2;
846				vmexit->u.inst_emul.gla = VIE_INVALID_GLA;
847				vmexit->u.inst_emul.paging.cr3 = state->cr3;
848				vmexit->u.inst_emul.paging.cpu_mode =
849					svm_vcpu_mode(state->efer);
850				vmexit->u.inst_emul.paging.paging_mode =
851					svm_paging_mode(state->cr0, state->cr4,
852                                                 state->efer);
853				/* XXX: get CPL from SS */
854				vmexit->u.inst_emul.paging.cpl = 0;
855				/*
856				 * If the DecodeAssist SVM feature is not present,
857				 * we don't have the faulting instruction's length.
858				 * The new RIP will be calculated based on software
859				 * instruction emulation.
860				 */
861				vmexit->inst_length = VIE_INST_SIZE;
862				vmm_stat_incr(svm_sc->vm, vcpu,
863					VMEXIT_INST_EMUL, 1);
864			}
865
866			break;
867
868		case VMCB_EXIT_SHUTDOWN:
869			VCPU_CTR0(svm_sc->vm, vcpu, "SVM:VMEXIT shutdown.");
870			loop = false;
871			break;
872
873		case VMCB_EXIT_INVALID:
874			VCPU_CTR0(svm_sc->vm, vcpu, "SVM:VMEXIT INVALID.");
875			loop = false;
876			break;
877
878		default:
879			 /* Return to user space. */
880			loop = false;
881			update_rip = false;
882			VCPU_CTR3(svm_sc->vm, vcpu, "VMEXIT=0x%lx"
883				" EXITINFO1: 0x%lx EXITINFO2:0x%lx\n",
884		 		ctrl->exitcode, info1, info2);
885			VCPU_CTR3(svm_sc->vm, vcpu, "SVM:RIP: 0x%lx nRIP:0x%lx"
886				" Inst decoder len:%d\n", state->rip,
887				ctrl->nrip, ctrl->inst_decode_size);
888			vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_UNKNOWN, 1);
889			break;
890	}
891
892	vmexit->rip = state->rip;
893	if (update_rip) {
894		if (ctrl->nrip == 0) {
895 			VCPU_CTR1(svm_sc->vm, vcpu, "SVM_ERR:nRIP is not set "
896				 "for RIP0x%lx.\n", state->rip);
897			vmexit->exitcode = VM_EXITCODE_VMX;
898		} else
899			vmexit->rip = ctrl->nrip;
900	}
901
902	/* If vcpu execution is continued, update RIP. */
903	if (loop) {
904		state->rip = vmexit->rip;
905	}
906
907	if (state->rip == 0) {
908		VCPU_CTR0(svm_sc->vm, vcpu, "SVM_ERR:RIP is NULL\n");
909		vmexit->exitcode = VM_EXITCODE_VMX;
910	}
911
912	return (loop);
913}
914
915/*
916 * Inject NMI to virtual cpu.
917 */
918static int
919svm_inject_nmi(struct svm_softc *svm_sc, int vcpu)
920{
921	struct vmcb_ctrl *ctrl;
922
923	KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu));
924
925	ctrl  = svm_get_vmcb_ctrl(svm_sc, vcpu);
926	/* Nothing to do if there is no NMI pending for this vcpu. */
927	if (!vm_nmi_pending(svm_sc->vm, vcpu))
928		return (0);
929
930	 /* Inject NMI, vector number is not used.*/
931	if (vmcb_eventinject(ctrl, VMCB_EVENTINJ_TYPE_NMI, IDT_NMI, 0, false)) {
932		VCPU_CTR0(svm_sc->vm, vcpu, "SVM:NMI injection failed.\n");
933		return (EIO);
934	}
935
936	/* Acknowledge the request is accepted.*/
937	vm_nmi_clear(svm_sc->vm, vcpu);
938
939	VCPU_CTR0(svm_sc->vm, vcpu, "SVM:Injected NMI.\n");
940
941	return (1);
942}
943
944/*
945 * Inject event to virtual cpu.
946 */
947static void
948svm_inj_interrupts(struct svm_softc *svm_sc, int vcpu, struct vlapic *vlapic)
949{
950	struct vmcb_ctrl *ctrl;
951	struct vmcb_state *state;
952	struct vm_exception exc;
953	int vector;
954
955	KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu));
956
957	state = svm_get_vmcb_state(svm_sc, vcpu);
958	ctrl  = svm_get_vmcb_ctrl(svm_sc, vcpu);
959
960	/* Can't inject multiple events at once. */
961	if (ctrl->eventinj & VMCB_EVENTINJ_VALID) {
962		VCPU_CTR1(svm_sc->vm, vcpu,
963			"SVM:Last event(0x%lx) is pending.\n", ctrl->eventinj);
964		return ;
965	}
966
967	/* Wait for guest to come out of interrupt shadow. */
968	if (ctrl->intr_shadow) {
969		VCPU_CTR0(svm_sc->vm, vcpu, "SVM:Guest in interrupt shadow.\n");
970		return;
971	}
972
973	if (vm_exception_pending(svm_sc->vm, vcpu, &exc)) {
974		KASSERT(exc.vector >= 0 && exc.vector < 32,
975			("Exception vector %d invalid", exc.vector));
976		if (vmcb_eventinject(ctrl, VMCB_EVENTINJ_TYPE_EXCEPTION,
977			exc.vector, exc.error_code,
978			exc.error_code_valid)) {
979			VCPU_CTR1(svm_sc->vm, vcpu, "SVM:Exception%d injection"
980				" failed.\n", exc.vector);
981			return;
982		}
983	}
984	/* NMI event has priority over interrupts.*/
985	if (svm_inject_nmi(svm_sc, vcpu)) {
986		return;
987	}
988
989	/* Ask the local apic for a vector to inject */
990	if (!vlapic_pending_intr(vlapic, &vector))
991		return;
992
993	if (vector < 32 || vector > 255) {
994		VCPU_CTR1(svm_sc->vm, vcpu, "SVM_ERR:Event injection"
995			" invalid vector=%d.\n", vector);
996		ERR("SVM_ERR:Event injection invalid vector=%d.\n", vector);
997		return;
998	}
999
1000	if ((state->rflags & PSL_I) == 0) {
1001		VCPU_CTR0(svm_sc->vm, vcpu, "SVM:Interrupt is disabled\n");
1002		return;
1003	}
1004
1005	if (vmcb_eventinject(ctrl, VMCB_EVENTINJ_TYPE_INTR, vector, 0, false)) {
1006		VCPU_CTR1(svm_sc->vm, vcpu, "SVM:Event injection failed to"
1007			" vector=%d.\n", vector);
1008		return;
1009	}
1010
1011	/* Acknowledge that event is accepted.*/
1012	vlapic_intr_accepted(vlapic, vector);
1013	VCPU_CTR1(svm_sc->vm, vcpu, "SVM:event injected,vector=%d.\n", vector);
1014}
1015
1016/*
1017 * Restore host Task Register selector type after every vcpu exit.
1018 */
1019static void
1020setup_tss_type(void)
1021{
1022	struct system_segment_descriptor *desc;
1023
1024	desc = (struct system_segment_descriptor *)&gdt[curcpu * NGDT +
1025		GPROC0_SEL];
1026	/*
1027	 * The TSS type that should be restored for the host is 64-bit
1028	 * available (9), not busy (0xb) as it reads after a guest run, since
1029	 * ltr() faults on a busy TSS; see APMvol2 Rev3.21 4.8.3 System Descriptors table.
1030	 */
1031	desc->sd_type = 9;
1032}
1033
1034static void
1035svm_handle_exitintinfo(struct svm_softc *svm_sc, int vcpu)
1036{
1037	struct vmcb_ctrl *ctrl;
1038	uint64_t intinfo;
1039
1040	ctrl  	= svm_get_vmcb_ctrl(svm_sc, vcpu);
1041
1042	/*
1043	 * VMEXIT while delivering an exception or interrupt.
1044	 * Inject it as virtual interrupt.
1045	 * Section 15.7.2 Intercepts during IDT interrupt delivery.
1046	 */
1047	intinfo = ctrl->exitintinfo;
1048
1049	if (intinfo & VMCB_EXITINTINFO_VALID) {
1050		vmm_stat_incr(svm_sc->vm, vcpu, VCPU_EXITINTINFO, 1);
1051		VCPU_CTR1(svm_sc->vm, vcpu, "SVM:EXITINTINFO:0x%lx is valid\n",
1052			intinfo);
1053		if (vmcb_eventinject(ctrl, VMCB_EXITINTINFO_TYPE(intinfo),
1054			VMCB_EXITINTINFO_VECTOR(intinfo),
1055			VMCB_EXITINTINFO_EC(intinfo),
1056			VMCB_EXITINTINFO_EC_VALID & intinfo)) {
1057			VCPU_CTR1(svm_sc->vm, vcpu, "SVM:couldn't inject pending"
1058				" interrupt, exitintinfo:0x%lx\n", intinfo);
1059		}
1060	}
1061}
1062/*
1063 * Start vcpu with specified RIP.
1064 */
1065static int
1066svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap,
1067	void *rend_cookie, void *suspended_cookie)
1068{
1069	struct svm_regctx *hctx, *gctx;
1070	struct svm_softc *svm_sc;
1071	struct svm_vcpu *vcpustate;
1072	struct vmcb_state *state;
1073	struct vmcb_ctrl *ctrl;
1074	struct vm_exit *vmexit;
1075	struct vlapic *vlapic;
1076	struct vm *vm;
1077	uint64_t vmcb_pa;
1078	bool loop;	/* Continue vcpu execution loop. */
1079
1080	loop = true;
1081	svm_sc = arg;
1082	vm = svm_sc->vm;
1083
1084	vcpustate = svm_get_vcpu(svm_sc, vcpu);
1085	state = svm_get_vmcb_state(svm_sc, vcpu);
1086	ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu);
1087	vmexit = vm_exitinfo(vm, vcpu);
1088	vlapic = vm_lapic(vm, vcpu);
1089
1090	gctx = svm_get_guest_regctx(svm_sc, vcpu);
1091	hctx = &host_ctx[curcpu];
1092	vmcb_pa = svm_sc->vcpu[vcpu].vmcb_pa;
1093
1094	if (vcpustate->lastcpu != curcpu) {
1095		/* The virtual CPU is now running on a different host CPU. */
1096		vmm_stat_incr(vm, vcpu, VCPU_MIGRATIONS, 1);
1097
1098		/*
1099		 * Flush all TLB mappings for this guest on this CPU;
1100		 * they might be stale.
1101		 */
1102		ctrl->tlb_ctrl = VMCB_TLB_FLUSH_GUEST;
1103
1104		/* The CPU can't use any cached VMCB state. */
1105		ctrl->vmcb_clean = VMCB_CACHE_NONE;
1106	} else {
1107		/*
1108		 * XXX: Using the same ASID for all vcpus of a VM will cause TLB
1109		 * corruption. This can easily be reproduced by multiplexing two
1110		 * vcpus on the same core.
1111		 * For now, flush the guest TLB on every vmrun.
1112		 */
1113		ctrl->tlb_ctrl = VMCB_TLB_FLUSH_GUEST;
1114
1115		/*
1116		 * This is the same cpu on which the vcpu last ran, so we
1117		 * don't need to reload all VMCB state.
1118		 * The ASID is unique to this guest.
1119		 * The IOPM is unchanged.
1120		 * RVI/NPT is unchanged.
1121		 *
1122		 */
1123		ctrl->vmcb_clean = VMCB_CACHE_ASID |
1124				VMCB_CACHE_IOPM |
1125				VMCB_CACHE_NP;
1126	}
1127
1128	vcpustate->lastcpu = curcpu;
1129	VCPU_CTR3(vm, vcpu, "SVM:Enter vmrun RIP:0x%lx"
1130		" inst len=%d/%d\n",
1131		rip, vmexit->inst_length,
1132		vmexit->u.inst_emul.vie.num_valid);
1133	/* Update Guest RIP */
1134	state->rip = rip;
1135
1136	do {
1137		vmexit->inst_length = 0;
1138		/* The scheduler has asked us to give up the cpu. */
1139		if (curthread->td_flags & (TDF_ASTPENDING | TDF_NEEDRESCHED)) {
1140			vmexit->exitcode = VM_EXITCODE_BOGUS;
1141			vmm_stat_incr(vm, vcpu, VMEXIT_ASTPENDING, 1);
1142			VCPU_CTR1(vm, vcpu,
1143				"SVM: ASTPENDING, RIP:0x%lx\n", state->rip);
1144			vmexit->rip = state->rip;
1145			break;
1146		}
1147
1148		if (vcpu_suspended(suspended_cookie)) {
1149			vm_exit_suspended(vm, vcpu, state->rip);
1150			break;
1151		}
1152
1153		if (vcpu_rendezvous_pending(rend_cookie)) {
1154			vmexit->exitcode = VM_EXITCODE_RENDEZVOUS;
1155			vmm_stat_incr(vm, vcpu, VMEXIT_RENDEZVOUS, 1);
1156			VCPU_CTR1(vm, vcpu,
1157				"SVM: VCPU rendezvous, RIP:0x%lx\n",
1158				state->rip);
1159			vmexit->rip = state->rip;
1160			break;
1161		}
1162
1163		(void)svm_set_vmcb(svm_get_vmcb(svm_sc, vcpu), svm_sc->asid);
1164
1165		svm_handle_exitintinfo(svm_sc, vcpu);
1166
1167		(void)svm_inj_interrupts(svm_sc, vcpu, vlapic);
1168
1169		/* Change TSS type to available.*/
1170		setup_tss_type();
1171
1172		/*
1173		 * Disable global interrupt to guarantee atomicity
1174		 * during loading of guest state.
1175		 * See 15.5.1 "Loading guest state" APM2.
1176		 */
1177		disable_gintr();
1178
1179		/* Launch Virtual Machine. */
1180		svm_launch(vmcb_pa, gctx, hctx);
1181
1182		/*
1183		 * Only the host GDTR and IDTR are saved and restored by SVM;
1184		 * LDTR and TR need to be restored by the VMM.
1185		 * XXX: the kernel doesn't use the LDT, only user space does.
1186		 */
1187		ltr(GSEL(GPROC0_SEL, SEL_KPL));
1188
1189		/*
1190		 * The guest FS and GS selectors are stashed by vmload and vmsave.
1191		 * The host FS and GS selectors are stashed by svm_launch().
1192		 * The host GS base, which holds the per-cpu area, needs to be
1193		 * restored before enabling global interrupts.
1194		 * FS is not used by the FreeBSD kernel, and the kernel restores
1195		 * the user FS selector and base before returning to
1196		 * userland.
1197		 *
1198		 * Note: 'curcpu' can't be used here because it relies on the per-cpu data.
1199		 */
1200		wrmsr(MSR_GSBASE, (uint64_t)&__pcpu[vcpustate->lastcpu]);
1201		wrmsr(MSR_KGSBASE, (uint64_t)&__pcpu[vcpustate->lastcpu]);
1202
1203		/* The vcpu exits with global interrupts disabled; re-enable them. */
1204		enable_gintr();
1205
1206		/* Handle #VMEXIT and if required return to user space. */
1207		loop = svm_vmexit(svm_sc, vcpu, vmexit);
1208		vcpustate->loop++;
1209		vmm_stat_incr(vm, vcpu, VMEXIT_COUNT, 1);
1210
1211	} while (loop);
1212
1213	return (0);
1214}
1215
1216/*
1217 * Cleanup for virtual machine.
1218 */
1219static void
1220svm_vmcleanup(void *arg)
1221{
1222	struct svm_softc *svm_sc;
1223
1224	svm_sc = arg;
1225
1226	VCPU_CTR0(svm_sc->vm, 0, "SVM:cleanup\n");
1227
1228	free(svm_sc, M_SVM);
1229}
1230
1231/*
1232 * Return pointer to hypervisor saved register state.
1233 */
1234static register_t *
1235swctx_regptr(struct svm_regctx *regctx, int reg)
1236{
1237
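	/*
	 * Registers such as RAX, RSP and RIP are kept in the VMCB and are
	 * accessed through vmcb_read()/vmcb_write() instead.
	 */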
1238	switch (reg) {
1239		case VM_REG_GUEST_RBX:
1240			return (&regctx->sctx_rbx);
1241		case VM_REG_GUEST_RCX:
1242			return (&regctx->sctx_rcx);
1243		case VM_REG_GUEST_RDX:
1244			return (&regctx->e.g.sctx_rdx);
1245		case VM_REG_GUEST_RDI:
1246			return (&regctx->e.g.sctx_rdi);
1247		case VM_REG_GUEST_RSI:
1248			return (&regctx->e.g.sctx_rsi);
1249		case VM_REG_GUEST_RBP:
1250			return (&regctx->sctx_rbp);
1251		case VM_REG_GUEST_R8:
1252			return (&regctx->sctx_r8);
1253		case VM_REG_GUEST_R9:
1254			return (&regctx->sctx_r9);
1255		case VM_REG_GUEST_R10:
1256			return (&regctx->sctx_r10);
1257		case VM_REG_GUEST_R11:
1258			return (&regctx->sctx_r11);
1259		case VM_REG_GUEST_R12:
1260			return (&regctx->sctx_r12);
1261		case VM_REG_GUEST_R13:
1262			return (&regctx->sctx_r13);
1263		case VM_REG_GUEST_R14:
1264			return (&regctx->sctx_r14);
1265		case VM_REG_GUEST_R15:
1266			return (&regctx->sctx_r15);
1267		default:
1268			ERR("Unknown register requested, reg=%d.\n", reg);
1269			break;
1270	}
1271
1272	return (NULL);
1273}
1274
1275/*
1276 * Interface to read guest registers.
1277 * This can be SVM h/w saved or hypervisor saved register.
1278 */
1279static int
1280svm_getreg(void *arg, int vcpu, int ident, uint64_t *val)
1281{
1282	struct svm_softc *svm_sc;
1283	struct vmcb *vmcb;
1284	register_t *reg;
1285
1286	svm_sc = arg;
1287	KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu));
1288
1289	vmcb = svm_get_vmcb(svm_sc, vcpu);
1290
1291	if (vmcb_read(vmcb, ident, val) == 0) {
1292		return (0);
1293	}
1294
1295	reg = swctx_regptr(svm_get_guest_regctx(svm_sc, vcpu), ident);
1296
1297	if (reg != NULL) {
1298		*val = *reg;
1299		return (0);
1300	}
1301
1302 	ERR("SVM_ERR:reg type %x is not saved in VMCB.\n", ident);
1303	return (EINVAL);
1304}
1305
1306/*
1307 * Interface to write to guest registers.
1308 * This can be SVM h/w saved or hypervisor saved register.
1309 */
1310static int
1311svm_setreg(void *arg, int vcpu, int ident, uint64_t val)
1312{
1313	struct svm_softc *svm_sc;
1314	struct vmcb *vmcb;
1315	register_t *reg;
1316
1317	svm_sc = arg;
1318	KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu));
1319
1320	vmcb = svm_get_vmcb(svm_sc, vcpu);
1321	if (vmcb_write(vmcb, ident, val) == 0) {
1322		return (0);
1323	}
1324
1325	reg = swctx_regptr(svm_get_guest_regctx(svm_sc, vcpu), ident);
1326
1327	if (reg != NULL) {
1328		*reg = val;
1329		return (0);
1330	}
1331
1332 	ERR("SVM_ERR:reg type %x is not saved in VMCB.\n", ident);
1333	return (EINVAL);
1334}
1335
1336
1337/*
1338 * Interface to set various descriptors.
1339 */
1340static int
1341svm_setdesc(void *arg, int vcpu, int type, struct seg_desc *desc)
1342{
1343	struct svm_softc *svm_sc;
1344	struct vmcb *vmcb;
1345	struct vmcb_segment *seg;
1346	uint16_t attrib;
1347
1348	svm_sc = arg;
1349	KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu));
1350
1351	vmcb = svm_get_vmcb(svm_sc, vcpu);
1352
1353	VCPU_CTR1(svm_sc->vm, vcpu, "SVM:set_desc: Type%d\n", type);
1354
1355	seg = vmcb_seg(vmcb, type);
1356	if (seg == NULL) {
1357		ERR("SVM_ERR:Unsupported segment type%d\n", type);
1358		return (EINVAL);
1359	}
1360
1361	/* Map seg_desc access to VMCB attribute format.*/
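	/*
	 * The VMCB packs the attribute bits contiguously: access bits 7:0
	 * (type, S, DPL, P) map to attrib bits 7:0, and access bits 15:12
	 * (AVL, L, D/B, G) map to attrib bits 11:8.
	 */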
1362	attrib = ((desc->access & 0xF000) >> 4) | (desc->access & 0xFF);
1363	VCPU_CTR3(svm_sc->vm, vcpu, "SVM:[sel %d attribute 0x%x limit:0x%x]\n",
1364		type, desc->access, desc->limit);
1365	seg->attrib = attrib;
1366	seg->base = desc->base;
1367	seg->limit = desc->limit;
1368
1369	return (0);
1370}
1371
1372/*
1373 * Interface to get guest descriptor.
1374 */
1375static int
1376svm_getdesc(void *arg, int vcpu, int type, struct seg_desc *desc)
1377{
1378	struct svm_softc *svm_sc;
1379	struct vmcb_segment	*seg;
1380
1381	svm_sc = arg;
1382	KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu));
1383
1384	VCPU_CTR1(svm_sc->vm, vcpu, "SVM:get_desc: Type%d\n", type);
1385
1386	seg = vmcb_seg(svm_get_vmcb(svm_sc, vcpu), type);
1387	if (!seg) {
1388		ERR("SVM_ERR:Unsupported segment type%d\n", type);
1389		return (EINVAL);
1390	}
1391
1392	/* Map seg_desc access to VMCB attribute format.*/
1393	desc->access = ((seg->attrib & 0xF00) << 4) | (seg->attrib & 0xFF);
1394	desc->base = seg->base;
1395	desc->limit = seg->limit;
1396
1397	/*
1398	 * VT-x uses bit 16 (Unusable) to indicate a segment that has been
1399	 * loaded with a NULL segment selector. The 'desc->access' field is
1400	 * interpreted in the VT-x format by the processor-independent code.
1401	 *
1402	 * SVM uses the 'P' bit to convey the same information so convert it
1403	 * into the VT-x format. For more details refer to section
1404	 * "Segment State in the VMCB" in APMv2.
1405	 */
1406	if (type == VM_REG_GUEST_CS || type == VM_REG_GUEST_TR)
1407		desc->access |= 0x80;		/* CS and TR are always present */
1408
1409	if (!(desc->access & 0x80))
1410		desc->access |= 0x10000;	/* Unusable segment */
1411
1412	return (0);
1413}
1414
1415static int
1416svm_setcap(void *arg, int vcpu, int type, int val)
1417{
1418	struct svm_softc *svm_sc;
1419	struct vmcb_ctrl *ctrl;
1420	int ret = ENOENT;
1421
1422	svm_sc = arg;
1423	KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu));
1424
1425	ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu);
1426
1427	switch (type) {
1428		case VM_CAP_HALT_EXIT:
1429			if (val)
1430				ctrl->ctrl1 |= VMCB_INTCPT_HLT;
1431			else
1432				ctrl->ctrl1 &= ~VMCB_INTCPT_HLT;
1433			ret = 0;
1434			VCPU_CTR1(svm_sc->vm, vcpu, "SVM:set_cap:Halt exit %s.\n",
1435				val ? "enabled": "disabled");
1436			break;
1437
1438		case VM_CAP_PAUSE_EXIT:
1439			if (val)
1440				ctrl->ctrl1 |= VMCB_INTCPT_PAUSE;
1441			else
1442				ctrl->ctrl1 &= ~VMCB_INTCPT_PAUSE;
1443			ret = 0;
1444			VCPU_CTR1(svm_sc->vm, vcpu, "SVM:set_cap:Pause exit %s.\n",
1445				val ? "enabled": "disabled");
1446			break;
1447
1448		case VM_CAP_MTRAP_EXIT:
1449			if (val)
1450				ctrl->exception |= BIT(IDT_MC);
1451			else
1452				ctrl->exception &= ~BIT(IDT_MC);
1453			ret = 0;
1454			VCPU_CTR1(svm_sc->vm, vcpu, "SVM:set_cap:MC exit %s.\n",
1455				val ? "enabled": "disabled");
1456			break;
1457
1458		case VM_CAP_UNRESTRICTED_GUEST:
1459			/* SVM doesn't need a special capability for unrestricted guests. */
1460			VCPU_CTR0(svm_sc->vm, vcpu, "SVM:set_cap:Unrestricted "
1461			"always enabled.\n");
1462			ret = 0;
1463			break;
1464
1465		default:
1466			break;
1467		}
1468
1469	return (ret);
1470}
1471
1472static int
1473svm_getcap(void *arg, int vcpu, int type, int *retval)
1474{
1475	struct svm_softc *svm_sc;
1476	struct vmcb_ctrl *ctrl;
1477
1478	svm_sc = arg;
1479	KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu));
1480
1481	ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu);
1482
1483	switch (type) {
1484		case VM_CAP_HALT_EXIT:
1485			*retval = (ctrl->ctrl1 & VMCB_INTCPT_HLT) ? 1 : 0;
1486			VCPU_CTR1(svm_sc->vm, vcpu, "SVM:get_cap:Halt exit %s.\n",
1487				*retval ? "enabled": "disabled");
1488			break;
1489
1490		case VM_CAP_PAUSE_EXIT:
1491			*retval = (ctrl->ctrl1 & VMCB_INTCPT_PAUSE) ? 1 : 0;
1492			VCPU_CTR1(svm_sc->vm, vcpu, "SVM:get_cap:Pause exit %s.\n",
1493				*retval ? "enabled": "disabled");
1494			break;
1495
1496		case VM_CAP_MTRAP_EXIT:
1497			*retval = (ctrl->exception & BIT(IDT_MC)) ? 1 : 0;
1498			VCPU_CTR1(svm_sc->vm, vcpu, "SVM:get_cap:MC exit %s.\n",
1499				*retval ? "enabled": "disabled");
1500			break;
1501
1502		case VM_CAP_UNRESTRICTED_GUEST:
1503			VCPU_CTR0(svm_sc->vm, vcpu, "SVM:get_cap:Unrestricted.\n");
1504			*retval = 1;
1505			break;
1506		default:
1507			break;
1508	}
1509
1510	return (0);
1511}
1512
1513static struct vlapic *
1514svm_vlapic_init(void *arg, int vcpuid)
1515{
1516	struct svm_softc *svm_sc;
1517	struct vlapic *vlapic;
1518
1519	svm_sc = arg;
1520	vlapic = malloc(sizeof(struct vlapic), M_SVM_VLAPIC, M_WAITOK | M_ZERO);
1521	vlapic->vm = svm_sc->vm;
1522	vlapic->vcpuid = vcpuid;
1523	vlapic->apic_page = (struct LAPIC *)&svm_sc->apic_page[vcpuid];
1524
1525	vlapic_init(vlapic);
1526
1527	return (vlapic);
1528}
1529
1530static void
1531svm_vlapic_cleanup(void *arg, struct vlapic *vlapic)
1532{
1533
1534	vlapic_cleanup(vlapic);
1535	free(vlapic, M_SVM_VLAPIC);
1536}
1537
1538struct vmm_ops vmm_ops_amd = {
1539	svm_init,
1540	svm_cleanup,
1541	svm_restore,
1542	svm_vminit,
1543	svm_vmrun,
1544	svm_vmcleanup,
1545	svm_getreg,
1546	svm_setreg,
1547	svm_getdesc,
1548	svm_setdesc,
1549	svm_getcap,
1550	svm_setcap,
1551	svm_npt_alloc,
1552	svm_npt_free,
1553	svm_vlapic_init,
1554	svm_vlapic_cleanup
1555};
1556