xmsr.c revision 221828
1/*-
2 * Copyright (c) 2011 NetApp, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD$
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD$");
31
32#include <sys/param.h>
33#include <machine/apicreg.h>
34
35#include <assert.h>
36#include <stdio.h>
37#include <stdlib.h>
38#include <machine/vmm.h>
39#include <vmmapi.h>
40
41#include "fbsdrun.h"
42#include "xmsr.h"
43
/*
 * In-memory layout of the trampoline used for a direct hypervisor
 * jump into 64-bit mode.  Byte offsets from the start of the area:
 *
 *   0  mt_sig   - signature for guest->host verification
 *   8  mt_virt  - kernel virtual address of the trampoline
 *  16  mt_eip   - instruction virtual address
 *  24  mt_rsp   - stack pointer virtual address
 *  32  mt_cr3   - CR3, physical address of the kernel page table
 *  40  mt_gdtr  - 24-byte area for the null/code/data GDT entries
 */
#define MP_V64T_SIG	0xcafebabecafebabeULL

struct mp_v64tramp {
	uint64_t	mt_sig;		/* compared against MP_V64T_SIG */
	uint64_t	mt_virt;	/* trampoline kernel virtual address */
	uint64_t	mt_eip;		/* entry point virtual address */
	uint64_t	mt_rsp;		/* initial stack pointer */
	uint64_t	mt_cr3;		/* kernel page table physical address */
	uint64_t	mt_gdtr[3];	/* storage for three GDT entries */
};
63
/*
 * Per-vcpu boot state, indexed by vcpu id.
 *
 * CPU 0 is considered to be the BSP and is set to the RUNNING state.
 * All other CPUs are set up in the INIT state: only the BSP slot is
 * named in the initializer, so every other slot is zero-initialized,
 * and CPU_S_INIT is the enumerator whose value is 0.
 */
#define BSP  0

enum cpu_bstate {
	CPU_S_INIT,	/* initial state; an INIT IPI moves the CPU on */
	CPU_S_SIPI,	/* wait-for-SIPI state, entered after an INIT IPI */
	CPU_S_RUNNING	/* BSP, or an AP that has been started by a SIPI */
};

/*
 * The storage-class specifier leads the declaration; placing it after
 * the type specifier is an obsolescent form.
 */
static enum cpu_bstate cpu_b[VM_MAXCPU] = { [BSP] = CPU_S_RUNNING };

static void spinup_ap(struct vmctx *ctx, int newcpu, int vector,
	uint64_t *rip);
static void spinup_ap_direct64(struct vmctx *ctx, int newcpu, uintptr_t gaddr,
	uint64_t *rip);
77
78int
79emulate_wrmsr(struct vmctx *ctx, int vcpu, uint32_t code, uint64_t val)
80{
81	int dest;
82	int mode;
83	int thiscpu;
84	int vec;
85	int error, retval;
86	uint64_t rip;
87
88	retval = vcpu;
89	thiscpu = 1 << vcpu;
90
91	/*
92	 * The only MSR value handled is the x2apic CR register
93	 */
94	if (code != 0x830) {
95		printf("Unknown WRMSR code %x, val %lx, cpu %d\n",
96		       code, val, vcpu);
97		exit(1);
98	}
99
100	/*
101	 * The value written to the MSR will generate an IPI to
102	 * a set of CPUs. If this is a SIPI, create the initial
103	 * state for the CPU and switch to it. Otherwise, inject
104	 * an interrupt for the destination CPU(s), and request
105	 * a switch to the next available one by returning -1
106	 */
107	dest = val >> 32;
108	vec = val & APIC_VECTOR_MASK;
109	mode = val & APIC_DELMODE_MASK;
110
111	switch (mode) {
112	case APIC_DELMODE_INIT:
113		assert(dest != 0);
114		assert(dest < guest_ncpus);
115
116		/*
117		 * Ignore legacy de-assert INITs in x2apic mode
118		 */
119		if ((val & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT) {
120			break;
121		}
122		assert(cpu_b[dest] == CPU_S_INIT);
123
124		/*
125		 * Move CPU to wait-for-SIPI state
126		 */
127		error = vcpu_reset(ctx, dest);
128		assert(error == 0);
129
130		cpu_b[dest] = CPU_S_SIPI;
131		break;
132
133	case APIC_DELMODE_STARTUP:
134		assert(dest != 0);
135		assert(dest < guest_ncpus);
136		/*
137		 * Ignore SIPIs in any state other than wait-for-SIPI
138		 */
139		if (cpu_b[dest] != CPU_S_SIPI) {
140			break;
141		}
142
143		/*
144		 * Bring up the AP and signal the main loop that it is
145		 * available and to switch to it.
146		 */
147		spinup_ap(ctx, dest, vec, &rip);
148		cpu_b[dest] = CPU_S_RUNNING;
149		fbsdrun_addcpu(ctx, dest, rip);
150		retval = dest;
151		break;
152
153	default:
154		printf("APIC delivery mode %lx not supported!\n",
155		       val & APIC_DELMODE_MASK);
156		exit(1);
157	}
158
159	return (retval);
160}
161
162/*
163 * There are 2 startup modes possible here:
164 *  - if the CPU supports 'unrestricted guest' mode, the spinup can
165 *    set up the processor state in power-on 16-bit mode, with the CS:IP
166 *    init'd to the specified low-mem 4K page.
167 *  - if the guest has requested a 64-bit trampoline in the low-mem 4K
168 *    page by placing in the specified signature, set up the register
169 *    state using register state in the signature. Note that this
170 *    requires accessing guest physical memory to read the signature
171 *    while 'unrestricted mode' does not.
172 */
173static void
174spinup_ap(struct vmctx *ctx, int newcpu, int vector, uint64_t *rip)
175{
176	int error;
177	uint16_t cs;
178	uint64_t desc_base;
179	uint32_t desc_limit, desc_access;
180
181	if (fbsdrun_vmexit_on_hlt()) {
182		error = vm_set_capability(ctx, newcpu, VM_CAP_HALT_EXIT, 1);
183		assert(error == 0);
184	}
185
186	if (fbsdrun_vmexit_on_pause()) {
187		error = vm_set_capability(ctx, newcpu, VM_CAP_PAUSE_EXIT, 1);
188		assert(error == 0);
189	}
190
191	error = vm_set_capability(ctx, newcpu, VM_CAP_UNRESTRICTED_GUEST, 1);
192	if (error) {
193		/*
194		 * If the guest does not support real-mode execution then
195		 * we will bring up the AP directly in 64-bit mode.
196		 */
197		spinup_ap_direct64(ctx, newcpu, vector << PAGE_SHIFT, rip);
198	} else {
199		/*
200		 * Update the %cs and %rip of the guest so that it starts
201		 * executing real mode code at at 'vector << 12'.
202		 */
203		*rip = 0;
204		error = vm_set_register(ctx, newcpu, VM_REG_GUEST_RIP, *rip);
205		assert(error == 0);
206
207		error = vm_get_desc(ctx, newcpu, VM_REG_GUEST_CS, &desc_base,
208				    &desc_limit, &desc_access);
209		assert(error == 0);
210
211		desc_base = vector << PAGE_SHIFT;
212		error = vm_set_desc(ctx, newcpu, VM_REG_GUEST_CS,
213				    desc_base, desc_limit, desc_access);
214		assert(error == 0);
215
216		cs = (vector << PAGE_SHIFT) >> 4;
217		error = vm_set_register(ctx, newcpu, VM_REG_GUEST_CS, cs);
218		assert(error == 0);
219	}
220}
221
222static void
223spinup_ap_direct64(struct vmctx *ctx, int newcpu, uintptr_t gaddr,
224	uint64_t *rip)
225{
226	struct mp_v64tramp *mvt;
227	char *errstr;
228	int error;
229	uint64_t gdtbase;
230
231	mvt = paddr_guest2host(gaddr);
232
233	assert(mvt->mt_sig == MP_V64T_SIG);
234
235	/*
236	 * Set up the 3-entry GDT using memory supplied in the
237	 * guest's trampoline structure.
238	 */
239	vm_setup_freebsd_gdt(mvt->mt_gdtr);
240
241#define  CHECK_ERROR(msg) \
242	if (error != 0) { \
243		errstr = msg; \
244		goto err_exit; \
245	}
246
247        /* entry point */
248	*rip = mvt->mt_eip;
249
250	/* Get the guest virtual address of the GDT */
251        gdtbase = mvt->mt_virt + __offsetof(struct mp_v64tramp, mt_gdtr);
252
253	error = vm_setup_freebsd_registers(ctx, newcpu, mvt->mt_eip,
254					   mvt->mt_cr3, gdtbase, mvt->mt_rsp);
255	CHECK_ERROR("vm_setup_freebsd_registers");
256
257	return;
258err_exit:
259	printf("spinup_ap_direct64: machine state error: %s", errstr);
260	exit(1);
261}
262