vmx_msr.c revision 284894
1/*-
2 * Copyright (c) 2011 NetApp, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: stable/10/sys/amd64/vmm/intel/vmx_msr.c 284894 2015-06-27 22:48:22Z neel $
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/10/sys/amd64/vmm/intel/vmx_msr.c 284894 2015-06-27 22:48:22Z neel $");
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/cpuset.h>
35
36#include <machine/clock.h>
37#include <machine/cpufunc.h>
38#include <machine/md_var.h>
39#include <machine/specialreg.h>
40#include <machine/vmm.h>
41
42#include "vmx.h"
43#include "vmx_msr.h"
44
45static boolean_t
46vmx_ctl_allows_one_setting(uint64_t msr_val, int bitpos)
47{
48
49	if (msr_val & (1UL << (bitpos + 32)))
50		return (TRUE);
51	else
52		return (FALSE);
53}
54
55static boolean_t
56vmx_ctl_allows_zero_setting(uint64_t msr_val, int bitpos)
57{
58
59	if ((msr_val & (1UL << bitpos)) == 0)
60		return (TRUE);
61	else
62		return (FALSE);
63}
64
65uint32_t
66vmx_revision(void)
67{
68
69	return (rdmsr(MSR_VMX_BASIC) & 0xffffffff);
70}
71
72/*
73 * Generate a bitmask to be used for the VMCS execution control fields.
74 *
75 * The caller specifies what bits should be set to one in 'ones_mask'
76 * and what bits should be set to zero in 'zeros_mask'. The don't-care
77 * bits are set to the default value. The default values are obtained
78 * based on "Algorithm 3" in Section 27.5.1 "Algorithms for Determining
79 * VMX Capabilities".
80 *
81 * Returns zero on success and non-zero on error.
82 */
83int
84vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
85	       uint32_t zeros_mask, uint32_t *retval)
86{
87	int i;
88	uint64_t val, trueval;
89	boolean_t true_ctls_avail, one_allowed, zero_allowed;
90
91	/* We cannot ask the same bit to be set to both '1' and '0' */
92	if ((ones_mask ^ zeros_mask) != (ones_mask | zeros_mask))
93		return (EINVAL);
94
95	if (rdmsr(MSR_VMX_BASIC) & (1UL << 55))
96		true_ctls_avail = TRUE;
97	else
98		true_ctls_avail = FALSE;
99
100	val = rdmsr(ctl_reg);
101	if (true_ctls_avail)
102		trueval = rdmsr(true_ctl_reg);		/* step c */
103	else
104		trueval = val;				/* step a */
105
106	for (i = 0; i < 32; i++) {
107		one_allowed = vmx_ctl_allows_one_setting(trueval, i);
108		zero_allowed = vmx_ctl_allows_zero_setting(trueval, i);
109
110		KASSERT(one_allowed || zero_allowed,
111			("invalid zero/one setting for bit %d of ctl 0x%0x, "
112			 "truectl 0x%0x\n", i, ctl_reg, true_ctl_reg));
113
114		if (zero_allowed && !one_allowed) {		/* b(i),c(i) */
115			if (ones_mask & (1 << i))
116				return (EINVAL);
117			*retval &= ~(1 << i);
118		} else if (one_allowed && !zero_allowed) {	/* b(i),c(i) */
119			if (zeros_mask & (1 << i))
120				return (EINVAL);
121			*retval |= 1 << i;
122		} else {
123			if (zeros_mask & (1 << i))	/* b(ii),c(ii) */
124				*retval &= ~(1 << i);
125			else if (ones_mask & (1 << i)) /* b(ii), c(ii) */
126				*retval |= 1 << i;
127			else if (!true_ctls_avail)
128				*retval &= ~(1 << i);	/* b(iii) */
129			else if (vmx_ctl_allows_zero_setting(val, i))/* c(iii)*/
130				*retval &= ~(1 << i);
131			else if (vmx_ctl_allows_one_setting(val, i)) /* c(iv) */
132				*retval |= 1 << i;
133			else {
134				panic("vmx_set_ctlreg: unable to determine "
135				      "correct value of ctl bit %d for msr "
136				      "0x%0x and true msr 0x%0x", i, ctl_reg,
137				      true_ctl_reg);
138			}
139		}
140	}
141
142	return (0);
143}
144
145void
146msr_bitmap_initialize(char *bitmap)
147{
148
149	memset(bitmap, 0xff, PAGE_SIZE);
150}
151
152int
153msr_bitmap_change_access(char *bitmap, u_int msr, int access)
154{
155	int byte, bit;
156
157	if (msr <= 0x00001FFF)
158		byte = msr / 8;
159	else if (msr >= 0xC0000000 && msr <= 0xC0001FFF)
160		byte = 1024 + (msr - 0xC0000000) / 8;
161	else
162		return (EINVAL);
163
164	bit = msr & 0x7;
165
166	if (access & MSR_BITMAP_ACCESS_READ)
167		bitmap[byte] &= ~(1 << bit);
168	else
169		bitmap[byte] |= 1 << bit;
170
171	byte += 2048;
172	if (access & MSR_BITMAP_ACCESS_WRITE)
173		bitmap[byte] &= ~(1 << bit);
174	else
175		bitmap[byte] |= 1 << bit;
176
177	return (0);
178}
179
180static uint64_t misc_enable;
181static uint64_t platform_info;
182static uint64_t turbo_ratio_limit;
183static uint64_t host_msrs[GUEST_MSR_NUM];
184
185static bool
186nehalem_cpu(void)
187{
188	u_int family, model;
189
190	/*
191	 * The family:model numbers belonging to the Nehalem microarchitecture
192	 * are documented in Section 35.5, Intel SDM dated Feb 2014.
193	 */
194	family = CPUID_TO_FAMILY(cpu_id);
195	model = CPUID_TO_MODEL(cpu_id);
196	if (family == 0x6) {
197		switch (model) {
198		case 0x1A:
199		case 0x1E:
200		case 0x1F:
201		case 0x2E:
202			return (true);
203		default:
204			break;
205		}
206	}
207	return (false);
208}
209
210static bool
211westmere_cpu(void)
212{
213	u_int family, model;
214
215	/*
216	 * The family:model numbers belonging to the Westmere microarchitecture
217	 * are documented in Section 35.6, Intel SDM dated Feb 2014.
218	 */
219	family = CPUID_TO_FAMILY(cpu_id);
220	model = CPUID_TO_MODEL(cpu_id);
221	if (family == 0x6) {
222		switch (model) {
223		case 0x25:
224		case 0x2C:
225			return (true);
226		default:
227			break;
228		}
229	}
230	return (false);
231}
232
static bool
pat_valid(uint64_t val)
{
	int i;
	uint8_t type;

	/*
	 * From Intel SDM: Table "Memory Types That Can Be Encoded With PAT"
	 *
	 * Walk PA0 through PA7 one byte at a time and reject the value
	 * if any entry is not a valid memory type encoding. The valid
	 * encodings are 0, 1 and 4 through 7.
	 */
	for (i = 0; i < 8; i++, val >>= 8) {
		type = val & 0xff;
		if (type != 0 && type != 1 && (type < 4 || type > 7))
			return (false);
	}
	return (true);
}
251
252void
253vmx_msr_init(void)
254{
255	uint64_t bus_freq, ratio;
256	int i;
257
258	/*
259	 * It is safe to cache the values of the following MSRs because
260	 * they don't change based on curcpu, curproc or curthread.
261	 */
262	host_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
263	host_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
264	host_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
265	host_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
266
267	/*
268	 * Initialize emulated MSRs
269	 */
270	misc_enable = rdmsr(MSR_IA32_MISC_ENABLE);
271	/*
272	 * Set mandatory bits
273	 *  11:   branch trace disabled
274	 *  12:   PEBS unavailable
275	 * Clear unsupported features
276	 *  16:   SpeedStep enable
277	 *  18:   enable MONITOR FSM
278	 */
279	misc_enable |= (1 << 12) | (1 << 11);
280	misc_enable &= ~((1 << 18) | (1 << 16));
281
282	if (nehalem_cpu() || westmere_cpu())
283		bus_freq = 133330000;		/* 133Mhz */
284	else
285		bus_freq = 100000000;		/* 100Mhz */
286
287	/*
288	 * XXXtime
289	 * The ratio should really be based on the virtual TSC frequency as
290	 * opposed to the host TSC.
291	 */
292	ratio = (tsc_freq / bus_freq) & 0xff;
293
294	/*
295	 * The register definition is based on the micro-architecture
296	 * but the following bits are always the same:
297	 * [15:8]  Maximum Non-Turbo Ratio
298	 * [28]    Programmable Ratio Limit for Turbo Mode
299	 * [29]    Programmable TDC-TDP Limit for Turbo Mode
300	 * [47:40] Maximum Efficiency Ratio
301	 *
302	 * The other bits can be safely set to 0 on all
303	 * micro-architectures up to Haswell.
304	 */
305	platform_info = (ratio << 8) | (ratio << 40);
306
307	/*
308	 * The number of valid bits in the MSR_TURBO_RATIO_LIMITx register is
309	 * dependent on the maximum cores per package supported by the micro-
310	 * architecture. For e.g., Westmere supports 6 cores per package and
311	 * uses the low 48 bits. Sandybridge support 8 cores per package and
312	 * uses up all 64 bits.
313	 *
314	 * However, the unused bits are reserved so we pretend that all bits
315	 * in this MSR are valid.
316	 */
317	for (i = 0; i < 8; i++)
318		turbo_ratio_limit = (turbo_ratio_limit << 8) | ratio;
319}
320
321void
322vmx_msr_guest_init(struct vmx *vmx, int vcpuid)
323{
324	uint64_t *guest_msrs;
325
326	guest_msrs = vmx->guest_msrs[vcpuid];
327
328	/*
329	 * The permissions bitmap is shared between all vcpus so initialize it
330	 * once when initializing the vBSP.
331	 */
332	if (vcpuid == 0) {
333		guest_msr_rw(vmx, MSR_LSTAR);
334		guest_msr_rw(vmx, MSR_CSTAR);
335		guest_msr_rw(vmx, MSR_STAR);
336		guest_msr_rw(vmx, MSR_SF_MASK);
337		guest_msr_rw(vmx, MSR_KGSBASE);
338	}
339
340	/*
341	 * Initialize guest IA32_PAT MSR with default value after reset.
342	 */
343	guest_msrs[IDX_MSR_PAT] = PAT_VALUE(0, PAT_WRITE_BACK) |
344	    PAT_VALUE(1, PAT_WRITE_THROUGH)	|
345	    PAT_VALUE(2, PAT_UNCACHED)		|
346	    PAT_VALUE(3, PAT_UNCACHEABLE)	|
347	    PAT_VALUE(4, PAT_WRITE_BACK)	|
348	    PAT_VALUE(5, PAT_WRITE_THROUGH)	|
349	    PAT_VALUE(6, PAT_UNCACHED)		|
350	    PAT_VALUE(7, PAT_UNCACHEABLE);
351
352	return;
353}
354
355void
356vmx_msr_guest_enter(struct vmx *vmx, int vcpuid)
357{
358	uint64_t *guest_msrs = vmx->guest_msrs[vcpuid];
359
360	/* Save host MSRs (if any) and restore guest MSRs */
361	wrmsr(MSR_LSTAR, guest_msrs[IDX_MSR_LSTAR]);
362	wrmsr(MSR_CSTAR, guest_msrs[IDX_MSR_CSTAR]);
363	wrmsr(MSR_STAR, guest_msrs[IDX_MSR_STAR]);
364	wrmsr(MSR_SF_MASK, guest_msrs[IDX_MSR_SF_MASK]);
365	wrmsr(MSR_KGSBASE, guest_msrs[IDX_MSR_KGSBASE]);
366}
367
368void
369vmx_msr_guest_exit(struct vmx *vmx, int vcpuid)
370{
371	uint64_t *guest_msrs = vmx->guest_msrs[vcpuid];
372
373	/* Save guest MSRs */
374	guest_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
375	guest_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
376	guest_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
377	guest_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
378	guest_msrs[IDX_MSR_KGSBASE] = rdmsr(MSR_KGSBASE);
379
380	/* Restore host MSRs */
381	wrmsr(MSR_LSTAR, host_msrs[IDX_MSR_LSTAR]);
382	wrmsr(MSR_CSTAR, host_msrs[IDX_MSR_CSTAR]);
383	wrmsr(MSR_STAR, host_msrs[IDX_MSR_STAR]);
384	wrmsr(MSR_SF_MASK, host_msrs[IDX_MSR_SF_MASK]);
385
386	/* MSR_KGSBASE will be restored on the way back to userspace */
387}
388
389int
390vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu)
391{
392	const uint64_t *guest_msrs;
393	int error;
394
395	guest_msrs = vmx->guest_msrs[vcpuid];
396	error = 0;
397
398	switch (num) {
399	case MSR_IA32_MISC_ENABLE:
400		*val = misc_enable;
401		break;
402	case MSR_PLATFORM_INFO:
403		*val = platform_info;
404		break;
405	case MSR_TURBO_RATIO_LIMIT:
406	case MSR_TURBO_RATIO_LIMIT1:
407		*val = turbo_ratio_limit;
408		break;
409	case MSR_PAT:
410		*val = guest_msrs[IDX_MSR_PAT];
411		break;
412	default:
413		error = EINVAL;
414		break;
415	}
416	return (error);
417}
418
419int
420vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu)
421{
422	uint64_t *guest_msrs;
423	uint64_t changed;
424	int error;
425
426	guest_msrs = vmx->guest_msrs[vcpuid];
427	error = 0;
428
429	switch (num) {
430	case MSR_IA32_MISC_ENABLE:
431		changed = val ^ misc_enable;
432		/*
433		 * If the host has disabled the NX feature then the guest
434		 * also cannot use it. However, a Linux guest will try to
435		 * enable the NX feature by writing to the MISC_ENABLE MSR.
436		 *
437		 * This can be safely ignored because the memory management
438		 * code looks at CPUID.80000001H:EDX.NX to check if the
439		 * functionality is actually enabled.
440		 */
441		changed &= ~(1UL << 34);
442
443		/*
444		 * Punt to userspace if any other bits are being modified.
445		 */
446		if (changed)
447			error = EINVAL;
448
449		break;
450	case MSR_PAT:
451		if (pat_valid(val))
452			guest_msrs[IDX_MSR_PAT] = val;
453		else
454			vm_inject_gp(vmx->vm, vcpuid);
455		break;
456	default:
457		error = EINVAL;
458		break;
459	}
460
461	return (error);
462}
463