vmx_msr.c revision 276429
1254885Sdumbbell/*- 2254885Sdumbbell * Copyright (c) 2011 NetApp, Inc. 3254885Sdumbbell * All rights reserved. 4254885Sdumbbell * 5254885Sdumbbell * Redistribution and use in source and binary forms, with or without 6254885Sdumbbell * modification, are permitted provided that the following conditions 7254885Sdumbbell * are met: 8254885Sdumbbell * 1. Redistributions of source code must retain the above copyright 9254885Sdumbbell * notice, this list of conditions and the following disclaimer. 10254885Sdumbbell * 2. Redistributions in binary form must reproduce the above copyright 11254885Sdumbbell * notice, this list of conditions and the following disclaimer in the 12254885Sdumbbell * documentation and/or other materials provided with the distribution. 13254885Sdumbbell * 14254885Sdumbbell * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15254885Sdumbbell * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16254885Sdumbbell * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17254885Sdumbbell * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18254885Sdumbbell * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19254885Sdumbbell * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20254885Sdumbbell * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21254885Sdumbbell * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22254885Sdumbbell * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23254885Sdumbbell * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24254885Sdumbbell * SUCH DAMAGE. 25254885Sdumbbell * 26254885Sdumbbell * $FreeBSD: stable/10/sys/amd64/vmm/intel/vmx_msr.c 276429 2014-12-30 22:22:46Z neel $ 27254885Sdumbbell */ 28254885Sdumbbell 29254885Sdumbbell#include <sys/cdefs.h> 30254885Sdumbbell__FBSDID("$FreeBSD: stable/10/sys/amd64/vmm/intel/vmx_msr.c 276429 2014-12-30 22:22:46Z neel $"); 31254885Sdumbbell 32254885Sdumbbell#include <sys/param.h> 33254885Sdumbbell#include <sys/systm.h> 34254885Sdumbbell#include <sys/cpuset.h> 35254885Sdumbbell 36254885Sdumbbell#include <machine/clock.h> 37254885Sdumbbell#include <machine/cpufunc.h> 38254885Sdumbbell#include <machine/md_var.h> 39254885Sdumbbell#include <machine/specialreg.h> 40254885Sdumbbell#include <machine/vmm.h> 41254885Sdumbbell 42254885Sdumbbell#include "vmx.h" 43254885Sdumbbell#include "vmx_msr.h" 44254885Sdumbbell 45254885Sdumbbellstatic boolean_t 46254885Sdumbbellvmx_ctl_allows_one_setting(uint64_t msr_val, int bitpos) 47254885Sdumbbell{ 48254885Sdumbbell 49254885Sdumbbell if (msr_val & (1UL << (bitpos + 32))) 50254885Sdumbbell return (TRUE); 51254885Sdumbbell else 52254885Sdumbbell return (FALSE); 53254885Sdumbbell} 54254885Sdumbbell 55254885Sdumbbellstatic boolean_t 56254885Sdumbbellvmx_ctl_allows_zero_setting(uint64_t msr_val, int bitpos) 57254885Sdumbbell{ 58254885Sdumbbell 59254885Sdumbbell if ((msr_val & (1UL << bitpos)) == 0) 60254885Sdumbbell return (TRUE); 61254885Sdumbbell else 62254885Sdumbbell return (FALSE); 63254885Sdumbbell} 64254885Sdumbbell 65254885Sdumbbelluint32_t 66254885Sdumbbellvmx_revision(void) 67254885Sdumbbell{ 68254885Sdumbbell 69254885Sdumbbell return (rdmsr(MSR_VMX_BASIC) & 0xffffffff); 70254885Sdumbbell} 71254885Sdumbbell 72254885Sdumbbell/* 73254885Sdumbbell * Generate a bitmask to be used for the VMCS execution control fields. 74254885Sdumbbell * 75254885Sdumbbell * The caller specifies what bits should be set to one in 'ones_mask' 76254885Sdumbbell * and what bits should be set to zero in 'zeros_mask'. The don't-care 77254885Sdumbbell * bits are set to the default value. The default values are obtained 78254885Sdumbbell * based on "Algorithm 3" in Section 27.5.1 "Algorithms for Determining 79254885Sdumbbell * VMX Capabilities". 80254885Sdumbbell * 81254885Sdumbbell * Returns zero on success and non-zero on error. 82254885Sdumbbell */ 83254885Sdumbbellint 84254885Sdumbbellvmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask, 85254885Sdumbbell uint32_t zeros_mask, uint32_t *retval) 86254885Sdumbbell{ 87254885Sdumbbell int i; 88254885Sdumbbell uint64_t val, trueval; 89254885Sdumbbell boolean_t true_ctls_avail, one_allowed, zero_allowed; 90254885Sdumbbell 91254885Sdumbbell /* We cannot ask the same bit to be set to both '1' and '0' */ 92254885Sdumbbell if ((ones_mask ^ zeros_mask) != (ones_mask | zeros_mask)) 93254885Sdumbbell return (EINVAL); 94254885Sdumbbell 95254885Sdumbbell if (rdmsr(MSR_VMX_BASIC) & (1UL << 55)) 96254885Sdumbbell true_ctls_avail = TRUE; 97254885Sdumbbell else 98254885Sdumbbell true_ctls_avail = FALSE; 99254885Sdumbbell 100254885Sdumbbell val = rdmsr(ctl_reg); 101254885Sdumbbell if (true_ctls_avail) 102254885Sdumbbell trueval = rdmsr(true_ctl_reg); /* step c */ 103254885Sdumbbell else 104254885Sdumbbell trueval = val; /* step a */ 105254885Sdumbbell 106254885Sdumbbell for (i = 0; i < 32; i++) { 107254885Sdumbbell one_allowed = vmx_ctl_allows_one_setting(trueval, i); 108254885Sdumbbell zero_allowed = vmx_ctl_allows_zero_setting(trueval, i); 109254885Sdumbbell 110254885Sdumbbell KASSERT(one_allowed || zero_allowed, 111254885Sdumbbell ("invalid zero/one setting for bit %d of ctl 0x%0x, " 112254885Sdumbbell "truectl 0x%0x\n", i, ctl_reg, true_ctl_reg)); 113254885Sdumbbell 114254885Sdumbbell if (zero_allowed && !one_allowed) { /* b(i),c(i) */ 115254885Sdumbbell if (ones_mask & (1 << i)) 116254885Sdumbbell return (EINVAL); 117254885Sdumbbell *retval &= ~(1 << i); 118 } else if (one_allowed && !zero_allowed) { /* b(i),c(i) */ 119 if (zeros_mask & (1 << i)) 120 return (EINVAL); 121 *retval |= 1 << i; 122 } else { 123 if (zeros_mask & (1 << i)) /* b(ii),c(ii) */ 124 *retval &= ~(1 << i); 125 else if (ones_mask & (1 << i)) /* b(ii), c(ii) */ 126 *retval |= 1 << i; 127 else if (!true_ctls_avail) 128 *retval &= ~(1 << i); /* b(iii) */ 129 else if (vmx_ctl_allows_zero_setting(val, i))/* c(iii)*/ 130 *retval &= ~(1 << i); 131 else if (vmx_ctl_allows_one_setting(val, i)) /* c(iv) */ 132 *retval |= 1 << i; 133 else { 134 panic("vmx_set_ctlreg: unable to determine " 135 "correct value of ctl bit %d for msr " 136 "0x%0x and true msr 0x%0x", i, ctl_reg, 137 true_ctl_reg); 138 } 139 } 140 } 141 142 return (0); 143} 144 145void 146msr_bitmap_initialize(char *bitmap) 147{ 148 149 memset(bitmap, 0xff, PAGE_SIZE); 150} 151 152int 153msr_bitmap_change_access(char *bitmap, u_int msr, int access) 154{ 155 int byte, bit; 156 157 if (msr <= 0x00001FFF) 158 byte = msr / 8; 159 else if (msr >= 0xC0000000 && msr <= 0xC0001FFF) 160 byte = 1024 + (msr - 0xC0000000) / 8; 161 else 162 return (EINVAL); 163 164 bit = msr & 0x7; 165 166 if (access & MSR_BITMAP_ACCESS_READ) 167 bitmap[byte] &= ~(1 << bit); 168 else 169 bitmap[byte] |= 1 << bit; 170 171 byte += 2048; 172 if (access & MSR_BITMAP_ACCESS_WRITE) 173 bitmap[byte] &= ~(1 << bit); 174 else 175 bitmap[byte] |= 1 << bit; 176 177 return (0); 178} 179 180static uint64_t misc_enable; 181static uint64_t platform_info; 182static uint64_t turbo_ratio_limit; 183static uint64_t host_msrs[GUEST_MSR_NUM]; 184 185static bool 186nehalem_cpu(void) 187{ 188 u_int family, model; 189 190 /* 191 * The family:model numbers belonging to the Nehalem microarchitecture 192 * are documented in Section 35.5, Intel SDM dated Feb 2014. 193 */ 194 family = CPUID_TO_FAMILY(cpu_id); 195 model = CPUID_TO_MODEL(cpu_id); 196 if (family == 0x6) { 197 switch (model) { 198 case 0x1A: 199 case 0x1E: 200 case 0x1F: 201 case 0x2E: 202 return (true); 203 default: 204 break; 205 } 206 } 207 return (false); 208} 209 210static bool 211westmere_cpu(void) 212{ 213 u_int family, model; 214 215 /* 216 * The family:model numbers belonging to the Westmere microarchitecture 217 * are documented in Section 35.6, Intel SDM dated Feb 2014. 218 */ 219 family = CPUID_TO_FAMILY(cpu_id); 220 model = CPUID_TO_MODEL(cpu_id); 221 if (family == 0x6) { 222 switch (model) { 223 case 0x25: 224 case 0x2C: 225 return (true); 226 default: 227 break; 228 } 229 } 230 return (false); 231} 232 233void 234vmx_msr_init(void) 235{ 236 uint64_t bus_freq, ratio; 237 int i; 238 239 /* 240 * It is safe to cache the values of the following MSRs because 241 * they don't change based on curcpu, curproc or curthread. 242 */ 243 host_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR); 244 host_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR); 245 host_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR); 246 host_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK); 247 248 /* 249 * Initialize emulated MSRs 250 */ 251 misc_enable = rdmsr(MSR_IA32_MISC_ENABLE); 252 /* 253 * Set mandatory bits 254 * 11: branch trace disabled 255 * 12: PEBS unavailable 256 * Clear unsupported features 257 * 16: SpeedStep enable 258 * 18: enable MONITOR FSM 259 */ 260 misc_enable |= (1 << 12) | (1 << 11); 261 misc_enable &= ~((1 << 18) | (1 << 16)); 262 263 if (nehalem_cpu() || westmere_cpu()) 264 bus_freq = 133330000; /* 133Mhz */ 265 else 266 bus_freq = 100000000; /* 100Mhz */ 267 268 /* 269 * XXXtime 270 * The ratio should really be based on the virtual TSC frequency as 271 * opposed to the host TSC. 272 */ 273 ratio = (tsc_freq / bus_freq) & 0xff; 274 275 /* 276 * The register definition is based on the micro-architecture 277 * but the following bits are always the same: 278 * [15:8] Maximum Non-Turbo Ratio 279 * [28] Programmable Ratio Limit for Turbo Mode 280 * [29] Programmable TDC-TDP Limit for Turbo Mode 281 * [47:40] Maximum Efficiency Ratio 282 * 283 * The other bits can be safely set to 0 on all 284 * micro-architectures up to Haswell. 285 */ 286 platform_info = (ratio << 8) | (ratio << 40); 287 288 /* 289 * The number of valid bits in the MSR_TURBO_RATIO_LIMITx register is 290 * dependent on the maximum cores per package supported by the micro- 291 * architecture. For e.g., Westmere supports 6 cores per package and 292 * uses the low 48 bits. Sandybridge support 8 cores per package and 293 * uses up all 64 bits. 294 * 295 * However, the unused bits are reserved so we pretend that all bits 296 * in this MSR are valid. 297 */ 298 for (i = 0; i < 8; i++) 299 turbo_ratio_limit = (turbo_ratio_limit << 8) | ratio; 300} 301 302void 303vmx_msr_guest_init(struct vmx *vmx, int vcpuid) 304{ 305 /* 306 * The permissions bitmap is shared between all vcpus so initialize it 307 * once when initializing the vBSP. 308 */ 309 if (vcpuid == 0) { 310 guest_msr_rw(vmx, MSR_LSTAR); 311 guest_msr_rw(vmx, MSR_CSTAR); 312 guest_msr_rw(vmx, MSR_STAR); 313 guest_msr_rw(vmx, MSR_SF_MASK); 314 guest_msr_rw(vmx, MSR_KGSBASE); 315 } 316 return; 317} 318 319void 320vmx_msr_guest_enter(struct vmx *vmx, int vcpuid) 321{ 322 uint64_t *guest_msrs = vmx->guest_msrs[vcpuid]; 323 324 /* Save host MSRs (if any) and restore guest MSRs */ 325 wrmsr(MSR_LSTAR, guest_msrs[IDX_MSR_LSTAR]); 326 wrmsr(MSR_CSTAR, guest_msrs[IDX_MSR_CSTAR]); 327 wrmsr(MSR_STAR, guest_msrs[IDX_MSR_STAR]); 328 wrmsr(MSR_SF_MASK, guest_msrs[IDX_MSR_SF_MASK]); 329 wrmsr(MSR_KGSBASE, guest_msrs[IDX_MSR_KGSBASE]); 330} 331 332void 333vmx_msr_guest_exit(struct vmx *vmx, int vcpuid) 334{ 335 uint64_t *guest_msrs = vmx->guest_msrs[vcpuid]; 336 337 /* Save guest MSRs */ 338 guest_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR); 339 guest_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR); 340 guest_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR); 341 guest_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK); 342 guest_msrs[IDX_MSR_KGSBASE] = rdmsr(MSR_KGSBASE); 343 344 /* Restore host MSRs */ 345 wrmsr(MSR_LSTAR, host_msrs[IDX_MSR_LSTAR]); 346 wrmsr(MSR_CSTAR, host_msrs[IDX_MSR_CSTAR]); 347 wrmsr(MSR_STAR, host_msrs[IDX_MSR_STAR]); 348 wrmsr(MSR_SF_MASK, host_msrs[IDX_MSR_SF_MASK]); 349 350 /* MSR_KGSBASE will be restored on the way back to userspace */ 351} 352 353int 354vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu) 355{ 356 int error = 0; 357 358 switch (num) { 359 case MSR_IA32_MISC_ENABLE: 360 *val = misc_enable; 361 break; 362 case MSR_PLATFORM_INFO: 363 *val = platform_info; 364 break; 365 case MSR_TURBO_RATIO_LIMIT: 366 case MSR_TURBO_RATIO_LIMIT1: 367 *val = turbo_ratio_limit; 368 break; 369 default: 370 error = EINVAL; 371 break; 372 } 373 return (error); 374} 375 376int 377vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu) 378{ 379 uint64_t changed; 380 int error; 381 382 error = 0; 383 switch (num) { 384 case MSR_IA32_MISC_ENABLE: 385 changed = val ^ misc_enable; 386 /* 387 * If the host has disabled the NX feature then the guest 388 * also cannot use it. However, a Linux guest will try to 389 * enable the NX feature by writing to the MISC_ENABLE MSR. 390 * 391 * This can be safely ignored because the memory management 392 * code looks at CPUID.80000001H:EDX.NX to check if the 393 * functionality is actually enabled. 394 */ 395 changed &= ~(1UL << 34); 396 397 /* 398 * Punt to userspace if any other bits are being modified. 399 */ 400 if (changed) 401 error = EINVAL; 402 403 break; 404 default: 405 error = EINVAL; 406 break; 407 } 408 409 return (error); 410} 411