/*	$OpenBSD: identcpu.c,v 1.145 2024/06/24 21:22:14 bluhm Exp $	*/
/*	$NetBSD: identcpu.c,v 1.1 2003/04/26 18:39:28 fvdl Exp $	*/

/*
 * Copyright (c) 2003 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Frank van der Linden for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/proc.h>
#include <sys/sysctl.h>

#include "vmm.h"
#include "pvbus.h"

#include <machine/cpu.h>
#include <machine/cpufunc.h>

#if NPVBUS > 0
#include <dev/pv/pvvar.h>
#endif

void	replacesmap(void);
void	replacemeltdown(void);
uint64_t cpu_freq(struct cpu_info *);
void	tsc_identify(struct cpu_info *);
void	tsc_timecounter_init(struct cpu_info *, uint64_t);
#if NVMM > 0
void	cpu_check_vmm_cap(struct cpu_info *);
#endif /* NVMM > 0 */

/* sysctl wants this. */
char cpu_model[48];
int cpuspeed;

int amd64_has_xcrypt;
int amd64_pos_cbit;
int has_rdrand;
int has_rdseed;

int
cpu_amd64speed(int *freq)
{
	*freq = cpuspeed;
	return (0);
}

#ifndef SMALL_KERNEL
void	intelcore_update_sensor(void *);
void	cpu_hz_update_sensor(void *);

/*
 * Temperature read on the CPU is relative to the maximum
 * temperature supported by the CPU, Tj(Max).
 * Refer to:
 * 64-ia-32-architectures-software-developer-vol-3c-part-3-manual.pdf
 * Section 35 and
 * http://www.intel.com/content/dam/www/public/us/en/documents/
 * white-papers/cpu-monitoring-dts-peci-paper.pdf
 *
 * Tj(Max) on Intel CPUs can be between 70 and 105 degC.  Since Westmere
 * we can read TJmax from the die; for older CPUs we have to guess or
 * use undocumented MSRs.  We then subtract the temperature portion of
 * the thermal status MSR from the maximum to get the current temperature.
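 *
 * For example, with Tj(Max) = 100 and a thermal status reading of 30,
 * the reported temperature is 70 degC, stored as 343150000 micro-degrees
 * Kelvin by the sensor framework.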
 */
void
intelcore_update_sensor(void *args)
{
	struct cpu_info *ci = (struct cpu_info *) args;
	u_int64_t msr;
	int max = 100;

	/* Only some Core family chips have MSR_TEMPERATURE_TARGET. */
	if (ci->ci_model == 0x0e &&
	    (rdmsr(MSR_TEMPERATURE_TARGET_UNDOCUMENTED) &
	    MSR_TEMPERATURE_TARGET_LOW_BIT_UNDOCUMENTED))
		max = 85;

	/*
	 * Newer CPUs can tell you what their max temperature is.
	 * See: '64-ia-32-architectures-software-developer-
	 * vol-3c-part-3-manual.pdf'
	 */
	if (ci->ci_model > 0x17 && ci->ci_model != 0x1c &&
	    ci->ci_model != 0x26 && ci->ci_model != 0x27 &&
	    ci->ci_model != 0x35 && ci->ci_model != 0x36)
		max = MSR_TEMPERATURE_TARGET_TJMAX(
		    rdmsr(MSR_TEMPERATURE_TARGET));

	msr = rdmsr(MSR_THERM_STATUS);
	if (msr & MSR_THERM_STATUS_VALID_BIT) {
		ci->ci_sensor.value = max - MSR_THERM_STATUS_TEMP(msr);
		/* micro degrees */
		ci->ci_sensor.value *= 1000000;
		/* kelvin */
		ci->ci_sensor.value += 273150000;
		ci->ci_sensor.flags &= ~SENSOR_FINVALID;
	} else {
		ci->ci_sensor.value = 0;
		ci->ci_sensor.flags |= SENSOR_FINVALID;
	}
}

/*
 * Effective CPU frequency measurement
 *
 * Refer to:
 * 64-ia-32-architectures-software-developer-vol-3b-part-2-manual.pdf
 * Section 14.2 and
 * OSRR for AMD Family 17h processors Section 2.1.2
 * Round to 50MHz which is the accuracy of this measurement.
 *
 * Note: sensor values are stored in micro-units (micro-Hz here), which
 * is why FREQ_50MHZ carries an extra factor of 1000000.
 */
#define FREQ_50MHZ (50ULL * 1000000ULL * 1000000ULL)
void
cpu_hz_update_sensor(void *args)
{
	extern uint64_t tsc_frequency;
	struct cpu_info *ci = args;
	uint64_t mperf, aperf, mdelta, adelta, val;
	unsigned long s;

	sched_peg_curproc(ci);

	s = intr_disable();
	mperf = rdmsr(MSR_MPERF);
	aperf = rdmsr(MSR_APERF);
	intr_restore(s);

	mdelta = mperf - ci->ci_hz_mperf;
	adelta = aperf - ci->ci_hz_aperf;
	ci->ci_hz_mperf = mperf;
	ci->ci_hz_aperf = aperf;

	if (mdelta > 0) {
		val = (adelta * 1000000) / mdelta * tsc_frequency;
		val = ((val + FREQ_50MHZ / 2) / FREQ_50MHZ) * FREQ_50MHZ;
		ci->ci_hz_sensor.value = val;
	}

	atomic_clearbits_int(&curproc->p_flag, P_CPUPEG);
}
#endif

void (*setperf_setup)(struct cpu_info *);

void via_nano_setup(struct cpu_info *ci);

void cpu_topology(struct cpu_info *ci);

void
via_nano_setup(struct cpu_info *ci)
{
	u_int32_t regs[4], val;
	u_int64_t msreg;
	int model = (ci->ci_signature >> 4) & 15;

	if (model >= 9) {
		CPUID(0xC0000000, regs[0], regs[1], regs[2], regs[3]);
		val = regs[0];
		if (val >= 0xC0000001) {
			CPUID(0xC0000001, regs[0], regs[1], regs[2], regs[3]);
			val = regs[3];
		} else
			val = 0;

		if (val & (C3_CPUID_HAS_RNG | C3_CPUID_HAS_ACE))
			printf("%s:", ci->ci_dev->dv_xname);

		/* Enable RNG if present and disabled */
		if (val & C3_CPUID_HAS_RNG) {
			extern int viac3_rnd_present;

			if (!(val & C3_CPUID_DO_RNG)) {
				msreg = rdmsr(0x110B);
				msreg |= 0x40;
				wrmsr(0x110B, msreg);
			}
			viac3_rnd_present = 1;
			printf(" RNG");
		}

		/* Enable AES engine if present and disabled */
		if (val & C3_CPUID_HAS_ACE) {
#ifdef CRYPTO
			if (!(val & C3_CPUID_DO_ACE)) {
				msreg = rdmsr(0x1107);
				msreg |= (0x01 << 28);
				wrmsr(0x1107, msreg);
			}
			amd64_has_xcrypt |= C3_HAS_AES;
#endif /* CRYPTO */
			printf(" AES");
		}

		/* Enable ACE2 engine if present and disabled */
		if (val & C3_CPUID_HAS_ACE2) {
#ifdef CRYPTO
			if (!(val & C3_CPUID_DO_ACE2)) {
				msreg = rdmsr(0x1107);
				msreg |= (0x01 << 28);
				wrmsr(0x1107, msreg);
			}
			amd64_has_xcrypt |= C3_HAS_AESCTR;
#endif /* CRYPTO */
			printf(" AES-CTR");
		}

		/* Enable SHA engine if present and disabled */
		if (val & C3_CPUID_HAS_PHE) {
#ifdef CRYPTO
			if (!(val & C3_CPUID_DO_PHE)) {
				msreg = rdmsr(0x1107);
				msreg |= (0x01 << 28/**/);
				wrmsr(0x1107, msreg);
			}
			amd64_has_xcrypt |= C3_HAS_SHA;
#endif /* CRYPTO */
			printf(" SHA1 SHA256");
		}

		/* Enable MM engine if present and disabled */
		if (val & C3_CPUID_HAS_PMM) {
#ifdef CRYPTO
			if (!(val & C3_CPUID_DO_PMM)) {
				msreg = rdmsr(0x1107);
				msreg |= (0x01 << 28/**/);
				wrmsr(0x1107, msreg);
			}
			amd64_has_xcrypt |= C3_HAS_MM;
#endif /* CRYPTO */
			printf(" RSA");
		}

		printf("\n");
	}
}

#ifndef SMALL_KERNEL
void via_update_sensor(void *args);
void
via_update_sensor(void *args)
{
	struct cpu_info *ci = (struct cpu_info *) args;
	u_int64_t msr;

	msr = rdmsr(MSR_CENT_TMTEMPERATURE);
	ci->ci_sensor.value = (msr & 0xffffff);
	/* micro degrees */
	ci->ci_sensor.value *= 1000000;
	ci->ci_sensor.value += 273150000;
	ci->ci_sensor.flags &= ~SENSOR_FINVALID;
}
#endif

uint64_t
cpu_freq_ctr(struct cpu_info *ci, uint32_t cpu_perf_eax,
    uint32_t cpu_perf_edx)
{
	uint64_t count, last_count, msr;

	if ((ci->ci_flags & CPUF_CONST_TSC) == 0 ||
	    (cpu_perf_eax & CPUIDEAX_VERID) <= 1 ||
	    CPUIDEDX_NUM_FC(cpu_perf_edx) <= 1)
		return (0);

	msr = rdmsr(MSR_PERF_FIXED_CTR_CTRL);
	if (msr & MSR_PERF_FIXED_CTR_FC(1, MSR_PERF_FIXED_CTR_FC_MASK)) {
		/* some hypervisor is dicking us around */
		return (0);
	}

	msr |= MSR_PERF_FIXED_CTR_FC(1, MSR_PERF_FIXED_CTR_FC_1);
	wrmsr(MSR_PERF_FIXED_CTR_CTRL, msr);

	msr = rdmsr(MSR_PERF_GLOBAL_CTRL) | MSR_PERF_GLOBAL_CTR1_EN;
	wrmsr(MSR_PERF_GLOBAL_CTRL, msr);

	last_count = rdmsr(MSR_PERF_FIXED_CTR1);
	delay(100000);
	count = rdmsr(MSR_PERF_FIXED_CTR1);

	msr = rdmsr(MSR_PERF_FIXED_CTR_CTRL);
	msr &= ~MSR_PERF_FIXED_CTR_FC(1, MSR_PERF_FIXED_CTR_FC_MASK);
	wrmsr(MSR_PERF_FIXED_CTR_CTRL, msr);

	msr = rdmsr(MSR_PERF_GLOBAL_CTRL);
	msr &= ~MSR_PERF_GLOBAL_CTR1_EN;
	wrmsr(MSR_PERF_GLOBAL_CTRL, msr);

	return ((count - last_count) * 10);
}

uint64_t
cpu_freq(struct cpu_info *ci)
{
	uint64_t last_count, count;

	last_count = rdtsc();
	delay(100000);
	count = rdtsc();

	return ((count - last_count) * 10);
}

/* print flags from one cpuid for cpu0 */
static inline void
pcpu0id3(const char *id, char reg1, uint32_t val1, const char *bits1,
    char reg2, uint32_t val2, const char *bits2,
    char reg3, uint32_t val3, const char *bits3)
{
	if (val1 || val2 || val3) {
		printf("\ncpu0: cpuid %s", id);
		if (val1)
			printf(" e%cx=%b", reg1, val1, bits1);
		if (val2)
			printf(" e%cx=%b", reg2, val2, bits2);
		if (val3)
			printf(" e%cx=%b", reg3, val3, bits3);
	}
}

/* print flags from one 32-bit MSR for cpu0 */
static inline void
pmsr032(uint32_t msr, uint32_t value, const char *bits)
{
	if (value)
		printf("\ncpu0: msr %x=%b", msr, value, bits);
}

static void
pbitdiff(uint32_t value, uint32_t base_value, const char *bits)
{
	uint32_t minus;
	if (value == base_value)
		return;
	minus = base_value & ~value;
	value &= ~base_value;
	if (minus)
		printf("-%b", minus, bits);
	if (value)
		printf("+%b", value, bits);
}

static inline void
pcpuid(struct cpu_info *ci, const char *id, char reg, uint32_t val,
    uint32_t prev_val, const char *bits)
{
	if (CPU_IS_PRIMARY(ci))
		pcpu0id3(id, reg, val, bits, 0, 0, NULL, 0, 0, NULL);
	else if (val != prev_val) {
		printf("\n%s: cpuid %s e%cx=", ci->ci_dev->dv_xname, id, reg);
		pbitdiff(val, prev_val, bits);
	}
}

static inline void
pcpuid2(struct cpu_info *ci, const char *id,
    char reg1, uint32_t val1, uint32_t prev_val1, const char *bits1,
    char reg2, uint32_t val2, uint32_t prev_val2, const char *bits2)
{
	if (CPU_IS_PRIMARY(ci))
		pcpu0id3(id, reg1, val1, bits1, reg2, val2, bits2, 0, 0,
		    NULL);
	else if (val1 != prev_val1 || val2 != prev_val2) {
		printf("\n%s: cpuid %s", ci->ci_dev->dv_xname, id);
		if (val1 != prev_val1) {
			printf(" e%cx=", reg1);
			pbitdiff(val1, prev_val1, bits1);
		}
		if (val2 != prev_val2) {
			printf(" e%cx=", reg2);
			pbitdiff(val2, prev_val2, bits2);
		}
	}
}

static inline void
pcpuid3(struct cpu_info *ci, const char *id,
    char reg1, uint32_t val1, uint32_t prev_val1, const char *bits1,
    char reg2, uint32_t val2, uint32_t prev_val2, const char *bits2,
    char reg3, uint32_t val3, uint32_t prev_val3, const char *bits3)
{
	if (CPU_IS_PRIMARY(ci))
		pcpu0id3(id, reg1, val1, bits1, reg2, val2, bits2, reg3, val3,
		    bits3);
	else if (val1 != prev_val1 || val2 != prev_val2 || val3 != prev_val3) {
		printf("\n%s: cpuid %s", ci->ci_dev->dv_xname, id);
		if (val1 != prev_val1) {
			printf(" e%cx=", reg1);
			pbitdiff(val1, prev_val1, bits1);
		}
		if (val2 != prev_val2) {
			printf(" e%cx=", reg2);
			pbitdiff(val2, prev_val2, bits2);
		}
		if (val3 != prev_val3) {
			printf(" e%cx=", reg3);
			pbitdiff(val3, prev_val3, bits3);
		}
	}
}

static inline void
pmsr32(struct cpu_info *ci, uint32_t msr, uint32_t value, uint32_t prev_value,
    const char *bits)
{
	if (CPU_IS_PRIMARY(ci))
		pmsr032(msr, value, bits);
	else if (value != prev_value) {
		printf("\n%s: msr %x=", ci->ci_dev->dv_xname, msr);
		pbitdiff(value, prev_value, bits);
	}
}

#ifdef MULTIPROCESSOR
static uint32_t prevcpu_perf_eax;
static uint32_t prevcpu_perf_edx;
#endif

static inline void
print_perf_cpuid(struct cpu_info *ci, uint32_t cpu_perf_eax,
    uint32_t cpu_perf_edx)
{
	uint32_t version;

	if (CPU_IS_PRIMARY(ci)) {
		version = cpu_perf_eax & CPUIDEAX_VERID;
		if (version == 0)
			return;
	}
#ifdef MULTIPROCESSOR
	else {
		/* if no difference on the bits we care about, say nothing */
		if (((cpu_perf_eax ^ prevcpu_perf_eax) & 0x00ffffff) == 0 &&
		    ((cpu_perf_edx ^ prevcpu_perf_edx) & 0x00001fff) == 0)
			return;
		version = cpu_perf_eax & CPUIDEAX_VERID;
	}
	prevcpu_perf_eax = cpu_perf_eax;
	prevcpu_perf_edx = cpu_perf_edx;
#endif

	printf("\n%s: cpuid a vers=%d", ci->ci_dev->dv_xname, version);
	if (version) {
		printf(", gp=%d, gpwidth=%d", CPUIDEAX_NUM_GC(cpu_perf_eax),
		    CPUIDEAX_BIT_GC(cpu_perf_eax));
		if (version > 1) {
			printf(", ff=%d, ffwidth=%d",
			    CPUIDEDX_NUM_FC(cpu_perf_edx),
			    CPUIDEDX_BIT_FC(cpu_perf_edx));
		}
	}
}

void
identifycpu(struct cpu_info *ci)
{
	static uint32_t prevcpu_1_ecx, prevcpu_tpm_ecxflags, prevcpu_d_1_eax;
	static uint32_t prevcpu_apmi_edx, prevcpu_arch_capa;
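	/*
	 * The prev* values and prevci record what the previously identified
	 * CPU reported, so secondary CPUs only print the cpuid/MSR bits that
	 * differ from the CPU identified before them.
	 */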
	static struct cpu_info *prevci = &cpu_info_primary;
#define CPUID_MEMBER(member) ci->member, prevci->member
	uint32_t cflushsz, curcpu_1_ecx, curcpu_apmi_edx = 0;
	uint32_t curcpu_perf_eax = 0, curcpu_perf_edx = 0;
	uint32_t curcpu_tpm_ecxflags = 0, curcpu_d_1_eax = 0;
	uint64_t freq = 0;
	u_int32_t dummy;
	char mycpu_model[48];
	char *brandstr_from, *brandstr_to;
	int skipspace;

	CPUID(0x80000000, ci->ci_pnfeatset, dummy, dummy, dummy);
	CPUID(0x80000001, ci->ci_efeature_eax, dummy, ci->ci_efeature_ecx,
	    ci->ci_feature_eflags);

	if (CPU_IS_PRIMARY(ci)) {
		ci->ci_signature = cpu_id;
		ci->ci_feature_flags = cpu_feature & ~CPUID_NXE;
		cflushsz = cpu_ebxfeature;
		curcpu_1_ecx = cpu_ecxfeature;
		ecpu_ecxfeature = ci->ci_efeature_ecx;
	} else {
		CPUID(1, ci->ci_signature, cflushsz, curcpu_1_ecx,
		    ci->ci_feature_flags);
		/* Let cpu_feature be the common bits */
		cpu_feature &= ci->ci_feature_flags |
		    (ci->ci_feature_eflags & CPUID_NXE);
		cpu_ecxfeature &= curcpu_1_ecx;
	}
	/* cflush cacheline size is equal to bits 15-8 of ebx * 8 */
	ci->ci_cflushsz = ((cflushsz >> 8) & 0xff) * 8;

	CPUID(0x80000002, ci->ci_brand[0],
	    ci->ci_brand[1], ci->ci_brand[2], ci->ci_brand[3]);
	CPUID(0x80000003, ci->ci_brand[4],
	    ci->ci_brand[5], ci->ci_brand[6], ci->ci_brand[7]);
	CPUID(0x80000004, ci->ci_brand[8],
	    ci->ci_brand[9], ci->ci_brand[10], ci->ci_brand[11]);
	strlcpy(mycpu_model, (char *)ci->ci_brand, sizeof(mycpu_model));

	/* Remove leading, trailing and duplicated spaces from mycpu_model */
	brandstr_from = brandstr_to = mycpu_model;
	skipspace = 1;
	while (*brandstr_from != '\0') {
		if (!skipspace || *brandstr_from != ' ') {
			skipspace = 0;
			*(brandstr_to++) = *brandstr_from;
		}
		if (*brandstr_from == ' ')
			skipspace = 1;
		brandstr_from++;
	}
	if (skipspace && brandstr_to > mycpu_model)
		brandstr_to--;
	*brandstr_to = '\0';

	if (mycpu_model[0] == 0)
		strlcpy(mycpu_model, "Opteron or Athlon 64",
		    sizeof(mycpu_model));

	/* If primary cpu, fill in the global cpu_model used by sysctl */
	if (CPU_IS_PRIMARY(ci))
		strlcpy(cpu_model, mycpu_model, sizeof(cpu_model));

	ci->ci_family = (ci->ci_signature >> 8) & 0x0f;
	ci->ci_model = (ci->ci_signature >> 4) & 0x0f;
	if (ci->ci_family == 0x6 || ci->ci_family == 0xf) {
		ci->ci_family += (ci->ci_signature >> 20) & 0xff;
		ci->ci_model += ((ci->ci_signature >> 16) & 0x0f) << 4;
	}

#if NPVBUS > 0
	/* Detect hypervisors early, attach the paravirtual bus later */
	if (CPU_IS_PRIMARY(ci) && cpu_ecxfeature & CPUIDECX_HV)
		pvbus_identify();
#endif

	if (ci->ci_pnfeatset >= 0x80000007)
		CPUID(0x80000007, dummy, dummy, dummy, curcpu_apmi_edx);

	if (ci->ci_feature_flags && ci->ci_feature_flags & CPUID_TSC) {
		/* Has TSC, check if it's constant */
		if (ci->ci_vendor == CPUV_INTEL) {
			if ((ci->ci_family == 0x0f && ci->ci_model >= 0x03) ||
			    (ci->ci_family == 0x06 && ci->ci_model >= 0x0e)) {
				atomic_setbits_int(&ci->ci_flags, CPUF_CONST_TSC);
			}
		} else if (ci->ci_vendor == CPUV_VIA) {
			/* VIA */
			if (ci->ci_model >= 0x0f) {
				atomic_setbits_int(&ci->ci_flags, CPUF_CONST_TSC);
			}
		} else if (ci->ci_vendor == CPUV_AMD) {
			if (curcpu_apmi_edx & CPUIDEDX_ITSC) {
				/* Invariant TSC indicates constant TSC on AMD */
				atomic_setbits_int(&ci->ci_flags, CPUF_CONST_TSC);
			}
		}

		/* Check if it's an invariant TSC */
		if (curcpu_apmi_edx & CPUIDEDX_ITSC)
			atomic_setbits_int(&ci->ci_flags, CPUF_INVAR_TSC);

		tsc_identify(ci);
	}

	if (ci->ci_cpuid_level >= 0xa) {
		CPUID(0xa, curcpu_perf_eax, dummy, dummy, curcpu_perf_edx);

		freq = cpu_freq_ctr(ci, curcpu_perf_eax, curcpu_perf_edx);
	}
	if (freq == 0)
		freq = cpu_freq(ci);

	if (ci->ci_cpuid_level >= 0x07) {
		/* "Structured Extended Feature Flags" */
		CPUID_LEAF(0x7, 0, dummy, ci->ci_feature_sefflags_ebx,
		    ci->ci_feature_sefflags_ecx, ci->ci_feature_sefflags_edx);
		/* SEFF0ECX_OSPKE is set late on AP */
		ci->ci_feature_sefflags_ecx &= ~SEFF0ECX_OSPKE;
	}

	printf("%s: %s", ci->ci_dev->dv_xname, mycpu_model);

	if (freq != 0)
		printf(", %llu.%02llu MHz", (freq + 4999) / 1000000,
		    ((freq + 4999) / 10000) % 100);

	if (CPU_IS_PRIMARY(ci)) {
		cpuspeed = (freq + 4999) / 1000000;
		cpu_cpuspeed = cpu_amd64speed;
	}

	printf(", %02x-%02x-%02x", ci->ci_family, ci->ci_model,
	    ci->ci_signature & 0x0f);

	if ((cpu_ecxfeature & CPUIDECX_HV) == 0) {
		uint64_t level = 0;
		uint32_t dummy;

		if (ci->ci_vendor == CPUV_AMD) {
			level = rdmsr(MSR_PATCH_LEVEL);
		} else if (ci->ci_vendor == CPUV_INTEL) {
			wrmsr(MSR_BIOS_SIGN, 0);
			CPUID(1, dummy, dummy, dummy, dummy);
			level = rdmsr(MSR_BIOS_SIGN) >> 32;
		}
		if (level != 0)
			printf(", patch %08llx", level);
	}

	if (ci->ci_cpuid_level >= 0x06)
		CPUID(0x06, ci->ci_feature_tpmflags, dummy,
		    curcpu_tpm_ecxflags, dummy);
	if (ci->ci_vendor == CPUV_AMD && ci->ci_family >= 0x12)
		ci->ci_feature_tpmflags |= TPM_ARAT;

	/* xsave subfeatures */
	if (ci->ci_cpuid_level >= 0xd)
		CPUID_LEAF(0xd, 1, curcpu_d_1_eax, dummy, dummy, dummy);

	pcpuid2(ci, "1", 'd', CPUID_MEMBER(ci_feature_flags), CPUID_EDX_BITS,
	    'c', curcpu_1_ecx, prevcpu_1_ecx, CPUID_ECX_BITS);
	pcpuid2(ci, "6", 'a', CPUID_MEMBER(ci_feature_tpmflags), TPM_EAX_BITS,
	    'c', curcpu_tpm_ecxflags, prevcpu_tpm_ecxflags, TPM_ECX_BITS);
	pcpuid3(ci, "7.0",
	    'b', CPUID_MEMBER(ci_feature_sefflags_ebx), SEFF0_EBX_BITS,
	    'c', CPUID_MEMBER(ci_feature_sefflags_ecx), SEFF0_ECX_BITS,
	    'd', CPUID_MEMBER(ci_feature_sefflags_edx), SEFF0_EDX_BITS);
	print_perf_cpuid(ci, curcpu_perf_eax, curcpu_perf_edx);
	pcpuid(ci, "d.1", 'a', curcpu_d_1_eax, prevcpu_d_1_eax, XSAVE_BITS);
	pcpuid2(ci, "80000001",
	    'd', CPUID_MEMBER(ci_feature_eflags), CPUIDE_EDX_BITS,
	    'c', CPUID_MEMBER(ci_efeature_ecx), CPUIDE_ECX_BITS);
	pcpuid(ci, "80000007", 'd', curcpu_apmi_edx, prevcpu_apmi_edx,
	    CPUID_APMI_EDX_BITS);
#ifdef MULTIPROCESSOR
	prevcpu_1_ecx = curcpu_1_ecx;
	prevcpu_tpm_ecxflags = curcpu_tpm_ecxflags;
	prevcpu_d_1_eax = curcpu_d_1_eax;
	prevcpu_apmi_edx = curcpu_apmi_edx;
#endif

	/* speculation control features */
	if (ci->ci_vendor == CPUV_AMD) {
		if (ci->ci_pnfeatset >= 0x80000008) {
			CPUID(0x80000008, dummy, ci->ci_feature_amdspec_ebx,
			    dummy, dummy);
			pcpuid(ci, "80000008", 'b',
			    CPUID_MEMBER(ci_feature_amdspec_ebx),
			    CPUID_AMDSPEC_EBX_BITS);
		}
	} else if (ci->ci_vendor == CPUV_INTEL) {
		if (ci->ci_feature_sefflags_edx & SEFF0EDX_ARCH_CAP) {
			uint32_t msr = rdmsr(MSR_ARCH_CAPABILITIES);

			pmsr32(ci, MSR_ARCH_CAPABILITIES, msr,
			    prevcpu_arch_capa, ARCH_CAP_MSR_BITS);
			prevcpu_arch_capa = msr;
			if (!CPU_IS_PRIMARY(ci) && cpu_meltdown &&
			    (msr & ARCH_CAP_RDCL_NO))
				printf("\n%s: -MELTDOWN", ci->ci_dev->dv_xname);
		}
		if (cpu_meltdown && CPU_IS_PRIMARY(ci))
			printf("\n%s: MELTDOWN", ci->ci_dev->dv_xname);
	}

	/* AMD secure memory encryption and encrypted virtualization features */
	if (ci->ci_vendor == CPUV_AMD &&
	    ci->ci_pnfeatset >= CPUID_AMD_SEV_CAP) {
		CPUID(CPUID_AMD_SEV_CAP, ci->ci_feature_amdsev_eax,
		    ci->ci_feature_amdsev_ebx, ci->ci_feature_amdsev_ecx,
		    ci->ci_feature_amdsev_edx);
		pcpuid3(ci, "8000001F",
		    'a', CPUID_MEMBER(ci_feature_amdsev_eax),
		    CPUID_AMDSEV_EAX_BITS,
		    'c', CPUID_MEMBER(ci_feature_amdsev_ecx),
		    CPUID_AMDSEV_ECX_BITS,
		    'd', CPUID_MEMBER(ci_feature_amdsev_edx),
		    CPUID_AMDSEV_EDX_BITS);
		amd64_pos_cbit = (ci->ci_feature_amdsev_ebx & 0x3f);
	}

	printf("\n");

	replacemeltdown();
	x86_print_cacheinfo(ci);

	if (CPU_IS_PRIMARY(ci)) {
#ifndef SMALL_KERNEL
		if (ci->ci_vendor == CPUV_AMD &&
		    ci->ci_pnfeatset >= 0x80000007) {
			if (curcpu_apmi_edx & 0x06) {
				if ((ci->ci_signature & 0xF00) == 0xF00)
					setperf_setup = k8_powernow_init;
			}
			if (ci->ci_family >= 0x10)
				setperf_setup = k1x_init;
		}

		if (cpu_ecxfeature & CPUIDECX_EST)
			setperf_setup = est_init;
#endif

		if (cpu_ecxfeature & CPUIDECX_RDRAND)
			has_rdrand = 1;

		if (ci->ci_feature_sefflags_ebx & SEFF0EBX_RDSEED)
			has_rdseed = 1;

		if (ci->ci_feature_sefflags_ebx & SEFF0EBX_SMAP)
			replacesmap();
	}

#ifndef SMALL_KERNEL
	if (CPU_IS_PRIMARY(ci) && (ci->ci_feature_tpmflags & TPM_SENSOR) &&
	    ci->ci_vendor == CPUV_INTEL) {
		ci->ci_sensor.type = SENSOR_TEMP;
		sensor_task_register(ci, intelcore_update_sensor, 5);
		sensor_attach(&ci->ci_sensordev, &ci->ci_sensor);
	}
#endif

	if (CPU_IS_PRIMARY(ci) && ci->ci_vendor == CPUV_VIA) {
		ci->cpu_setup = via_nano_setup;
#ifndef SMALL_KERNEL
		ci->ci_sensor.type = SENSOR_TEMP;
		sensor_task_register(ci, via_update_sensor, 5);
		sensor_attach(&ci->ci_sensordev, &ci->ci_sensor);
#endif
	}

	tsc_timecounter_init(ci, freq);

	cpu_topology(ci);
#if NVMM > 0
	cpu_check_vmm_cap(ci);
#endif /* NVMM > 0 */

	/* Check for effective frequency via MPERF, APERF */
	if ((curcpu_tpm_ecxflags & TPM_EFFFREQ) && ci->ci_smt_id == 0) {
#ifndef SMALL_KERNEL
		ci->ci_hz_sensor.type = SENSOR_FREQ;
		sensor_task_register(ci, cpu_hz_update_sensor, 1);
		sensor_attach(&ci->ci_sensordev, &ci->ci_hz_sensor);
#endif
	}
	prevci = ci;
}

#ifndef SMALL_KERNEL
/*
 * Base 2 logarithm of an int. returns 0 for 0 (yeye, I know).
 */
static int
log2(unsigned int i)
{
	int ret = 0;

	while (i >>= 1)
		ret++;

	return (ret);
}

static int
mask_width(u_int x)
{
	int bit;
	int mask;
	int powerof2;

	powerof2 = ((x - 1) & x) == 0;
	mask = (x << (1 - powerof2)) - 1;

	/* fls */
	if (mask == 0)
		return (0);
	for (bit = 1; mask != 1; bit++)
		mask = (unsigned int)mask >> 1;

	return (bit);
}
#endif

/*
 * Build up the cpu topology for the given cpu; this must run on the core itself.
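 *
 * The APIC ID is decomposed into SMT, core and package fields.  As a
 * hypothetical example, an Intel CPU reporting 2 threads per core and
 * 4 cores per package gives smt_bits = 1 and core_bits = 2, so apicid
 * 0x5 maps to smt 1, core 2, pkg 0.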
 */
void
cpu_topology(struct cpu_info *ci)
{
#ifndef SMALL_KERNEL
	u_int32_t eax, ebx, ecx, edx;
	u_int32_t apicid, max_apicid = 0, max_coreid = 0;
	u_int32_t smt_bits = 0, core_bits, pkg_bits = 0;
	u_int32_t smt_mask = 0, core_mask, pkg_mask = 0;

	/* We need at least apicid at CPUID 1 */
	if (ci->ci_cpuid_level < 1)
		goto no_topology;

	/* Initial apicid */
	CPUID(1, eax, ebx, ecx, edx);
	apicid = (ebx >> 24) & 0xff;

	if (ci->ci_vendor == CPUV_AMD) {
		uint32_t nthreads = 1;		/* per core */
		uint32_t thread_id;		/* within a package */

		/* We need at least apicid at CPUID 0x80000008 */
		if (ci->ci_pnfeatset < 0x80000008)
			goto no_topology;

		CPUID(0x80000008, eax, ebx, ecx, edx);
		core_bits = (ecx >> 12) & 0xf;

		if (ci->ci_pnfeatset >= 0x8000001e) {
			CPUID(0x8000001e, eax, ebx, ecx, edx);
			nthreads = ((ebx >> 8) & 0xf) + 1;
		}

		/* Shift the core_bits off to get at the pkg bits */
		ci->ci_pkg_id = apicid >> core_bits;

		/* Get rid of the package bits */
		core_mask = (1U << core_bits) - 1;
		thread_id = apicid & core_mask;

		/* Cut logical thread_id into core id, and smt id in a core */
		ci->ci_core_id = thread_id / nthreads;
		ci->ci_smt_id = thread_id % nthreads;
	} else if (ci->ci_vendor == CPUV_INTEL) {
		/* We only support leaf 1/4 detection */
		if (ci->ci_cpuid_level < 4)
			goto no_topology;
		/* Get max_apicid */
		CPUID(1, eax, ebx, ecx, edx);
		max_apicid = (ebx >> 16) & 0xff;
		/* Get max_coreid */
		CPUID_LEAF(4, 0, eax, ebx, ecx, edx);
		max_coreid = ((eax >> 26) & 0x3f) + 1;
		/* SMT */
		smt_bits = mask_width(max_apicid / max_coreid);
		smt_mask = (1U << smt_bits) - 1;
		/* Core */
		core_bits = log2(max_coreid);
		core_mask = (1U << (core_bits + smt_bits)) - 1;
		core_mask ^= smt_mask;
		/* Pkg */
		pkg_bits = core_bits + smt_bits;
		pkg_mask = ~0U << core_bits;

		ci->ci_smt_id = apicid & smt_mask;
		ci->ci_core_id = (apicid & core_mask) >> smt_bits;
		ci->ci_pkg_id = (apicid & pkg_mask) >> pkg_bits;
	} else
		goto no_topology;
#ifdef DEBUG
	printf("cpu%d: smt %u, core %u, pkg %u "
	    "(apicid 0x%x, max_apicid 0x%x, max_coreid 0x%x, smt_bits 0x%x, smt_mask 0x%x, "
	    "core_bits 0x%x, core_mask 0x%x, pkg_bits 0x%x, pkg_mask 0x%x)\n",
	    ci->ci_cpuid, ci->ci_smt_id, ci->ci_core_id, ci->ci_pkg_id,
	    apicid, max_apicid, max_coreid, smt_bits, smt_mask, core_bits,
	    core_mask, pkg_bits, pkg_mask);
#else
	printf("cpu%d: smt %u, core %u, package %u\n", ci->ci_cpuid,
	    ci->ci_smt_id, ci->ci_core_id, ci->ci_pkg_id);

#endif
	return;
	/* We can't map, so consider ci_core_id as ci_cpuid */
no_topology:
#endif
	ci->ci_smt_id = 0;
	ci->ci_core_id = ci->ci_cpuid;
	ci->ci_pkg_id = 0;
}

#if NVMM > 0
/*
 * cpu_check_vmm_cap
 *
 * Checks for VMM capabilities for 'ci'.  Initializes certain per-cpu VMM
 * state in 'ci' if virtualization extensions are found.
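 * Both Intel VMX and AMD SVM are probed; nested paging support is
 * recorded as CI_VMM_EPT (Intel EPT) or CI_VMM_RVI (AMD) in ci_vmm_flags.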
 *
 * Parameters:
 *  ci: the cpu being checked
 */
void
cpu_check_vmm_cap(struct cpu_info *ci)
{
	uint64_t msr;
	uint32_t cap, dummy, edx;

	/*
	 * Check for workable VMX
	 */
	if (cpu_ecxfeature & CPUIDECX_VMX) {
		msr = rdmsr(MSR_IA32_FEATURE_CONTROL);

		if (!(msr & IA32_FEATURE_CONTROL_LOCK))
			ci->ci_vmm_flags |= CI_VMM_VMX;
		else {
			if (msr & IA32_FEATURE_CONTROL_VMX_EN)
				ci->ci_vmm_flags |= CI_VMM_VMX;
			else
				ci->ci_vmm_flags |= CI_VMM_DIS;
		}
	}

	/*
	 * Check for EPT (Intel Nested Paging) and other secondary
	 * controls
	 */
	if (ci->ci_vmm_flags & CI_VMM_VMX) {
		/* Secondary controls available? */
		/* XXX should we check true procbased ctls here if avail? */
		msr = rdmsr(IA32_VMX_PROCBASED_CTLS);
		if (msr & (IA32_VMX_ACTIVATE_SECONDARY_CONTROLS) << 32) {
			msr = rdmsr(IA32_VMX_PROCBASED2_CTLS);
			/* EPT available? */
			if (msr & (IA32_VMX_ENABLE_EPT) << 32)
				ci->ci_vmm_flags |= CI_VMM_EPT;
			/* VM Functions available? */
			if (msr & (IA32_VMX_ENABLE_VM_FUNCTIONS) << 32) {
				ci->ci_vmm_cap.vcc_vmx.vmx_vm_func =
				    rdmsr(IA32_VMX_VMFUNC);
			}
		}
	}

	/*
	 * Check startup config (VMX)
	 */
	if (ci->ci_vmm_flags & CI_VMM_VMX) {
		/* CR0 fixed and flexible bits */
		msr = rdmsr(IA32_VMX_CR0_FIXED0);
		ci->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed0 = msr;
		msr = rdmsr(IA32_VMX_CR0_FIXED1);
		ci->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed1 = msr;

		/* CR4 fixed and flexible bits */
		msr = rdmsr(IA32_VMX_CR4_FIXED0);
		ci->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0 = msr;
		msr = rdmsr(IA32_VMX_CR4_FIXED1);
		ci->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1 = msr;

		/* VMXON region revision ID (bits 30:0 of IA32_VMX_BASIC) */
		msr = rdmsr(IA32_VMX_BASIC);
		ci->ci_vmm_cap.vcc_vmx.vmx_vmxon_revision =
		    (uint32_t)(msr & 0x7FFFFFFF);

		/* MSR save / load table size */
		msr = rdmsr(IA32_VMX_MISC);
		ci->ci_vmm_cap.vcc_vmx.vmx_msr_table_size =
		    (uint32_t)(msr & IA32_VMX_MSR_LIST_SIZE_MASK) >> 25;

		/* CR3 target count size */
		ci->ci_vmm_cap.vcc_vmx.vmx_cr3_tgt_count =
		    (uint32_t)(msr & IA32_VMX_CR3_TGT_SIZE_MASK) >> 16;
	}

	/*
	 * Check for workable SVM
	 */
	if (ecpu_ecxfeature & CPUIDECX_SVM) {
		msr = rdmsr(MSR_AMD_VM_CR);

		if (!(msr & AMD_SVMDIS))
			ci->ci_vmm_flags |= CI_VMM_SVM;

		CPUID(CPUID_AMD_SVM_CAP, dummy,
		    ci->ci_vmm_cap.vcc_svm.svm_max_asid, dummy, edx);

		if (ci->ci_vmm_cap.vcc_svm.svm_max_asid > 0xFFF)
			ci->ci_vmm_cap.vcc_svm.svm_max_asid = 0xFFF;

		if (edx & AMD_SVM_FLUSH_BY_ASID_CAP)
			ci->ci_vmm_cap.vcc_svm.svm_flush_by_asid = 1;

		if (edx & AMD_SVM_VMCB_CLEAN_CAP)
			ci->ci_vmm_cap.vcc_svm.svm_vmcb_clean = 1;

		if (edx & AMD_SVM_DECODE_ASSIST_CAP)
			ci->ci_vmm_cap.vcc_svm.svm_decode_assist = 1;
	}

	/*
	 * Check for SVM Nested Paging
	 */
	if ((ci->ci_vmm_flags & CI_VMM_SVM) &&
	    ci->ci_pnfeatset >= CPUID_AMD_SVM_CAP) {
		CPUID(CPUID_AMD_SVM_CAP, dummy, dummy, dummy, cap);
		if (cap & AMD_SVM_NESTED_PAGING_CAP)
			ci->ci_vmm_flags |= CI_VMM_RVI;
	}

	/*
	 * Check "L1 flush on VM entry" (Intel L1TF vuln) semantics
	 * Full details can be found here:
	 * https://software.intel.com/security-software-guidance/insights/deep-dive-intel-analysis-l1-terminal-fault
	 */
	if (ci->ci_vendor == CPUV_INTEL) {
		if (ci->ci_feature_sefflags_edx & SEFF0EDX_L1DF)
			ci->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr = 1;
		else
			ci->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr = 0;

		/*
		 * Certain CPUs may have the vulnerability remedied in
		 * hardware (RDCL_NO), or we may be nested in a VMM that
		 * is doing flushes (SKIP_L1DFL_VMENTRY) using the MSR.
		 * In either case no mitigation at all is necessary.
		 */
		if (ci->ci_feature_sefflags_edx & SEFF0EDX_ARCH_CAP) {
			msr = rdmsr(MSR_ARCH_CAPABILITIES);
			if ((msr & ARCH_CAP_RDCL_NO) ||
			    ((msr & ARCH_CAP_SKIP_L1DFL_VMENTRY) &&
			    ci->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr))
				ci->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr =
				    VMX_SKIP_L1D_FLUSH;
		}
	}
}
#endif /* NVMM > 0 */