/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2018 Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted providing that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
26 */ 27 28#include <sys/types.h> 29#include <sys/param.h> 30#include <sys/sbuf.h> 31#include <sys/module.h> 32#include <sys/systm.h> 33#include <sys/errno.h> 34#include <sys/param.h> 35#include <sys/kernel.h> 36#include <sys/bus.h> 37#include <sys/cpu.h> 38#include <sys/smp.h> 39#include <sys/proc.h> 40#include <sys/sched.h> 41 42#include <machine/cpu.h> 43#include <machine/md_var.h> 44#include <machine/cputypes.h> 45#include <machine/specialreg.h> 46 47#include <contrib/dev/acpica/include/acpi.h> 48 49#include <dev/acpica/acpivar.h> 50 51#include <x86/cpufreq/hwpstate_intel_internal.h> 52 53#include "acpi_if.h" 54#include "cpufreq_if.h" 55 56extern uint64_t tsc_freq; 57 58static int intel_hwpstate_probe(device_t dev); 59static int intel_hwpstate_attach(device_t dev); 60static int intel_hwpstate_detach(device_t dev); 61static int intel_hwpstate_suspend(device_t dev); 62static int intel_hwpstate_resume(device_t dev); 63 64static int intel_hwpstate_get(device_t dev, struct cf_setting *cf); 65static int intel_hwpstate_type(device_t dev, int *type); 66 67static device_method_t intel_hwpstate_methods[] = { 68 /* Device interface */ 69 DEVMETHOD(device_identify, intel_hwpstate_identify), 70 DEVMETHOD(device_probe, intel_hwpstate_probe), 71 DEVMETHOD(device_attach, intel_hwpstate_attach), 72 DEVMETHOD(device_detach, intel_hwpstate_detach), 73 DEVMETHOD(device_suspend, intel_hwpstate_suspend), 74 DEVMETHOD(device_resume, intel_hwpstate_resume), 75 76 /* cpufreq interface */ 77 DEVMETHOD(cpufreq_drv_get, intel_hwpstate_get), 78 DEVMETHOD(cpufreq_drv_type, intel_hwpstate_type), 79 80 DEVMETHOD_END 81}; 82 83struct hwp_softc { 84 device_t dev; 85 bool hwp_notifications; 86 bool hwp_activity_window; 87 bool hwp_pref_ctrl; 88 bool hwp_pkg_ctrl; 89 bool hwp_pkg_ctrl_en; 90 bool hwp_perf_bias; 91 bool hwp_perf_bias_cached; 92 93 uint64_t req; /* Cached copy of HWP_REQUEST */ 94 uint64_t hwp_energy_perf_bias; /* Cache PERF_BIAS */ 95 96 uint8_t high; 97 uint8_t guaranteed; 98 
uint8_t efficient; 99 uint8_t low; 100}; 101 102static driver_t hwpstate_intel_driver = { 103 "hwpstate_intel", 104 intel_hwpstate_methods, 105 sizeof(struct hwp_softc), 106}; 107 108DRIVER_MODULE(hwpstate_intel, cpu, hwpstate_intel_driver, NULL, NULL); 109MODULE_VERSION(hwpstate_intel, 1); 110 111static bool hwpstate_pkg_ctrl_enable = true; 112SYSCTL_BOOL(_machdep, OID_AUTO, hwpstate_pkg_ctrl, CTLFLAG_RDTUN, 113 &hwpstate_pkg_ctrl_enable, 0, 114 "Set 1 (default) to enable package-level control, 0 to disable"); 115 116static int 117intel_hwp_dump_sysctl_handler(SYSCTL_HANDLER_ARGS) 118{ 119 device_t dev; 120 struct pcpu *pc; 121 struct sbuf *sb; 122 struct hwp_softc *sc; 123 uint64_t data, data2; 124 int ret; 125 126 sc = (struct hwp_softc *)arg1; 127 dev = sc->dev; 128 129 pc = cpu_get_pcpu(dev); 130 if (pc == NULL) 131 return (ENXIO); 132 133 sb = sbuf_new(NULL, NULL, 1024, SBUF_FIXEDLEN | SBUF_INCLUDENUL); 134 sbuf_putc(sb, '\n'); 135 thread_lock(curthread); 136 sched_bind(curthread, pc->pc_cpuid); 137 thread_unlock(curthread); 138 139 rdmsr_safe(MSR_IA32_PM_ENABLE, &data); 140 sbuf_printf(sb, "CPU%d: HWP %sabled\n", pc->pc_cpuid, 141 ((data & 1) ? 
"En" : "Dis")); 142 143 if (data == 0) { 144 ret = 0; 145 goto out; 146 } 147 148 rdmsr_safe(MSR_IA32_HWP_CAPABILITIES, &data); 149 sbuf_printf(sb, "\tHighest Performance: %03ju\n", data & 0xff); 150 sbuf_printf(sb, "\tGuaranteed Performance: %03ju\n", (data >> 8) & 0xff); 151 sbuf_printf(sb, "\tEfficient Performance: %03ju\n", (data >> 16) & 0xff); 152 sbuf_printf(sb, "\tLowest Performance: %03ju\n", (data >> 24) & 0xff); 153 154 rdmsr_safe(MSR_IA32_HWP_REQUEST, &data); 155 data2 = 0; 156 if (sc->hwp_pkg_ctrl && (data & IA32_HWP_REQUEST_PACKAGE_CONTROL)) 157 rdmsr_safe(MSR_IA32_HWP_REQUEST_PKG, &data2); 158 159 sbuf_putc(sb, '\n'); 160 161#define pkg_print(x, name, offset) do { \ 162 if (!sc->hwp_pkg_ctrl || (data & x) != 0) \ 163 sbuf_printf(sb, "\t%s: %03u\n", name, \ 164 (unsigned)(data >> offset) & 0xff); \ 165 else \ 166 sbuf_printf(sb, "\t%s: %03u\n", name, \ 167 (unsigned)(data2 >> offset) & 0xff); \ 168} while (0) 169 170 pkg_print(IA32_HWP_REQUEST_EPP_VALID, 171 "Requested Efficiency Performance Preference", 24); 172 pkg_print(IA32_HWP_REQUEST_DESIRED_VALID, 173 "Requested Desired Performance", 16); 174 pkg_print(IA32_HWP_REQUEST_MAXIMUM_VALID, 175 "Requested Maximum Performance", 8); 176 pkg_print(IA32_HWP_REQUEST_MINIMUM_VALID, 177 "Requested Minimum Performance", 0); 178#undef pkg_print 179 180 sbuf_putc(sb, '\n'); 181 182out: 183 thread_lock(curthread); 184 sched_unbind(curthread); 185 thread_unlock(curthread); 186 187 ret = sbuf_finish(sb); 188 if (ret == 0) 189 ret = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb)); 190 sbuf_delete(sb); 191 192 return (ret); 193} 194 195static inline int 196percent_to_raw(int x) 197{ 198 199 MPASS(x <= 100 && x >= 0); 200 return (0xff * x / 100); 201} 202 203/* 204 * Given x * 10 in [0, 1000], round to the integer nearest x. 205 * 206 * This allows round-tripping nice human readable numbers through this 207 * interface. 
Otherwise, user-provided percentages such as 25, 50, 75 get 208 * rounded down to 24, 49, and 74, which is a bit ugly. 209 */ 210static inline int 211round10(int xtimes10) 212{ 213 return ((xtimes10 + 5) / 10); 214} 215 216static inline int 217raw_to_percent(int x) 218{ 219 MPASS(x <= 0xff && x >= 0); 220 return (round10(x * 1000 / 0xff)); 221} 222 223/* Range of MSR_IA32_ENERGY_PERF_BIAS is more limited: 0-0xf. */ 224static inline int 225percent_to_raw_perf_bias(int x) 226{ 227 /* 228 * Round up so that raw values present as nice round human numbers and 229 * also round-trip to the same raw value. 230 */ 231 MPASS(x <= 100 && x >= 0); 232 return (((0xf * x) + 50) / 100); 233} 234 235static inline int 236raw_to_percent_perf_bias(int x) 237{ 238 /* Rounding to nice human numbers despite a step interval of 6.67%. */ 239 MPASS(x <= 0xf && x >= 0); 240 return (((x * 20) / 0xf) * 5); 241} 242 243static int 244sysctl_epp_select(SYSCTL_HANDLER_ARGS) 245{ 246 struct hwp_softc *sc; 247 device_t dev; 248 struct pcpu *pc; 249 uint64_t epb; 250 uint32_t val; 251 int ret; 252 253 dev = oidp->oid_arg1; 254 sc = device_get_softc(dev); 255 if (!sc->hwp_pref_ctrl && !sc->hwp_perf_bias) 256 return (ENODEV); 257 258 pc = cpu_get_pcpu(dev); 259 if (pc == NULL) 260 return (ENXIO); 261 262 thread_lock(curthread); 263 sched_bind(curthread, pc->pc_cpuid); 264 thread_unlock(curthread); 265 266 if (sc->hwp_pref_ctrl) { 267 val = (sc->req & IA32_HWP_REQUEST_ENERGY_PERFORMANCE_PREFERENCE) >> 24; 268 val = raw_to_percent(val); 269 } else { 270 /* 271 * If cpuid indicates EPP is not supported, the HWP controller 272 * uses MSR_IA32_ENERGY_PERF_BIAS instead (Intel SDM ��14.4.4). 273 * This register is per-core (but not HT). 
274 */ 275 if (!sc->hwp_perf_bias_cached) { 276 ret = rdmsr_safe(MSR_IA32_ENERGY_PERF_BIAS, &epb); 277 if (ret) 278 goto out; 279 sc->hwp_energy_perf_bias = epb; 280 sc->hwp_perf_bias_cached = true; 281 } 282 val = sc->hwp_energy_perf_bias & 283 IA32_ENERGY_PERF_BIAS_POLICY_HINT_MASK; 284 val = raw_to_percent_perf_bias(val); 285 } 286 287 MPASS(val >= 0 && val <= 100); 288 289 ret = sysctl_handle_int(oidp, &val, 0, req); 290 if (ret || req->newptr == NULL) 291 goto out; 292 293 if (val > 100) { 294 ret = EINVAL; 295 goto out; 296 } 297 298 if (sc->hwp_pref_ctrl) { 299 val = percent_to_raw(val); 300 301 sc->req = 302 ((sc->req & ~IA32_HWP_REQUEST_ENERGY_PERFORMANCE_PREFERENCE) 303 | (val << 24u)); 304 305 if (sc->hwp_pkg_ctrl_en) 306 ret = wrmsr_safe(MSR_IA32_HWP_REQUEST_PKG, sc->req); 307 else 308 ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req); 309 } else { 310 val = percent_to_raw_perf_bias(val); 311 MPASS((val & ~IA32_ENERGY_PERF_BIAS_POLICY_HINT_MASK) == 0); 312 313 sc->hwp_energy_perf_bias = 314 ((sc->hwp_energy_perf_bias & 315 ~IA32_ENERGY_PERF_BIAS_POLICY_HINT_MASK) | val); 316 ret = wrmsr_safe(MSR_IA32_ENERGY_PERF_BIAS, 317 sc->hwp_energy_perf_bias); 318 } 319 320out: 321 thread_lock(curthread); 322 sched_unbind(curthread); 323 thread_unlock(curthread); 324 325 return (ret); 326} 327 328void 329intel_hwpstate_identify(driver_t *driver, device_t parent) 330{ 331 if (device_find_child(parent, "hwpstate_intel", -1) != NULL) 332 return; 333 334 if (cpu_vendor_id != CPU_VENDOR_INTEL) 335 return; 336 337 if (resource_disabled("hwpstate_intel", 0)) 338 return; 339 340 /* 341 * Intel SDM 14.4.1 (HWP Programming Interfaces): 342 * Availability of HWP baseline resource and capability, 343 * CPUID.06H:EAX[bit 7]: If this bit is set, HWP provides several new 344 * architectural MSRs: IA32_PM_ENABLE, IA32_HWP_CAPABILITIES, 345 * IA32_HWP_REQUEST, IA32_HWP_STATUS. 
346 */ 347 if ((cpu_power_eax & CPUTPM1_HWP) == 0) 348 return; 349 350 if (BUS_ADD_CHILD(parent, 10, "hwpstate_intel", device_get_unit(parent)) 351 == NULL) 352 device_printf(parent, "hwpstate_intel: add child failed\n"); 353} 354 355static int 356intel_hwpstate_probe(device_t dev) 357{ 358 359 device_set_desc(dev, "Intel Speed Shift"); 360 return (BUS_PROBE_NOWILDCARD); 361} 362 363static int 364set_autonomous_hwp(struct hwp_softc *sc) 365{ 366 struct pcpu *pc; 367 device_t dev; 368 uint64_t caps; 369 int ret; 370 371 dev = sc->dev; 372 373 pc = cpu_get_pcpu(dev); 374 if (pc == NULL) 375 return (ENXIO); 376 377 thread_lock(curthread); 378 sched_bind(curthread, pc->pc_cpuid); 379 thread_unlock(curthread); 380 381 /* XXX: Many MSRs aren't readable until feature is enabled */ 382 ret = wrmsr_safe(MSR_IA32_PM_ENABLE, 1); 383 if (ret) { 384 /* 385 * This is actually a package-level MSR, and only the first 386 * write is not ignored. So it is harmless to enable it across 387 * all devices, and this allows us not to care especially in 388 * which order cores (and packages) are probed. This error 389 * condition should not happen given we gate on the HWP CPUID 390 * feature flag, if the Intel SDM is correct. 391 */ 392 device_printf(dev, "Failed to enable HWP for cpu%d (%d)\n", 393 pc->pc_cpuid, ret); 394 goto out; 395 } 396 397 ret = rdmsr_safe(MSR_IA32_HWP_REQUEST, &sc->req); 398 if (ret) { 399 device_printf(dev, 400 "Failed to read HWP request MSR for cpu%d (%d)\n", 401 pc->pc_cpuid, ret); 402 goto out; 403 } 404 405 ret = rdmsr_safe(MSR_IA32_HWP_CAPABILITIES, &caps); 406 if (ret) { 407 device_printf(dev, 408 "Failed to read HWP capabilities MSR for cpu%d (%d)\n", 409 pc->pc_cpuid, ret); 410 goto out; 411 } 412 413 /* 414 * High and low are static; "guaranteed" is dynamic; and efficient is 415 * also dynamic. 
416 */ 417 sc->high = IA32_HWP_CAPABILITIES_HIGHEST_PERFORMANCE(caps); 418 sc->guaranteed = IA32_HWP_CAPABILITIES_GUARANTEED_PERFORMANCE(caps); 419 sc->efficient = IA32_HWP_CAPABILITIES_EFFICIENT_PERFORMANCE(caps); 420 sc->low = IA32_HWP_CAPABILITIES_LOWEST_PERFORMANCE(caps); 421 422 /* hardware autonomous selection determines the performance target */ 423 sc->req &= ~IA32_HWP_DESIRED_PERFORMANCE; 424 425 /* enable HW dynamic selection of window size */ 426 sc->req &= ~IA32_HWP_ACTIVITY_WINDOW; 427 428 /* IA32_HWP_REQUEST.Minimum_Performance = IA32_HWP_CAPABILITIES.Lowest_Performance */ 429 sc->req &= ~IA32_HWP_MINIMUM_PERFORMANCE; 430 sc->req |= sc->low; 431 432 /* IA32_HWP_REQUEST.Maximum_Performance = IA32_HWP_CAPABILITIES.Highest_Performance. */ 433 sc->req &= ~IA32_HWP_REQUEST_MAXIMUM_PERFORMANCE; 434 sc->req |= sc->high << 8; 435 436 /* If supported, request package-level control for this CPU. */ 437 if (sc->hwp_pkg_ctrl_en) 438 ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req | 439 IA32_HWP_REQUEST_PACKAGE_CONTROL); 440 else 441 ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req); 442 if (ret) { 443 device_printf(dev, 444 "Failed to setup%s autonomous HWP for cpu%d\n", 445 sc->hwp_pkg_ctrl_en ? " PKG" : "", pc->pc_cpuid); 446 goto out; 447 } 448 449 /* If supported, write the PKG-wide control MSR. */ 450 if (sc->hwp_pkg_ctrl_en) { 451 /* 452 * "The structure of the IA32_HWP_REQUEST_PKG MSR 453 * (package-level) is identical to the IA32_HWP_REQUEST MSR 454 * with the exception of the Package Control field, which does 455 * not exist." 
(Intel SDM ��14.4.4) 456 */ 457 ret = wrmsr_safe(MSR_IA32_HWP_REQUEST_PKG, sc->req); 458 if (ret) { 459 device_printf(dev, 460 "Failed to set autonomous HWP for package\n"); 461 } 462 } 463 464out: 465 thread_lock(curthread); 466 sched_unbind(curthread); 467 thread_unlock(curthread); 468 469 return (ret); 470} 471 472static int 473intel_hwpstate_attach(device_t dev) 474{ 475 struct hwp_softc *sc; 476 int ret; 477 478 sc = device_get_softc(dev); 479 sc->dev = dev; 480 481 /* eax */ 482 if (cpu_power_eax & CPUTPM1_HWP_NOTIFICATION) 483 sc->hwp_notifications = true; 484 if (cpu_power_eax & CPUTPM1_HWP_ACTIVITY_WINDOW) 485 sc->hwp_activity_window = true; 486 if (cpu_power_eax & CPUTPM1_HWP_PERF_PREF) 487 sc->hwp_pref_ctrl = true; 488 if (cpu_power_eax & CPUTPM1_HWP_PKG) 489 sc->hwp_pkg_ctrl = true; 490 491 /* Allow administrators to disable pkg-level control. */ 492 sc->hwp_pkg_ctrl_en = (sc->hwp_pkg_ctrl && hwpstate_pkg_ctrl_enable); 493 494 /* ecx */ 495 if (cpu_power_ecx & CPUID_PERF_BIAS) 496 sc->hwp_perf_bias = true; 497 498 ret = set_autonomous_hwp(sc); 499 if (ret) 500 return (ret); 501 502 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), 503 SYSCTL_STATIC_CHILDREN(_debug), OID_AUTO, device_get_nameunit(dev), 504 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_MPSAFE, 505 sc, 0, intel_hwp_dump_sysctl_handler, "A", ""); 506 507 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), 508 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, 509 "epp", CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, dev, 0, 510 sysctl_epp_select, "I", 511 "Efficiency/Performance Preference " 512 "(range from 0, most performant, through 100, most efficient)"); 513 514 return (cpufreq_register(dev)); 515} 516 517static int 518intel_hwpstate_detach(device_t dev) 519{ 520 521 return (cpufreq_unregister(dev)); 522} 523 524static int 525intel_hwpstate_get(device_t dev, struct cf_setting *set) 526{ 527 struct pcpu *pc; 528 uint64_t rate; 529 int ret; 530 531 if (set == NULL) 532 return (EINVAL); 533 
534 pc = cpu_get_pcpu(dev); 535 if (pc == NULL) 536 return (ENXIO); 537 538 memset(set, CPUFREQ_VAL_UNKNOWN, sizeof(*set)); 539 set->dev = dev; 540 541 ret = cpu_est_clockrate(pc->pc_cpuid, &rate); 542 if (ret == 0) 543 set->freq = rate / 1000000; 544 545 set->volts = CPUFREQ_VAL_UNKNOWN; 546 set->power = CPUFREQ_VAL_UNKNOWN; 547 set->lat = CPUFREQ_VAL_UNKNOWN; 548 549 return (0); 550} 551 552static int 553intel_hwpstate_type(device_t dev, int *type) 554{ 555 if (type == NULL) 556 return (EINVAL); 557 *type = CPUFREQ_TYPE_ABSOLUTE | CPUFREQ_FLAG_INFO_ONLY | CPUFREQ_FLAG_UNCACHED; 558 559 return (0); 560} 561 562static int 563intel_hwpstate_suspend(device_t dev) 564{ 565 return (0); 566} 567 568/* 569 * Redo a subset of set_autonomous_hwp on resume; untested. Without this, 570 * testers observed that on resume MSR_IA32_HWP_REQUEST was bogus. 571 */ 572static int 573intel_hwpstate_resume(device_t dev) 574{ 575 struct hwp_softc *sc; 576 struct pcpu *pc; 577 int ret; 578 579 sc = device_get_softc(dev); 580 581 pc = cpu_get_pcpu(dev); 582 if (pc == NULL) 583 return (ENXIO); 584 585 thread_lock(curthread); 586 sched_bind(curthread, pc->pc_cpuid); 587 thread_unlock(curthread); 588 589 ret = wrmsr_safe(MSR_IA32_PM_ENABLE, 1); 590 if (ret) { 591 device_printf(dev, 592 "Failed to enable HWP for cpu%d after suspend (%d)\n", 593 pc->pc_cpuid, ret); 594 goto out; 595 } 596 597 if (sc->hwp_pkg_ctrl_en) 598 ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req | 599 IA32_HWP_REQUEST_PACKAGE_CONTROL); 600 else 601 ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req); 602 if (ret) { 603 device_printf(dev, 604 "Failed to set%s autonomous HWP for cpu%d after suspend\n", 605 sc->hwp_pkg_ctrl_en ? 
" PKG" : "", pc->pc_cpuid); 606 goto out; 607 } 608 if (sc->hwp_pkg_ctrl_en) { 609 ret = wrmsr_safe(MSR_IA32_HWP_REQUEST_PKG, sc->req); 610 if (ret) { 611 device_printf(dev, 612 "Failed to set autonomous HWP for package after " 613 "suspend\n"); 614 goto out; 615 } 616 } 617 if (!sc->hwp_pref_ctrl && sc->hwp_perf_bias_cached) { 618 ret = wrmsr_safe(MSR_IA32_ENERGY_PERF_BIAS, 619 sc->hwp_energy_perf_bias); 620 if (ret) { 621 device_printf(dev, 622 "Failed to set energy perf bias for cpu%d after " 623 "suspend\n", pc->pc_cpuid); 624 } 625 } 626 627out: 628 thread_lock(curthread); 629 sched_unbind(curthread); 630 thread_unlock(curthread); 631 632 return (ret); 633} 634