/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2009, Intel Corporation.
 * All rights reserved.
 */

#include <sys/atomic.h>
#include <sys/cpuvar.h>
#include <sys/cpu.h>
#include <sys/cpu_event.h>
#include <sys/cmn_err.h>
#include <sys/ddi.h>
#include <sys/kmem.h>
#include <sys/kstat.h>
#include <sys/pci.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/synch.h>
#include <sys/sysmacros.h>
#include <sys/fipe.h>
#include <vm/hat.h>

/* Current PM policy, configurable through /etc/system and fipe.conf. */
fipe_pm_policy_t fipe_pm_policy = FIPE_PM_POLICY_BALANCE;
int fipe_pm_throttle_level = 1;

/* Enable kstat support. */
#define	FIPE_KSTAT_SUPPORT	1

/* Enable performance relative statistics. */
#define	FIPE_KSTAT_DETAIL	1

/*
 * Enable builtin IOAT driver if no IOAT driver is available.
 * Note: 0 here means "disabled"; the #undef below turns the 0-valued
 * definition into an undefined macro so plain #ifdef tests work.
 */
#define	FIPE_IOAT_BUILTIN	0
#if defined(FIPE_IOAT_BUILTIN) && (FIPE_IOAT_BUILTIN == 0)
#undef	FIPE_IOAT_BUILTIN
#endif

#ifdef FIPE_IOAT_BUILTIN
/*
 * Use IOAT channel 3 to generate memory transactions.
 * Register offsets below are within the channel's register window.
 */
#define	FIPE_IOAT_CHAN_CTRL	0x200
#define	FIPE_IOAT_CHAN_STS_LO	0x204
#define	FIPE_IOAT_CHAN_STS_HI	0x208
#define	FIPE_IOAT_CHAN_ADDR_LO	0x20C
#define	FIPE_IOAT_CHAN_ADDR_HI	0x210
#define	FIPE_IOAT_CHAN_CMD	0x214
#define	FIPE_IOAT_CHAN_ERR	0x228
#else	/* FIPE_IOAT_BUILTIN */
#include <sys/dcopy.h>
#endif	/* FIPE_IOAT_BUILTIN */

/* Memory controller relative PCI configuration constants. */
#define	FIPE_MC_GBLACT		0x60
#define	FIPE_MC_THRTLOW		0x64
#define	FIPE_MC_THRTCTRL	0x67
#define	FIPE_MC_THRTCTRL_HUNT	0x1

/* Hardware recommended values. */
#define	FIPE_MC_MEMORY_OFFSET	1024
#define	FIPE_MC_MEMORY_SIZE	128

/* Number of IOAT commands posted when entering idle. */
#define	FIPE_IOAT_CMD_NUM	2

/* Resource allocation retry interval in microsecond (15s). */
#define	FIPE_IOAT_RETRY_INTERVAL	(15 * 1000 * 1000)

/* Statistics update interval in nanosecond (10ms). */
#define	FIPE_STAT_INTERVAL	(10 * 1000 * 1000)

/* Configuration profile support: accessors into the current profile. */
#define	FIPE_PROFILE_FIELD(field)	(fipe_profile_curr->field)
#define	FIPE_PROF_IDLE_COUNT		FIPE_PROFILE_FIELD(idle_count)
#define	FIPE_PROF_BUSY_THRESHOLD	FIPE_PROFILE_FIELD(busy_threshold)
#define	FIPE_PROF_INTR_THRESHOLD	FIPE_PROFILE_FIELD(intr_threshold)
#define	FIPE_PROF_INTR_BUSY_THRESHOLD	FIPE_PROFILE_FIELD(intr_busy_threshold)
#define	FIPE_PROF_INTR_BUSY_THROTTLE	FIPE_PROFILE_FIELD(intr_busy_throttle)

/* Priority assigned to FIPE memory power management driver on x86. */
#define	CPU_IDLE_CB_PRIO_FIPE	(CPU_IDLE_CB_PRIO_LOW_BASE + 0x4000000)

/* Structure to support power management profile.
 */
static struct fipe_profile {
	uint32_t idle_count;		/* idle iterations before engaging PM */
	uint32_t busy_threshold;
	uint32_t intr_threshold;
	uint32_t intr_busy_threshold;
	uint32_t intr_busy_throttle;
} fipe_profiles[FIPE_PM_POLICY_MAX] = {
	/* Indexed by fipe_pm_policy_t; entry 0 disables PM entirely. */
	{ 0, 0, 0, 0, 0 },
	{ 5, 30, 20, 50, 5 },
	{ 10, 40, 40, 75, 4 },
	{ 15, 50, 60, 100, 2 },
};
#pragma align CPU_CACHE_COHERENCE_SIZE(fipe_profiles)

/* Structure to store memory controller relative data. */
static struct fipe_mc_ctrl {
	ddi_acc_handle_t mc_pci_hdl;	/* PCI config space access handle */
	unsigned char mc_thrtctrl;	/* saved THRTCTRL for restore */
	unsigned char mc_thrtlow;	/* saved THRTLOW for restore */
	unsigned char mc_gblact;	/* saved GBLACT for restore */
	dev_info_t *mc_dip;
	boolean_t mc_initialized;
} fipe_mc_ctrl;
#pragma align CPU_CACHE_COHERENCE_SIZE(fipe_mc_ctrl)

/* Structure to store IOAT relative information. */
static struct fipe_ioat_control {
	kmutex_t ioat_lock;		/* protects the fields below */
	boolean_t ioat_ready;
#ifdef FIPE_IOAT_BUILTIN
	boolean_t ioat_reg_mapped;
	ddi_acc_handle_t ioat_reg_handle;
	uint8_t *ioat_reg_addr;
	uint64_t ioat_cmd_physaddr;
#else	/* FIPE_IOAT_BUILTIN */
	/* cmds[0] cancels the copy loop; cmds[1..N] form the ring. */
	dcopy_cmd_t ioat_cmds[FIPE_IOAT_CMD_NUM + 1];
	dcopy_handle_t ioat_handle;
#endif	/* FIPE_IOAT_BUILTIN */
	dev_info_t *ioat_dev_info;
	uint64_t ioat_buf_physaddr;
	char *ioat_buf_virtaddr;	/* page-aligned working address */
	char *ioat_buf_start;		/* raw kmem allocation for free() */
	size_t ioat_buf_size;
	timeout_id_t ioat_timerid;	/* retry timer for fipe_ioat_alloc */
	boolean_t ioat_failed;		/* permanent error, give up */
	boolean_t ioat_cancel;		/* teardown in progress */
	boolean_t ioat_try_alloc;
} fipe_ioat_ctrl;
#pragma align CPU_CACHE_COHERENCE_SIZE(fipe_ioat_ctrl)

static struct fipe_idle_ctrl {
	boolean_t idle_ready;
	cpu_idle_callback_handle_t cb_handle;
	cpu_idle_prop_handle_t prop_enter;
	cpu_idle_prop_handle_t prop_exit;
	cpu_idle_prop_handle_t prop_busy;
	cpu_idle_prop_handle_t prop_idle;
	cpu_idle_prop_handle_t prop_intr;

	/* Put here for cache efficiency, it should be in fipe_global_ctrl.
	 */
	hrtime_t tick_interval;
} fipe_idle_ctrl;
#pragma align CPU_CACHE_COHERENCE_SIZE(fipe_idle_ctrl)

/*
 * Global control structure.
 * Solaris idle thread has no reentrance issue, so it's enough to count CPUs
 * in idle state. Otherwise cpuset_t bitmap should be used to track idle CPUs.
 */
static struct fipe_global_ctrl {
	kmutex_t lock;
	boolean_t pm_enabled;
	volatile boolean_t pm_active;
	volatile uint32_t cpu_count;	/* number of CPUs currently idle */
	volatile uint64_t io_waiters;
	hrtime_t enter_ts;		/* timestamp of entering PM state */
	hrtime_t time_in_pm;		/* accumulated unscaled time in PM */
	size_t state_size;
	char *state_buf;		/* backing store for fipe_cpu_states */
#ifdef FIPE_KSTAT_SUPPORT
	kstat_t *fipe_kstat;
#endif	/* FIPE_KSTAT_SUPPORT */
} fipe_gbl_ctrl;
#pragma align CPU_CACHE_COHERENCE_SIZE(fipe_gbl_ctrl)

/* Pad per-CPU state to 128 bytes to avoid false sharing between CPUs. */
#define	FIPE_CPU_STATE_PAD	(128 - \
	2 * sizeof (boolean_t) - 4 * sizeof (hrtime_t) - \
	2 * sizeof (uint64_t) - 2 * sizeof (uint32_t))

/* Per-CPU status. */
#pragma pack(1)
typedef struct fipe_cpu_state {
	boolean_t cond_ready;
	boolean_t state_ready;
	uint32_t idle_count;
	uint32_t throttle_cnt;
	hrtime_t throttle_ts;
	hrtime_t next_ts;
	hrtime_t last_busy;
	hrtime_t last_idle;
	uint64_t last_intr;
	uint64_t last_iowait;
	char pad1[FIPE_CPU_STATE_PAD];
} fipe_cpu_state_t;
#pragma pack()

#ifdef FIPE_KSTAT_SUPPORT
static struct fipe_kstat_s {
	kstat_named_t fipe_enabled;
	kstat_named_t fipe_policy;
	kstat_named_t fipe_pm_time;
#ifdef FIPE_KSTAT_DETAIL
	kstat_named_t ioat_ready;
	kstat_named_t pm_tryenter_cnt;
	kstat_named_t pm_success_cnt;
	kstat_named_t pm_race_cnt;
	kstat_named_t cpu_loop_cnt;
	kstat_named_t cpu_busy_cnt;
	kstat_named_t cpu_idle_cnt;
	kstat_named_t cpu_intr_busy_cnt;
	kstat_named_t cpu_intr_throttle_cnt;
	kstat_named_t bio_busy_cnt;
	kstat_named_t ioat_start_fail_cnt;
	kstat_named_t ioat_stop_fail_cnt;
#endif	/* FIPE_KSTAT_DETAIL */
} fipe_kstat = {
	{ "fipe_enabled", KSTAT_DATA_INT32 },
	{ "fipe_policy", KSTAT_DATA_INT32 },
	{ "fipe_pm_time", KSTAT_DATA_UINT64 },
#ifdef FIPE_KSTAT_DETAIL
	{ "ioat_ready", KSTAT_DATA_INT32 },
	{ "pm_tryenter_cnt", KSTAT_DATA_UINT64 },
	{ "pm_success_cnt", KSTAT_DATA_UINT64 },
	{ "pm_race_cnt", KSTAT_DATA_UINT64 },
	{ "cpu_loop_cnt", KSTAT_DATA_UINT64 },
	{ "cpu_busy_cnt", KSTAT_DATA_UINT64 },
	{ "cpu_idle_cnt", KSTAT_DATA_UINT64 },
	{ "cpu_intr_busy_cnt", KSTAT_DATA_UINT64 },
	{ "cpu_intr_thrt_cnt", KSTAT_DATA_UINT64 },
	{ "bio_busy_cnt", KSTAT_DATA_UINT64 },
	{ "ioat_start_fail_cnt", KSTAT_DATA_UINT64 },
	{ "ioat_stop_fail_cnt", KSTAT_DATA_UINT64 }
#endif	/* FIPE_KSTAT_DETAIL */
};
#pragma align CPU_CACHE_COHERENCE_SIZE(fipe_kstat)

#define	FIPE_KSTAT_INC(v)		\
	atomic_inc_64(&fipe_kstat.v.value.ui64)
#ifdef FIPE_KSTAT_DETAIL
#define	FIPE_KSTAT_DETAIL_INC(v)	\
	atomic_inc_64(&fipe_kstat.v.value.ui64)
#else	/* FIPE_KSTAT_DETAIL */
#define	FIPE_KSTAT_DETAIL_INC(v)
#endif	/* FIPE_KSTAT_DETAIL */

#else	/* FIPE_KSTAT_SUPPORT */

#define	FIPE_KSTAT_INC(v)
#define	FIPE_KSTAT_DETAIL_INC(v)

#endif	/* FIPE_KSTAT_SUPPORT */

/* Save current power management profile during suspend/resume. */
static fipe_pm_policy_t fipe_pm_policy_saved = FIPE_PM_POLICY_BALANCE;
static fipe_cpu_state_t *fipe_cpu_states = NULL;

/*
 * There is no lock to protect fipe_profile_curr, so fipe_profile_curr
 * could change on threads in fipe_idle_enter. This is not an issue,
 * as it always points to a valid profile, and though it might make
 * an incorrect choice for the new profile, it will still be a valid
 * selection, and would do the correct operation for the new profile on
 * the next cpu_idle_enter cycle. Since the selection is always valid
 * for some profile, taking a lock here is not worth the overhead.
276 */ 277static struct fipe_profile *fipe_profile_curr = NULL; 278 279static void fipe_idle_enter(void *arg, cpu_idle_callback_context_t ctx, 280 cpu_idle_check_wakeup_t check_func, void* check_arg); 281static void fipe_idle_exit(void* arg, cpu_idle_callback_context_t ctx, 282 int flags); 283static cpu_idle_callback_t fipe_idle_cb = { 284 CPU_IDLE_CALLBACK_VER0, 285 fipe_idle_enter, 286 fipe_idle_exit, 287}; 288 289/* 290 * Configure memory controller into power saving mode: 291 * 1) OLTT activation limit is set to unlimited 292 * 2) MC works in S-CLTT mode 293 */ 294static int 295fipe_mc_change(int throttle) 296{ 297 /* Enable OLTT/disable S-CLTT mode */ 298 pci_config_put8(fipe_mc_ctrl.mc_pci_hdl, FIPE_MC_THRTCTRL, 299 fipe_mc_ctrl.mc_thrtctrl & ~FIPE_MC_THRTCTRL_HUNT); 300 /* Set OLTT activation limit to unlimited */ 301 pci_config_put8(fipe_mc_ctrl.mc_pci_hdl, FIPE_MC_GBLACT, 0); 302 /* 303 * Set S-CLTT low throttling to desired value. The lower value, 304 * the more power saving and the less available memory bandwidth. 305 */ 306 pci_config_put8(fipe_mc_ctrl.mc_pci_hdl, FIPE_MC_THRTLOW, throttle); 307 /* Enable S-CLTT/disable OLTT mode */ 308 pci_config_put8(fipe_mc_ctrl.mc_pci_hdl, FIPE_MC_THRTCTRL, 309 fipe_mc_ctrl.mc_thrtctrl | FIPE_MC_THRTCTRL_HUNT); 310 311 return (0); 312} 313 314/* 315 * Restore memory controller's original configuration. 316 */ 317static void 318fipe_mc_restore(void) 319{ 320 pci_config_put8(fipe_mc_ctrl.mc_pci_hdl, FIPE_MC_THRTCTRL, 321 fipe_mc_ctrl.mc_thrtctrl & ~FIPE_MC_THRTCTRL_HUNT); 322 pci_config_put8(fipe_mc_ctrl.mc_pci_hdl, FIPE_MC_GBLACT, 323 fipe_mc_ctrl.mc_gblact); 324 pci_config_put8(fipe_mc_ctrl.mc_pci_hdl, FIPE_MC_THRTLOW, 325 fipe_mc_ctrl.mc_thrtlow); 326 pci_config_put8(fipe_mc_ctrl.mc_pci_hdl, FIPE_MC_THRTCTRL, 327 fipe_mc_ctrl.mc_thrtctrl); 328} 329 330/* 331 * Initialize memory controller's data structure and status. 
332 */ 333static int 334fipe_mc_init(dev_info_t *dip) 335{ 336 ddi_acc_handle_t handle; 337 338 bzero(&fipe_mc_ctrl, sizeof (fipe_mc_ctrl)); 339 340 /* Hold one reference count and will be released in fipe_mc_fini. */ 341 ndi_hold_devi(dip); 342 343 /* Setup pci configuration handler. */ 344 if (pci_config_setup(dip, &handle) != DDI_SUCCESS) { 345 cmn_err(CE_WARN, 346 "!fipe: failed to setup pcicfg handler in mc_init."); 347 ndi_rele_devi(dip); 348 return (-1); 349 } 350 351 /* Save original configuration. */ 352 fipe_mc_ctrl.mc_thrtctrl = pci_config_get8(handle, FIPE_MC_THRTCTRL); 353 fipe_mc_ctrl.mc_thrtlow = pci_config_get8(handle, FIPE_MC_THRTLOW); 354 fipe_mc_ctrl.mc_gblact = pci_config_get8(handle, FIPE_MC_GBLACT); 355 fipe_mc_ctrl.mc_dip = dip; 356 fipe_mc_ctrl.mc_pci_hdl = handle; 357 fipe_mc_ctrl.mc_initialized = B_TRUE; 358 359 return (0); 360} 361 362/* 363 * Restore memory controller's configuration and release resources. 364 */ 365static void 366fipe_mc_fini(void) 367{ 368 if (fipe_mc_ctrl.mc_initialized) { 369 fipe_mc_restore(); 370 pci_config_teardown(&fipe_mc_ctrl.mc_pci_hdl); 371 ndi_rele_devi(fipe_mc_ctrl.mc_dip); 372 fipe_mc_ctrl.mc_initialized = B_FALSE; 373 } 374 bzero(&fipe_mc_ctrl, sizeof (fipe_mc_ctrl)); 375} 376 377/* Search device with specific pci ids. */ 378struct fipe_pci_ioat_id { 379 uint16_t venid; 380 uint16_t devid; 381 uint16_t subvenid; 382 uint16_t subsysid; 383 char *unitaddr; 384}; 385 386static struct fipe_pci_ioat_id fipe_pci_ioat_ids[] = { 387 { 0x8086, 0x1a38, 0xffff, 0xffff, NULL }, 388 { 0x8086, 0x360b, 0xffff, 0xffff, NULL }, 389}; 390 391/*ARGSUSED*/ 392static int 393fipe_search_ioat_dev(dev_info_t *dip, void *arg) 394{ 395 char *unit; 396 struct fipe_pci_ioat_id *id; 397 int i, max, venid, devid, subvenid, subsysid; 398 399 /* Query PCI id properties. 
*/ 400 venid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 401 "vendor-id", 0xffffffff); 402 if (venid == 0xffffffff) { 403 return (DDI_WALK_CONTINUE); 404 } 405 devid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 406 "device-id", 0xffffffff); 407 if (devid == 0xffffffff) { 408 return (DDI_WALK_CONTINUE); 409 } 410 subvenid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 411 "subsystem-vendor-id", 0xffffffff); 412 if (subvenid == 0xffffffff) { 413 return (DDI_WALK_CONTINUE); 414 } 415 subsysid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 416 "subsystem-id", 0xffffffff); 417 if (subvenid == 0xffffffff) { 418 return (DDI_WALK_CONTINUE); 419 } 420 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 421 "unit-address", &unit) != DDI_PROP_SUCCESS) { 422 return (DDI_WALK_CONTINUE); 423 } 424 425 max = sizeof (fipe_pci_ioat_ids) / sizeof (fipe_pci_ioat_ids[0]); 426 for (i = 0; i < max; i++) { 427 id = &fipe_pci_ioat_ids[i]; 428 if ((id->venid == 0xffffu || id->venid == venid) && 429 (id->devid == 0xffffu || id->devid == devid) && 430 (id->subvenid == 0xffffu || id->subvenid == subvenid) && 431 (id->subsysid == 0xffffu || id->subsysid == subsysid) && 432 (id->unitaddr == NULL || strcmp(id->unitaddr, unit) == 0)) { 433 break; 434 } 435 } 436 ddi_prop_free(unit); 437 if (i >= max) { 438 return (DDI_WALK_CONTINUE); 439 } 440 441 /* Found IOAT device, hold one reference count. */ 442 ndi_hold_devi(dip); 443 fipe_ioat_ctrl.ioat_dev_info = dip; 444 445 return (DDI_WALK_TERMINATE); 446} 447 448/* 449 * To enable FBDIMM idle power enhancement mechanism, IOAT will be used to 450 * generate enough memory traffic to trigger memory controller thermal throttle 451 * circuitry. 452 * If dcopy/ioat is available, we will use dcopy interface to communicate 453 * with IOAT. Otherwise the built-in driver will directly talk to IOAT 454 * hardware. 
 */
#ifdef FIPE_IOAT_BUILTIN
/*
 * Kick off the IOAT copy loop by programming channel registers directly.
 * Returns 0 on success, -1 if the channel is owned by another driver
 * (in which case fipe permanently gives up on IOAT).
 */
static int
fipe_ioat_trigger(void)
{
	uint16_t ctrl;
	uint32_t err;
	uint8_t *addr = fipe_ioat_ctrl.ioat_reg_addr;
	ddi_acc_handle_t handle = fipe_ioat_ctrl.ioat_reg_handle;

	/* Check channel in use flag. */
	ctrl = ddi_get16(handle, (uint16_t *)(addr + FIPE_IOAT_CHAN_CTRL));
	if (ctrl & 0x100) {
		/*
		 * Channel is in use by somebody else. IOAT driver may have
		 * been loaded, forbid fipe from accessing IOAT hardware
		 * anymore.
		 */
		fipe_ioat_ctrl.ioat_ready = B_FALSE;
		fipe_ioat_ctrl.ioat_failed = B_TRUE;
		FIPE_KSTAT_INC(ioat_start_fail_cnt);
		return (-1);
	} else {
		/* Set channel in use flag. */
		ddi_put16(handle,
		    (uint16_t *)(addr + FIPE_IOAT_CHAN_CTRL), 0x100);
	}

	/* Write command address (64-bit physaddr split into lo/hi). */
	ddi_put32(handle,
	    (uint32_t *)(addr + FIPE_IOAT_CHAN_ADDR_LO),
	    (uint32_t)fipe_ioat_ctrl.ioat_cmd_physaddr);
	ddi_put32(handle, (uint32_t *)(addr + FIPE_IOAT_CHAN_ADDR_HI),
	    (uint32_t)(fipe_ioat_ctrl.ioat_cmd_physaddr >> 32));

	/* Check and clear error flags (write-1-to-clear). */
	err = ddi_get32(handle, (uint32_t *)(addr + FIPE_IOAT_CHAN_ERR));
	if (err != 0) {
		ddi_put32(handle, (uint32_t *)(addr + FIPE_IOAT_CHAN_ERR),
		    err);
	}

	/* Start channel. */
	ddi_put8(handle, (uint8_t *)(addr + FIPE_IOAT_CHAN_CMD), 0x1);

	return (0);
}

/*
 * Stop the IOAT copy loop started by fipe_ioat_trigger and release the
 * channel. Spins until the hardware acknowledges the channel reset.
 */
static void
fipe_ioat_cancel(void)
{
	uint32_t status;
	uint8_t *addr = fipe_ioat_ctrl.ioat_reg_addr;
	ddi_acc_handle_t handle = fipe_ioat_ctrl.ioat_reg_handle;

	/*
	 * Reset channel. Sometimes reset is not reliable,
	 * so check completion or abort status after reset.
	 */
	/* LINTED: constant in conditional context */
	while (1) {
		/* Issue reset channel command. */
		ddi_put8(handle, (uint8_t *)(addr + FIPE_IOAT_CHAN_CMD), 0x20);

		/* Query command status. */
		status = ddi_get32(handle,
		    (uint32_t *)(addr + FIPE_IOAT_CHAN_STS_LO));
		if (status & 0x1) {
			/* Reset channel completed. */
			break;
		} else {
			SMT_PAUSE();
		}
	}

	/* Put channel into "not in use" state. */
	ddi_put16(handle, (uint16_t *)(addr + FIPE_IOAT_CHAN_CTRL), 0);
}

/*
 * Locate the IOAT device and map its register window. On transient failure
 * a retry timer is scheduled; on fatal failure ioat_failed is latched.
 * Note: the ioat_lock is held across the whole body, including out_error.
 */
/*ARGSUSED*/
static void
fipe_ioat_alloc(void *arg)
{
	int rc = 0, nregs;
	dev_info_t *dip;
	ddi_device_acc_attr_t attr;
	boolean_t fatal = B_FALSE;

	mutex_enter(&fipe_ioat_ctrl.ioat_lock);
	/*
	 * fipe_ioat_alloc() is called in DEVICE ATTACH context when loaded.
	 * In DEVICE ATTACH context, it can't call ddi_walk_devs(), so just
	 * schedule a timer and exit.
	 */
	if (fipe_ioat_ctrl.ioat_try_alloc == B_FALSE) {
		fipe_ioat_ctrl.ioat_try_alloc = B_TRUE;
		goto out_error;
	}

	/* Check whether has been initialized or encountered permanent error. */
	if (fipe_ioat_ctrl.ioat_ready || fipe_ioat_ctrl.ioat_failed ||
	    fipe_ioat_ctrl.ioat_cancel) {
		fipe_ioat_ctrl.ioat_timerid = 0;
		mutex_exit(&fipe_ioat_ctrl.ioat_lock);
		return;
	}

	if (fipe_ioat_ctrl.ioat_dev_info == NULL) {
		/* Find dev_info_t for IOAT engine. */
		ddi_walk_devs(ddi_root_node(), fipe_search_ioat_dev, NULL);
		if (fipe_ioat_ctrl.ioat_dev_info == NULL) {
			cmn_err(CE_NOTE,
			    "!fipe: no IOAT hardware found, disable pm.");
			fatal = B_TRUE;
			goto out_error;
		}
	}

	/* Map in IOAT control register window (register set 1). */
	ASSERT(fipe_ioat_ctrl.ioat_dev_info != NULL);
	ASSERT(fipe_ioat_ctrl.ioat_reg_mapped == B_FALSE);
	dip = fipe_ioat_ctrl.ioat_dev_info;
	if (ddi_dev_nregs(dip, &nregs) != DDI_SUCCESS || nregs < 2) {
		cmn_err(CE_WARN, "!fipe: ioat has not enough register bars.");
		fatal = B_TRUE;
		goto out_error;
	}
	attr.devacc_attr_version = DDI_DEVICE_ATTR_V0;
	attr.devacc_attr_endian_flags = DDI_NEVERSWAP_ACC;
	attr.devacc_attr_dataorder = DDI_STRICTORDER_ACC;
	rc = ddi_regs_map_setup(dip, 1,
	    (caddr_t *)&fipe_ioat_ctrl.ioat_reg_addr,
	    0, 0, &attr, &fipe_ioat_ctrl.ioat_reg_handle);
	if (rc != DDI_SUCCESS) {
		cmn_err(CE_WARN, "!fipe: failed to map IOAT registeres.");
		fatal = B_TRUE;
		goto out_error;
	}

	/* Mark IOAT status. */
	fipe_ioat_ctrl.ioat_reg_mapped = B_TRUE;
	fipe_ioat_ctrl.ioat_ready = B_TRUE;
	fipe_ioat_ctrl.ioat_failed = B_FALSE;
	fipe_ioat_ctrl.ioat_timerid = 0;
	mutex_exit(&fipe_ioat_ctrl.ioat_lock);

	return;

out_error:
	fipe_ioat_ctrl.ioat_timerid = 0;
	if (!fipe_ioat_ctrl.ioat_ready && !fipe_ioat_ctrl.ioat_cancel) {
		if (fatal) {
			/* Mark permanent error and give up. */
			fipe_ioat_ctrl.ioat_failed = B_TRUE;
			/* Release reference held by fipe_search_ioat_dev. */
			if (fipe_ioat_ctrl.ioat_dev_info != NULL) {
				ndi_rele_devi(fipe_ioat_ctrl.ioat_dev_info);
				fipe_ioat_ctrl.ioat_dev_info = NULL;
			}
		} else {
			/*
			 * Schedule another timer to keep on trying.
			 * timeout() should always succeed, no need to check
			 * return.
			 */
			fipe_ioat_ctrl.ioat_timerid = timeout(fipe_ioat_alloc,
			    NULL, drv_usectohz(FIPE_IOAT_RETRY_INTERVAL));
		}
	}
	mutex_exit(&fipe_ioat_ctrl.ioat_lock);
}

/*
 * Undo fipe_ioat_alloc: cancel any pending retry timer (with the lock
 * dropped around untimeout to avoid deadlock) and unmap registers.
 */
static void
fipe_ioat_free(void)
{
	mutex_enter(&fipe_ioat_ctrl.ioat_lock);
	/* Cancel timeout to avoid race condition. */
	if (fipe_ioat_ctrl.ioat_timerid != 0) {
		fipe_ioat_ctrl.ioat_cancel = B_TRUE;
		mutex_exit(&fipe_ioat_ctrl.ioat_lock);
		(void) untimeout(fipe_ioat_ctrl.ioat_timerid);
		mutex_enter(&fipe_ioat_ctrl.ioat_lock);
		fipe_ioat_ctrl.ioat_timerid = 0;
		fipe_ioat_ctrl.ioat_cancel = B_FALSE;
	}

	if (fipe_ioat_ctrl.ioat_reg_mapped) {
		ddi_regs_map_free(&fipe_ioat_ctrl.ioat_reg_handle);
		fipe_ioat_ctrl.ioat_reg_mapped = B_FALSE;
	}

	fipe_ioat_ctrl.ioat_ready = B_FALSE;
	mutex_exit(&fipe_ioat_ctrl.ioat_lock);
}

#else	/* FIPE_IOAT_BUILTIN */

/*
 * Trigger IOAT memory copy operation when entering power saving state.
 * A group of commands will be posted to IOAT driver and those commands
 * will be placed into an IOAT ring buffer.
 */
static int
fipe_ioat_trigger(void)
{
	int idx;
	dcopy_cmd_t *cmds = fipe_ioat_ctrl.ioat_cmds;

	/* Post cmds[FIPE_IOAT_CMD_NUM]..cmds[1]; cmds[0] is the canceller. */
	for (idx = FIPE_IOAT_CMD_NUM; idx > 0; idx--) {
		if (dcopy_cmd_post(cmds[idx]) == DCOPY_SUCCESS) {
			continue;
		} else {
			/*
			 * Don't rollback on failure, it doesn't hurt much more
			 * than some small memory copy operations.
			 */
			FIPE_KSTAT_DETAIL_INC(ioat_start_fail_cnt);
			return (-1);
		}
	}

	return (0);
}

/*
 * Cancel the memory copy operations posted by fipe_ioat_trigger.
 * It's achieved by posting a new command which will break the ring
 * created by fipe_ioat_trigger. If it fails, the best way to recover
 * is to just let it go. IOAT will recover when posting next command
 * on the same channel.
 */
static void
fipe_ioat_cancel(void)
{
	if (dcopy_cmd_post(fipe_ioat_ctrl.ioat_cmds[0]) != DCOPY_SUCCESS) {
		FIPE_KSTAT_DETAIL_INC(ioat_stop_fail_cnt);
	}
}

/*
 * This function is called to allocate IOAT resources.
 * Allocation may fail due to following reasons:
 * 1) IOAT driver hasn't been loaded yet. Keep on trying in this case.
 * 2) IOAT resources are temporarily unavailable. Keep on trying in this case.
 * 3) Other non-recoverable reasons. Disable power management function.
 */
/*ARGSUSED*/
static void
fipe_ioat_alloc(void *arg)
{
	int idx, flags, rc = 0;
	uint64_t physaddr;
	boolean_t fatal = B_FALSE;
	dcopy_query_t info;
	dcopy_handle_t handle;
	dcopy_cmd_t cmds[FIPE_IOAT_CMD_NUM + 1];

	mutex_enter(&fipe_ioat_ctrl.ioat_lock);
	/*
	 * fipe_ioat_alloc() is called in DEVICE ATTACH context when loaded.
	 * In DEVICE ATTACH context, it can't call ddi_walk_devs(), so just
	 * schedule a timer and exit.
	 */
	if (fipe_ioat_ctrl.ioat_try_alloc == B_FALSE) {
		fipe_ioat_ctrl.ioat_try_alloc = B_TRUE;
		mutex_exit(&fipe_ioat_ctrl.ioat_lock);
		goto out_error;
	}

	/*
	 * Check whether device has been initialized or if it encountered
	 * some permanent error.
	 */
	if (fipe_ioat_ctrl.ioat_ready || fipe_ioat_ctrl.ioat_failed ||
	    fipe_ioat_ctrl.ioat_cancel) {
		fipe_ioat_ctrl.ioat_timerid = 0;
		mutex_exit(&fipe_ioat_ctrl.ioat_lock);
		return;
	}

	if (fipe_ioat_ctrl.ioat_dev_info == NULL) {
		/* Find dev_info_t for IOAT engine. */
		ddi_walk_devs(ddi_root_node(), fipe_search_ioat_dev, NULL);
		if (fipe_ioat_ctrl.ioat_dev_info == NULL) {
			cmn_err(CE_NOTE,
			    "!fipe: no IOAT hardware found, disable pm.");
			mutex_exit(&fipe_ioat_ctrl.ioat_lock);
			fatal = B_TRUE;
			goto out_error;
		}
	}
	mutex_exit(&fipe_ioat_ctrl.ioat_lock);

	/* Check, allocate and initialize IOAT resources with lock released. */
	dcopy_query(&info);
	if (info.dq_version < DCOPY_QUERY_V0) {
		/* Permanent error, give up. */
		cmn_err(CE_WARN, "!fipe: IOAT driver version mismatch.");
		fatal = B_TRUE;
		goto out_error;
	} else if (info.dq_num_channels == 0) {
		/* IOAT driver hasn't been loaded, keep trying. */
		goto out_error;
	}

	/* Allocate IOAT channel. */
	rc = dcopy_alloc(DCOPY_NOSLEEP, &handle);
	if (rc == DCOPY_NORESOURCES) {
		/* Resource temporarily not available, keep trying. */
		goto out_error;
	} else if (rc != DCOPY_SUCCESS) {
		/* Permanent error, give up. */
		cmn_err(CE_WARN, "!fipe: failed to allocate IOAT channel.");
		fatal = B_TRUE;
		goto out_error;
	}

	/*
	 * Allocate multiple IOAT commands and organize them into a ring to
	 * loop forever. Commands number is determined by IOAT descriptor size
	 * and memory interleave pattern.
	 * cmd[0] is used break the loop and disable IOAT operation.
	 * cmd[1, FIPE_IOAT_CMD_NUM] are grouped into a ring and cmd[1] is the
	 * list head.
	 */
	bzero(cmds, sizeof (cmds));
	physaddr = fipe_ioat_ctrl.ioat_buf_physaddr;
	/* Allocate in descending index order so each cmd can link to next. */
	for (idx = FIPE_IOAT_CMD_NUM; idx >= 0; idx--) {
		/* Allocate IOAT commands. */
		if (idx == 0 || idx == FIPE_IOAT_CMD_NUM) {
			flags = DCOPY_NOSLEEP;
		} else {
			/*
			 * To link commands into a list, the initial value of
			 * cmd need to be set to next cmd on list.
			 */
			flags = DCOPY_NOSLEEP | DCOPY_ALLOC_LINK;
			cmds[idx] = cmds[idx + 1];
		}
		rc = dcopy_cmd_alloc(handle, flags, &cmds[idx]);
		if (rc == DCOPY_NORESOURCES) {
			goto out_freecmd;
		} else if (rc != DCOPY_SUCCESS) {
			/* Permanent error, give up. */
			cmn_err(CE_WARN,
			    "!fipe: failed to allocate IOAT command.");
			fatal = B_TRUE;
			goto out_freecmd;
		}

		/* Disable src/dst snoop to improve CPU cache efficiency. */
		cmds[idx]->dp_flags = DCOPY_CMD_NOSRCSNP | DCOPY_CMD_NODSTSNP;
		/* Specially handle commands on the list. */
		if (idx != 0) {
			/* Disable IOAT status. */
			cmds[idx]->dp_flags |= DCOPY_CMD_NOSTAT;
			/* Disable waiting for resources. */
			cmds[idx]->dp_flags |= DCOPY_CMD_NOWAIT;
			if (idx == 1) {
				/* The list head, chain command into loop. */
				cmds[idx]->dp_flags |= DCOPY_CMD_LOOP;
			} else {
				/* Queue all other commands except head. */
				cmds[idx]->dp_flags |= DCOPY_CMD_QUEUE;
			}
		}
		cmds[idx]->dp_cmd = DCOPY_CMD_COPY;
		cmds[idx]->dp.copy.cc_source = physaddr;
		cmds[idx]->dp.copy.cc_dest = physaddr + FIPE_MC_MEMORY_OFFSET;
		if (idx == 0) {
			/*
			 * Command 0 is used to cancel memory copy by breaking
			 * the ring created in fipe_ioat_trigger().
			 * For efficiency, use the smallest memory copy size.
			 */
			cmds[idx]->dp.copy.cc_size = 1;
		} else {
			cmds[idx]->dp.copy.cc_size = FIPE_MC_MEMORY_SIZE;
		}
	}

	/* Update IOAT control status if it hasn't been initialized yet. */
	mutex_enter(&fipe_ioat_ctrl.ioat_lock);
	if (!fipe_ioat_ctrl.ioat_ready && !fipe_ioat_ctrl.ioat_cancel) {
		fipe_ioat_ctrl.ioat_handle = handle;
		for (idx = 0; idx <= FIPE_IOAT_CMD_NUM; idx++) {
			fipe_ioat_ctrl.ioat_cmds[idx] = cmds[idx];
		}
		fipe_ioat_ctrl.ioat_ready = B_TRUE;
		fipe_ioat_ctrl.ioat_failed = B_FALSE;
		fipe_ioat_ctrl.ioat_timerid = 0;
		mutex_exit(&fipe_ioat_ctrl.ioat_lock);
		return;
	}
	mutex_exit(&fipe_ioat_ctrl.ioat_lock);
	/* Initialized by another thread, fall through to free resources. */

out_freecmd:
	if (cmds[0] != NULL) {
		dcopy_cmd_free(&cmds[0]);
	}
	/* Only need to free head, dcopy will free all commands on the list. */
	for (idx = 1; idx <= FIPE_IOAT_CMD_NUM; idx++) {
		if (cmds[idx] != NULL) {
			dcopy_cmd_free(&cmds[idx]);
			break;
		}
	}
	dcopy_free(&handle);

out_error:
	mutex_enter(&fipe_ioat_ctrl.ioat_lock);
	fipe_ioat_ctrl.ioat_timerid = 0;
	if (!fipe_ioat_ctrl.ioat_ready && !fipe_ioat_ctrl.ioat_cancel) {
		if (fatal) {
			/* Mark permanent error and give up. */
			fipe_ioat_ctrl.ioat_failed = B_TRUE;
			/* Release reference held by fipe_search_ioat_dev. */
			if (fipe_ioat_ctrl.ioat_dev_info != NULL) {
				ndi_rele_devi(fipe_ioat_ctrl.ioat_dev_info);
				fipe_ioat_ctrl.ioat_dev_info = NULL;
			}
		} else {
			/*
			 * Schedule another timer to keep on trying.
			 * timeout() should always succeed, no need to check.
			 */
			fipe_ioat_ctrl.ioat_timerid = timeout(fipe_ioat_alloc,
			    NULL, drv_usectohz(FIPE_IOAT_RETRY_INTERVAL));
		}
	}
	mutex_exit(&fipe_ioat_ctrl.ioat_lock);
}

/*
 * Free resources allocated in fipe_ioat_alloc.
 */
static void
fipe_ioat_free(void)
{
	int idx = 0;
	dcopy_cmd_t *cmds = fipe_ioat_ctrl.ioat_cmds;

	mutex_enter(&fipe_ioat_ctrl.ioat_lock);

	/* Cancel timeout to avoid race condition. */
	if (fipe_ioat_ctrl.ioat_timerid != 0) {
		fipe_ioat_ctrl.ioat_cancel = B_TRUE;
		mutex_exit(&fipe_ioat_ctrl.ioat_lock);
		(void) untimeout(fipe_ioat_ctrl.ioat_timerid);
		mutex_enter(&fipe_ioat_ctrl.ioat_lock);
		fipe_ioat_ctrl.ioat_timerid = 0;
		fipe_ioat_ctrl.ioat_cancel = B_FALSE;
	}

	/* Free ioat resources. */
	if (fipe_ioat_ctrl.ioat_ready) {
		if (cmds[0] != NULL) {
			dcopy_cmd_free(&cmds[0]);
		}
		/* Free head only; dcopy frees all commands on the list. */
		for (idx = 1; idx <= FIPE_IOAT_CMD_NUM; idx++) {
			if (cmds[idx] != NULL) {
				dcopy_cmd_free(&cmds[idx]);
				break;
			}
		}
		bzero(fipe_ioat_ctrl.ioat_cmds,
		    sizeof (fipe_ioat_ctrl.ioat_cmds));
		dcopy_free(&fipe_ioat_ctrl.ioat_handle);
		fipe_ioat_ctrl.ioat_handle = NULL;
		fipe_ioat_ctrl.ioat_ready = B_FALSE;
	}

	/* Release reference held by fipe_search_ioat_dev. */
	if (fipe_ioat_ctrl.ioat_dev_info != NULL) {
		ndi_rele_devi(fipe_ioat_ctrl.ioat_dev_info);
		fipe_ioat_ctrl.ioat_dev_info = NULL;
	}

	mutex_exit(&fipe_ioat_ctrl.ioat_lock);
}
#endif	/* FIPE_IOAT_BUILTIN */

/*
 * Initialize IOAT relative resources.
 */
static int
fipe_ioat_init(void)
{
	char *buf;
	size_t size;

	bzero(&fipe_ioat_ctrl, sizeof (fipe_ioat_ctrl));
	mutex_init(&fipe_ioat_ctrl.ioat_lock, NULL, MUTEX_DRIVER, NULL);

	/*
	 * Allocate memory for IOAT memory copy operation.
	 * The allocated memory should be page aligned to achieve better power
	 * savings.
	 * Don't use ddi_dma_mem_alloc here to keep thing simple. This also
	 * makes quiesce easier.
	 */
	size = PAGESIZE;
	buf = kmem_zalloc(size, KM_SLEEP);
	if ((intptr_t)buf & PAGEOFFSET) {
		/*
		 * Not page aligned: allocate double the size so a page-aligned
		 * region of PAGESIZE can be carved out of it below.
		 */
		kmem_free(buf, PAGESIZE);
		size <<= 1;
		buf = kmem_zalloc(size, KM_SLEEP);
	}
	fipe_ioat_ctrl.ioat_buf_size = size;
	fipe_ioat_ctrl.ioat_buf_start = buf;
	buf = (char *)P2ROUNDUP((intptr_t)buf, PAGESIZE);
	fipe_ioat_ctrl.ioat_buf_virtaddr = buf;
	/* Physical address derived from the page frame number. */
	fipe_ioat_ctrl.ioat_buf_physaddr = hat_getpfnum(kas.a_hat, buf);
	fipe_ioat_ctrl.ioat_buf_physaddr <<= PAGESHIFT;

#ifdef FIPE_IOAT_BUILTIN
	{
		uint64_t bufpa;
		/* IOAT descriptor data structure copied from ioat.h. */
		struct fipe_ioat_cmd_desc {
			uint32_t dd_size;
			uint32_t dd_ctrl;
			uint64_t dd_src_paddr;
			uint64_t dd_dest_paddr;
			uint64_t dd_next_desc;
			uint64_t dd_res4;
			uint64_t dd_res5;
			uint64_t dd_res6;
			uint64_t dd_res7;
		} *desc;

		/*
		 * Build two IOAT command descriptors and chain them into ring.
		 * Control flags as below:
		 *	0x2: disable source snoop
		 *	0x4: disable destination snoop
		 *	0x0 << 24: memory copy operation
		 * The layout for command descriptors and memory buffers are
		 * organized for power saving effect, please don't change it.
		 */
		buf = fipe_ioat_ctrl.ioat_buf_virtaddr;
		bufpa = fipe_ioat_ctrl.ioat_buf_physaddr;
		fipe_ioat_ctrl.ioat_cmd_physaddr = bufpa;

		/* First command descriptor. */
		desc = (struct fipe_ioat_cmd_desc *)(buf);
		desc->dd_size = 128;
		desc->dd_ctrl = 0x6;
		desc->dd_src_paddr = bufpa + 2048;
		desc->dd_dest_paddr = bufpa + 3072;
		/* Point to second descriptor. */
		desc->dd_next_desc = bufpa + 64;

		/* Second command descriptor. */
		desc = (struct fipe_ioat_cmd_desc *)(buf + 64);
		desc->dd_size = 128;
		desc->dd_ctrl = 0x6;
		desc->dd_src_paddr = bufpa + 2048;
		desc->dd_dest_paddr = bufpa + 3072;
		/* Point to first descriptor, closing the ring. */
		desc->dd_next_desc = bufpa;
	}
#endif	/* FIPE_IOAT_BUILTIN */

	return (0);
}

/*
 * Tear down everything set up by fipe_ioat_init.
 */
static void
fipe_ioat_fini(void)
{
	/* Release reference held by fipe_search_ioat_dev. */
	if (fipe_ioat_ctrl.ioat_dev_info != NULL) {
		ndi_rele_devi(fipe_ioat_ctrl.ioat_dev_info);
		fipe_ioat_ctrl.ioat_dev_info = NULL;
	}

	if (fipe_ioat_ctrl.ioat_buf_start != NULL) {
		ASSERT(fipe_ioat_ctrl.ioat_buf_size != 0);
		kmem_free(fipe_ioat_ctrl.ioat_buf_start,
		    fipe_ioat_ctrl.ioat_buf_size);
	}

	mutex_destroy(&fipe_ioat_ctrl.ioat_lock);
	bzero(&fipe_ioat_ctrl, sizeof (fipe_ioat_ctrl));
}

/*
 * Create handles for the cpu_idle statistics properties and register the
 * idle-state callbacks. On any failure, destroys handles created so far
 * and returns -1; returns 0 on success (idempotent once idle_ready).
 */
static int
fipe_idle_start(void)
{
	int rc;

	if (fipe_idle_ctrl.idle_ready) {
		return (0);
	}

	if (cpu_idle_prop_create_handle(CPU_IDLE_PROP_ENTER_TIMESTAMP,
	    &fipe_idle_ctrl.prop_enter) != 0) {
		cmn_err(CE_WARN, "!fipe: failed to get enter_ts property.");
		return (-1);
	}
	if (cpu_idle_prop_create_handle(CPU_IDLE_PROP_EXIT_TIMESTAMP,
	    &fipe_idle_ctrl.prop_exit) != 0) {
		cmn_err(CE_WARN, "!fipe: failed to get exit_ts property.");
		(void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_enter);
		return (-1);
	}
	if (cpu_idle_prop_create_handle(CPU_IDLE_PROP_TOTAL_IDLE_TIME,
	    &fipe_idle_ctrl.prop_idle) != 0) {
		cmn_err(CE_WARN, "!fipe: failed to get idle_time property.");
		(void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_exit);
		(void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_enter);
		return (-1);
	}
	if (cpu_idle_prop_create_handle(CPU_IDLE_PROP_TOTAL_BUSY_TIME,
	    &fipe_idle_ctrl.prop_busy) != 0) {
		cmn_err(CE_WARN, "!fipe: failed to get busy_time property.");
		(void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_idle);
		(void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_exit);
		(void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_enter);
		return (-1);
	}
	if (cpu_idle_prop_create_handle(CPU_IDLE_PROP_INTERRUPT_COUNT,
	    &fipe_idle_ctrl.prop_intr) != 0) {
		cmn_err(CE_WARN, "!fipe: failed to get intr_count property.");
		(void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_busy);
		(void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_idle);
		(void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_exit);
		(void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_enter);
		return (-1);
	}

	/* Register idle state notification callback.
*/ 1087 rc = cpu_idle_register_callback(CPU_IDLE_CB_PRIO_FIPE, &fipe_idle_cb, 1088 NULL, &fipe_idle_ctrl.cb_handle); 1089 if (rc != 0) { 1090 cmn_err(CE_WARN, "!fipe: failed to register cpuidle callback."); 1091 (void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_intr); 1092 (void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_busy); 1093 (void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_idle); 1094 (void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_exit); 1095 (void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_enter); 1096 return (-1); 1097 } 1098 1099 fipe_idle_ctrl.idle_ready = B_TRUE; 1100 1101 return (0); 1102} 1103 1104static int 1105fipe_idle_stop(void) 1106{ 1107 int rc; 1108 1109 if (fipe_idle_ctrl.idle_ready == B_FALSE) { 1110 return (0); 1111 } 1112 1113 rc = cpu_idle_unregister_callback(fipe_idle_ctrl.cb_handle); 1114 if (rc != 0) { 1115 cmn_err(CE_WARN, 1116 "!fipe: failed to unregister cpuidle callback."); 1117 return (-1); 1118 } 1119 1120 (void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_intr); 1121 (void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_busy); 1122 (void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_idle); 1123 (void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_exit); 1124 (void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_enter); 1125 1126 fipe_idle_ctrl.idle_ready = B_FALSE; 1127 1128 return (0); 1129} 1130 1131#ifdef FIPE_KSTAT_SUPPORT 1132static int 1133fipe_kstat_update(kstat_t *ksp, int rw) 1134{ 1135 struct fipe_kstat_s *sp; 1136 hrtime_t hrt; 1137 1138 if (rw == KSTAT_WRITE) { 1139 return (EACCES); 1140 } 1141 1142 sp = ksp->ks_data; 1143 sp->fipe_enabled.value.i32 = fipe_gbl_ctrl.pm_enabled ? 1 : 0; 1144 sp->fipe_policy.value.i32 = fipe_pm_policy; 1145 1146 hrt = fipe_gbl_ctrl.time_in_pm; 1147 scalehrtime(&hrt); 1148 sp->fipe_pm_time.value.ui64 = (uint64_t)hrt; 1149 1150#ifdef FIPE_KSTAT_DETAIL 1151 sp->ioat_ready.value.i32 = fipe_ioat_ctrl.ioat_ready ? 
1 : 0; 1152#endif /* FIPE_KSTAT_DETAIL */ 1153 1154 return (0); 1155} 1156#endif /* FIPE_KSTAT_SUPPORT */ 1157 1158/* 1159 * Initialize memory power management subsystem. 1160 * Note: This function should only be called from ATTACH. 1161 * Note: caller must ensure exclusive access to all fipe_xxx interfaces. 1162 */ 1163int 1164fipe_init(dev_info_t *dip) 1165{ 1166 size_t nsize; 1167 hrtime_t hrt; 1168 1169 /* Initialize global control structure. */ 1170 bzero(&fipe_gbl_ctrl, sizeof (fipe_gbl_ctrl)); 1171 mutex_init(&fipe_gbl_ctrl.lock, NULL, MUTEX_DRIVER, NULL); 1172 1173 /* Query power management policy from device property. */ 1174 fipe_pm_policy = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 1175 FIPE_PROP_PM_POLICY, fipe_pm_policy); 1176 if (fipe_pm_policy < 0 || fipe_pm_policy >= FIPE_PM_POLICY_MAX) { 1177 cmn_err(CE_CONT, 1178 "?fipe: invalid power management policy %d.\n", 1179 fipe_pm_policy); 1180 fipe_pm_policy = FIPE_PM_POLICY_BALANCE; 1181 } 1182 fipe_profile_curr = &fipe_profiles[fipe_pm_policy]; 1183 1184 /* 1185 * Compute unscaled hrtime value corresponding to FIPE_STAT_INTERVAL. 1186 * (1 << 36) should be big enough here. 1187 */ 1188 hrt = 1ULL << 36; 1189 scalehrtime(&hrt); 1190 fipe_idle_ctrl.tick_interval = FIPE_STAT_INTERVAL * (1ULL << 36) / hrt; 1191 1192 if (fipe_mc_init(dip) != 0) { 1193 cmn_err(CE_WARN, "!fipe: failed to initialize mc state."); 1194 goto out_mc_error; 1195 } 1196 if (fipe_ioat_init() != 0) { 1197 cmn_err(CE_NOTE, "!fipe: failed to initialize ioat state."); 1198 goto out_ioat_error; 1199 } 1200 1201 /* Allocate per-CPU structure. 
*/ 1202 nsize = max_ncpus * sizeof (fipe_cpu_state_t); 1203 nsize += CPU_CACHE_COHERENCE_SIZE; 1204 fipe_gbl_ctrl.state_buf = kmem_zalloc(nsize, KM_SLEEP); 1205 fipe_gbl_ctrl.state_size = nsize; 1206 fipe_cpu_states = (fipe_cpu_state_t *)P2ROUNDUP( 1207 (intptr_t)fipe_gbl_ctrl.state_buf, CPU_CACHE_COHERENCE_SIZE); 1208 1209#ifdef FIPE_KSTAT_SUPPORT 1210 fipe_gbl_ctrl.fipe_kstat = kstat_create("fipe", 0, "fipe-pm", "misc", 1211 KSTAT_TYPE_NAMED, sizeof (fipe_kstat) / sizeof (kstat_named_t), 1212 KSTAT_FLAG_VIRTUAL); 1213 if (fipe_gbl_ctrl.fipe_kstat == NULL) { 1214 cmn_err(CE_CONT, "?fipe: failed to create kstat object.\n"); 1215 } else { 1216 fipe_gbl_ctrl.fipe_kstat->ks_lock = &fipe_gbl_ctrl.lock; 1217 fipe_gbl_ctrl.fipe_kstat->ks_data = &fipe_kstat; 1218 fipe_gbl_ctrl.fipe_kstat->ks_update = fipe_kstat_update; 1219 kstat_install(fipe_gbl_ctrl.fipe_kstat); 1220 } 1221#endif /* FIPE_KSTAT_SUPPORT */ 1222 1223 return (0); 1224 1225out_ioat_error: 1226 fipe_mc_fini(); 1227out_mc_error: 1228 mutex_destroy(&fipe_gbl_ctrl.lock); 1229 bzero(&fipe_gbl_ctrl, sizeof (fipe_gbl_ctrl)); 1230 1231 return (-1); 1232} 1233 1234/* 1235 * Destroy memory power management subsystem. 1236 * Note: This function should only be called from DETACH. 1237 * Note: caller must ensure exclusive access to all fipe_xxx interfaces. 
 */
int
fipe_fini(void)
{
	/* Refuse to tear down while PM is still started. */
	if (fipe_gbl_ctrl.pm_enabled) {
		cmn_err(CE_NOTE, "!fipe: call fipe_fini without stopping PM.");
		return (EBUSY);
	}

	ASSERT(!fipe_gbl_ctrl.pm_active);
	fipe_ioat_fini();
	fipe_mc_fini();

#ifdef FIPE_KSTAT_SUPPORT
	if (fipe_gbl_ctrl.fipe_kstat != NULL) {
		kstat_delete(fipe_gbl_ctrl.fipe_kstat);
		fipe_gbl_ctrl.fipe_kstat = NULL;
	}
#endif	/* FIPE_KSTAT_SUPPORT */

	if (fipe_gbl_ctrl.state_buf != NULL) {
		ASSERT(fipe_gbl_ctrl.state_size != 0);
		kmem_free(fipe_gbl_ctrl.state_buf, fipe_gbl_ctrl.state_size);
		fipe_cpu_states = NULL;
	}

	fipe_profile_curr = NULL;
	mutex_destroy(&fipe_gbl_ctrl.lock);
	bzero(&fipe_gbl_ctrl, sizeof (fipe_gbl_ctrl));

	return (0);
}

/*
 * Start memory power management subsystem.
 * Note: caller must ensure exclusive access to all fipe_xxx interfaces.
 */
int
fipe_start(void)
{
	/* Idempotent: already started is success. */
	if (fipe_gbl_ctrl.pm_enabled == B_TRUE) {
		return (0);
	}

	/* Reset per-CPU tracking state before enabling callbacks. */
	bzero(fipe_cpu_states, max_ncpus * sizeof (fipe_cpu_states[0]));
	fipe_ioat_alloc(NULL);
	if (fipe_idle_start() != 0) {
		cmn_err(CE_NOTE, "!fipe: failed to start PM subsystem.");
		fipe_ioat_free();
		return (-1);
	}

	fipe_gbl_ctrl.pm_enabled = B_TRUE;

	return (0);
}

/*
 * Stop memory power management subsystem.
 * Note: caller must ensure exclusive access to all fipe_xxx interfaces.
 */
int
fipe_stop(void)
{
	if (fipe_gbl_ctrl.pm_enabled) {
		if (fipe_idle_stop() != 0) {
			cmn_err(CE_NOTE,
			    "!fipe: failed to stop PM subsystem.");
			return (-1);
		}
		fipe_ioat_free();
		fipe_gbl_ctrl.pm_enabled = B_FALSE;
	}
	ASSERT(!fipe_gbl_ctrl.pm_active);

	return (0);
}

/*
 * Suspend callback: remember the active policy and switch to the DISABLE
 * profile so no power-saving transitions happen across suspend/resume.
 */
int
fipe_suspend(void)
{
	/* Save current power management policy. */
	fipe_pm_policy_saved = fipe_pm_policy;
	/* Disable PM by setting profile to FIPE_PM_POLICY_DISABLE. */
	fipe_pm_policy = FIPE_PM_POLICY_DISABLE;
	fipe_profile_curr = &fipe_profiles[fipe_pm_policy];

	return (0);
}

/*
 * Resume callback: reinstate the policy saved by fipe_suspend().
 */
int
fipe_resume(void)
{
	/* Restore saved power management policy. */
	fipe_pm_policy = fipe_pm_policy_saved;
	fipe_profile_curr = &fipe_profiles[fipe_pm_policy];

	return (0);
}

/* Return the currently active power management policy. */
fipe_pm_policy_t
fipe_get_pmpolicy(void)
{
	return (fipe_pm_policy);
}

/*
 * Set the active power management policy and select the matching profile.
 * Returns 0 on success, EINVAL for an out-of-range policy.
 */
int
fipe_set_pmpolicy(fipe_pm_policy_t policy)
{
	if (policy < 0 || policy >= FIPE_PM_POLICY_MAX) {
		return (EINVAL);
	}
	fipe_pm_policy = policy;
	fipe_profile_curr = &fipe_profiles[fipe_pm_policy];

	return (0);
}

/*
 * Check condition (fipe_gbl_ctrl.cpu_cnt == ncpus) to make sure that
 * there is no other CPU trying to wake up system from memory power saving
 * state.  If a CPU is waking up system, fipe_disable() will set
 * fipe_gbl_ctrl.pm_active to false as soon as possible and allow other CPU's
 * to continue, and it will take the responsibility to recover system from
 * memory power saving state.
 */
static void
fipe_enable(int throttle, cpu_idle_check_wakeup_t check_func, void* check_arg)
{
	extern void membar_sync(void);

	FIPE_KSTAT_DETAIL_INC(pm_tryenter_cnt);

	/*
	 * Check CPU wakeup events.
	 */
	if (check_func != NULL) {
		(*check_func)(check_arg);
	}

	/*
	 * Try to acquire mutex, which also implicitly has the same effect
	 * of calling membar_sync().
	 * If mutex_tryenter fails, that means other CPU is waking up.
	 */
	if (mutex_tryenter(&fipe_gbl_ctrl.lock) == 0) {
		FIPE_KSTAT_DETAIL_INC(pm_race_cnt);
	/*
	 * Handle a special race condition for the case that a CPU wakes
	 * and then enters into idle state within a short period.
	 * This case can't be reliably detected by cpu_count mechanism.
	 */
	} else if (fipe_gbl_ctrl.pm_active) {
		FIPE_KSTAT_DETAIL_INC(pm_race_cnt);
		mutex_exit(&fipe_gbl_ctrl.lock);
	} else {
		fipe_gbl_ctrl.pm_active = B_TRUE;
		membar_sync();
		/*
		 * cpu_count is re-checked between every hardware step so a
		 * CPU waking concurrently aborts the transition as early as
		 * possible; each abort path undoes exactly the steps taken.
		 */
		if (fipe_gbl_ctrl.cpu_count != ncpus) {
			FIPE_KSTAT_DETAIL_INC(pm_race_cnt);
			fipe_gbl_ctrl.pm_active = B_FALSE;
		} else if (fipe_ioat_trigger() != 0) {
			fipe_gbl_ctrl.pm_active = B_FALSE;
		} else if (fipe_gbl_ctrl.cpu_count != ncpus ||
		    fipe_mc_change(throttle) != 0) {
			fipe_gbl_ctrl.pm_active = B_FALSE;
			fipe_ioat_cancel();
			if (fipe_gbl_ctrl.cpu_count != ncpus) {
				FIPE_KSTAT_DETAIL_INC(pm_race_cnt);
			}
		} else if (fipe_gbl_ctrl.cpu_count != ncpus) {
			fipe_gbl_ctrl.pm_active = B_FALSE;
			fipe_mc_restore();
			fipe_ioat_cancel();
			FIPE_KSTAT_DETAIL_INC(pm_race_cnt);
		} else {
			FIPE_KSTAT_DETAIL_INC(pm_success_cnt);
		}
		mutex_exit(&fipe_gbl_ctrl.lock);
	}
}

/*
 * Wake the system from memory power saving state.  Only the CPU that wins
 * the lock performs the recovery; losers either spin briefly (while the
 * winner is mid-recovery) or return once pm_active is clear.
 */
static void
fipe_disable(void)
{
	/*
	 * Try to acquire lock, which also implicitly has the same effect
	 * of calling membar_sync().
	 */
	while (mutex_tryenter(&fipe_gbl_ctrl.lock) == 0) {
		/*
		 * If power saving is inactive, just return and all dirty
		 * house-keeping work will be handled in fipe_enable().
		 */
		if (fipe_gbl_ctrl.pm_active == B_FALSE) {
			return;
		} else {
			(void) SMT_PAUSE();
		}
	}

	/* Disable power saving if it's active. */
	if (fipe_gbl_ctrl.pm_active) {
		/*
		 * Set pm_active to FALSE as soon as possible to prevent
		 * other CPUs from waiting on pm_active flag.
		 */
		fipe_gbl_ctrl.pm_active = B_FALSE;
		membar_producer();
		fipe_mc_restore();
		fipe_ioat_cancel();
	}

	mutex_exit(&fipe_gbl_ctrl.lock);
}

/*
 * Decide whether this CPU is "idle enough" to take part in memory power
 * saving.  Statistics (idle/busy ratio, interrupt rate) are refreshed once
 * per tick_interval; between refreshes the cached cond_ready verdict is
 * reused.  Returns the (possibly updated) sp->cond_ready.
 */
/*ARGSUSED*/
static boolean_t
fipe_check_cpu(struct fipe_cpu_state *sp, cpu_idle_callback_context_t ctx,
    hrtime_t ts)
{
	if (cpu_flagged_offline(CPU->cpu_flags)) {
		/* Treat CPU in offline state as ready. */
		sp->cond_ready = B_TRUE;
		return (B_TRUE);
	} else if (sp->next_ts <= ts) {
		/* Statistics window expired; recompute the verdict. */
		uint64_t intr;
		hrtime_t idle, busy, diff;
		cpu_idle_prop_value_t val;

		/* Set default value. */
		sp->cond_ready = B_TRUE;
		sp->idle_count = 0;

		/* Calculate idle percent. */
		idle = sp->last_idle;
		sp->last_idle = cpu_idle_prop_get_hrtime(
		    fipe_idle_ctrl.prop_idle, ctx);
		idle = sp->last_idle - idle;
		busy = sp->last_busy;
		sp->last_busy = cpu_idle_prop_get_hrtime(
		    fipe_idle_ctrl.prop_busy, ctx);
		busy = sp->last_busy - busy;
		/*
		 * Check idle condition: busy/(busy+idle) must not exceed
		 * (100 - BUSY_THRESHOLD) percent, expressed cross-multiplied
		 * to avoid division.
		 */
		if (idle > 0 && busy > 0) {
			if (busy * (100 - FIPE_PROF_BUSY_THRESHOLD) >
			    idle * FIPE_PROF_BUSY_THRESHOLD) {
				FIPE_KSTAT_DETAIL_INC(cpu_busy_cnt);
				sp->cond_ready = B_FALSE;
			} else {
				FIPE_KSTAT_DETAIL_INC(cpu_idle_cnt);
			}
		} else {
			FIPE_KSTAT_DETAIL_INC(cpu_busy_cnt);
			sp->cond_ready = B_FALSE;
		}

		/*
		 * Calculate interrupt count, normalized to one
		 * tick_interval.
		 */
		diff = sp->next_ts;
		sp->next_ts = ts + fipe_idle_ctrl.tick_interval;
		diff = sp->next_ts - diff;
		intr = sp->last_intr;
		if (cpu_idle_prop_get_value(fipe_idle_ctrl.prop_intr, ctx,
		    &val) == 0) {
			sp->last_intr = val.cipv_uint64;
			intr = sp->last_intr - intr;
			if (diff != 0) {
				intr = intr * fipe_idle_ctrl.tick_interval;
				intr /= diff;
			} else {
				intr = FIPE_PROF_INTR_THRESHOLD;
			}
		} else {
			/* Property unavailable: assume threshold reached. */
			intr = FIPE_PROF_INTR_THRESHOLD;
		}

		/*
		 * System is busy with interrupts, so disable all PM
		 * status checks for INTR_BUSY_THROTTLE ticks.
		 * Interrupts are disabled when FIPE callbacks are called,
		 * so this optimization will help to reduce interrupt
		 * latency.
		 */
		if (intr >= FIPE_PROF_INTR_BUSY_THRESHOLD) {
			FIPE_KSTAT_DETAIL_INC(cpu_intr_busy_cnt);
			sp->throttle_ts = ts + FIPE_PROF_INTR_BUSY_THROTTLE *
			    fipe_idle_ctrl.tick_interval;
			sp->cond_ready = B_FALSE;
		} else if (intr >= FIPE_PROF_INTR_THRESHOLD) {
			FIPE_KSTAT_DETAIL_INC(cpu_intr_throttle_cnt);
			sp->cond_ready = B_FALSE;
		}
	} else if (++sp->idle_count >= FIPE_PROF_IDLE_COUNT) {
		/*
		 * Too many idle enter/exit in this tick; throttle checks for
		 * a full tick to avoid thrashing.
		 */
		FIPE_KSTAT_DETAIL_INC(cpu_loop_cnt);
		sp->throttle_ts = sp->next_ts + fipe_idle_ctrl.tick_interval;
		sp->idle_count = 0;
		sp->cond_ready = B_FALSE;
		return (B_FALSE);
	}

	return (sp->cond_ready);
}

/*
 * cpuidle enter callback.  Counts this CPU as idle when conditions permit
 * and, if it is the last CPU to go idle and no block I/O is pending,
 * attempts to enter memory power saving via fipe_enable().
 */
/*ARGSUSED*/
static void
fipe_idle_enter(void *arg, cpu_idle_callback_context_t ctx,
    cpu_idle_check_wakeup_t check_func, void* check_arg)
{
	hrtime_t ts;
	uint32_t cnt;
	uint64_t iowait;
	cpu_t *cp = CPU;
	struct fipe_cpu_state *sp;

	sp = &fipe_cpu_states[cp->cpu_id];
	ts = cpu_idle_prop_get_hrtime(fipe_idle_ctrl.prop_enter, ctx);

	if (fipe_pm_policy != FIPE_PM_POLICY_DISABLE &&
	    fipe_ioat_ctrl.ioat_ready &&
	    sp->state_ready && sp->throttle_ts <= ts) {
		/* Adjust iowait count for local CPU. */
		iowait = CPU_STATS(cp, sys.iowait);
		if (iowait != sp->last_iowait) {
			atomic_add_64(&fipe_gbl_ctrl.io_waiters,
			    iowait - sp->last_iowait);
			sp->last_iowait = iowait;
		}

		/* Check current CPU status. */
		if (fipe_check_cpu(sp, ctx, ts)) {
			/* Increase count of CPU ready for power saving. */
			do {
				cnt = fipe_gbl_ctrl.cpu_count;
				ASSERT(cnt < ncpus);
			} while (atomic_cas_32(&fipe_gbl_ctrl.cpu_count,
			    cnt, cnt + 1) != cnt);

			/*
			 * Enable power saving if all CPUs are idle.
			 */
			if (cnt + 1 == ncpus) {
				if (fipe_gbl_ctrl.io_waiters == 0) {
					fipe_gbl_ctrl.enter_ts = ts;
					fipe_enable(fipe_pm_throttle_level,
					    check_func, check_arg);
				/* There are ongoing block io operations. */
				} else {
					FIPE_KSTAT_DETAIL_INC(bio_busy_cnt);
				}
			}
		}
	} else if (fipe_pm_policy == FIPE_PM_POLICY_DISABLE ||
	    fipe_ioat_ctrl.ioat_ready == B_FALSE) {
		/* PM disabled or IOAT unavailable: drop readiness flag. */
		if (sp->cond_ready == B_TRUE) {
			sp->cond_ready = B_FALSE;
		}
	} else if (sp->state_ready == B_FALSE) {
		/* First pass for this CPU: prime its statistics baseline. */
		sp->cond_ready = B_FALSE;
		sp->state_ready = B_TRUE;
		sp->throttle_ts = 0;
		sp->next_ts = ts + fipe_idle_ctrl.tick_interval;
		sp->last_busy = cpu_idle_prop_get_hrtime(
		    fipe_idle_ctrl.prop_busy, ctx);
		sp->last_idle = cpu_idle_prop_get_hrtime(
		    fipe_idle_ctrl.prop_idle, ctx);
		sp->last_intr = cpu_idle_prop_get_hrtime(
		    fipe_idle_ctrl.prop_intr, ctx);
		sp->idle_count = 0;
	}
}

/*
 * cpuidle exit callback.  Removes this CPU from the idle count; the first
 * CPU to wake (observing cnt == ncpus before its decrement) disables power
 * saving and accounts the time spent in PM state.
 */
/*ARGSUSED*/
static void
fipe_idle_exit(void* arg, cpu_idle_callback_context_t ctx, int flags)
{
	uint32_t cnt;
	hrtime_t ts;
	struct fipe_cpu_state *sp;

	sp = &fipe_cpu_states[CPU->cpu_id];
	if (sp->cond_ready) {
		do {
			cnt = fipe_gbl_ctrl.cpu_count;
			ASSERT(cnt > 0);
		} while (atomic_cas_32(&fipe_gbl_ctrl.cpu_count,
		    cnt, cnt - 1) != cnt);

		/*
		 * Try to disable power saving state.
		 * Only the first CPU waking from idle state will try to
		 * disable power saving state, all other CPUs will just go
		 * on and not try to wait for memory to recover from power
		 * saving state.
		 * So there are possible periods during which some CPUs are in
		 * active state but memory is in power saving state.
		 * This is OK, since it is an uncommon case, and it is
		 * better for performance to let them continue as their
		 * blocking latency is smaller than a mutex, and is only
		 * hit in the uncommon condition.
		 */
		if (cnt == ncpus) {
			fipe_disable();
			ts = cpu_idle_prop_get_hrtime(fipe_idle_ctrl.prop_exit,
			    ctx);
			fipe_gbl_ctrl.time_in_pm += ts - fipe_gbl_ctrl.enter_ts;
		}
	}
}