1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22/* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27#include <stdio.h> 28#include <sys/types.h> 29#include <fcntl.h> 30#include <string.h> 31#include <stdlib.h> 32#include <unistd.h> 33#include <errno.h> 34 35#include <sys/stat.h> 36#include <poll.h> 37#include <signal.h> 38#include <pthread.h> 39#include <thread.h> 40#include <time.h> 41#include <sys/systeminfo.h> 42#include <sys/cred.h> 43#include <dirent.h> 44#include <libdevinfo.h> 45#include <sys/pm.h> 46#include <sys/ppmio.h> 47#include <locale.h> 48 49#include "fpsapi.h" 50#include "fpsd.h" 51#include "messages.h" 52 53 54#define DEV_PM "/devices/pseudo/pm@0:pm" 55#define DEFAULT_CPU_FULL_POWER 3 56 57int is_estar_system = 0; /* Not an E* system, by default */ 58int sys_pm_state = PM_SYSTEM_PM_DISABLED; /* By default autopm disabled */ 59 60 61static di_node_t fps_di_root = DI_NODE_NIL; 62static di_prom_handle_t fps_di_prom = DI_PROM_HANDLE_NIL; 63static char **cpu_dpaths = NULL; /* Used only on E* system */ 64static int *proc_ids = NULL; /* Used only on E* system */ 65static int num_cpus = 0; /* Used only on E* system */ 66static int devpm_fd = -1; /* Used only on E* system */ 67static int full_pwr = DEFAULT_CPU_FULL_POWER; 68 69/* 70 * Initialize system PM state enable/disable and 71 * enable system default info logging accordingly. 72 * Note: Even for systems for which CPU PM is not enabled by 73 * default, disk PM may be enabled explicitly using power.conf; 74 * If power management is enabled, disable informational logging 75 * by default. 76 * Some platforms don't have /dev/pm entry. It is perfectly OK. 77 * Don't complain if there is no /dev/pm entry. 78 * The platforms on which CPU PM is enabled by default, would 79 * ofcourse have /dev/pm entry. 80 * 81 * Note: open_dev_pm() should have been called initially before 82 * calling this function. 83 * 84 */ 85 86void 87update_pm_state() 88{ 89 int pm_stat; 90 91 if (devpm_fd == -1) 92 return; 93 94 pm_stat = ioctl(devpm_fd, PM_GET_PM_STATE); 95 96 if (pm_stat == -1) 97 return; 98 99 sys_pm_state = pm_stat; 100 101} 102 103/* 104 * Some platforms don't support power management. (neither CPU nor disk) 105 * Those platforms don't have /dev/pm entry. Don't complain in such case. 106 * Some platfors support PM only for disks. (they have /dev/pm entry. 107 * and logging is disabled on those platforms.) 108 * Some platforms support PM for both disks and CPUs (apart from others). 109 * Those platforms also have /dev/pm entry. 110 * Note that even desktops which support CPU PM E* can be custom 111 * configured to remove power management drivers. In that case, 112 * there won't be any /dev/pm entry and it is valid config. 113 * 114 */ 115 116static void open_dev_pm() 117{ 118 devpm_fd = open(DEV_PM, O_RDWR); 119 120} 121 122/* 123 * Initialize Estar info database. 124 * 125 */ 126 127void 128init_estar_db() 129{ 130 di_node_t fnode, node; 131 di_prop_t nextp; 132 char *path = NULL; 133 int cpu_i; 134 int is_pmprop_found = 0; 135 pm_req_t pmreq; 136 uchar_t *prop_data = NULL; 137 138 /* 139 * First open /dev/pm and keep it open for later uses. 140 * Note that this needs to be open on all power management supported 141 * systems. Some systems support power mgmt on only some 142 * devices like disk, but not CPU. /dev/pm does not exist on 143 * some platforms. Also PM drivers can be removed on custom 144 * configurations. 145 */ 146 open_dev_pm(); 147 148 if (devpm_fd == -1) 149 return; 150 151 fps_di_root = di_init("/", DINFOCPYALL); 152 153 if (DI_NODE_NIL == fps_di_root) { 154 fpsd_message(FPSD_EXIT_ERROR, FPS_WARNING, DI_INIT_FAIL); 155 } 156 157 fps_di_prom = di_prom_init(); 158 159 if (DI_PROM_HANDLE_NIL == fps_di_prom) { 160 fpsd_message(FPSD_EXIT_ERROR, FPS_WARNING, DI_PROM_INIT_FAIL); 161 di_fini(fps_di_root); 162 } 163 164 if (di_prom_prop_lookup_bytes(fps_di_prom, fps_di_root, 165 "energystar-v3", &prop_data) == -1) 166 goto exit_es; 167 168 /* 169 * As a final check, also check for "us" driver property pm-components 170 * On Estar systems, the driver should define this property. 171 */ 172 173 fnode = node = di_drv_first_node("us", fps_di_root); 174 175 if (DI_NODE_NIL == node) { 176 goto exit_es; 177 } 178 179 is_pmprop_found = 0; 180 for (nextp = di_prop_next(node, DI_PROP_NIL); nextp != DI_PROP_NIL; 181 nextp = di_prop_next(node, nextp)) { 182 if (strcmp(di_prop_name(nextp), "pm-components") == 0) { 183 is_pmprop_found = 1; 184 break; 185 } 186 } 187 188 if (!is_pmprop_found) 189 goto exit_es; 190 191 is_estar_system = 1; /* CPU power mgmt supported E* system */ 192 193 num_cpus = 0; 194 while (node != DI_NODE_NIL) { 195 num_cpus++; 196 node = di_drv_next_node(node); 197 } 198 199 cpu_dpaths = (char **)calloc(num_cpus+1, sizeof (char *)); 200 proc_ids = (int *)calloc(num_cpus+1, sizeof (int)); 201 proc_ids[num_cpus] = -1; /* Terminate processor ids by -1 */ 202 203 cpu_i = 0; 204 for (node = fnode; node != DI_NODE_NIL; node = di_drv_next_node(node)) { 205 proc_ids[cpu_i] = -1; 206 cpu_dpaths[cpu_i] = NULL; 207 208 path = di_devfs_path(node); 209 if (NULL == path) 210 continue; 211 cpu_dpaths[cpu_i] = strdup(path); 212 di_devfs_path_free(path); 213 /* 214 * Keep the mapping between path and processor IDs. 215 * Currently, processor IDs are not used. 216 * But may be used in future. 217 */ 218 219 /* 220 * On workstation platforms (where CPU E* supported), 221 * processor ID and instance numbers are same. 222 * This may change in future. So watch out. 223 */ 224 225 proc_ids[cpu_i] = di_instance(node); /* Currently unused. */ 226 cpu_i++; 227 } 228 229 proc_ids[cpu_i] = -1; 230 cpu_dpaths[cpu_i] = NULL; 231 232 /* Initialize what "FULL POWER" mode is. */ 233 full_pwr = DEFAULT_CPU_FULL_POWER; 234 235 pmreq.physpath = cpu_dpaths[0]; 236 pmreq.component = 0; 237 pmreq.value = 0; 238 pmreq.data = NULL; 239 pmreq.datasize = 0; 240 241 242 full_pwr = ioctl(devpm_fd, PM_GET_FULL_POWER, &pmreq); 243 if (full_pwr == -1) 244 full_pwr = DEFAULT_CPU_FULL_POWER; 245exit_es: 246 247 if (fps_di_root != DI_NODE_NIL) { 248 di_fini(fps_di_root); 249 fps_di_root = DI_NODE_NIL; 250 } 251 if (DI_PROM_HANDLE_NIL != fps_di_prom) { 252 di_prom_fini(fps_di_prom); 253 fps_di_prom = DI_PROM_HANDLE_NIL; 254 } 255} 256 257/* 258 * Return the min(idle_times), min(remaining_times), max(rem_time) for all 259 * CPUs in full power mode. The "remain time" is the remaining 260 * threshold time after which the CPU will make next lower level 261 * power transition if left idle. 262 * If the CPUs are not in full power mode or could not exactly determine 263 * the power mode then return -1. 264 * return 0 if CPUs are in full power mode. 265 */ 266 267int 268get_idle_rem_stats(int *min_idle, int *min_rem, int *max_rem) 269{ 270 int idle_time; 271 int pmstats[2]; 272 int i; 273 pm_req_t pmreq; 274 int ret; 275 276 *min_idle = -1; 277 *min_rem = -1; 278 *max_rem = -1; 279 280 for (i = 0; i < num_cpus; i++) { 281 282 pmreq.physpath = cpu_dpaths[i]; 283 pmreq.component = 0; 284 pmreq.value = 0; 285 pmreq.data = pmstats; 286 pmreq.datasize = sizeof (pmstats); 287 idle_time = ioctl(devpm_fd, PM_GET_TIME_IDLE, &pmreq); 288 if (idle_time == -1) 289 continue; 290 ret = ioctl(devpm_fd, PM_GET_STATS, &pmreq); 291 292 /* Now pmstats[0] = cur power level; pmstats[1]=remain time */ 293 if (ret == -1) 294 continue; 295 if (pmstats[0] != full_pwr) 296 continue; 297 298 if ((*min_idle == -1) || (idle_time < *min_idle)) 299 *min_idle = idle_time; 300 if (*min_rem == -1 || pmstats[1] < *min_rem) { 301 *min_rem = pmstats[1]; 302 303 /* 304 * The remain time can be negative if there are 2 cpus 305 * and 1 cpu is ready to transition 306 * and the other one is not 307 */ 308 if (*min_rem < 0) 309 *min_rem = 0; 310 } 311 if (*max_rem == -1 || pmstats[1] > *max_rem) 312 *max_rem = pmstats[1]; 313 } 314 315 return 316 ((*min_idle == -1 || *min_rem == -1 || *max_rem == -1) ? -1 : 0); 317} 318 319/* 320 * Wait until CPU comes to full power state or timeout occurs. 321 * If multiple threads call this function, execute the 322 * PM ioctl system call only once. 323 * This is better than all 3 threads polling cpu pwr state same time. 324 * 325 * Callers of this function should not assume that on returning from 326 * this function CPU will be in full power state. 327 * (They should check again). 328 * This function just optimizes for performance during wait. 329 * 330 * 331 */ 332 333void 334wait_for_pm_state_change() 335{ 336 int res; 337 static pthread_mutex_t wrlck; 338 static int is_active = 0; 339 static pm_req_t pmreq; 340 static pm_state_change_t pmsc; 341 static char path[MAXPATHLEN]; 342 343 int pwr = 0; 344 int cur_lvl = 0; /* 0 = unknown. 1=low, 3=full power */ 345 346 pmreq.physpath = cpu_dpaths[0]; 347 pmreq.component = 0; 348 pmreq.value = 0; 349 pmreq.data = NULL; 350 pmreq.datasize = 0; 351 352 353 (void) pthread_mutex_lock(&wrlck); 354 355 if (!is_active) { /* This is the first thread trying to wait */ 356 is_active = 1; 357 (void) pthread_mutex_unlock(&wrlck); 358 359 pmsc.physpath = path; 360 pmsc.size = MAXPATHLEN; 361 path[0] = 0; /* init not required. Just in case... */ 362 363 /* 364 * PM starts buffering the state changes after the first call to 365 * PM_GET_STATE_CHANGE/PM_GET_STATE_CHANGE_WAIT 366 * 367 * The PM_GET_STATE_CHANGE is a non-blocking call where as 368 * _WAIT is blocking call. The PM_GET_STATE_CHANGE also 369 * returns all the info * about the latest buffered state 370 * change if already buffered event is available. So it is 371 * important to drain out all old events, 372 * if you are only interested in future events. 373 * 374 * After the state changes the exact information/timestamp about 375 * state changes are reflected in the ioctl struct. 376 * To keep things simple, after draining out all buffered info, 377 * we issue get current power to get the current power level and 378 * then we issue another _WAIT command to get the 379 * next power change. 380 * 381 */ 382 383 do { 384 385 res = ioctl(devpm_fd, PM_GET_STATE_CHANGE, &pmsc); 386 387 if (res == -1 && errno != EWOULDBLOCK) { 388 fpsd_message(FPSD_NO_EXIT, FPS_WARNING, 389 INTERNAL_FAILURE_WARN, 390 strerror(errno)); 391 /* 1 second sleep. Avoid busy loop */ 392 (void) poll(NULL, 0, 1000); 393 /* Probably will succeed in next call. */ 394 goto psc_complete; 395 } 396 397 } while (errno != EWOULDBLOCK); 398 399 /* drain out all buffered state changes */ 400 401 /* If current state is full power, then get out. */ 402 403 do { 404 pwr = ioctl(devpm_fd, PM_GET_CURRENT_POWER, &pmreq); 405 if (pwr != -1) break; 406 if (errno == EAGAIN) { 407 (void) poll(NULL, 0, 1000); /* 1 sec sleep */ 408 continue; 409 } else { 410 fpsd_message(FPSD_NO_EXIT, FPS_WARNING, 411 INTERNAL_FAILURE_WARN1, 412 strerror(errno)); 413 (void) poll(NULL, 0, 1000); /* 1 sec sleep */ 414 goto psc_complete; 415 } 416 /*CONSTCOND*/ 417 } while (1); 418 419 if (pwr == full_pwr) 420 goto psc_complete; 421 422 while (cur_lvl != full_pwr) { 423 pmsc.physpath = path; 424 pmsc.size = MAXPATHLEN; 425 path[0] = 0; /* init not required. Just in case... */ 426 427 do { 428 res = ioctl(devpm_fd, 429 PM_GET_STATE_CHANGE_WAIT, &pmsc); 430 if (res == -1 && errno == EINTR) { 431 /* 1 second sleep */ 432 (void) poll(NULL, 0, 1000); 433 } 434 } while (res == -1 && errno == EINTR); 435 436 if (res == -1) { 437 fpsd_message(FPSD_NO_EXIT, FPS_WARNING, 438 INTERNAL_FAILURE_WARN2, 439 strerror(errno)); 440 /* 441 * If there are failures in state change ioctl, 442 * just would fall back to normal polling of 443 * status later. get out quiet. 444 */ 445 /* avoid busy loop -- 1 second sleep */ 446 (void) poll(NULL, 0, 1000); 447 goto psc_complete; 448 } 449 450 if (strcmp(pmsc.physpath, cpu_dpaths[0]) == 0 && 451 pmsc.new_level == full_pwr) 452 cur_lvl = full_pwr; 453 } 454 455psc_complete: 456 (void) pthread_mutex_lock(&wrlck); 457 is_active = 0; 458 (void) pthread_mutex_unlock(&wrlck); 459 460 } else { 461 /* Release the lock first */ 462 (void) pthread_mutex_unlock(&wrlck); 463 /* 464 * Already one other thread is active issuing ioctl call. 465 * Just poll here to check the local flag without any expensive 466 * ioctl calls until the transition is complete. 467 */ 468 (void) poll(NULL, 0, 1000); /* first time 1 second wait */ 469 for (;;) { 470 (void) pthread_mutex_lock(&wrlck); 471 if (!is_active) { 472 (void) pthread_mutex_unlock(&wrlck); 473 break; 474 } 475 (void) pthread_mutex_unlock(&wrlck); 476 (void) poll(NULL, 0, 4000); /* 4 seconds wait */ 477 } 478 } 479} 480