1/* $Id: bbc_envctrl.c,v 1.1.1.1 2008/10/15 03:26:47 james26_jang Exp $ 2 * bbc_envctrl.c: UltraSPARC-III environment control driver. 3 * 4 * Copyright (C) 2001 David S. Miller (davem@redhat.com) 5 */ 6 7#include <linux/kernel.h> 8#include <linux/sched.h> 9#include <linux/slab.h> 10#include <asm/oplib.h> 11#include <asm/ebus.h> 12#define __KERNEL_SYSCALLS__ 13static int errno; 14#include <asm/unistd.h> 15 16#include "bbc_i2c.h" 17#include "max1617.h" 18 19#undef ENVCTRL_TRACE 20 21/* WARNING: Making changes to this driver is very dangerous. 22 * If you misprogram the sensor chips they can 23 * cut the power on you instantly. 24 */ 25 26/* Two temperature sensors exist in the SunBLADE-1000 enclosure. 27 * Both are implemented using max1617 i2c devices. Each max1617 28 * monitors 2 temperatures, one for one of the cpu dies and the other 29 * for the ambient temperature. 30 * 31 * The max1617 is capable of being programmed with power-off 32 * temperature values, one low limit and one high limit. These 33 * can be controlled independantly for the cpu or ambient temperature. 34 * If a limit is violated, the power is simply shut off. The frequency 35 * with which the max1617 does temperature sampling can be controlled 36 * as well. 37 * 38 * Three fans exist inside the machine, all three are controlled with 39 * an i2c digital to analog converter. There is a fan directed at the 40 * two processor slots, another for the rest of the enclosure, and the 41 * third is for the power supply. The first two fans may be speed 42 * controlled by changing the voltage fed to them. The third fan may 43 * only be completely off or on. The third fan is meant to only be 44 * disabled/enabled when entering/exiting the lowest power-saving 45 * mode of the machine. 46 * 47 * An environmental control kernel thread periodically monitors all 48 * temperature sensors. Based upon the samples it will adjust the 49 * fan speeds to try and keep the system within a certain temperature 50 * range (the goal being to make the fans as quiet as possible without 51 * allowing the system to get too hot). 52 * 53 * If the temperature begins to rise/fall outside of the acceptable 54 * operating range, a periodic warning will be sent to the kernel log. 55 * The fans will be put on full blast to attempt to deal with this 56 * situation. After exceeding the acceptable operating range by a 57 * certain threshold, the kernel thread will shut down the system. 58 * Here, the thread is attempting to shut the machine down cleanly 59 * before the hardware based power-off event is triggered. 60 */ 61 62/* These settings are in celcius. We use these defaults only 63 * if we cannot interrogate the cpu-fru SEEPROM. 64 */ 65struct temp_limits { 66 s8 high_pwroff, high_shutdown, high_warn; 67 s8 low_warn, low_shutdown, low_pwroff; 68}; 69 70static struct temp_limits cpu_temp_limits[2] = { 71 { 100, 85, 80, 5, -5, -10 }, 72 { 100, 85, 80, 5, -5, -10 }, 73}; 74 75static struct temp_limits amb_temp_limits[2] = { 76 { 65, 55, 40, 5, -5, -10 }, 77 { 65, 55, 40, 5, -5, -10 }, 78}; 79 80enum fan_action { FAN_SLOWER, FAN_SAME, FAN_FASTER, FAN_FULLBLAST, FAN_STATE_MAX }; 81 82struct bbc_cpu_temperature { 83 struct bbc_cpu_temperature *next; 84 85 struct bbc_i2c_client *client; 86 int index; 87 88 /* Current readings, and history. */ 89 s8 curr_cpu_temp; 90 s8 curr_amb_temp; 91 s8 prev_cpu_temp; 92 s8 prev_amb_temp; 93 s8 avg_cpu_temp; 94 s8 avg_amb_temp; 95 96 int sample_tick; 97 98 enum fan_action fan_todo[2]; 99#define FAN_AMBIENT 0 100#define FAN_CPU 1 101}; 102 103struct bbc_cpu_temperature *all_bbc_temps; 104 105struct bbc_fan_control { 106 struct bbc_fan_control *next; 107 108 struct bbc_i2c_client *client; 109 int index; 110 111 int psupply_fan_on; 112 int cpu_fan_speed; 113 int system_fan_speed; 114}; 115 116struct bbc_fan_control *all_bbc_fans; 117 118#define CPU_FAN_REG 0xf0 119#define SYS_FAN_REG 0xf2 120#define PSUPPLY_FAN_REG 0xf4 121 122#define FAN_SPEED_MIN 0x0c 123#define FAN_SPEED_MAX 0x3f 124 125#define PSUPPLY_FAN_ON 0x1f 126#define PSUPPLY_FAN_OFF 0x00 127 128static void set_fan_speeds(struct bbc_fan_control *fp) 129{ 130 /* Put temperatures into range so we don't mis-program 131 * the hardware. 132 */ 133 if (fp->cpu_fan_speed < FAN_SPEED_MIN) 134 fp->cpu_fan_speed = FAN_SPEED_MIN; 135 if (fp->cpu_fan_speed > FAN_SPEED_MAX) 136 fp->cpu_fan_speed = FAN_SPEED_MAX; 137 if (fp->system_fan_speed < FAN_SPEED_MIN) 138 fp->system_fan_speed = FAN_SPEED_MIN; 139 if (fp->system_fan_speed > FAN_SPEED_MAX) 140 fp->system_fan_speed = FAN_SPEED_MAX; 141#ifdef ENVCTRL_TRACE 142 printk("fan%d: Changed fan speed to cpu(%02x) sys(%02x)\n", 143 fp->index, 144 fp->cpu_fan_speed, fp->system_fan_speed); 145#endif 146 147 bbc_i2c_writeb(fp->client, fp->cpu_fan_speed, CPU_FAN_REG); 148 bbc_i2c_writeb(fp->client, fp->system_fan_speed, SYS_FAN_REG); 149 bbc_i2c_writeb(fp->client, 150 (fp->psupply_fan_on ? 151 PSUPPLY_FAN_ON : PSUPPLY_FAN_OFF), 152 PSUPPLY_FAN_REG); 153} 154 155static void get_current_temps(struct bbc_cpu_temperature *tp) 156{ 157 tp->prev_amb_temp = tp->curr_amb_temp; 158 bbc_i2c_readb(tp->client, 159 (unsigned char *) &tp->curr_amb_temp, 160 MAX1617_AMB_TEMP); 161 tp->prev_cpu_temp = tp->curr_cpu_temp; 162 bbc_i2c_readb(tp->client, 163 (unsigned char *) &tp->curr_cpu_temp, 164 MAX1617_CPU_TEMP); 165#ifdef ENVCTRL_TRACE 166 printk("temp%d: cpu(%d C) amb(%d C)\n", 167 tp->index, 168 (int) tp->curr_cpu_temp, (int) tp->curr_amb_temp); 169#endif 170} 171 172 173static void do_envctrl_shutdown(struct bbc_cpu_temperature *tp) 174{ 175 static int shutting_down = 0; 176 static char *envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; 177 char *argv[] = { "/sbin/shutdown", "-h", "now", NULL }; 178 char *type = "???"; 179 s8 val = -1; 180 181 if (shutting_down != 0) 182 return; 183 184 if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_shutdown || 185 tp->curr_amb_temp < amb_temp_limits[tp->index].low_shutdown) { 186 type = "ambient"; 187 val = tp->curr_amb_temp; 188 } else if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_shutdown || 189 tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_shutdown) { 190 type = "CPU"; 191 val = tp->curr_cpu_temp; 192 } 193 194 printk(KERN_CRIT "temp%d: Outside of safe %s " 195 "operating temperature, %d C.\n", 196 tp->index, type, val); 197 198 printk(KERN_CRIT "kenvctrld: Shutting down the system now.\n"); 199 200 shutting_down = 1; 201 if (execve("/sbin/shutdown", argv, envp) < 0) 202 printk(KERN_CRIT "envctrl: shutdown execution failed\n"); 203} 204 205#define WARN_INTERVAL (30 * HZ) 206 207static void analyze_ambient_temp(struct bbc_cpu_temperature *tp, unsigned long *last_warn, int tick) 208{ 209 int ret = 0; 210 211 if (time_after(jiffies, (*last_warn + WARN_INTERVAL))) { 212 if (tp->curr_amb_temp >= 213 amb_temp_limits[tp->index].high_warn) { 214 printk(KERN_WARNING "temp%d: " 215 "Above safe ambient operating temperature, %d C.\n", 216 tp->index, (int) tp->curr_amb_temp); 217 ret = 1; 218 } else if (tp->curr_amb_temp < 219 amb_temp_limits[tp->index].low_warn) { 220 printk(KERN_WARNING "temp%d: " 221 "Below safe ambient operating temperature, %d C.\n", 222 tp->index, (int) tp->curr_amb_temp); 223 ret = 1; 224 } 225 if (ret) 226 *last_warn = jiffies; 227 } else if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_warn || 228 tp->curr_amb_temp < amb_temp_limits[tp->index].low_warn) 229 ret = 1; 230 231 /* Now check the shutdown limits. */ 232 if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_shutdown || 233 tp->curr_amb_temp < amb_temp_limits[tp->index].low_shutdown) { 234 do_envctrl_shutdown(tp); 235 ret = 1; 236 } 237 238 if (ret) { 239 tp->fan_todo[FAN_AMBIENT] = FAN_FULLBLAST; 240 } else if ((tick & (8 - 1)) == 0) { 241 s8 amb_goal_hi = amb_temp_limits[tp->index].high_warn - 10; 242 s8 amb_goal_lo; 243 244 amb_goal_lo = amb_goal_hi - 3; 245 246 /* We do not try to avoid 'too cold' events. Basically we 247 * only try to deal with over-heating and fan noise reduction. 248 */ 249 if (tp->avg_amb_temp < amb_goal_hi) { 250 if (tp->avg_amb_temp >= amb_goal_lo) 251 tp->fan_todo[FAN_AMBIENT] = FAN_SAME; 252 else 253 tp->fan_todo[FAN_AMBIENT] = FAN_SLOWER; 254 } else { 255 tp->fan_todo[FAN_AMBIENT] = FAN_FASTER; 256 } 257 } else { 258 tp->fan_todo[FAN_AMBIENT] = FAN_SAME; 259 } 260} 261 262static void analyze_cpu_temp(struct bbc_cpu_temperature *tp, unsigned long *last_warn, int tick) 263{ 264 int ret = 0; 265 266 if (time_after(jiffies, (*last_warn + WARN_INTERVAL))) { 267 if (tp->curr_cpu_temp >= 268 cpu_temp_limits[tp->index].high_warn) { 269 printk(KERN_WARNING "temp%d: " 270 "Above safe CPU operating temperature, %d C.\n", 271 tp->index, (int) tp->curr_cpu_temp); 272 ret = 1; 273 } else if (tp->curr_cpu_temp < 274 cpu_temp_limits[tp->index].low_warn) { 275 printk(KERN_WARNING "temp%d: " 276 "Below safe CPU operating temperature, %d C.\n", 277 tp->index, (int) tp->curr_cpu_temp); 278 ret = 1; 279 } 280 if (ret) 281 *last_warn = jiffies; 282 } else if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_warn || 283 tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_warn) 284 ret = 1; 285 286 /* Now check the shutdown limits. */ 287 if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_shutdown || 288 tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_shutdown) { 289 do_envctrl_shutdown(tp); 290 ret = 1; 291 } 292 293 if (ret) { 294 tp->fan_todo[FAN_CPU] = FAN_FULLBLAST; 295 } else if ((tick & (8 - 1)) == 0) { 296 s8 cpu_goal_hi = cpu_temp_limits[tp->index].high_warn - 10; 297 s8 cpu_goal_lo; 298 299 cpu_goal_lo = cpu_goal_hi - 3; 300 301 /* We do not try to avoid 'too cold' events. Basically we 302 * only try to deal with over-heating and fan noise reduction. 303 */ 304 if (tp->avg_cpu_temp < cpu_goal_hi) { 305 if (tp->avg_cpu_temp >= cpu_goal_lo) 306 tp->fan_todo[FAN_CPU] = FAN_SAME; 307 else 308 tp->fan_todo[FAN_CPU] = FAN_SLOWER; 309 } else { 310 tp->fan_todo[FAN_CPU] = FAN_FASTER; 311 } 312 } else { 313 tp->fan_todo[FAN_CPU] = FAN_SAME; 314 } 315} 316 317static void analyze_temps(struct bbc_cpu_temperature *tp, unsigned long *last_warn) 318{ 319 tp->avg_amb_temp = (s8)((int)((int)tp->avg_amb_temp + (int)tp->curr_amb_temp) / 2); 320 tp->avg_cpu_temp = (s8)((int)((int)tp->avg_cpu_temp + (int)tp->curr_cpu_temp) / 2); 321 322 analyze_ambient_temp(tp, last_warn, tp->sample_tick); 323 analyze_cpu_temp(tp, last_warn, tp->sample_tick); 324 325 tp->sample_tick++; 326} 327 328static enum fan_action prioritize_fan_action(int which_fan) 329{ 330 struct bbc_cpu_temperature *tp; 331 enum fan_action decision = FAN_STATE_MAX; 332 333 /* Basically, prioritize what the temperature sensors 334 * recommend we do, and perform that action on all the 335 * fans. 336 */ 337 for (tp = all_bbc_temps; tp; tp = tp->next) { 338 if (tp->fan_todo[which_fan] == FAN_FULLBLAST) { 339 decision = FAN_FULLBLAST; 340 break; 341 } 342 if (tp->fan_todo[which_fan] == FAN_SAME && 343 decision != FAN_FASTER) 344 decision = FAN_SAME; 345 else if (tp->fan_todo[which_fan] == FAN_FASTER) 346 decision = FAN_FASTER; 347 else if (decision != FAN_FASTER && 348 decision != FAN_SAME && 349 tp->fan_todo[which_fan] == FAN_SLOWER) 350 decision = FAN_SLOWER; 351 } 352 if (decision == FAN_STATE_MAX) 353 decision = FAN_SAME; 354 355 return decision; 356} 357 358static int maybe_new_ambient_fan_speed(struct bbc_fan_control *fp) 359{ 360 enum fan_action decision = prioritize_fan_action(FAN_AMBIENT); 361 int ret; 362 363 if (decision == FAN_SAME) 364 return 0; 365 366 ret = 1; 367 if (decision == FAN_FULLBLAST) { 368 if (fp->system_fan_speed >= FAN_SPEED_MAX) 369 ret = 0; 370 else 371 fp->system_fan_speed = FAN_SPEED_MAX; 372 } else { 373 if (decision == FAN_FASTER) { 374 if (fp->system_fan_speed >= FAN_SPEED_MAX) 375 ret = 0; 376 else 377 fp->system_fan_speed += 2; 378 } else { 379 int orig_speed = fp->system_fan_speed; 380 381 if (orig_speed <= FAN_SPEED_MIN || 382 orig_speed <= (fp->cpu_fan_speed - 3)) 383 ret = 0; 384 else 385 fp->system_fan_speed -= 1; 386 } 387 } 388 389 return ret; 390} 391 392static int maybe_new_cpu_fan_speed(struct bbc_fan_control *fp) 393{ 394 enum fan_action decision = prioritize_fan_action(FAN_CPU); 395 int ret; 396 397 if (decision == FAN_SAME) 398 return 0; 399 400 ret = 1; 401 if (decision == FAN_FULLBLAST) { 402 if (fp->cpu_fan_speed >= FAN_SPEED_MAX) 403 ret = 0; 404 else 405 fp->cpu_fan_speed = FAN_SPEED_MAX; 406 } else { 407 if (decision == FAN_FASTER) { 408 if (fp->cpu_fan_speed >= FAN_SPEED_MAX) 409 ret = 0; 410 else { 411 fp->cpu_fan_speed += 2; 412 if (fp->system_fan_speed < 413 (fp->cpu_fan_speed - 3)) 414 fp->system_fan_speed = 415 fp->cpu_fan_speed - 3; 416 } 417 } else { 418 if (fp->cpu_fan_speed <= FAN_SPEED_MIN) 419 ret = 0; 420 else 421 fp->cpu_fan_speed -= 1; 422 } 423 } 424 425 return ret; 426} 427 428static void maybe_new_fan_speeds(struct bbc_fan_control *fp) 429{ 430 int new; 431 432 new = maybe_new_ambient_fan_speed(fp); 433 new |= maybe_new_cpu_fan_speed(fp); 434 435 if (new) 436 set_fan_speeds(fp); 437} 438 439static void fans_full_blast(void) 440{ 441 struct bbc_fan_control *fp; 442 443 /* Since we will not be monitoring things anymore, put 444 * the fans on full blast. 445 */ 446 for (fp = all_bbc_fans; fp; fp = fp->next) { 447 fp->cpu_fan_speed = FAN_SPEED_MAX; 448 fp->system_fan_speed = FAN_SPEED_MAX; 449 fp->psupply_fan_on = 1; 450 set_fan_speeds(fp); 451 } 452} 453 454#define POLL_INTERVAL (5 * HZ) 455static unsigned long last_warning_jiffies; 456static struct task_struct *kenvctrld_task; 457 458static int kenvctrld(void *__unused) 459{ 460 daemonize(); 461 strcpy(current->comm, "kenvctrld"); 462 kenvctrld_task = current; 463 464 printk(KERN_INFO "bbc_envctrl: kenvctrld starting...\n"); 465 last_warning_jiffies = jiffies - WARN_INTERVAL; 466 for (;;) { 467 struct bbc_cpu_temperature *tp; 468 struct bbc_fan_control *fp; 469 470 current->state = TASK_INTERRUPTIBLE; 471 schedule_timeout(POLL_INTERVAL); 472 current->state = TASK_RUNNING; 473 if (signal_pending(current)) 474 break; 475 476 for (tp = all_bbc_temps; tp; tp = tp->next) { 477 get_current_temps(tp); 478 analyze_temps(tp, &last_warning_jiffies); 479 } 480 for (fp = all_bbc_fans; fp; fp = fp->next) 481 maybe_new_fan_speeds(fp); 482 } 483 printk(KERN_INFO "bbc_envctrl: kenvctrld exiting...\n"); 484 485 fans_full_blast(); 486 487 return 0; 488} 489 490static void attach_one_temp(struct linux_ebus_child *echild, int temp_idx) 491{ 492 struct bbc_cpu_temperature *tp = kmalloc(sizeof(*tp), GFP_KERNEL); 493 494 if (!tp) 495 return; 496 memset(tp, 0, sizeof(*tp)); 497 tp->client = bbc_i2c_attach(echild); 498 if (!tp->client) { 499 kfree(tp); 500 return; 501 } 502 503 tp->index = temp_idx; 504 { 505 struct bbc_cpu_temperature **tpp = &all_bbc_temps; 506 while (*tpp) 507 tpp = &((*tpp)->next); 508 tp->next = NULL; 509 *tpp = tp; 510 } 511 512 /* Tell it to convert once every 5 seconds, clear all cfg 513 * bits. 514 */ 515 bbc_i2c_writeb(tp->client, 0x00, MAX1617_WR_CFG_BYTE); 516 bbc_i2c_writeb(tp->client, 0x02, MAX1617_WR_CVRATE_BYTE); 517 518 /* Program the hard temperature limits into the chip. */ 519 bbc_i2c_writeb(tp->client, amb_temp_limits[tp->index].high_pwroff, 520 MAX1617_WR_AMB_HIGHLIM); 521 bbc_i2c_writeb(tp->client, amb_temp_limits[tp->index].low_pwroff, 522 MAX1617_WR_AMB_LOWLIM); 523 bbc_i2c_writeb(tp->client, cpu_temp_limits[tp->index].high_pwroff, 524 MAX1617_WR_CPU_HIGHLIM); 525 bbc_i2c_writeb(tp->client, cpu_temp_limits[tp->index].low_pwroff, 526 MAX1617_WR_CPU_LOWLIM); 527 528 get_current_temps(tp); 529 tp->prev_cpu_temp = tp->avg_cpu_temp = tp->curr_cpu_temp; 530 tp->prev_amb_temp = tp->avg_amb_temp = tp->curr_amb_temp; 531 532 tp->fan_todo[FAN_AMBIENT] = FAN_SAME; 533 tp->fan_todo[FAN_CPU] = FAN_SAME; 534} 535 536static void attach_one_fan(struct linux_ebus_child *echild, int fan_idx) 537{ 538 struct bbc_fan_control *fp = kmalloc(sizeof(*fp), GFP_KERNEL); 539 540 if (!fp) 541 return; 542 memset(fp, 0, sizeof(*fp)); 543 fp->client = bbc_i2c_attach(echild); 544 if (!fp->client) { 545 kfree(fp); 546 return; 547 } 548 549 fp->index = fan_idx; 550 551 { 552 struct bbc_fan_control **fpp = &all_bbc_fans; 553 while (*fpp) 554 fpp = &((*fpp)->next); 555 fp->next = NULL; 556 *fpp = fp; 557 } 558 559 /* The i2c device controlling the fans is write-only. 560 * So the only way to keep track of the current power 561 * level fed to the fans is via software. Choose half 562 * power for cpu/system and 'on' fo the powersupply fan 563 * and set it now. 564 */ 565 fp->psupply_fan_on = 1; 566 fp->cpu_fan_speed = (FAN_SPEED_MAX - FAN_SPEED_MIN) / 2; 567 fp->cpu_fan_speed += FAN_SPEED_MIN; 568 fp->system_fan_speed = (FAN_SPEED_MAX - FAN_SPEED_MIN) / 2; 569 fp->system_fan_speed += FAN_SPEED_MIN; 570 571 set_fan_speeds(fp); 572} 573 574void bbc_envctrl_init(void) 575{ 576 struct linux_ebus_child *echild; 577 int temp_index = 0; 578 int fan_index = 0; 579 int devidx = 0; 580 581 while ((echild = bbc_i2c_getdev(devidx++)) != NULL) { 582 if (!strcmp(echild->prom_name, "temperature")) 583 attach_one_temp(echild, temp_index++); 584 if (!strcmp(echild->prom_name, "fan-control")) 585 attach_one_fan(echild, fan_index++); 586 } 587 if (temp_index != 0 && fan_index != 0) 588 kernel_thread(kenvctrld, NULL, CLONE_FS | CLONE_FILES); 589} 590 591static void destroy_one_temp(struct bbc_cpu_temperature *tp) 592{ 593 bbc_i2c_detach(tp->client); 594 kfree(tp); 595} 596 597static void destroy_one_fan(struct bbc_fan_control *fp) 598{ 599 bbc_i2c_detach(fp->client); 600 kfree(fp); 601} 602 603void bbc_envctrl_cleanup(void) 604{ 605 struct bbc_cpu_temperature *tp; 606 struct bbc_fan_control *fp; 607 608 if (kenvctrld_task != NULL) { 609 force_sig(SIGKILL, kenvctrld_task); 610 for (;;) { 611 struct task_struct *p; 612 int found = 0; 613 614 read_lock(&tasklist_lock); 615 for_each_task(p) { 616 if (p == kenvctrld_task) { 617 found = 1; 618 break; 619 } 620 } 621 read_unlock(&tasklist_lock); 622 if (!found) 623 break; 624 current->state = TASK_INTERRUPTIBLE; 625 schedule_timeout(HZ); 626 current->state = TASK_RUNNING; 627 } 628 kenvctrld_task = NULL; 629 } 630 631 tp = all_bbc_temps; 632 while (tp != NULL) { 633 struct bbc_cpu_temperature *next = tp->next; 634 destroy_one_temp(tp); 635 tp = next; 636 } 637 all_bbc_temps = NULL; 638 639 fp = all_bbc_fans; 640 while (fp != NULL) { 641 struct bbc_fan_control *next = fp->next; 642 destroy_one_fan(fp); 643 fp = next; 644 } 645 all_bbc_fans = NULL; 646} 647