1/* $Id: bbc_envctrl.c,v 1.1.1.1 2007/08/03 18:52:55 Exp $ 2 * bbc_envctrl.c: UltraSPARC-III environment control driver. 3 * 4 * Copyright (C) 2001 David S. Miller (davem@redhat.com) 5 */ 6 7#include <linux/kthread.h> 8#include <linux/delay.h> 9#include <linux/kmod.h> 10#include <asm/oplib.h> 11#include <asm/ebus.h> 12 13#include "bbc_i2c.h" 14#include "max1617.h" 15 16#undef ENVCTRL_TRACE 17 18/* WARNING: Making changes to this driver is very dangerous. 19 * If you misprogram the sensor chips they can 20 * cut the power on you instantly. 21 */ 22 23/* Two temperature sensors exist in the SunBLADE-1000 enclosure. 24 * Both are implemented using max1617 i2c devices. Each max1617 25 * monitors 2 temperatures, one for one of the cpu dies and the other 26 * for the ambient temperature. 27 * 28 * The max1617 is capable of being programmed with power-off 29 * temperature values, one low limit and one high limit. These 30 * can be controlled independently for the cpu or ambient temperature. 31 * If a limit is violated, the power is simply shut off. The frequency 32 * with which the max1617 does temperature sampling can be controlled 33 * as well. 34 * 35 * Three fans exist inside the machine, all three are controlled with 36 * an i2c digital to analog converter. There is a fan directed at the 37 * two processor slots, another for the rest of the enclosure, and the 38 * third is for the power supply. The first two fans may be speed 39 * controlled by changing the voltage fed to them. The third fan may 40 * only be completely off or on. The third fan is meant to only be 41 * disabled/enabled when entering/exiting the lowest power-saving 42 * mode of the machine. 43 * 44 * An environmental control kernel thread periodically monitors all 45 * temperature sensors. Based upon the samples it will adjust the 46 * fan speeds to try and keep the system within a certain temperature 47 * range (the goal being to make the fans as quiet as possible without 48 * allowing the system to get too hot). 49 * 50 * If the temperature begins to rise/fall outside of the acceptable 51 * operating range, a periodic warning will be sent to the kernel log. 52 * The fans will be put on full blast to attempt to deal with this 53 * situation. After exceeding the acceptable operating range by a 54 * certain threshold, the kernel thread will shut down the system. 55 * Here, the thread is attempting to shut the machine down cleanly 56 * before the hardware based power-off event is triggered. 57 */ 58 59/* These settings are in Celsius. We use these defaults only 60 * if we cannot interrogate the cpu-fru SEEPROM. 61 */ 62struct temp_limits { 63 s8 high_pwroff, high_shutdown, high_warn; 64 s8 low_warn, low_shutdown, low_pwroff; 65}; 66 67static struct temp_limits cpu_temp_limits[2] = { 68 { 100, 85, 80, 5, -5, -10 }, 69 { 100, 85, 80, 5, -5, -10 }, 70}; 71 72static struct temp_limits amb_temp_limits[2] = { 73 { 65, 55, 40, 5, -5, -10 }, 74 { 65, 55, 40, 5, -5, -10 }, 75}; 76 77enum fan_action { FAN_SLOWER, FAN_SAME, FAN_FASTER, FAN_FULLBLAST, FAN_STATE_MAX }; 78 79struct bbc_cpu_temperature { 80 struct bbc_cpu_temperature *next; 81 82 struct bbc_i2c_client *client; 83 int index; 84 85 /* Current readings, and history. */ 86 s8 curr_cpu_temp; 87 s8 curr_amb_temp; 88 s8 prev_cpu_temp; 89 s8 prev_amb_temp; 90 s8 avg_cpu_temp; 91 s8 avg_amb_temp; 92 93 int sample_tick; 94 95 enum fan_action fan_todo[2]; 96#define FAN_AMBIENT 0 97#define FAN_CPU 1 98}; 99 100struct bbc_cpu_temperature *all_bbc_temps; 101 102struct bbc_fan_control { 103 struct bbc_fan_control *next; 104 105 struct bbc_i2c_client *client; 106 int index; 107 108 int psupply_fan_on; 109 int cpu_fan_speed; 110 int system_fan_speed; 111}; 112 113struct bbc_fan_control *all_bbc_fans; 114 115#define CPU_FAN_REG 0xf0 116#define SYS_FAN_REG 0xf2 117#define PSUPPLY_FAN_REG 0xf4 118 119#define FAN_SPEED_MIN 0x0c 120#define FAN_SPEED_MAX 0x3f 121 122#define PSUPPLY_FAN_ON 0x1f 123#define PSUPPLY_FAN_OFF 0x00 124 125static void set_fan_speeds(struct bbc_fan_control *fp) 126{ 127 /* Put temperatures into range so we don't mis-program 128 * the hardware. 129 */ 130 if (fp->cpu_fan_speed < FAN_SPEED_MIN) 131 fp->cpu_fan_speed = FAN_SPEED_MIN; 132 if (fp->cpu_fan_speed > FAN_SPEED_MAX) 133 fp->cpu_fan_speed = FAN_SPEED_MAX; 134 if (fp->system_fan_speed < FAN_SPEED_MIN) 135 fp->system_fan_speed = FAN_SPEED_MIN; 136 if (fp->system_fan_speed > FAN_SPEED_MAX) 137 fp->system_fan_speed = FAN_SPEED_MAX; 138#ifdef ENVCTRL_TRACE 139 printk("fan%d: Changed fan speed to cpu(%02x) sys(%02x)\n", 140 fp->index, 141 fp->cpu_fan_speed, fp->system_fan_speed); 142#endif 143 144 bbc_i2c_writeb(fp->client, fp->cpu_fan_speed, CPU_FAN_REG); 145 bbc_i2c_writeb(fp->client, fp->system_fan_speed, SYS_FAN_REG); 146 bbc_i2c_writeb(fp->client, 147 (fp->psupply_fan_on ? 148 PSUPPLY_FAN_ON : PSUPPLY_FAN_OFF), 149 PSUPPLY_FAN_REG); 150} 151 152static void get_current_temps(struct bbc_cpu_temperature *tp) 153{ 154 tp->prev_amb_temp = tp->curr_amb_temp; 155 bbc_i2c_readb(tp->client, 156 (unsigned char *) &tp->curr_amb_temp, 157 MAX1617_AMB_TEMP); 158 tp->prev_cpu_temp = tp->curr_cpu_temp; 159 bbc_i2c_readb(tp->client, 160 (unsigned char *) &tp->curr_cpu_temp, 161 MAX1617_CPU_TEMP); 162#ifdef ENVCTRL_TRACE 163 printk("temp%d: cpu(%d C) amb(%d C)\n", 164 tp->index, 165 (int) tp->curr_cpu_temp, (int) tp->curr_amb_temp); 166#endif 167} 168 169 170static void do_envctrl_shutdown(struct bbc_cpu_temperature *tp) 171{ 172 static int shutting_down = 0; 173 static char *envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; 174 char *argv[] = { "/sbin/shutdown", "-h", "now", NULL }; 175 char *type = "???"; 176 s8 val = -1; 177 178 if (shutting_down != 0) 179 return; 180 181 if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_shutdown || 182 tp->curr_amb_temp < amb_temp_limits[tp->index].low_shutdown) { 183 type = "ambient"; 184 val = tp->curr_amb_temp; 185 } else if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_shutdown || 186 tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_shutdown) { 187 type = "CPU"; 188 val = tp->curr_cpu_temp; 189 } 190 191 printk(KERN_CRIT "temp%d: Outside of safe %s " 192 "operating temperature, %d C.\n", 193 tp->index, type, val); 194 195 printk(KERN_CRIT "kenvctrld: Shutting down the system now.\n"); 196 197 shutting_down = 1; 198 if (call_usermodehelper("/sbin/shutdown", argv, envp, 0) < 0) 199 printk(KERN_CRIT "envctrl: shutdown execution failed\n"); 200} 201 202#define WARN_INTERVAL (30 * HZ) 203 204static void analyze_ambient_temp(struct bbc_cpu_temperature *tp, unsigned long *last_warn, int tick) 205{ 206 int ret = 0; 207 208 if (time_after(jiffies, (*last_warn + WARN_INTERVAL))) { 209 if (tp->curr_amb_temp >= 210 amb_temp_limits[tp->index].high_warn) { 211 printk(KERN_WARNING "temp%d: " 212 "Above safe ambient operating temperature, %d C.\n", 213 tp->index, (int) tp->curr_amb_temp); 214 ret = 1; 215 } else if (tp->curr_amb_temp < 216 amb_temp_limits[tp->index].low_warn) { 217 printk(KERN_WARNING "temp%d: " 218 "Below safe ambient operating temperature, %d C.\n", 219 tp->index, (int) tp->curr_amb_temp); 220 ret = 1; 221 } 222 if (ret) 223 *last_warn = jiffies; 224 } else if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_warn || 225 tp->curr_amb_temp < amb_temp_limits[tp->index].low_warn) 226 ret = 1; 227 228 /* Now check the shutdown limits. */ 229 if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_shutdown || 230 tp->curr_amb_temp < amb_temp_limits[tp->index].low_shutdown) { 231 do_envctrl_shutdown(tp); 232 ret = 1; 233 } 234 235 if (ret) { 236 tp->fan_todo[FAN_AMBIENT] = FAN_FULLBLAST; 237 } else if ((tick & (8 - 1)) == 0) { 238 s8 amb_goal_hi = amb_temp_limits[tp->index].high_warn - 10; 239 s8 amb_goal_lo; 240 241 amb_goal_lo = amb_goal_hi - 3; 242 243 /* We do not try to avoid 'too cold' events. Basically we 244 * only try to deal with over-heating and fan noise reduction. 245 */ 246 if (tp->avg_amb_temp < amb_goal_hi) { 247 if (tp->avg_amb_temp >= amb_goal_lo) 248 tp->fan_todo[FAN_AMBIENT] = FAN_SAME; 249 else 250 tp->fan_todo[FAN_AMBIENT] = FAN_SLOWER; 251 } else { 252 tp->fan_todo[FAN_AMBIENT] = FAN_FASTER; 253 } 254 } else { 255 tp->fan_todo[FAN_AMBIENT] = FAN_SAME; 256 } 257} 258 259static void analyze_cpu_temp(struct bbc_cpu_temperature *tp, unsigned long *last_warn, int tick) 260{ 261 int ret = 0; 262 263 if (time_after(jiffies, (*last_warn + WARN_INTERVAL))) { 264 if (tp->curr_cpu_temp >= 265 cpu_temp_limits[tp->index].high_warn) { 266 printk(KERN_WARNING "temp%d: " 267 "Above safe CPU operating temperature, %d C.\n", 268 tp->index, (int) tp->curr_cpu_temp); 269 ret = 1; 270 } else if (tp->curr_cpu_temp < 271 cpu_temp_limits[tp->index].low_warn) { 272 printk(KERN_WARNING "temp%d: " 273 "Below safe CPU operating temperature, %d C.\n", 274 tp->index, (int) tp->curr_cpu_temp); 275 ret = 1; 276 } 277 if (ret) 278 *last_warn = jiffies; 279 } else if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_warn || 280 tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_warn) 281 ret = 1; 282 283 /* Now check the shutdown limits. */ 284 if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_shutdown || 285 tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_shutdown) { 286 do_envctrl_shutdown(tp); 287 ret = 1; 288 } 289 290 if (ret) { 291 tp->fan_todo[FAN_CPU] = FAN_FULLBLAST; 292 } else if ((tick & (8 - 1)) == 0) { 293 s8 cpu_goal_hi = cpu_temp_limits[tp->index].high_warn - 10; 294 s8 cpu_goal_lo; 295 296 cpu_goal_lo = cpu_goal_hi - 3; 297 298 /* We do not try to avoid 'too cold' events. Basically we 299 * only try to deal with over-heating and fan noise reduction. 300 */ 301 if (tp->avg_cpu_temp < cpu_goal_hi) { 302 if (tp->avg_cpu_temp >= cpu_goal_lo) 303 tp->fan_todo[FAN_CPU] = FAN_SAME; 304 else 305 tp->fan_todo[FAN_CPU] = FAN_SLOWER; 306 } else { 307 tp->fan_todo[FAN_CPU] = FAN_FASTER; 308 } 309 } else { 310 tp->fan_todo[FAN_CPU] = FAN_SAME; 311 } 312} 313 314static void analyze_temps(struct bbc_cpu_temperature *tp, unsigned long *last_warn) 315{ 316 tp->avg_amb_temp = (s8)((int)((int)tp->avg_amb_temp + (int)tp->curr_amb_temp) / 2); 317 tp->avg_cpu_temp = (s8)((int)((int)tp->avg_cpu_temp + (int)tp->curr_cpu_temp) / 2); 318 319 analyze_ambient_temp(tp, last_warn, tp->sample_tick); 320 analyze_cpu_temp(tp, last_warn, tp->sample_tick); 321 322 tp->sample_tick++; 323} 324 325static enum fan_action prioritize_fan_action(int which_fan) 326{ 327 struct bbc_cpu_temperature *tp; 328 enum fan_action decision = FAN_STATE_MAX; 329 330 /* Basically, prioritize what the temperature sensors 331 * recommend we do, and perform that action on all the 332 * fans. 333 */ 334 for (tp = all_bbc_temps; tp; tp = tp->next) { 335 if (tp->fan_todo[which_fan] == FAN_FULLBLAST) { 336 decision = FAN_FULLBLAST; 337 break; 338 } 339 if (tp->fan_todo[which_fan] == FAN_SAME && 340 decision != FAN_FASTER) 341 decision = FAN_SAME; 342 else if (tp->fan_todo[which_fan] == FAN_FASTER) 343 decision = FAN_FASTER; 344 else if (decision != FAN_FASTER && 345 decision != FAN_SAME && 346 tp->fan_todo[which_fan] == FAN_SLOWER) 347 decision = FAN_SLOWER; 348 } 349 if (decision == FAN_STATE_MAX) 350 decision = FAN_SAME; 351 352 return decision; 353} 354 355static int maybe_new_ambient_fan_speed(struct bbc_fan_control *fp) 356{ 357 enum fan_action decision = prioritize_fan_action(FAN_AMBIENT); 358 int ret; 359 360 if (decision == FAN_SAME) 361 return 0; 362 363 ret = 1; 364 if (decision == FAN_FULLBLAST) { 365 if (fp->system_fan_speed >= FAN_SPEED_MAX) 366 ret = 0; 367 else 368 fp->system_fan_speed = FAN_SPEED_MAX; 369 } else { 370 if (decision == FAN_FASTER) { 371 if (fp->system_fan_speed >= FAN_SPEED_MAX) 372 ret = 0; 373 else 374 fp->system_fan_speed += 2; 375 } else { 376 int orig_speed = fp->system_fan_speed; 377 378 if (orig_speed <= FAN_SPEED_MIN || 379 orig_speed <= (fp->cpu_fan_speed - 3)) 380 ret = 0; 381 else 382 fp->system_fan_speed -= 1; 383 } 384 } 385 386 return ret; 387} 388 389static int maybe_new_cpu_fan_speed(struct bbc_fan_control *fp) 390{ 391 enum fan_action decision = prioritize_fan_action(FAN_CPU); 392 int ret; 393 394 if (decision == FAN_SAME) 395 return 0; 396 397 ret = 1; 398 if (decision == FAN_FULLBLAST) { 399 if (fp->cpu_fan_speed >= FAN_SPEED_MAX) 400 ret = 0; 401 else 402 fp->cpu_fan_speed = FAN_SPEED_MAX; 403 } else { 404 if (decision == FAN_FASTER) { 405 if (fp->cpu_fan_speed >= FAN_SPEED_MAX) 406 ret = 0; 407 else { 408 fp->cpu_fan_speed += 2; 409 if (fp->system_fan_speed < 410 (fp->cpu_fan_speed - 3)) 411 fp->system_fan_speed = 412 fp->cpu_fan_speed - 3; 413 } 414 } else { 415 if (fp->cpu_fan_speed <= FAN_SPEED_MIN) 416 ret = 0; 417 else 418 fp->cpu_fan_speed -= 1; 419 } 420 } 421 422 return ret; 423} 424 425static void maybe_new_fan_speeds(struct bbc_fan_control *fp) 426{ 427 int new; 428 429 new = maybe_new_ambient_fan_speed(fp); 430 new |= maybe_new_cpu_fan_speed(fp); 431 432 if (new) 433 set_fan_speeds(fp); 434} 435 436static void fans_full_blast(void) 437{ 438 struct bbc_fan_control *fp; 439 440 /* Since we will not be monitoring things anymore, put 441 * the fans on full blast. 442 */ 443 for (fp = all_bbc_fans; fp; fp = fp->next) { 444 fp->cpu_fan_speed = FAN_SPEED_MAX; 445 fp->system_fan_speed = FAN_SPEED_MAX; 446 fp->psupply_fan_on = 1; 447 set_fan_speeds(fp); 448 } 449} 450 451#define POLL_INTERVAL (5 * 1000) 452static unsigned long last_warning_jiffies; 453static struct task_struct *kenvctrld_task; 454 455static int kenvctrld(void *__unused) 456{ 457 printk(KERN_INFO "bbc_envctrl: kenvctrld starting...\n"); 458 last_warning_jiffies = jiffies - WARN_INTERVAL; 459 for (;;) { 460 struct bbc_cpu_temperature *tp; 461 struct bbc_fan_control *fp; 462 463 msleep_interruptible(POLL_INTERVAL); 464 if (kthread_should_stop()) 465 break; 466 467 for (tp = all_bbc_temps; tp; tp = tp->next) { 468 get_current_temps(tp); 469 analyze_temps(tp, &last_warning_jiffies); 470 } 471 for (fp = all_bbc_fans; fp; fp = fp->next) 472 maybe_new_fan_speeds(fp); 473 } 474 printk(KERN_INFO "bbc_envctrl: kenvctrld exiting...\n"); 475 476 fans_full_blast(); 477 478 return 0; 479} 480 481static void attach_one_temp(struct linux_ebus_child *echild, int temp_idx) 482{ 483 struct bbc_cpu_temperature *tp = kmalloc(sizeof(*tp), GFP_KERNEL); 484 485 if (!tp) 486 return; 487 memset(tp, 0, sizeof(*tp)); 488 tp->client = bbc_i2c_attach(echild); 489 if (!tp->client) { 490 kfree(tp); 491 return; 492 } 493 494 tp->index = temp_idx; 495 { 496 struct bbc_cpu_temperature **tpp = &all_bbc_temps; 497 while (*tpp) 498 tpp = &((*tpp)->next); 499 tp->next = NULL; 500 *tpp = tp; 501 } 502 503 /* Tell it to convert once every 5 seconds, clear all cfg 504 * bits. 505 */ 506 bbc_i2c_writeb(tp->client, 0x00, MAX1617_WR_CFG_BYTE); 507 bbc_i2c_writeb(tp->client, 0x02, MAX1617_WR_CVRATE_BYTE); 508 509 /* Program the hard temperature limits into the chip. */ 510 bbc_i2c_writeb(tp->client, amb_temp_limits[tp->index].high_pwroff, 511 MAX1617_WR_AMB_HIGHLIM); 512 bbc_i2c_writeb(tp->client, amb_temp_limits[tp->index].low_pwroff, 513 MAX1617_WR_AMB_LOWLIM); 514 bbc_i2c_writeb(tp->client, cpu_temp_limits[tp->index].high_pwroff, 515 MAX1617_WR_CPU_HIGHLIM); 516 bbc_i2c_writeb(tp->client, cpu_temp_limits[tp->index].low_pwroff, 517 MAX1617_WR_CPU_LOWLIM); 518 519 get_current_temps(tp); 520 tp->prev_cpu_temp = tp->avg_cpu_temp = tp->curr_cpu_temp; 521 tp->prev_amb_temp = tp->avg_amb_temp = tp->curr_amb_temp; 522 523 tp->fan_todo[FAN_AMBIENT] = FAN_SAME; 524 tp->fan_todo[FAN_CPU] = FAN_SAME; 525} 526 527static void attach_one_fan(struct linux_ebus_child *echild, int fan_idx) 528{ 529 struct bbc_fan_control *fp = kmalloc(sizeof(*fp), GFP_KERNEL); 530 531 if (!fp) 532 return; 533 memset(fp, 0, sizeof(*fp)); 534 fp->client = bbc_i2c_attach(echild); 535 if (!fp->client) { 536 kfree(fp); 537 return; 538 } 539 540 fp->index = fan_idx; 541 542 { 543 struct bbc_fan_control **fpp = &all_bbc_fans; 544 while (*fpp) 545 fpp = &((*fpp)->next); 546 fp->next = NULL; 547 *fpp = fp; 548 } 549 550 /* The i2c device controlling the fans is write-only. 551 * So the only way to keep track of the current power 552 * level fed to the fans is via software. Choose half 553 * power for cpu/system and 'on' fo the powersupply fan 554 * and set it now. 555 */ 556 fp->psupply_fan_on = 1; 557 fp->cpu_fan_speed = (FAN_SPEED_MAX - FAN_SPEED_MIN) / 2; 558 fp->cpu_fan_speed += FAN_SPEED_MIN; 559 fp->system_fan_speed = (FAN_SPEED_MAX - FAN_SPEED_MIN) / 2; 560 fp->system_fan_speed += FAN_SPEED_MIN; 561 562 set_fan_speeds(fp); 563} 564 565int bbc_envctrl_init(void) 566{ 567 struct linux_ebus_child *echild; 568 int temp_index = 0; 569 int fan_index = 0; 570 int devidx = 0; 571 572 while ((echild = bbc_i2c_getdev(devidx++)) != NULL) { 573 if (!strcmp(echild->prom_node->name, "temperature")) 574 attach_one_temp(echild, temp_index++); 575 if (!strcmp(echild->prom_node->name, "fan-control")) 576 attach_one_fan(echild, fan_index++); 577 } 578 if (temp_index != 0 && fan_index != 0) { 579 kenvctrld_task = kthread_run(kenvctrld, NULL, "kenvctrld"); 580 if (IS_ERR(kenvctrld_task)) 581 return PTR_ERR(kenvctrld_task); 582 } 583 584 return 0; 585} 586 587static void destroy_one_temp(struct bbc_cpu_temperature *tp) 588{ 589 bbc_i2c_detach(tp->client); 590 kfree(tp); 591} 592 593static void destroy_one_fan(struct bbc_fan_control *fp) 594{ 595 bbc_i2c_detach(fp->client); 596 kfree(fp); 597} 598 599void bbc_envctrl_cleanup(void) 600{ 601 struct bbc_cpu_temperature *tp; 602 struct bbc_fan_control *fp; 603 604 kthread_stop(kenvctrld_task); 605 606 tp = all_bbc_temps; 607 while (tp != NULL) { 608 struct bbc_cpu_temperature *next = tp->next; 609 destroy_one_temp(tp); 610 tp = next; 611 } 612 all_bbc_temps = NULL; 613 614 fp = all_bbc_fans; 615 while (fp != NULL) { 616 struct bbc_fan_control *next = fp->next; 617 destroy_one_fan(fp); 618 fp = next; 619 } 620 all_bbc_fans = NULL; 621} 622