1/* $Id: bbc_envctrl.c,v 1.1.1.1 2007/08/03 18:52:55 Exp $
2 * bbc_envctrl.c: UltraSPARC-III environment control driver.
3 *
4 * Copyright (C) 2001 David S. Miller (davem@redhat.com)
5 */
6
7#include <linux/kthread.h>
8#include <linux/delay.h>
9#include <linux/kmod.h>
10#include <asm/oplib.h>
11#include <asm/ebus.h>
12
13#include "bbc_i2c.h"
14#include "max1617.h"
15
16#undef ENVCTRL_TRACE
17
18/* WARNING: Making changes to this driver is very dangerous.
19 *          If you misprogram the sensor chips they can
20 *          cut the power on you instantly.
21 */
22
23/* Two temperature sensors exist in the SunBLADE-1000 enclosure.
24 * Both are implemented using max1617 i2c devices.  Each max1617
25 * monitors 2 temperatures, one for one of the cpu dies and the other
26 * for the ambient temperature.
27 *
28 * The max1617 is capable of being programmed with power-off
29 * temperature values, one low limit and one high limit.  These
30 * can be controlled independently for the cpu or ambient temperature.
31 * If a limit is violated, the power is simply shut off.  The frequency
32 * with which the max1617 does temperature sampling can be controlled
33 * as well.
34 *
35 * Three fans exist inside the machine, all three are controlled with
36 * an i2c digital to analog converter.  There is a fan directed at the
37 * two processor slots, another for the rest of the enclosure, and the
38 * third is for the power supply.  The first two fans may be speed
39 * controlled by changing the voltage fed to them.  The third fan may
40 * only be completely off or on.  The third fan is meant to only be
41 * disabled/enabled when entering/exiting the lowest power-saving
42 * mode of the machine.
43 *
44 * An environmental control kernel thread periodically monitors all
45 * temperature sensors.  Based upon the samples it will adjust the
46 * fan speeds to try and keep the system within a certain temperature
47 * range (the goal being to make the fans as quiet as possible without
48 * allowing the system to get too hot).
49 *
50 * If the temperature begins to rise/fall outside of the acceptable
51 * operating range, a periodic warning will be sent to the kernel log.
52 * The fans will be put on full blast to attempt to deal with this
53 * situation.  After exceeding the acceptable operating range by a
54 * certain threshold, the kernel thread will shut down the system.
55 * Here, the thread is attempting to shut the machine down cleanly
56 * before the hardware based power-off event is triggered.
57 */
58
59/* These settings are in Celsius.  We use these defaults only
60 * if we cannot interrogate the cpu-fru SEEPROM.
61 */
62struct temp_limits {
63	s8 high_pwroff, high_shutdown, high_warn;
64	s8 low_warn, low_shutdown, low_pwroff;
65};
66
67static struct temp_limits cpu_temp_limits[2] = {
68	{ 100, 85, 80, 5, -5, -10 },
69	{ 100, 85, 80, 5, -5, -10 },
70};
71
72static struct temp_limits amb_temp_limits[2] = {
73	{ 65, 55, 40, 5, -5, -10 },
74	{ 65, 55, 40, 5, -5, -10 },
75};
76
/* What a temperature sensor would like to see done to a fan.
 * FAN_STATE_MAX is not an action; it is used as the "nothing chosen
 * yet" value while the per-sensor requests are being prioritized.
 */
enum fan_action {
	FAN_SLOWER,
	FAN_SAME,
	FAN_FASTER,
	FAN_FULLBLAST,
	FAN_STATE_MAX
};
78
79struct bbc_cpu_temperature {
80	struct bbc_cpu_temperature	*next;
81
82	struct bbc_i2c_client		*client;
83	int				index;
84
85	/* Current readings, and history. */
86	s8				curr_cpu_temp;
87	s8				curr_amb_temp;
88	s8				prev_cpu_temp;
89	s8				prev_amb_temp;
90	s8				avg_cpu_temp;
91	s8				avg_amb_temp;
92
93	int				sample_tick;
94
95	enum fan_action			fan_todo[2];
96#define FAN_AMBIENT	0
97#define FAN_CPU		1
98};
99
100struct bbc_cpu_temperature *all_bbc_temps;
101
/* Software state kept for one fan controller. */
struct bbc_fan_control {
	/* Next controller on the all_bbc_fans list. */
	struct bbc_fan_control 	*next;

	/* i2c handle used to talk to the controller. */
	struct bbc_i2c_client 	*client;
	/* Controller number. */
	int 			index;

	/* Non-zero when the power supply fan should be running. */
	int			psupply_fan_on;
	/* Last speed values chosen for the cpu and system fans. */
	int			cpu_fan_speed;
	int			system_fan_speed;
};

/* Singly linked list of all attached fan controllers. */
struct bbc_fan_control *all_bbc_fans;
114
/* Register offsets written on the fan controller. */
#define CPU_FAN_REG	0xf0
#define SYS_FAN_REG	0xf2
#define PSUPPLY_FAN_REG	0xf4

/* Range the cpu/system fan speed values are clamped to. */
#define FAN_SPEED_MIN	0x0c
#define FAN_SPEED_MAX	0x3f

/* Values written to PSUPPLY_FAN_REG to switch that fan on or off. */
#define PSUPPLY_FAN_ON	0x1f
#define PSUPPLY_FAN_OFF	0x00
124
125static void set_fan_speeds(struct bbc_fan_control *fp)
126{
127	/* Put temperatures into range so we don't mis-program
128	 * the hardware.
129	 */
130	if (fp->cpu_fan_speed < FAN_SPEED_MIN)
131		fp->cpu_fan_speed = FAN_SPEED_MIN;
132	if (fp->cpu_fan_speed > FAN_SPEED_MAX)
133		fp->cpu_fan_speed = FAN_SPEED_MAX;
134	if (fp->system_fan_speed < FAN_SPEED_MIN)
135		fp->system_fan_speed = FAN_SPEED_MIN;
136	if (fp->system_fan_speed > FAN_SPEED_MAX)
137		fp->system_fan_speed = FAN_SPEED_MAX;
138#ifdef ENVCTRL_TRACE
139	printk("fan%d: Changed fan speed to cpu(%02x) sys(%02x)\n",
140	       fp->index,
141	       fp->cpu_fan_speed, fp->system_fan_speed);
142#endif
143
144	bbc_i2c_writeb(fp->client, fp->cpu_fan_speed, CPU_FAN_REG);
145	bbc_i2c_writeb(fp->client, fp->system_fan_speed, SYS_FAN_REG);
146	bbc_i2c_writeb(fp->client,
147		       (fp->psupply_fan_on ?
148			PSUPPLY_FAN_ON : PSUPPLY_FAN_OFF),
149		       PSUPPLY_FAN_REG);
150}
151
152static void get_current_temps(struct bbc_cpu_temperature *tp)
153{
154	tp->prev_amb_temp = tp->curr_amb_temp;
155	bbc_i2c_readb(tp->client,
156		      (unsigned char *) &tp->curr_amb_temp,
157		      MAX1617_AMB_TEMP);
158	tp->prev_cpu_temp = tp->curr_cpu_temp;
159	bbc_i2c_readb(tp->client,
160		      (unsigned char *) &tp->curr_cpu_temp,
161		      MAX1617_CPU_TEMP);
162#ifdef ENVCTRL_TRACE
163	printk("temp%d: cpu(%d C) amb(%d C)\n",
164	       tp->index,
165	       (int) tp->curr_cpu_temp, (int) tp->curr_amb_temp);
166#endif
167}
168
169
170static void do_envctrl_shutdown(struct bbc_cpu_temperature *tp)
171{
172	static int shutting_down = 0;
173	static char *envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };
174	char *argv[] = { "/sbin/shutdown", "-h", "now", NULL };
175	char *type = "???";
176	s8 val = -1;
177
178	if (shutting_down != 0)
179		return;
180
181	if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_shutdown ||
182	    tp->curr_amb_temp < amb_temp_limits[tp->index].low_shutdown) {
183		type = "ambient";
184		val = tp->curr_amb_temp;
185	} else if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_shutdown ||
186		   tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_shutdown) {
187		type = "CPU";
188		val = tp->curr_cpu_temp;
189	}
190
191	printk(KERN_CRIT "temp%d: Outside of safe %s "
192	       "operating temperature, %d C.\n",
193	       tp->index, type, val);
194
195	printk(KERN_CRIT "kenvctrld: Shutting down the system now.\n");
196
197	shutting_down = 1;
198	if (call_usermodehelper("/sbin/shutdown", argv, envp, 0) < 0)
199		printk(KERN_CRIT "envctrl: shutdown execution failed\n");
200}
201
202#define WARN_INTERVAL	(30 * HZ)
203
204static void analyze_ambient_temp(struct bbc_cpu_temperature *tp, unsigned long *last_warn, int tick)
205{
206	int ret = 0;
207
208	if (time_after(jiffies, (*last_warn + WARN_INTERVAL))) {
209		if (tp->curr_amb_temp >=
210		    amb_temp_limits[tp->index].high_warn) {
211			printk(KERN_WARNING "temp%d: "
212			       "Above safe ambient operating temperature, %d C.\n",
213			       tp->index, (int) tp->curr_amb_temp);
214			ret = 1;
215		} else if (tp->curr_amb_temp <
216			   amb_temp_limits[tp->index].low_warn) {
217			printk(KERN_WARNING "temp%d: "
218			       "Below safe ambient operating temperature, %d C.\n",
219			       tp->index, (int) tp->curr_amb_temp);
220			ret = 1;
221		}
222		if (ret)
223			*last_warn = jiffies;
224	} else if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_warn ||
225		   tp->curr_amb_temp < amb_temp_limits[tp->index].low_warn)
226		ret = 1;
227
228	/* Now check the shutdown limits. */
229	if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_shutdown ||
230	    tp->curr_amb_temp < amb_temp_limits[tp->index].low_shutdown) {
231		do_envctrl_shutdown(tp);
232		ret = 1;
233	}
234
235	if (ret) {
236		tp->fan_todo[FAN_AMBIENT] = FAN_FULLBLAST;
237	} else if ((tick & (8 - 1)) == 0) {
238		s8 amb_goal_hi = amb_temp_limits[tp->index].high_warn - 10;
239		s8 amb_goal_lo;
240
241		amb_goal_lo = amb_goal_hi - 3;
242
243		/* We do not try to avoid 'too cold' events.  Basically we
244		 * only try to deal with over-heating and fan noise reduction.
245		 */
246		if (tp->avg_amb_temp < amb_goal_hi) {
247			if (tp->avg_amb_temp >= amb_goal_lo)
248				tp->fan_todo[FAN_AMBIENT] = FAN_SAME;
249			else
250				tp->fan_todo[FAN_AMBIENT] = FAN_SLOWER;
251		} else {
252			tp->fan_todo[FAN_AMBIENT] = FAN_FASTER;
253		}
254	} else {
255		tp->fan_todo[FAN_AMBIENT] = FAN_SAME;
256	}
257}
258
259static void analyze_cpu_temp(struct bbc_cpu_temperature *tp, unsigned long *last_warn, int tick)
260{
261	int ret = 0;
262
263	if (time_after(jiffies, (*last_warn + WARN_INTERVAL))) {
264		if (tp->curr_cpu_temp >=
265		    cpu_temp_limits[tp->index].high_warn) {
266			printk(KERN_WARNING "temp%d: "
267			       "Above safe CPU operating temperature, %d C.\n",
268			       tp->index, (int) tp->curr_cpu_temp);
269			ret = 1;
270		} else if (tp->curr_cpu_temp <
271			   cpu_temp_limits[tp->index].low_warn) {
272			printk(KERN_WARNING "temp%d: "
273			       "Below safe CPU operating temperature, %d C.\n",
274			       tp->index, (int) tp->curr_cpu_temp);
275			ret = 1;
276		}
277		if (ret)
278			*last_warn = jiffies;
279	} else if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_warn ||
280		   tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_warn)
281		ret = 1;
282
283	/* Now check the shutdown limits. */
284	if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_shutdown ||
285	    tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_shutdown) {
286		do_envctrl_shutdown(tp);
287		ret = 1;
288	}
289
290	if (ret) {
291		tp->fan_todo[FAN_CPU] = FAN_FULLBLAST;
292	} else if ((tick & (8 - 1)) == 0) {
293		s8 cpu_goal_hi = cpu_temp_limits[tp->index].high_warn - 10;
294		s8 cpu_goal_lo;
295
296		cpu_goal_lo = cpu_goal_hi - 3;
297
298		/* We do not try to avoid 'too cold' events.  Basically we
299		 * only try to deal with over-heating and fan noise reduction.
300		 */
301		if (tp->avg_cpu_temp < cpu_goal_hi) {
302			if (tp->avg_cpu_temp >= cpu_goal_lo)
303				tp->fan_todo[FAN_CPU] = FAN_SAME;
304			else
305				tp->fan_todo[FAN_CPU] = FAN_SLOWER;
306		} else {
307			tp->fan_todo[FAN_CPU] = FAN_FASTER;
308		}
309	} else {
310		tp->fan_todo[FAN_CPU] = FAN_SAME;
311	}
312}
313
314static void analyze_temps(struct bbc_cpu_temperature *tp, unsigned long *last_warn)
315{
316	tp->avg_amb_temp = (s8)((int)((int)tp->avg_amb_temp + (int)tp->curr_amb_temp) / 2);
317	tp->avg_cpu_temp = (s8)((int)((int)tp->avg_cpu_temp + (int)tp->curr_cpu_temp) / 2);
318
319	analyze_ambient_temp(tp, last_warn, tp->sample_tick);
320	analyze_cpu_temp(tp, last_warn, tp->sample_tick);
321
322	tp->sample_tick++;
323}
324
325static enum fan_action prioritize_fan_action(int which_fan)
326{
327	struct bbc_cpu_temperature *tp;
328	enum fan_action decision = FAN_STATE_MAX;
329
330	/* Basically, prioritize what the temperature sensors
331	 * recommend we do, and perform that action on all the
332	 * fans.
333	 */
334	for (tp = all_bbc_temps; tp; tp = tp->next) {
335		if (tp->fan_todo[which_fan] == FAN_FULLBLAST) {
336			decision = FAN_FULLBLAST;
337			break;
338		}
339		if (tp->fan_todo[which_fan] == FAN_SAME &&
340		    decision != FAN_FASTER)
341			decision = FAN_SAME;
342		else if (tp->fan_todo[which_fan] == FAN_FASTER)
343			decision = FAN_FASTER;
344		else if (decision != FAN_FASTER &&
345			 decision != FAN_SAME &&
346			 tp->fan_todo[which_fan] == FAN_SLOWER)
347			decision = FAN_SLOWER;
348	}
349	if (decision == FAN_STATE_MAX)
350		decision = FAN_SAME;
351
352	return decision;
353}
354
355static int maybe_new_ambient_fan_speed(struct bbc_fan_control *fp)
356{
357	enum fan_action decision = prioritize_fan_action(FAN_AMBIENT);
358	int ret;
359
360	if (decision == FAN_SAME)
361		return 0;
362
363	ret = 1;
364	if (decision == FAN_FULLBLAST) {
365		if (fp->system_fan_speed >= FAN_SPEED_MAX)
366			ret = 0;
367		else
368			fp->system_fan_speed = FAN_SPEED_MAX;
369	} else {
370		if (decision == FAN_FASTER) {
371			if (fp->system_fan_speed >= FAN_SPEED_MAX)
372				ret = 0;
373			else
374				fp->system_fan_speed += 2;
375		} else {
376			int orig_speed = fp->system_fan_speed;
377
378			if (orig_speed <= FAN_SPEED_MIN ||
379			    orig_speed <= (fp->cpu_fan_speed - 3))
380				ret = 0;
381			else
382				fp->system_fan_speed -= 1;
383		}
384	}
385
386	return ret;
387}
388
389static int maybe_new_cpu_fan_speed(struct bbc_fan_control *fp)
390{
391	enum fan_action decision = prioritize_fan_action(FAN_CPU);
392	int ret;
393
394	if (decision == FAN_SAME)
395		return 0;
396
397	ret = 1;
398	if (decision == FAN_FULLBLAST) {
399		if (fp->cpu_fan_speed >= FAN_SPEED_MAX)
400			ret = 0;
401		else
402			fp->cpu_fan_speed = FAN_SPEED_MAX;
403	} else {
404		if (decision == FAN_FASTER) {
405			if (fp->cpu_fan_speed >= FAN_SPEED_MAX)
406				ret = 0;
407			else {
408				fp->cpu_fan_speed += 2;
409				if (fp->system_fan_speed <
410				    (fp->cpu_fan_speed - 3))
411					fp->system_fan_speed =
412						fp->cpu_fan_speed - 3;
413			}
414		} else {
415			if (fp->cpu_fan_speed <= FAN_SPEED_MIN)
416				ret = 0;
417			else
418				fp->cpu_fan_speed -= 1;
419		}
420	}
421
422	return ret;
423}
424
/* Re-evaluate both speed-controlled fans of @fp, ambient first and
 * cpu second, and reprogram the controller only when at least one of
 * the two speeds was changed.
 */
static void maybe_new_fan_speeds(struct bbc_fan_control *fp)
{
	int sys_changed = maybe_new_ambient_fan_speed(fp);
	int cpu_changed = maybe_new_cpu_fan_speed(fp);

	if (sys_changed || cpu_changed)
		set_fan_speeds(fp);
}
435
436static void fans_full_blast(void)
437{
438	struct bbc_fan_control *fp;
439
440	/* Since we will not be monitoring things anymore, put
441	 * the fans on full blast.
442	 */
443	for (fp = all_bbc_fans; fp; fp = fp->next) {
444		fp->cpu_fan_speed = FAN_SPEED_MAX;
445		fp->system_fan_speed = FAN_SPEED_MAX;
446		fp->psupply_fan_on = 1;
447		set_fan_speeds(fp);
448	}
449}
450
451#define POLL_INTERVAL	(5 * 1000)
452static unsigned long last_warning_jiffies;
453static struct task_struct *kenvctrld_task;
454
455static int kenvctrld(void *__unused)
456{
457	printk(KERN_INFO "bbc_envctrl: kenvctrld starting...\n");
458	last_warning_jiffies = jiffies - WARN_INTERVAL;
459	for (;;) {
460		struct bbc_cpu_temperature *tp;
461		struct bbc_fan_control *fp;
462
463		msleep_interruptible(POLL_INTERVAL);
464		if (kthread_should_stop())
465			break;
466
467		for (tp = all_bbc_temps; tp; tp = tp->next) {
468			get_current_temps(tp);
469			analyze_temps(tp, &last_warning_jiffies);
470		}
471		for (fp = all_bbc_fans; fp; fp = fp->next)
472			maybe_new_fan_speeds(fp);
473	}
474	printk(KERN_INFO "bbc_envctrl: kenvctrld exiting...\n");
475
476	fans_full_blast();
477
478	return 0;
479}
480
481static void attach_one_temp(struct linux_ebus_child *echild, int temp_idx)
482{
483	struct bbc_cpu_temperature *tp = kmalloc(sizeof(*tp), GFP_KERNEL);
484
485	if (!tp)
486		return;
487	memset(tp, 0, sizeof(*tp));
488	tp->client = bbc_i2c_attach(echild);
489	if (!tp->client) {
490		kfree(tp);
491		return;
492	}
493
494	tp->index = temp_idx;
495	{
496		struct bbc_cpu_temperature **tpp = &all_bbc_temps;
497		while (*tpp)
498			tpp = &((*tpp)->next);
499		tp->next = NULL;
500		*tpp = tp;
501	}
502
503	/* Tell it to convert once every 5 seconds, clear all cfg
504	 * bits.
505	 */
506	bbc_i2c_writeb(tp->client, 0x00, MAX1617_WR_CFG_BYTE);
507	bbc_i2c_writeb(tp->client, 0x02, MAX1617_WR_CVRATE_BYTE);
508
509	/* Program the hard temperature limits into the chip. */
510	bbc_i2c_writeb(tp->client, amb_temp_limits[tp->index].high_pwroff,
511		       MAX1617_WR_AMB_HIGHLIM);
512	bbc_i2c_writeb(tp->client, amb_temp_limits[tp->index].low_pwroff,
513		       MAX1617_WR_AMB_LOWLIM);
514	bbc_i2c_writeb(tp->client, cpu_temp_limits[tp->index].high_pwroff,
515		       MAX1617_WR_CPU_HIGHLIM);
516	bbc_i2c_writeb(tp->client, cpu_temp_limits[tp->index].low_pwroff,
517		       MAX1617_WR_CPU_LOWLIM);
518
519	get_current_temps(tp);
520	tp->prev_cpu_temp = tp->avg_cpu_temp = tp->curr_cpu_temp;
521	tp->prev_amb_temp = tp->avg_amb_temp = tp->curr_amb_temp;
522
523	tp->fan_todo[FAN_AMBIENT] = FAN_SAME;
524	tp->fan_todo[FAN_CPU] = FAN_SAME;
525}
526
527static void attach_one_fan(struct linux_ebus_child *echild, int fan_idx)
528{
529	struct bbc_fan_control *fp = kmalloc(sizeof(*fp), GFP_KERNEL);
530
531	if (!fp)
532		return;
533	memset(fp, 0, sizeof(*fp));
534	fp->client = bbc_i2c_attach(echild);
535	if (!fp->client) {
536		kfree(fp);
537		return;
538	}
539
540	fp->index = fan_idx;
541
542	{
543		struct bbc_fan_control **fpp = &all_bbc_fans;
544		while (*fpp)
545			fpp = &((*fpp)->next);
546		fp->next = NULL;
547		*fpp = fp;
548	}
549
550	/* The i2c device controlling the fans is write-only.
551	 * So the only way to keep track of the current power
552	 * level fed to the fans is via software.  Choose half
553	 * power for cpu/system and 'on' fo the powersupply fan
554	 * and set it now.
555	 */
556	fp->psupply_fan_on = 1;
557	fp->cpu_fan_speed = (FAN_SPEED_MAX - FAN_SPEED_MIN) / 2;
558	fp->cpu_fan_speed += FAN_SPEED_MIN;
559	fp->system_fan_speed = (FAN_SPEED_MAX - FAN_SPEED_MIN) / 2;
560	fp->system_fan_speed += FAN_SPEED_MIN;
561
562	set_fan_speeds(fp);
563}
564
565int bbc_envctrl_init(void)
566{
567	struct linux_ebus_child *echild;
568	int temp_index = 0;
569	int fan_index = 0;
570	int devidx = 0;
571
572	while ((echild = bbc_i2c_getdev(devidx++)) != NULL) {
573		if (!strcmp(echild->prom_node->name, "temperature"))
574			attach_one_temp(echild, temp_index++);
575		if (!strcmp(echild->prom_node->name, "fan-control"))
576			attach_one_fan(echild, fan_index++);
577	}
578	if (temp_index != 0 && fan_index != 0) {
579		kenvctrld_task = kthread_run(kenvctrld, NULL, "kenvctrld");
580		if (IS_ERR(kenvctrld_task))
581			return PTR_ERR(kenvctrld_task);
582	}
583
584	return 0;
585}
586
587static void destroy_one_temp(struct bbc_cpu_temperature *tp)
588{
589	bbc_i2c_detach(tp->client);
590	kfree(tp);
591}
592
593static void destroy_one_fan(struct bbc_fan_control *fp)
594{
595	bbc_i2c_detach(fp->client);
596	kfree(fp);
597}
598
599void bbc_envctrl_cleanup(void)
600{
601	struct bbc_cpu_temperature *tp;
602	struct bbc_fan_control *fp;
603
604	kthread_stop(kenvctrld_task);
605
606	tp = all_bbc_temps;
607	while (tp != NULL) {
608		struct bbc_cpu_temperature *next = tp->next;
609		destroy_one_temp(tp);
610		tp = next;
611	}
612	all_bbc_temps = NULL;
613
614	fp = all_bbc_fans;
615	while (fp != NULL) {
616		struct bbc_fan_control *next = fp->next;
617		destroy_one_fan(fp);
618		fp = next;
619	}
620	all_bbc_fans = NULL;
621}
622