1/* $Id: bbc_envctrl.c,v 1.1.1.1 2008/10/15 03:26:47 james26_jang Exp $
2 * bbc_envctrl.c: UltraSPARC-III environment control driver.
3 *
4 * Copyright (C) 2001 David S. Miller (davem@redhat.com)
5 */
6
7#include <linux/kernel.h>
8#include <linux/sched.h>
9#include <linux/slab.h>
10#include <asm/oplib.h>
11#include <asm/ebus.h>
12#define __KERNEL_SYSCALLS__
13static int errno;
14#include <asm/unistd.h>
15
16#include "bbc_i2c.h"
17#include "max1617.h"
18
19#undef ENVCTRL_TRACE
20
21/* WARNING: Making changes to this driver is very dangerous.
22 *          If you misprogram the sensor chips they can
23 *          cut the power on you instantly.
24 */
25
26/* Two temperature sensors exist in the SunBLADE-1000 enclosure.
27 * Both are implemented using max1617 i2c devices.  Each max1617
28 * monitors 2 temperatures, one for one of the cpu dies and the other
29 * for the ambient temperature.
30 *
31 * The max1617 is capable of being programmed with power-off
32 * temperature values, one low limit and one high limit.  These
 * can be controlled independently for the cpu or ambient temperature.
34 * If a limit is violated, the power is simply shut off.  The frequency
35 * with which the max1617 does temperature sampling can be controlled
36 * as well.
37 *
38 * Three fans exist inside the machine, all three are controlled with
39 * an i2c digital to analog converter.  There is a fan directed at the
40 * two processor slots, another for the rest of the enclosure, and the
41 * third is for the power supply.  The first two fans may be speed
42 * controlled by changing the voltage fed to them.  The third fan may
43 * only be completely off or on.  The third fan is meant to only be
44 * disabled/enabled when entering/exiting the lowest power-saving
45 * mode of the machine.
46 *
47 * An environmental control kernel thread periodically monitors all
48 * temperature sensors.  Based upon the samples it will adjust the
49 * fan speeds to try and keep the system within a certain temperature
50 * range (the goal being to make the fans as quiet as possible without
51 * allowing the system to get too hot).
52 *
53 * If the temperature begins to rise/fall outside of the acceptable
54 * operating range, a periodic warning will be sent to the kernel log.
55 * The fans will be put on full blast to attempt to deal with this
56 * situation.  After exceeding the acceptable operating range by a
57 * certain threshold, the kernel thread will shut down the system.
58 * Here, the thread is attempting to shut the machine down cleanly
59 * before the hardware based power-off event is triggered.
60 */
61
/* These settings are in Celsius.  We use these defaults only
63 * if we cannot interrogate the cpu-fru SEEPROM.
64 */
65struct temp_limits {
66	s8 high_pwroff, high_shutdown, high_warn;
67	s8 low_warn, low_shutdown, low_pwroff;
68};
69
70static struct temp_limits cpu_temp_limits[2] = {
71	{ 100, 85, 80, 5, -5, -10 },
72	{ 100, 85, 80, 5, -5, -10 },
73};
74
75static struct temp_limits amb_temp_limits[2] = {
76	{ 65, 55, 40, 5, -5, -10 },
77	{ 65, 55, 40, 5, -5, -10 },
78};
79
/* What a temperature sensor recommends be done with a fan.
 *
 * FAN_STATE_MAX is not a real recommendation; prioritize_fan_action()
 * uses it as its "nothing decided yet" starting value.
 */
enum fan_action {
	FAN_SLOWER,
	FAN_SAME,
	FAN_FASTER,
	FAN_FULLBLAST,
	FAN_STATE_MAX
};
81
82struct bbc_cpu_temperature {
83	struct bbc_cpu_temperature	*next;
84
85	struct bbc_i2c_client		*client;
86	int				index;
87
88	/* Current readings, and history. */
89	s8				curr_cpu_temp;
90	s8				curr_amb_temp;
91	s8				prev_cpu_temp;
92	s8				prev_amb_temp;
93	s8				avg_cpu_temp;
94	s8				avg_amb_temp;
95
96	int				sample_tick;
97
98	enum fan_action			fan_todo[2];
99#define FAN_AMBIENT	0
100#define FAN_CPU		1
101};
102
103struct bbc_cpu_temperature *all_bbc_temps;
104
/* Software state kept for one fan controller device.
 *
 * The speed fields are the driver's own record of what was last
 * requested; set_fan_speeds() clamps them and writes them out.
 */
struct bbc_fan_control {
	/* Next controller on the all_bbc_fans list. */
	struct bbc_fan_control	*next;

	/* i2c handle used to write the fan registers. */
	struct bbc_i2c_client	*client;

	/* Controller number, used in trace output. */
	int			index;

	/* Non-zero when the power supply fan should be running. */
	int			psupply_fan_on;

	/* Requested speed values for the cpu and system fans. */
	int			cpu_fan_speed;
	int			system_fan_speed;
};

/* Head of the list of every attached fan controller. */
struct bbc_fan_control *all_bbc_fans;
117
/* Smallest and largest speed value set_fan_speeds() will ever write
 * for the cpu and system fans.
 */
#define FAN_SPEED_MIN	0x0c
#define FAN_SPEED_MAX	0x3f

/* The power supply fan is only ever written one of these two values. */
#define PSUPPLY_FAN_OFF	0x00
#define PSUPPLY_FAN_ON	0x1f

/* Offsets passed to bbc_i2c_writeb() to select which fan is written. */
#define CPU_FAN_REG	0xf0
#define SYS_FAN_REG	0xf2
#define PSUPPLY_FAN_REG	0xf4
127
128static void set_fan_speeds(struct bbc_fan_control *fp)
129{
130	/* Put temperatures into range so we don't mis-program
131	 * the hardware.
132	 */
133	if (fp->cpu_fan_speed < FAN_SPEED_MIN)
134		fp->cpu_fan_speed = FAN_SPEED_MIN;
135	if (fp->cpu_fan_speed > FAN_SPEED_MAX)
136		fp->cpu_fan_speed = FAN_SPEED_MAX;
137	if (fp->system_fan_speed < FAN_SPEED_MIN)
138		fp->system_fan_speed = FAN_SPEED_MIN;
139	if (fp->system_fan_speed > FAN_SPEED_MAX)
140		fp->system_fan_speed = FAN_SPEED_MAX;
141#ifdef ENVCTRL_TRACE
142	printk("fan%d: Changed fan speed to cpu(%02x) sys(%02x)\n",
143	       fp->index,
144	       fp->cpu_fan_speed, fp->system_fan_speed);
145#endif
146
147	bbc_i2c_writeb(fp->client, fp->cpu_fan_speed, CPU_FAN_REG);
148	bbc_i2c_writeb(fp->client, fp->system_fan_speed, SYS_FAN_REG);
149	bbc_i2c_writeb(fp->client,
150		       (fp->psupply_fan_on ?
151			PSUPPLY_FAN_ON : PSUPPLY_FAN_OFF),
152		       PSUPPLY_FAN_REG);
153}
154
155static void get_current_temps(struct bbc_cpu_temperature *tp)
156{
157	tp->prev_amb_temp = tp->curr_amb_temp;
158	bbc_i2c_readb(tp->client,
159		      (unsigned char *) &tp->curr_amb_temp,
160		      MAX1617_AMB_TEMP);
161	tp->prev_cpu_temp = tp->curr_cpu_temp;
162	bbc_i2c_readb(tp->client,
163		      (unsigned char *) &tp->curr_cpu_temp,
164		      MAX1617_CPU_TEMP);
165#ifdef ENVCTRL_TRACE
166	printk("temp%d: cpu(%d C) amb(%d C)\n",
167	       tp->index,
168	       (int) tp->curr_cpu_temp, (int) tp->curr_amb_temp);
169#endif
170}
171
172
173static void do_envctrl_shutdown(struct bbc_cpu_temperature *tp)
174{
175	static int shutting_down = 0;
176	static char *envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };
177	char *argv[] = { "/sbin/shutdown", "-h", "now", NULL };
178	char *type = "???";
179	s8 val = -1;
180
181	if (shutting_down != 0)
182		return;
183
184	if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_shutdown ||
185	    tp->curr_amb_temp < amb_temp_limits[tp->index].low_shutdown) {
186		type = "ambient";
187		val = tp->curr_amb_temp;
188	} else if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_shutdown ||
189		   tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_shutdown) {
190		type = "CPU";
191		val = tp->curr_cpu_temp;
192	}
193
194	printk(KERN_CRIT "temp%d: Outside of safe %s "
195	       "operating temperature, %d C.\n",
196	       tp->index, type, val);
197
198	printk(KERN_CRIT "kenvctrld: Shutting down the system now.\n");
199
200	shutting_down = 1;
201	if (execve("/sbin/shutdown", argv, envp) < 0)
202		printk(KERN_CRIT "envctrl: shutdown execution failed\n");
203}
204
205#define WARN_INTERVAL	(30 * HZ)
206
207static void analyze_ambient_temp(struct bbc_cpu_temperature *tp, unsigned long *last_warn, int tick)
208{
209	int ret = 0;
210
211	if (time_after(jiffies, (*last_warn + WARN_INTERVAL))) {
212		if (tp->curr_amb_temp >=
213		    amb_temp_limits[tp->index].high_warn) {
214			printk(KERN_WARNING "temp%d: "
215			       "Above safe ambient operating temperature, %d C.\n",
216			       tp->index, (int) tp->curr_amb_temp);
217			ret = 1;
218		} else if (tp->curr_amb_temp <
219			   amb_temp_limits[tp->index].low_warn) {
220			printk(KERN_WARNING "temp%d: "
221			       "Below safe ambient operating temperature, %d C.\n",
222			       tp->index, (int) tp->curr_amb_temp);
223			ret = 1;
224		}
225		if (ret)
226			*last_warn = jiffies;
227	} else if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_warn ||
228		   tp->curr_amb_temp < amb_temp_limits[tp->index].low_warn)
229		ret = 1;
230
231	/* Now check the shutdown limits. */
232	if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_shutdown ||
233	    tp->curr_amb_temp < amb_temp_limits[tp->index].low_shutdown) {
234		do_envctrl_shutdown(tp);
235		ret = 1;
236	}
237
238	if (ret) {
239		tp->fan_todo[FAN_AMBIENT] = FAN_FULLBLAST;
240	} else if ((tick & (8 - 1)) == 0) {
241		s8 amb_goal_hi = amb_temp_limits[tp->index].high_warn - 10;
242		s8 amb_goal_lo;
243
244		amb_goal_lo = amb_goal_hi - 3;
245
246		/* We do not try to avoid 'too cold' events.  Basically we
247		 * only try to deal with over-heating and fan noise reduction.
248		 */
249		if (tp->avg_amb_temp < amb_goal_hi) {
250			if (tp->avg_amb_temp >= amb_goal_lo)
251				tp->fan_todo[FAN_AMBIENT] = FAN_SAME;
252			else
253				tp->fan_todo[FAN_AMBIENT] = FAN_SLOWER;
254		} else {
255			tp->fan_todo[FAN_AMBIENT] = FAN_FASTER;
256		}
257	} else {
258		tp->fan_todo[FAN_AMBIENT] = FAN_SAME;
259	}
260}
261
262static void analyze_cpu_temp(struct bbc_cpu_temperature *tp, unsigned long *last_warn, int tick)
263{
264	int ret = 0;
265
266	if (time_after(jiffies, (*last_warn + WARN_INTERVAL))) {
267		if (tp->curr_cpu_temp >=
268		    cpu_temp_limits[tp->index].high_warn) {
269			printk(KERN_WARNING "temp%d: "
270			       "Above safe CPU operating temperature, %d C.\n",
271			       tp->index, (int) tp->curr_cpu_temp);
272			ret = 1;
273		} else if (tp->curr_cpu_temp <
274			   cpu_temp_limits[tp->index].low_warn) {
275			printk(KERN_WARNING "temp%d: "
276			       "Below safe CPU operating temperature, %d C.\n",
277			       tp->index, (int) tp->curr_cpu_temp);
278			ret = 1;
279		}
280		if (ret)
281			*last_warn = jiffies;
282	} else if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_warn ||
283		   tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_warn)
284		ret = 1;
285
286	/* Now check the shutdown limits. */
287	if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_shutdown ||
288	    tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_shutdown) {
289		do_envctrl_shutdown(tp);
290		ret = 1;
291	}
292
293	if (ret) {
294		tp->fan_todo[FAN_CPU] = FAN_FULLBLAST;
295	} else if ((tick & (8 - 1)) == 0) {
296		s8 cpu_goal_hi = cpu_temp_limits[tp->index].high_warn - 10;
297		s8 cpu_goal_lo;
298
299		cpu_goal_lo = cpu_goal_hi - 3;
300
301		/* We do not try to avoid 'too cold' events.  Basically we
302		 * only try to deal with over-heating and fan noise reduction.
303		 */
304		if (tp->avg_cpu_temp < cpu_goal_hi) {
305			if (tp->avg_cpu_temp >= cpu_goal_lo)
306				tp->fan_todo[FAN_CPU] = FAN_SAME;
307			else
308				tp->fan_todo[FAN_CPU] = FAN_SLOWER;
309		} else {
310			tp->fan_todo[FAN_CPU] = FAN_FASTER;
311		}
312	} else {
313		tp->fan_todo[FAN_CPU] = FAN_SAME;
314	}
315}
316
317static void analyze_temps(struct bbc_cpu_temperature *tp, unsigned long *last_warn)
318{
319	tp->avg_amb_temp = (s8)((int)((int)tp->avg_amb_temp + (int)tp->curr_amb_temp) / 2);
320	tp->avg_cpu_temp = (s8)((int)((int)tp->avg_cpu_temp + (int)tp->curr_cpu_temp) / 2);
321
322	analyze_ambient_temp(tp, last_warn, tp->sample_tick);
323	analyze_cpu_temp(tp, last_warn, tp->sample_tick);
324
325	tp->sample_tick++;
326}
327
328static enum fan_action prioritize_fan_action(int which_fan)
329{
330	struct bbc_cpu_temperature *tp;
331	enum fan_action decision = FAN_STATE_MAX;
332
333	/* Basically, prioritize what the temperature sensors
334	 * recommend we do, and perform that action on all the
335	 * fans.
336	 */
337	for (tp = all_bbc_temps; tp; tp = tp->next) {
338		if (tp->fan_todo[which_fan] == FAN_FULLBLAST) {
339			decision = FAN_FULLBLAST;
340			break;
341		}
342		if (tp->fan_todo[which_fan] == FAN_SAME &&
343		    decision != FAN_FASTER)
344			decision = FAN_SAME;
345		else if (tp->fan_todo[which_fan] == FAN_FASTER)
346			decision = FAN_FASTER;
347		else if (decision != FAN_FASTER &&
348			 decision != FAN_SAME &&
349			 tp->fan_todo[which_fan] == FAN_SLOWER)
350			decision = FAN_SLOWER;
351	}
352	if (decision == FAN_STATE_MAX)
353		decision = FAN_SAME;
354
355	return decision;
356}
357
358static int maybe_new_ambient_fan_speed(struct bbc_fan_control *fp)
359{
360	enum fan_action decision = prioritize_fan_action(FAN_AMBIENT);
361	int ret;
362
363	if (decision == FAN_SAME)
364		return 0;
365
366	ret = 1;
367	if (decision == FAN_FULLBLAST) {
368		if (fp->system_fan_speed >= FAN_SPEED_MAX)
369			ret = 0;
370		else
371			fp->system_fan_speed = FAN_SPEED_MAX;
372	} else {
373		if (decision == FAN_FASTER) {
374			if (fp->system_fan_speed >= FAN_SPEED_MAX)
375				ret = 0;
376			else
377				fp->system_fan_speed += 2;
378		} else {
379			int orig_speed = fp->system_fan_speed;
380
381			if (orig_speed <= FAN_SPEED_MIN ||
382			    orig_speed <= (fp->cpu_fan_speed - 3))
383				ret = 0;
384			else
385				fp->system_fan_speed -= 1;
386		}
387	}
388
389	return ret;
390}
391
392static int maybe_new_cpu_fan_speed(struct bbc_fan_control *fp)
393{
394	enum fan_action decision = prioritize_fan_action(FAN_CPU);
395	int ret;
396
397	if (decision == FAN_SAME)
398		return 0;
399
400	ret = 1;
401	if (decision == FAN_FULLBLAST) {
402		if (fp->cpu_fan_speed >= FAN_SPEED_MAX)
403			ret = 0;
404		else
405			fp->cpu_fan_speed = FAN_SPEED_MAX;
406	} else {
407		if (decision == FAN_FASTER) {
408			if (fp->cpu_fan_speed >= FAN_SPEED_MAX)
409				ret = 0;
410			else {
411				fp->cpu_fan_speed += 2;
412				if (fp->system_fan_speed <
413				    (fp->cpu_fan_speed - 3))
414					fp->system_fan_speed =
415						fp->cpu_fan_speed - 3;
416			}
417		} else {
418			if (fp->cpu_fan_speed <= FAN_SPEED_MIN)
419				ret = 0;
420			else
421				fp->cpu_fan_speed -= 1;
422		}
423	}
424
425	return ret;
426}
427
/* Update both speeds of fan controller @fp from the current sensor
 * recommendations, and program the hardware only if one of them
 * actually changed.
 */
static void maybe_new_fan_speeds(struct bbc_fan_control *fp)
{
	/* Both must always run, ambient first: each may adjust @fp. */
	int sys_changed = maybe_new_ambient_fan_speed(fp);
	int cpu_changed = maybe_new_cpu_fan_speed(fp);

	if (sys_changed || cpu_changed)
		set_fan_speeds(fp);
}
438
439static void fans_full_blast(void)
440{
441	struct bbc_fan_control *fp;
442
443	/* Since we will not be monitoring things anymore, put
444	 * the fans on full blast.
445	 */
446	for (fp = all_bbc_fans; fp; fp = fp->next) {
447		fp->cpu_fan_speed = FAN_SPEED_MAX;
448		fp->system_fan_speed = FAN_SPEED_MAX;
449		fp->psupply_fan_on = 1;
450		set_fan_speeds(fp);
451	}
452}
453
454#define POLL_INTERVAL	(5 * HZ)
455static unsigned long last_warning_jiffies;
456static struct task_struct *kenvctrld_task;
457
458static int kenvctrld(void *__unused)
459{
460	daemonize();
461	strcpy(current->comm, "kenvctrld");
462	kenvctrld_task = current;
463
464	printk(KERN_INFO "bbc_envctrl: kenvctrld starting...\n");
465	last_warning_jiffies = jiffies - WARN_INTERVAL;
466	for (;;) {
467		struct bbc_cpu_temperature *tp;
468		struct bbc_fan_control *fp;
469
470		current->state = TASK_INTERRUPTIBLE;
471		schedule_timeout(POLL_INTERVAL);
472		current->state = TASK_RUNNING;
473		if (signal_pending(current))
474			break;
475
476		for (tp = all_bbc_temps; tp; tp = tp->next) {
477			get_current_temps(tp);
478			analyze_temps(tp, &last_warning_jiffies);
479		}
480		for (fp = all_bbc_fans; fp; fp = fp->next)
481			maybe_new_fan_speeds(fp);
482	}
483	printk(KERN_INFO "bbc_envctrl: kenvctrld exiting...\n");
484
485	fans_full_blast();
486
487	return 0;
488}
489
490static void attach_one_temp(struct linux_ebus_child *echild, int temp_idx)
491{
492	struct bbc_cpu_temperature *tp = kmalloc(sizeof(*tp), GFP_KERNEL);
493
494	if (!tp)
495		return;
496	memset(tp, 0, sizeof(*tp));
497	tp->client = bbc_i2c_attach(echild);
498	if (!tp->client) {
499		kfree(tp);
500		return;
501	}
502
503	tp->index = temp_idx;
504	{
505		struct bbc_cpu_temperature **tpp = &all_bbc_temps;
506		while (*tpp)
507			tpp = &((*tpp)->next);
508		tp->next = NULL;
509		*tpp = tp;
510	}
511
512	/* Tell it to convert once every 5 seconds, clear all cfg
513	 * bits.
514	 */
515	bbc_i2c_writeb(tp->client, 0x00, MAX1617_WR_CFG_BYTE);
516	bbc_i2c_writeb(tp->client, 0x02, MAX1617_WR_CVRATE_BYTE);
517
518	/* Program the hard temperature limits into the chip. */
519	bbc_i2c_writeb(tp->client, amb_temp_limits[tp->index].high_pwroff,
520		       MAX1617_WR_AMB_HIGHLIM);
521	bbc_i2c_writeb(tp->client, amb_temp_limits[tp->index].low_pwroff,
522		       MAX1617_WR_AMB_LOWLIM);
523	bbc_i2c_writeb(tp->client, cpu_temp_limits[tp->index].high_pwroff,
524		       MAX1617_WR_CPU_HIGHLIM);
525	bbc_i2c_writeb(tp->client, cpu_temp_limits[tp->index].low_pwroff,
526		       MAX1617_WR_CPU_LOWLIM);
527
528	get_current_temps(tp);
529	tp->prev_cpu_temp = tp->avg_cpu_temp = tp->curr_cpu_temp;
530	tp->prev_amb_temp = tp->avg_amb_temp = tp->curr_amb_temp;
531
532	tp->fan_todo[FAN_AMBIENT] = FAN_SAME;
533	tp->fan_todo[FAN_CPU] = FAN_SAME;
534}
535
536static void attach_one_fan(struct linux_ebus_child *echild, int fan_idx)
537{
538	struct bbc_fan_control *fp = kmalloc(sizeof(*fp), GFP_KERNEL);
539
540	if (!fp)
541		return;
542	memset(fp, 0, sizeof(*fp));
543	fp->client = bbc_i2c_attach(echild);
544	if (!fp->client) {
545		kfree(fp);
546		return;
547	}
548
549	fp->index = fan_idx;
550
551	{
552		struct bbc_fan_control **fpp = &all_bbc_fans;
553		while (*fpp)
554			fpp = &((*fpp)->next);
555		fp->next = NULL;
556		*fpp = fp;
557	}
558
559	/* The i2c device controlling the fans is write-only.
560	 * So the only way to keep track of the current power
561	 * level fed to the fans is via software.  Choose half
562	 * power for cpu/system and 'on' fo the powersupply fan
563	 * and set it now.
564	 */
565	fp->psupply_fan_on = 1;
566	fp->cpu_fan_speed = (FAN_SPEED_MAX - FAN_SPEED_MIN) / 2;
567	fp->cpu_fan_speed += FAN_SPEED_MIN;
568	fp->system_fan_speed = (FAN_SPEED_MAX - FAN_SPEED_MIN) / 2;
569	fp->system_fan_speed += FAN_SPEED_MIN;
570
571	set_fan_speeds(fp);
572}
573
574void bbc_envctrl_init(void)
575{
576	struct linux_ebus_child *echild;
577	int temp_index = 0;
578	int fan_index = 0;
579	int devidx = 0;
580
581	while ((echild = bbc_i2c_getdev(devidx++)) != NULL) {
582		if (!strcmp(echild->prom_name, "temperature"))
583			attach_one_temp(echild, temp_index++);
584		if (!strcmp(echild->prom_name, "fan-control"))
585			attach_one_fan(echild, fan_index++);
586	}
587	if (temp_index != 0 && fan_index != 0)
588		kernel_thread(kenvctrld, NULL, CLONE_FS | CLONE_FILES);
589}
590
591static void destroy_one_temp(struct bbc_cpu_temperature *tp)
592{
593	bbc_i2c_detach(tp->client);
594	kfree(tp);
595}
596
597static void destroy_one_fan(struct bbc_fan_control *fp)
598{
599	bbc_i2c_detach(fp->client);
600	kfree(fp);
601}
602
603void bbc_envctrl_cleanup(void)
604{
605	struct bbc_cpu_temperature *tp;
606	struct bbc_fan_control *fp;
607
608	if (kenvctrld_task != NULL) {
609		force_sig(SIGKILL, kenvctrld_task);
610		for (;;) {
611			struct task_struct *p;
612			int found = 0;
613
614			read_lock(&tasklist_lock);
615			for_each_task(p) {
616				if (p == kenvctrld_task) {
617					found = 1;
618					break;
619				}
620			}
621			read_unlock(&tasklist_lock);
622			if (!found)
623				break;
624			current->state = TASK_INTERRUPTIBLE;
625			schedule_timeout(HZ);
626			current->state = TASK_RUNNING;
627		}
628		kenvctrld_task = NULL;
629	}
630
631	tp = all_bbc_temps;
632	while (tp != NULL) {
633		struct bbc_cpu_temperature *next = tp->next;
634		destroy_one_temp(tp);
635		tp = next;
636	}
637	all_bbc_temps = NULL;
638
639	fp = all_bbc_fans;
640	while (fp != NULL) {
641		struct bbc_fan_control *next = fp->next;
642		destroy_one_fan(fp);
643		fp = next;
644	}
645	all_bbc_fans = NULL;
646}
647