kern_cpu.c revision 141240
1/*-
2 * Copyright (c) 2004-2005 Nate Lawson (SDG)
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/kern/kern_cpu.c 141240 2005-02-04 05:39:19Z njl $");
29
30#include <sys/param.h>
31#include <sys/bus.h>
32#include <sys/cpu.h>
33#include <sys/eventhandler.h>
34#include <sys/kernel.h>
35#include <sys/malloc.h>
36#include <sys/module.h>
37#include <sys/proc.h>
38#include <sys/queue.h>
39#include <sys/sched.h>
40#include <sys/sysctl.h>
41#include <sys/systm.h>
42#include <sys/sbuf.h>
43
44#include "cpufreq_if.h"
45
46/*
47 * Common CPU frequency glue code.  Drivers for specific hardware can
48 * attach this interface to allow users to get/set the CPU frequency.
49 */
50
/*
 * Upper bound on the number of levels handled at once.  The temporary
 * level arrays in this file are sized with it.  Levels are synthesized
 * from driver settings, so N settings may yield up to N^2 levels.
 */
#define CF_MAX_LEVELS	32
56
57struct cpufreq_softc {
58	struct cf_level			curr_level;
59	int				priority;
60	struct cf_level_lst		all_levels;
61	device_t			dev;
62	struct sysctl_ctx_list		sysctl_ctx;
63};
64
65struct cf_setting_array {
66	struct cf_setting		sets[MAX_SETTINGS];
67	int				count;
68	TAILQ_ENTRY(cf_setting_array)	link;
69};
70
71TAILQ_HEAD(cf_setting_lst, cf_setting_array);
72
73static int	cpufreq_attach(device_t dev);
74static int	cpufreq_detach(device_t dev);
75static void	cpufreq_evaluate(void *arg);
76static int	cf_set_method(device_t dev, const struct cf_level *level,
77		    int priority);
78static int	cf_get_method(device_t dev, struct cf_level *level);
79static int	cf_levels_method(device_t dev, struct cf_level *levels,
80		    int *count);
81static int	cpufreq_insert_abs(struct cf_level_lst *list,
82		    struct cf_setting *sets, int count);
83static int	cpufreq_curr_sysctl(SYSCTL_HANDLER_ARGS);
84static int	cpufreq_levels_sysctl(SYSCTL_HANDLER_ARGS);
85
86static device_method_t cpufreq_methods[] = {
87	DEVMETHOD(device_probe,		bus_generic_probe),
88	DEVMETHOD(device_attach,	cpufreq_attach),
89	DEVMETHOD(device_detach,	cpufreq_detach),
90
91        DEVMETHOD(cpufreq_set,		cf_set_method),
92        DEVMETHOD(cpufreq_get,		cf_get_method),
93        DEVMETHOD(cpufreq_levels,	cf_levels_method),
94	{0, 0}
95};
96static driver_t cpufreq_driver = {
97	"cpufreq", cpufreq_methods, sizeof(struct cpufreq_softc)
98};
99static devclass_t cpufreq_dc;
100DRIVER_MODULE(cpufreq, cpu, cpufreq_driver, cpufreq_dc, 0, 0);
101
102static eventhandler_tag cf_ev_tag;
103
104static int
105cpufreq_attach(device_t dev)
106{
107	struct cpufreq_softc *sc;
108	device_t parent;
109	int numdevs;
110
111	sc = device_get_softc(dev);
112	parent = device_get_parent(dev);
113	sc->dev = dev;
114	sysctl_ctx_init(&sc->sysctl_ctx);
115	TAILQ_INIT(&sc->all_levels);
116	sc->curr_level.total_set.freq = CPUFREQ_VAL_UNKNOWN;
117
118	/*
119	 * Only initialize one set of sysctls for all CPUs.  In the future,
120	 * if multiple CPUs can have different settings, we can move these
121	 * sysctls to be under every CPU instead of just the first one.
122	 */
123	numdevs = devclass_get_count(cpufreq_dc);
124	if (numdevs > 1)
125		return (0);
126
127	SYSCTL_ADD_PROC(&sc->sysctl_ctx,
128	    SYSCTL_CHILDREN(device_get_sysctl_tree(parent)),
129	    OID_AUTO, "freq", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
130	    cpufreq_curr_sysctl, "I", "Current CPU frequency");
131	SYSCTL_ADD_PROC(&sc->sysctl_ctx,
132	    SYSCTL_CHILDREN(device_get_sysctl_tree(parent)),
133	    OID_AUTO, "freq_levels", CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
134	    cpufreq_levels_sysctl, "A", "CPU frequency levels");
135	cf_ev_tag = EVENTHANDLER_REGISTER(cpufreq_changed, cpufreq_evaluate,
136	    NULL, EVENTHANDLER_PRI_ANY);
137
138	return (0);
139}
140
141static int
142cpufreq_detach(device_t dev)
143{
144	struct cpufreq_softc *sc;
145	int numdevs;
146
147	sc = device_get_softc(dev);
148	sysctl_ctx_free(&sc->sysctl_ctx);
149
150	/* Only clean up these resources when the last device is detaching. */
151	numdevs = devclass_get_count(cpufreq_dc);
152	if (numdevs == 1)
153		EVENTHANDLER_DEREGISTER(cpufreq_changed, cf_ev_tag);
154
155	return (0);
156}
157
/*
 * Handler for cpufreq_changed events.  Currently a placeholder that
 * does nothing.
 */
static void
cpufreq_evaluate(void *arg)
{

	/* TODO: Re-evaluate when notified of changes to drivers. */
	(void)arg;
}
163
164static int
165cf_set_method(device_t dev, const struct cf_level *level, int priority)
166{
167	struct cpufreq_softc *sc;
168	const struct cf_setting *set;
169	int error;
170
171	sc = device_get_softc(dev);
172
173	/* If already at this level, just return. */
174	if (CPUFREQ_CMP(sc->curr_level.total_set.freq, level->total_set.freq))
175		return (0);
176
177	/* First, set the absolute frequency via its driver. */
178	set = &level->abs_set;
179	if (set->dev) {
180		if (!device_is_attached(set->dev)) {
181			error = ENXIO;
182			goto out;
183		}
184		error = CPUFREQ_DRV_SET(set->dev, set);
185		if (error) {
186			goto out;
187		}
188	}
189
190	/* TODO: Next, set any/all relative frequencies via their drivers. */
191
192	/* Record the current level. */
193	sc->curr_level = *level;
194	sc->priority = priority;
195	error = 0;
196
197out:
198	if (error)
199		device_printf(set->dev, "set freq failed, err %d\n", error);
200	return (error);
201}
202
203static int
204cf_get_method(device_t dev, struct cf_level *level)
205{
206	struct cpufreq_softc *sc;
207	struct cf_level *levels;
208	struct cf_setting *curr_set, set;
209	struct pcpu *pc;
210	device_t *devs;
211	int count, error, i, numdevs;
212	uint64_t rate;
213
214	sc = device_get_softc(dev);
215	curr_set = &sc->curr_level.total_set;
216	levels = NULL;
217
218	/* If we already know the current frequency, we're done. */
219	if (curr_set->freq != CPUFREQ_VAL_UNKNOWN)
220		goto out;
221
222	/*
223	 * We need to figure out the current level.  Loop through every
224	 * driver, getting the current setting.  Then, attempt to get a best
225	 * match of settings against each level.
226	 */
227	count = CF_MAX_LEVELS;
228	levels = malloc(count * sizeof(*levels), M_TEMP, M_NOWAIT);
229	if (levels == NULL)
230		return (ENOMEM);
231	error = CPUFREQ_LEVELS(sc->dev, levels, &count);
232	if (error)
233		goto out;
234	error = device_get_children(device_get_parent(dev), &devs, &numdevs);
235	if (error)
236		goto out;
237	for (i = 0; i < numdevs && curr_set->freq == CPUFREQ_VAL_UNKNOWN; i++) {
238		if (!device_is_attached(devs[i]))
239			continue;
240		error = CPUFREQ_DRV_GET(devs[i], &set);
241		if (error)
242			continue;
243		for (i = 0; i < count; i++) {
244			if (CPUFREQ_CMP(set.freq, levels[i].abs_set.freq)) {
245				sc->curr_level = levels[i];
246				break;
247			}
248		}
249	}
250	free(devs, M_TEMP);
251	if (curr_set->freq != CPUFREQ_VAL_UNKNOWN)
252		goto out;
253
254	/*
255	 * We couldn't find an exact match, so attempt to estimate and then
256	 * match against a level.
257	 */
258	pc = cpu_get_pcpu(dev);
259	if (pc == NULL) {
260		error = ENXIO;
261		goto out;
262	}
263	cpu_est_clockrate(pc->pc_cpuid, &rate);
264	rate /= 1000000;
265	for (i = 0; i < count; i++) {
266		if (CPUFREQ_CMP(rate, levels[i].total_set.freq)) {
267			sc->curr_level = levels[i];
268			break;
269		}
270	}
271
272out:
273	if (levels)
274		free(levels, M_TEMP);
275	*level = sc->curr_level;
276	return (0);
277}
278
279static int
280cf_levels_method(device_t dev, struct cf_level *levels, int *count)
281{
282	struct cf_setting_lst rel_sets;
283	struct cpufreq_softc *sc;
284	struct cf_level *lev;
285	struct cf_setting *sets;
286	struct pcpu *pc;
287	device_t *devs;
288	int error, i, numdevs, numlevels, set_count, type;
289	uint64_t rate;
290
291	if (levels == NULL || count == NULL)
292		return (EINVAL);
293
294	TAILQ_INIT(&rel_sets);
295	sc = device_get_softc(dev);
296	error = device_get_children(device_get_parent(dev), &devs, &numdevs);
297	if (error)
298		return (error);
299	sets = malloc(MAX_SETTINGS * sizeof(*sets), M_TEMP, M_NOWAIT);
300	if (sets == NULL) {
301		free(devs, M_TEMP);
302		return (ENOMEM);
303	}
304
305	/* Get settings from all cpufreq drivers. */
306	numlevels = 0;
307	for (i = 0; i < numdevs; i++) {
308		if (!device_is_attached(devs[i]))
309			continue;
310		set_count = MAX_SETTINGS;
311		error = CPUFREQ_DRV_SETTINGS(devs[i], sets, &set_count, &type);
312		if (error || set_count == 0)
313			continue;
314		error = cpufreq_insert_abs(&sc->all_levels, sets, set_count);
315		if (error)
316			goto out;
317		numlevels += set_count;
318	}
319
320	/* If the caller doesn't have enough space, return the actual count. */
321	if (numlevels > *count) {
322		*count = numlevels;
323		error = E2BIG;
324		goto out;
325	}
326
327	/* If there are no absolute levels, create a fake one at 100%. */
328	if (TAILQ_EMPTY(&sc->all_levels)) {
329		bzero(&sets[0], sizeof(*sets));
330		pc = cpu_get_pcpu(dev);
331		if (pc == NULL) {
332			error = ENXIO;
333			goto out;
334		}
335		cpu_est_clockrate(pc->pc_cpuid, &rate);
336		sets[0].freq = rate / 1000000;
337		error = cpufreq_insert_abs(&sc->all_levels, sets, 1);
338		if (error)
339			goto out;
340	}
341
342	/* TODO: Create a combined list of absolute + relative levels. */
343	i = 0;
344	TAILQ_FOREACH(lev, &sc->all_levels, link) {
345		/* For now, just assume total freq equals absolute freq. */
346		lev->total_set = lev->abs_set;
347		lev->total_set.dev = NULL;
348		levels[i] = *lev;
349		i++;
350	}
351	*count = i;
352	error = 0;
353
354out:
355	/* Clear all levels since we regenerate them each time. */
356	while ((lev = TAILQ_FIRST(&sc->all_levels)) != NULL) {
357		TAILQ_REMOVE(&sc->all_levels, lev, link);
358		free(lev, M_TEMP);
359	}
360	free(devs, M_TEMP);
361	free(sets, M_TEMP);
362	return (error);
363}
364
365/*
366 * Create levels for an array of absolute settings and insert them in
367 * sorted order in the specified list.
368 */
369static int
370cpufreq_insert_abs(struct cf_level_lst *list, struct cf_setting *sets,
371    int count)
372{
373	struct cf_level *level, *search;
374	int i;
375
376	for (i = 0; i < count; i++) {
377		level = malloc(sizeof(*level), M_TEMP, M_NOWAIT | M_ZERO);
378		if (level == NULL)
379			return (ENOMEM);
380		level->abs_set = sets[i];
381
382		if (TAILQ_EMPTY(list)) {
383			TAILQ_INSERT_HEAD(list, level, link);
384			continue;
385		}
386
387		TAILQ_FOREACH_REVERSE(search, list, cf_level_lst, link) {
388			if (sets[i].freq <= search->abs_set.freq) {
389				TAILQ_INSERT_AFTER(list, search, level, link);
390				break;
391			}
392		}
393	}
394	return (0);
395}
396
397static int
398cpufreq_curr_sysctl(SYSCTL_HANDLER_ARGS)
399{
400	struct cpufreq_softc *sc;
401	struct cf_level *levels;
402	int count, error, freq, i;
403
404	sc = oidp->oid_arg1;
405	count = CF_MAX_LEVELS;
406	levels = malloc(count * sizeof(*levels), M_TEMP, M_NOWAIT);
407	if (levels == NULL)
408		return (ENOMEM);
409
410	error = CPUFREQ_GET(sc->dev, &levels[0]);
411	if (error)
412		goto out;
413	freq = levels[0].total_set.freq;
414	error = sysctl_handle_int(oidp, &freq, 0, req);
415	if (error != 0 || req->newptr == NULL)
416		goto out;
417
418	error = CPUFREQ_LEVELS(sc->dev, levels, &count);
419	if (error)
420		goto out;
421	for (i = 0; i < count; i++) {
422		if (CPUFREQ_CMP(levels[i].total_set.freq, freq)) {
423			error = CPUFREQ_SET(sc->dev, &levels[i],
424			    CPUFREQ_PRIO_USER);
425			break;
426		}
427	}
428	if (i == count)
429		error = EINVAL;
430
431out:
432	if (levels)
433		free(levels, M_TEMP);
434	return (error);
435}
436
437static int
438cpufreq_levels_sysctl(SYSCTL_HANDLER_ARGS)
439{
440	struct cpufreq_softc *sc;
441	struct cf_level *levels;
442	struct cf_setting *set;
443	struct sbuf sb;
444	int count, error, i;
445
446	sc = oidp->oid_arg1;
447	sbuf_new(&sb, NULL, 128, SBUF_AUTOEXTEND);
448
449	/* Get settings from the device and generate the output string. */
450	count = CF_MAX_LEVELS;
451	levels = malloc(count * sizeof(*levels), M_TEMP, M_NOWAIT);
452	if (levels == NULL)
453		return (ENOMEM);
454	error = CPUFREQ_LEVELS(sc->dev, levels, &count);
455	if (error)
456		goto out;
457	if (count) {
458		for (i = 0; i < count; i++) {
459			set = &levels[i].total_set;
460			sbuf_printf(&sb, "%d/%d ", set->freq, set->power);
461		}
462	} else
463		sbuf_cpy(&sb, "0");
464	sbuf_trim(&sb);
465	sbuf_finish(&sb);
466	error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
467
468out:
469	free(levels, M_TEMP);
470	sbuf_delete(&sb);
471	return (error);
472}
473
474int
475cpufreq_register(device_t dev)
476{
477	device_t cf_dev, cpu_dev;
478
479	/*
480	 * Only add one cpufreq device (on cpu0) for all control.  Once
481	 * independent multi-cpu control appears, we can assign one cpufreq
482	 * device per cpu.
483	 */
484	cf_dev = devclass_get_device(cpufreq_dc, 0);
485	if (cf_dev) {
486		device_printf(dev,
487		    "warning: only one cpufreq device at a time supported\n");
488		return (0);
489	}
490
491	/* Add the child device and sysctls. */
492	cpu_dev = devclass_get_device(devclass_find("cpu"), 0);
493	cf_dev = BUS_ADD_CHILD(cpu_dev, 0, "cpufreq", 0);
494	if (cf_dev == NULL)
495		return (ENOMEM);
496	device_quiet(cf_dev);
497
498	return (device_probe_and_attach(cf_dev));
499}
500
501int
502cpufreq_unregister(device_t dev)
503{
504	device_t cf_dev, *devs;
505	int cfcount, count, devcount, error, i, type;
506	struct cf_setting set;
507
508	/*
509	 * If this is the last cpufreq child device, remove the control
510	 * device as well.  We identify cpufreq children by calling a method
511	 * they support.
512	 */
513	error = device_get_children(device_get_parent(dev), &devs, &devcount);
514	if (error)
515		return (error);
516	cf_dev = devclass_get_device(cpufreq_dc, 0);
517	KASSERT(cf_dev != NULL, ("unregister with no cpufreq dev"));
518	cfcount = 0;
519	for (i = 0; i < devcount; i++) {
520		if (!device_is_attached(devs[i]))
521			continue;
522		count = 1;
523		if (CPUFREQ_DRV_SETTINGS(devs[i], &set, &count, &type) == 0)
524			cfcount++;
525	}
526	if (cfcount <= 1) {
527		device_delete_child(device_get_parent(cf_dev), cf_dev);
528	}
529	free(devs, M_TEMP);
530
531	return (0);
532}
533