cpu_pm.c revision 10797:8e4cf0dbd8ca
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#include <sys/cpu_pm.h>
27#include <sys/cmn_err.h>
28#include <sys/time.h>
29#include <sys/sdt.h>
30
31/*
32 * Solaris Event Based CPU Power Manager
33 *
34 * This file implements platform independent event based CPU power management.
35 * When CPUs are configured into the system, the CMT scheduling subsystem will
36 * query the platform to determine if the CPU belongs to any power management
37 * domains. That is, sets of CPUs that share power management states.
38 *
39 * Active Power Management domains represent a group of CPUs across which the
40 * Operating System can request speed changes (which may in turn result
41 * in voltage changes). This allows the operating system to trade off
42 * performance for power savings.
43 *
44 * Idle Power Management domains can enter power savings states when they are
45 * unutilized. These states allow the Operating System to trade off power
46 * for performance (in the form of latency to transition from the idle state
47 * to an active one).
48 *
49 * For each active and idle power domain the CMT subsystem instantiates, a
50 * cpupm_domain_t structure is created. As the dispatcher schedules threads
51 * to run on the system's CPUs, it will also track the utilization of the
52 * enumerated power domains. Significant changes in utilization will result
53 * in the dispatcher sending the power manager events that relate to the
 * utilization of the power domain. The power manager receives the events,
55 * and in the context of the policy objectives in force, may decide to request
56 * the domain's power/performance state be changed.
57 *
58 * Under the "elastic" CPUPM policy, when the utilization rises, the CPU power
59 * manager will request the CPUs in the domain run at their fastest (and most
60 * power consuming) state. When the domain becomes idle (utilization at zero),
61 * the power manager will request that the CPUs run at a speed that saves the
62 * most power.
63 *
64 * The advantage of this scheme, is that the CPU power manager working with the
65 * dispatcher can be extremely responsive to changes in utilization. Optimizing
66 * for performance in the presence of utilization, and power savings in the
67 * presence of idleness. Such close collaboration with the dispatcher has other
68 * benefits that will play out in the form of more sophisticated power /
69 * performance policy in the near future.
70 *
71 * Avoiding state thrashing in the presence of transient periods of utilization
72 * and idleness while still being responsive to non-transient periods is key.
73 * The power manager implements a "governor" that is used to throttle
74 * state transitions when a significant amount of transient idle or transient
75 * work is detected.
76 *
77 * Kernel background activity (e.g. taskq threads) are by far the most common
 * form of transient utilization. Ungoverned in the face of this utilization,
79 * hundreds of state transitions per second would result on an idle system.
80 *
81 * Transient idleness is common when a thread briefly yields the CPU to
82 * wait for an event elsewhere in the system. Where the idle period is short
83 * enough, the overhead associated with making the state transition doesn't
84 * justify the power savings.
85 *
86 * The following is the state machine for the governor implemented by
87 * cpupm_utilization_event():
88 *
89 *         ----->---tw---->-----
90 *        /                     \
91 *      (I)-<-ti-<-     -<-ntw-<(W)
92 *       |         \   /         |
93 *       \          \ /          /
94 *        >-nti/rm->(D)--->-tw->-
95 * Key:
96 *
97 * States
98 * - (D): Default (ungoverned)
99 * - (W): Transient work governed
100 * - (I): Transient idle governed
101 * State Transitions
102 * - tw: transient work
103 * - ti: transient idleness
104 * - ntw: non-transient work
105 * - nti: non-transient idleness
106 * - rm: thread remain event
107 */
108
/*
 * Head of the singly linked list (chained through cpd_next) of every power
 * domain created by cpupm_domain_create(). The list is walked and modified
 * with cpu_lock held.
 */
static cpupm_domain_t *cpupm_domains = NULL;

/*
 * The CPU power management policy currently in force. Until a policy is
 * set via cpupm_set_policy(), CPU power management is disabled.
 */
cpupm_policy_t cpupm_policy = CPUPM_POLICY_DISABLED;

/*
 * Periods of utilization lasting less than this time interval are characterized
 * as transient. State changes associated with transient work are considered
 * to be mispredicted. That is, it's not worth raising and lowering power
 * states where the utilization lasts for less than this interval.
 *
 * Set by cpupm_governor_initialize() from cpupm_tw_gov_interval.
 */
hrtime_t cpupm_tw_predict_interval;

/*
 * Periods of idleness lasting less than this time interval are characterized
 * as transient. State changes associated with transient idle are considered
 * to be mispredicted. That is, it's not worth lowering and raising power
 * states where the idleness lasts for less than this interval.
 *
 * Set by cpupm_governor_initialize() from cpupm_ti_gov_interval.
 */
hrtime_t cpupm_ti_predict_interval;

/*
 * Number of mispredictions after which future transitions will be governed.
 */
int cpupm_mispredict_thresh = 4;

/*
 * Likewise, the number of mispredicted governed transitions after which the
 * governor will be removed.
 */
int cpupm_mispredict_gov_thresh = 4;

/*
 * The transient work and transient idle prediction intervals are specified
 * here. Tuning them higher will result in the transient work, and transient
 * idle governors being used more aggressively, which limits the frequency of
 * state transitions at the expense of performance and power savings,
 * respectively. The intervals are specified in nanoseconds.
 */
/*
 * Default transient idle interval: 400 usec
 */
#define	CPUPM_DEFAULT_TI_INTERVAL	400000
/*
 * Default transient work interval: 400 usec
 */
#define	CPUPM_DEFAULT_TW_INTERVAL	400000

/*
 * Governor intervals in nanoseconds. cpupm_governor_initialize() converts
 * these into the cpupm_t[iw]_predict_interval values above.
 */
hrtime_t cpupm_ti_gov_interval = CPUPM_DEFAULT_TI_INTERVAL;
hrtime_t cpupm_tw_gov_interval = CPUPM_DEFAULT_TW_INTERVAL;


/*
 * Forward declarations of file local routines defined later in this file.
 */
static void	cpupm_governor_initialize(void);
static void	cpupm_state_change_global(cpupm_dtype_t, cpupm_state_name_t);
165
166cpupm_policy_t
167cpupm_get_policy(void)
168{
169	return (cpupm_policy);
170}
171
172int
173cpupm_set_policy(cpupm_policy_t new_policy)
174{
175	static int	gov_init = 0;
176	int		result = 0;
177
178	mutex_enter(&cpu_lock);
179	if (new_policy == cpupm_policy) {
180		mutex_exit(&cpu_lock);
181		return (result);
182	}
183
184	/*
185	 * Pausing CPUs causes a high priority thread to be scheduled
186	 * on all other CPUs (besides the current one). This locks out
187	 * other CPUs from making CPUPM state transitions.
188	 */
189	switch (new_policy) {
190	case CPUPM_POLICY_DISABLED:
191		pause_cpus(NULL);
192		cpupm_policy = CPUPM_POLICY_DISABLED;
193		start_cpus();
194
195		result = cmt_pad_disable(PGHW_POW_ACTIVE);
196
197		/*
198		 * Once PAD has been enabled, it should always be possible
199		 * to disable it.
200		 */
201		ASSERT(result == 0);
202
203		/*
204		 * Bring all the active power domains to the maximum
205		 * performance state.
206		 */
207		cpupm_state_change_global(CPUPM_DTYPE_ACTIVE,
208		    CPUPM_STATE_MAX_PERF);
209
210		break;
211	case CPUPM_POLICY_ELASTIC:
212
213		result = cmt_pad_enable(PGHW_POW_ACTIVE);
214		if (result < 0) {
215			/*
216			 * Failed to enable PAD across the active power
217			 * domains, which may well be because none were
218			 * enumerated.
219			 */
220			break;
221		}
222
223		/*
224		 * Initialize the governor parameters the first time through.
225		 */
226		if (gov_init == 0) {
227			cpupm_governor_initialize();
228			gov_init = 1;
229		}
230
231		pause_cpus(NULL);
232		cpupm_policy = CPUPM_POLICY_ELASTIC;
233		start_cpus();
234
235		break;
236	default:
237		cmn_err(CE_WARN, "Attempt to set unknown CPUPM policy %d\n",
238		    new_policy);
239		ASSERT(0);
240		break;
241	}
242	mutex_exit(&cpu_lock);
243
244	return (result);
245}
246
247/*
248 * Look for an existing power domain
249 */
250static cpupm_domain_t *
251cpupm_domain_find(id_t id, cpupm_dtype_t type)
252{
253	ASSERT(MUTEX_HELD(&cpu_lock));
254
255	cpupm_domain_t *dom;
256
257	dom = cpupm_domains;
258	while (dom != NULL) {
259		if (id == dom->cpd_id && type == dom->cpd_type)
260			return (dom);
261		dom = dom->cpd_next;
262	}
263	return (NULL);
264}
265
266/*
267 * Create a new domain
268 */
269static cpupm_domain_t *
270cpupm_domain_create(id_t id, cpupm_dtype_t type)
271{
272	cpupm_domain_t *dom;
273
274	ASSERT(MUTEX_HELD(&cpu_lock));
275
276	dom = kmem_zalloc(sizeof (cpupm_domain_t), KM_SLEEP);
277	dom->cpd_id = id;
278	dom->cpd_type = type;
279
280	/* Link into the known domain list */
281	dom->cpd_next = cpupm_domains;
282	cpupm_domains = dom;
283
284	return (dom);
285}
286
287static void
288cpupm_domain_state_enum(struct cpu *cp, cpupm_domain_t *dom)
289{
290	/*
291	 * In the envent we're enumerating because the domain's state
292	 * configuration has changed, toss any existing states.
293	 */
294	if (dom->cpd_nstates > 0) {
295		kmem_free(dom->cpd_states,
296		    sizeof (cpupm_state_t) * dom->cpd_nstates);
297		dom->cpd_nstates = 0;
298	}
299
300	/*
301	 * Query to determine the number of states, allocate storage
302	 * large enough to hold the state information, and pass it back
303	 * to the platform driver to complete the enumeration.
304	 */
305	dom->cpd_nstates = cpupm_plat_state_enumerate(cp, dom->cpd_type, NULL);
306
307	if (dom->cpd_nstates == 0)
308		return;
309
310	dom->cpd_states =
311	    kmem_zalloc(dom->cpd_nstates * sizeof (cpupm_state_t), KM_SLEEP);
312	(void) cpupm_plat_state_enumerate(cp, dom->cpd_type, dom->cpd_states);
313}
314
315/*
316 * Initialize the specified type of power domain on behalf of the CPU
317 */
318cpupm_domain_t *
319cpupm_domain_init(struct cpu *cp, cpupm_dtype_t type)
320{
321	cpupm_domain_t	*dom;
322	id_t		did;
323
324	ASSERT(MUTEX_HELD(&cpu_lock));
325
326	/*
327	 * Instantiate the domain if it doesn't already exist
328	 * and enumerate its power states.
329	 */
330	did = cpupm_domain_id(cp, type);
331	dom = cpupm_domain_find(did, type);
332	if (dom == NULL) {
333		dom = cpupm_domain_create(did, type);
334		cpupm_domain_state_enum(cp, dom);
335	}
336
337	/*
338	 * Named state initialization
339	 */
340	if (type == CPUPM_DTYPE_ACTIVE) {
341		/*
342		 * For active power domains, the highest performance
343		 * state is defined as first state returned from
344		 * the domain enumeration.
345		 */
346		dom->cpd_named_states[CPUPM_STATE_MAX_PERF] =
347		    &dom->cpd_states[0];
348		dom->cpd_named_states[CPUPM_STATE_LOW_POWER] =
349		    &dom->cpd_states[dom->cpd_nstates - 1];
350
351		/*
352		 * Begin by assuming CPU is running at the max perf state.
353		 */
354		dom->cpd_state = dom->cpd_named_states[CPUPM_STATE_MAX_PERF];
355	}
356
357	return (dom);
358}
359
360/*
361 * Return the id associated with the given type of domain
362 * to which cp belongs
363 */
364id_t
365cpupm_domain_id(struct cpu *cp, cpupm_dtype_t type)
366{
367	return (cpupm_plat_domain_id(cp, type));
368}
369
370/*
371 * Initiate a state change for the specified domain on behalf of cp
372 */
373int
374cpupm_change_state(struct cpu *cp, cpupm_domain_t *dom, cpupm_state_t *state)
375{
376	if (cpupm_plat_change_state(cp, state) < 0)
377		return (-1);
378
379	DTRACE_PROBE2(cpupm__change__state,
380	    cpupm_domain_t *, dom,
381	    cpupm_state_t *, state);
382
383	dom->cpd_state = state;
384	return (0);
385}
386
387/*
388 * Interface into the CPU power manager to indicate a significant change
389 * in utilization of the specified active power domain
390 */
391void
392cpupm_utilization_event(struct cpu *cp, hrtime_t now, cpupm_domain_t *dom,
393			    cpupm_util_event_t event)
394{
395	cpupm_state_t	*new_state = NULL;
396	hrtime_t	last;
397
398	if (cpupm_policy == CPUPM_POLICY_DISABLED) {
399		return;
400	}
401
402	/*
403	 * What follows is a simple elastic power state management policy.
404	 *
405	 * If the utilization has become non-zero, and the domain was
406	 * previously at it's lowest power state, then transition it
407	 * to the highest state in the spirit of "race to idle".
408	 *
409	 * If the utilization has dropped to zero, then transition the
410	 * domain to its lowest power state.
411	 *
412	 * Statistics are maintained to implement a governor to reduce state
413	 * transitions resulting from either transient work, or periods of
414	 * transient idleness on the domain.
415	 */
416	switch (event) {
417	case CPUPM_DOM_REMAIN_BUSY:
418
419		/*
420		 * We've received an event that the domain is running a thread
421		 * that's made it to the end of it's time slice. If we are at
422		 * low power, then raise it. If the transient work governor
423		 * is engaged, then remove it.
424		 */
425		if (dom->cpd_state ==
426		    dom->cpd_named_states[CPUPM_STATE_LOW_POWER]) {
427			new_state =
428			    dom->cpd_named_states[CPUPM_STATE_MAX_PERF];
429			if (dom->cpd_governor == CPUPM_GOV_TRANS_WORK) {
430				dom->cpd_governor = CPUPM_GOV_DISENGAGED;
431				dom->cpd_tw = 0;
432			}
433		}
434		break;
435
436	case CPUPM_DOM_BUSY_FROM_IDLE:
437		last = dom->cpd_last_lower;
438		dom->cpd_last_raise = now;
439
440		DTRACE_PROBE3(cpupm__raise__req,
441		    cpupm_domain_t *, dom,
442		    hrtime_t, last,
443		    hrtime_t, now);
444
445		if (dom->cpd_state ==
446		    dom->cpd_named_states[CPUPM_STATE_LOW_POWER]) {
447
448			/*
449			 * There's non-zero utilization, and the domain is
450			 * running in the lower power state. Before we
451			 * consider raising power, check if the preceeding
452			 * idle period was transient in duration.
453			 *
454			 * If the domain is already transient work governed,
455			 * then we don't bother maintaining transient idle
456			 * statistics, as the presence of enough transient work
457			 * can also make the domain frequently transiently idle.
458			 * In this case, we still want to remain transient work
459			 * governed.
460			 */
461			if (dom->cpd_governor == CPUPM_GOV_DISENGAGED) {
462				if ((now - last) < cpupm_ti_predict_interval) {
463					/*
464					 * We're raising the domain power and
465					 * we *just* lowered it. Consider
466					 * this a mispredicted power state
467					 * transition due to a transient
468					 * idle period.
469					 */
470					if (++dom->cpd_ti >=
471					    cpupm_mispredict_thresh) {
472						/*
473						 * There's enough transient
474						 * idle transitions to
475						 * justify governing future
476						 * lowering requests.
477						 */
478						dom->cpd_governor =
479						    CPUPM_GOV_TRANS_IDLE;
480						dom->cpd_ti = 0;
481						DTRACE_PROBE1(
482						    cpupm__ti__governed,
483						    cpupm_domain_t *, dom);
484					}
485				} else {
486					/*
487					 * We correctly predicted the last
488					 * lowering.
489					 */
490					dom->cpd_ti = 0;
491				}
492			}
493			if (dom->cpd_governor == CPUPM_GOV_TRANS_WORK) {
494				/*
495				 * Raise requests are governed due to
496				 * transient work.
497				 */
498				DTRACE_PROBE1(cpupm__raise__governed,
499				    cpupm_domain_t *, dom);
500
501				return;
502			}
503			/*
504			 * Prepare to transition to the higher power state
505			 */
506			new_state = dom->cpd_named_states[CPUPM_STATE_MAX_PERF];
507
508		} else if (dom->cpd_state ==
509		    dom->cpd_named_states[CPUPM_STATE_MAX_PERF]) {
510
511			/*
512			 * Utilization is non-zero, and we're already running
513			 * in the higher power state. Take this opportunity to
514			 * perform some book keeping if the last lowering
515			 * request was governed.
516			 */
517			if (dom->cpd_governor == CPUPM_GOV_TRANS_IDLE) {
518
519				if ((now - last) >= cpupm_ti_predict_interval) {
520					/*
521					 * The domain is transient idle
522					 * governed, and we mispredicted
523					 * governing the last lowering request.
524					 */
525					if (++dom->cpd_ti >=
526					    cpupm_mispredict_gov_thresh) {
527						/*
528						 * There's enough non-transient
529						 * idle periods to justify
530						 * removing the governor.
531						 */
532						dom->cpd_governor =
533						    CPUPM_GOV_DISENGAGED;
534						dom->cpd_ti = 0;
535						DTRACE_PROBE1(
536						    cpupm__ti__ungoverned,
537						    cpupm_domain_t *, dom);
538					}
539				} else {
540					/*
541					 * Correctly predicted governing the
542					 * last lowering request.
543					 */
544					dom->cpd_ti = 0;
545				}
546			}
547		}
548		break;
549
550	case CPUPM_DOM_IDLE_FROM_BUSY:
551		last = dom->cpd_last_raise;
552		dom->cpd_last_lower = now;
553
554		DTRACE_PROBE3(cpupm__lower__req,
555		    cpupm_domain_t *, dom,
556		    hrtime_t, last,
557		    hrtime_t, now);
558
559		if (dom->cpd_state ==
560		    dom->cpd_named_states[CPUPM_STATE_MAX_PERF]) {
561
562			/*
563			 * The domain is idle, and is running in the highest
564			 * performance state. Before we consider lowering power,
565			 * perform some book keeping for the transient work
566			 * governor.
567			 */
568			if (dom->cpd_governor == CPUPM_GOV_DISENGAGED) {
569				if ((now - last) < cpupm_tw_predict_interval) {
570					/*
571					 * We're lowering the domain power and
572					 * we *just* raised it. Consider the
573					 * last raise mispredicted due to
574					 * transient work.
575					 */
576					if (++dom->cpd_tw >=
577					    cpupm_mispredict_thresh) {
578						/*
579						 * There's enough transient work
580						 * transitions to justify
581						 * governing future raise
582						 * requests.
583						 */
584						dom->cpd_governor =
585						    CPUPM_GOV_TRANS_WORK;
586						dom->cpd_tw = 0;
587						DTRACE_PROBE1(
588						    cpupm__tw__governed,
589						    cpupm_domain_t *, dom);
590					}
591				} else {
592					/*
593					 * We correctly predicted during the
594					 * last raise.
595					 */
596					dom->cpd_tw = 0;
597				}
598			}
599			if (dom->cpd_governor == CPUPM_GOV_TRANS_IDLE) {
600				/*
601				 * Lowering requests are governed due to
602				 * transient idleness.
603				 */
604				DTRACE_PROBE1(cpupm__lowering__governed,
605				    cpupm_domain_t *, dom);
606
607				return;
608			}
609
610			/*
611			 * Prepare to transition to a lower power state.
612			 */
613			new_state =
614			    dom->cpd_named_states[CPUPM_STATE_LOW_POWER];
615
616		} else if (dom->cpd_state ==
617		    dom->cpd_named_states[CPUPM_STATE_LOW_POWER]) {
618
619			/*
620			 * The domain is idle, and we're already running in
621			 * the lower power state. Take this opportunity to
622			 * perform some book keeping if the last raising
623			 * request was governed.
624			 */
625			if (dom->cpd_governor == CPUPM_GOV_TRANS_WORK) {
626				if ((now - last) >= cpupm_tw_predict_interval) {
627					/*
628					 * The domain is transient work
629					 * governed, and we mispredicted
630					 * governing the last raising request.
631					 */
632					if (++dom->cpd_tw >=
633					    cpupm_mispredict_gov_thresh) {
634						/*
635						 * There's enough non-transient
636						 * work to justify removing
637						 * the governor.
638						 */
639						dom->cpd_governor =
640						    CPUPM_GOV_DISENGAGED;
641						dom->cpd_tw = 0;
642						DTRACE_PROBE1(
643						    cpupm__tw__ungoverned,
644						    cpupm_domain_t *, dom);
645					}
646				} else {
647					/*
648					 * We correctly predicted governing
649					 * the last raise.
650					 */
651					dom->cpd_tw = 0;
652				}
653			}
654		}
655		break;
656	}
657	/*
658	 * Change the power state
659	 * Not much currently done if this doesn't succeed
660	 */
661	if (new_state)
662		(void) cpupm_change_state(cp, dom, new_state);
663}
664
665
666/*
667 * Interface called by platforms to dynamically change the
668 * MAX performance cpupm state
669 */
670void
671cpupm_redefine_max_activepwr_state(struct cpu *cp, int max_perf_level)
672{
673	cpupm_domain_t	*dom;
674	id_t		did;
675	cpupm_dtype_t	type = CPUPM_DTYPE_ACTIVE;
676	boolean_t	change_state = B_FALSE;
677	cpupm_state_t	*new_state = NULL;
678
679	did = cpupm_domain_id(cp, type);
680	mutex_enter(&cpu_lock);
681	dom = cpupm_domain_find(did, type);
682	mutex_exit(&cpu_lock);
683
684	/*
685	 * Can use a lock to avoid changing the power state of the cpu when
686	 * CPUPM_STATE_MAX_PERF is getting changed.
687	 * Since the occurance of events to change MAX_PERF is not frequent,
688	 * it may not be a good idea to overburden with locks. In the worst
689	 * case, for one cycle the power may not get changed to the required
690	 * level
691	 */
692	if (dom != NULL) {
693		if (dom->cpd_state ==
694		    dom->cpd_named_states[CPUPM_STATE_MAX_PERF]) {
695			change_state = B_TRUE;
696		}
697
698		/*
699		 * If an out of range level is passed, use the lowest supported
700		 * speed.
701		 */
702		if (max_perf_level >= dom->cpd_nstates &&
703		    dom->cpd_nstates > 1) {
704			max_perf_level = dom->cpd_nstates - 1;
705		}
706
707		dom->cpd_named_states[CPUPM_STATE_MAX_PERF] =
708		    &dom->cpd_states[max_perf_level];
709
710		/*
711		 * If the current state is MAX_PERF, change the current state
712		 * to the new MAX_PERF
713		 */
714		if (change_state) {
715			new_state =
716			    dom->cpd_named_states[CPUPM_STATE_MAX_PERF];
717			if (new_state) {
718				(void) cpupm_change_state(cp, dom, new_state);
719			}
720		}
721	}
722}
723
724/*
725 * Initialize the parameters for the transience governor state machine
726 */
727static void
728cpupm_governor_initialize(void)
729{
730	/*
731	 * The default prediction intervals are specified in nanoseconds.
732	 * Convert these to the equivalent in unscaled hrtime, which is the
733	 * format of the timestamps passed to cpupm_utilization_event()
734	 */
735	cpupm_ti_predict_interval = unscalehrtime(cpupm_ti_gov_interval);
736	cpupm_tw_predict_interval = unscalehrtime(cpupm_tw_gov_interval);
737}
738
739/*
740 * Initiate a state change in all CPUPM domain instances of the specified type
741 */
742static void
743cpupm_state_change_global(cpupm_dtype_t type, cpupm_state_name_t state)
744{
745	cpu_t		*cp;
746	pg_cmt_t	*pwr_pg;
747	cpupm_domain_t	*dom;
748	group_t		*hwset;
749	group_iter_t	giter;
750	pg_cpu_itr_t	cpu_iter;
751	pghw_type_t	hw;
752
753	ASSERT(MUTEX_HELD(&cpu_lock));
754
755	switch (type) {
756	case CPUPM_DTYPE_ACTIVE:
757		hw = PGHW_POW_ACTIVE;
758		break;
759	default:
760		/*
761		 * Power domain types other than "active" unsupported.
762		 */
763		ASSERT(type == CPUPM_DTYPE_ACTIVE);
764		return;
765	}
766
767	if ((hwset = pghw_set_lookup(hw)) == NULL)
768		return;
769
770	/*
771	 * Iterate over the power domains
772	 */
773	group_iter_init(&giter);
774	while ((pwr_pg = group_iterate(hwset, &giter)) != NULL) {
775
776		dom = (cpupm_domain_t *)pwr_pg->cmt_pg.pghw_handle;
777
778		/*
779		 * Iterate over the CPUs in each domain
780		 */
781		PG_CPU_ITR_INIT(pwr_pg, cpu_iter);
782		while ((cp = pg_cpu_next(&cpu_iter)) != NULL) {
783			(void) cpupm_change_state(cp, dom,
784			    dom->cpd_named_states[state]);
785		}
786	}
787}
788