/*
 * Copyright (c) 2004-2011 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*
 * CPU-specific power management support.
 *
 * Implements the "wrappers" to the KEXT.
 */
#include <i386/asm.h>
#include <i386/machine_cpu.h>
#include <i386/mp.h>
#include <i386/machine_routines.h>
#include <i386/proc_reg.h>
#include <i386/pmap.h>
#include <i386/misc_protos.h>
#include <kern/machine.h>
#include <kern/pms.h>
#include <kern/processor.h>
#include <kern/timer_queue.h>
#include <i386/cpu_threads.h>
#include <i386/pmCPU.h>
#include <i386/cpuid.h>
#include <i386/rtclock_protos.h>
#include <kern/sched_prim.h>
#include <i386/lapic.h>
#include <i386/pal_routines.h>
#include <sys/kdebug.h>
#include <i386/tsc.h>

extern int disableConsoleOutput;

#define DELAY_UNSET		0xFFFFFFFFFFFFFFFFULL

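/*
 * Histogram bin boundaries, in nanoseconds, used to classify idle and run
 * intervals: 16us up to 32768us, doubling with each bin.  The same
 * boundaries are shared by the idle-time and run-time histograms.
 */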
uint64_t cpu_itime_bins[CPU_ITIME_BINS] = {
	16 * NSEC_PER_USEC,   32 * NSEC_PER_USEC,    64 * NSEC_PER_USEC,    128 * NSEC_PER_USEC,
	256 * NSEC_PER_USEC,  512 * NSEC_PER_USEC,   1024 * NSEC_PER_USEC,  2048 * NSEC_PER_USEC,
	4096 * NSEC_PER_USEC, 8192 * NSEC_PER_USEC,  16384 * NSEC_PER_USEC, 32768 * NSEC_PER_USEC
};
uint64_t *cpu_rtime_bins = &cpu_itime_bins[0];

/*
 * The following is set when the KEXT loads and initializes.
 */
pmDispatch_t	*pmDispatch	= NULL;

uint32_t		pmInitDone		= 0;
static boolean_t	earlyTopology		= FALSE;
static uint64_t		earlyMaxBusDelay	= DELAY_UNSET;
static uint64_t		earlyMaxIntDelay	= DELAY_UNSET;

/*
 * Initialize the Cstate change code.
 */
void
power_management_init(void)
{
    if (pmDispatch != NULL && pmDispatch->cstateInit != NULL)
	(*pmDispatch->cstateInit)();
}

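/*
 * Charge "interval" to the first histogram bin whose boundary (binvals[i])
 * it falls below; intervals longer than the largest boundary are left
 * uncounted.
 */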
static inline void machine_classify_interval(uint64_t interval, uint64_t *bins, uint64_t *binvals, uint32_t nbins) {
	uint32_t i;
	for (i = 0; i < nbins; i++) {
		if (interval < binvals[i]) {
			bins[i]++;
			break;
		}
	}
}

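/*
 * Number of idle entries that were diverted to process pending timers, and
 * the hard-deadline window (in absolute time units, 5ms by default) within
 * which idle entry is short-circuited so those timers can be run instead.
 */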
uint64_t	idle_pending_timers_processed;
uint32_t	idle_entry_timer_processing_hdeadline_threshold = 5000000;

/*
 * Called when the CPU is idle.  It calls into the power management kext
 * to determine the best way to idle the CPU.
 */
void
machine_idle(void)
{
	cpu_data_t		*my_cpu		= current_cpu_datap();
	__unused uint32_t	cnum = my_cpu->cpu_number;
	uint64_t		ctime, rtime, itime;
#if CST_DEMOTION_DEBUG
	processor_t		cproc = my_cpu->cpu_processor;
	uint64_t		cwakeups = PROCESSOR_DATA(cproc, wakeups_issued_total);
#endif /* CST_DEMOTION_DEBUG */
	uint64_t esdeadline, ehdeadline;
	boolean_t do_process_pending_timers = FALSE;

	ctime = mach_absolute_time();
	esdeadline = my_cpu->rtclock_timer.queue.earliest_soft_deadline;
	ehdeadline = my_cpu->rtclock_timer.deadline;
/* Determine if pending timers exist */
	if ((ctime >= esdeadline) && (ctime < ehdeadline) &&
	    ((ehdeadline - ctime) < idle_entry_timer_processing_hdeadline_threshold)) {
		idle_pending_timers_processed++;
		do_process_pending_timers = TRUE;
		goto machine_idle_exit;
	} else {
		TCOAL_DEBUG(0xCCCC0000, ctime, my_cpu->rtclock_timer.queue.earliest_soft_deadline, my_cpu->rtclock_timer.deadline, idle_pending_timers_processed, 0);
	}

	my_cpu->lcpu.state = LCPU_IDLE;
	DBGLOG(cpu_handle, cpu_number(), MP_IDLE);
	MARK_CPU_IDLE(cnum);

	rtime = ctime - my_cpu->cpu_ixtime;

	my_cpu->cpu_rtime_total += rtime;
	machine_classify_interval(rtime, &my_cpu->cpu_rtimes[0], &cpu_rtime_bins[0], CPU_RTIME_BINS);
#if CST_DEMOTION_DEBUG
	uint32_t cl = 0, ch = 0;
	uint64_t c3res, c6res, c7res;
	rdmsr_carefully(MSR_IA32_CORE_C3_RESIDENCY, &cl, &ch);
	c3res = ((uint64_t)ch << 32) | cl;
	rdmsr_carefully(MSR_IA32_CORE_C6_RESIDENCY, &cl, &ch);
	c6res = ((uint64_t)ch << 32) | cl;
	rdmsr_carefully(MSR_IA32_CORE_C7_RESIDENCY, &cl, &ch);
	c7res = ((uint64_t)ch << 32) | cl;
#endif

	if (pmInitDone) {
		/*
		 * Handle the case where ml_set_maxbusdelay() or ml_set_maxintdelay()
		 * were called prior to the CPU PM kext being registered.  We do
		 * this here since we know at this point the values will first be
		 * used, since idle is where the decisions using these values are made.
		 */
		if (earlyMaxBusDelay != DELAY_UNSET)
			ml_set_maxbusdelay((uint32_t)(earlyMaxBusDelay & 0xFFFFFFFF));
		if (earlyMaxIntDelay != DELAY_UNSET)
			ml_set_maxintdelay(earlyMaxIntDelay);
	}

	if (pmInitDone
	    && pmDispatch != NULL
	    && pmDispatch->MachineIdle != NULL)
		(*pmDispatch->MachineIdle)(0x7FFFFFFFFFFFFFFFULL);
	else {
		/*
		 * If no power management, re-enable interrupts and halt.
		 * This will keep the CPU from spinning through the scheduler
		 * and will allow at least some minimal power savings (but it
		 * may cause problems in some MP configurations w.r.t. the APIC
		 * stopping during a GV3 transition).
		 */
		pal_hlt();
		/* Once woken, re-disable interrupts. */
		pal_cli();
	}

	/*
	 * Mark the CPU as running again.
	 */
	MARK_CPU_ACTIVE(cnum);
	DBGLOG(cpu_handle, cnum, MP_UNIDLE);
	my_cpu->lcpu.state = LCPU_RUN;
	uint64_t ixtime = my_cpu->cpu_ixtime = mach_absolute_time();
	itime = ixtime - ctime;
	my_cpu->cpu_idle_exits++;
	my_cpu->cpu_itime_total += itime;
	machine_classify_interval(itime, &my_cpu->cpu_itimes[0], &cpu_itime_bins[0], CPU_ITIME_BINS);
#if CST_DEMOTION_DEBUG
	cl = ch = 0;
	rdmsr_carefully(MSR_IA32_CORE_C3_RESIDENCY, &cl, &ch);
	c3res = (((uint64_t)ch << 32) | cl) - c3res;
	rdmsr_carefully(MSR_IA32_CORE_C6_RESIDENCY, &cl, &ch);
	c6res = (((uint64_t)ch << 32) | cl) - c6res;
	rdmsr_carefully(MSR_IA32_CORE_C7_RESIDENCY, &cl, &ch);
	c7res = (((uint64_t)ch << 32) | cl) - c7res;

	uint64_t ndelta = itime - tmrCvt(c3res + c6res + c7res, tscFCvtt2n);
	KERNEL_DEBUG_CONSTANT(0xcead0000, ndelta, itime, c7res, c6res, c3res);
	if ((itime > 1000000) && (ndelta > 250000))
		KERNEL_DEBUG_CONSTANT(0xceae0000, ndelta, itime, c7res, c6res, c3res);
#endif

machine_idle_exit:
	/*
	 * Re-enable interrupts.
	 */

	pal_sti();

	if (do_process_pending_timers) {
		TCOAL_DEBUG(0xBBBB0000 | DBG_FUNC_START, ctime, esdeadline, ehdeadline, idle_pending_timers_processed, 0);

		/* Adjust to reflect that this isn't truly a package idle exit */
		__sync_fetch_and_sub(&my_cpu->lcpu.package->num_idle, 1);
		lapic_timer_swi(); /* Trigger software timer interrupt */
		__sync_fetch_and_add(&my_cpu->lcpu.package->num_idle, 1);

		TCOAL_DEBUG(0xBBBB0000 | DBG_FUNC_END, ctime, esdeadline, idle_pending_timers_processed, 0, 0);
	}
#if CST_DEMOTION_DEBUG
	uint64_t nwakeups = PROCESSOR_DATA(cproc, wakeups_issued_total);

	if ((nwakeups == cwakeups) && (topoParms.nLThreadsPerPackage == my_cpu->lcpu.package->num_idle)) {
		KERNEL_DEBUG_CONSTANT(0xceaa0000, cwakeups, 0, 0, 0, 0);
	}
#endif
}

/*
 * Called when the CPU is to be halted.  It will choose the best C-State
 * to be in.
 */
void
pmCPUHalt(uint32_t reason)
{
    cpu_data_t	*cpup	= current_cpu_datap();

    switch (reason) {
    case PM_HALT_DEBUG:
	cpup->lcpu.state = LCPU_PAUSE;
	pal_stop_cpu(FALSE);
	break;

    case PM_HALT_PANIC:
	cpup->lcpu.state = LCPU_PAUSE;
	pal_stop_cpu(TRUE);
	break;

    case PM_HALT_NORMAL:
    case PM_HALT_SLEEP:
    default:
	pal_cli();

	if (pmInitDone
	    && pmDispatch != NULL
	    && pmDispatch->pmCPUHalt != NULL) {
	    /*
	     * Halt the CPU (and put it in a low power state).
	     */
	    (*pmDispatch->pmCPUHalt)();

	    /*
	     * We've exited halt, so get the CPU schedulable again.
	     * - by calling the fast init routine for a slave, or
	     * - by returning if we're the master processor.
	     */
	    if (cpup->cpu_number != master_cpu) {
		i386_init_slave_fast();
		panic("init_slave_fast returned");
	    }
	} else {
	    /*
	     * If no power management and a processor is taken off-line,
	     * then invalidate the cache and halt it (it will not be able
	     * to be brought back on-line without resetting the CPU).
	     */
	    __asm__ volatile ("wbinvd");
	    cpup->lcpu.state = LCPU_HALT;
	    pal_stop_cpu(FALSE);

	    panic("back from Halt");
	}

	break;
    }
}

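/*
 * Tell the power management kext to mark every CPU as powered off.
 */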
void
pmMarkAllCPUsOff(void)
{
    if (pmInitDone
	&& pmDispatch != NULL
	&& pmDispatch->markAllCPUsOff != NULL)
	(*pmDispatch->markAllCPUsOff)();
}

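/*
 * Callback invoked by the power management kext when its initialization is
 * complete: performs any deferred CPU topology initialization and marks
 * power management as ready for use.
 */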
static void
pmInitComplete(void)
{
    if (earlyTopology
	&& pmDispatch != NULL
	&& pmDispatch->pmCPUStateInit != NULL) {
	(*pmDispatch->pmCPUStateInit)();
	earlyTopology = FALSE;
    }
    pmInitDone = 1;
}

x86_lcpu_t *
pmGetLogicalCPU(int cpu)
{
    return(cpu_to_lcpu(cpu));
}

x86_lcpu_t *
pmGetMyLogicalCPU(void)
{
    cpu_data_t	*cpup	= current_cpu_datap();

    return(&cpup->lcpu);
}

static x86_core_t *
pmGetCore(int cpu)
{
    return(cpu_to_core(cpu));
}

static x86_core_t *
pmGetMyCore(void)
{
    cpu_data_t	*cpup	= current_cpu_datap();

    return(cpup->lcpu.core);
}

static x86_die_t *
pmGetDie(int cpu)
{
    return(cpu_to_die(cpu));
}

static x86_die_t *
pmGetMyDie(void)
{
    cpu_data_t	*cpup	= current_cpu_datap();

    return(cpup->lcpu.die);
}

static x86_pkg_t *
pmGetPackage(int cpu)
{
    return(cpu_to_package(cpu));
}

static x86_pkg_t *
pmGetMyPackage(void)
{
    cpu_data_t	*cpup	= current_cpu_datap();

    return(cpup->lcpu.package);
}

static void
pmLockCPUTopology(int lock)
{
    if (lock) {
	simple_lock(&x86_topo_lock);
    } else {
	simple_unlock(&x86_topo_lock);
    }
}

/*
 * Called to get the next deadline that has been set by the
 * power management code.
 * Note: a return of 0 from AICPM and this routine signifies
 * that no deadline is set.
 */
uint64_t
pmCPUGetDeadline(cpu_data_t *cpu)
{
    uint64_t	deadline	= 0;

    if (pmInitDone
	&& pmDispatch != NULL
	&& pmDispatch->GetDeadline != NULL)
	deadline = (*pmDispatch->GetDeadline)(&cpu->lcpu);

    return(deadline);
}

/*
 * Called to determine if the supplied deadline or the power management
 * deadline is sooner.  Returns whichever one is first.
 */

uint64_t
pmCPUSetDeadline(cpu_data_t *cpu, uint64_t deadline)
{
    if (pmInitDone
	&& pmDispatch != NULL
	&& pmDispatch->SetDeadline != NULL)
	deadline = (*pmDispatch->SetDeadline)(&cpu->lcpu, deadline);

    return(deadline);
}

/*
 * Called when a power management deadline expires.
 */
void
pmCPUDeadline(cpu_data_t *cpu)
{
    if (pmInitDone
	&& pmDispatch != NULL
	&& pmDispatch->Deadline != NULL)
	(*pmDispatch->Deadline)(&cpu->lcpu);
}

/*
 * Called to get a CPU out of idle.
 */
boolean_t
pmCPUExitIdle(cpu_data_t *cpu)
{
    boolean_t		do_ipi;

    if (pmInitDone
	&& pmDispatch != NULL
	&& pmDispatch->exitIdle != NULL)
	do_ipi = (*pmDispatch->exitIdle)(&cpu->lcpu);
    else
	do_ipi = TRUE;

    return(do_ipi);
}

kern_return_t
pmCPUExitHalt(int cpu)
{
    kern_return_t	rc	= KERN_INVALID_ARGUMENT;

    if (pmInitDone
	&& pmDispatch != NULL
	&& pmDispatch->exitHalt != NULL)
	rc = pmDispatch->exitHalt(cpu_to_lcpu(cpu));

    return(rc);
}

kern_return_t
pmCPUExitHaltToOff(int cpu)
{
    kern_return_t	rc	= KERN_SUCCESS;

    if (pmInitDone
	&& pmDispatch != NULL
	&& pmDispatch->exitHaltToOff != NULL)
	rc = pmDispatch->exitHaltToOff(cpu_to_lcpu(cpu));

    return(rc);
}

/*
 * Called to initialize the power management structures for the CPUs.
 */
void
pmCPUStateInit(void)
{
    if (pmDispatch != NULL && pmDispatch->pmCPUStateInit != NULL)
	(*pmDispatch->pmCPUStateInit)();
    else
	earlyTopology = TRUE;
}

/*
 * Called when a CPU is being restarted after being powered off (as in S3).
 */
void
pmCPUMarkRunning(cpu_data_t *cpu)
{
    cpu_data_t	*cpup	= current_cpu_datap();

    if (pmInitDone
	&& pmDispatch != NULL
	&& pmDispatch->markCPURunning != NULL)
	(*pmDispatch->markCPURunning)(&cpu->lcpu);
    else
	cpup->lcpu.state = LCPU_RUN;
}

/*
 * Called to get/set CPU power management state.
 */
int
pmCPUControl(uint32_t cmd, void *datap)
{
    int		rc	= -1;

    if (pmDispatch != NULL
	&& pmDispatch->pmCPUControl != NULL)
	rc = (*pmDispatch->pmCPUControl)(cmd, datap);

    return(rc);
}

/*
 * Called to save the timer state used by power management prior
 * to "sleeping".
 */
void
pmTimerSave(void)
{
    if (pmDispatch != NULL
	&& pmDispatch->pmTimerStateSave != NULL)
	(*pmDispatch->pmTimerStateSave)();
}

/*
 * Called to restore the timer state used by power management after
 * waking from "sleep".
 */
void
pmTimerRestore(void)
{
    if (pmDispatch != NULL
	&& pmDispatch->pmTimerStateRestore != NULL)
	(*pmDispatch->pmTimerStateRestore)();
}

/*
 * Set the worst-case time for the C4 to C2 transition.
 * No longer does anything.
 */
void
ml_set_maxsnoop(__unused uint32_t maxdelay)
{
}


/*
 * Get the worst-case time for the C4 to C2 transition.  Returns nanoseconds.
 */
unsigned
ml_get_maxsnoop(void)
{
    uint64_t	max_snoop	= 0;

    if (pmInitDone
	&& pmDispatch != NULL
	&& pmDispatch->getMaxSnoop != NULL)
	max_snoop = pmDispatch->getMaxSnoop();

    return((unsigned)(max_snoop & 0xffffffff));
}


uint32_t
ml_get_maxbusdelay(void)
{
    uint64_t	max_delay	= 0;

    if (pmInitDone
	&& pmDispatch != NULL
	&& pmDispatch->getMaxBusDelay != NULL)
	max_delay = pmDispatch->getMaxBusDelay();

    return((uint32_t)(max_delay & 0xffffffff));
}

/*
 * Advertise a memory access latency tolerance of "mdelay" ns
 */
void
ml_set_maxbusdelay(uint32_t mdelay)
{
    uint64_t	maxdelay	= mdelay;

    if (pmDispatch != NULL
	&& pmDispatch->setMaxBusDelay != NULL) {
	earlyMaxBusDelay = DELAY_UNSET;
	pmDispatch->setMaxBusDelay(maxdelay);
    } else
	earlyMaxBusDelay = maxdelay;
}

uint64_t
ml_get_maxintdelay(void)
{
    uint64_t	max_delay	= 0;

    if (pmDispatch != NULL
	&& pmDispatch->getMaxIntDelay != NULL)
	max_delay = pmDispatch->getMaxIntDelay();

    return(max_delay);
}

/*
 * Set the maximum delay allowed for an interrupt.
 */
void
ml_set_maxintdelay(uint64_t mdelay)
{
    if (pmDispatch != NULL
	&& pmDispatch->setMaxIntDelay != NULL) {
	earlyMaxIntDelay = DELAY_UNSET;
	pmDispatch->setMaxIntDelay(mdelay);
    } else
	earlyMaxIntDelay = mdelay;
}

boolean_t
ml_get_interrupt_prewake_applicable()
{
    boolean_t applicable = FALSE;

    if (pmInitDone
	&& pmDispatch != NULL
	&& pmDispatch->pmInterruptPrewakeApplicable != NULL)
	applicable = pmDispatch->pmInterruptPrewakeApplicable();

    return applicable;
}

/*
 * Put a CPU into "safe" mode with respect to power.
 *
 * Some systems cannot operate at a continuous "normal" speed without
 * exceeding the thermal design.  This is called per-CPU to place the
 * CPUs into a "safe" operating mode.
 */
void
pmSafeMode(x86_lcpu_t *lcpu, uint32_t flags)
{
    if (pmDispatch != NULL
	&& pmDispatch->pmCPUSafeMode != NULL)
	pmDispatch->pmCPUSafeMode(lcpu, flags);
    else {
	/*
	 * Do something reasonable if the KEXT isn't present.
	 *
	 * We only look at the PAUSE and RESUME flags.  The other flag(s)
	 * will not make any sense without the KEXT, so just ignore them.
	 *
	 * We set the CPU's state to indicate that it's halted.  If this
	 * is the CPU we're currently running on, then spin until the
	 * state becomes non-halted.
	 */
	if (flags & PM_SAFE_FL_PAUSE) {
	    lcpu->state = LCPU_PAUSE;
	    if (lcpu == x86_lcpu()) {
		while (lcpu->state == LCPU_PAUSE)
		    cpu_pause();
	    }
	}

	/*
	 * Clear the halted flag for the specified CPU; that will
	 * get it out of its spin loop.
	 */
	if (flags & PM_SAFE_FL_RESUME) {
	    lcpu->state = LCPU_RUN;
	}
    }
}

static uint32_t		saved_run_count = 0;

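/*
 * Pass the current count of runnable threads to the power management kext;
 * if the kext is not registered, stash the count so the kext can fetch it
 * later via pmGetSavedRunCount().
 */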
void
machine_run_count(uint32_t count)
{
    if (pmDispatch != NULL
	&& pmDispatch->pmSetRunCount != NULL)
	pmDispatch->pmSetRunCount(count);
    else
	saved_run_count = count;
}

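/*
 * Give the power management kext a chance to choose which processor in the
 * pset a thread should run on.  Falls back to the scheduler's preferred
 * processor if PM is not initialized or the kext is absent; a kext answer
 * of -1 means no eligible CPU, so NULL is returned.
 */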
processor_t
machine_choose_processor(processor_set_t pset,
			 processor_t preferred)
{
    int		startCPU;
    int		endCPU;
    int		preferredCPU;
    int		chosenCPU;

    if (!pmInitDone)
	return(preferred);

    if (pset == NULL) {
	startCPU = -1;
	endCPU = -1;
    } else {
	startCPU = pset->cpu_set_low;
	endCPU = pset->cpu_set_hi;
    }

    if (preferred == NULL)
	preferredCPU = -1;
    else
	preferredCPU = preferred->cpu_id;

    if (pmDispatch != NULL
	&& pmDispatch->pmChooseCPU != NULL) {
	chosenCPU = pmDispatch->pmChooseCPU(startCPU, endCPU, preferredCPU);

	if (chosenCPU == -1)
	    return(NULL);
	return(cpu_datap(chosenCPU)->cpu_processor);
    }

    return(preferred);
}

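/*
 * Callback for the power management kext: report the urgency of the thread
 * about to run on the current processor and, for real-time threads, its
 * period and deadline.
 */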
static int
pmThreadGetUrgency(uint64_t *rt_period, uint64_t *rt_deadline)
{
	int             urgency;
	uint64_t        arg1, arg2;

	urgency = thread_get_urgency(current_processor()->next_thread, &arg1, &arg2);

	if (urgency == THREAD_URGENCY_REAL_TIME) {
		if (rt_period != NULL)
			*rt_period = arg1;

		if (rt_deadline != NULL)
			*rt_deadline = arg2;
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_GET_URGENCY), urgency, arg1, arg2, 0, 0);

	return(urgency);
}

#if	DEBUG
uint32_t	urgency_stats[64][THREAD_URGENCY_MAX];
#endif

#define		URGENCY_NOTIFICATION_ASSERT_NS (5 * 1000 * 1000)
uint64_t	urgency_notification_assert_abstime_threshold, urgency_notification_max_recorded;

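/*
 * Notify the power management kext that the urgency of the thread about to
 * run on this CPU has changed.  The callout is timed when the assert
 * threshold is configured, and the kernel panics if the kext takes longer
 * than that threshold (unless timeouts are suspended).
 */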
void
thread_tell_urgency(int urgency,
    uint64_t rt_period,
    uint64_t rt_deadline,
    thread_t nthread)
{
	uint64_t	urgency_notification_time_start, delta;
	boolean_t	urgency_assert = (urgency_notification_assert_abstime_threshold != 0);
	assert(get_preemption_level() > 0 || ml_get_interrupts_enabled() == FALSE);
#if	DEBUG
	urgency_stats[cpu_number() % 64][urgency]++;
#endif
	if (!pmInitDone
	    || pmDispatch == NULL
	    || pmDispatch->pmThreadTellUrgency == NULL)
		return;

	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_URGENCY) | DBG_FUNC_START, urgency, rt_period, rt_deadline, 0, 0);

	if (__improbable((urgency_assert == TRUE)))
		urgency_notification_time_start = mach_absolute_time();

	current_cpu_datap()->cpu_nthread = nthread;
	pmDispatch->pmThreadTellUrgency(urgency, rt_period, rt_deadline);

	if (__improbable((urgency_assert == TRUE))) {
		delta = mach_absolute_time() - urgency_notification_time_start;

		if (__improbable(delta > urgency_notification_max_recorded)) {
			/* This is not synchronized, but it doesn't matter
			 * if we (rarely) miss an event, as it is statistically
			 * unlikely that it will never recur.
			 */
			urgency_notification_max_recorded = delta;

			if (__improbable((delta > urgency_notification_assert_abstime_threshold) && !machine_timeout_suspended()))
				panic("Urgency notification callout %p exceeded threshold, 0x%llx abstime units", pmDispatch->pmThreadTellUrgency, delta);
		}
	}

	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_URGENCY) | DBG_FUNC_END, urgency, rt_period, rt_deadline, 0, 0);
}

void
active_rt_threads(boolean_t active)
{
    if (!pmInitDone
	|| pmDispatch == NULL
	|| pmDispatch->pmActiveRTThreads == NULL)
	return;

    pmDispatch->pmActiveRTThreads(active);
}

static uint32_t
pmGetSavedRunCount(void)
{
    return(saved_run_count);
}

/*
 * Returns the root of the package tree.
 */
x86_pkg_t *
pmGetPkgRoot(void)
{
    return(x86_pkgs);
}

static boolean_t
pmCPUGetHibernate(int cpu)
{
    return(cpu_datap(cpu)->cpu_hibernate);
}

processor_t
pmLCPUtoProcessor(int lcpu)
{
    return(cpu_datap(lcpu)->cpu_processor);
}

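/*
 * Called by the power management kext to have a CPU re-evaluate its timer
 * deadlines.  The resync runs locally if the target is the current CPU;
 * otherwise a PM interrupt is sent to the target.
 */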
static void
pmReSyncDeadlines(int cpu)
{
    static boolean_t	registered	= FALSE;

    if (!registered) {
	PM_interrupt_register(&timer_resync_deadlines);
	registered = TRUE;
    }

    if ((uint32_t)cpu == current_cpu_datap()->lcpu.cpu_num)
	timer_resync_deadlines();
    else
	cpu_PM_interrupt(cpu);
}

static void
pmSendIPI(int cpu)
{
    lapic_send_ipi(cpu, LAPIC_PM_INTERRUPT);
}

static void
pmGetNanotimeInfo(pm_rtc_nanotime_t *rtc_nanotime)
{
	/*
	 * Make sure that nanotime didn't change while we were reading it.
	 */
	do {
		rtc_nanotime->generation = pal_rtc_nanotime_info.generation; /* must be first */
		rtc_nanotime->tsc_base = pal_rtc_nanotime_info.tsc_base;
		rtc_nanotime->ns_base = pal_rtc_nanotime_info.ns_base;
		rtc_nanotime->scale = pal_rtc_nanotime_info.scale;
		rtc_nanotime->shift = pal_rtc_nanotime_info.shift;
	} while(pal_rtc_nanotime_info.generation != 0
		&& rtc_nanotime->generation != pal_rtc_nanotime_info.generation);
}

uint32_t
pmTimerQueueMigrate(int target_cpu)
{
    /* Call the etimer code to do this. */
    return (target_cpu != cpu_number())
		? timer_queue_migrate_cpu(target_cpu)
		: 0;
}


/*
 * Called by the power management kext to register itself and to get the
 * callbacks it might need into other kernel functions.  This interface
 * is versioned to allow for slight mismatches between the kext and the
 * kernel.
 */
void
pmKextRegister(uint32_t version, pmDispatch_t *cpuFuncs,
    pmCallBacks_t *callbacks)
{
	if (callbacks != NULL && version == PM_DISPATCH_VERSION) {
		callbacks->setRTCPop            = setPop;
		callbacks->resyncDeadlines      = pmReSyncDeadlines;
		callbacks->initComplete         = pmInitComplete;
		callbacks->GetLCPU              = pmGetLogicalCPU;
		callbacks->GetCore              = pmGetCore;
		callbacks->GetDie               = pmGetDie;
		callbacks->GetPackage           = pmGetPackage;
		callbacks->GetMyLCPU            = pmGetMyLogicalCPU;
		callbacks->GetMyCore            = pmGetMyCore;
		callbacks->GetMyDie             = pmGetMyDie;
		callbacks->GetMyPackage         = pmGetMyPackage;
		callbacks->GetPkgRoot           = pmGetPkgRoot;
		callbacks->LockCPUTopology      = pmLockCPUTopology;
		callbacks->GetHibernate         = pmCPUGetHibernate;
		callbacks->LCPUtoProcessor      = pmLCPUtoProcessor;
		callbacks->ThreadBind           = thread_bind;
		callbacks->GetSavedRunCount     = pmGetSavedRunCount;
		callbacks->GetNanotimeInfo      = pmGetNanotimeInfo;
		callbacks->ThreadGetUrgency     = pmThreadGetUrgency;
		callbacks->RTCClockAdjust       = rtc_clock_adjust;
		callbacks->timerQueueMigrate    = pmTimerQueueMigrate;
		callbacks->topoParms            = &topoParms;
		callbacks->pmSendIPI            = pmSendIPI;
		callbacks->InterruptPending     = lapic_is_interrupt_pending;
		callbacks->IsInterrupting       = lapic_is_interrupting;
		callbacks->InterruptStats       = lapic_interrupt_counts;
		callbacks->DisableApicTimer     = lapic_disable_timer;
	} else {
		panic("Version mis-match between Kernel and CPU PM");
	}

	if (cpuFuncs != NULL) {
		if (pmDispatch) {
			panic("Attempt to re-register power management interface--AICPM present in xcpm mode? %p->%p", pmDispatch, cpuFuncs);
		}

		pmDispatch = cpuFuncs;

		if (earlyTopology
		    && pmDispatch->pmCPUStateInit != NULL) {
			(*pmDispatch->pmCPUStateInit)();
			earlyTopology = FALSE;
		}

		if (pmDispatch->pmIPIHandler != NULL) {
			lapic_set_pm_func((i386_intr_func_t)pmDispatch->pmIPIHandler);
		}
	}
}

/*
 * Unregisters the power management functions from the kext.
 */
void
pmUnRegister(pmDispatch_t *cpuFuncs)
{
    if (cpuFuncs != NULL && pmDispatch == cpuFuncs) {
	pmDispatch = NULL;
    }
}

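/*
 * Track package-level idle occupancy.  On idle entry, increment the
 * package's idle count; on exit, decrement it and, if this CPU was the last
 * logical thread keeping the whole package idle, count a package idle exit.
 */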
void machine_track_platform_idle(boolean_t entry) {
	cpu_data_t		*my_cpu		= current_cpu_datap();

	if (entry) {
		(void)__sync_fetch_and_add(&my_cpu->lcpu.package->num_idle, 1);
	}
	else {
		uint32_t nidle = __sync_fetch_and_sub(&my_cpu->lcpu.package->num_idle, 1);
		if (nidle == topoParms.nLThreadsPerPackage) {
			my_cpu->lcpu.package->package_idle_exits++;
		}
	}
}