1/*
2 * Copyright 2012-2013, Haiku, Inc. All Rights Reserved.
3 * Distributed under the terms of the MIT License.
4 *
5 * Authors:
6 *		Yongcong Du <ycdu.vmcore@gmail.com>
 *		Paweł Dziepak, <pdziepak@quarnos.org>
8 */
9
10
11#include <arch_system_info.h>
12#include <cpu.h>
13#include <debug.h>
14#include <smp.h>
15#include <thread.h>
16#include <util/AutoLock.h>
17
18#include <cpuidle.h>
19#include <KernelExport.h>
20
21#include <stdio.h>
22
23
// Capacity of the sCStates table; also the exclusive upper bound of the
// C-state numbers probed in init_cstates().
#define CPUIDLE_CSTATE_MAX 8

// Passed as the ECX argument of MWAIT (bit 0 set). Per the Intel SDM this
// asks for interrupts to end the wait even while they are masked.
#define MWAIT_INTERRUPTS_BREAK 0x1

// Name under which this cpuidle module is published.
#define INTEL_CSTATES_MODULE_NAME \
	CPUIDLE_MODULES_PREFIX "/intel_cstates/v1"

// Default width of the idle-time window that maps to a single C-state.
// Uses the same unit as the system_time() deltas it is compared against
// (presumably microseconds -- confirm against system_time()).
#define BASE_TIME_STEP 500
31
32struct CState {
33	uint32	fCode;
34	int		fSubStatesCount;
35};
36
37static CState sCStates[CPUIDLE_CSTATE_MAX];
38static int sCStateCount;
39
40static int sTimeStep = BASE_TIME_STEP;
41static bool sEnableWait = false;
42
43static bigtime_t* sIdleTime;
44
45
46static inline void
47x86_monitor(void* address, uint32 ecx, uint32 edx)
48{
49	asm volatile("monitor" : : "a" (address), "c" (ecx), "d"(edx));
50}
51
52
53static inline void
54x86_mwait(uint32 eax, uint32 ecx)
55{
56	asm volatile("mwait" : : "a" (eax), "c" (ecx));
57}
58
59
60static void
61cstates_set_scheduler_mode(scheduler_mode mode)
62{
63	if (mode == SCHEDULER_MODE_POWER_SAVING) {
64		sTimeStep = BASE_TIME_STEP / 4;
65		sEnableWait = true;
66	} else {
67		sTimeStep = BASE_TIME_STEP;
68		sEnableWait = false;
69	}
70}
71
72
73static void
74cstates_idle(void)
75{
76	ASSERT(thread_get_current_thread()->pinned_to_cpu > 0);
77	int32 cpu = smp_get_current_cpu();
78
79	bigtime_t timeStep = sTimeStep;
80	bigtime_t idleTime = sIdleTime[cpu];
81	int state = min_c(idleTime / timeStep, sCStateCount - 1);
82
83	if(state < 0 || state >= sCStateCount) {
84		panic("State %d of CPU %" B_PRId32 " is out of range (0 to %d), "
85			"idleTime %" B_PRIdBIGTIME "/%" B_PRIdBIGTIME, state, cpu,
86			sCStateCount, idleTime, timeStep);
87	}
88
89	int subState = idleTime % timeStep;
90	subState *= sCStates[state].fSubStatesCount;
91	subState /= timeStep;
92
93	ASSERT(subState >= 0 && subState < sCStates[state].fSubStatesCount);
94
95	InterruptsLocker locker;
96	int dummy;
97	bigtime_t start = system_time();
98	x86_monitor(&dummy, 0, 0);
99	x86_mwait(sCStates[state].fCode | subState, MWAIT_INTERRUPTS_BREAK);
100	bigtime_t delta = system_time() - start;
101	locker.Unlock();
102
103	// Negative delta shouldn't happen, but apparently it does...
104	if (delta >= 0)
105		sIdleTime[cpu] = (idleTime + delta) / 2;
106}
107
108
109static void
110cstates_wait(int32* variable, int32 test)
111{
112	if (!sEnableWait)
113		return;
114
115	InterruptsLocker _;
116	x86_monitor(variable, 0, 0);
117	if (*variable != test)
118		x86_mwait(sCStates[0].fCode, MWAIT_INTERRUPTS_BREAK);
119}
120
121
122static status_t
123init_cstates()
124{
125	if (!x86_check_feature(IA32_FEATURE_EXT_MONITOR, FEATURE_EXT))
126		return B_ERROR;
127	if (!x86_check_feature(IA32_FEATURE_POWER_MWAIT, FEATURE_5_ECX))
128		return B_ERROR;
129	if (!x86_check_feature(IA32_FEATURE_INTERRUPT_MWAIT, FEATURE_5_ECX))
130		return B_ERROR;
131
132	// we need invariant TSC
133	if (!x86_check_feature(IA32_FEATURE_INVARIANT_TSC, FEATURE_EXT_7_EDX))
134		return B_ERROR;
135
136	// get C-state data
137	cpuid_info cpuid;
138	get_current_cpuid(&cpuid, 0, 0);
139	uint32 maxBasicLeaf = cpuid.eax_0.max_eax;
140	if (maxBasicLeaf < 5)
141		return B_ERROR;
142
143	get_current_cpuid(&cpuid, 5, 0);
144	if ((cpuid.regs.eax & 0xffff) < sizeof(int32))
145		return B_ERROR;
146
147	char cStates[64];
148	unsigned int offset = 0;
149	for (int32 i = 1; i < CPUIDLE_CSTATE_MAX; i++) {
150		int32 subStates = (cpuid.regs.edx >> (i * 4)) & 0xf;
151		// no sub-states means the state is not available
152		if (subStates == 0)
153			continue;
154
155		if (offset < sizeof(cStates)) {
156			offset += snprintf(cStates + offset, sizeof(cStates) - offset,
157					", C%" B_PRId32, i);
158		}
159
160		sCStates[sCStateCount].fCode = sCStateCount * 0x10;
161		sCStates[sCStateCount].fSubStatesCount = subStates;
162		sCStateCount++;
163	}
164
165	if (sCStateCount == 0)
166		return B_ERROR;
167
168	sIdleTime = new(std::nothrow) bigtime_t[smp_get_num_cpus()];
169	if (sIdleTime == NULL)
170		return B_NO_MEMORY;
171	memset(sIdleTime, 0, sizeof(bigtime_t) * smp_get_num_cpus());
172
173	cstates_set_scheduler_mode(SCHEDULER_MODE_LOW_LATENCY);
174
175	dprintf("using Intel C-States: C0%s\n", cStates);
176	return B_OK;
177}
178
179
180static status_t
181uninit_cstates()
182{
183	delete[] sIdleTime;
184	return B_OK;
185}
186
187
188static status_t
189std_ops(int32 op, ...)
190{
191	switch (op) {
192		case B_MODULE_INIT:
193			return init_cstates();
194
195		case B_MODULE_UNINIT:
196			uninit_cstates();
197			return B_OK;
198	}
199
200	return B_ERROR;
201}
202
203
204static cpuidle_module_info sIntelCStates = {
205	{
206		INTEL_CSTATES_MODULE_NAME,
207		0,
208		std_ops,
209	},
210
211	0.8f,
212
213	cstates_set_scheduler_mode,
214
215	cstates_idle,
216	cstates_wait
217};
218
219
220module_info* modules[] = {
221	(module_info*)&sIntelCStates,
222	NULL
223};
224
225