/*
 * Copyright (c) 2010, 2014 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include <linux/sched.h>
#include <linux/mutex.h>
#include <asm/atomic.h>

#include "mlx4.h"

#if defined(CONFIG_X86) && defined(CONFIG_APM_MODULE)

/* Each CPU is put into a group.  In most cases, the group number is
 * equal to the CPU number of one of the CPUs in the group.  The
 * exception is group NR_CPUS, which is the default group.  This is
 * protected by sys_tune_startup_mutex. */
DEFINE_PER_CPU(int, idle_cpu_group) = NR_CPUS;

/* For each group, a count of the number of CPUs in the group which
 * are known to be busy.  A busy CPU might be running the busy loop
 * below or general kernel code.  The count is decremented on entry to
 * the old pm_idle handler and incremented on exit.  The aim is to
 * avoid the count going to zero or negative.  This situation can
 * occur temporarily during module unload or CPU hot-plug, but
 * normality will be restored when the affected CPUs next exit the
 * idle loop. */
static atomic_t busy_cpu_count[NR_CPUS+1];

/* A workqueue item to be executed to cause the CPU to exit from the
 * idle loop. */
DEFINE_PER_CPU(struct work_struct, sys_tune_cpu_work);

#define sys_tune_set_state(CPU,STATE) \
	do { } while (0)


/* A mutex to protect most of the module data structures. */
static DEFINE_MUTEX(sys_tune_startup_mutex);

/* The old pm_idle handler. */
static void (*old_pm_idle)(void) = NULL;

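/* Replacement pm_idle handler.  Instead of letting the CPU sleep, spin
 * with interrupts enabled until a reschedule is needed.  If enough other
 * CPUs in this group are busy, hand off to the original handler so this
 * CPU can enter a real idle state; the group's busy count is decremented
 * around that call and restored afterwards. */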
static void sys_tune_pm_idle(void)
{
	atomic_t *busy_cpus_ptr;
	int busy_cpus;
	int cpu = smp_processor_id();

	busy_cpus_ptr = &(busy_cpu_count[per_cpu(idle_cpu_group, cpu)]);

	sys_tune_set_state(cpu, 2);

	local_irq_enable();
	while (!need_resched()) {
		busy_cpus = atomic_read(busy_cpus_ptr);

		/* If other CPUs in this group are busy then let this
		 * CPU go idle.  We mustn't let the number of busy
		 * CPUs drop below 1. */
		if (busy_cpus > 1 &&
		    old_pm_idle != NULL &&
		    (atomic_cmpxchg(busy_cpus_ptr, busy_cpus,
				    busy_cpus - 1) == busy_cpus)) {
			local_irq_disable();
			sys_tune_set_state(cpu, 3);
			/* This check might not be necessary, but it
			 * seems safest to include it because there
			 * might be a kernel version which requires
			 * it. */
			if (need_resched())
				local_irq_enable();
			else
				old_pm_idle();
			/* This CPU is busy again. */
			sys_tune_set_state(cpu, 1);
			atomic_add(1, busy_cpus_ptr);
			return;
		}

		cpu_relax();
	}
	sys_tune_set_state(cpu, 0);
}


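/* Work handler used only to interrupt the idle thread; it intentionally
 * does nothing. */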
void sys_tune_work_func(struct work_struct *work)
{
	/* Do nothing.  Since this function is running in process
	 * context, the idle thread isn't running on this CPU. */
}


#ifdef CONFIG_SMP
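/* IPI callback: queue the current CPU's work item so that its idle
 * thread is preempted and re-enters the idle loop. */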
static void sys_tune_smp_call(void *info)
{
	schedule_work(&get_cpu_var(sys_tune_cpu_work));
	put_cpu_var(sys_tune_cpu_work);
}
#endif


#ifdef CONFIG_SMP
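/* Kick every online CPU out of the idle loop by scheduling the per-CPU
 * work item on each of them; the version check below accounts for the
 * older four-argument on_each_cpu() signature. */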
static void sys_tune_refresh(void)
{
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)
	on_each_cpu(&sys_tune_smp_call, NULL, 0, 1);
#else
	on_each_cpu(&sys_tune_smp_call, NULL, 1);
#endif
}
#else
static void sys_tune_refresh(void)
{
	/* The current thread is executing on the one and only CPU so
	 * the idle thread isn't running. */
}
#endif


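/* Work out which group a CPU belongs to.  When sibling (hyperthread)
 * information is available, CPUs that share a core share a group so that
 * only one hyperthread per core stays busy; otherwise each CPU is its
 * own group. */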
static int sys_tune_cpu_group(int cpu)
{
#ifdef CONFIG_SMP
	const cpumask_t *mask;
	int other_cpu;
	int group;

#if defined(topology_thread_cpumask) && defined(ST_HAVE_EXPORTED_CPU_SIBLING_MAP)
	/* Keep one hyperthread busy per core. */
	mask = topology_thread_cpumask(cpu);
#else
	return cpu;
#endif
	for_each_cpu_mask(other_cpu, *(mask)) {
		group = per_cpu(idle_cpu_group, other_cpu);
		if (group != NR_CPUS)
			return group;
	}
#endif

	return cpu;
}


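/* Assign a newly online CPU to a group and count it as busy.  Called
 * with sys_tune_startup_mutex held. */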
static void sys_tune_add_cpu(int cpu)
{
	int group;

	/* Do nothing if this CPU has already been added. */
	if (per_cpu(idle_cpu_group, cpu) != NR_CPUS)
		return;

	group = sys_tune_cpu_group(cpu);
	per_cpu(idle_cpu_group, cpu) = group;
	atomic_inc(&(busy_cpu_count[group]));
}

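/* Remove a CPU from its group when it goes offline.  Called with
 * sys_tune_startup_mutex held. */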
static void sys_tune_del_cpu(int cpu)
{
	int group;

	if (per_cpu(idle_cpu_group, cpu) == NR_CPUS)
		return;

	group = per_cpu(idle_cpu_group, cpu);
	/* If the CPU was busy, this can cause the count to drop to
	 * zero.  To rectify this, we need to cause one of the other
	 * CPUs in the group to exit the idle loop.  If the CPU was
	 * not busy then this causes the contribution for this CPU to
	 * go to -1 which can cause the overall count to drop to zero
	 * or go negative.  To rectify this situation we need to cause
	 * this CPU to exit the idle loop. */
	atomic_dec(&(busy_cpu_count[group]));
	per_cpu(idle_cpu_group, cpu) = NR_CPUS;
}


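/* CPU hotplug notifier: keep the group assignments and busy counts in
 * step as CPUs come online and go offline. */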
static int sys_tune_cpu_notify(struct notifier_block *self,
			       unsigned long action, void *hcpu)
{
	int cpu = (long)hcpu;

	switch (action) {
#ifdef CPU_ONLINE_FROZEN
	case CPU_ONLINE_FROZEN:
#endif
	case CPU_ONLINE:
		mutex_lock(&sys_tune_startup_mutex);
		sys_tune_add_cpu(cpu);
		mutex_unlock(&sys_tune_startup_mutex);
		/* The CPU might have already entered the idle loop in
		 * the wrong group.  Make sure it exits the idle loop
		 * so that it picks up the correct group. */
		sys_tune_refresh();
		break;

#ifdef CPU_DEAD_FROZEN
	case CPU_DEAD_FROZEN:
#endif
	case CPU_DEAD:
		mutex_lock(&sys_tune_startup_mutex);
		sys_tune_del_cpu(cpu);
		mutex_unlock(&sys_tune_startup_mutex);
		/* The deleted CPU may have been the only busy CPU in
		 * the group.  Make sure one of the other CPUs in the
		 * group exits the idle loop. */
		sys_tune_refresh();
		break;
	}
	return NOTIFY_OK;
}


static struct notifier_block sys_tune_cpu_nb = {
	.notifier_call = sys_tune_cpu_notify,
};


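/* Install sys_tune_pm_idle as the pm_idle handler, remembering the old
 * handler so it can be chained to and later restored. */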
static void sys_tune_ensure_init(void)
{
	BUG_ON(old_pm_idle != NULL);

	/* Atomically update pm_idle to &sys_tune_pm_idle.  The old value
	 * is stored in old_pm_idle before installing the new
	 * handler. */
	do {
		old_pm_idle = pm_idle;
	} while (cmpxchg(&pm_idle, old_pm_idle, &sys_tune_pm_idle) !=
		 old_pm_idle);
}
#endif

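/* Tear down the idle tuning: restore the original pm_idle handler,
 * release the per-CPU group state and make sure no CPU is still
 * executing our handler before returning. */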
void sys_tune_fini(void)
{
#if defined(CONFIG_X86) && defined(CONFIG_APM_MODULE)
	void (*old)(void);
	int cpu;

	unregister_cpu_notifier(&sys_tune_cpu_nb);

	mutex_lock(&sys_tune_startup_mutex);

	old = cmpxchg(&pm_idle, &sys_tune_pm_idle, old_pm_idle);

	for_each_online_cpu(cpu)
		sys_tune_del_cpu(cpu);

	mutex_unlock(&sys_tune_startup_mutex);

	/* Our handler may still be executing on other CPUs.
	 * Schedule this thread on all CPUs to make sure all
	 * idle threads get interrupted. */
	sys_tune_refresh();

	/* Make sure the work item has finished executing on all CPUs.
	 * This in turn ensures that all idle threads have been
	 * interrupted. */
	flush_scheduled_work();
#endif /* CONFIG_X86 && CONFIG_APM_MODULE */
}

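/* Install the idle tuning: register for CPU hotplug events, assign every
 * online CPU to a group and hook pm_idle. */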
void sys_tune_init(void)
{
#if defined(CONFIG_X86) && defined(CONFIG_APM_MODULE)
	int cpu;

	for_each_possible_cpu(cpu) {
		INIT_WORK(&per_cpu(sys_tune_cpu_work, cpu),
			  sys_tune_work_func);
	}

	/* Start by registering the handler to ensure we don't miss
	 * any updates. */
	register_cpu_notifier(&sys_tune_cpu_nb);

	mutex_lock(&sys_tune_startup_mutex);

	for_each_online_cpu(cpu)
		sys_tune_add_cpu(cpu);

	sys_tune_ensure_init();

	mutex_unlock(&sys_tune_startup_mutex);

	/* Ensure our idle handler starts to run. */
	sys_tune_refresh();
#endif
}