/*
 * Copyright (c) 2010, 2014 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/version.h>      /* KERNEL_VERSION() */
#include <linux/workqueue.h>    /* struct work_struct, schedule_work() */
#include <linux/notifier.h>     /* struct notifier_block */
#include <linux/cpu.h>          /* register_cpu_notifier() */
#include <asm/atomic.h>

#include "mlx4.h"

#if defined(CONFIG_X86) && defined(CONFIG_APM_MODULE)

/* Each CPU is put into a group.  In most cases, the group number is
 * equal to the CPU number of one of the CPUs in the group.  The
 * exception is group NR_CPUS, which is the default group.  This is
 * protected by sys_tune_startup_mutex. */
DEFINE_PER_CPU(int, idle_cpu_group) = NR_CPUS;

/* For each group, a count of the number of CPUs in the group which
 * are known to be busy.  A busy CPU might be running the busy loop
 * below or general kernel code.  The count is decremented on entry to
 * the old pm_idle handler and incremented on exit.  The aim is to
 * avoid the count going to zero or negative.  This situation can
 * occur temporarily during module unload or CPU hot-plug, but
 * normality will be restored when the affected CPUs next exit the
 * idle loop. */
static atomic_t busy_cpu_count[NR_CPUS + 1];
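
/* Illustrative example (not part of the driver logic): on a two-core,
 * four-thread machine whose hyperthread sibling pairs are CPUs {0,2}
 * and {1,3}, CPUs 0 and 2 end up sharing one group and CPUs 1 and 3
 * another, so each group's busy count is 2 once all CPUs are added,
 * and the idle handler below keeps at least one hardware thread per
 * core busy. */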

/* A workqueue item to be executed to cause the CPU to exit from the
 * idle loop. */
DEFINE_PER_CPU(struct work_struct, sys_tune_cpu_work);

#define sys_tune_set_state(CPU, STATE) \
        do { } while (0)


/* A mutex to protect most of the module data structures. */
static DEFINE_MUTEX(sys_tune_startup_mutex);

/* The old pm_idle handler. */
static void (*old_pm_idle)(void) = NULL;

static void sys_tune_pm_idle(void)
{
        atomic_t *busy_cpus_ptr;
        int busy_cpus;
        int cpu = smp_processor_id();

        busy_cpus_ptr = &(busy_cpu_count[per_cpu(idle_cpu_group, cpu)]);

        sys_tune_set_state(cpu, 2);

        local_irq_enable();
        while (!need_resched()) {
                busy_cpus = atomic_read(busy_cpus_ptr);

                /* If other CPUs in this group are busy then let this
                 * CPU go idle.  We mustn't let the number of busy
                 * CPUs drop below 1. */
                if (busy_cpus > 1 &&
                    old_pm_idle != NULL &&
                    atomic_cmpxchg(busy_cpus_ptr, busy_cpus,
                                   busy_cpus - 1) == busy_cpus) {
                        local_irq_disable();
                        sys_tune_set_state(cpu, 3);
                        /* This check might not be necessary, but it
                         * seems safest to include it because there
                         * might be a kernel version which requires
                         * it. */
                        if (need_resched())
                                local_irq_enable();
                        else
                                old_pm_idle();
                        /* This CPU is busy again. */
                        sys_tune_set_state(cpu, 1);
                        atomic_add(1, busy_cpus_ptr);
                        return;
                }

                cpu_relax();
        }
        sys_tune_set_state(cpu, 0);
}


void sys_tune_work_func(struct work_struct *work)
{
        /* Do nothing.  Since this function is running in process
         * context, the idle thread isn't running on this CPU. */
}


#ifdef CONFIG_SMP
static void sys_tune_smp_call(void *info)
{
        schedule_work(&get_cpu_var(sys_tune_cpu_work));
        put_cpu_var(sys_tune_cpu_work);
}
#endif


#ifdef CONFIG_SMP
static void sys_tune_refresh(void)
{
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)
        on_each_cpu(&sys_tune_smp_call, NULL, 0, 1);
#else
        on_each_cpu(&sys_tune_smp_call, NULL, 1);
#endif
}
#else
static void sys_tune_refresh(void)
{
        /* The current thread is executing on the one and only CPU so
         * the idle thread isn't running. */
}
#endif
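
/* How the refresh works: the empty work item is scheduled from an IPI
 * on every CPU, which wakes that CPU's workqueue thread.  Waking a
 * runnable task makes need_resched() true on an idle CPU, so both the
 * busy loop in sys_tune_pm_idle() and the old pm_idle handler return,
 * and the CPU re-reads its (possibly changed) group state. */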


static int sys_tune_cpu_group(int cpu)
{
#ifdef CONFIG_SMP
        const cpumask_t *mask;
        int other_cpu;
        int group;

#if defined(topology_thread_cpumask) && defined(ST_HAVE_EXPORTED_CPU_SIBLING_MAP)
        /* Keep one hyperthread busy per core. */
        mask = topology_thread_cpumask(cpu);
#else
        return cpu;
#endif
        /* If any sibling of this CPU already has a group, join it. */
        for_each_cpu_mask(other_cpu, *(mask)) {
                group = per_cpu(idle_cpu_group, other_cpu);
                if (group != NR_CPUS)
                        return group;
        }
#endif

        return cpu;
}


static void sys_tune_add_cpu(int cpu)
{
        int group;

        /* Do nothing if this CPU has already been added. */
        if (per_cpu(idle_cpu_group, cpu) != NR_CPUS)
                return;

        group = sys_tune_cpu_group(cpu);
        per_cpu(idle_cpu_group, cpu) = group;
        atomic_inc(&(busy_cpu_count[group]));
}

static void sys_tune_del_cpu(int cpu)
{
        int group;

        if (per_cpu(idle_cpu_group, cpu) == NR_CPUS)
                return;

        group = per_cpu(idle_cpu_group, cpu);
        /* If the CPU was busy, this can cause the count to drop to
         * zero.  To rectify this, we need to cause one of the other
         * CPUs in the group to exit the idle loop.  If the CPU was
         * not busy then this causes the contribution for this CPU to
         * go to -1, which can cause the overall count to drop to zero
         * or go negative.  To rectify this situation we need to cause
         * this CPU to exit the idle loop. */
        atomic_dec(&(busy_cpu_count[group]));
        per_cpu(idle_cpu_group, cpu) = NR_CPUS;
}
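
/* Worked example of the comment above (illustrative only): take a
 * group holding CPUs 0 and 2 with a busy count of 1 because CPU 0 is
 * parked in old_pm_idle() and CPU 2 is busy.  Off-lining CPU 2 drops
 * the count to 0; the sys_tune_refresh() call in the notifier below
 * then wakes CPU 0, which exits old_pm_idle(), increments the count
 * back to 1 and resumes spinning as the group's busy CPU. */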


static int sys_tune_cpu_notify(struct notifier_block *self,
                               unsigned long action, void *hcpu)
{
        int cpu = (long)hcpu;

        switch (action) {
#ifdef CPU_ONLINE_FROZEN
        case CPU_ONLINE_FROZEN:
#endif
        case CPU_ONLINE:
                mutex_lock(&sys_tune_startup_mutex);
                sys_tune_add_cpu(cpu);
                mutex_unlock(&sys_tune_startup_mutex);
                /* The CPU might have already entered the idle loop in
                 * the wrong group.  Make sure it exits the idle loop
                 * so that it picks up the correct group. */
                sys_tune_refresh();
                break;

#ifdef CPU_DEAD_FROZEN
        case CPU_DEAD_FROZEN:
#endif
        case CPU_DEAD:
                mutex_lock(&sys_tune_startup_mutex);
                sys_tune_del_cpu(cpu);
                mutex_unlock(&sys_tune_startup_mutex);
                /* The deleted CPU may have been the only busy CPU in
                 * the group.  Make sure one of the other CPUs in the
                 * group exits the idle loop. */
                sys_tune_refresh();
                break;
        }
        return NOTIFY_OK;
}


static struct notifier_block sys_tune_cpu_nb = {
        .notifier_call = sys_tune_cpu_notify,
};


static void sys_tune_ensure_init(void)
{
        BUG_ON(old_pm_idle != NULL);

        /* Atomically update pm_idle to &sys_tune_pm_idle.  The old
         * value is stored in old_pm_idle before installing the new
         * handler. */
        do {
                old_pm_idle = pm_idle;
        } while (cmpxchg(&pm_idle, old_pm_idle, &sys_tune_pm_idle) !=
                 old_pm_idle);
}
#endif

void sys_tune_fini(void)
{
#if defined(CONFIG_X86) && defined(CONFIG_APM_MODULE)
        void (*old)(void);
        int cpu;

        unregister_cpu_notifier(&sys_tune_cpu_nb);

        mutex_lock(&sys_tune_startup_mutex);

        /* Restore the old handler, but only if our handler is still
         * installed; if something else has since replaced pm_idle,
         * leave it alone. */
        old = cmpxchg(&pm_idle, &sys_tune_pm_idle, old_pm_idle);

        for_each_online_cpu(cpu)
                sys_tune_del_cpu(cpu);

        mutex_unlock(&sys_tune_startup_mutex);

        /* Our handler may still be executing on other CPUs.
         * Schedule this thread on all CPUs to make sure all
         * idle threads get interrupted. */
        sys_tune_refresh();

        /* Make sure the work item has finished executing on all CPUs.
         * This in turn ensures that all idle threads have been
         * interrupted. */
        flush_scheduled_work();
#endif /* CONFIG_X86 && CONFIG_APM_MODULE */
}
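
/* Usage note (an assumption, not stated in this file): sys_tune_init()
 * and sys_tune_fini() are intended to be called once each, from the
 * driver's module load and unload paths respectively, and must not be
 * nested: sys_tune_ensure_init() hits its BUG_ON() if a handler is
 * already installed. */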

void sys_tune_init(void)
{
#if defined(CONFIG_X86) && defined(CONFIG_APM_MODULE)
        int cpu;

        for_each_possible_cpu(cpu) {
                INIT_WORK(&per_cpu(sys_tune_cpu_work, cpu),
                          sys_tune_work_func);
        }

        /* Start by registering the notifier to ensure we don't miss
         * any CPU hot-plug updates. */
        register_cpu_notifier(&sys_tune_cpu_nb);

        mutex_lock(&sys_tune_startup_mutex);

        for_each_online_cpu(cpu)
                sys_tune_add_cpu(cpu);

        sys_tune_ensure_init();

        mutex_unlock(&sys_tune_startup_mutex);

        /* Ensure our idle handler starts to run. */
        sys_tune_refresh();
#endif
}