/******************************************************************************
 * arch/ia64/xen/time.c
 *
 * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
 *                    VA Linux Systems Japan K.K.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 */

#include <linux/delay.h>
#include <linux/kernel_stat.h>
#include <linux/posix-timers.h>
#include <linux/irq.h>
#include <linux/clocksource.h>

#include <asm/timex.h>

#include <asm/xen/hypervisor.h>

#include <xen/interface/vcpu.h>

#include "../kernel/fsyscall_gtod_data.h"

static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
static DEFINE_PER_CPU(unsigned long, xen_stolen_time);
static DEFINE_PER_CPU(unsigned long, xen_blocked_time);

/* taken from i386/kernel/time-xen.c */
static void xen_init_missing_ticks_accounting(int cpu)
{
	struct vcpu_register_runstate_memory_area area;
	struct vcpu_runstate_info *runstate = &per_cpu(xen_runstate, cpu);
	int rc;

	memset(runstate, 0, sizeof(*runstate));

	area.addr.v = runstate;
	rc = HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu,
				&area);
	WARN_ON(rc && rc != -ENOSYS);

	per_cpu(xen_blocked_time, cpu) = runstate->time[RUNSTATE_blocked];
	per_cpu(xen_stolen_time, cpu) = runstate->time[RUNSTATE_runnable]
					+ runstate->time[RUNSTATE_offline];
}

/*
 * Runstate accounting
 */
/* stolen from arch/x86/xen/time.c */
static void get_runstate_snapshot(struct vcpu_runstate_info *res)
{
	u64 state_time;
	struct vcpu_runstate_info *state;

	BUG_ON(preemptible());

	state = &__get_cpu_var(xen_runstate);

	/*
	 * The runstate info is always updated by the hypervisor on
	 * the current CPU, so there's no need to use anything
	 * stronger than a compiler barrier when fetching it.
	 */
	do {
		state_time = state->state_entry_time;
		rmb();
		*res = *state;
		rmb();
	} while (state->state_entry_time != state_time);
}

#define NS_PER_TICK (1000000000LL/HZ)

static unsigned long
consider_steal_time(unsigned long new_itm)
{
	unsigned long stolen, blocked;
	unsigned long delta_itm = 0, stolentick = 0;
	int cpu = smp_processor_id();
	struct vcpu_runstate_info runstate;
	struct task_struct *p = current;

	get_runstate_snapshot(&runstate);

	/*
	 * Check for the vcpu migration effect.  In that case the itc value
	 * goes backwards, which would produce a huge stolen value; this
	 * check simply detects and rejects that effect.
	 */
	if (!time_after_eq(runstate.time[RUNSTATE_blocked],
			   per_cpu(xen_blocked_time, cpu)))
		blocked = 0;
	else
		/* nanoseconds spent blocked since the last accounting */
		blocked = runstate.time[RUNSTATE_blocked] -
			  per_cpu(xen_blocked_time, cpu);

	if (!time_after_eq(runstate.time[RUNSTATE_runnable] +
			   runstate.time[RUNSTATE_offline],
			   per_cpu(xen_stolen_time, cpu)))
		stolen = 0;
	else
		/* nanoseconds stolen (runnable + offline) since the last accounting */
		stolen = runstate.time[RUNSTATE_runnable] +
			 runstate.time[RUNSTATE_offline] -
			 per_cpu(xen_stolen_time, cpu);

	if (!time_after(delta_itm + new_itm, ia64_get_itc()))
		stolentick = ia64_get_itc() - new_itm;

	do_div(stolentick, NS_PER_TICK);
	stolentick++;

	do_div(stolen, NS_PER_TICK);

	if (stolen > stolentick)
		stolen = stolentick;

	stolentick -= stolen;
	do_div(blocked, NS_PER_TICK);

	if (blocked > stolentick)
		blocked = stolentick;

	if (stolen > 0 || blocked > 0) {
		account_steal_ticks(stolen);
		account_idle_ticks(blocked);
		run_local_timers();

		rcu_check_callbacks(cpu, user_mode(get_irq_regs()));

		scheduler_tick();
		run_posix_cpu_timers(p);
		delta_itm += local_cpu_data->itm_delta * (stolen + blocked);

		if (cpu == time_keeper_id) {
			write_seqlock(&xtime_lock);
			do_timer(stolen + blocked);
			local_cpu_data->itm_next = delta_itm + new_itm;
			write_sequnlock(&xtime_lock);
		} else {
			local_cpu_data->itm_next = delta_itm + new_itm;
		}
		per_cpu(xen_stolen_time, cpu) += NS_PER_TICK * stolen;
		per_cpu(xen_blocked_time, cpu) += NS_PER_TICK * blocked;
	}
	return delta_itm;
}

static int xen_do_steal_accounting(unsigned long *new_itm)
{
	unsigned long delta_itm;
	delta_itm = consider_steal_time(*new_itm);
	*new_itm += delta_itm;
	if (time_after(*new_itm, ia64_get_itc()) && delta_itm)
		return 1;

	return 0;
}

static void xen_itc_jitter_data_reset(void)
{
	u64 lcycle, ret;

	do {
		lcycle = itc_jitter_data.itc_lastcycle;
		ret = cmpxchg(&itc_jitter_data.itc_lastcycle, lcycle, 0);
	} while (unlikely(ret != lcycle));
}

/* based on xen_sched_clock() in arch/x86/xen/time.c. */
/*
 * This relies on HAVE_UNSTABLE_SCHED_CLOCK.  If it can't be defined,
 * similar logic should be implemented here.
 */
/*
 * Xen sched_clock implementation.  Returns the number of unstolen
 * nanoseconds, i.e. the time the VCPU has spent in the RUNNING and
 * BLOCKED states, in nanoseconds.
 */
static unsigned long long xen_sched_clock(void)
{
	struct vcpu_runstate_info runstate;

	unsigned long long now;
	unsigned long long offset;
	unsigned long long ret;

	/*
	 * Ideally sched_clock should be called on a per-cpu basis
	 * anyway, so preempt should already be disabled, but that's
	 * not current practice at the moment.
	 */
	preempt_disable();

	/*
	 * Both ia64_native_sched_clock() and Xen's runstate are based
	 * on AR.ITC, so the difference between them is meaningful.
	 */
	now = ia64_native_sched_clock();

	get_runstate_snapshot(&runstate);

	WARN_ON(runstate.state != RUNSTATE_running);

	offset = 0;
	if (now > runstate.state_entry_time)
		offset = now - runstate.state_entry_time;
	ret = runstate.time[RUNSTATE_blocked] +
	      runstate.time[RUNSTATE_running] +
	      offset;

	preempt_enable();

	return ret;
}

struct pv_time_ops xen_time_ops __initdata = {
	.init_missing_ticks_accounting	= xen_init_missing_ticks_accounting,
	.do_steal_accounting		= xen_do_steal_accounting,
	.clocksource_resume		= xen_itc_jitter_data_reset,
	.sched_clock			= xen_sched_clock,
};

/* Called after suspend, to resume time.
 */
static void xen_local_tick_resume(void)
{
	/* Just trigger a tick.  */
	ia64_cpu_local_tick();
	touch_softlockup_watchdog();
}

void
xen_timer_resume(void)
{
	unsigned int cpu;

	xen_local_tick_resume();

	for_each_online_cpu(cpu)
		xen_init_missing_ticks_accounting(cpu);
}

static void ia64_cpu_local_tick_fn(void *unused)
{
	xen_local_tick_resume();
	xen_init_missing_ticks_accounting(smp_processor_id());
}

void
xen_timer_resume_on_aps(void)
{
	smp_call_function(&ia64_cpu_local_tick_fn, NULL, 1);
}
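
/*
 * Usage sketch: xen_time_ops is only a table of callbacks.  Assuming the
 * usual ia64 pv_ops wiring (the exact location, xen_setup_pv_ops() in
 * arch/ia64/xen/xen_pv_ops.c, is an assumption here), it is copied into the
 * global pv_time_ops during early Xen setup, roughly:
 *
 *	void __init xen_setup_pv_ops(void)
 *	{
 *		...
 *		pv_time_ops = xen_time_ops;
 *		...
 *	}
 *
 * after which the generic ia64 timer code calls the xen_* hooks defined
 * above instead of the native implementations.  Because the table is
 * __initdata, it must be copied (not referenced) before init memory is
 * freed.
 */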