/*
 * Xen time implementation.
 *
 * This is implemented in terms of a clocksource driver which uses
 * the hypervisor clock as a nanosecond timebase, and a clockevent
 * driver which uses the hypervisor's timer mechanism.
 *
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/clocksource.h>
#include <linux/clockchips.h>
#include <linux/kernel_stat.h>
#include <linux/math64.h>
#include <linux/gfp.h>

#include <asm/pvclock.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/events.h>
#include <xen/features.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>

#include "xen-ops.h"

#define XEN_SHIFT 22

/* Xen may fire a timer up to this many ns early */
#define TIMER_SLOP	100000
#define NS_PER_TICK	(1000000000LL / HZ)
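
/*
 * For example, with HZ=100 NS_PER_TICK works out to 10,000,000 ns (one
 * tick every 10 ms); with HZ=250 it is 4,000,000 ns.
 */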

/* runstate info updated by Xen */
static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);

/* snapshots of runstate info */
static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot);

/* unused ns of stolen and blocked time */
static DEFINE_PER_CPU(u64, xen_residual_stolen);
static DEFINE_PER_CPU(u64, xen_residual_blocked);

/* return a consistent snapshot of a 64-bit time/counter value */
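/*
 * On a 32-bit kernel a 64-bit load compiles to two 32-bit loads, so a
 * concurrent update could be observed half old, half new.  Reading the
 * high word, then the low word, and retrying until the high word is
 * unchanged yields a consistent value from a single writer: if the
 * high word did not move, both halves belong to one valid snapshot.
 */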
static u64 get64(const u64 *p)
{
	u64 ret;

	if (BITS_PER_LONG < 64) {
		u32 *p32 = (u32 *)p;
		u32 h, l;

		do {
			h = p32[1];
			barrier();
			l = p32[0];
			barrier();
		} while (p32[1] != h);

		ret = (((u64)h) << 32) | l;
	} else
		ret = *p;

	return ret;
}

/*
 * Runstate accounting
 */
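/*
 * Xen rewrites state_entry_time whenever it updates the runstate area,
 * so the field doubles as a sequence counter: if it reads the same
 * before and after the structure copy, the snapshot is consistent.
 */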
static void get_runstate_snapshot(struct vcpu_runstate_info *res)
{
	u64 state_time;
	struct vcpu_runstate_info *state;

	BUG_ON(preemptible());

	state = &__get_cpu_var(xen_runstate);

	/*
	 * The runstate info is always updated by the hypervisor on
	 * the current CPU, so there's no need to use anything
	 * stronger than a compiler barrier when fetching it.
	 */
	do {
		state_time = get64(&state->state_entry_time);
		barrier();
		*res = *state;
		barrier();
	} while (get64(&state->state_entry_time) != state_time);
}

/* return true when a vcpu could run but has no real cpu to run on */
bool xen_vcpu_stolen(int vcpu)
{
	return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable;
}

void xen_setup_runstate_info(int cpu)
{
	struct vcpu_register_runstate_memory_area area;

	area.addr.v = &per_cpu(xen_runstate, cpu);

	if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
			       cpu, &area))
		BUG();
}

static void do_stolen_accounting(void)
{
	struct vcpu_runstate_info state;
	struct vcpu_runstate_info *snap;
	s64 blocked, runnable, offline, stolen;
	cputime_t ticks;

	get_runstate_snapshot(&state);

	WARN_ON(state.state != RUNSTATE_running);

	snap = &__get_cpu_var(xen_runstate_snapshot);

	/* work out how much time the VCPU has not been running */
	blocked = state.time[RUNSTATE_blocked] - snap->time[RUNSTATE_blocked];
	runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable];
	offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline];

	*snap = state;

	/* Add the appropriate number of ticks of stolen time,
	   including any left-overs from last time. */
	stolen = runnable + offline + __get_cpu_var(xen_residual_stolen);

	if (stolen < 0)
		stolen = 0;

	ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
	__get_cpu_var(xen_residual_stolen) = stolen;
	account_steal_ticks(ticks);

	/* Add the appropriate number of ticks of blocked time,
	   including any left-overs from last time. */
	blocked += __get_cpu_var(xen_residual_blocked);

	if (blocked < 0)
		blocked = 0;

	ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked);
	__get_cpu_var(xen_residual_blocked) = blocked;
	account_idle_ticks(ticks);
}
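
/*
 * Worked example, assuming HZ=100 (NS_PER_TICK = 10,000,000): if 25 ms
 * of stolen time accumulated since the last snapshot, the division
 * above accounts 2 steal ticks and carries the remaining 5,000,000 ns
 * in xen_residual_stolen, so rounding losses never accumulate across
 * timer interrupts.
 */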

/* Get the TSC speed from Xen */
static unsigned long xen_tsc_khz(void)
{
	struct pvclock_vcpu_time_info *info =
		&HYPERVISOR_shared_info->vcpu_info[0].time;

	return pvclock_tsc_khz(info);
}

cycle_t xen_clocksource_read(void)
{
	struct pvclock_vcpu_time_info *src;
	cycle_t ret;

	src = &get_cpu_var(xen_vcpu)->time;
	ret = pvclock_clocksource_read(src);
	put_cpu_var(xen_vcpu);
	return ret;
}

static cycle_t xen_clocksource_get_cycles(struct clocksource *cs)
{
	return xen_clocksource_read();
}

static void xen_read_wallclock(struct timespec *ts)
{
	struct shared_info *s = HYPERVISOR_shared_info;
	struct pvclock_wall_clock *wall_clock = &(s->wc);
	struct pvclock_vcpu_time_info *vcpu_time;

	vcpu_time = &get_cpu_var(xen_vcpu)->time;
	pvclock_read_wallclock(wall_clock, vcpu_time, ts);
	put_cpu_var(xen_vcpu);
}

static unsigned long xen_get_wallclock(void)
{
	struct timespec ts;

	xen_read_wallclock(&ts);
	return ts.tv_sec;
}

static int xen_set_wallclock(unsigned long now)
{
	/* do nothing for domU */
	return -1;
}

static struct clocksource xen_clocksource __read_mostly = {
	.name = "xen",
	.rating = 400,
	.read = xen_clocksource_get_cycles,
	.mask = ~0,
	.mult = 1<<XEN_SHIFT,		/* time directly in nanoseconds */
	.shift = XEN_SHIFT,
	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
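
/*
 * The generic timekeeping core converts a clocksource reading with
 * ns = (cycles * mult) >> shift.  With .mult = 1 << XEN_SHIFT and
 * .shift = XEN_SHIFT this reduces to ns = cycles: the value returned
 * by pvclock_clocksource_read() is already in nanoseconds.
 */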

/*
   Xen clockevent implementation

   Xen has two clockevent implementations:

   The old timer_op one works with all released versions of Xen prior
   to version 3.0.4.  This version of the hypervisor provides a
   single-shot timer with nanosecond resolution.  However, a 100Hz
   tick, delivered while the vcpu is running, shares the same event
   channel.  We don't care about or use this tick, but it will cause
   the core time code to think the timer fired too soon, and will end
   up resetting it each time.  It could be filtered, but doing so has
   complications when the ktime clocksource is not yet the xen
   clocksource (ie, at boot time).

   The new vcpu_op-based timer interface allows the tick timer period
   to be changed or turned off.  The tick timer is not useful as a
   periodic timer because events are only delivered to running vcpus.
   The one-shot timer can report when a timeout is in the past, so
   set_next_event is capable of returning -ETIME when appropriate.
   This interface is used when available.
*/


/*
  Get a hypervisor absolute time.  In theory we could maintain an
  offset between the kernel's time and the hypervisor's time, and
  apply that to a kernel's absolute timeout.  Unfortunately the
  hypervisor and kernel times can drift even if the kernel is using
  the Xen clocksource, because ntp can warp the kernel's clocksource.
*/
static s64 get_abs_timeout(unsigned long delta)
{
	return xen_clocksource_read() + delta;
}

static void xen_timerop_set_mode(enum clock_event_mode mode,
				 struct clock_event_device *evt)
{
	switch (mode) {
	case CLOCK_EVT_MODE_PERIODIC:
		/* unsupported */
		WARN_ON(1);
		break;

	case CLOCK_EVT_MODE_ONESHOT:
	case CLOCK_EVT_MODE_RESUME:
		break;

	case CLOCK_EVT_MODE_UNUSED:
	case CLOCK_EVT_MODE_SHUTDOWN:
		HYPERVISOR_set_timer_op(0);  /* cancel timeout */
		break;
	}
}

static int xen_timerop_set_next_event(unsigned long delta,
				      struct clock_event_device *evt)
{
	WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);

	if (HYPERVISOR_set_timer_op(get_abs_timeout(delta)) < 0)
		BUG();

	/* We may have missed the deadline, but there's no real way of
	   knowing for sure.  If the event was in the past, then we'll
	   get an immediate interrupt. */

	return 0;
}

static const struct clock_event_device xen_timerop_clockevent = {
	.name = "xen",
	.features = CLOCK_EVT_FEAT_ONESHOT,

	.max_delta_ns = 0xffffffff,
	.min_delta_ns = TIMER_SLOP,

	.mult = 1,
	.shift = 0,
	.rating = 500,

	.set_mode = xen_timerop_set_mode,
	.set_next_event = xen_timerop_set_next_event,
};
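
/*
 * With .mult = 1 and .shift = 0 the clockevents core's programming
 * conversion, (delta_ns * mult) >> shift, is the identity, so
 * set_next_event() receives its delta directly in nanoseconds.
 * .max_delta_ns = 0xffffffff therefore caps a single event at about
 * 4.29 seconds, and .min_delta_ns is TIMER_SLOP, the margin by which
 * Xen may fire early.
 */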

static void xen_vcpuop_set_mode(enum clock_event_mode mode,
				struct clock_event_device *evt)
{
	int cpu = smp_processor_id();

	switch (mode) {
	case CLOCK_EVT_MODE_PERIODIC:
		WARN_ON(1);	/* unsupported */
		break;

	case CLOCK_EVT_MODE_ONESHOT:
		if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
			BUG();
		break;

	case CLOCK_EVT_MODE_UNUSED:
	case CLOCK_EVT_MODE_SHUTDOWN:
		if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, cpu, NULL) ||
		    HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
			BUG();
		break;
	case CLOCK_EVT_MODE_RESUME:
		break;
	}
}

static int xen_vcpuop_set_next_event(unsigned long delta,
				     struct clock_event_device *evt)
{
	int cpu = smp_processor_id();
	struct vcpu_set_singleshot_timer single;
	int ret;

	WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);

	single.timeout_abs_ns = get_abs_timeout(delta);
	single.flags = VCPU_SSHOTTMR_future;

	ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, cpu, &single);

	BUG_ON(ret != 0 && ret != -ETIME);

	return ret;
}
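
/*
 * With VCPU_SSHOTTMR_future set, Xen reports a timeout that is already
 * in the past by failing the hypercall with -ETIME rather than firing
 * the timer; the BUG_ON above accepts that code, and returning it lets
 * the clockevents core program a fresh deadline (see the interface
 * comment earlier in this file).
 */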

static const struct clock_event_device xen_vcpuop_clockevent = {
	.name = "xen",
	.features = CLOCK_EVT_FEAT_ONESHOT,

	.max_delta_ns = 0xffffffff,
	.min_delta_ns = TIMER_SLOP,

	.mult = 1,
	.shift = 0,
	.rating = 500,

	.set_mode = xen_vcpuop_set_mode,
	.set_next_event = xen_vcpuop_set_next_event,
};

static const struct clock_event_device *xen_clockevent =
	&xen_timerop_clockevent;
static DEFINE_PER_CPU(struct clock_event_device, xen_clock_events);

static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
{
	struct clock_event_device *evt = &__get_cpu_var(xen_clock_events);
	irqreturn_t ret;

	ret = IRQ_NONE;
	if (evt->event_handler) {
		evt->event_handler(evt);
		ret = IRQ_HANDLED;
	}

	do_stolen_accounting();

	return ret;
}

void xen_setup_timer(int cpu)
{
	const char *name;
	struct clock_event_device *evt;
	int irq;

	printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu);

	name = kasprintf(GFP_KERNEL, "timer%d", cpu);
	if (!name)
		name = "<timer kasprintf failed>";

	irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
				      IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER,
				      name, NULL);

	evt = &per_cpu(xen_clock_events, cpu);
	memcpy(evt, xen_clockevent, sizeof(*evt));

	evt->cpumask = cpumask_of(cpu);
	evt->irq = irq;
}

void xen_teardown_timer(int cpu)
{
	struct clock_event_device *evt;
	BUG_ON(cpu == 0);
	evt = &per_cpu(xen_clock_events, cpu);
	unbind_from_irqhandler(evt->irq, NULL);
}

void xen_setup_cpu_clockevents(void)
{
	BUG_ON(preemptible());

	clockevents_register_device(&__get_cpu_var(xen_clock_events));
}

void xen_timer_resume(void)
{
	int cpu;

	if (xen_clockevent != &xen_vcpuop_clockevent)
		return;

	for_each_online_cpu(cpu) {
		if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
			BUG();
	}
}

static const struct pv_time_ops xen_time_ops __initdata = {
	.sched_clock = xen_clocksource_read,
};

static __init void xen_time_init(void)
{
	int cpu = smp_processor_id();
	struct timespec tp;

	clocksource_register(&xen_clocksource);

	if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) {
		/* Successfully turned off 100Hz tick, so we have the
		   vcpuop-based timer interface */
		printk(KERN_DEBUG "Xen: using vcpuop timer interface\n");
		xen_clockevent = &xen_vcpuop_clockevent;
	}

	/* Set initial system time with full resolution */
	xen_read_wallclock(&tp);
	do_settimeofday(&tp);

	setup_force_cpu_cap(X86_FEATURE_TSC);

	xen_setup_runstate_info(cpu);
	xen_setup_timer(cpu);
	xen_setup_cpu_clockevents();
}

__init void xen_init_time_ops(void)
{
	pv_time_ops = xen_time_ops;

	x86_init.timers.timer_init = xen_time_init;
	x86_init.timers.setup_percpu_clockev = x86_init_noop;
	x86_cpuinit.setup_percpu_clockev = x86_init_noop;

	x86_platform.calibrate_tsc = xen_tsc_khz;
	x86_platform.get_wallclock = xen_get_wallclock;
	x86_platform.set_wallclock = xen_set_wallclock;
}

#ifdef CONFIG_XEN_PVHVM
static void xen_hvm_setup_cpu_clockevents(void)
{
	int cpu = smp_processor_id();
	xen_setup_runstate_info(cpu);
	xen_setup_timer(cpu);
	xen_setup_cpu_clockevents();
}

__init void xen_hvm_init_time_ops(void)
{
	/* The vector callback is needed, otherwise we cannot receive
	 * interrupts on cpu > 0; at this point we don't know how many
	 * cpus are available */
	if (!xen_have_vector_callback)
		return;
	if (!xen_feature(XENFEAT_hvm_safe_pvclock)) {
		printk(KERN_INFO "Xen doesn't support pvclock on HVM, "
				"disabling pv timer\n");
		return;
	}

	pv_time_ops = xen_time_ops;
	x86_init.timers.setup_percpu_clockev = xen_time_init;
	x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents;

	x86_platform.calibrate_tsc = xen_tsc_khz;
	x86_platform.get_wallclock = xen_get_wallclock;
	x86_platform.set_wallclock = xen_set_wallclock;
}
#endif