1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 *  linux/drivers/clocksource/arm_arch_timer.c
4 *
5 *  Copyright (C) 2011 ARM Ltd.
6 *  All Rights Reserved
7 */
8
9#define pr_fmt(fmt) 	"arch_timer: " fmt
10
11#include <linux/init.h>
12#include <linux/kernel.h>
13#include <linux/device.h>
14#include <linux/smp.h>
15#include <linux/cpu.h>
16#include <linux/cpu_pm.h>
17#include <linux/clockchips.h>
18#include <linux/clocksource.h>
19#include <linux/clocksource_ids.h>
20#include <linux/interrupt.h>
21#include <linux/kstrtox.h>
22#include <linux/of_irq.h>
23#include <linux/of_address.h>
24#include <linux/io.h>
25#include <linux/slab.h>
26#include <linux/sched/clock.h>
27#include <linux/sched_clock.h>
28#include <linux/acpi.h>
29#include <linux/arm-smccc.h>
30#include <linux/ptp_kvm.h>
31
32#include <asm/arch_timer.h>
33#include <asm/virt.h>
34
35#include <clocksource/arm_arch_timer.h>
36
/*
 * CNTTIDR lives at offset 0x08. CNTTIDR_VIRT(n) selects bit 1 of the n-th
 * 4-bit field of its value.
 */
#define CNTTIDR		0x08
#define CNTTIDR_VIRT(n)	(BIT(1) << ((n) * 4))

/* CNTACR(n): one register per index n, 4 bytes apart, starting at 0x40 */
#define CNTACR(n)	(0x40 + ((n) * 4))
/* Individual bits within a CNTACR register */
#define CNTACR_RPCT	BIT(0)
#define CNTACR_RVCT	BIT(1)
#define CNTACR_RFRQ	BIT(2)
#define CNTACR_RVOFF	BIT(3)
#define CNTACR_RWVT	BIT(4)
#define CNTACR_RWPT	BIT(5)

/*
 * Register offsets relative to a memory-mapped timer's base address
 * (struct arch_timer::base), as used by the MMIO accessors in this file.
 * The *_LO offsets name the low 32-bit half of a 64-bit register; the high
 * half is read at offset + 4.
 */
#define CNTPCT_LO	0x00
#define CNTVCT_LO	0x08
#define CNTFRQ		0x10
#define CNTP_CVAL_LO	0x20
#define CNTP_CTL	0x2c
#define CNTV_CVAL_LO	0x30
#define CNTV_CTL	0x3c

/*
 * The minimum amount of time a generic counter is guaranteed to not roll over
 * (40 years)
 */
#define MIN_ROLLOVER_SECS	(40ULL * 365 * 24 * 3600)
61
/* NOTE(review): presumably a mask of the timer types found at probe; confirm. */
static unsigned arch_timers_present __initdata;

/*
 * A memory-mapped timer: the base of its register frame plus the clock
 * event device registered for it. to_arch_timer() relies on @evt being
 * embedded in this structure.
 */
struct arch_timer {
	void __iomem *base;
	struct clock_event_device evt;
};

/* The memory-mapped timer whose counter arch_counter_get_cntvct_mem() reads */
static struct arch_timer *arch_timer_mem __ro_after_init;

/* Map a clock_event_device pointer back to the struct arch_timer embedding it */
#define to_arch_timer(e) container_of(e, struct arch_timer, evt)
72
/* Counter frequency in Hz (the banner prints it divided down to MHz) */
static u32 arch_timer_rate __ro_after_init;
/*
 * Interrupt number for each timer PPI, indexed by enum arch_timer_ppi_nr.
 * A zero entry is treated as "not present" by
 * arch_timer_has_nonsecure_ppi().
 */
static int arch_timer_ppi[ARCH_TIMER_MAX_TIMER_PPI] __ro_after_init;

/*
 * Name associated with each PPI index.
 * NOTE(review): presumably used to look interrupts up by name during
 * probing; the consumer is not visible in this part of the file.
 */
static const char *arch_timer_ppi_names[ARCH_TIMER_MAX_TIMER_PPI] = {
	[ARCH_TIMER_PHYS_SECURE_PPI]	= "sec-phys",
	[ARCH_TIMER_PHYS_NONSECURE_PPI]	= "phys",
	[ARCH_TIMER_VIRT_PPI]		= "virt",
	[ARCH_TIMER_HYP_PPI]		= "hyp-phys",
	[ARCH_TIMER_HYP_VIRT_PPI]	= "hyp-virt",
};
83
/* Per-CPU clock event devices for the cp15 timer */
static struct clock_event_device __percpu *arch_timer_evt;

/* Which PPI the cp15 timer is driven by; the virtual timer by default */
static enum arch_timer_ppi_nr arch_timer_uses_ppi __ro_after_init = ARCH_TIMER_VIRT_PPI;
/* When set, cp15 clock event devices advertise CLOCK_EVT_FEAT_C3STOP */
static bool arch_timer_c3stop __ro_after_init;
/* Selects the virtual (true) or physical (false) MMIO timer callbacks */
static bool arch_timer_mem_use_virtual __ro_after_init;
/* NOTE(review): consumer not visible in this part of the file; confirm use. */
static bool arch_counter_suspend_stop __ro_after_init;
/*
 * Default vDSO clock mode for the counter. arch_timer_enable_workaround()
 * may downgrade it when an erratum workaround is enabled.
 */
#ifdef CONFIG_GENERIC_GETTIMEOFDAY
static enum vdso_clock_mode vdso_default = VDSO_CLOCKMODE_ARCHTIMER;
#else
static enum vdso_clock_mode vdso_default = VDSO_CLOCKMODE_NONE;
#endif /* CONFIG_GENERIC_GETTIMEOFDAY */

/* CPUs on which the event stream is currently enabled */
static cpumask_t evtstrm_available = CPU_MASK_NONE;
/* Event stream policy: Kconfig default, overridable on the command line */
static bool evtstrm_enable __ro_after_init = IS_ENABLED(CONFIG_ARM_ARCH_TIMER_EVTSTREAM);
99static int __init early_evtstrm_cfg(char *buf)
100{
101	return kstrtobool(buf, &evtstrm_enable);
102}
103early_param("clocksource.arm_arch_timer.evtstrm", early_evtstrm_cfg);
104
105/*
106 * Makes an educated guess at a valid counter width based on the Generic Timer
107 * specification. Of note:
108 *   1) the system counter is at least 56 bits wide
109 *   2) a roll-over time of not less than 40 years
110 *
111 * See 'ARM DDI 0487G.a D11.1.2 ("The system counter")' for more details.
112 */
113static int arch_counter_get_width(void)
114{
115	u64 min_cycles = MIN_ROLLOVER_SECS * arch_timer_rate;
116
117	/* guarantee the returned width is within the valid range */
118	return clamp_val(ilog2(min_cycles - 1) + 1, 56, 64);
119}
120
121/*
122 * Architected system timer support.
123 */
124
125static __always_inline
126void arch_timer_reg_write(int access, enum arch_timer_reg reg, u64 val,
127			  struct clock_event_device *clk)
128{
129	if (access == ARCH_TIMER_MEM_PHYS_ACCESS) {
130		struct arch_timer *timer = to_arch_timer(clk);
131		switch (reg) {
132		case ARCH_TIMER_REG_CTRL:
133			writel_relaxed((u32)val, timer->base + CNTP_CTL);
134			break;
135		case ARCH_TIMER_REG_CVAL:
136			/*
137			 * Not guaranteed to be atomic, so the timer
138			 * must be disabled at this point.
139			 */
140			writeq_relaxed(val, timer->base + CNTP_CVAL_LO);
141			break;
142		default:
143			BUILD_BUG();
144		}
145	} else if (access == ARCH_TIMER_MEM_VIRT_ACCESS) {
146		struct arch_timer *timer = to_arch_timer(clk);
147		switch (reg) {
148		case ARCH_TIMER_REG_CTRL:
149			writel_relaxed((u32)val, timer->base + CNTV_CTL);
150			break;
151		case ARCH_TIMER_REG_CVAL:
152			/* Same restriction as above */
153			writeq_relaxed(val, timer->base + CNTV_CVAL_LO);
154			break;
155		default:
156			BUILD_BUG();
157		}
158	} else {
159		arch_timer_reg_write_cp15(access, reg, val);
160	}
161}
162
163static __always_inline
164u32 arch_timer_reg_read(int access, enum arch_timer_reg reg,
165			struct clock_event_device *clk)
166{
167	u32 val;
168
169	if (access == ARCH_TIMER_MEM_PHYS_ACCESS) {
170		struct arch_timer *timer = to_arch_timer(clk);
171		switch (reg) {
172		case ARCH_TIMER_REG_CTRL:
173			val = readl_relaxed(timer->base + CNTP_CTL);
174			break;
175		default:
176			BUILD_BUG();
177		}
178	} else if (access == ARCH_TIMER_MEM_VIRT_ACCESS) {
179		struct arch_timer *timer = to_arch_timer(clk);
180		switch (reg) {
181		case ARCH_TIMER_REG_CTRL:
182			val = readl_relaxed(timer->base + CNTV_CTL);
183			break;
184		default:
185			BUILD_BUG();
186		}
187	} else {
188		val = arch_timer_reg_read_cp15(access, reg);
189	}
190
191	return val;
192}
193
194static noinstr u64 raw_counter_get_cntpct_stable(void)
195{
196	return __arch_counter_get_cntpct_stable();
197}
198
199static notrace u64 arch_counter_get_cntpct_stable(void)
200{
201	u64 val;
202	preempt_disable_notrace();
203	val = __arch_counter_get_cntpct_stable();
204	preempt_enable_notrace();
205	return val;
206}
207
208static noinstr u64 arch_counter_get_cntpct(void)
209{
210	return __arch_counter_get_cntpct();
211}
212
213static noinstr u64 raw_counter_get_cntvct_stable(void)
214{
215	return __arch_counter_get_cntvct_stable();
216}
217
218static notrace u64 arch_counter_get_cntvct_stable(void)
219{
220	u64 val;
221	preempt_disable_notrace();
222	val = __arch_counter_get_cntvct_stable();
223	preempt_enable_notrace();
224	return val;
225}
226
227static noinstr u64 arch_counter_get_cntvct(void)
228{
229	return __arch_counter_get_cntvct();
230}
231
/*
 * Counter accessor used by the clocksource and cyclecounter read callbacks
 * below; exported (GPL) for use by modules.
 *
 * Default to cp15 based access because arm64 uses this function for
 * sched_clock() before DT is probed and the cp15 method is guaranteed
 * to exist on arm64. arm doesn't use this before DT is probed so even
 * if we don't have the cp15 accessors we won't have a problem.
 */
u64 (*arch_timer_read_counter)(void) __ro_after_init = arch_counter_get_cntvct;
EXPORT_SYMBOL_GPL(arch_timer_read_counter);
240
241static u64 arch_counter_read(struct clocksource *cs)
242{
243	return arch_timer_read_counter();
244}
245
246static u64 arch_counter_read_cc(const struct cyclecounter *cc)
247{
248	return arch_timer_read_counter();
249}
250
251static struct clocksource clocksource_counter = {
252	.name	= "arch_sys_counter",
253	.id	= CSID_ARM_ARCH_COUNTER,
254	.rating	= 400,
255	.read	= arch_counter_read,
256	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
257};
258
/*
 * Cyclecounter view of the system counter; only the read callback is set
 * statically here.
 */
static struct cyclecounter cyclecounter __ro_after_init = {
	.read	= arch_counter_read_cc,
};
262
/*
 * ACPI OEM identification used to match an erratum against an ACPI table
 * header (see arch_timer_check_acpi_oem_erratum()). The string fields are
 * one byte larger than the corresponding ACPI field; the matcher only
 * compares the first ACPI_OEM_ID_SIZE / ACPI_OEM_TABLE_ID_SIZE bytes.
 */
struct ate_acpi_oem_info {
	char oem_id[ACPI_OEM_ID_SIZE + 1];
	char oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1];
	u32 oem_revision;
};
268
269#ifdef CONFIG_FSL_ERRATUM_A008585
/*
 * Read @reg twice in a row until both reads return the same value, and
 * evaluate to the second value of the last pair. The loop gives up after
 * 200 attempts (with a one-time warning); that bound is arbitrary and well
 * beyond the highest number of iterations the loop has been observed to
 * take.
 */
#define __fsl_a008585_read_reg(reg) ({			\
	u64 _prev, _cur;				\
	int _left = 200;				\
							\
	for (;;) {					\
		_prev = read_sysreg(reg);		\
		_cur = read_sysreg(reg);		\
		_left--;				\
		if (likely(_prev == _cur) || !_left)	\
			break;				\
	}						\
							\
	WARN_ON_ONCE(!_left);				\
	_cur;						\
})
287
288static u64 notrace fsl_a008585_read_cntpct_el0(void)
289{
290	return __fsl_a008585_read_reg(cntpct_el0);
291}
292
293static u64 notrace fsl_a008585_read_cntvct_el0(void)
294{
295	return __fsl_a008585_read_reg(cntvct_el0);
296}
297#endif
298
299#ifdef CONFIG_HISILICON_ERRATUM_161010101
/*
 * The only way to confirm that a counter value is correct is to check that
 * a second read is larger than the first one by less than 32. This is done
 * by discarding the lower 5 bits of the difference and checking that
 * nothing is left.
 *
 * Theoretically the erratum should not occur more than twice in succession
 * when reading the system counter, but interrupts may cause more than two
 * bad reads in a row and trigger the warning. The number of retries is
 * therefore set far beyond the number of iterations the loop has been
 * observed to take.
 */
#define __hisi_161010101_read_reg(reg) ({				\
	u64 _prev, _cur;						\
	int _left = 50;							\
									\
	for (;;) {							\
		_prev = read_sysreg(reg);				\
		_cur = read_sysreg(reg);				\
		_left--;						\
		if (likely(!((_cur - _prev) >> 5)) || !_left)		\
			break;						\
	}								\
									\
	WARN_ON_ONCE(!_left);						\
	_cur;								\
})
323
324static u64 notrace hisi_161010101_read_cntpct_el0(void)
325{
326	return __hisi_161010101_read_reg(cntpct_el0);
327}
328
329static u64 notrace hisi_161010101_read_cntvct_el0(void)
330{
331	return __hisi_161010101_read_reg(cntvct_el0);
332}
333
334static struct ate_acpi_oem_info hisi_161010101_oem_info[] = {
335	/*
336	 * Note that trailing spaces are required to properly match
337	 * the OEM table information.
338	 */
339	{
340		.oem_id		= "HISI  ",
341		.oem_table_id	= "HIP05   ",
342		.oem_revision	= 0,
343	},
344	{
345		.oem_id		= "HISI  ",
346		.oem_table_id	= "HIP06   ",
347		.oem_revision	= 0,
348	},
349	{
350		.oem_id		= "HISI  ",
351		.oem_table_id	= "HIP07   ",
352		.oem_revision	= 0,
353	},
354	{ /* Sentinel indicating the end of the OEM array */ },
355};
356#endif
357
358#ifdef CONFIG_ARM64_ERRATUM_858921
359static u64 notrace arm64_858921_read_cntpct_el0(void)
360{
361	u64 old, new;
362
363	old = read_sysreg(cntpct_el0);
364	new = read_sysreg(cntpct_el0);
365	return (((old ^ new) >> 32) & 1) ? old : new;
366}
367
368static u64 notrace arm64_858921_read_cntvct_el0(void)
369{
370	u64 old, new;
371
372	old = read_sysreg(cntvct_el0);
373	new = read_sysreg(cntvct_el0);
374	return (((old ^ new) >> 32) & 1) ? old : new;
375}
376#endif
377
378#ifdef CONFIG_SUN50I_ERRATUM_UNKNOWN1
/*
 * The low bits of the counter registers are indeterminate while bit 10 or
 * greater is rolling over. Since the counter value can jump both backward
 * (7ff -> 000 -> 800) and forward (7ff -> fff -> 800), any value whose low
 * bits (as selected by GENMASK(8, 0)) are all ones or all zeros is rejected
 * and the register is read again. The loop is bounded by the maximum
 * number of CPU cycles in 3 consecutive 24 MHz counter periods, and a
 * one-time warning is emitted if the bound is reached.
 */
#define __sun50i_a64_read_reg(reg) ({					\
	u64 _cnt;							\
	int _left = 150;						\
									\
	for (;;) {							\
		_cnt = read_sysreg(reg);				\
		_left--;						\
		if (((_cnt + 1) & GENMASK(8, 0)) > 1 || !_left)		\
			break;						\
	}								\
									\
	WARN_ON_ONCE(!_left);						\
	_cnt;								\
})
398
399static u64 notrace sun50i_a64_read_cntpct_el0(void)
400{
401	return __sun50i_a64_read_reg(cntpct_el0);
402}
403
404static u64 notrace sun50i_a64_read_cntvct_el0(void)
405{
406	return __sun50i_a64_read_reg(cntvct_el0);
407}
408#endif
409
410#ifdef CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND
/*
 * Per-CPU pointer to the erratum workaround in effect on that CPU, or NULL
 * if none has been enabled. Exported (GPL) for use by modules.
 */
DEFINE_PER_CPU(const struct arch_timer_erratum_workaround *, timer_unstable_counter_workaround);
EXPORT_SYMBOL_GPL(timer_unstable_counter_workaround);

/*
 * Set to 1 as soon as any enabled workaround overrides a counter read
 * accessor; queried through arch_timer_counter_has_wa().
 */
static atomic_t timer_unstable_counter_workaround_in_use = ATOMIC_INIT(0);
415
416/*
417 * Force the inlining of this function so that the register accesses
418 * can be themselves correctly inlined.
419 */
420static __always_inline
421void erratum_set_next_event_generic(const int access, unsigned long evt,
422				    struct clock_event_device *clk)
423{
424	unsigned long ctrl;
425	u64 cval;
426
427	ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk);
428	ctrl |= ARCH_TIMER_CTRL_ENABLE;
429	ctrl &= ~ARCH_TIMER_CTRL_IT_MASK;
430
431	if (access == ARCH_TIMER_PHYS_ACCESS) {
432		cval = evt + arch_counter_get_cntpct_stable();
433		write_sysreg(cval, cntp_cval_el0);
434	} else {
435		cval = evt + arch_counter_get_cntvct_stable();
436		write_sysreg(cval, cntv_cval_el0);
437	}
438
439	arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk);
440}
441
442static __maybe_unused int erratum_set_next_event_virt(unsigned long evt,
443					    struct clock_event_device *clk)
444{
445	erratum_set_next_event_generic(ARCH_TIMER_VIRT_ACCESS, evt, clk);
446	return 0;
447}
448
449static __maybe_unused int erratum_set_next_event_phys(unsigned long evt,
450					    struct clock_event_device *clk)
451{
452	erratum_set_next_event_generic(ARCH_TIMER_PHYS_ACCESS, evt, clk);
453	return 0;
454}
455
456static const struct arch_timer_erratum_workaround ool_workarounds[] = {
457#ifdef CONFIG_FSL_ERRATUM_A008585
458	{
459		.match_type = ate_match_dt,
460		.id = "fsl,erratum-a008585",
461		.desc = "Freescale erratum a005858",
462		.read_cntpct_el0 = fsl_a008585_read_cntpct_el0,
463		.read_cntvct_el0 = fsl_a008585_read_cntvct_el0,
464		.set_next_event_phys = erratum_set_next_event_phys,
465		.set_next_event_virt = erratum_set_next_event_virt,
466	},
467#endif
468#ifdef CONFIG_HISILICON_ERRATUM_161010101
469	{
470		.match_type = ate_match_dt,
471		.id = "hisilicon,erratum-161010101",
472		.desc = "HiSilicon erratum 161010101",
473		.read_cntpct_el0 = hisi_161010101_read_cntpct_el0,
474		.read_cntvct_el0 = hisi_161010101_read_cntvct_el0,
475		.set_next_event_phys = erratum_set_next_event_phys,
476		.set_next_event_virt = erratum_set_next_event_virt,
477	},
478	{
479		.match_type = ate_match_acpi_oem_info,
480		.id = hisi_161010101_oem_info,
481		.desc = "HiSilicon erratum 161010101",
482		.read_cntpct_el0 = hisi_161010101_read_cntpct_el0,
483		.read_cntvct_el0 = hisi_161010101_read_cntvct_el0,
484		.set_next_event_phys = erratum_set_next_event_phys,
485		.set_next_event_virt = erratum_set_next_event_virt,
486	},
487#endif
488#ifdef CONFIG_ARM64_ERRATUM_858921
489	{
490		.match_type = ate_match_local_cap_id,
491		.id = (void *)ARM64_WORKAROUND_858921,
492		.desc = "ARM erratum 858921",
493		.read_cntpct_el0 = arm64_858921_read_cntpct_el0,
494		.read_cntvct_el0 = arm64_858921_read_cntvct_el0,
495		.set_next_event_phys = erratum_set_next_event_phys,
496		.set_next_event_virt = erratum_set_next_event_virt,
497	},
498#endif
499#ifdef CONFIG_SUN50I_ERRATUM_UNKNOWN1
500	{
501		.match_type = ate_match_dt,
502		.id = "allwinner,erratum-unknown1",
503		.desc = "Allwinner erratum UNKNOWN1",
504		.read_cntpct_el0 = sun50i_a64_read_cntpct_el0,
505		.read_cntvct_el0 = sun50i_a64_read_cntvct_el0,
506		.set_next_event_phys = erratum_set_next_event_phys,
507		.set_next_event_virt = erratum_set_next_event_virt,
508	},
509#endif
510#ifdef CONFIG_ARM64_ERRATUM_1418040
511	{
512		.match_type = ate_match_local_cap_id,
513		.id = (void *)ARM64_WORKAROUND_1418040,
514		.desc = "ARM erratum 1418040",
515		.disable_compat_vdso = true,
516	},
517#endif
518};
519
/*
 * Erratum match callback: returns true if the given workaround entry
 * applies, based on the opaque, match-type specific second argument.
 */
typedef bool (*ate_match_fn_t)(const struct arch_timer_erratum_workaround *,
			       const void *);
522
523static
524bool arch_timer_check_dt_erratum(const struct arch_timer_erratum_workaround *wa,
525				 const void *arg)
526{
527	const struct device_node *np = arg;
528
529	return of_property_read_bool(np, wa->id);
530}
531
532static
533bool arch_timer_check_local_cap_erratum(const struct arch_timer_erratum_workaround *wa,
534					const void *arg)
535{
536	return this_cpu_has_cap((uintptr_t)wa->id);
537}
538
539
540static
541bool arch_timer_check_acpi_oem_erratum(const struct arch_timer_erratum_workaround *wa,
542				       const void *arg)
543{
544	static const struct ate_acpi_oem_info empty_oem_info = {};
545	const struct ate_acpi_oem_info *info = wa->id;
546	const struct acpi_table_header *table = arg;
547
548	/* Iterate over the ACPI OEM info array, looking for a match */
549	while (memcmp(info, &empty_oem_info, sizeof(*info))) {
550		if (!memcmp(info->oem_id, table->oem_id, ACPI_OEM_ID_SIZE) &&
551		    !memcmp(info->oem_table_id, table->oem_table_id, ACPI_OEM_TABLE_ID_SIZE) &&
552		    info->oem_revision == table->oem_revision)
553			return true;
554
555		info++;
556	}
557
558	return false;
559}
560
561static const struct arch_timer_erratum_workaround *
562arch_timer_iterate_errata(enum arch_timer_erratum_match_type type,
563			  ate_match_fn_t match_fn,
564			  void *arg)
565{
566	int i;
567
568	for (i = 0; i < ARRAY_SIZE(ool_workarounds); i++) {
569		if (ool_workarounds[i].match_type != type)
570			continue;
571
572		if (match_fn(&ool_workarounds[i], arg))
573			return &ool_workarounds[i];
574	}
575
576	return NULL;
577}
578
579static
580void arch_timer_enable_workaround(const struct arch_timer_erratum_workaround *wa,
581				  bool local)
582{
583	int i;
584
585	if (local) {
586		__this_cpu_write(timer_unstable_counter_workaround, wa);
587	} else {
588		for_each_possible_cpu(i)
589			per_cpu(timer_unstable_counter_workaround, i) = wa;
590	}
591
592	if (wa->read_cntvct_el0 || wa->read_cntpct_el0)
593		atomic_set(&timer_unstable_counter_workaround_in_use, 1);
594
595	/*
596	 * Don't use the vdso fastpath if errata require using the
597	 * out-of-line counter accessor. We may change our mind pretty
598	 * late in the game (with a per-CPU erratum, for example), so
599	 * change both the default value and the vdso itself.
600	 */
601	if (wa->read_cntvct_el0) {
602		clocksource_counter.vdso_clock_mode = VDSO_CLOCKMODE_NONE;
603		vdso_default = VDSO_CLOCKMODE_NONE;
604	} else if (wa->disable_compat_vdso && vdso_default != VDSO_CLOCKMODE_NONE) {
605		vdso_default = VDSO_CLOCKMODE_ARCHTIMER_NOCOMPAT;
606		clocksource_counter.vdso_clock_mode = vdso_default;
607	}
608}
609
610static void arch_timer_check_ool_workaround(enum arch_timer_erratum_match_type type,
611					    void *arg)
612{
613	const struct arch_timer_erratum_workaround *wa, *__wa;
614	ate_match_fn_t match_fn = NULL;
615	bool local = false;
616
617	switch (type) {
618	case ate_match_dt:
619		match_fn = arch_timer_check_dt_erratum;
620		break;
621	case ate_match_local_cap_id:
622		match_fn = arch_timer_check_local_cap_erratum;
623		local = true;
624		break;
625	case ate_match_acpi_oem_info:
626		match_fn = arch_timer_check_acpi_oem_erratum;
627		break;
628	default:
629		WARN_ON(1);
630		return;
631	}
632
633	wa = arch_timer_iterate_errata(type, match_fn, arg);
634	if (!wa)
635		return;
636
637	__wa = __this_cpu_read(timer_unstable_counter_workaround);
638	if (__wa && wa != __wa)
639		pr_warn("Can't enable workaround for %s (clashes with %s\n)",
640			wa->desc, __wa->desc);
641
642	if (__wa)
643		return;
644
645	arch_timer_enable_workaround(wa, local);
646	pr_info("Enabling %s workaround for %s\n",
647		local ? "local" : "global", wa->desc);
648}
649
650static bool arch_timer_this_cpu_has_cntvct_wa(void)
651{
652	return has_erratum_handler(read_cntvct_el0);
653}
654
655static bool arch_timer_counter_has_wa(void)
656{
657	return atomic_read(&timer_unstable_counter_workaround_in_use);
658}
659#else
/*
 * Stubs for builds without out-of-line workaround support: checking for a
 * workaround does nothing, and no workaround is ever reported as active.
 */
#define arch_timer_check_ool_workaround(t,a)		do { } while(0)
#define arch_timer_this_cpu_has_cntvct_wa()		({false;})
#define arch_timer_counter_has_wa()			({false;})
663#endif /* CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND */
664
665static __always_inline irqreturn_t timer_handler(const int access,
666					struct clock_event_device *evt)
667{
668	unsigned long ctrl;
669
670	ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, evt);
671	if (ctrl & ARCH_TIMER_CTRL_IT_STAT) {
672		ctrl |= ARCH_TIMER_CTRL_IT_MASK;
673		arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, evt);
674		evt->event_handler(evt);
675		return IRQ_HANDLED;
676	}
677
678	return IRQ_NONE;
679}
680
681static irqreturn_t arch_timer_handler_virt(int irq, void *dev_id)
682{
683	struct clock_event_device *evt = dev_id;
684
685	return timer_handler(ARCH_TIMER_VIRT_ACCESS, evt);
686}
687
688static irqreturn_t arch_timer_handler_phys(int irq, void *dev_id)
689{
690	struct clock_event_device *evt = dev_id;
691
692	return timer_handler(ARCH_TIMER_PHYS_ACCESS, evt);
693}
694
695static irqreturn_t arch_timer_handler_phys_mem(int irq, void *dev_id)
696{
697	struct clock_event_device *evt = dev_id;
698
699	return timer_handler(ARCH_TIMER_MEM_PHYS_ACCESS, evt);
700}
701
702static irqreturn_t arch_timer_handler_virt_mem(int irq, void *dev_id)
703{
704	struct clock_event_device *evt = dev_id;
705
706	return timer_handler(ARCH_TIMER_MEM_VIRT_ACCESS, evt);
707}
708
709static __always_inline int arch_timer_shutdown(const int access,
710					       struct clock_event_device *clk)
711{
712	unsigned long ctrl;
713
714	ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk);
715	ctrl &= ~ARCH_TIMER_CTRL_ENABLE;
716	arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk);
717
718	return 0;
719}
720
721static int arch_timer_shutdown_virt(struct clock_event_device *clk)
722{
723	return arch_timer_shutdown(ARCH_TIMER_VIRT_ACCESS, clk);
724}
725
726static int arch_timer_shutdown_phys(struct clock_event_device *clk)
727{
728	return arch_timer_shutdown(ARCH_TIMER_PHYS_ACCESS, clk);
729}
730
731static int arch_timer_shutdown_virt_mem(struct clock_event_device *clk)
732{
733	return arch_timer_shutdown(ARCH_TIMER_MEM_VIRT_ACCESS, clk);
734}
735
736static int arch_timer_shutdown_phys_mem(struct clock_event_device *clk)
737{
738	return arch_timer_shutdown(ARCH_TIMER_MEM_PHYS_ACCESS, clk);
739}
740
741static __always_inline void set_next_event(const int access, unsigned long evt,
742					   struct clock_event_device *clk)
743{
744	unsigned long ctrl;
745	u64 cnt;
746
747	ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk);
748	ctrl |= ARCH_TIMER_CTRL_ENABLE;
749	ctrl &= ~ARCH_TIMER_CTRL_IT_MASK;
750
751	if (access == ARCH_TIMER_PHYS_ACCESS)
752		cnt = __arch_counter_get_cntpct();
753	else
754		cnt = __arch_counter_get_cntvct();
755
756	arch_timer_reg_write(access, ARCH_TIMER_REG_CVAL, evt + cnt, clk);
757	arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk);
758}
759
760static int arch_timer_set_next_event_virt(unsigned long evt,
761					  struct clock_event_device *clk)
762{
763	set_next_event(ARCH_TIMER_VIRT_ACCESS, evt, clk);
764	return 0;
765}
766
767static int arch_timer_set_next_event_phys(unsigned long evt,
768					  struct clock_event_device *clk)
769{
770	set_next_event(ARCH_TIMER_PHYS_ACCESS, evt, clk);
771	return 0;
772}
773
774static noinstr u64 arch_counter_get_cnt_mem(struct arch_timer *t, int offset_lo)
775{
776	u32 cnt_lo, cnt_hi, tmp_hi;
777
778	do {
779		cnt_hi = __le32_to_cpu((__le32 __force)__raw_readl(t->base + offset_lo + 4));
780		cnt_lo = __le32_to_cpu((__le32 __force)__raw_readl(t->base + offset_lo));
781		tmp_hi = __le32_to_cpu((__le32 __force)__raw_readl(t->base + offset_lo + 4));
782	} while (cnt_hi != tmp_hi);
783
784	return ((u64) cnt_hi << 32) | cnt_lo;
785}
786
787static __always_inline void set_next_event_mem(const int access, unsigned long evt,
788					   struct clock_event_device *clk)
789{
790	struct arch_timer *timer = to_arch_timer(clk);
791	unsigned long ctrl;
792	u64 cnt;
793
794	ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk);
795
796	/* Timer must be disabled before programming CVAL */
797	if (ctrl & ARCH_TIMER_CTRL_ENABLE) {
798		ctrl &= ~ARCH_TIMER_CTRL_ENABLE;
799		arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk);
800	}
801
802	ctrl |= ARCH_TIMER_CTRL_ENABLE;
803	ctrl &= ~ARCH_TIMER_CTRL_IT_MASK;
804
805	if (access ==  ARCH_TIMER_MEM_VIRT_ACCESS)
806		cnt = arch_counter_get_cnt_mem(timer, CNTVCT_LO);
807	else
808		cnt = arch_counter_get_cnt_mem(timer, CNTPCT_LO);
809
810	arch_timer_reg_write(access, ARCH_TIMER_REG_CVAL, evt + cnt, clk);
811	arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk);
812}
813
814static int arch_timer_set_next_event_virt_mem(unsigned long evt,
815					      struct clock_event_device *clk)
816{
817	set_next_event_mem(ARCH_TIMER_MEM_VIRT_ACCESS, evt, clk);
818	return 0;
819}
820
821static int arch_timer_set_next_event_phys_mem(unsigned long evt,
822					      struct clock_event_device *clk)
823{
824	set_next_event_mem(ARCH_TIMER_MEM_PHYS_ACCESS, evt, clk);
825	return 0;
826}
827
828static u64 __arch_timer_check_delta(void)
829{
830#ifdef CONFIG_ARM64
831	const struct midr_range broken_cval_midrs[] = {
832		/*
833		 * XGene-1 implements CVAL in terms of TVAL, meaning
834		 * that the maximum timer range is 32bit. Shame on them.
835		 *
836		 * Note that TVAL is signed, thus has only 31 of its
837		 * 32 bits to express magnitude.
838		 */
839		MIDR_REV_RANGE(MIDR_CPU_MODEL(ARM_CPU_IMP_APM,
840					      APM_CPU_PART_XGENE),
841			       APM_CPU_VAR_POTENZA, 0x0, 0xf),
842		{},
843	};
844
845	if (is_midr_in_range_list(read_cpuid_id(), broken_cval_midrs)) {
846		pr_warn_once("Broken CNTx_CVAL_EL1, using 31 bit TVAL instead.\n");
847		return CLOCKSOURCE_MASK(31);
848	}
849#endif
850	return CLOCKSOURCE_MASK(arch_counter_get_width());
851}
852
853static void __arch_timer_setup(unsigned type,
854			       struct clock_event_device *clk)
855{
856	u64 max_delta;
857
858	clk->features = CLOCK_EVT_FEAT_ONESHOT;
859
860	if (type == ARCH_TIMER_TYPE_CP15) {
861		typeof(clk->set_next_event) sne;
862
863		arch_timer_check_ool_workaround(ate_match_local_cap_id, NULL);
864
865		if (arch_timer_c3stop)
866			clk->features |= CLOCK_EVT_FEAT_C3STOP;
867		clk->name = "arch_sys_timer";
868		clk->rating = 450;
869		clk->cpumask = cpumask_of(smp_processor_id());
870		clk->irq = arch_timer_ppi[arch_timer_uses_ppi];
871		switch (arch_timer_uses_ppi) {
872		case ARCH_TIMER_VIRT_PPI:
873			clk->set_state_shutdown = arch_timer_shutdown_virt;
874			clk->set_state_oneshot_stopped = arch_timer_shutdown_virt;
875			sne = erratum_handler(set_next_event_virt);
876			break;
877		case ARCH_TIMER_PHYS_SECURE_PPI:
878		case ARCH_TIMER_PHYS_NONSECURE_PPI:
879		case ARCH_TIMER_HYP_PPI:
880			clk->set_state_shutdown = arch_timer_shutdown_phys;
881			clk->set_state_oneshot_stopped = arch_timer_shutdown_phys;
882			sne = erratum_handler(set_next_event_phys);
883			break;
884		default:
885			BUG();
886		}
887
888		clk->set_next_event = sne;
889		max_delta = __arch_timer_check_delta();
890	} else {
891		clk->features |= CLOCK_EVT_FEAT_DYNIRQ;
892		clk->name = "arch_mem_timer";
893		clk->rating = 400;
894		clk->cpumask = cpu_possible_mask;
895		if (arch_timer_mem_use_virtual) {
896			clk->set_state_shutdown = arch_timer_shutdown_virt_mem;
897			clk->set_state_oneshot_stopped = arch_timer_shutdown_virt_mem;
898			clk->set_next_event =
899				arch_timer_set_next_event_virt_mem;
900		} else {
901			clk->set_state_shutdown = arch_timer_shutdown_phys_mem;
902			clk->set_state_oneshot_stopped = arch_timer_shutdown_phys_mem;
903			clk->set_next_event =
904				arch_timer_set_next_event_phys_mem;
905		}
906
907		max_delta = CLOCKSOURCE_MASK(56);
908	}
909
910	clk->set_state_shutdown(clk);
911
912	clockevents_config_and_register(clk, arch_timer_rate, 0xf, max_delta);
913}
914
915static void arch_timer_evtstrm_enable(unsigned int divider)
916{
917	u32 cntkctl = arch_timer_get_cntkctl();
918
919#ifdef CONFIG_ARM64
920	/* ECV is likely to require a large divider. Use the EVNTIS flag. */
921	if (cpus_have_final_cap(ARM64_HAS_ECV) && divider > 15) {
922		cntkctl |= ARCH_TIMER_EVT_INTERVAL_SCALE;
923		divider -= 8;
924	}
925#endif
926
927	divider = min(divider, 15U);
928	cntkctl &= ~ARCH_TIMER_EVT_TRIGGER_MASK;
929	/* Set the divider and enable virtual event stream */
930	cntkctl |= (divider << ARCH_TIMER_EVT_TRIGGER_SHIFT)
931			| ARCH_TIMER_VIRT_EVT_EN;
932	arch_timer_set_cntkctl(cntkctl);
933	arch_timer_set_evtstrm_feature();
934	cpumask_set_cpu(smp_processor_id(), &evtstrm_available);
935}
936
937static void arch_timer_configure_evtstream(void)
938{
939	int evt_stream_div, lsb;
940
941	/*
942	 * As the event stream can at most be generated at half the frequency
943	 * of the counter, use half the frequency when computing the divider.
944	 */
945	evt_stream_div = arch_timer_rate / ARCH_TIMER_EVT_STREAM_FREQ / 2;
946
947	/*
948	 * Find the closest power of two to the divisor. If the adjacent bit
949	 * of lsb (last set bit, starts from 0) is set, then we use (lsb + 1).
950	 */
951	lsb = fls(evt_stream_div) - 1;
952	if (lsb > 0 && (evt_stream_div & BIT(lsb - 1)))
953		lsb++;
954
955	/* enable event stream */
956	arch_timer_evtstrm_enable(max(0, lsb));
957}
958
/*
 * CPU hotplug "starting" callback: set the event stream up on the CPU
 * running the callback. @cpu is not used. Always returns 0.
 */
static int arch_timer_evtstrm_starting_cpu(unsigned int cpu)
{
	const int ret = 0;

	arch_timer_configure_evtstream();

	return ret;
}
964
965static int arch_timer_evtstrm_dying_cpu(unsigned int cpu)
966{
967	cpumask_clear_cpu(smp_processor_id(), &evtstrm_available);
968	return 0;
969}
970
971static int __init arch_timer_evtstrm_register(void)
972{
973	if (!arch_timer_evt || !evtstrm_enable)
974		return 0;
975
976	return cpuhp_setup_state(CPUHP_AP_ARM_ARCH_TIMER_EVTSTRM_STARTING,
977				 "clockevents/arm/arch_timer_evtstrm:starting",
978				 arch_timer_evtstrm_starting_cpu,
979				 arch_timer_evtstrm_dying_cpu);
980}
981core_initcall(arch_timer_evtstrm_register);
982
983static void arch_counter_set_user_access(void)
984{
985	u32 cntkctl = arch_timer_get_cntkctl();
986
987	/* Disable user access to the timers and both counters */
988	/* Also disable virtual event stream */
989	cntkctl &= ~(ARCH_TIMER_USR_PT_ACCESS_EN
990			| ARCH_TIMER_USR_VT_ACCESS_EN
991		        | ARCH_TIMER_USR_VCT_ACCESS_EN
992			| ARCH_TIMER_VIRT_EVT_EN
993			| ARCH_TIMER_USR_PCT_ACCESS_EN);
994
995	/*
996	 * Enable user access to the virtual counter if it doesn't
997	 * need to be workaround. The vdso may have been already
998	 * disabled though.
999	 */
1000	if (arch_timer_this_cpu_has_cntvct_wa())
1001		pr_info("CPU%d: Trapping CNTVCT access\n", smp_processor_id());
1002	else
1003		cntkctl |= ARCH_TIMER_USR_VCT_ACCESS_EN;
1004
1005	arch_timer_set_cntkctl(cntkctl);
1006}
1007
1008static bool arch_timer_has_nonsecure_ppi(void)
1009{
1010	return (arch_timer_uses_ppi == ARCH_TIMER_PHYS_SECURE_PPI &&
1011		arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI]);
1012}
1013
1014static u32 check_ppi_trigger(int irq)
1015{
1016	u32 flags = irq_get_trigger_type(irq);
1017
1018	if (flags != IRQF_TRIGGER_HIGH && flags != IRQF_TRIGGER_LOW) {
1019		pr_warn("WARNING: Invalid trigger for IRQ%d, assuming level low\n", irq);
1020		pr_warn("WARNING: Please fix your firmware\n");
1021		flags = IRQF_TRIGGER_LOW;
1022	}
1023
1024	return flags;
1025}
1026
1027static int arch_timer_starting_cpu(unsigned int cpu)
1028{
1029	struct clock_event_device *clk = this_cpu_ptr(arch_timer_evt);
1030	u32 flags;
1031
1032	__arch_timer_setup(ARCH_TIMER_TYPE_CP15, clk);
1033
1034	flags = check_ppi_trigger(arch_timer_ppi[arch_timer_uses_ppi]);
1035	enable_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi], flags);
1036
1037	if (arch_timer_has_nonsecure_ppi()) {
1038		flags = check_ppi_trigger(arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI]);
1039		enable_percpu_irq(arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI],
1040				  flags);
1041	}
1042
1043	arch_counter_set_user_access();
1044
1045	return 0;
1046}
1047
1048static int validate_timer_rate(void)
1049{
1050	if (!arch_timer_rate)
1051		return -EINVAL;
1052
1053	/* Arch timer frequency < 1MHz can cause trouble */
1054	WARN_ON(arch_timer_rate < 1000000);
1055
1056	return 0;
1057}
1058
1059/*
1060 * For historical reasons, when probing with DT we use whichever (non-zero)
1061 * rate was probed first, and don't verify that others match. If the first node
1062 * probed has a clock-frequency property, this overrides the HW register.
1063 */
1064static void __init arch_timer_of_configure_rate(u32 rate, struct device_node *np)
1065{
1066	/* Who has more than one independent system counter? */
1067	if (arch_timer_rate)
1068		return;
1069
1070	if (of_property_read_u32(np, "clock-frequency", &arch_timer_rate))
1071		arch_timer_rate = rate;
1072
1073	/* Check the timer frequency. */
1074	if (validate_timer_rate())
1075		pr_warn("frequency not available\n");
1076}
1077
1078static void __init arch_timer_banner(unsigned type)
1079{
1080	pr_info("%s%s%s timer(s) running at %lu.%02luMHz (%s%s%s).\n",
1081		type & ARCH_TIMER_TYPE_CP15 ? "cp15" : "",
1082		type == (ARCH_TIMER_TYPE_CP15 | ARCH_TIMER_TYPE_MEM) ?
1083			" and " : "",
1084		type & ARCH_TIMER_TYPE_MEM ? "mmio" : "",
1085		(unsigned long)arch_timer_rate / 1000000,
1086		(unsigned long)(arch_timer_rate / 10000) % 100,
1087		type & ARCH_TIMER_TYPE_CP15 ?
1088			(arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) ? "virt" : "phys" :
1089			"",
1090		type == (ARCH_TIMER_TYPE_CP15 | ARCH_TIMER_TYPE_MEM) ? "/" : "",
1091		type & ARCH_TIMER_TYPE_MEM ?
1092			arch_timer_mem_use_virtual ? "virt" : "phys" :
1093			"");
1094}
1095
1096u32 arch_timer_get_rate(void)
1097{
1098	return arch_timer_rate;
1099}
1100
1101bool arch_timer_evtstrm_available(void)
1102{
1103	/*
1104	 * We might get called from a preemptible context. This is fine
1105	 * because availability of the event stream should be always the same
1106	 * for a preemptible context and context where we might resume a task.
1107	 */
1108	return cpumask_test_cpu(raw_smp_processor_id(), &evtstrm_available);
1109}
1110
1111static noinstr u64 arch_counter_get_cntvct_mem(void)
1112{
1113	return arch_counter_get_cnt_mem(arch_timer_mem, CNTVCT_LO);
1114}
1115
1116static struct arch_timer_kvm_info arch_timer_kvm_info;
1117
1118struct arch_timer_kvm_info *arch_timer_get_kvm_info(void)
1119{
1120	return &arch_timer_kvm_info;
1121}
1122
/*
 * Select the counter read routines for the detected timer @type and register
 * the counter as a clocksource, as the base of the KVM timecounter and as
 * sched_clock.
 *
 * @type: bitmask of ARCH_TIMER_TYPE_* describing the timers present.
 */
static void __init arch_counter_register(unsigned type)
{
	/* Reader handed to sched_clock_register(). */
	u64 (*scr)(void);
	u64 start_count;
	int width;

	/* Register the CP15 based counter if we have one */
	if (type & ARCH_TIMER_TYPE_CP15) {
		/* Reader installed as arch_timer_read_counter. */
		u64 (*rd)(void);

		/*
		 * Use the virtual counter on arm64 without hyp mode, or
		 * whenever the virtual PPI is in use; otherwise use the
		 * physical counter. The "_stable" variants are selected when a
		 * counter workaround is active.
		 */
		if ((IS_ENABLED(CONFIG_ARM64) && !is_hyp_mode_available()) ||
		    arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) {
			if (arch_timer_counter_has_wa()) {
				rd = arch_counter_get_cntvct_stable;
				scr = raw_counter_get_cntvct_stable;
			} else {
				rd = arch_counter_get_cntvct;
				scr = arch_counter_get_cntvct;
			}
		} else {
			if (arch_timer_counter_has_wa()) {
				rd = arch_counter_get_cntpct_stable;
				scr = raw_counter_get_cntpct_stable;
			} else {
				rd = arch_counter_get_cntpct;
				scr = arch_counter_get_cntpct;
			}
		}

		arch_timer_read_counter = rd;
		clocksource_counter.vdso_clock_mode = vdso_default;
	} else {
		/* No CP15 timer: read the counter through the MMIO frame. */
		arch_timer_read_counter = arch_counter_get_cntvct_mem;
		scr = arch_counter_get_cntvct_mem;
	}

	/* Clocksource and cyclecounter share the same counter width mask. */
	width = arch_counter_get_width();
	clocksource_counter.mask = CLOCKSOURCE_MASK(width);
	cyclecounter.mask = CLOCKSOURCE_MASK(width);

	if (!arch_counter_suspend_stop)
		clocksource_counter.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP;
	/* Sample the counter before registration; it seeds the timecounter. */
	start_count = arch_timer_read_counter();
	clocksource_register_hz(&clocksource_counter, arch_timer_rate);
	/* Reuse the mult/shift that clocksource registration computed. */
	cyclecounter.mult = clocksource_counter.mult;
	cyclecounter.shift = clocksource_counter.shift;
	timecounter_init(&arch_timer_kvm_info.timecounter,
			 &cyclecounter, start_count);

	sched_clock_register(scr, width, arch_timer_rate);
}
1174
1175static void arch_timer_stop(struct clock_event_device *clk)
1176{
1177	pr_debug("disable IRQ%d cpu #%d\n", clk->irq, smp_processor_id());
1178
1179	disable_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi]);
1180	if (arch_timer_has_nonsecure_ppi())
1181		disable_percpu_irq(arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI]);
1182
1183	clk->set_state_shutdown(clk);
1184}
1185
1186static int arch_timer_dying_cpu(unsigned int cpu)
1187{
1188	struct clock_event_device *clk = this_cpu_ptr(arch_timer_evt);
1189
1190	arch_timer_stop(clk);
1191	return 0;
1192}
1193
1194#ifdef CONFIG_CPU_PM
1195static DEFINE_PER_CPU(unsigned long, saved_cntkctl);
1196static int arch_timer_cpu_pm_notify(struct notifier_block *self,
1197				    unsigned long action, void *hcpu)
1198{
1199	if (action == CPU_PM_ENTER) {
1200		__this_cpu_write(saved_cntkctl, arch_timer_get_cntkctl());
1201
1202		cpumask_clear_cpu(smp_processor_id(), &evtstrm_available);
1203	} else if (action == CPU_PM_ENTER_FAILED || action == CPU_PM_EXIT) {
1204		arch_timer_set_cntkctl(__this_cpu_read(saved_cntkctl));
1205
1206		if (arch_timer_have_evtstrm_feature())
1207			cpumask_set_cpu(smp_processor_id(), &evtstrm_available);
1208	}
1209	return NOTIFY_OK;
1210}
1211
1212static struct notifier_block arch_timer_cpu_pm_notifier = {
1213	.notifier_call = arch_timer_cpu_pm_notify,
1214};
1215
1216static int __init arch_timer_cpu_pm_init(void)
1217{
1218	return cpu_pm_register_notifier(&arch_timer_cpu_pm_notifier);
1219}
1220
1221static void __init arch_timer_cpu_pm_deinit(void)
1222{
1223	WARN_ON(cpu_pm_unregister_notifier(&arch_timer_cpu_pm_notifier));
1224}
1225
1226#else
1227static int __init arch_timer_cpu_pm_init(void)
1228{
1229	return 0;
1230}
1231
1232static void __init arch_timer_cpu_pm_deinit(void)
1233{
1234}
1235#endif
1236
/*
 * Register the per-CPU (CP15) timer: allocate the per-CPU clock event
 * devices, request the per-CPU interrupt(s) selected by arch_timer_uses_ppi,
 * register the CPU PM notifier and install the CPU hotplug callbacks.
 *
 * Return: 0 on success, or a negative error code after undoing the steps
 * that had already succeeded.
 */
static int __init arch_timer_register(void)
{
	int err;
	int ppi;

	arch_timer_evt = alloc_percpu(struct clock_event_device);
	if (!arch_timer_evt) {
		err = -ENOMEM;
		goto out;
	}

	ppi = arch_timer_ppi[arch_timer_uses_ppi];
	switch (arch_timer_uses_ppi) {
	case ARCH_TIMER_VIRT_PPI:
		err = request_percpu_irq(ppi, arch_timer_handler_virt,
					 "arch_timer", arch_timer_evt);
		break;
	case ARCH_TIMER_PHYS_SECURE_PPI:
	case ARCH_TIMER_PHYS_NONSECURE_PPI:
		err = request_percpu_irq(ppi, arch_timer_handler_phys,
					 "arch_timer", arch_timer_evt);
		/*
		 * When the secure PPI is in use and a non-secure PPI is also
		 * known, request that one too. If this second request fails,
		 * release the secure PPI here, since the error path below
		 * jumps straight to out_free. Note that "ppi" then names the
		 * non-secure interrupt in the error message.
		 */
		if (!err && arch_timer_has_nonsecure_ppi()) {
			ppi = arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI];
			err = request_percpu_irq(ppi, arch_timer_handler_phys,
						 "arch_timer", arch_timer_evt);
			if (err)
				free_percpu_irq(arch_timer_ppi[ARCH_TIMER_PHYS_SECURE_PPI],
						arch_timer_evt);
		}
		break;
	case ARCH_TIMER_HYP_PPI:
		err = request_percpu_irq(ppi, arch_timer_handler_phys,
					 "arch_timer", arch_timer_evt);
		break;
	default:
		BUG();
	}

	if (err) {
		pr_err("can't register interrupt %d (%d)\n", ppi, err);
		goto out_free;
	}

	err = arch_timer_cpu_pm_init();
	if (err)
		goto out_unreg_notify;

	/* Register and immediately configure the timer on the boot CPU */
	err = cpuhp_setup_state(CPUHP_AP_ARM_ARCH_TIMER_STARTING,
				"clockevents/arm/arch_timer:starting",
				arch_timer_starting_cpu, arch_timer_dying_cpu);
	if (err)
		goto out_unreg_cpupm;
	return 0;

out_unreg_cpupm:
	arch_timer_cpu_pm_deinit();

	/* Despite its name, this label releases the per-CPU interrupt(s). */
out_unreg_notify:
	free_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi], arch_timer_evt);
	if (arch_timer_has_nonsecure_ppi())
		free_percpu_irq(arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI],
				arch_timer_evt);

out_free:
	free_percpu(arch_timer_evt);
	arch_timer_evt = NULL;
out:
	return err;
}
1307
1308static int __init arch_timer_mem_register(void __iomem *base, unsigned int irq)
1309{
1310	int ret;
1311	irq_handler_t func;
1312
1313	arch_timer_mem = kzalloc(sizeof(*arch_timer_mem), GFP_KERNEL);
1314	if (!arch_timer_mem)
1315		return -ENOMEM;
1316
1317	arch_timer_mem->base = base;
1318	arch_timer_mem->evt.irq = irq;
1319	__arch_timer_setup(ARCH_TIMER_TYPE_MEM, &arch_timer_mem->evt);
1320
1321	if (arch_timer_mem_use_virtual)
1322		func = arch_timer_handler_virt_mem;
1323	else
1324		func = arch_timer_handler_phys_mem;
1325
1326	ret = request_irq(irq, func, IRQF_TIMER, "arch_mem_timer", &arch_timer_mem->evt);
1327	if (ret) {
1328		pr_err("Failed to request mem timer irq\n");
1329		kfree(arch_timer_mem);
1330		arch_timer_mem = NULL;
1331	}
1332
1333	return ret;
1334}
1335
/*
 * Device-tree compatibles of the per-CPU timer nodes; used by
 * arch_timer_needs_of_probing() to look for a not-yet-probed node.
 */
static const struct of_device_id arch_timer_of_match[] __initconst = {
	{ .compatible   = "arm,armv7-timer",    },
	{ .compatible   = "arm,armv8-timer",    },
	{},
};

/*
 * Device-tree compatible of the memory-mapped timer node; used by
 * arch_timer_needs_of_probing() to look for a not-yet-probed node.
 */
static const struct of_device_id arch_timer_mem_of_match[] __initconst = {
	{ .compatible   = "arm,armv7-timer-mem", },
	{},
};
1346
1347static bool __init arch_timer_needs_of_probing(void)
1348{
1349	struct device_node *dn;
1350	bool needs_probing = false;
1351	unsigned int mask = ARCH_TIMER_TYPE_CP15 | ARCH_TIMER_TYPE_MEM;
1352
1353	/* We have two timers, and both device-tree nodes are probed. */
1354	if ((arch_timers_present & mask) == mask)
1355		return false;
1356
1357	/*
1358	 * Only one type of timer is probed,
1359	 * check if we have another type of timer node in device-tree.
1360	 */
1361	if (arch_timers_present & ARCH_TIMER_TYPE_CP15)
1362		dn = of_find_matching_node(NULL, arch_timer_mem_of_match);
1363	else
1364		dn = of_find_matching_node(NULL, arch_timer_of_match);
1365
1366	if (dn && of_device_is_available(dn))
1367		needs_probing = true;
1368
1369	of_node_put(dn);
1370
1371	return needs_probing;
1372}
1373
1374static int __init arch_timer_common_init(void)
1375{
1376	arch_timer_banner(arch_timers_present);
1377	arch_counter_register(arch_timers_present);
1378	return arch_timer_arch_init();
1379}
1380
1381/**
1382 * arch_timer_select_ppi() - Select suitable PPI for the current system.
1383 *
1384 * If HYP mode is available, we know that the physical timer
1385 * has been configured to be accessible from PL1. Use it, so
1386 * that a guest can use the virtual timer instead.
1387 *
1388 * On ARMv8.1 with VH extensions, the kernel runs in HYP. VHE
1389 * accesses to CNTP_*_EL1 registers are silently redirected to
1390 * their CNTHP_*_EL2 counterparts, and use a different PPI
1391 * number.
1392 *
1393 * If no interrupt provided for virtual timer, we'll have to
1394 * stick to the physical timer. It'd better be accessible...
1395 * For arm64 we never use the secure interrupt.
1396 *
1397 * Return: a suitable PPI type for the current system.
1398 */
1399static enum arch_timer_ppi_nr __init arch_timer_select_ppi(void)
1400{
1401	if (is_kernel_in_hyp_mode())
1402		return ARCH_TIMER_HYP_PPI;
1403
1404	if (!is_hyp_mode_available() && arch_timer_ppi[ARCH_TIMER_VIRT_PPI])
1405		return ARCH_TIMER_VIRT_PPI;
1406
1407	if (IS_ENABLED(CONFIG_ARM64))
1408		return ARCH_TIMER_PHYS_NONSECURE_PPI;
1409
1410	return ARCH_TIMER_PHYS_SECURE_PPI;
1411}
1412
1413static void __init arch_timer_populate_kvm_info(void)
1414{
1415	arch_timer_kvm_info.virtual_irq = arch_timer_ppi[ARCH_TIMER_VIRT_PPI];
1416	if (is_kernel_in_hyp_mode())
1417		arch_timer_kvm_info.physical_irq = arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI];
1418}
1419
/*
 * Device-tree probe entry point for the per-CPU (CP15) timer.
 *
 * Collects the PPIs and rate from @np, selects the PPI to use, registers the
 * timer and, unless a memory-mapped timer node still awaits probing, runs the
 * common initialisation.
 *
 * Return: 0 on success (also when a duplicate node is skipped), -EINVAL if
 * no usable interrupt exists, or the error from registration/common init.
 */
static int __init arch_timer_of_init(struct device_node *np)
{
	int i, irq, ret;
	u32 rate;
	bool has_names;

	if (arch_timers_present & ARCH_TIMER_TYPE_CP15) {
		pr_warn("multiple nodes in dt, skipping\n");
		return 0;
	}

	/* Marked present up front; the flag is not cleared on later errors. */
	arch_timers_present |= ARCH_TIMER_TYPE_CP15;

	has_names = of_property_read_bool(np, "interrupt-names");

	/*
	 * Look up every PPI either by name or by index, depending on whether
	 * the node has an "interrupt-names" property. Entries whose lookup
	 * does not yield a positive IRQ number are left untouched.
	 */
	for (i = ARCH_TIMER_PHYS_SECURE_PPI; i < ARCH_TIMER_MAX_TIMER_PPI; i++) {
		if (has_names)
			irq = of_irq_get_byname(np, arch_timer_ppi_names[i]);
		else
			irq = of_irq_get(np, i);
		if (irq > 0)
			arch_timer_ppi[i] = irq;
	}

	arch_timer_populate_kvm_info();

	/* The DT "clock-frequency" property, if any, overrides this value. */
	rate = arch_timer_get_cntfrq();
	arch_timer_of_configure_rate(rate, np);

	arch_timer_c3stop = !of_property_read_bool(np, "always-on");

	/* Check for globally applicable workarounds */
	arch_timer_check_ool_workaround(ate_match_dt, np);

	/*
	 * If we cannot rely on firmware initializing the timer registers then
	 * we should use the physical timers instead.
	 */
	if (IS_ENABLED(CONFIG_ARM) &&
	    of_property_read_bool(np, "arm,cpu-registers-not-fw-configured"))
		arch_timer_uses_ppi = ARCH_TIMER_PHYS_SECURE_PPI;
	else
		arch_timer_uses_ppi = arch_timer_select_ppi();

	if (!arch_timer_ppi[arch_timer_uses_ppi]) {
		pr_err("No interrupt available, giving up\n");
		return -EINVAL;
	}

	/* On some systems, the counter stops ticking when in suspend. */
	arch_counter_suspend_stop = of_property_read_bool(np,
							 "arm,no-tick-in-suspend");

	ret = arch_timer_register();
	if (ret)
		return ret;

	/* Defer common init to the MMIO timer probe if one is still pending. */
	if (arch_timer_needs_of_probing())
		return 0;

	return arch_timer_common_init();
}
TIMER_OF_DECLARE(armv7_arch_timer, "arm,armv7-timer", arch_timer_of_init);
TIMER_OF_DECLARE(armv8_arch_timer, "arm,armv8-timer", arch_timer_of_init);
1484
1485static u32 __init
1486arch_timer_mem_frame_get_cntfrq(struct arch_timer_mem_frame *frame)
1487{
1488	void __iomem *base;
1489	u32 rate;
1490
1491	base = ioremap(frame->cntbase, frame->size);
1492	if (!base) {
1493		pr_err("Unable to map frame @ %pa\n", &frame->cntbase);
1494		return 0;
1495	}
1496
1497	rate = readl_relaxed(base + CNTFRQ);
1498
1499	iounmap(base);
1500
1501	return rate;
1502}
1503
/*
 * Pick the most suitable frame of a memory-mapped timer.
 *
 * Maps the timer's control frame, and for each valid frame tries to enable
 * all access bits in its CNTACR register, reading back what was accepted.
 * The first frame that is virtual-capable (per CNTTIDR) and grants virtual
 * timer/counter access is returned immediately, setting
 * arch_timer_mem_use_virtual. Otherwise the last frame granting physical
 * timer/counter access is returned.
 *
 * Return: the chosen frame, or NULL if the control frame cannot be mapped or
 * no frame qualifies.
 */
static struct arch_timer_mem_frame * __init
arch_timer_mem_find_best_frame(struct arch_timer_mem *timer_mem)
{
	struct arch_timer_mem_frame *frame, *best_frame = NULL;
	void __iomem *cntctlbase;
	u32 cnttidr;
	int i;

	cntctlbase = ioremap(timer_mem->cntctlbase, timer_mem->size);
	if (!cntctlbase) {
		pr_err("Can't map CNTCTLBase @ %pa\n",
			&timer_mem->cntctlbase);
		return NULL;
	}

	cnttidr = readl_relaxed(cntctlbase + CNTTIDR);

	/*
	 * Try to find a virtual capable frame. Otherwise fall back to a
	 * physical capable frame.
	 */
	for (i = 0; i < ARCH_TIMER_MEM_MAX_FRAMES; i++) {
		u32 cntacr = CNTACR_RFRQ | CNTACR_RWPT | CNTACR_RPCT |
			     CNTACR_RWVT | CNTACR_RVOFF | CNTACR_RVCT;

		frame = &timer_mem->frame[i];
		if (!frame->valid)
			continue;

		/* Try enabling everything, and see what sticks */
		writel_relaxed(cntacr, cntctlbase + CNTACR(i));
		cntacr = readl_relaxed(cntctlbase + CNTACR(i));

		/* Virtual-capable frame with both virtual access bits set. */
		if ((cnttidr & CNTTIDR_VIRT(i)) &&
		    !(~cntacr & (CNTACR_RWVT | CNTACR_RVCT))) {
			best_frame = frame;
			arch_timer_mem_use_virtual = true;
			break;
		}

		/* Skip frames lacking either physical access bit. */
		if (~cntacr & (CNTACR_RWPT | CNTACR_RPCT))
			continue;

		best_frame = frame;
	}

	iounmap(cntctlbase);

	return best_frame;
}
1554
1555static int __init
1556arch_timer_mem_frame_register(struct arch_timer_mem_frame *frame)
1557{
1558	void __iomem *base;
1559	int ret, irq = 0;
1560
1561	if (arch_timer_mem_use_virtual)
1562		irq = frame->virt_irq;
1563	else
1564		irq = frame->phys_irq;
1565
1566	if (!irq) {
1567		pr_err("Frame missing %s irq.\n",
1568		       arch_timer_mem_use_virtual ? "virt" : "phys");
1569		return -EINVAL;
1570	}
1571
1572	if (!request_mem_region(frame->cntbase, frame->size,
1573				"arch_mem_timer"))
1574		return -EBUSY;
1575
1576	base = ioremap(frame->cntbase, frame->size);
1577	if (!base) {
1578		pr_err("Can't map frame's registers\n");
1579		return -ENXIO;
1580	}
1581
1582	ret = arch_timer_mem_register(base, irq);
1583	if (ret) {
1584		iounmap(base);
1585		return ret;
1586	}
1587
1588	arch_timers_present |= ARCH_TIMER_TYPE_MEM;
1589
1590	return 0;
1591}
1592
1593static int __init arch_timer_mem_of_init(struct device_node *np)
1594{
1595	struct arch_timer_mem *timer_mem;
1596	struct arch_timer_mem_frame *frame;
1597	struct device_node *frame_node;
1598	struct resource res;
1599	int ret = -EINVAL;
1600	u32 rate;
1601
1602	timer_mem = kzalloc(sizeof(*timer_mem), GFP_KERNEL);
1603	if (!timer_mem)
1604		return -ENOMEM;
1605
1606	if (of_address_to_resource(np, 0, &res))
1607		goto out;
1608	timer_mem->cntctlbase = res.start;
1609	timer_mem->size = resource_size(&res);
1610
1611	for_each_available_child_of_node(np, frame_node) {
1612		u32 n;
1613		struct arch_timer_mem_frame *frame;
1614
1615		if (of_property_read_u32(frame_node, "frame-number", &n)) {
1616			pr_err(FW_BUG "Missing frame-number.\n");
1617			of_node_put(frame_node);
1618			goto out;
1619		}
1620		if (n >= ARCH_TIMER_MEM_MAX_FRAMES) {
1621			pr_err(FW_BUG "Wrong frame-number, only 0-%u are permitted.\n",
1622			       ARCH_TIMER_MEM_MAX_FRAMES - 1);
1623			of_node_put(frame_node);
1624			goto out;
1625		}
1626		frame = &timer_mem->frame[n];
1627
1628		if (frame->valid) {
1629			pr_err(FW_BUG "Duplicated frame-number.\n");
1630			of_node_put(frame_node);
1631			goto out;
1632		}
1633
1634		if (of_address_to_resource(frame_node, 0, &res)) {
1635			of_node_put(frame_node);
1636			goto out;
1637		}
1638		frame->cntbase = res.start;
1639		frame->size = resource_size(&res);
1640
1641		frame->virt_irq = irq_of_parse_and_map(frame_node,
1642						       ARCH_TIMER_VIRT_SPI);
1643		frame->phys_irq = irq_of_parse_and_map(frame_node,
1644						       ARCH_TIMER_PHYS_SPI);
1645
1646		frame->valid = true;
1647	}
1648
1649	frame = arch_timer_mem_find_best_frame(timer_mem);
1650	if (!frame) {
1651		pr_err("Unable to find a suitable frame in timer @ %pa\n",
1652			&timer_mem->cntctlbase);
1653		ret = -EINVAL;
1654		goto out;
1655	}
1656
1657	rate = arch_timer_mem_frame_get_cntfrq(frame);
1658	arch_timer_of_configure_rate(rate, np);
1659
1660	ret = arch_timer_mem_frame_register(frame);
1661	if (!ret && !arch_timer_needs_of_probing())
1662		ret = arch_timer_common_init();
1663out:
1664	kfree(timer_mem);
1665	return ret;
1666}
1667TIMER_OF_DECLARE(armv7_arch_timer_mem, "arm,armv7-timer-mem",
1668		       arch_timer_mem_of_init);
1669
1670#ifdef CONFIG_ACPI_GTDT
1671static int __init
1672arch_timer_mem_verify_cntfrq(struct arch_timer_mem *timer_mem)
1673{
1674	struct arch_timer_mem_frame *frame;
1675	u32 rate;
1676	int i;
1677
1678	for (i = 0; i < ARCH_TIMER_MEM_MAX_FRAMES; i++) {
1679		frame = &timer_mem->frame[i];
1680
1681		if (!frame->valid)
1682			continue;
1683
1684		rate = arch_timer_mem_frame_get_cntfrq(frame);
1685		if (rate == arch_timer_rate)
1686			continue;
1687
1688		pr_err(FW_BUG "CNTFRQ mismatch: frame @ %pa: (0x%08lx), CPU: (0x%08lx)\n",
1689			&frame->cntbase,
1690			(unsigned long)rate, (unsigned long)arch_timer_rate);
1691
1692		return -EINVAL;
1693	}
1694
1695	return 0;
1696}
1697
1698static int __init arch_timer_mem_acpi_init(int platform_timer_count)
1699{
1700	struct arch_timer_mem *timers, *timer;
1701	struct arch_timer_mem_frame *frame, *best_frame = NULL;
1702	int timer_count, i, ret = 0;
1703
1704	timers = kcalloc(platform_timer_count, sizeof(*timers),
1705			    GFP_KERNEL);
1706	if (!timers)
1707		return -ENOMEM;
1708
1709	ret = acpi_arch_timer_mem_init(timers, &timer_count);
1710	if (ret || !timer_count)
1711		goto out;
1712
1713	/*
1714	 * While unlikely, it's theoretically possible that none of the frames
1715	 * in a timer expose the combination of feature we want.
1716	 */
1717	for (i = 0; i < timer_count; i++) {
1718		timer = &timers[i];
1719
1720		frame = arch_timer_mem_find_best_frame(timer);
1721		if (!best_frame)
1722			best_frame = frame;
1723
1724		ret = arch_timer_mem_verify_cntfrq(timer);
1725		if (ret) {
1726			pr_err("Disabling MMIO timers due to CNTFRQ mismatch\n");
1727			goto out;
1728		}
1729
1730		if (!best_frame) /* implies !frame */
1731			/*
1732			 * Only complain about missing suitable frames if we
1733			 * haven't already found one in a previous iteration.
1734			 */
1735			pr_err("Unable to find a suitable frame in timer @ %pa\n",
1736				&timer->cntctlbase);
1737	}
1738
1739	if (best_frame)
1740		ret = arch_timer_mem_frame_register(best_frame);
1741out:
1742	kfree(timers);
1743	return ret;
1744}
1745
/*
 * Initialize per-processor generic timer and memory-mapped timer(if present)
 *
 * ACPI (GTDT) probe entry point. Maps the PPIs described by @table, takes the
 * rate from the CNTFRQ system register, selects the PPI to use, registers the
 * per-CPU timer, then attempts to set up memory-mapped timers before running
 * the common initialisation.
 *
 * Return: 0 on success, -EINVAL if already initialised, if the rate is
 * unavailable or if no usable interrupt exists, or the error from GTDT
 * parsing, timer registration or common init. A failure to set up the
 * memory-mapped timer is only logged.
 */
static int __init arch_timer_acpi_init(struct acpi_table_header *table)
{
	int ret, platform_timer_count;

	if (arch_timers_present & ARCH_TIMER_TYPE_CP15) {
		pr_warn("already initialized, skipping\n");
		return -EINVAL;
	}

	/* Marked present up front; the flag is not cleared on later errors. */
	arch_timers_present |= ARCH_TIMER_TYPE_CP15;

	ret = acpi_gtdt_init(table, &platform_timer_count);
	if (ret)
		return ret;

	/* The secure physical PPI is not mapped on this path. */
	arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI] =
		acpi_gtdt_map_ppi(ARCH_TIMER_PHYS_NONSECURE_PPI);

	arch_timer_ppi[ARCH_TIMER_VIRT_PPI] =
		acpi_gtdt_map_ppi(ARCH_TIMER_VIRT_PPI);

	arch_timer_ppi[ARCH_TIMER_HYP_PPI] =
		acpi_gtdt_map_ppi(ARCH_TIMER_HYP_PPI);

	arch_timer_populate_kvm_info();

	/*
	 * When probing via ACPI, we have no mechanism to override the sysreg
	 * CNTFRQ value. This *must* be correct.
	 */
	arch_timer_rate = arch_timer_get_cntfrq();
	ret = validate_timer_rate();
	if (ret) {
		pr_err(FW_BUG "frequency not available.\n");
		return ret;
	}

	arch_timer_uses_ppi = arch_timer_select_ppi();
	if (!arch_timer_ppi[arch_timer_uses_ppi]) {
		pr_err("No interrupt available, giving up\n");
		return -EINVAL;
	}

	/* Always-on capability */
	arch_timer_c3stop = acpi_gtdt_c3stop(arch_timer_uses_ppi);

	/* Check for globally applicable workarounds */
	arch_timer_check_ool_workaround(ate_match_acpi_oem_info, table);

	ret = arch_timer_register();
	if (ret)
		return ret;

	/* MMIO timer setup is best effort: a failure is reported, not fatal. */
	if (platform_timer_count &&
	    arch_timer_mem_acpi_init(platform_timer_count))
		pr_err("Failed to initialize memory-mapped timer.\n");

	return arch_timer_common_init();
}
TIMER_ACPI_DECLARE(arch_timer, ACPI_SIG_GTDT, arch_timer_acpi_init);
1807#endif
1808
1809int kvm_arch_ptp_get_crosststamp(u64 *cycle, struct timespec64 *ts,
1810				 enum clocksource_ids *cs_id)
1811{
1812	struct arm_smccc_res hvc_res;
1813	u32 ptp_counter;
1814	ktime_t ktime;
1815
1816	if (!IS_ENABLED(CONFIG_HAVE_ARM_SMCCC_DISCOVERY))
1817		return -EOPNOTSUPP;
1818
1819	if (arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI)
1820		ptp_counter = KVM_PTP_VIRT_COUNTER;
1821	else
1822		ptp_counter = KVM_PTP_PHYS_COUNTER;
1823
1824	arm_smccc_1_1_invoke(ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID,
1825			     ptp_counter, &hvc_res);
1826
1827	if ((int)(hvc_res.a0) < 0)
1828		return -EOPNOTSUPP;
1829
1830	ktime = (u64)hvc_res.a0 << 32 | hvc_res.a1;
1831	*ts = ktime_to_timespec64(ktime);
1832	if (cycle)
1833		*cycle = (u64)hvc_res.a2 << 32 | hvc_res.a3;
1834	if (cs_id)
1835		*cs_id = CSID_ARM_ARCH_COUNTER;
1836
1837	return 0;
1838}
1839EXPORT_SYMBOL_GPL(kvm_arch_ptp_get_crosststamp);
1840