1/*	$OpenBSD: cpu.c,v 1.121 2024/06/23 10:17:16 kettenis Exp $	*/
2
3/*
4 * Copyright (c) 2016 Dale Rahn <drahn@dalerahn.com>
5 * Copyright (c) 2017 Mark Kettenis <kettenis@openbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19
20#include "kstat.h"
21
22#include <sys/param.h>
23#include <sys/systm.h>
24#include <sys/proc.h>
25#include <sys/malloc.h>
26#include <sys/device.h>
27#include <sys/sysctl.h>
28#include <sys/task.h>
29#include <sys/user.h>
30#include <sys/kstat.h>
31
32#include <uvm/uvm.h>
33
34#include <machine/fdt.h>
35
36#include <dev/ofw/openfirm.h>
37#include <dev/ofw/ofw_clock.h>
38#include <dev/ofw/ofw_regulator.h>
39#include <dev/ofw/ofw_thermal.h>
40#include <dev/ofw/fdt.h>
41
42#include <machine/cpufunc.h>
43
44#include "psci.h"
45#if NPSCI > 0
46#include <dev/fdt/pscivar.h>
47#endif
48
49/* CPU Identification */
50#define CPU_IMPL_ARM		0x41
51#define CPU_IMPL_CAVIUM		0x43
52#define CPU_IMPL_AMCC		0x50
53#define CPU_IMPL_QCOM		0x51
54#define CPU_IMPL_APPLE		0x61
55#define CPU_IMPL_AMPERE		0xc0
56
57/* ARM */
58#define CPU_PART_CORTEX_A34	0xd02
59#define CPU_PART_CORTEX_A53	0xd03
60#define CPU_PART_CORTEX_A35	0xd04
61#define CPU_PART_CORTEX_A55	0xd05
62#define CPU_PART_CORTEX_A65	0xd06
63#define CPU_PART_CORTEX_A57	0xd07
64#define CPU_PART_CORTEX_A72	0xd08
65#define CPU_PART_CORTEX_A73	0xd09
66#define CPU_PART_CORTEX_A75	0xd0a
67#define CPU_PART_CORTEX_A76	0xd0b
68#define CPU_PART_NEOVERSE_N1	0xd0c
69#define CPU_PART_CORTEX_A77	0xd0d
70#define CPU_PART_CORTEX_A76AE	0xd0e
71#define CPU_PART_NEOVERSE_V1	0xd40
72#define CPU_PART_CORTEX_A78	0xd41
73#define CPU_PART_CORTEX_A78AE	0xd42
74#define CPU_PART_CORTEX_A65AE	0xd43
75#define CPU_PART_CORTEX_X1	0xd44
76#define CPU_PART_CORTEX_A510	0xd46
77#define CPU_PART_CORTEX_A710	0xd47
78#define CPU_PART_CORTEX_X2	0xd48
79#define CPU_PART_NEOVERSE_N2	0xd49
80#define CPU_PART_NEOVERSE_E1	0xd4a
81#define CPU_PART_CORTEX_A78C	0xd4b
82#define CPU_PART_CORTEX_X1C	0xd4c
83#define CPU_PART_CORTEX_A715	0xd4d
84#define CPU_PART_CORTEX_X3	0xd4e
85#define CPU_PART_NEOVERSE_V2	0xd4f
86#define CPU_PART_CORTEX_A520	0xd80
87#define CPU_PART_CORTEX_A720	0xd81
88#define CPU_PART_CORTEX_X4	0xd82
89#define CPU_PART_NEOVERSE_V3	0xd84
90#define CPU_PART_CORTEX_X925	0xd85
91#define CPU_PART_CORTEX_A725	0xd87
92#define CPU_PART_CORTEX_A520AE	0xd88
93#define CPU_PART_CORTEX_A720AE	0xd89
94#define CPU_PART_NEOVERSE_N3	0xd8e
95
96/* Cavium */
97#define CPU_PART_THUNDERX_T88	0x0a1
98#define CPU_PART_THUNDERX_T81	0x0a2
99#define CPU_PART_THUNDERX_T83	0x0a3
100#define CPU_PART_THUNDERX2_T99	0x0af
101
102/* Applied Micro */
103#define CPU_PART_X_GENE		0x000
104
105/* Qualcomm */
106#define CPU_PART_ORYON		0x001
107#define CPU_PART_KRYO400_GOLD	0x804
108#define CPU_PART_KRYO400_SILVER	0x805
109
110/* Apple */
111#define CPU_PART_ICESTORM	0x022
112#define CPU_PART_FIRESTORM	0x023
113#define CPU_PART_ICESTORM_PRO	0x024
114#define CPU_PART_FIRESTORM_PRO	0x025
115#define CPU_PART_ICESTORM_MAX	0x028
116#define CPU_PART_FIRESTORM_MAX	0x029
117#define CPU_PART_BLIZZARD	0x032
118#define CPU_PART_AVALANCHE	0x033
119#define CPU_PART_BLIZZARD_PRO	0x034
120#define CPU_PART_AVALANCHE_PRO	0x035
121#define CPU_PART_BLIZZARD_MAX	0x038
122#define CPU_PART_AVALANCHE_MAX	0x039
123
124/* Ampere */
125#define CPU_PART_AMPERE1	0xac3
126
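/*
 * MIDR_EL1 layout: implementer in bits [31:24], variant in [23:20],
 * part number in [15:4] and revision in [3:0].
 */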
127#define CPU_IMPL(midr)  (((midr) >> 24) & 0xff)
128#define CPU_PART(midr)  (((midr) >> 4) & 0xfff)
129#define CPU_VAR(midr)   (((midr) >> 20) & 0xf)
130#define CPU_REV(midr)   (((midr) >> 0) & 0xf)
131
132struct cpu_cores {
133	int	id;
134	char	*name;
135};
136
137struct cpu_cores cpu_cores_none[] = {
138	{ 0, NULL },
139};
140
141struct cpu_cores cpu_cores_arm[] = {
142	{ CPU_PART_CORTEX_A34, "Cortex-A34" },
143	{ CPU_PART_CORTEX_A35, "Cortex-A35" },
144	{ CPU_PART_CORTEX_A53, "Cortex-A53" },
145	{ CPU_PART_CORTEX_A55, "Cortex-A55" },
146	{ CPU_PART_CORTEX_A57, "Cortex-A57" },
147	{ CPU_PART_CORTEX_A65, "Cortex-A65" },
148	{ CPU_PART_CORTEX_A65AE, "Cortex-A65AE" },
149	{ CPU_PART_CORTEX_A72, "Cortex-A72" },
150	{ CPU_PART_CORTEX_A73, "Cortex-A73" },
151	{ CPU_PART_CORTEX_A75, "Cortex-A75" },
152	{ CPU_PART_CORTEX_A76, "Cortex-A76" },
153	{ CPU_PART_CORTEX_A76AE, "Cortex-A76AE" },
154	{ CPU_PART_CORTEX_A77, "Cortex-A77" },
155	{ CPU_PART_CORTEX_A78, "Cortex-A78" },
156	{ CPU_PART_CORTEX_A78AE, "Cortex-A78AE" },
157	{ CPU_PART_CORTEX_A78C, "Cortex-A78C" },
158	{ CPU_PART_CORTEX_A510, "Cortex-A510" },
159	{ CPU_PART_CORTEX_A520, "Cortex-A520" },
160	{ CPU_PART_CORTEX_A520AE, "Cortex-A520AE" },
161	{ CPU_PART_CORTEX_A710, "Cortex-A710" },
162	{ CPU_PART_CORTEX_A715, "Cortex-A715" },
163	{ CPU_PART_CORTEX_A720, "Cortex-A720" },
164	{ CPU_PART_CORTEX_A720AE, "Cortex-A720AE" },
165	{ CPU_PART_CORTEX_A725, "Cortex-A725" },
166	{ CPU_PART_CORTEX_X1, "Cortex-X1" },
167	{ CPU_PART_CORTEX_X1C, "Cortex-X1C" },
168	{ CPU_PART_CORTEX_X2, "Cortex-X2" },
169	{ CPU_PART_CORTEX_X3, "Cortex-X3" },
170	{ CPU_PART_CORTEX_X4, "Cortex-X4" },
171	{ CPU_PART_CORTEX_X925, "Cortex-X925" },
172	{ CPU_PART_NEOVERSE_E1, "Neoverse E1" },
173	{ CPU_PART_NEOVERSE_N1, "Neoverse N1" },
174	{ CPU_PART_NEOVERSE_N2, "Neoverse N2" },
175	{ CPU_PART_NEOVERSE_N3, "Neoverse N3" },
176	{ CPU_PART_NEOVERSE_V1, "Neoverse V1" },
177	{ CPU_PART_NEOVERSE_V2, "Neoverse V2" },
178	{ CPU_PART_NEOVERSE_V3, "Neoverse V3" },
179	{ 0, NULL },
180};
181
182struct cpu_cores cpu_cores_cavium[] = {
183	{ CPU_PART_THUNDERX_T88, "ThunderX T88" },
184	{ CPU_PART_THUNDERX_T81, "ThunderX T81" },
185	{ CPU_PART_THUNDERX_T83, "ThunderX T83" },
186	{ CPU_PART_THUNDERX2_T99, "ThunderX2 T99" },
187	{ 0, NULL },
188};
189
190struct cpu_cores cpu_cores_amcc[] = {
191	{ CPU_PART_X_GENE, "X-Gene" },
192	{ 0, NULL },
193};
194
195struct cpu_cores cpu_cores_qcom[] = {
196	{ CPU_PART_KRYO400_GOLD, "Kryo 400 Gold" },
197	{ CPU_PART_KRYO400_SILVER, "Kryo 400 Silver" },
198	{ CPU_PART_ORYON, "Oryon" },
199	{ 0, NULL },
200};
201
202struct cpu_cores cpu_cores_apple[] = {
203	{ CPU_PART_ICESTORM, "Icestorm" },
204	{ CPU_PART_FIRESTORM, "Firestorm" },
205	{ CPU_PART_ICESTORM_PRO, "Icestorm Pro" },
206	{ CPU_PART_FIRESTORM_PRO, "Firestorm Pro" },
207	{ CPU_PART_ICESTORM_MAX, "Icestorm Max" },
208	{ CPU_PART_FIRESTORM_MAX, "Firestorm Max" },
209	{ CPU_PART_BLIZZARD, "Blizzard" },
210	{ CPU_PART_AVALANCHE, "Avalanche" },
211	{ CPU_PART_BLIZZARD_PRO, "Blizzard Pro" },
212	{ CPU_PART_AVALANCHE_PRO, "Avalanche Pro" },
213	{ CPU_PART_BLIZZARD_MAX, "Blizzard Max" },
214	{ CPU_PART_AVALANCHE_MAX, "Avalanche Max" },
215	{ 0, NULL },
216};
217
218struct cpu_cores cpu_cores_ampere[] = {
219	{ CPU_PART_AMPERE1, "AmpereOne" },
220	{ 0, NULL },
221};
222
/* ARM core implementers and their core lists. */
224const struct implementers {
225	int			id;
226	char			*name;
227	struct cpu_cores	*corelist;
228} cpu_implementers[] = {
229	{ CPU_IMPL_ARM,	"ARM", cpu_cores_arm },
230	{ CPU_IMPL_CAVIUM, "Cavium", cpu_cores_cavium },
231	{ CPU_IMPL_AMCC, "Applied Micro", cpu_cores_amcc },
232	{ CPU_IMPL_QCOM, "Qualcomm", cpu_cores_qcom },
233	{ CPU_IMPL_APPLE, "Apple", cpu_cores_apple },
234	{ CPU_IMPL_AMPERE, "Ampere", cpu_cores_ampere },
235	{ 0, NULL },
236};
237
238char cpu_model[64];
239int cpu_node;
240
241uint64_t cpu_id_aa64isar0;
242uint64_t cpu_id_aa64isar1;
243uint64_t cpu_id_aa64isar2;
244uint64_t cpu_id_aa64pfr0;
245uint64_t cpu_id_aa64pfr1;
246
247#ifdef CRYPTO
248int arm64_has_aes;
249#endif
250
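/*
 * Alternative exception vector tables used to mitigate Spectre-BHB:
 * the "loop_N" variants overwrite the branch history with N taken
 * branches, the PSCI variants trap to firmware (SMCCC ARCH_WORKAROUND_3)
 * and the "clrbhb" variant uses the CLRBHB instruction.
 */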
251extern char trampoline_vectors_none[];
252extern char trampoline_vectors_loop_8[];
253extern char trampoline_vectors_loop_11[];
254extern char trampoline_vectors_loop_24[];
255extern char trampoline_vectors_loop_32[];
256#if NPSCI > 0
257extern char trampoline_vectors_psci_hvc[];
258extern char trampoline_vectors_psci_smc[];
259#endif
260extern char trampoline_vectors_clrbhb[];
261
262struct cpu_info *cpu_info_list = &cpu_info_primary;
263
264int	cpu_match(struct device *, void *, void *);
265void	cpu_attach(struct device *, struct device *, void *);
266
267const struct cfattach cpu_ca = {
268	sizeof(struct device), cpu_match, cpu_attach
269};
270
271struct cfdriver cpu_cd = {
272	NULL, "cpu", DV_DULL
273};
274
275void	cpu_opp_init(struct cpu_info *, uint32_t);
276void	cpu_psci_init(struct cpu_info *);
277
278void	cpu_flush_bp_noop(void);
279void	cpu_flush_bp_psci(void);
280void	cpu_serror_apple(void);
281
282#if NKSTAT > 0
283void	cpu_kstat_attach(struct cpu_info *ci);
284void	cpu_opp_kstat_attach(struct cpu_info *ci);
285#endif
286
287/*
288 * Enable mitigation for Spectre-V2 branch target injection
289 * vulnerabilities (CVE-2017-5715).
290 */
291void
292cpu_mitigate_spectre_v2(struct cpu_info *ci)
293{
294	uint64_t id;
295
296	/*
297	 * By default we let the firmware decide what mitigation is
298	 * necessary.
299	 */
300	ci->ci_flush_bp = cpu_flush_bp_psci;
301
302	/* Some specific CPUs are known not to be vulnerable. */
303	switch (CPU_IMPL(ci->ci_midr)) {
304	case CPU_IMPL_ARM:
305		switch (CPU_PART(ci->ci_midr)) {
306		case CPU_PART_CORTEX_A35:
307		case CPU_PART_CORTEX_A53:
308		case CPU_PART_CORTEX_A55:
309			/* Not vulnerable. */
310			ci->ci_flush_bp = cpu_flush_bp_noop;
311			break;
312		}
313		break;
314	case CPU_IMPL_QCOM:
315		switch (CPU_PART(ci->ci_midr)) {
316		case CPU_PART_KRYO400_SILVER:
317			/* Not vulnerable. */
318			ci->ci_flush_bp = cpu_flush_bp_noop;
319			break;
		}
		break;
	}
322
323	/*
324	 * The architecture has been updated to explicitly tell us if
325	 * we're not vulnerable to Spectre-V2.
326	 */
327	id = READ_SPECIALREG(id_aa64pfr0_el1);
328	if (ID_AA64PFR0_CSV2(id) >= ID_AA64PFR0_CSV2_IMPL)
329		ci->ci_flush_bp = cpu_flush_bp_noop;
330}
331
/*
 * Enable mitigation for Spectre-BHB branch history injection
 * vulnerabilities (CVE-2022-23960).
 */
336void
337cpu_mitigate_spectre_bhb(struct cpu_info *ci)
338{
339	uint64_t id;
340
341	/*
342	 * If we know the CPU, we can add a branchy loop that cleans
343	 * the BHB.
344	 */
345	switch (CPU_IMPL(ci->ci_midr)) {
346	case CPU_IMPL_ARM:
347		switch (CPU_PART(ci->ci_midr)) {
348		case CPU_PART_CORTEX_A57:
349		case CPU_PART_CORTEX_A72:
350			ci->ci_trampoline_vectors =
351			    (vaddr_t)trampoline_vectors_loop_8;
352			break;
353		case CPU_PART_CORTEX_A76:
354		case CPU_PART_CORTEX_A76AE:
355		case CPU_PART_CORTEX_A77:
356		case CPU_PART_NEOVERSE_N1:
357			ci->ci_trampoline_vectors =
358			    (vaddr_t)trampoline_vectors_loop_24;
359			break;
360		case CPU_PART_CORTEX_A78:
361		case CPU_PART_CORTEX_A78AE:
362		case CPU_PART_CORTEX_A78C:
363		case CPU_PART_CORTEX_X1:
364		case CPU_PART_CORTEX_X2:
365		case CPU_PART_CORTEX_A710:
366		case CPU_PART_NEOVERSE_N2:
367		case CPU_PART_NEOVERSE_V1:
368			ci->ci_trampoline_vectors =
369			    (vaddr_t)trampoline_vectors_loop_32;
370			break;
371		}
372		break;
373	case CPU_IMPL_AMPERE:
374		switch (CPU_PART(ci->ci_midr)) {
375		case CPU_PART_AMPERE1:
376			ci->ci_trampoline_vectors =
377			    (vaddr_t)trampoline_vectors_loop_11;
378			break;
379		}
380		break;
381	}
382
383	/*
384	 * If we're not using a loop, let firmware decide.  This also
385	 * covers the original Spectre-V2 in addition to Spectre-BHB.
386	 */
387#if NPSCI > 0
388	if (ci->ci_trampoline_vectors == (vaddr_t)trampoline_vectors_none &&
389	    smccc_needs_arch_workaround_3()) {
390		ci->ci_flush_bp = cpu_flush_bp_noop;
391		if (psci_method() == PSCI_METHOD_HVC)
392			ci->ci_trampoline_vectors =
393			    (vaddr_t)trampoline_vectors_psci_hvc;
394		if (psci_method() == PSCI_METHOD_SMC)
395			ci->ci_trampoline_vectors =
396			    (vaddr_t)trampoline_vectors_psci_smc;
397	}
398#endif
399
400	/* Prefer CLRBHB to mitigate Spectre-BHB. */
401	id = READ_SPECIALREG(id_aa64isar2_el1);
402	if (ID_AA64ISAR2_CLRBHB(id) >= ID_AA64ISAR2_CLRBHB_IMPL)
403		ci->ci_trampoline_vectors = (vaddr_t)trampoline_vectors_clrbhb;
404
405	/* ECBHB tells us Spectre-BHB is mitigated. */
406	id = READ_SPECIALREG(id_aa64mmfr1_el1);
407	if (ID_AA64MMFR1_ECBHB(id) >= ID_AA64MMFR1_ECBHB_IMPL)
408		ci->ci_trampoline_vectors = (vaddr_t)trampoline_vectors_none;
409
410	/*
411	 * The architecture has been updated to explicitly tell us if
412	 * we're not vulnerable to Spectre-BHB.
413	 */
414	id = READ_SPECIALREG(id_aa64pfr0_el1);
415	if (ID_AA64PFR0_CSV2(id) >= ID_AA64PFR0_CSV2_HCXT)
416		ci->ci_trampoline_vectors = (vaddr_t)trampoline_vectors_none;
417}
418
419/*
420 * Enable mitigation for Spectre-V4 speculative store bypass
421 * vulnerabilities (CVE-2018-3639).
422 */
423void
424cpu_mitigate_spectre_v4(struct cpu_info *ci)
425{
426	uint64_t id;
427
428	switch (CPU_IMPL(ci->ci_midr)) {
429	case CPU_IMPL_ARM:
430		switch (CPU_PART(ci->ci_midr)) {
431		case CPU_PART_CORTEX_A35:
432		case CPU_PART_CORTEX_A53:
433		case CPU_PART_CORTEX_A55:
434			/* Not vulnerable. */
435			return;
436		}
437		break;
438	case CPU_IMPL_QCOM:
439		switch (CPU_PART(ci->ci_midr)) {
440		case CPU_PART_KRYO400_SILVER:
441			/* Not vulnerable. */
442			return;
443		}
444		break;
445	}
446
447	/* SSBS tells us Spectre-V4 is mitigated. */
448	id = READ_SPECIALREG(id_aa64pfr1_el1);
449	if (ID_AA64PFR1_SSBS(id) >= ID_AA64PFR1_SSBS_PSTATE)
450		return;
451
452	/* Enable firmware workaround if required. */
453	smccc_enable_arch_workaround_2();
454}
455
456void
457cpu_identify(struct cpu_info *ci)
458{
459	static uint64_t prev_id_aa64isar0;
460	static uint64_t prev_id_aa64isar1;
461	static uint64_t prev_id_aa64isar2;
462	static uint64_t prev_id_aa64mmfr0;
463	static uint64_t prev_id_aa64mmfr1;
464	static uint64_t prev_id_aa64pfr0;
465	static uint64_t prev_id_aa64pfr1;
466	uint64_t midr, impl, part;
467	uint64_t clidr, ccsidr, id;
468	uint32_t ctr, sets, ways, line;
469	const char *impl_name = NULL;
470	const char *part_name = NULL;
471	const char *il1p_name = NULL;
472	const char *sep;
473	struct cpu_cores *coreselecter = cpu_cores_none;
474	int ccidx;
475	int i;
476
477	midr = READ_SPECIALREG(midr_el1);
478	impl = CPU_IMPL(midr);
479	part = CPU_PART(midr);
480	ci->ci_midr = midr;
481
482	for (i = 0; cpu_implementers[i].name; i++) {
483		if (impl == cpu_implementers[i].id) {
484			impl_name = cpu_implementers[i].name;
485			coreselecter = cpu_implementers[i].corelist;
486			break;
487		}
488	}
489
490	for (i = 0; coreselecter[i].name; i++) {
491		if (part == coreselecter[i].id) {
492			part_name = coreselecter[i].name;
493			break;
494		}
495	}
496
497	if (impl_name && part_name) {
498		printf(" %s %s r%llup%llu", impl_name, part_name, CPU_VAR(midr),
499		    CPU_REV(midr));
500
501		if (CPU_IS_PRIMARY(ci))
502			snprintf(cpu_model, sizeof(cpu_model),
503			    "%s %s r%llup%llu", impl_name, part_name,
504			    CPU_VAR(midr), CPU_REV(midr));
505	} else {
506		printf(" Unknown, MIDR 0x%llx", midr);
507
508		if (CPU_IS_PRIMARY(ci))
509			snprintf(cpu_model, sizeof(cpu_model), "Unknown");
510	}
511
512	/* Print cache information. */
513
514	ctr = READ_SPECIALREG(ctr_el0);
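	/* The CTR_EL0 L1Ip field describes the L1 I-cache indexing policy. */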
515	switch (ctr & CTR_IL1P_MASK) {
516	case CTR_IL1P_AIVIVT:
517		il1p_name = "AIVIVT ";
518		break;
519	case CTR_IL1P_VIPT:
520		il1p_name = "VIPT ";
521		break;
522	case CTR_IL1P_PIPT:
523		il1p_name = "PIPT ";
524		break;
525	}
526
527	id = READ_SPECIALREG(id_aa64mmfr2_el1);
528	clidr = READ_SPECIALREG(clidr_el1);
529	if (ID_AA64MMFR2_CCIDX(id) > ID_AA64MMFR2_CCIDX_IMPL) {
530		/* Reserved value.  Don't print cache information. */
531		clidr = 0;
532	} else if (ID_AA64MMFR2_CCIDX(id) == ID_AA64MMFR2_CCIDX_IMPL) {
533		/* CCSIDR_EL1 uses the new 64-bit format. */
534		ccidx = 1;
535	} else {
536		/* CCSIDR_EL1 uses the old 32-bit format. */
537		ccidx = 0;
538	}
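	/*
	 * Walk the seven cache levels described by CLIDR_EL1 (3 bits
	 * per level), selecting each level and type via CSSELR_EL1 and
	 * decoding the geometry from CCSIDR_EL1.
	 */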
539	for (i = 0; i < 7; i++) {
540		if ((clidr & CLIDR_CTYPE_MASK) == 0)
541			break;
542		printf("\n%s:", ci->ci_dev->dv_xname);
543		sep = "";
544		if (clidr & CLIDR_CTYPE_INSN) {
545			WRITE_SPECIALREG(csselr_el1,
546			    i << CSSELR_LEVEL_SHIFT | CSSELR_IND);
547			__asm volatile("isb");
548			ccsidr = READ_SPECIALREG(ccsidr_el1);
549			if (ccidx) {
550				sets = CCSIDR_CCIDX_SETS(ccsidr);
551				ways = CCSIDR_CCIDX_WAYS(ccsidr);
552				line = CCSIDR_CCIDX_LINE_SIZE(ccsidr);
553			} else {
554				sets = CCSIDR_SETS(ccsidr);
555				ways = CCSIDR_WAYS(ccsidr);
556				line = CCSIDR_LINE_SIZE(ccsidr);
557			}
558			printf("%s %dKB %db/line %d-way L%d %sI-cache", sep,
559			    (sets * ways * line) / 1024, line, ways, (i + 1),
560			    il1p_name);
561			il1p_name = "";
562			sep = ",";
563		}
564		if (clidr & CLIDR_CTYPE_DATA) {
565			WRITE_SPECIALREG(csselr_el1, i << CSSELR_LEVEL_SHIFT);
566			__asm volatile("isb");
567			ccsidr = READ_SPECIALREG(ccsidr_el1);
568			if (ccidx) {
569				sets = CCSIDR_CCIDX_SETS(ccsidr);
570				ways = CCSIDR_CCIDX_WAYS(ccsidr);
571				line = CCSIDR_CCIDX_LINE_SIZE(ccsidr);
572			} else {
573				sets = CCSIDR_SETS(ccsidr);
574				ways = CCSIDR_WAYS(ccsidr);
575				line = CCSIDR_LINE_SIZE(ccsidr);
576			}
577			printf("%s %dKB %db/line %d-way L%d D-cache", sep,
578			    (sets * ways * line) / 1024, line, ways, (i + 1));
579			sep = ",";
580		}
581		if (clidr & CLIDR_CTYPE_UNIFIED) {
582			WRITE_SPECIALREG(csselr_el1, i << CSSELR_LEVEL_SHIFT);
583			__asm volatile("isb");
584			ccsidr = READ_SPECIALREG(ccsidr_el1);
585			if (ccidx) {
586				sets = CCSIDR_CCIDX_SETS(ccsidr);
587				ways = CCSIDR_CCIDX_WAYS(ccsidr);
588				line = CCSIDR_CCIDX_LINE_SIZE(ccsidr);
589			} else {
590				sets = CCSIDR_SETS(ccsidr);
591				ways = CCSIDR_WAYS(ccsidr);
592				line = CCSIDR_LINE_SIZE(ccsidr);
593			}
594			printf("%s %dKB %db/line %d-way L%d cache", sep,
595			    (sets * ways * line) / 1024, line, ways, (i + 1));
596		}
597		clidr >>= 3;
598	}
599
600	cpu_mitigate_spectre_v2(ci);
601	cpu_mitigate_spectre_bhb(ci);
602	cpu_mitigate_spectre_v4(ci);
603
604	/*
605	 * Apple CPUs provide detailed information for SError.
606	 */
607	if (impl == CPU_IMPL_APPLE)
608		ci->ci_serror = cpu_serror_apple;
609
610	/*
611	 * Skip printing CPU features if they are identical to the
612	 * previous CPU.
613	 */
614	if (READ_SPECIALREG(id_aa64isar0_el1) == prev_id_aa64isar0 &&
615	    READ_SPECIALREG(id_aa64isar1_el1) == prev_id_aa64isar1 &&
616	    READ_SPECIALREG(id_aa64isar2_el1) == prev_id_aa64isar2 &&
617	    READ_SPECIALREG(id_aa64mmfr0_el1) == prev_id_aa64mmfr0 &&
618	    READ_SPECIALREG(id_aa64mmfr1_el1) == prev_id_aa64mmfr1 &&
619	    READ_SPECIALREG(id_aa64pfr0_el1) == prev_id_aa64pfr0 &&
620	    READ_SPECIALREG(id_aa64pfr1_el1) == prev_id_aa64pfr1)
621		return;
622
623	/*
624	 * Print CPU features encoded in the ID registers.
625	 */
626
627	if (READ_SPECIALREG(id_aa64isar0_el1) != cpu_id_aa64isar0) {
628		printf("\n%s: mismatched ID_AA64ISAR0_EL1",
629		    ci->ci_dev->dv_xname);
630	}
631	if (READ_SPECIALREG(id_aa64isar1_el1) != cpu_id_aa64isar1) {
632		printf("\n%s: mismatched ID_AA64ISAR1_EL1",
633		    ci->ci_dev->dv_xname);
634	}
635	if (READ_SPECIALREG(id_aa64isar2_el1) != cpu_id_aa64isar2) {
636		printf("\n%s: mismatched ID_AA64ISAR2_EL1",
637		    ci->ci_dev->dv_xname);
638	}
639	id = READ_SPECIALREG(id_aa64pfr0_el1);
	/* Allow CSV2/CSV3 to be different. */
641	id &= ~ID_AA64PFR0_CSV2_MASK;
642	id &= ~ID_AA64PFR0_CSV3_MASK;
643	/* Ignore 32-bit support in all exception levels. */
644	id &= ~ID_AA64PFR0_EL0_MASK;
645	id &= ~ID_AA64PFR0_EL1_MASK;
646	id &= ~ID_AA64PFR0_EL2_MASK;
647	id &= ~ID_AA64PFR0_EL3_MASK;
648	if (id != cpu_id_aa64pfr0) {
649		printf("\n%s: mismatched ID_AA64PFR0_EL1",
650		    ci->ci_dev->dv_xname);
651	}
652	if (READ_SPECIALREG(id_aa64pfr1_el1) != cpu_id_aa64pfr1) {
653		printf("\n%s: mismatched ID_AA64PFR1_EL1",
654		    ci->ci_dev->dv_xname);
655	}
656
657	printf("\n%s: ", ci->ci_dev->dv_xname);
658
659	/*
660	 * ID_AA64ISAR0
661	 */
662	id = READ_SPECIALREG(id_aa64isar0_el1);
663	sep = "";
664
665	if (ID_AA64ISAR0_RNDR(id) >= ID_AA64ISAR0_RNDR_IMPL) {
666		printf("%sRNDR", sep);
667		sep = ",";
668	}
669
670	if (ID_AA64ISAR0_TLB(id) >= ID_AA64ISAR0_TLB_IOS) {
671		printf("%sTLBIOS", sep);
672		sep = ",";
673	}
674	if (ID_AA64ISAR0_TLB(id) >= ID_AA64ISAR0_TLB_IRANGE)
675		printf("+IRANGE");
676
677	if (ID_AA64ISAR0_TS(id) >= ID_AA64ISAR0_TS_BASE) {
678		printf("%sTS", sep);
679		sep = ",";
680	}
681	if (ID_AA64ISAR0_TS(id) >= ID_AA64ISAR0_TS_AXFLAG)
682		printf("+AXFLAG");
683
684	if (ID_AA64ISAR0_FHM(id) >= ID_AA64ISAR0_FHM_IMPL) {
685		printf("%sFHM", sep);
686		sep = ",";
687	}
688
689	if (ID_AA64ISAR0_DP(id) >= ID_AA64ISAR0_DP_IMPL) {
690		printf("%sDP", sep);
691		sep = ",";
692	}
693
694	if (ID_AA64ISAR0_SM4(id) >= ID_AA64ISAR0_SM4_IMPL) {
695		printf("%sSM4", sep);
696		sep = ",";
697	}
698
699	if (ID_AA64ISAR0_SM3(id) >= ID_AA64ISAR0_SM3_IMPL) {
700		printf("%sSM3", sep);
701		sep = ",";
702	}
703
704	if (ID_AA64ISAR0_SHA3(id) >= ID_AA64ISAR0_SHA3_IMPL) {
705		printf("%sSHA3", sep);
706		sep = ",";
707	}
708
709	if (ID_AA64ISAR0_RDM(id) >= ID_AA64ISAR0_RDM_IMPL) {
710		printf("%sRDM", sep);
711		sep = ",";
712	}
713
714	if (ID_AA64ISAR0_ATOMIC(id) >= ID_AA64ISAR0_ATOMIC_IMPL) {
715		printf("%sAtomic", sep);
716		sep = ",";
717	}
718
719	if (ID_AA64ISAR0_CRC32(id) >= ID_AA64ISAR0_CRC32_BASE) {
720		printf("%sCRC32", sep);
721		sep = ",";
722	}
723
724	if (ID_AA64ISAR0_SHA2(id) >= ID_AA64ISAR0_SHA2_BASE) {
725		printf("%sSHA2", sep);
726		sep = ",";
727	}
728	if (ID_AA64ISAR0_SHA2(id) >= ID_AA64ISAR0_SHA2_512)
729		printf("+SHA512");
730
731	if (ID_AA64ISAR0_SHA1(id) >= ID_AA64ISAR0_SHA1_BASE) {
732		printf("%sSHA1", sep);
733		sep = ",";
734	}
735
736	if (ID_AA64ISAR0_AES(id) >= ID_AA64ISAR0_AES_BASE) {
737		printf("%sAES", sep);
738		sep = ",";
739#ifdef CRYPTO
740		arm64_has_aes = 1;
741#endif
742	}
743	if (ID_AA64ISAR0_AES(id) >= ID_AA64ISAR0_AES_PMULL)
744		printf("+PMULL");
745
746	/*
747	 * ID_AA64ISAR1
748	 */
749	id = READ_SPECIALREG(id_aa64isar1_el1);
750
751	if (ID_AA64ISAR1_LS64(id) >= ID_AA64ISAR1_LS64_BASE) {
752		printf("%sLS64", sep);
753		sep = ",";
754	}
755	if (ID_AA64ISAR1_LS64(id) >= ID_AA64ISAR1_LS64_V)
756		printf("+V");
757	if (ID_AA64ISAR1_LS64(id) >= ID_AA64ISAR1_LS64_ACCDATA)
758		printf("+ACCDATA");
759
760	if (ID_AA64ISAR1_XS(id) >= ID_AA64ISAR1_XS_IMPL) {
761		printf("%sXS", sep);
762		sep = ",";
763	}
764
765	if (ID_AA64ISAR1_I8MM(id) >= ID_AA64ISAR1_I8MM_IMPL) {
766		printf("%sI8MM", sep);
767		sep = ",";
768	}
769
770	if (ID_AA64ISAR1_DGH(id) >= ID_AA64ISAR1_DGH_IMPL) {
771		printf("%sDGH", sep);
772		sep = ",";
773	}
774
775	if (ID_AA64ISAR1_BF16(id) >= ID_AA64ISAR1_BF16_BASE) {
776		printf("%sBF16", sep);
777		sep = ",";
778	}
779	if (ID_AA64ISAR1_BF16(id) >= ID_AA64ISAR1_BF16_EBF)
780		printf("+EBF");
781
782	if (ID_AA64ISAR1_SPECRES(id) >= ID_AA64ISAR1_SPECRES_IMPL) {
783		printf("%sSPECRES", sep);
784		sep = ",";
785	}
786
787	if (ID_AA64ISAR1_SB(id) >= ID_AA64ISAR1_SB_IMPL) {
788		printf("%sSB", sep);
789		sep = ",";
790	}
791
792	if (ID_AA64ISAR1_FRINTTS(id) >= ID_AA64ISAR1_FRINTTS_IMPL) {
793		printf("%sFRINTTS", sep);
794		sep = ",";
795	}
796
797	if (ID_AA64ISAR1_GPI(id) >= ID_AA64ISAR1_GPI_IMPL) {
798		printf("%sGPI", sep);
799		sep = ",";
800	}
801
802	if (ID_AA64ISAR1_GPA(id) >= ID_AA64ISAR1_GPA_IMPL) {
803		printf("%sGPA", sep);
804		sep = ",";
805	}
806
807	if (ID_AA64ISAR1_LRCPC(id) >= ID_AA64ISAR1_LRCPC_BASE) {
808		printf("%sLRCPC", sep);
809		sep = ",";
810	}
811	if (ID_AA64ISAR1_LRCPC(id) >= ID_AA64ISAR1_LRCPC_LDAPUR)
812		printf("+LDAPUR");
813
814	if (ID_AA64ISAR1_FCMA(id) >= ID_AA64ISAR1_FCMA_IMPL) {
815		printf("%sFCMA", sep);
816		sep = ",";
817	}
818
819	if (ID_AA64ISAR1_JSCVT(id) >= ID_AA64ISAR1_JSCVT_IMPL) {
820		printf("%sJSCVT", sep);
821		sep = ",";
822	}
823
824	if (ID_AA64ISAR1_API(id) >= ID_AA64ISAR1_API_BASE) {
825		printf("%sAPI", sep);
826		sep = ",";
827	}
828	if (ID_AA64ISAR1_API(id) >= ID_AA64ISAR1_API_PAC)
829		printf("+PAC");
830
831	if (ID_AA64ISAR1_APA(id) >= ID_AA64ISAR1_APA_BASE) {
832		printf("%sAPA", sep);
833		sep = ",";
834	}
835	if (ID_AA64ISAR1_APA(id) >= ID_AA64ISAR1_APA_PAC)
836		printf("+PAC");
837
838	if (ID_AA64ISAR1_DPB(id) >= ID_AA64ISAR1_DPB_IMPL) {
839		printf("%sDPB", sep);
840		sep = ",";
841	}
842
843	/*
844	 * ID_AA64ISAR2
845	 */
846	id = READ_SPECIALREG(id_aa64isar2_el1);
847
848	if (ID_AA64ISAR2_CLRBHB(id) >= ID_AA64ISAR2_CLRBHB_IMPL) {
849		printf("%sCLRBHB", sep);
850		sep = ",";
851	}
852
853	/*
854	 * ID_AA64MMFR0
855	 *
856	 * We only print ASIDBits for now.
857	 */
858	id = READ_SPECIALREG(id_aa64mmfr0_el1);
859
860	if (ID_AA64MMFR0_ASID_BITS(id) == ID_AA64MMFR0_ASID_BITS_16) {
861		printf("%sASID16", sep);
862		sep = ",";
863	}
864
865	/*
866	 * ID_AA64MMFR1
867	 *
868	 * We omit printing most virtualization related fields for now.
869	 */
870	id = READ_SPECIALREG(id_aa64mmfr1_el1);
871
872	if (ID_AA64MMFR1_SPECSEI(id) >= ID_AA64MMFR1_SPECSEI_IMPL) {
873		printf("%sSpecSEI", sep);
874		sep = ",";
875	}
876
877	if (ID_AA64MMFR1_PAN(id) >= ID_AA64MMFR1_PAN_IMPL) {
878		printf("%sPAN", sep);
879		sep = ",";
880	}
881	if (ID_AA64MMFR1_PAN(id) >= ID_AA64MMFR1_PAN_ATS1E1)
882		printf("+ATS1E1");
883	if (ID_AA64MMFR1_PAN(id) >= ID_AA64MMFR1_PAN_EPAN)
884		printf("+EPAN");
885
886	if (ID_AA64MMFR1_LO(id) >= ID_AA64MMFR1_LO_IMPL) {
887		printf("%sLO", sep);
888		sep = ",";
889	}
890
891	if (ID_AA64MMFR1_HPDS(id) >= ID_AA64MMFR1_HPDS_IMPL) {
892		printf("%sHPDS", sep);
893		sep = ",";
894	}
895
896	if (ID_AA64MMFR1_VH(id) >= ID_AA64MMFR1_VH_IMPL) {
897		printf("%sVH", sep);
898		sep = ",";
899	}
900
901	if (ID_AA64MMFR1_HAFDBS(id) >= ID_AA64MMFR1_HAFDBS_AF) {
902		printf("%sHAF", sep);
903		sep = ",";
904	}
905	if (ID_AA64MMFR1_HAFDBS(id) >= ID_AA64MMFR1_HAFDBS_AF_DBS)
		printf("+DBS");
907
908	if (ID_AA64MMFR1_ECBHB(id) >= ID_AA64MMFR1_ECBHB_IMPL) {
909		printf("%sECBHB", sep);
910		sep = ",";
911	}
912
913	/*
914	 * ID_AA64PFR0
915	 */
916	id = READ_SPECIALREG(id_aa64pfr0_el1);
917
918	if (ID_AA64PFR0_CSV3(id) >= ID_AA64PFR0_CSV3_IMPL) {
919		printf("%sCSV3", sep);
920		sep = ",";
921	}
922
923	if (ID_AA64PFR0_CSV2(id) >= ID_AA64PFR0_CSV2_IMPL) {
924		printf("%sCSV2", sep);
925		sep = ",";
926	}
927	if (ID_AA64PFR0_CSV2(id) >= ID_AA64PFR0_CSV2_SCXT)
928		printf("+SCXT");
929	if (ID_AA64PFR0_CSV2(id) >= ID_AA64PFR0_CSV2_HCXT)
930		printf("+HCXT");
931
932	if (ID_AA64PFR0_DIT(id) >= ID_AA64PFR0_DIT_IMPL) {
933		printf("%sDIT", sep);
934		sep = ",";
935	}
936
937	/*
938	 * ID_AA64PFR1
939	 */
940	id = READ_SPECIALREG(id_aa64pfr1_el1);
941
942	if (ID_AA64PFR1_BT(id) >= ID_AA64PFR1_BT_IMPL) {
943		printf("%sBT", sep);
944		sep = ",";
945	}
946
947	if (ID_AA64PFR1_SSBS(id) >= ID_AA64PFR1_SSBS_PSTATE) {
948		printf("%sSSBS", sep);
949		sep = ",";
950	}
951	if (ID_AA64PFR1_SSBS(id) >= ID_AA64PFR1_SSBS_PSTATE_MSR)
952		printf("+MSR");
953
954	if (ID_AA64PFR1_MTE(id) >= ID_AA64PFR1_MTE_IMPL) {
955		printf("%sMTE", sep);
956		sep = ",";
957	}
958
959	prev_id_aa64isar0 = READ_SPECIALREG(id_aa64isar0_el1);
960	prev_id_aa64isar1 = READ_SPECIALREG(id_aa64isar1_el1);
961	prev_id_aa64isar2 = READ_SPECIALREG(id_aa64isar2_el1);
962	prev_id_aa64mmfr0 = READ_SPECIALREG(id_aa64mmfr0_el1);
963	prev_id_aa64mmfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
964	prev_id_aa64pfr0 = READ_SPECIALREG(id_aa64pfr0_el1);
965	prev_id_aa64pfr1 = READ_SPECIALREG(id_aa64pfr1_el1);
966
967#ifdef CPU_DEBUG
968	id = READ_SPECIALREG(id_aa64afr0_el1);
969	printf("\nID_AA64AFR0_EL1: 0x%016llx", id);
970	id = READ_SPECIALREG(id_aa64afr1_el1);
971	printf("\nID_AA64AFR1_EL1: 0x%016llx", id);
972	id = READ_SPECIALREG(id_aa64dfr0_el1);
973	printf("\nID_AA64DFR0_EL1: 0x%016llx", id);
974	id = READ_SPECIALREG(id_aa64dfr1_el1);
975	printf("\nID_AA64DFR1_EL1: 0x%016llx", id);
976	id = READ_SPECIALREG(id_aa64isar0_el1);
977	printf("\nID_AA64ISAR0_EL1: 0x%016llx", id);
978	id = READ_SPECIALREG(id_aa64isar1_el1);
979	printf("\nID_AA64ISAR1_EL1: 0x%016llx", id);
980	id = READ_SPECIALREG(id_aa64isar2_el1);
981	printf("\nID_AA64ISAR2_EL1: 0x%016llx", id);
982	id = READ_SPECIALREG(id_aa64mmfr0_el1);
983	printf("\nID_AA64MMFR0_EL1: 0x%016llx", id);
984	id = READ_SPECIALREG(id_aa64mmfr1_el1);
985	printf("\nID_AA64MMFR1_EL1: 0x%016llx", id);
986	id = READ_SPECIALREG(id_aa64mmfr2_el1);
987	printf("\nID_AA64MMFR2_EL1: 0x%016llx", id);
988	id = READ_SPECIALREG(id_aa64pfr0_el1);
989	printf("\nID_AA64PFR0_EL1: 0x%016llx", id);
990	id = READ_SPECIALREG(id_aa64pfr1_el1);
991	printf("\nID_AA64PFR1_EL1: 0x%016llx", id);
992#endif
993}
994
995void	cpu_init(void);
996int	cpu_start_secondary(struct cpu_info *ci, int, uint64_t);
997int	cpu_clockspeed(int *);
998
999int
1000cpu_match(struct device *parent, void *cfdata, void *aux)
1001{
1002	struct fdt_attach_args *faa = aux;
1003	uint64_t mpidr = READ_SPECIALREG(mpidr_el1);
1004	char buf[32];
1005
1006	if (OF_getprop(faa->fa_node, "device_type", buf, sizeof(buf)) <= 0 ||
1007	    strcmp(buf, "cpu") != 0)
1008		return 0;
1009
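	/*
	 * Attach CPU nodes until we hit MAXCPUS; beyond that, still
	 * match the node whose MPIDR affinity identifies the boot CPU.
	 */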
1010	if (ncpus < MAXCPUS || faa->fa_reg[0].addr == (mpidr & MPIDR_AFF))
1011		return 1;
1012
1013	return 0;
1014}
1015
1016void
1017cpu_attach(struct device *parent, struct device *dev, void *aux)
1018{
1019	struct fdt_attach_args *faa = aux;
1020	struct cpu_info *ci;
1021	void *kstack;
1022#ifdef MULTIPROCESSOR
1023	uint64_t mpidr = READ_SPECIALREG(mpidr_el1);
1024#endif
1025	uint32_t opp;
1026
1027	KASSERT(faa->fa_nreg > 0);
1028
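	/*
	 * The CPU whose MPIDR affinity matches the "reg" property is
	 * the boot CPU; all others are secondary APs that still need
	 * to be spun up.
	 */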
1029#ifdef MULTIPROCESSOR
1030	if (faa->fa_reg[0].addr == (mpidr & MPIDR_AFF)) {
1031		ci = &cpu_info_primary;
1032		ci->ci_flags |= CPUF_RUNNING | CPUF_PRESENT | CPUF_PRIMARY;
1033	} else {
1034		ci = malloc(sizeof(*ci), M_DEVBUF, M_WAITOK | M_ZERO);
1035		cpu_info[dev->dv_unit] = ci;
1036		ci->ci_next = cpu_info_list->ci_next;
1037		cpu_info_list->ci_next = ci;
1038		ci->ci_flags |= CPUF_AP;
1039		ncpus++;
1040	}
1041#else
1042	ci = &cpu_info_primary;
1043#endif
1044
1045	ci->ci_dev = dev;
1046	ci->ci_cpuid = dev->dv_unit;
1047	ci->ci_mpidr = faa->fa_reg[0].addr;
1048	ci->ci_node = faa->fa_node;
1049	ci->ci_self = ci;
1050
1051	printf(" mpidr %llx:", ci->ci_mpidr);
1052
1053	kstack = km_alloc(USPACE, &kv_any, &kp_zero, &kd_waitok);
1054	ci->ci_el1_stkend = (vaddr_t)kstack + USPACE - 16;
1055	ci->ci_trampoline_vectors = (vaddr_t)trampoline_vectors_none;
1056
1057#ifdef MULTIPROCESSOR
1058	if (ci->ci_flags & CPUF_AP) {
1059		char buf[32];
1060		uint64_t spinup_data = 0;
1061		int spinup_method = 0;
1062		int timeout = 10000;
1063		int len;
1064
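		/*
		 * Pick the spin-up mechanism from the device tree's
		 * "enable-method": PSCI CPU_ON (method 1) or a
		 * spin-table release address (method 2).
		 */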
1065		len = OF_getprop(ci->ci_node, "enable-method",
1066		    buf, sizeof(buf));
1067		if (strcmp(buf, "psci") == 0) {
1068			spinup_method = 1;
1069		} else if (strcmp(buf, "spin-table") == 0) {
1070			spinup_method = 2;
1071			spinup_data = OF_getpropint64(ci->ci_node,
1072			    "cpu-release-addr", 0);
1073		}
1074
1075		clockqueue_init(&ci->ci_queue);
1076		sched_init_cpu(ci);
1077		if (cpu_start_secondary(ci, spinup_method, spinup_data)) {
1078			atomic_setbits_int(&ci->ci_flags, CPUF_IDENTIFY);
1079			__asm volatile("dsb sy; sev" ::: "memory");
1080
1081			while ((ci->ci_flags & CPUF_IDENTIFIED) == 0 &&
1082			    --timeout)
1083				delay(1000);
1084			if (timeout == 0) {
1085				printf(" failed to identify");
1086				ci->ci_flags = 0;
1087			}
1088		} else {
1089			printf(" failed to spin up");
1090			ci->ci_flags = 0;
1091		}
1092	} else {
1093#endif
1094		cpu_id_aa64isar0 = READ_SPECIALREG(id_aa64isar0_el1);
1095		cpu_id_aa64isar1 = READ_SPECIALREG(id_aa64isar1_el1);
1096		cpu_id_aa64isar2 = READ_SPECIALREG(id_aa64isar2_el1);
1097		cpu_id_aa64pfr0 = READ_SPECIALREG(id_aa64pfr0_el1);
1098		cpu_id_aa64pfr1 = READ_SPECIALREG(id_aa64pfr1_el1);
1099
		/*
		 * The CSV2/CSV3 "features" are handled on a
		 * per-processor basis, so it is fine if these fields
		 * differ between CPU cores.  Mask them off to avoid
		 * exporting them to userland.
		 */
1106		cpu_id_aa64pfr0 &= ~ID_AA64PFR0_CSV2_MASK;
1107		cpu_id_aa64pfr0 &= ~ID_AA64PFR0_CSV3_MASK;
1108
1109		/*
1110		 * We only support 64-bit mode, so we don't care about
1111		 * differences in support for 32-bit mode between
1112		 * cores.  Mask off these fields as well.
1113		 */
1114		cpu_id_aa64pfr0 &= ~ID_AA64PFR0_EL0_MASK;
1115		cpu_id_aa64pfr0 &= ~ID_AA64PFR0_EL1_MASK;
1116		cpu_id_aa64pfr0 &= ~ID_AA64PFR0_EL2_MASK;
1117		cpu_id_aa64pfr0 &= ~ID_AA64PFR0_EL3_MASK;
1118
1119		/*
1120		 * Lenovo X13s ships with broken EL2 firmware that
1121		 * hangs the machine if we enable PAuth.
1122		 */
1123		if (hw_vendor && hw_prod && strcmp(hw_vendor, "LENOVO") == 0) {
1124			if (strncmp(hw_prod, "21BX", 4) == 0 ||
1125			    strncmp(hw_prod, "21BY", 4) == 0) {
1126				cpu_id_aa64isar1 &= ~ID_AA64ISAR1_APA_MASK;
1127				cpu_id_aa64isar1 &= ~ID_AA64ISAR1_GPA_MASK;
1128			}
1129		}
1130
1131		cpu_identify(ci);
1132
1133		if (OF_getproplen(ci->ci_node, "clocks") > 0) {
1134			cpu_node = ci->ci_node;
1135			cpu_cpuspeed = cpu_clockspeed;
1136		}
1137
1138		cpu_init();
1139#ifdef MULTIPROCESSOR
1140	}
1141#endif
1142
1143#if NKSTAT > 0
1144	cpu_kstat_attach(ci);
1145#endif
1146
1147	opp = OF_getpropint(ci->ci_node, "operating-points-v2", 0);
1148	if (opp)
1149		cpu_opp_init(ci, opp);
1150
1151	cpu_psci_init(ci);
1152
1153	printf("\n");
1154}
1155
1156void
1157cpu_init(void)
1158{
1159	uint64_t id_aa64mmfr1, sctlr;
1160	uint64_t id_aa64pfr0;
1161	uint64_t tcr;
1162
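	/*
	 * Point TTBR0 at the kernel pmap's translation table, size the
	 * userland address space (T0SZ), take the ASID from TTBR1
	 * (TCR_A1), and get rid of any stale TLB entries.
	 */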
1163	WRITE_SPECIALREG(ttbr0_el1, pmap_kernel()->pm_pt0pa);
1164	__asm volatile("isb");
1165	tcr = READ_SPECIALREG(tcr_el1);
1166	tcr &= ~TCR_T0SZ(0x3f);
1167	tcr |= TCR_T0SZ(64 - USER_SPACE_BITS);
1168	tcr |= TCR_A1;
1169	WRITE_SPECIALREG(tcr_el1, tcr);
1170	cpu_tlb_flush();
1171
1172	/* Enable PAN. */
1173	id_aa64mmfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
1174	if (ID_AA64MMFR1_PAN(id_aa64mmfr1) >= ID_AA64MMFR1_PAN_IMPL) {
1175		sctlr = READ_SPECIALREG(sctlr_el1);
1176		sctlr &= ~SCTLR_SPAN;
1177		if (ID_AA64MMFR1_PAN(id_aa64mmfr1) >= ID_AA64MMFR1_PAN_EPAN)
1178			sctlr |= SCTLR_EPAN;
1179		WRITE_SPECIALREG(sctlr_el1, sctlr);
1180	}
1181
1182	/* Enable DIT. */
1183	id_aa64pfr0 = READ_SPECIALREG(id_aa64pfr0_el1);
1184	if (ID_AA64PFR0_DIT(id_aa64pfr0) >= ID_AA64PFR0_DIT_IMPL)
1185		__asm volatile (".arch armv8.4-a; msr dit, #1");
1186
1187	/* Enable PAuth. */
1188	if (ID_AA64ISAR1_APA(cpu_id_aa64isar1) >= ID_AA64ISAR1_APA_BASE ||
1189	    ID_AA64ISAR1_API(cpu_id_aa64isar1) >= ID_AA64ISAR1_API_BASE) {
1190		sctlr = READ_SPECIALREG(sctlr_el1);
1191		sctlr |= SCTLR_EnIA | SCTLR_EnDA;
1192		sctlr |= SCTLR_EnIB | SCTLR_EnDB;
1193		WRITE_SPECIALREG(sctlr_el1, sctlr);
1194	}
1195
1196	/* Enable strict BTI compatibility for PACIASP and PACIBSP. */
1197	if (ID_AA64PFR1_BT(cpu_id_aa64pfr1) >= ID_AA64PFR1_BT_IMPL) {
1198		sctlr = READ_SPECIALREG(sctlr_el1);
1199		sctlr |= SCTLR_BT0 | SCTLR_BT1;
1200		WRITE_SPECIALREG(sctlr_el1, sctlr);
1201	}
1202
1203	/* Initialize debug registers. */
1204	WRITE_SPECIALREG(mdscr_el1, DBG_MDSCR_TDCC);
1205	WRITE_SPECIALREG(oslar_el1, 0);
1206}
1207
1208void
1209cpu_flush_bp_noop(void)
1210{
1211}
1212
1213void
1214cpu_flush_bp_psci(void)
1215{
1216#if NPSCI > 0
1217	psci_flush_bp();
1218#endif
1219}
1220
1221void
1222cpu_serror_apple(void)
1223{
1224	__asm volatile("dsb sy; isb" ::: "memory");
1225	printf("l2c_err_sts 0x%llx\n", READ_SPECIALREG(s3_3_c15_c8_0));
1226	printf("l2c_err_adr 0x%llx\n", READ_SPECIALREG(s3_3_c15_c9_0));
1227	printf("l2c_err_inf 0x%llx\n", READ_SPECIALREG(s3_3_c15_c10_0));
1228}
1229
1230int
1231cpu_clockspeed(int *freq)
1232{
1233	*freq = clock_get_frequency(cpu_node, NULL) / 1000000;
1234	return 0;
1235}
1236
1237#ifdef MULTIPROCESSOR
1238
1239void cpu_boot_secondary(struct cpu_info *ci);
1240void cpu_hatch_secondary(void);
1241void cpu_hatch_secondary_spin(void);
1242
1243void cpu_suspend_cycle(void);
1244
1245void
1246cpu_boot_secondary_processors(void)
1247{
1248	struct cpu_info *ci;
1249	CPU_INFO_ITERATOR cii;
1250
1251	CPU_INFO_FOREACH(cii, ci) {
1252		if ((ci->ci_flags & CPUF_AP) == 0)
1253			continue;
1254		if (ci->ci_flags & CPUF_PRIMARY)
1255			continue;
1256
1257		ci->ci_randseed = (arc4random() & 0x7fffffff) + 1;
1258		cpu_boot_secondary(ci);
1259	}
1260}
1261
1262void
1263cpu_start_spin_table(struct cpu_info *ci, uint64_t start, uint64_t data)
1264{
1265	extern paddr_t cpu_hatch_ci;
1266
1267	pmap_extract(pmap_kernel(), (vaddr_t)ci, &cpu_hatch_ci);
1268	cpu_dcache_wb_range((vaddr_t)&cpu_hatch_ci, sizeof(paddr_t));
1269
	/* Reuse the zero page region as a scratch mapping for this core. */
1271	vaddr_t start_pg = zero_page + (PAGE_SIZE * ci->ci_cpuid);
1272	paddr_t pa = trunc_page(data);
1273	uint64_t offset = data - pa;
1274	uint64_t *startvec = (uint64_t *)(start_pg + offset);
1275
1276	pmap_kenter_cache(start_pg, pa, PROT_READ|PROT_WRITE, PMAP_CACHE_CI);
1277
1278	*startvec = start;
1279	__asm volatile("dsb sy; sev" ::: "memory");
1280
1281	pmap_kremove(start_pg, PAGE_SIZE);
1282}
1283
1284int
1285cpu_start_secondary(struct cpu_info *ci, int method, uint64_t data)
1286{
1287	vaddr_t start_va;
1288	paddr_t ci_pa, start_pa;
1289	uint64_t ttbr1;
1290	int32_t status;
1291
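	/*
	 * Stash the kernel TTBR1 in the cpu_info and write it back to
	 * memory so the secondary can read it before its caches are
	 * enabled.
	 */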
1292	__asm("mrs %x0, ttbr1_el1": "=r"(ttbr1));
1293	ci->ci_ttbr1 = ttbr1;
1294	cpu_dcache_wb_range((vaddr_t)ci, sizeof(*ci));
1295
1296	switch (method) {
1297#if NPSCI > 0
1298	case 1:
1299		/* psci */
1300		start_va = (vaddr_t)cpu_hatch_secondary;
1301		pmap_extract(pmap_kernel(), start_va, &start_pa);
1302		pmap_extract(pmap_kernel(), (vaddr_t)ci, &ci_pa);
1303		status = psci_cpu_on(ci->ci_mpidr, start_pa, ci_pa);
1304		return (status == PSCI_SUCCESS);
1305#endif
1306	case 2:
1307		/* spin-table */
1308		start_va = (vaddr_t)cpu_hatch_secondary_spin;
1309		pmap_extract(pmap_kernel(), start_va, &start_pa);
1310		cpu_start_spin_table(ci, start_pa, data);
1311		return 1;
1312	}
1313
1314	return 0;
1315}
1316
1317void
1318cpu_boot_secondary(struct cpu_info *ci)
1319{
1320	atomic_setbits_int(&ci->ci_flags, CPUF_GO);
1321	__asm volatile("dsb sy; sev" ::: "memory");
1322
1323	/*
1324	 * Send an interrupt as well to make sure the CPU wakes up
1325	 * regardless of whether it is in a WFE or a WFI loop.
1326	 */
1327	arm_send_ipi(ci, ARM_IPI_NOP);
1328
1329	while ((ci->ci_flags & CPUF_RUNNING) == 0)
1330		__asm volatile("wfe");
1331}
1332
1333void
1334cpu_init_secondary(struct cpu_info *ci)
1335{
1336	struct proc *p;
1337	struct pcb *pcb;
1338	struct trapframe *tf;
1339	struct switchframe *sf;
1340	int s;
1341
1342	ci->ci_flags |= CPUF_PRESENT;
1343	__asm volatile("dsb sy" ::: "memory");
1344
1345	if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) {
1346		while ((ci->ci_flags & CPUF_IDENTIFY) == 0)
1347			__asm volatile("wfe");
1348
1349		cpu_identify(ci);
1350		atomic_setbits_int(&ci->ci_flags, CPUF_IDENTIFIED);
1351		__asm volatile("dsb sy" ::: "memory");
1352	}
1353
1354	while ((ci->ci_flags & CPUF_GO) == 0)
1355		__asm volatile("wfe");
1356
1357	cpu_init();
1358
1359	/*
1360	 * Start from a clean slate regardless of whether this is the
1361	 * initial power up or a wakeup of a suspended CPU.
1362	 */
1363
1364	ci->ci_curproc = NULL;
1365	ci->ci_curpcb = NULL;
1366	ci->ci_curpm = NULL;
1367	ci->ci_cpl = IPL_NONE;
1368	ci->ci_ipending = 0;
1369	ci->ci_idepth = 0;
1370
1371#ifdef DIAGNOSTIC
1372	ci->ci_mutex_level = 0;
1373#endif
1374
1375	/*
1376	 * Re-create the switchframe for this CPUs idle process.
1377	 */
1378
1379	p = ci->ci_schedstate.spc_idleproc;
1380	pcb = &p->p_addr->u_pcb;
1381
1382	tf = (struct trapframe *)((u_long)p->p_addr
1383	    + USPACE
1384	    - sizeof(struct trapframe)
1385	    - 0x10);
1386
1387	tf = (struct trapframe *)STACKALIGN(tf);
1388	pcb->pcb_tf = tf;
1389
1390	sf = (struct switchframe *)tf - 1;
1391	sf->sf_x19 = (uint64_t)sched_idle;
1392	sf->sf_x20 = (uint64_t)ci;
1393	sf->sf_lr = (uint64_t)proc_trampoline;
1394	pcb->pcb_sp = (uint64_t)sf;
1395
1396	s = splhigh();
1397	arm_intr_cpu_enable();
1398	cpu_startclock();
1399
1400	atomic_setbits_int(&ci->ci_flags, CPUF_RUNNING);
1401	__asm volatile("dsb sy; sev" ::: "memory");
1402
1403	spllower(IPL_NONE);
1404
1405	sched_toidle();
1406}
1407
1408void
1409cpu_halt(void)
1410{
1411	struct cpu_info *ci = curcpu();
1412	vaddr_t start_va;
1413	paddr_t ci_pa, start_pa;
1414	int count = 0;
1415	u_long psw;
1416	int32_t status;
1417
1418	KERNEL_ASSERT_UNLOCKED();
1419	SCHED_ASSERT_UNLOCKED();
1420
1421	start_va = (vaddr_t)cpu_hatch_secondary;
1422	pmap_extract(pmap_kernel(), start_va, &start_pa);
1423	pmap_extract(pmap_kernel(), (vaddr_t)ci, &ci_pa);
1424
1425	psw = intr_disable();
1426
1427	atomic_clearbits_int(&ci->ci_flags,
1428	    CPUF_RUNNING | CPUF_PRESENT | CPUF_GO);
1429
1430#if NPSCI > 0
1431	if (psci_can_suspend())
1432		psci_cpu_off();
1433#endif
1434
1435	/*
1436	 * If we failed to turn ourselves off using PSCI, declare that
1437	 * we're still present and spin in a low power state until
1438	 * we're told to wake up again by the primary CPU.
1439	 */
1440
1441	atomic_setbits_int(&ci->ci_flags, CPUF_PRESENT);
1442
1443	/* Mask clock interrupts. */
1444	WRITE_SPECIALREG(cntv_ctl_el0,
1445	    READ_SPECIALREG(cntv_ctl_el0) | CNTV_CTL_IMASK);
1446
1447	while ((ci->ci_flags & CPUF_GO) == 0) {
1448#if NPSCI > 0
1449		if (ci->ci_psci_suspend_param) {
1450			status = psci_cpu_suspend(ci->ci_psci_suspend_param,
1451			    start_pa, ci_pa);
1452			if (status != PSCI_SUCCESS)
1453				ci->ci_psci_suspend_param = 0;
1454		} else
1455#endif
1456			cpu_suspend_cycle();
1457		count++;
1458	}
1459
1460	atomic_setbits_int(&ci->ci_flags, CPUF_RUNNING);
1461	__asm volatile("dsb sy; sev" ::: "memory");
1462
1463	intr_restore(psw);
1464
1465	/* Unmask clock interrupts. */
1466	WRITE_SPECIALREG(cntv_ctl_el0,
1467	    READ_SPECIALREG(cntv_ctl_el0) & ~CNTV_CTL_IMASK);
1468}
1469
1470void
1471cpu_kick(struct cpu_info *ci)
1472{
	/* Force the CPU to take an interrupt and enter the kernel. */
1474	if (ci != curcpu())
1475		arm_send_ipi(ci, ARM_IPI_NOP);
1476}
1477
1478void
1479cpu_unidle(struct cpu_info *ci)
1480{
	/*
	 * This could send an IPI or an SEV depending on whether the
	 * other processor is sleeping (WFI or WFE), running in
	 * userland, or sitting in some other wait state.
	 */
1486	if (ci != curcpu())
1487		arm_send_ipi(ci, ARM_IPI_NOP);
1488}
1489
1490#endif
1491
1492int cpu_suspended;
1493
1494#ifdef SUSPEND
1495
1496void cpu_hatch_primary(void);
1497
1498void (*cpu_suspend_cycle_fcn)(void) = cpu_wfi;
1499label_t cpu_suspend_jmpbuf;
1500
1501void
1502cpu_suspend_cycle(void)
1503{
1504	cpu_suspend_cycle_fcn();
1505}
1506
1507void
1508cpu_init_primary(void)
1509{
1510	cpu_init();
1511
1512	cpu_startclock();
1513
1514	longjmp(&cpu_suspend_jmpbuf);
1515}
1516
1517int
1518cpu_suspend_primary(void)
1519{
1520	struct cpu_info *ci = curcpu();
1521	vaddr_t start_va;
1522	paddr_t ci_pa, start_pa;
1523	uint64_t ttbr1;
1524	int32_t status;
1525	int count = 0;
1526
1527	__asm("mrs %x0, ttbr1_el1": "=r"(ttbr1));
1528	ci->ci_ttbr1 = ttbr1;
1529	cpu_dcache_wb_range((vaddr_t)ci, sizeof(*ci));
1530
1531	start_va = (vaddr_t)cpu_hatch_primary;
1532	pmap_extract(pmap_kernel(), start_va, &start_pa);
1533	pmap_extract(pmap_kernel(), (vaddr_t)ci, &ci_pa);
1534
1535#if NPSCI > 0
1536	if (psci_can_suspend()) {
1537		if (setjmp(&cpu_suspend_jmpbuf)) {
1538			/* XXX wait for debug output on Allwinner A64 */
1539			delay(200000);
1540			return 0;
1541		}
1542
1543		psci_system_suspend(start_pa, ci_pa);
1544
1545		return EOPNOTSUPP;
1546	}
1547#endif
1548
1549	if (setjmp(&cpu_suspend_jmpbuf))
1550		goto resume;
1551
1552	/*
1553	 * If PSCI doesn't support SYSTEM_SUSPEND, spin in a low power
1554	 * state waiting for an interrupt that wakes us up again.
1555	 */
1556
1557	/* Mask clock interrupts. */
1558	WRITE_SPECIALREG(cntv_ctl_el0,
1559	    READ_SPECIALREG(cntv_ctl_el0) | CNTV_CTL_IMASK);
1560
1561	/*
1562	 * All non-wakeup interrupts should be masked at this point;
1563	 * re-enable interrupts such that wakeup interrupts actually
1564	 * wake us up.  Set a flag such that drivers can tell we're
1565	 * suspended and change their behaviour accordingly.  They can
1566	 * wake us up by clearing the flag.
1567	 */
1568	cpu_suspended = 1;
1569	arm_intr_func.setipl(IPL_NONE);
1570	intr_enable();
1571
1572	while (cpu_suspended) {
1573#if NPSCI > 0
1574		if (ci->ci_psci_suspend_param) {
1575			status = psci_cpu_suspend(ci->ci_psci_suspend_param,
1576			    start_pa, ci_pa);
1577			if (status != PSCI_SUCCESS)
1578				ci->ci_psci_suspend_param = 0;
1579		} else
1580#endif
1581			cpu_suspend_cycle();
1582		count++;
1583	}
1584
1585resume:
1586	intr_disable();
1587	arm_intr_func.setipl(IPL_HIGH);
1588
1589	/* Unmask clock interrupts. */
1590	WRITE_SPECIALREG(cntv_ctl_el0,
1591	    READ_SPECIALREG(cntv_ctl_el0) & ~CNTV_CTL_IMASK);
1592
1593	return 0;
1594}
1595
1596#ifdef MULTIPROCESSOR
1597
1598void
1599cpu_resume_secondary(struct cpu_info *ci)
1600{
1601	int timeout = 10000;
1602
1603	if (ci->ci_flags & CPUF_PRESENT)
1604		return;
1605
1606	cpu_start_secondary(ci, 1, 0);
1607	while ((ci->ci_flags & CPUF_PRESENT) == 0 && --timeout)
1608		delay(1000);
1609	if (timeout == 0) {
1610		printf("%s: failed to spin up\n",
1611		    ci->ci_dev->dv_xname);
1612		ci->ci_flags = 0;
1613	}
1614}
1615
1616#endif
1617
1618#endif
1619
1620/*
1621 * Dynamic voltage and frequency scaling implementation.
1622 */
1623
1624extern int perflevel;
1625
1626struct opp {
1627	uint64_t opp_hz;
1628	uint32_t opp_microvolt;
1629};
1630
1631struct opp_table {
1632	LIST_ENTRY(opp_table) ot_list;
1633	uint32_t ot_phandle;
1634
1635	struct opp *ot_opp;
1636	u_int ot_nopp;
1637	uint64_t ot_opp_hz_min;
1638	uint64_t ot_opp_hz_max;
1639
1640	struct cpu_info *ot_master;
1641};
1642
1643LIST_HEAD(, opp_table) opp_tables = LIST_HEAD_INITIALIZER(opp_tables);
1644struct task cpu_opp_task;
1645
1646void	cpu_opp_mountroot(struct device *);
1647void	cpu_opp_dotask(void *);
1648void	cpu_opp_setperf(int);
1649
1650uint32_t cpu_opp_get_cooling_level(void *, uint32_t *);
1651void	cpu_opp_set_cooling_level(void *, uint32_t *, uint32_t);
1652
1653void
1654cpu_opp_init(struct cpu_info *ci, uint32_t phandle)
1655{
1656	struct opp_table *ot;
1657	struct cooling_device *cd;
1658	int count, node, child;
1659	uint32_t opp_hz, opp_microvolt;
1660	uint32_t values[3];
1661	int i, j, len;
1662
1663	LIST_FOREACH(ot, &opp_tables, ot_list) {
1664		if (ot->ot_phandle == phandle) {
1665			ci->ci_opp_table = ot;
1666			return;
1667		}
1668	}
1669
1670	node = OF_getnodebyphandle(phandle);
1671	if (node == 0)
1672		return;
1673
1674	if (!OF_is_compatible(node, "operating-points-v2"))
1675		return;
1676
1677	count = 0;
1678	for (child = OF_child(node); child != 0; child = OF_peer(child)) {
1679		if (OF_getproplen(child, "turbo-mode") == 0)
1680			continue;
1681		count++;
1682	}
1683	if (count == 0)
1684		return;
1685
1686	ot = malloc(sizeof(struct opp_table), M_DEVBUF, M_ZERO | M_WAITOK);
1687	ot->ot_phandle = phandle;
1688	ot->ot_opp = mallocarray(count, sizeof(struct opp),
1689	    M_DEVBUF, M_ZERO | M_WAITOK);
1690	ot->ot_nopp = count;
1691
1692	count = 0;
1693	for (child = OF_child(node); child != 0; child = OF_peer(child)) {
1694		if (OF_getproplen(child, "turbo-mode") == 0)
1695			continue;
1696		opp_hz = OF_getpropint64(child, "opp-hz", 0);
1697		len = OF_getpropintarray(child, "opp-microvolt",
1698		    values, sizeof(values));
1699		opp_microvolt = 0;
1700		if (len == sizeof(uint32_t) || len == 3 * sizeof(uint32_t))
1701			opp_microvolt = values[0];
1702
1703		/* Insert into the array, keeping things sorted. */
1704		for (i = 0; i < count; i++) {
1705			if (opp_hz < ot->ot_opp[i].opp_hz)
1706				break;
1707		}
1708		for (j = count; j > i; j--)
1709			ot->ot_opp[j] = ot->ot_opp[j - 1];
1710		ot->ot_opp[i].opp_hz = opp_hz;
1711		ot->ot_opp[i].opp_microvolt = opp_microvolt;
1712		count++;
1713	}
1714
1715	ot->ot_opp_hz_min = ot->ot_opp[0].opp_hz;
1716	ot->ot_opp_hz_max = ot->ot_opp[count - 1].opp_hz;
1717
1718	if (OF_getproplen(node, "opp-shared") == 0)
1719		ot->ot_master = ci;
1720
1721	LIST_INSERT_HEAD(&opp_tables, ot, ot_list);
1722
1723	ci->ci_opp_table = ot;
1724	ci->ci_opp_max = ot->ot_nopp - 1;
1725	ci->ci_cpu_supply = OF_getpropint(ci->ci_node, "cpu-supply", 0);
1726
1727	cd = malloc(sizeof(struct cooling_device), M_DEVBUF, M_ZERO | M_WAITOK);
1728	cd->cd_node = ci->ci_node;
1729	cd->cd_cookie = ci;
1730	cd->cd_get_level = cpu_opp_get_cooling_level;
1731	cd->cd_set_level = cpu_opp_set_cooling_level;
1732	cooling_device_register(cd);
1733
1734	/*
1735	 * Do additional checks at mountroot when all the clocks and
1736	 * regulators are available.
1737	 */
1738	config_mountroot(ci->ci_dev, cpu_opp_mountroot);
1739}
1740
1741void
1742cpu_opp_mountroot(struct device *self)
1743{
1744	struct cpu_info *ci;
1745	CPU_INFO_ITERATOR cii;
1746	int count = 0;
1747	int level = 0;
1748
1749	if (cpu_setperf)
1750		return;
1751
1752	CPU_INFO_FOREACH(cii, ci) {
1753		struct opp_table *ot = ci->ci_opp_table;
1754		uint64_t curr_hz;
1755		uint32_t curr_microvolt;
1756		int error;
1757
1758		if (ot == NULL)
1759			continue;
1760
1761#if NKSTAT > 0
1762		cpu_opp_kstat_attach(ci);
1763#endif
1764
1765		/* Skip if this table is shared and we're not the master. */
1766		if (ot->ot_master && ot->ot_master != ci)
1767			continue;
1768
1769		/* PWM regulators may need to be explicitly enabled. */
1770		regulator_enable(ci->ci_cpu_supply);
1771
1772		curr_hz = clock_get_frequency(ci->ci_node, NULL);
1773		curr_microvolt = regulator_get_voltage(ci->ci_cpu_supply);
1774
1775		/* Disable if clock isn't implemented. */
1776		error = ENODEV;
1777		if (curr_hz != 0)
1778			error = clock_set_frequency(ci->ci_node, NULL, curr_hz);
1779		if (error) {
1780			ci->ci_opp_table = NULL;
1781			printf("%s: clock not implemented\n",
1782			       ci->ci_dev->dv_xname);
1783			continue;
1784		}
1785
1786		/* Disable if regulator isn't implemented. */
1787		error = ci->ci_cpu_supply ? ENODEV : 0;
1788		if (ci->ci_cpu_supply && curr_microvolt != 0)
1789			error = regulator_set_voltage(ci->ci_cpu_supply,
1790			    curr_microvolt);
1791		if (error) {
1792			ci->ci_opp_table = NULL;
1793			printf("%s: regulator not implemented\n",
1794			    ci->ci_dev->dv_xname);
1795			continue;
1796		}
1797
1798		/*
1799		 * Initialize performance level based on the current
1800		 * speed of the first CPU that supports DVFS.
1801		 */
1802		if (level == 0) {
1803			uint64_t min, max;
1804			uint64_t level_hz;
1805
1806			min = ot->ot_opp_hz_min;
1807			max = ot->ot_opp_hz_max;
1808			level_hz = clock_get_frequency(ci->ci_node, NULL);
1809			if (level_hz < min)
1810				level_hz = min;
1811			if (level_hz > max)
1812				level_hz = max;
1813			level = howmany(100 * (level_hz - min), (max - min));
1814		}
1815
1816		count++;
1817	}
1818
1819	if (count > 0) {
1820		task_set(&cpu_opp_task, cpu_opp_dotask, NULL);
1821		cpu_setperf = cpu_opp_setperf;
1822
1823		perflevel = (level > 0) ? level : 0;
1824		cpu_setperf(perflevel);
1825	}
1826}
1827
1828void
1829cpu_opp_dotask(void *arg)
1830{
1831	struct cpu_info *ci;
1832	CPU_INFO_ITERATOR cii;
1833
1834	CPU_INFO_FOREACH(cii, ci) {
1835		struct opp_table *ot = ci->ci_opp_table;
1836		uint64_t curr_hz, opp_hz;
1837		uint32_t curr_microvolt, opp_microvolt;
1838		int opp_idx;
1839		int error = 0;
1840
1841		if (ot == NULL)
1842			continue;
1843
1844		/* Skip if this table is shared and we're not the master. */
1845		if (ot->ot_master && ot->ot_master != ci)
1846			continue;
1847
1848		opp_idx = MIN(ci->ci_opp_idx, ci->ci_opp_max);
1849		opp_hz = ot->ot_opp[opp_idx].opp_hz;
1850		opp_microvolt = ot->ot_opp[opp_idx].opp_microvolt;
1851
1852		curr_hz = clock_get_frequency(ci->ci_node, NULL);
1853		curr_microvolt = regulator_get_voltage(ci->ci_cpu_supply);
1854
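		/*
		 * Order the transitions safely: lower the clock before
		 * dropping the voltage, and raise the voltage before
		 * raising the clock.
		 */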
1855		if (error == 0 && opp_hz < curr_hz)
1856			error = clock_set_frequency(ci->ci_node, NULL, opp_hz);
1857		if (error == 0 && ci->ci_cpu_supply &&
1858		    opp_microvolt != 0 && opp_microvolt != curr_microvolt) {
1859			error = regulator_set_voltage(ci->ci_cpu_supply,
1860			    opp_microvolt);
1861		}
1862		if (error == 0 && opp_hz > curr_hz)
1863			error = clock_set_frequency(ci->ci_node, NULL, opp_hz);
1864
1865		if (error)
1866			printf("%s: DVFS failed\n", ci->ci_dev->dv_xname);
1867	}
1868}
1869
1870void
1871cpu_opp_setperf(int level)
1872{
1873	struct cpu_info *ci;
1874	CPU_INFO_ITERATOR cii;
1875
1876	CPU_INFO_FOREACH(cii, ci) {
1877		struct opp_table *ot = ci->ci_opp_table;
1878		uint64_t min, max;
1879		uint64_t level_hz, opp_hz;
1880		int opp_idx = -1;
1881		int i;
1882
1883		if (ot == NULL)
1884			continue;
1885
1886		/* Skip if this table is shared and we're not the master. */
1887		if (ot->ot_master && ot->ot_master != ci)
1888			continue;
1889
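		/*
		 * Map the 0-100 performance level onto the OPP
		 * frequency range and pick the fastest OPP at or below
		 * that frequency.
		 */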
1890		min = ot->ot_opp_hz_min;
1891		max = ot->ot_opp_hz_max;
1892		level_hz = min + (level * (max - min)) / 100;
1893		opp_hz = min;
1894		for (i = 0; i < ot->ot_nopp; i++) {
1895			if (ot->ot_opp[i].opp_hz <= level_hz &&
1896			    ot->ot_opp[i].opp_hz >= opp_hz)
1897				opp_hz = ot->ot_opp[i].opp_hz;
1898		}
1899
1900		/* Find index of selected operating point. */
1901		for (i = 0; i < ot->ot_nopp; i++) {
1902			if (ot->ot_opp[i].opp_hz == opp_hz) {
1903				opp_idx = i;
1904				break;
1905			}
1906		}
1907		KASSERT(opp_idx >= 0);
1908
1909		ci->ci_opp_idx = opp_idx;
1910	}
1911
1912	/*
1913	 * Update the hardware from a task since setting the
1914	 * regulators might need process context.
1915	 */
1916	task_add(systq, &cpu_opp_task);
1917}
1918
1919uint32_t
1920cpu_opp_get_cooling_level(void *cookie, uint32_t *cells)
1921{
1922	struct cpu_info *ci = cookie;
1923	struct opp_table *ot = ci->ci_opp_table;
1924
1925	return ot->ot_nopp - ci->ci_opp_max - 1;
1926}
1927
1928void
1929cpu_opp_set_cooling_level(void *cookie, uint32_t *cells, uint32_t level)
1930{
1931	struct cpu_info *ci = cookie;
1932	struct opp_table *ot = ci->ci_opp_table;
1933	int opp_max;
1934
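	/*
	 * Higher cooling levels cap the CPU to progressively lower
	 * operating points; level 0 removes the cap.
	 */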
1935	if (level > (ot->ot_nopp - 1))
1936		level = ot->ot_nopp - 1;
1937
1938	opp_max = (ot->ot_nopp - level - 1);
1939	if (ci->ci_opp_max != opp_max) {
1940		ci->ci_opp_max = opp_max;
1941		task_add(systq, &cpu_opp_task);
1942	}
1943}
1944
1945
1946void
1947cpu_psci_init(struct cpu_info *ci)
1948{
1949	uint32_t *domains;
1950	uint32_t *domain;
1951	uint32_t *states;
1952	uint32_t ncells;
1953	uint32_t cluster;
1954	int idx, len, node;
1955
1956	/*
1957	 * Hunt for the deepest idle state for this CPU.  This is
1958	 * fairly complicated as it requires traversing quite a few
1959	 * nodes in the device tree.  The first step is to look up the
1960	 * "psci" power domain for this CPU.
1961	 */
1962
1963	idx = OF_getindex(ci->ci_node, "psci", "power-domain-names");
1964	if (idx < 0)
1965		return;
1966
1967	len = OF_getproplen(ci->ci_node, "power-domains");
1968	if (len <= 0)
1969		return;
1970
1971	domains = malloc(len, M_TEMP, M_WAITOK);
1972	OF_getpropintarray(ci->ci_node, "power-domains", domains, len);
1973
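	/*
	 * Each "power-domains" entry is a phandle followed by
	 * #power-domain-cells argument cells; step over entries until
	 * we reach the one at the "psci" index.
	 */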
1974	domain = domains;
1975	while (domain && domain < domains + (len / sizeof(uint32_t))) {
1976		if (idx == 0)
1977			break;
1978
1979		node = OF_getnodebyphandle(domain[0]);
1980		if (node == 0)
1981			break;
1982
1983		ncells = OF_getpropint(node, "#power-domain-cells", 0);
1984		domain = domain + ncells + 1;
1985		idx--;
1986	}
1987
1988	node = idx == 0 ? OF_getnodebyphandle(domain[0]) : 0;
1989	free(domains, M_TEMP, len);
1990	if (node == 0)
1991		return;
1992
1993	/*
1994	 * We found the "psci" power domain.  If this power domain has
1995	 * a parent power domain, stash its phandle away for later.
1996	 */
1997
1998	cluster = OF_getpropint(node, "power-domains", 0);
1999
2000	/*
2001	 * Get the deepest idle state for the CPU; this should be the
2002	 * last one that is listed.
2003	 */
2004
2005	len = OF_getproplen(node, "domain-idle-states");
2006	if (len < sizeof(uint32_t))
2007		return;
2008
2009	states = malloc(len, M_TEMP, M_WAITOK);
2010	OF_getpropintarray(node, "domain-idle-states", states, len);
2011
2012	node = OF_getnodebyphandle(states[len / sizeof(uint32_t) - 1]);
2013	free(states, M_TEMP, len);
2014	if (node == 0)
2015		return;
2016
2017	ci->ci_psci_suspend_param =
2018		OF_getpropint(node, "arm,psci-suspend-param", 0);
2019
	/*
	 * Qualcomm Snapdragon SoCs always seem to operate in OS
	 * Initiated mode.  This means that the last CPU to suspend can
	 * pick the idle state that powers off the entire cluster.  In
	 * our case that will always be the primary CPU.
	 */
2026
2027#ifdef MULTIPROCESSOR
2028	if (ci->ci_flags & CPUF_AP)
2029		return;
2030#endif
2031
2032	node = OF_getnodebyphandle(cluster);
2033	if (node == 0)
2034		return;
2035
2036	/*
2037	 * Get the deepest idle state for the cluster; this should be
2038	 * the last one that is listed.
2039	 */

	len = OF_getproplen(node, "domain-idle-states");
	if (len < sizeof(uint32_t))
		return;

	states = malloc(len, M_TEMP, M_WAITOK);
	OF_getpropintarray(node, "domain-idle-states", states, len);
2043
2044	node = OF_getnodebyphandle(states[len / sizeof(uint32_t) - 1]);
2045	free(states, M_TEMP, len);
2046	if (node == 0)
2047		return;
2048
2049	ci->ci_psci_suspend_param =
2050		OF_getpropint(node, "arm,psci-suspend-param", 0);
2051}
2052
2053#if NKSTAT > 0
2054
2055struct cpu_kstats {
2056	struct kstat_kv		ck_impl;
2057	struct kstat_kv		ck_part;
2058	struct kstat_kv		ck_rev;
2059};
2060
2061void
2062cpu_kstat_attach(struct cpu_info *ci)
2063{
2064	struct kstat *ks;
2065	struct cpu_kstats *ck;
2066	uint64_t impl, part;
2067	const char *impl_name = NULL, *part_name = NULL;
2068	const struct cpu_cores *coreselecter = cpu_cores_none;
2069	int i;
2070
2071	ks = kstat_create(ci->ci_dev->dv_xname, 0, "mach", 0, KSTAT_T_KV, 0);
2072	if (ks == NULL) {
2073		printf("%s: unable to create cpu kstats\n",
2074		    ci->ci_dev->dv_xname);
2075		return;
2076	}
2077
2078	ck = malloc(sizeof(*ck), M_DEVBUF, M_WAITOK);
2079
2080	impl = CPU_IMPL(ci->ci_midr);
2081	part = CPU_PART(ci->ci_midr);
2082
2083	for (i = 0; cpu_implementers[i].name; i++) {
2084		if (impl == cpu_implementers[i].id) {
2085			impl_name = cpu_implementers[i].name;
2086			coreselecter = cpu_implementers[i].corelist;
2087			break;
2088		}
2089	}
2090
2091	if (impl_name) {
2092		kstat_kv_init(&ck->ck_impl, "impl", KSTAT_KV_T_ISTR);
2093		strlcpy(kstat_kv_istr(&ck->ck_impl), impl_name,
2094		    sizeof(kstat_kv_istr(&ck->ck_impl)));
2095	} else
2096		kstat_kv_init(&ck->ck_impl, "impl", KSTAT_KV_T_NULL);
2097
2098	for (i = 0; coreselecter[i].name; i++) {
2099		if (part == coreselecter[i].id) {
2100			part_name = coreselecter[i].name;
2101			break;
2102		}
2103	}
2104
2105	if (part_name) {
2106		kstat_kv_init(&ck->ck_part, "part", KSTAT_KV_T_ISTR);
2107		strlcpy(kstat_kv_istr(&ck->ck_part), part_name,
2108		    sizeof(kstat_kv_istr(&ck->ck_part)));
2109	} else
2110		kstat_kv_init(&ck->ck_part, "part", KSTAT_KV_T_NULL);
2111
2112	kstat_kv_init(&ck->ck_rev, "rev", KSTAT_KV_T_ISTR);
2113	snprintf(kstat_kv_istr(&ck->ck_rev), sizeof(kstat_kv_istr(&ck->ck_rev)),
2114	    "r%llup%llu", CPU_VAR(ci->ci_midr), CPU_REV(ci->ci_midr));
2115
2116	ks->ks_softc = ci;
2117	ks->ks_data = ck;
2118	ks->ks_datalen = sizeof(*ck);
2119	ks->ks_read = kstat_read_nop;
2120
2121	kstat_install(ks);
2122
2123	/* XXX should we have a ci->ci_kstat = ks? */
2124}
2125
2126struct cpu_opp_kstats {
2127	struct kstat_kv		coppk_freq;
2128	struct kstat_kv		coppk_supply_v;
2129};
2130
2131int
2132cpu_opp_kstat_read(struct kstat *ks)
2133{
2134	struct cpu_info *ci = ks->ks_softc;
2135	struct cpu_opp_kstats *coppk = ks->ks_data;
2136
2137	struct opp_table *ot = ci->ci_opp_table;
2138	struct cpu_info *oci;
2139	struct timespec now, diff;
2140
	/* Rate-limit: refresh at most once per second. */
2142	getnanouptime(&now);
2143	timespecsub(&now, &ks->ks_updated, &diff);
2144	if (diff.tv_sec < 1)
2145		return (0);
2146
2147	if (ot == NULL)
2148		return (0);
2149
2150	oci = ot->ot_master;
2151	if (oci == NULL)
2152		oci = ci;
2153
2154	kstat_kv_freq(&coppk->coppk_freq) =
2155	    clock_get_frequency(oci->ci_node, NULL);
2156
2157	if (oci->ci_cpu_supply) {
2158		kstat_kv_volts(&coppk->coppk_supply_v) =
2159		    regulator_get_voltage(oci->ci_cpu_supply);
2160	}
2161
2162	ks->ks_updated = now;
2163
2164	return (0);
2165}
2166
2167void
2168cpu_opp_kstat_attach(struct cpu_info *ci)
2169{
2170	struct kstat *ks;
2171	struct cpu_opp_kstats *coppk;
2172	struct opp_table *ot = ci->ci_opp_table;
2173	struct cpu_info *oci = ot->ot_master;
2174
2175	if (oci == NULL)
2176		oci = ci;
2177
2178	ks = kstat_create(ci->ci_dev->dv_xname, 0, "dt-opp", 0,
2179	    KSTAT_T_KV, 0);
2180	if (ks == NULL) {
2181		printf("%s: unable to create cpu dt-opp kstats\n",
2182		    ci->ci_dev->dv_xname);
2183		return;
2184	}
2185
2186	coppk = malloc(sizeof(*coppk), M_DEVBUF, M_WAITOK);
2187
2188	kstat_kv_init(&coppk->coppk_freq, "freq", KSTAT_KV_T_FREQ);
2189	kstat_kv_init(&coppk->coppk_supply_v, "supply",
2190	    oci->ci_cpu_supply ? KSTAT_KV_T_VOLTS_DC : KSTAT_KV_T_NULL);
2191
2192	ks->ks_softc = oci;
2193	ks->ks_data = coppk;
2194	ks->ks_datalen = sizeof(*coppk);
2195	ks->ks_read = cpu_opp_kstat_read;
2196
2197	kstat_install(ks);
2198
2199	/* XXX should we have a ci->ci_opp_kstat = ks? */
2200}
2201
2202#endif /* NKSTAT > 0 */
2203