1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2008 Marcel Moolenaar
5 * Copyright (c) 2009 Nathan Whitehorn
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31#include <sys/endian.h>
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/kernel.h>
35#include <sys/bus.h>
36#include <sys/pcpu.h>
37#include <sys/proc.h>
38#include <sys/sched.h>
39#include <sys/smp.h>
40#include <vm/vm.h>
41#include <vm/pmap.h>
42
43#include <machine/bus.h>
44#include <machine/cpu.h>
45#include <machine/hid.h>
46#include <machine/platformvar.h>
47#include <machine/rtas.h>
48#include <machine/smp.h>
49#include <machine/spr.h>
50#include <machine/trap.h>
51
52#include <dev/ofw/openfirm.h>
53#include <machine/ofw_machdep.h>
54
55#include "platform_if.h"
56
#ifdef SMP
/* Pcpu pointer handed to a starting AP (consumed by MD startup code). */
extern void *ap_pcpu;
#endif

#ifdef __powerpc64__
/*
 * Per-CPU Virtual Processor Areas shared with the PHYP hypervisor.
 * Size/alignment presumably chosen to satisfy PAPR VPA registration
 * constraints — TODO confirm against PAPR.  XXX: dpcpu
 */
static uint8_t splpar_vpa[MAXCPU][640] __aligned(128); /* XXX: dpcpu */
#endif

/* Top of real (physical) memory; recomputed in chrp_attach() under PHYP. */
static vm_offset_t realmaxaddr = VM_MAX_ADDRESS;

/* platform_if method implementations. */
static int chrp_probe(platform_t);
static int chrp_attach(platform_t);
void chrp_mem_regions(platform_t, struct mem_region *phys, int *physsz,
    struct mem_region *avail, int *availsz);
static vm_offset_t chrp_real_maxaddr(platform_t);
static u_long chrp_timebase_freq(platform_t, struct cpuref *cpuref);
static int chrp_smp_first_cpu(platform_t, struct cpuref *cpuref);
static int chrp_smp_next_cpu(platform_t, struct cpuref *cpuref);
static int chrp_smp_get_bsp(platform_t, struct cpuref *cpuref);
static void chrp_smp_ap_init(platform_t);
static int chrp_cpuref_init(void);
#ifdef SMP
static int chrp_smp_start_cpu(platform_t, struct pcpu *cpu);
static void chrp_smp_probe_threads(platform_t plat);
static struct cpu_group *chrp_smp_topo(platform_t plat);
#endif
static void chrp_reset(platform_t);
#ifdef __powerpc64__
#include "phyp-hvcall.h"
static void phyp_cpu_idle(sbintime_t sbt);
#endif

/* CPU table built by chrp_cpuref_init(); entry 0 is always the BSP. */
static struct cpuref platform_cpuref[MAXCPU];
static int platform_cpuref_cnt;		/* Number of valid entries. */
static int platform_cpuref_valid;	/* Nonzero once the table is built. */
92
/* Dispatch table wiring the platform KOBJ interface to this driver. */
static platform_method_t chrp_methods[] = {
	PLATFORMMETHOD(platform_probe, 		chrp_probe),
	PLATFORMMETHOD(platform_attach,		chrp_attach),
	PLATFORMMETHOD(platform_mem_regions,	chrp_mem_regions),
	PLATFORMMETHOD(platform_real_maxaddr,	chrp_real_maxaddr),
	PLATFORMMETHOD(platform_timebase_freq,	chrp_timebase_freq),

	PLATFORMMETHOD(platform_smp_ap_init,	chrp_smp_ap_init),
	PLATFORMMETHOD(platform_smp_first_cpu,	chrp_smp_first_cpu),
	PLATFORMMETHOD(platform_smp_next_cpu,	chrp_smp_next_cpu),
	PLATFORMMETHOD(platform_smp_get_bsp,	chrp_smp_get_bsp),
#ifdef SMP
	PLATFORMMETHOD(platform_smp_start_cpu,	chrp_smp_start_cpu),
	PLATFORMMETHOD(platform_smp_probe_threads,	chrp_smp_probe_threads),
	PLATFORMMETHOD(platform_smp_topo,	chrp_smp_topo),
#endif

	PLATFORMMETHOD(platform_reset,		chrp_reset),
	{ 0, 0 }	/* Terminator. */
};

/* "chrp" platform definition; size field unused (0). */
static platform_def_t chrp_platform = {
	"chrp",
	chrp_methods,
	0
};

PLATFORM_DEF(chrp_platform);
121
122static int
123chrp_probe(platform_t plat)
124{
125	if (OF_finddevice("/memory") != -1 || OF_finddevice("/memory@0") != -1)
126		return (BUS_PROBE_GENERIC);
127
128	return (ENXIO);
129}
130
/*
 * Attach: when running as a PAPR guest (MSR[HV] clear), set up the
 * hypervisor environment (real memory limit, PHYP MMU, idle hook, VPAs,
 * and the LPAR ILE bit on little-endian).  Then enumerate CPUs and,
 * optionally, quiesce Open Firmware.
 */
static int
chrp_attach(platform_t plat)
{
	int quiesce;
#ifdef __powerpc64__
	int i;
#if BYTE_ORDER == LITTLE_ENDIAN
	int result;
#endif

	/* XXX: check for /rtas/ibm,hypertas-functions? */
	if (!(mfmsr() & PSL_HV)) {
		struct mem_region *phys, *avail;
		int nphys, navail;
		vm_offset_t off;

		mem_regions(&phys, &nphys, &avail, &navail);

		/* Cache the top of real memory for chrp_real_maxaddr(). */
		realmaxaddr = 0;
		for (i = 0; i < nphys; i++) {
			off = phys[i].mr_start + phys[i].mr_size;
			realmaxaddr = MAX(off, realmaxaddr);
		}

		/* Use the hypervisor-mediated MMU unless radix is in use. */
		if (!radix_mmu)
			pmap_mmu_install("mmu_phyp", BUS_PROBE_SPECIFIC);
		cpu_idle_hook = phyp_cpu_idle;

		/* Set up important VPA fields */
		for (i = 0; i < MAXCPU; i++) {
			/* First two: VPA size (big-endian 16-bit at offset 4) */
			splpar_vpa[i][4] =
			    (uint8_t)((sizeof(splpar_vpa[i]) >> 8) & 0xff);
			splpar_vpa[i][5] =
			    (uint8_t)(sizeof(splpar_vpa[i]) & 0xff);
			splpar_vpa[i][0xba] = 1;	/* Maintain FPRs */
			splpar_vpa[i][0xbb] = 1;	/* Maintain PMCs */
			splpar_vpa[i][0xfc] = 0xff;	/* Maintain full SLB */
			splpar_vpa[i][0xfd] = 0xff;
			splpar_vpa[i][0xff] = 1;	/* Maintain Altivec */
		}
		/* Make VPA contents visible before registering them. */
		mb();

		/* Set up hypervisor CPU stuff (register BSP's VPA now). */
		chrp_smp_ap_init(plat);

#if BYTE_ORDER == LITTLE_ENDIAN
		/*
		 * Ask the hypervisor to update the LPAR ILE bit.
		 *
		 * This involves all processors reentering the hypervisor
		 * so the change appears simultaneously in all processors.
		 * This can take a long time.
		 */
		for(;;) {
			result = phyp_hcall(H_SET_MODE, 1UL,
			    H_SET_MODE_RSRC_ILE, 0, 0);
			if (result == H_SUCCESS)
				break;
			DELAY(1000);
		}
#endif

	}
#endif
	/* Build the logical CPU enumeration table. */
	chrp_cpuref_init();

	/* Some systems (e.g. QEMU) need Open Firmware to stand down */
	quiesce = 1;
	TUNABLE_INT_FETCH("debug.quiesce_ofw", &quiesce);
	if (quiesce)
		ofw_quiesce();

	return (0);
}
206
/*
 * Parse the /ibm,dynamic-reconfiguration-memory node (PAPR dynamic
 * LMBs) and append any assigned, non-reserved blocks to both the
 * physical (ofmem/msz) and available (ofavail/asz) region lists.
 * Returns 1 if the node was parsed, 0 if absent or unreadable.
 *
 * NOTE(review): lmsz/lasz are not bounds-checked against the caller's
 * array capacity — presumably sized for the worst case; confirm.
 */
static int
parse_drconf_memory(struct mem_region *ofmem, int *msz,
		    struct mem_region *ofavail, int *asz)
{
	phandle_t phandle;
	vm_offset_t base;
	int i, idx, len, lasz, lmsz, res;
	uint32_t flags, lmb_size[2];
	uint32_t *dmem;

	lmsz = *msz;
	lasz = *asz;

	phandle = OF_finddevice("/ibm,dynamic-reconfiguration-memory");
	if (phandle == -1)
		/* No drconf node, return. */
		return (0);

	/* LMB size is a 64-bit quantity; low word is used below. */
	res = OF_getencprop(phandle, "ibm,lmb-size", lmb_size,
	    sizeof(lmb_size));
	if (res == -1)
		return (0);
	printf("Logical Memory Block size: %d MB\n", lmb_size[1] >> 20);

	/* Parse the /ibm,dynamic-memory.
	   The first position gives the # of entries. The next two words
	   reflect the address of the memory block. The next four words are
	   the DRC index, reserved, list index and flags.
	   (see PAPR C.6.6.2 ibm,dynamic-reconfiguration-memory)

	    #el  Addr   DRC-idx  res   list-idx  flags
	   -------------------------------------------------
	   | 4 |   8   |   4   |   4   |   4   |   4   |....
	   -------------------------------------------------
	*/

	len = OF_getproplen(phandle, "ibm,dynamic-memory");
	if (len > 0) {
		/* We have to use a variable length array on the stack
		   since we have very limited stack space.
		*/
		cell_t arr[len/sizeof(cell_t)];

		res = OF_getencprop(phandle, "ibm,dynamic-memory", arr,
		    sizeof(arr));
		if (res == -1)
			return (0);

		/* Number of elements */
		idx = arr[0];

		/* First address, in arr[1], arr[2]*/
		dmem = &arr[1];

		for (i = 0; i < idx; i++) {
			/* Entry layout: 2 address cells + 4 metadata cells. */
			base = ((uint64_t)dmem[0] << 32) + dmem[1];
			dmem += 4;	/* dmem[1] is now the flags cell. */
			flags = dmem[1];
			/* Use region only if available and not reserved. */
			if ((flags & 0x8) && !(flags & 0x80)) {
				ofmem[lmsz].mr_start = base;
				ofmem[lmsz].mr_size = (vm_size_t)lmb_size[1];
				ofavail[lasz].mr_start = base;
				ofavail[lasz].mr_size = (vm_size_t)lmb_size[1];
				lmsz++;
				lasz++;
			}
			dmem += 2;	/* Advance to the next entry. */
		}
	}

	*msz = lmsz;
	*asz = lasz;

	return (1);
}
283
284void
285chrp_mem_regions(platform_t plat, struct mem_region *phys, int *physsz,
286    struct mem_region *avail, int *availsz)
287{
288	vm_offset_t maxphysaddr;
289	int i;
290
291	ofw_mem_regions(phys, physsz, avail, availsz);
292	parse_drconf_memory(phys, physsz, avail, availsz);
293
294	/*
295	 * On some firmwares (SLOF), some memory may be marked available that
296	 * doesn't actually exist. This manifests as an extension of the last
297	 * available segment past the end of physical memory, so truncate that
298	 * one.
299	 */
300	maxphysaddr = 0;
301	for (i = 0; i < *physsz; i++)
302		if (phys[i].mr_start + phys[i].mr_size > maxphysaddr)
303			maxphysaddr = phys[i].mr_start + phys[i].mr_size;
304
305	for (i = 0; i < *availsz; i++)
306		if (avail[i].mr_start + avail[i].mr_size > maxphysaddr)
307			avail[i].mr_size = maxphysaddr - avail[i].mr_start;
308}
309
/* Return the cached top of real memory (see chrp_attach()). */
static vm_offset_t
chrp_real_maxaddr(platform_t plat)
{
	return (realmaxaddr);
}
315
316static u_long
317chrp_timebase_freq(platform_t plat, struct cpuref *cpuref)
318{
319	phandle_t cpus, cpunode;
320	int32_t ticks = -1;
321	int res;
322	char buf[8];
323
324	cpus = OF_finddevice("/cpus");
325	if (cpus == -1)
326		panic("CPU tree not found on Open Firmware\n");
327
328	for (cpunode = OF_child(cpus); cpunode != 0; cpunode = OF_peer(cpunode)) {
329		res = OF_getprop(cpunode, "device_type", buf, sizeof(buf));
330		if (res > 0 && strcmp(buf, "cpu") == 0)
331			break;
332	}
333	if (cpunode <= 0)
334		panic("CPU node not found on Open Firmware\n");
335
336	OF_getencprop(cpunode, "timebase-frequency", &ticks, sizeof(ticks));
337
338	if (ticks <= 0)
339		panic("Unable to determine timebase frequency!");
340
341	return (ticks);
342}
343
344static int
345chrp_smp_first_cpu(platform_t plat, struct cpuref *cpuref)
346{
347
348	if (platform_cpuref_valid == 0)
349		return (EINVAL);
350
351	cpuref->cr_cpuid = 0;
352	cpuref->cr_hwref = platform_cpuref[0].cr_hwref;
353
354	return (0);
355}
356
357static int
358chrp_smp_next_cpu(platform_t plat, struct cpuref *cpuref)
359{
360	int id;
361
362	if (platform_cpuref_valid == 0)
363		return (EINVAL);
364
365	id = cpuref->cr_cpuid + 1;
366	if (id >= platform_cpuref_cnt)
367		return (ENOENT);
368
369	cpuref->cr_cpuid = platform_cpuref[id].cr_cpuid;
370	cpuref->cr_hwref = platform_cpuref[id].cr_hwref;
371
372	return (0);
373}
374
/*
 * Return the boot processor (always entry 0 of the table).
 * NOTE(review): unlike first_cpu/next_cpu there is no
 * platform_cpuref_valid check — presumably never called before
 * chrp_cpuref_init(); confirm before adding one.
 */
static int
chrp_smp_get_bsp(platform_t plat, struct cpuref *cpuref)
{

	cpuref->cr_cpuid = platform_cpuref[0].cr_cpuid;
	cpuref->cr_hwref = platform_cpuref[0].cr_hwref;
	return (0);
}
383
384static void
385get_cpu_reg(phandle_t cpu, cell_t *reg)
386{
387	int res;
388
389	res = OF_getproplen(cpu, "reg");
390	if (res != sizeof(cell_t))
391		panic("Unexpected length for CPU property reg on Open Firmware\n");
392	OF_getencprop(cpu, "reg", reg, res);
393}
394
/*
 * Enumerate the hardware threads listed under the OF /cpus node and
 * build platform_cpuref[] rotated so that the boot CPU (BSP) occupies
 * entry 0 and receives CPU ID 0.  Idempotent: returns immediately once
 * the table is valid.
 */
static int
chrp_cpuref_init(void)
{
	phandle_t cpu, dev, chosen, pbsp;
	ihandle_t ibsp;
	char buf[32];
	int a, bsp, res, res2, tmp_cpuref_cnt;
	static struct cpuref tmp_cpuref[MAXCPU];
	cell_t interrupt_servers[32], addr_cells, size_cells, reg, bsp_reg;

	if (platform_cpuref_valid)
		return (0);

	/* Locate the "cpus" node among the children of the tree root. */
	dev = OF_peer(0);
	dev = OF_child(dev);
	while (dev != 0) {
		res = OF_getprop(dev, "name", buf, sizeof(buf));
		if (res > 0 && strcmp(buf, "cpus") == 0)
			break;
		dev = OF_peer(dev);
	}
	/*
	 * NOTE(review): if no "cpus" node exists, dev is 0 here and the
	 * lookups below run on a bogus handle — presumably impossible on
	 * supported machines; confirm.
	 */

	/* Make sure that cpus reg property have 1 address cell and 0 size cells */
	res = OF_getproplen(dev, "#address-cells");
	res2 = OF_getproplen(dev, "#size-cells");
	if (res != res2 || res != sizeof(cell_t))
		panic("CPU properties #address-cells and #size-cells not found on Open Firmware\n");
	OF_getencprop(dev, "#address-cells", &addr_cells, sizeof(addr_cells));
	OF_getencprop(dev, "#size-cells", &size_cells, sizeof(size_cells));
	if (addr_cells != 1 || size_cells != 0)
		panic("Unexpected values for CPU properties #address-cells and #size-cells on Open Firmware\n");

	/* Look for boot CPU in /chosen/cpu and /chosen/fdtbootcpu */

	chosen = OF_finddevice("/chosen");
	if (chosen == -1)
		panic("Device /chosen not found on Open Firmware\n");

	bsp_reg = -1;

	/* /chosen/cpu is an ihandle; resolve it to the package's "reg". */
	if (OF_getproplen(chosen, "cpu") == sizeof(ihandle_t)) {
		OF_getprop(chosen, "cpu", &ibsp, sizeof(ibsp));
		pbsp = OF_instance_to_package(be32toh(ibsp));
		if (pbsp != -1)
			get_cpu_reg(pbsp, &bsp_reg);
	}

	/* /chosen/fdtbootcpu (FDT boot path) holds the reg value directly. */
	if (bsp_reg == -1) {
		if (OF_getproplen(chosen, "fdtbootcpu") == sizeof(cell_t))
			OF_getprop(chosen, "fdtbootcpu", &bsp_reg, sizeof(bsp_reg));
	}

	if (bsp_reg == -1)
		panic("Boot CPU not found on Open Firmware\n");

	/*
	 * Walk every "cpu" node; each entry of its
	 * ibm,ppc-interrupt-server#s property is one hardware thread.
	 * NOTE(review): interrupt_servers holds 32 cells and tmp_cpuref
	 * MAXCPU entries; neither bound is checked against res — confirm
	 * firmware cannot exceed them.
	 */
	bsp = -1;
	tmp_cpuref_cnt = 0;
	for (cpu = OF_child(dev); cpu != 0; cpu = OF_peer(cpu)) {
		res = OF_getprop(cpu, "device_type", buf, sizeof(buf));
		if (res > 0 && strcmp(buf, "cpu") == 0) {
			res = OF_getproplen(cpu, "ibm,ppc-interrupt-server#s");
			if (res > 0) {
				OF_getencprop(cpu, "ibm,ppc-interrupt-server#s",
				    interrupt_servers, res);

				/* Remember which entry is the boot CPU. */
				get_cpu_reg(cpu, &reg);
				if (reg == bsp_reg)
					bsp = tmp_cpuref_cnt;

				for (a = 0; a < res/sizeof(cell_t); a++) {
					tmp_cpuref[tmp_cpuref_cnt].cr_hwref = interrupt_servers[a];
					tmp_cpuref[tmp_cpuref_cnt].cr_cpuid = tmp_cpuref_cnt;
					tmp_cpuref_cnt++;
				}
			}
		}
	}

	if (bsp == -1)
		panic("Boot CPU not found\n");

	/* Map IDs, so BSP has CPUID 0 regardless of hwref */
	for (a = bsp; a < tmp_cpuref_cnt; a++) {
		platform_cpuref[platform_cpuref_cnt].cr_hwref = tmp_cpuref[a].cr_hwref;
		platform_cpuref[platform_cpuref_cnt].cr_cpuid = platform_cpuref_cnt;
		platform_cpuref_cnt++;
	}
	for (a = 0; a < bsp; a++) {
		platform_cpuref[platform_cpuref_cnt].cr_hwref = tmp_cpuref[a].cr_hwref;
		platform_cpuref[platform_cpuref_cnt].cr_cpuid = platform_cpuref_cnt;
		platform_cpuref_cnt++;
	}

	platform_cpuref_valid = 1;

	return (0);
}
494
495#ifdef SMP
/*
 * Start an application processor via the RTAS "start-cpu" method.
 * Returns 0 on success, ENXIO if RTAS is unavailable or the call
 * fails, EBUSY if the AP never marks itself awake.
 */
static int
chrp_smp_start_cpu(platform_t plat, struct pcpu *pc)
{
	cell_t start_cpu;
	int result, err, timeout;

	if (!rtas_exists()) {
		printf("RTAS uninitialized: unable to start AP %d\n",
		    pc->pc_cpuid);
		return (ENXIO);
	}

	start_cpu = rtas_token_lookup("start-cpu");
	if (start_cpu == -1) {
		printf("RTAS unknown method: unable to start AP %d\n",
		    pc->pc_cpuid);
		return (ENXIO);
	}

	/*
	 * Publish the AP's pcpu pointer before kicking it;
	 * powerpc_sync() orders the store ahead of the RTAS call.
	 */
	ap_pcpu = pc;
	powerpc_sync();

	/* Start the AP at the system-reset vector with pc as argument. */
	result = rtas_call_method(start_cpu, 3, 1, pc->pc_hwref, EXC_RST, pc,
	    &err);
	if (result < 0 || err != 0) {
		printf("RTAS error (%d/%d): unable to start AP %d\n",
		    result, err, pc->pc_cpuid);
		return (ENXIO);
	}

	/* Wait up to 10000 * 100us for the AP to report itself awake. */
	timeout = 10000;
	while (!pc->pc_awake && timeout--)
		DELAY(100);

	return ((pc->pc_awake) ? 0 : EBUSY);
}
532
533static void
534chrp_smp_probe_threads(platform_t plat)
535{
536	struct pcpu *pc, *last_pc;
537	int i, ncores;
538
539	ncores = 0;
540	last_pc = NULL;
541	for (i = 0; i <= mp_maxid; i++) {
542		pc = pcpu_find(i);
543		if (pc == NULL)
544			continue;
545		if (last_pc == NULL || pc->pc_hwref != last_pc->pc_hwref)
546			ncores++;
547		last_pc = pc;
548	}
549
550	mp_ncores = ncores;
551	if (mp_ncpus % ncores == 0)
552		smp_threads_per_core = mp_ncpus / ncores;
553}
554
555static struct cpu_group *
556chrp_smp_topo(platform_t plat)
557{
558
559	if (mp_ncpus % mp_ncores != 0) {
560		printf("WARNING: Irregular SMP topology. Performance may be "
561		     "suboptimal (%d CPUS, %d cores)\n", mp_ncpus, mp_ncores);
562		return (smp_topo_none());
563	}
564
565	/* Don't do anything fancier for non-threaded SMP */
566	if (mp_ncpus == mp_ncores)
567		return (smp_topo_none());
568
569	return (smp_topo_1level(CG_SHARE_L1, smp_threads_per_core,
570	    CG_FLAG_SMT));
571}
572#endif
573
/* Reset the machine by asking Open Firmware to reboot. */
static void
chrp_reset(platform_t platform)
{
	OF_reboot();
}
579
#ifdef __powerpc64__
/*
 * Idle hook for PAPR guests: cede the processor to the hypervisor
 * instead of spinning when nothing is runnable.
 */
static void
phyp_cpu_idle(sbintime_t sbt)
{
	register_t msr;

	msr = mfmsr();

	/*
	 * Mask external interrupts before the runnable check so a wakeup
	 * cannot slip in between the check and the H_CEDE call.
	 */
	mtmsr(msr & ~PSL_EE);
	if (sched_runnable()) {
		mtmsr(msr);
		return;
	}

	phyp_hcall(H_CEDE); /* Re-enables interrupts internally */
	mtmsr(msr);
}

/*
 * Per-CPU hypervisor setup: register this CPU's Virtual Processor Area
 * and open its interrupt priority.  Only needed when running as a guest
 * (MSR[HV] clear).
 */
static void
chrp_smp_ap_init(platform_t platform)
{
	if (!(mfmsr() & PSL_HV)) {
		/*
		 * Register VPA.  NOTE(review): splpar_vpa has MAXCPU rows
		 * but is indexed by hwref (the interrupt server number),
		 * which is not obviously bounded by MAXCPU — confirm.
		 */
		phyp_hcall(H_REGISTER_VPA, 1UL, PCPU_GET(hwref),
		    splpar_vpa[PCPU_GET(hwref)]);

		/* Set interrupt priority */
		phyp_hcall(H_CPPR, 0xff);
	}
}
#else
/* Non-64-bit CHRP: no hypervisor per-CPU setup required. */
static void
chrp_smp_ap_init(platform_t platform)
{
}
#endif
616