/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2008 Marcel Moolenaar
 * Copyright (c) 2009 Nathan Whitehorn
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/endian.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/bus.h>
#include <machine/cpu.h>
#include <machine/hid.h>
#include <machine/platformvar.h>
#include <machine/rtas.h>
#include <machine/smp.h>
#include <machine/spr.h>
#include <machine/trap.h>

#include <dev/ofw/openfirm.h>
#include <machine/ofw_machdep.h>

#include "platform_if.h"

#ifdef SMP
extern void *ap_pcpu;
#endif

#ifdef __powerpc64__
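/*
 * Per-CPU Virtual Processor Area (VPA) shared with the hypervisor when
 * running as a PAPR guest (i.e. without PSL_HV). Each CPU registers its
 * entry via H_REGISTER_VPA in chrp_smp_ap_init() below; bytes 4-5 hold
 * the area's length and the remaining flag bytes are set in chrp_attach().
 */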
static uint8_t splpar_vpa[MAXCPU][640] __aligned(128); /* XXX: dpcpu */
#endif

static vm_offset_t realmaxaddr = VM_MAX_ADDRESS;

static int chrp_probe(platform_t);
static int chrp_attach(platform_t);
void chrp_mem_regions(platform_t, struct mem_region *phys, int *physsz,
    struct mem_region *avail, int *availsz);
static vm_offset_t chrp_real_maxaddr(platform_t);
static u_long chrp_timebase_freq(platform_t, struct cpuref *cpuref);
static int chrp_smp_first_cpu(platform_t, struct cpuref *cpuref);
static int chrp_smp_next_cpu(platform_t, struct cpuref *cpuref);
static int chrp_smp_get_bsp(platform_t, struct cpuref *cpuref);
static void chrp_smp_ap_init(platform_t);
static int chrp_cpuref_init(void);
#ifdef SMP
static int chrp_smp_start_cpu(platform_t, struct pcpu *cpu);
static void chrp_smp_probe_threads(platform_t plat);
static struct cpu_group *chrp_smp_topo(platform_t plat);
#endif
static void chrp_reset(platform_t);
#ifdef __powerpc64__
#include "phyp-hvcall.h"
static void phyp_cpu_idle(sbintime_t sbt);
#endif

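/*
 * CPU references, filled in by chrp_cpuref_init(): one entry per hardware
 * thread, with cr_hwref holding the PAPR interrupt-server number and the
 * table rotated so that the boot CPU is always cpuid 0.
 */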
static struct cpuref platform_cpuref[MAXCPU];
static int platform_cpuref_cnt;
static int platform_cpuref_valid;

static platform_method_t chrp_methods[] = {
	PLATFORMMETHOD(platform_probe,		chrp_probe),
	PLATFORMMETHOD(platform_attach,		chrp_attach),
	PLATFORMMETHOD(platform_mem_regions,	chrp_mem_regions),
	PLATFORMMETHOD(platform_real_maxaddr,	chrp_real_maxaddr),
	PLATFORMMETHOD(platform_timebase_freq,	chrp_timebase_freq),

	PLATFORMMETHOD(platform_smp_ap_init,	chrp_smp_ap_init),
	PLATFORMMETHOD(platform_smp_first_cpu,	chrp_smp_first_cpu),
	PLATFORMMETHOD(platform_smp_next_cpu,	chrp_smp_next_cpu),
	PLATFORMMETHOD(platform_smp_get_bsp,	chrp_smp_get_bsp),
#ifdef SMP
	PLATFORMMETHOD(platform_smp_start_cpu,	chrp_smp_start_cpu),
	PLATFORMMETHOD(platform_smp_probe_threads,	chrp_smp_probe_threads),
	PLATFORMMETHOD(platform_smp_topo,	chrp_smp_topo),
#endif

	PLATFORMMETHOD(platform_reset,		chrp_reset),
	{ 0, 0 }
};

static platform_def_t chrp_platform = {
	"chrp",
	chrp_methods,
	0
};

PLATFORM_DEF(chrp_platform);

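/*
 * Any Open Firmware tree that exposes a /memory (or /memory@0) node is
 * treated as a CHRP-class machine; returning the low-priority
 * BUS_PROBE_GENERIC lets a more specific platform claim the machine instead.
 */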
static int
chrp_probe(platform_t plat)
{
	if (OF_finddevice("/memory") != -1 || OF_finddevice("/memory@0") != -1)
		return (BUS_PROBE_GENERIC);

	return (ENXIO);
}

static int
chrp_attach(platform_t plat)
{
	int quiesce;
#ifdef __powerpc64__
	int i;
#if BYTE_ORDER == LITTLE_ENDIAN
	int result;
#endif

	/* XXX: check for /rtas/ibm,hypertas-functions? */
	if (!(mfmsr() & PSL_HV)) {
		struct mem_region *phys, *avail;
		int nphys, navail;
		vm_offset_t off;

		mem_regions(&phys, &nphys, &avail, &navail);

		realmaxaddr = 0;
		for (i = 0; i < nphys; i++) {
			off = phys[i].mr_start + phys[i].mr_size;
			realmaxaddr = MAX(off, realmaxaddr);
		}

		pmap_mmu_install("mmu_phyp", BUS_PROBE_SPECIFIC);
		cpu_idle_hook = phyp_cpu_idle;

		/* Set up important VPA fields */
		for (i = 0; i < MAXCPU; i++) {
			/* Bytes 4-5: VPA length (big-endian) */
			splpar_vpa[i][4] =
			    (uint8_t)((sizeof(splpar_vpa[i]) >> 8) & 0xff);
			splpar_vpa[i][5] =
			    (uint8_t)(sizeof(splpar_vpa[i]) & 0xff);
			splpar_vpa[i][0xba] = 1;	/* Maintain FPRs */
			splpar_vpa[i][0xbb] = 1;	/* Maintain PMCs */
			splpar_vpa[i][0xfc] = 0xff;	/* Maintain full SLB */
			splpar_vpa[i][0xfd] = 0xff;
			splpar_vpa[i][0xff] = 1;	/* Maintain Altivec */
		}
		mb();

		/* Set up hypervisor CPU stuff */
		chrp_smp_ap_init(plat);

#if BYTE_ORDER == LITTLE_ENDIAN
		/*
		 * Ask the hypervisor to update the LPAR ILE bit.
		 *
		 * This involves all processors reentering the hypervisor
		 * so the change appears simultaneously in all processors.
		 * This can take a long time.
		 */
		for (;;) {
			result = phyp_hcall(H_SET_MODE, 1UL,
			    H_SET_MODE_RSRC_ILE, 0, 0);
			if (result == H_SUCCESS)
				break;
			DELAY(1000);
		}
#endif

	}
#endif
	chrp_cpuref_init();

	/* Some systems (e.g. QEMU) need Open Firmware to stand down */
	quiesce = 1;
	TUNABLE_INT_FETCH("debug.quiesce_ofw", &quiesce);
	if (quiesce)
		ofw_quiesce();

	return (0);
}

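/*
 * Parse the ibm,dynamic-reconfiguration-memory node, which describes memory
 * assigned to the partition as dynamically reconfigurable Logical Memory
 * Blocks (LMBs), and append every LMB that is available and not reserved to
 * both the physical and available region lists.
 */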
static int
parse_drconf_memory(struct mem_region *ofmem, int *msz,
		    struct mem_region *ofavail, int *asz)
{
	phandle_t phandle;
	vm_offset_t base;
	int i, idx, len, lasz, lmsz, res;
	uint32_t flags, lmb_size[2];
	uint32_t *dmem;

	lmsz = *msz;
	lasz = *asz;

	phandle = OF_finddevice("/ibm,dynamic-reconfiguration-memory");
	if (phandle == -1)
		/* No drconf node, return. */
		return (0);

	res = OF_getencprop(phandle, "ibm,lmb-size", lmb_size,
	    sizeof(lmb_size));
	if (res == -1)
		return (0);
	printf("Logical Memory Block size: %d MB\n", lmb_size[1] >> 20);

	/*
	 * Parse the ibm,dynamic-memory property. The first cell gives the
	 * number of entries. The next two cells give the base address of the
	 * memory block, and the following four cells are the DRC index, a
	 * reserved field, the list index and the flags.
	 * (See PAPR C.6.6.2, ibm,dynamic-reconfiguration-memory.)
	 *
	 *	#el  Addr   DRC-idx  res   list-idx  flags
	 *	-------------------------------------------------
	 *	| 4 |   8   |   4   |   4   |   4   |   4   |....
	 *	-------------------------------------------------
	 */

	len = OF_getproplen(phandle, "ibm,dynamic-memory");
	if (len > 0) {
		/*
		 * We have to use a variable length array on the stack
		 * since we have very limited stack space.
		 */
		cell_t arr[len/sizeof(cell_t)];

		res = OF_getencprop(phandle, "ibm,dynamic-memory", arr,
		    sizeof(arr));
		if (res == -1)
			return (0);

		/* Number of elements */
		idx = arr[0];

		/* First address, in arr[1], arr[2] */
		dmem = &arr[1];

		for (i = 0; i < idx; i++) {
			base = ((uint64_t)dmem[0] << 32) + dmem[1];
			dmem += 4;
			flags = dmem[1];
			/* Use region only if available and not reserved. */
			if ((flags & 0x8) && !(flags & 0x80)) {
				ofmem[lmsz].mr_start = base;
				ofmem[lmsz].mr_size = (vm_size_t)lmb_size[1];
				ofavail[lasz].mr_start = base;
				ofavail[lasz].mr_size = (vm_size_t)lmb_size[1];
				lmsz++;
				lasz++;
			}
			dmem += 2;
		}
	}

	*msz = lmsz;
	*asz = lasz;

	return (1);
}

void
chrp_mem_regions(platform_t plat, struct mem_region *phys, int *physsz,
    struct mem_region *avail, int *availsz)
{
	vm_offset_t maxphysaddr;
	int i;

	ofw_mem_regions(phys, physsz, avail, availsz);
	parse_drconf_memory(phys, physsz, avail, availsz);

	/*
	 * On some firmwares (SLOF), some memory may be marked available that
	 * doesn't actually exist. This manifests as an extension of the last
	 * available segment past the end of physical memory, so truncate that
	 * one.
	 */
	maxphysaddr = 0;
	for (i = 0; i < *physsz; i++)
		if (phys[i].mr_start + phys[i].mr_size > maxphysaddr)
			maxphysaddr = phys[i].mr_start + phys[i].mr_size;

	for (i = 0; i < *availsz; i++)
		if (avail[i].mr_start + avail[i].mr_size > maxphysaddr)
			avail[i].mr_size = maxphysaddr - avail[i].mr_start;
}

static vm_offset_t
chrp_real_maxaddr(platform_t plat)
{
	return (realmaxaddr);
}

static u_long
chrp_timebase_freq(platform_t plat, struct cpuref *cpuref)
{
	phandle_t cpus, cpunode;
	int32_t ticks = -1;
	int res;
	char buf[8];

	cpus = OF_finddevice("/cpus");
	if (cpus == -1)
		panic("CPU tree not found on Open Firmware\n");

	for (cpunode = OF_child(cpus); cpunode != 0; cpunode = OF_peer(cpunode)) {
		res = OF_getprop(cpunode, "device_type", buf, sizeof(buf));
		if (res > 0 && strcmp(buf, "cpu") == 0)
			break;
	}
	if (cpunode <= 0)
		panic("CPU node not found on Open Firmware\n");

	OF_getencprop(cpunode, "timebase-frequency", &ticks, sizeof(ticks));

	if (ticks <= 0)
		panic("Unable to determine timebase frequency!");

	return (ticks);
}

static int
chrp_smp_first_cpu(platform_t plat, struct cpuref *cpuref)
{

	if (platform_cpuref_valid == 0)
		return (EINVAL);

	cpuref->cr_cpuid = 0;
	cpuref->cr_hwref = platform_cpuref[0].cr_hwref;

	return (0);
}

static int
chrp_smp_next_cpu(platform_t plat, struct cpuref *cpuref)
{
	int id;

	if (platform_cpuref_valid == 0)
		return (EINVAL);

	id = cpuref->cr_cpuid + 1;
	if (id >= platform_cpuref_cnt)
		return (ENOENT);

	cpuref->cr_cpuid = platform_cpuref[id].cr_cpuid;
	cpuref->cr_hwref = platform_cpuref[id].cr_hwref;

	return (0);
}

static int
chrp_smp_get_bsp(platform_t plat, struct cpuref *cpuref)
{

	cpuref->cr_cpuid = platform_cpuref[0].cr_cpuid;
	cpuref->cr_hwref = platform_cpuref[0].cr_hwref;
	return (0);
}

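/* Fetch the single-cell "reg" property (the hardware CPU id) of a cpu node. */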
static void
get_cpu_reg(phandle_t cpu, cell_t *reg)
{
	int res;

	res = OF_getproplen(cpu, "reg");
	if (res != sizeof(cell_t))
		panic("Unexpected length for CPU property reg on Open Firmware\n");
	OF_getencprop(cpu, "reg", reg, res);
}

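/*
 * Build the platform_cpuref table from the Open Firmware /cpus node: each
 * cpu child contributes one entry per value in its ibm,ppc-interrupt-server#s
 * property (one per hardware thread), and the list is rotated so the boot
 * CPU (identified through /chosen) ends up as cpuid 0.
 *
 * A hypothetical two-thread cpu node, for illustration only:
 *
 *	cpus {
 *		#address-cells = <1>;
 *		#size-cells = <0>;
 *		PowerPC,POWER8@20 {
 *			device_type = "cpu";
 *			reg = <0x20>;
 *			ibm,ppc-interrupt-server#s = <0x20 0x21>;
 *		};
 *	};
 */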
static int
chrp_cpuref_init(void)
{
	phandle_t cpu, dev, chosen, pbsp;
	ihandle_t ibsp;
	char buf[32];
	int a, bsp, res, res2, tmp_cpuref_cnt;
	static struct cpuref tmp_cpuref[MAXCPU];
	cell_t interrupt_servers[32], addr_cells, size_cells, reg, bsp_reg;

	if (platform_cpuref_valid)
		return (0);

	dev = OF_peer(0);
	dev = OF_child(dev);
	while (dev != 0) {
		res = OF_getprop(dev, "name", buf, sizeof(buf));
		if (res > 0 && strcmp(buf, "cpus") == 0)
			break;
		dev = OF_peer(dev);
	}

	/* Make sure the /cpus node uses 1 address cell and 0 size cells. */
	res = OF_getproplen(dev, "#address-cells");
	res2 = OF_getproplen(dev, "#size-cells");
	if (res != res2 || res != sizeof(cell_t))
		panic("CPU properties #address-cells and #size-cells not found on Open Firmware\n");
	OF_getencprop(dev, "#address-cells", &addr_cells, sizeof(addr_cells));
	OF_getencprop(dev, "#size-cells", &size_cells, sizeof(size_cells));
	if (addr_cells != 1 || size_cells != 0)
		panic("Unexpected values for CPU properties #address-cells and #size-cells on Open Firmware\n");

	/* Look for boot CPU in /chosen/cpu and /chosen/fdtbootcpu */

	chosen = OF_finddevice("/chosen");
	if (chosen == -1)
		panic("Device /chosen not found on Open Firmware\n");

	bsp_reg = -1;

	/* /chosen/cpu */
	if (OF_getproplen(chosen, "cpu") == sizeof(ihandle_t)) {
		OF_getprop(chosen, "cpu", &ibsp, sizeof(ibsp));
		pbsp = OF_instance_to_package(be32toh(ibsp));
		if (pbsp != -1)
			get_cpu_reg(pbsp, &bsp_reg);
	}

	/* /chosen/fdtbootcpu */
	if (bsp_reg == -1) {
		if (OF_getproplen(chosen, "fdtbootcpu") == sizeof(cell_t))
			OF_getprop(chosen, "fdtbootcpu", &bsp_reg, sizeof(bsp_reg));
	}

	if (bsp_reg == -1)
		panic("Boot CPU not found on Open Firmware\n");

	bsp = -1;
	tmp_cpuref_cnt = 0;
	for (cpu = OF_child(dev); cpu != 0; cpu = OF_peer(cpu)) {
		res = OF_getprop(cpu, "device_type", buf, sizeof(buf));
		if (res > 0 && strcmp(buf, "cpu") == 0) {
			res = OF_getproplen(cpu, "ibm,ppc-interrupt-server#s");
			if (res > 0) {
				OF_getencprop(cpu, "ibm,ppc-interrupt-server#s",
				    interrupt_servers, res);

				get_cpu_reg(cpu, &reg);
				if (reg == bsp_reg)
					bsp = tmp_cpuref_cnt;

				for (a = 0; a < res/sizeof(cell_t); a++) {
					tmp_cpuref[tmp_cpuref_cnt].cr_hwref = interrupt_servers[a];
					tmp_cpuref[tmp_cpuref_cnt].cr_cpuid = tmp_cpuref_cnt;
					tmp_cpuref_cnt++;
				}
			}
		}
	}

	if (bsp == -1)
		panic("Boot CPU not found\n");

	/* Map IDs, so BSP has CPUID 0 regardless of hwref */
	for (a = bsp; a < tmp_cpuref_cnt; a++) {
		platform_cpuref[platform_cpuref_cnt].cr_hwref = tmp_cpuref[a].cr_hwref;
		platform_cpuref[platform_cpuref_cnt].cr_cpuid = platform_cpuref_cnt;
		platform_cpuref_cnt++;
	}
	for (a = 0; a < bsp; a++) {
		platform_cpuref[platform_cpuref_cnt].cr_hwref = tmp_cpuref[a].cr_hwref;
		platform_cpuref[platform_cpuref_cnt].cr_cpuid = platform_cpuref_cnt;
		platform_cpuref_cnt++;
	}

	platform_cpuref_valid = 1;

	return (0);
}

#ifdef SMP
static int
chrp_smp_start_cpu(platform_t plat, struct pcpu *pc)
{
	cell_t start_cpu;
	int result, err, timeout;

	if (!rtas_exists()) {
		printf("RTAS uninitialized: unable to start AP %d\n",
		    pc->pc_cpuid);
		return (ENXIO);
	}

	start_cpu = rtas_token_lookup("start-cpu");
	if (start_cpu == -1) {
		printf("RTAS unknown method: unable to start AP %d\n",
		    pc->pc_cpuid);
		return (ENXIO);
	}

	ap_pcpu = pc;
	powerpc_sync();

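	/*
	 * RTAS "start-cpu" takes three arguments: the CPU's hardware id, the
	 * address at which to start execution (the system reset vector here),
	 * and the value to load into r3 (the new CPU's pcpu pointer).
	 */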
	result = rtas_call_method(start_cpu, 3, 1, pc->pc_hwref, EXC_RST, pc,
	    &err);
	if (result < 0 || err != 0) {
		printf("RTAS error (%d/%d): unable to start AP %d\n",
		    result, err, pc->pc_cpuid);
		return (ENXIO);
	}

	timeout = 10000;
	while (!pc->pc_awake && timeout--)
		DELAY(100);

	return ((pc->pc_awake) ? 0 : EBUSY);
}

static void
chrp_smp_probe_threads(platform_t plat)
{
	struct pcpu *pc, *last_pc;
	int i, ncores;

	ncores = 0;
	last_pc = NULL;
	for (i = 0; i <= mp_maxid; i++) {
		pc = pcpu_find(i);
		if (pc == NULL)
			continue;
		if (last_pc == NULL || pc->pc_hwref != last_pc->pc_hwref)
			ncores++;
		last_pc = pc;
	}

	mp_ncores = ncores;
	if (mp_ncpus % ncores == 0)
		smp_threads_per_core = mp_ncpus / ncores;
}

static struct cpu_group *
chrp_smp_topo(platform_t plat)
{

	if (mp_ncpus % mp_ncores != 0) {
		printf("WARNING: Irregular SMP topology. Performance may be "
		    "suboptimal (%d CPUs, %d cores)\n", mp_ncpus, mp_ncores);
		return (smp_topo_none());
	}

	/* Don't do anything fancier for non-threaded SMP */
	if (mp_ncpus == mp_ncores)
		return (smp_topo_none());

	return (smp_topo_1level(CG_SHARE_L1, smp_threads_per_core,
	    CG_FLAG_SMT));
}
#endif

static void
chrp_reset(platform_t platform)
{
	OF_reboot();
}

#ifdef __powerpc64__
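/*
 * Idle hook used when running under a hypervisor: cede the virtual processor
 * with H_CEDE instead of spinning, so the physical CPU can run other
 * partitions until work arrives.
 */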
static void
phyp_cpu_idle(sbintime_t sbt)
{
	register_t msr;

	msr = mfmsr();

	mtmsr(msr & ~PSL_EE);
	if (sched_runnable()) {
		mtmsr(msr);
		return;
	}

	phyp_hcall(H_CEDE); /* Re-enables interrupts internally */
	mtmsr(msr);
}

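/*
 * Per-CPU hypervisor setup: register this CPU's VPA and set the current
 * processor priority register (CPPR) to the least favored level so that
 * external interrupts of any priority are accepted.
 */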
static void
chrp_smp_ap_init(platform_t platform)
{
	if (!(mfmsr() & PSL_HV)) {
		/* Register VPA */
		phyp_hcall(H_REGISTER_VPA, 1UL, PCPU_GET(hwref),
		    splpar_vpa[PCPU_GET(hwref)]);

		/* Set interrupt priority */
		phyp_hcall(H_CPPR, 0xff);
	}
}
#else
static void
chrp_smp_ap_init(platform_t platform)
{
}
#endif