/*	$OpenBSD: aplcpu.c,v 1.8 2023/07/13 08:33:36 kettenis Exp $	*/
/*
 * Copyright (c) 2022 Mark Kettenis <kettenis@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
17
18#include <sys/param.h>
19#include <sys/systm.h>
20#include <sys/device.h>
21#include <sys/malloc.h>
22#include <sys/sensors.h>
23#include <sys/sysctl.h>
24
25#include <machine/bus.h>
26#include <machine/fdt.h>
27
28#include <dev/ofw/openfirm.h>
29#include <dev/ofw/fdt.h>
30
/*
 * Per-cluster DVFS register layout.  DVFS_CMD is written with the
 * desired P-state (aplcpu_setperf() puts the opp_level into the PS1
 * field and sets DVFS_CMD_SET); DVFS_STATUS reports the P-state that
 * is currently active.  The position of the current-P-state field in
 * DVFS_STATUS differs between the T8103 and T8112 SoCs.
 */
#define DVFS_CMD			0x0020
#define DVFS_CMD_BUSY			(1U << 31)	/* P-state change in progress */
#define DVFS_CMD_SET			(1 << 25)	/* commit the requested P-state */
#define DVFS_CMD_PS2_MASK		(0x1f << 12)
#define DVFS_CMD_PS2_SHIFT		12
#define DVFS_CMD_PS1_MASK		(0x1f << 0)	/* requested P-state */
#define DVFS_CMD_PS1_SHIFT		0

#define DVFS_STATUS			0x50
#define DVFS_T8103_STATUS_CUR_PS_MASK	(0xf << 4)
#define DVFS_T8103_STATUS_CUR_PS_SHIFT	4
#define DVFS_T8112_STATUS_CUR_PS_MASK	(0x1f << 5)
#define DVFS_T8112_STATUS_CUR_PS_SHIFT	5

/* Threshold (in microseconds) above which deep WFI is worth entering. */
#define APLCPU_DEEP_WFI_LATENCY		10 /* microseconds */
46
/*
 * A single operating performance point: a frequency and the P-state
 * level that selects it.
 */
struct opp {
	uint64_t opp_hz;	/* frequency in Hz */
	uint32_t opp_level;	/* P-state value written to DVFS_CMD */
};

/*
 * A device tree "operating-points-v2" table, shared between clusters
 * that reference the same phandle.  The opp array is kept sorted by
 * ascending frequency.
 */
struct opp_table {
	LIST_ENTRY(opp_table) ot_list;
	uint32_t ot_phandle;	/* phandle of the OPP table node */

	struct opp *ot_opp;	/* sorted array of OPPs */
	u_int ot_nopp;		/* number of entries in ot_opp */
	uint64_t ot_opp_hz_min;	/* lowest frequency in the table */
	uint64_t ot_opp_hz_max;	/* highest frequency in the table */
};
61
#define APLCPU_MAX_CLUSTERS	8

/*
 * Per-device softc.  One register window, OPP table pointer and
 * frequency sensor per CPU cluster.
 */
struct aplcpu_softc {
	struct device		sc_dev;
	bus_space_tag_t		sc_iot;
	bus_space_handle_t	sc_ioh[APLCPU_MAX_CLUSTERS];
	bus_size_t		sc_ios[APLCPU_MAX_CLUSTERS];	/* 0 if unmapped */

	int			sc_node;	/* device tree node */
	u_int			sc_nclusters;	/* number of mapped clusters */
	int			sc_perflevel;	/* last level set by setperf */

	/* SoC-specific DVFS_STATUS current-P-state field; 0 mask means
	 * the P-state is read back from DVFS_CMD instead. */
	uint32_t		sc_cur_ps_mask;
	u_int			sc_cur_ps_shift;

	LIST_HEAD(, opp_table)	sc_opp_tables;	/* all parsed OPP tables */
	struct opp_table	*sc_opp_table[APLCPU_MAX_CLUSTERS];
	uint64_t		sc_opp_hz_min;	/* min frequency over all tables */
	uint64_t		sc_opp_hz_max;	/* max frequency over all tables */

	struct ksensordev	sc_sensordev;
	struct ksensor		sc_sensor[APLCPU_MAX_CLUSTERS];
};
85
int	aplcpu_match(struct device *, void *, void *);
void	aplcpu_attach(struct device *, struct device *, void *);

/* Autoconf glue. */
const struct cfattach aplcpu_ca = {
	sizeof (struct aplcpu_softc), aplcpu_match, aplcpu_attach
};

struct cfdriver aplcpu_cd = {
	NULL, "aplcpu", DV_DULL
};
96
97void	aplcpu_opp_init(struct aplcpu_softc *, int);
98uint32_t aplcpu_opp_level(struct aplcpu_softc *, int);
99int	aplcpu_clockspeed(int *);
100void	aplcpu_setperf(int level);
101void	aplcpu_refresh_sensors(void *);
102void	aplcpu_idle_cycle();
103void	aplcpu_deep_wfi(void);
104
105int
106aplcpu_match(struct device *parent, void *match, void *aux)
107{
108	struct fdt_attach_args *faa = aux;
109
110	return OF_is_compatible(faa->fa_node, "apple,soc-cpufreq") ||
111	    OF_is_compatible(faa->fa_node, "apple,cluster-cpufreq");
112}
113
/*
 * Attach: map one register window per cluster, parse the OPP tables
 * referenced by the CPU nodes, attach one frequency sensor per
 * cluster, and install the cpufreq/idle hooks.
 */
void
aplcpu_attach(struct device *parent, struct device *self, void *aux)
{
	struct aplcpu_softc *sc = (struct aplcpu_softc *)self;
	struct fdt_attach_args *faa = aux;
	struct cpu_info *ci;
	CPU_INFO_ITERATOR cii;
	int i;

	/* One "reg" entry per cluster; bounded by the softc arrays. */
	if (faa->fa_nreg < 1) {
		printf(": no registers\n");
		return;
	}

	if (faa->fa_nreg > APLCPU_MAX_CLUSTERS) {
		printf(": too many registers\n");
		return;
	}

	sc->sc_iot = faa->fa_iot;
	for (i = 0; i < faa->fa_nreg; i++) {
		if (bus_space_map(sc->sc_iot, faa->fa_reg[i].addr,
		    faa->fa_reg[i].size, 0, &sc->sc_ioh[i])) {
			printf(": can't map registers\n");
			goto unmap;
		}
		/* Non-zero sc_ios[] marks the window as mapped (see unmap). */
		sc->sc_ios[i] = faa->fa_reg[i].size;
	}

	printf("\n");

	sc->sc_node = faa->fa_node;
	sc->sc_nclusters = faa->fa_nreg;

	/*
	 * Select the SoC-specific DVFS_STATUS field layout; on other
	 * SoCs sc_cur_ps_mask stays 0 and aplcpu_opp_level() falls
	 * back to reading the requested P-state from DVFS_CMD.
	 */
	if (OF_is_compatible(sc->sc_node, "apple,t8103-soc-cpufreq") ||
	    OF_is_compatible(sc->sc_node, "apple,t8103-cluster-cpufreq")) {
		sc->sc_cur_ps_mask = DVFS_T8103_STATUS_CUR_PS_MASK;
		sc->sc_cur_ps_shift = DVFS_T8103_STATUS_CUR_PS_SHIFT;
	} else if (OF_is_compatible(sc->sc_node, "apple,t8112-soc-cpufreq") ||
	    OF_is_compatible(sc->sc_node, "apple,t8112-cluster-cpufreq")) {
		sc->sc_cur_ps_mask = DVFS_T8112_STATUS_CUR_PS_MASK;
		sc->sc_cur_ps_shift = DVFS_T8112_STATUS_CUR_PS_SHIFT;
	}

	/* Seed the min/max so aplcpu_opp_init() can narrow/widen them. */
	sc->sc_opp_hz_min = UINT64_MAX;
	sc->sc_opp_hz_max = 0;

	LIST_INIT(&sc->sc_opp_tables);
	CPU_INFO_FOREACH(cii, ci) {
		aplcpu_opp_init(sc, ci->ci_node);
	}

	for (i = 0; i < sc->sc_nclusters; i++) {
		sc->sc_sensor[i].type = SENSOR_FREQ;
		sensor_attach(&sc->sc_sensordev, &sc->sc_sensor[i]);
	}

	/* Prime the sensors before exposing them. */
	aplcpu_refresh_sensors(sc);

	strlcpy(sc->sc_sensordev.xname, sc->sc_dev.dv_xname,
	    sizeof(sc->sc_sensordev.xname));
	sensordev_install(&sc->sc_sensordev);
	sensor_task_register(sc, aplcpu_refresh_sensors, 1);

	cpu_idle_cycle_fcn = aplcpu_idle_cycle;
	cpu_suspend_cycle_fcn = aplcpu_deep_wfi;
	cpu_cpuspeed = aplcpu_clockspeed;
	cpu_setperf = aplcpu_setperf;
	return;

unmap:
	for (i = 0; i < faa->fa_nreg; i++) {
		if (sc->sc_ios[i] == 0)
			continue;
		bus_space_unmap(sc->sc_iot, sc->sc_ioh[i], sc->sc_ios[i]);
	}
}
191
192void
193aplcpu_opp_init(struct aplcpu_softc *sc, int node)
194{
195	struct opp_table *ot;
196	int count, child;
197	uint32_t freq_domain[2], phandle;
198	uint32_t opp_hz, opp_level;
199	int i, j;
200
201	freq_domain[0] = OF_getpropint(node, "performance-domains", 0);
202	freq_domain[1] = 0;
203	if (freq_domain[0] == 0) {
204		if (OF_getpropintarray(node, "apple,freq-domain", freq_domain,
205		    sizeof(freq_domain)) != sizeof(freq_domain))
206			return;
207		if (freq_domain[1] > APLCPU_MAX_CLUSTERS)
208			return;
209	}
210	if (freq_domain[0] != OF_getpropint(sc->sc_node, "phandle", 0))
211		return;
212
213	phandle = OF_getpropint(node, "operating-points-v2", 0);
214	if (phandle == 0)
215		return;
216
217	LIST_FOREACH(ot, &sc->sc_opp_tables, ot_list) {
218		if (ot->ot_phandle == phandle) {
219			sc->sc_opp_table[freq_domain[1]] = ot;
220			return;
221		}
222	}
223
224	node = OF_getnodebyphandle(phandle);
225	if (node == 0)
226		return;
227
228	if (!OF_is_compatible(node, "operating-points-v2"))
229		return;
230
231	count = 0;
232	for (child = OF_child(node); child != 0; child = OF_peer(child))
233		count++;
234	if (count == 0)
235		return;
236
237	ot = malloc(sizeof(struct opp_table), M_DEVBUF, M_ZERO | M_WAITOK);
238	ot->ot_phandle = phandle;
239	ot->ot_opp = mallocarray(count, sizeof(struct opp),
240	    M_DEVBUF, M_ZERO | M_WAITOK);
241	ot->ot_nopp = count;
242
243	count = 0;
244	for (child = OF_child(node); child != 0; child = OF_peer(child)) {
245		opp_hz = OF_getpropint64(child, "opp-hz", 0);
246		opp_level = OF_getpropint(child, "opp-level", 0);
247
248		/* Insert into the array, keeping things sorted. */
249		for (i = 0; i < count; i++) {
250			if (opp_hz < ot->ot_opp[i].opp_hz)
251				break;
252		}
253		for (j = count; j > i; j--)
254			ot->ot_opp[j] = ot->ot_opp[j - 1];
255		ot->ot_opp[i].opp_hz = opp_hz;
256		ot->ot_opp[i].opp_level = opp_level;
257		count++;
258	}
259
260	ot->ot_opp_hz_min = ot->ot_opp[0].opp_hz;
261	ot->ot_opp_hz_max = ot->ot_opp[count - 1].opp_hz;
262
263	LIST_INSERT_HEAD(&sc->sc_opp_tables, ot, ot_list);
264	sc->sc_opp_table[freq_domain[1]] = ot;
265
266	/* Keep track of overall min/max frequency. */
267	if (sc->sc_opp_hz_min > ot->ot_opp_hz_min)
268		sc->sc_opp_hz_min = ot->ot_opp_hz_min;
269	if (sc->sc_opp_hz_max < ot->ot_opp_hz_max)
270		sc->sc_opp_hz_max = ot->ot_opp_hz_max;
271}
272
273uint32_t
274aplcpu_opp_level(struct aplcpu_softc *sc, int cluster)
275{
276	uint32_t opp_level;
277	uint64_t pstate;
278
279	if (sc->sc_cur_ps_mask) {
280		pstate = bus_space_read_8(sc->sc_iot, sc->sc_ioh[cluster],
281		    DVFS_STATUS);
282		opp_level = (pstate & sc->sc_cur_ps_mask);
283		opp_level >>= sc->sc_cur_ps_shift;
284	} else {
285		pstate = bus_space_read_8(sc->sc_iot, sc->sc_ioh[cluster],
286		    DVFS_CMD);
287		opp_level = (pstate & DVFS_CMD_PS1_MASK);
288		opp_level >>= DVFS_CMD_PS1_SHIFT;
289	}
290
291	return opp_level;
292}
293
294int
295aplcpu_clockspeed(int *freq)
296{
297	struct aplcpu_softc *sc;
298	struct opp_table *ot;
299	uint32_t opp_hz = 0, opp_level;
300	int i, j, k;
301
302	/*
303	 * Clusters can run at different frequencies.  We report the
304	 * highest frequency among all clusters.
305	 */
306
307	for (i = 0; i < aplcpu_cd.cd_ndevs; i++) {
308		sc = aplcpu_cd.cd_devs[i];
309		if (sc == NULL)
310			continue;
311
312		for (j = 0; j < sc->sc_nclusters; j++) {
313			if (sc->sc_opp_table[j] == NULL)
314				continue;
315
316			opp_level = aplcpu_opp_level(sc, j);
317
318			/* Translate P-state to frequency. */
319			ot = sc->sc_opp_table[j];
320			for (k = 0; k < ot->ot_nopp; k++) {
321				if (ot->ot_opp[k].opp_level != opp_level)
322					continue;
323				opp_hz = MAX(opp_hz, ot->ot_opp[k].opp_hz);
324			}
325		}
326	}
327
328	if (opp_hz == 0)
329		return EINVAL;
330
331	*freq = opp_hz / 1000000;
332	return 0;
333}
334
335void
336aplcpu_setperf(int level)
337{
338	struct aplcpu_softc *sc;
339	struct opp_table *ot;
340	uint64_t min, max;
341	uint64_t level_hz;
342	uint32_t opp_level;
343	uint64_t reg;
344	int i, j, k, timo;
345
346	/*
347	 * We let the CPU performance level span the entire range
348	 * between the lowest frequency on any of the clusters and the
349	 * highest frequency on any of the clusters.  We pick a
350	 * frequency within that range based on the performance level
351	 * and set all the clusters to the frequency that is closest
352	 * to but less than that frequency.  This isn't a particularly
353	 * sensible method but it is easy to implement and it is hard
354	 * to come up with something more sensible given the
355	 * constraints of the hw.setperf sysctl interface.
356	 */
357	for (i = 0; i < aplcpu_cd.cd_ndevs; i++) {
358		sc = aplcpu_cd.cd_devs[i];
359		if (sc == NULL)
360			continue;
361
362		min = sc->sc_opp_hz_min;
363		max = sc->sc_opp_hz_max;
364		level_hz = min + (level * (max - min)) / 100;
365	}
366
367	for (i = 0; i < aplcpu_cd.cd_ndevs; i++) {
368		sc = aplcpu_cd.cd_devs[i];
369		if (sc == NULL)
370			continue;
371		if (sc->sc_perflevel == level)
372			continue;
373
374		for (j = 0; j < sc->sc_nclusters; j++) {
375			if (sc->sc_opp_table[j] == NULL)
376				continue;
377
378			/* Translate performance level to a P-state. */
379			ot = sc->sc_opp_table[j];
380			opp_level = ot->ot_opp[0].opp_level;
381			for (k = 0; k < ot->ot_nopp; k++) {
382				if (ot->ot_opp[k].opp_hz <= level_hz &&
383				    ot->ot_opp[k].opp_level >= opp_level)
384					opp_level = ot->ot_opp[k].opp_level;
385			}
386
387			/* Wait until P-state logic isn't busy. */
388			for (timo = 100; timo > 0; timo--) {
389				reg = bus_space_read_8(sc->sc_iot,
390				    sc->sc_ioh[j], DVFS_CMD);
391				if ((reg & DVFS_CMD_BUSY) == 0)
392					break;
393				delay(1);
394			}
395			if (reg & DVFS_CMD_BUSY)
396				continue;
397
398			/* Set desired P-state. */
399			reg &= ~DVFS_CMD_PS1_MASK;
400			reg |= (opp_level << DVFS_CMD_PS1_SHIFT);
401			reg |= DVFS_CMD_SET;
402			bus_space_write_8(sc->sc_iot, sc->sc_ioh[j],
403			    DVFS_CMD, reg);
404		}
405
406		sc->sc_perflevel = level;
407	}
408}
409
410void
411aplcpu_refresh_sensors(void *arg)
412{
413	struct aplcpu_softc *sc = arg;
414	struct opp_table *ot;
415	uint32_t opp_level;
416	int i, j;
417
418	for (i = 0; i < sc->sc_nclusters; i++) {
419		if (sc->sc_opp_table[i] == NULL)
420			continue;
421
422		opp_level = aplcpu_opp_level(sc, i);
423
424		/* Translate P-state to frequency. */
425		ot = sc->sc_opp_table[i];
426		for (j = 0; j < ot->ot_nopp; j++) {
427			if (ot->ot_opp[j].opp_level == opp_level) {
428				sc->sc_sensor[i].value = ot->ot_opp[j].opp_hz;
429				break;
430			}
431		}
432	}
433}
434
435void
436aplcpu_idle_cycle(void)
437{
438	struct cpu_info *ci = curcpu();
439	struct timeval start, stop;
440	u_long itime;
441
442	microuptime(&start);
443
444	if (ci->ci_prev_sleep > 3 * APLCPU_DEEP_WFI_LATENCY)
445		aplcpu_deep_wfi();
446	else
447		cpu_wfi();
448
449	microuptime(&stop);
450	timersub(&stop, &start, &stop);
451	itime = stop.tv_sec * 1000000 + stop.tv_usec;
452
453	ci->ci_last_itime = itime;
454	itime >>= 1;
455	ci->ci_prev_sleep = (ci->ci_prev_sleep + (ci->ci_prev_sleep >> 1)
456	    + itime) >> 1;
457}
458