1/* $OpenBSD: aplcpu.c,v 1.8 2023/07/13 08:33:36 kettenis Exp $ */ 2/* 3 * Copyright (c) 2022 Mark Kettenis <kettenis@openbsd.org> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 18#include <sys/param.h> 19#include <sys/systm.h> 20#include <sys/device.h> 21#include <sys/malloc.h> 22#include <sys/sensors.h> 23#include <sys/sysctl.h> 24 25#include <machine/bus.h> 26#include <machine/fdt.h> 27 28#include <dev/ofw/openfirm.h> 29#include <dev/ofw/fdt.h> 30 31#define DVFS_CMD 0x0020 32#define DVFS_CMD_BUSY (1U << 31) 33#define DVFS_CMD_SET (1 << 25) 34#define DVFS_CMD_PS2_MASK (0x1f << 12) 35#define DVFS_CMD_PS2_SHIFT 12 36#define DVFS_CMD_PS1_MASK (0x1f << 0) 37#define DVFS_CMD_PS1_SHIFT 0 38 39#define DVFS_STATUS 0x50 40#define DVFS_T8103_STATUS_CUR_PS_MASK (0xf << 4) 41#define DVFS_T8103_STATUS_CUR_PS_SHIFT 4 42#define DVFS_T8112_STATUS_CUR_PS_MASK (0x1f << 5) 43#define DVFS_T8112_STATUS_CUR_PS_SHIFT 5 44 45#define APLCPU_DEEP_WFI_LATENCY 10 /* microseconds */ 46 47struct opp { 48 uint64_t opp_hz; 49 uint32_t opp_level; 50}; 51 52struct opp_table { 53 LIST_ENTRY(opp_table) ot_list; 54 uint32_t ot_phandle; 55 56 struct opp *ot_opp; 57 u_int ot_nopp; 58 uint64_t ot_opp_hz_min; 59 uint64_t ot_opp_hz_max; 60}; 61 62#define APLCPU_MAX_CLUSTERS 8 63 64struct aplcpu_softc { 65 struct device sc_dev; 66 bus_space_tag_t sc_iot; 67 bus_space_handle_t sc_ioh[APLCPU_MAX_CLUSTERS]; 68 bus_size_t sc_ios[APLCPU_MAX_CLUSTERS]; 69 70 int sc_node; 71 u_int sc_nclusters; 72 int sc_perflevel; 73 74 uint32_t sc_cur_ps_mask; 75 u_int sc_cur_ps_shift; 76 77 LIST_HEAD(, opp_table) sc_opp_tables; 78 struct opp_table *sc_opp_table[APLCPU_MAX_CLUSTERS]; 79 uint64_t sc_opp_hz_min; 80 uint64_t sc_opp_hz_max; 81 82 struct ksensordev sc_sensordev; 83 struct ksensor sc_sensor[APLCPU_MAX_CLUSTERS]; 84}; 85 86int aplcpu_match(struct device *, void *, void *); 87void aplcpu_attach(struct device *, struct device *, void *); 88 89const struct cfattach aplcpu_ca = { 90 sizeof (struct aplcpu_softc), aplcpu_match, aplcpu_attach 91}; 92 93struct cfdriver aplcpu_cd = { 94 NULL, "aplcpu", DV_DULL 95}; 96 97void aplcpu_opp_init(struct aplcpu_softc *, int); 98uint32_t aplcpu_opp_level(struct aplcpu_softc *, int); 99int aplcpu_clockspeed(int *); 100void aplcpu_setperf(int level); 101void aplcpu_refresh_sensors(void *); 102void aplcpu_idle_cycle(); 103void aplcpu_deep_wfi(void); 104 105int 106aplcpu_match(struct device *parent, void *match, void *aux) 107{ 108 struct fdt_attach_args *faa = aux; 109 110 return OF_is_compatible(faa->fa_node, "apple,soc-cpufreq") || 111 OF_is_compatible(faa->fa_node, "apple,cluster-cpufreq"); 112} 113 114void 115aplcpu_attach(struct device *parent, struct device *self, void *aux) 116{ 117 struct aplcpu_softc *sc = (struct aplcpu_softc *)self; 118 struct fdt_attach_args *faa = aux; 119 struct cpu_info *ci; 120 CPU_INFO_ITERATOR cii; 121 int i; 122 123 if (faa->fa_nreg < 1) { 124 printf(": no registers\n"); 125 return; 126 } 127 128 if (faa->fa_nreg > APLCPU_MAX_CLUSTERS) { 129 printf(": too many registers\n"); 130 return; 131 } 132 133 sc->sc_iot = faa->fa_iot; 134 for (i = 0; i < faa->fa_nreg; i++) { 135 if (bus_space_map(sc->sc_iot, faa->fa_reg[i].addr, 136 faa->fa_reg[i].size, 0, &sc->sc_ioh[i])) { 137 printf(": can't map registers\n"); 138 goto unmap; 139 } 140 sc->sc_ios[i] = faa->fa_reg[i].size; 141 } 142 143 printf("\n"); 144 145 sc->sc_node = faa->fa_node; 146 sc->sc_nclusters = faa->fa_nreg; 147 148 if (OF_is_compatible(sc->sc_node, "apple,t8103-soc-cpufreq") || 149 OF_is_compatible(sc->sc_node, "apple,t8103-cluster-cpufreq")) { 150 sc->sc_cur_ps_mask = DVFS_T8103_STATUS_CUR_PS_MASK; 151 sc->sc_cur_ps_shift = DVFS_T8103_STATUS_CUR_PS_SHIFT; 152 } else if (OF_is_compatible(sc->sc_node, "apple,t8112-soc-cpufreq") || 153 OF_is_compatible(sc->sc_node, "apple,t8112-cluster-cpufreq")) { 154 sc->sc_cur_ps_mask = DVFS_T8112_STATUS_CUR_PS_MASK; 155 sc->sc_cur_ps_shift = DVFS_T8112_STATUS_CUR_PS_SHIFT; 156 } 157 158 sc->sc_opp_hz_min = UINT64_MAX; 159 sc->sc_opp_hz_max = 0; 160 161 LIST_INIT(&sc->sc_opp_tables); 162 CPU_INFO_FOREACH(cii, ci) { 163 aplcpu_opp_init(sc, ci->ci_node); 164 } 165 166 for (i = 0; i < sc->sc_nclusters; i++) { 167 sc->sc_sensor[i].type = SENSOR_FREQ; 168 sensor_attach(&sc->sc_sensordev, &sc->sc_sensor[i]); 169 } 170 171 aplcpu_refresh_sensors(sc); 172 173 strlcpy(sc->sc_sensordev.xname, sc->sc_dev.dv_xname, 174 sizeof(sc->sc_sensordev.xname)); 175 sensordev_install(&sc->sc_sensordev); 176 sensor_task_register(sc, aplcpu_refresh_sensors, 1); 177 178 cpu_idle_cycle_fcn = aplcpu_idle_cycle; 179 cpu_suspend_cycle_fcn = aplcpu_deep_wfi; 180 cpu_cpuspeed = aplcpu_clockspeed; 181 cpu_setperf = aplcpu_setperf; 182 return; 183 184unmap: 185 for (i = 0; i < faa->fa_nreg; i++) { 186 if (sc->sc_ios[i] == 0) 187 continue; 188 bus_space_unmap(sc->sc_iot, sc->sc_ioh[i], sc->sc_ios[i]); 189 } 190} 191 192void 193aplcpu_opp_init(struct aplcpu_softc *sc, int node) 194{ 195 struct opp_table *ot; 196 int count, child; 197 uint32_t freq_domain[2], phandle; 198 uint32_t opp_hz, opp_level; 199 int i, j; 200 201 freq_domain[0] = OF_getpropint(node, "performance-domains", 0); 202 freq_domain[1] = 0; 203 if (freq_domain[0] == 0) { 204 if (OF_getpropintarray(node, "apple,freq-domain", freq_domain, 205 sizeof(freq_domain)) != sizeof(freq_domain)) 206 return; 207 if (freq_domain[1] > APLCPU_MAX_CLUSTERS) 208 return; 209 } 210 if (freq_domain[0] != OF_getpropint(sc->sc_node, "phandle", 0)) 211 return; 212 213 phandle = OF_getpropint(node, "operating-points-v2", 0); 214 if (phandle == 0) 215 return; 216 217 LIST_FOREACH(ot, &sc->sc_opp_tables, ot_list) { 218 if (ot->ot_phandle == phandle) { 219 sc->sc_opp_table[freq_domain[1]] = ot; 220 return; 221 } 222 } 223 224 node = OF_getnodebyphandle(phandle); 225 if (node == 0) 226 return; 227 228 if (!OF_is_compatible(node, "operating-points-v2")) 229 return; 230 231 count = 0; 232 for (child = OF_child(node); child != 0; child = OF_peer(child)) 233 count++; 234 if (count == 0) 235 return; 236 237 ot = malloc(sizeof(struct opp_table), M_DEVBUF, M_ZERO | M_WAITOK); 238 ot->ot_phandle = phandle; 239 ot->ot_opp = mallocarray(count, sizeof(struct opp), 240 M_DEVBUF, M_ZERO | M_WAITOK); 241 ot->ot_nopp = count; 242 243 count = 0; 244 for (child = OF_child(node); child != 0; child = OF_peer(child)) { 245 opp_hz = OF_getpropint64(child, "opp-hz", 0); 246 opp_level = OF_getpropint(child, "opp-level", 0); 247 248 /* Insert into the array, keeping things sorted. */ 249 for (i = 0; i < count; i++) { 250 if (opp_hz < ot->ot_opp[i].opp_hz) 251 break; 252 } 253 for (j = count; j > i; j--) 254 ot->ot_opp[j] = ot->ot_opp[j - 1]; 255 ot->ot_opp[i].opp_hz = opp_hz; 256 ot->ot_opp[i].opp_level = opp_level; 257 count++; 258 } 259 260 ot->ot_opp_hz_min = ot->ot_opp[0].opp_hz; 261 ot->ot_opp_hz_max = ot->ot_opp[count - 1].opp_hz; 262 263 LIST_INSERT_HEAD(&sc->sc_opp_tables, ot, ot_list); 264 sc->sc_opp_table[freq_domain[1]] = ot; 265 266 /* Keep track of overall min/max frequency. */ 267 if (sc->sc_opp_hz_min > ot->ot_opp_hz_min) 268 sc->sc_opp_hz_min = ot->ot_opp_hz_min; 269 if (sc->sc_opp_hz_max < ot->ot_opp_hz_max) 270 sc->sc_opp_hz_max = ot->ot_opp_hz_max; 271} 272 273uint32_t 274aplcpu_opp_level(struct aplcpu_softc *sc, int cluster) 275{ 276 uint32_t opp_level; 277 uint64_t pstate; 278 279 if (sc->sc_cur_ps_mask) { 280 pstate = bus_space_read_8(sc->sc_iot, sc->sc_ioh[cluster], 281 DVFS_STATUS); 282 opp_level = (pstate & sc->sc_cur_ps_mask); 283 opp_level >>= sc->sc_cur_ps_shift; 284 } else { 285 pstate = bus_space_read_8(sc->sc_iot, sc->sc_ioh[cluster], 286 DVFS_CMD); 287 opp_level = (pstate & DVFS_CMD_PS1_MASK); 288 opp_level >>= DVFS_CMD_PS1_SHIFT; 289 } 290 291 return opp_level; 292} 293 294int 295aplcpu_clockspeed(int *freq) 296{ 297 struct aplcpu_softc *sc; 298 struct opp_table *ot; 299 uint32_t opp_hz = 0, opp_level; 300 int i, j, k; 301 302 /* 303 * Clusters can run at different frequencies. We report the 304 * highest frequency among all clusters. 305 */ 306 307 for (i = 0; i < aplcpu_cd.cd_ndevs; i++) { 308 sc = aplcpu_cd.cd_devs[i]; 309 if (sc == NULL) 310 continue; 311 312 for (j = 0; j < sc->sc_nclusters; j++) { 313 if (sc->sc_opp_table[j] == NULL) 314 continue; 315 316 opp_level = aplcpu_opp_level(sc, j); 317 318 /* Translate P-state to frequency. */ 319 ot = sc->sc_opp_table[j]; 320 for (k = 0; k < ot->ot_nopp; k++) { 321 if (ot->ot_opp[k].opp_level != opp_level) 322 continue; 323 opp_hz = MAX(opp_hz, ot->ot_opp[k].opp_hz); 324 } 325 } 326 } 327 328 if (opp_hz == 0) 329 return EINVAL; 330 331 *freq = opp_hz / 1000000; 332 return 0; 333} 334 335void 336aplcpu_setperf(int level) 337{ 338 struct aplcpu_softc *sc; 339 struct opp_table *ot; 340 uint64_t min, max; 341 uint64_t level_hz; 342 uint32_t opp_level; 343 uint64_t reg; 344 int i, j, k, timo; 345 346 /* 347 * We let the CPU performance level span the entire range 348 * between the lowest frequency on any of the clusters and the 349 * highest frequency on any of the clusters. We pick a 350 * frequency within that range based on the performance level 351 * and set all the clusters to the frequency that is closest 352 * to but less than that frequency. This isn't a particularly 353 * sensible method but it is easy to implement and it is hard 354 * to come up with something more sensible given the 355 * constraints of the hw.setperf sysctl interface. 356 */ 357 for (i = 0; i < aplcpu_cd.cd_ndevs; i++) { 358 sc = aplcpu_cd.cd_devs[i]; 359 if (sc == NULL) 360 continue; 361 362 min = sc->sc_opp_hz_min; 363 max = sc->sc_opp_hz_max; 364 level_hz = min + (level * (max - min)) / 100; 365 } 366 367 for (i = 0; i < aplcpu_cd.cd_ndevs; i++) { 368 sc = aplcpu_cd.cd_devs[i]; 369 if (sc == NULL) 370 continue; 371 if (sc->sc_perflevel == level) 372 continue; 373 374 for (j = 0; j < sc->sc_nclusters; j++) { 375 if (sc->sc_opp_table[j] == NULL) 376 continue; 377 378 /* Translate performance level to a P-state. */ 379 ot = sc->sc_opp_table[j]; 380 opp_level = ot->ot_opp[0].opp_level; 381 for (k = 0; k < ot->ot_nopp; k++) { 382 if (ot->ot_opp[k].opp_hz <= level_hz && 383 ot->ot_opp[k].opp_level >= opp_level) 384 opp_level = ot->ot_opp[k].opp_level; 385 } 386 387 /* Wait until P-state logic isn't busy. */ 388 for (timo = 100; timo > 0; timo--) { 389 reg = bus_space_read_8(sc->sc_iot, 390 sc->sc_ioh[j], DVFS_CMD); 391 if ((reg & DVFS_CMD_BUSY) == 0) 392 break; 393 delay(1); 394 } 395 if (reg & DVFS_CMD_BUSY) 396 continue; 397 398 /* Set desired P-state. */ 399 reg &= ~DVFS_CMD_PS1_MASK; 400 reg |= (opp_level << DVFS_CMD_PS1_SHIFT); 401 reg |= DVFS_CMD_SET; 402 bus_space_write_8(sc->sc_iot, sc->sc_ioh[j], 403 DVFS_CMD, reg); 404 } 405 406 sc->sc_perflevel = level; 407 } 408} 409 410void 411aplcpu_refresh_sensors(void *arg) 412{ 413 struct aplcpu_softc *sc = arg; 414 struct opp_table *ot; 415 uint32_t opp_level; 416 int i, j; 417 418 for (i = 0; i < sc->sc_nclusters; i++) { 419 if (sc->sc_opp_table[i] == NULL) 420 continue; 421 422 opp_level = aplcpu_opp_level(sc, i); 423 424 /* Translate P-state to frequency. */ 425 ot = sc->sc_opp_table[i]; 426 for (j = 0; j < ot->ot_nopp; j++) { 427 if (ot->ot_opp[j].opp_level == opp_level) { 428 sc->sc_sensor[i].value = ot->ot_opp[j].opp_hz; 429 break; 430 } 431 } 432 } 433} 434 435void 436aplcpu_idle_cycle(void) 437{ 438 struct cpu_info *ci = curcpu(); 439 struct timeval start, stop; 440 u_long itime; 441 442 microuptime(&start); 443 444 if (ci->ci_prev_sleep > 3 * APLCPU_DEEP_WFI_LATENCY) 445 aplcpu_deep_wfi(); 446 else 447 cpu_wfi(); 448 449 microuptime(&stop); 450 timersub(&stop, &start, &stop); 451 itime = stop.tv_sec * 1000000 + stop.tv_usec; 452 453 ci->ci_last_itime = itime; 454 itime >>= 1; 455 ci->ci_prev_sleep = (ci->ci_prev_sleep + (ci->ci_prev_sleep >> 1) 456 + itime) >> 1; 457} 458