1// SPDX-License-Identifier: GPL-2.0+
2/*
3 * Common initialisation for Qualcomm Snapdragon boards.
4 *
5 * Copyright (c) 2024 Linaro Ltd.
6 * Author: Caleb Connolly <caleb.connolly@linaro.org>
7 */
8
9#include "time.h"
10#include <asm/armv8/mmu.h>
11#include <asm/gpio.h>
12#include <asm/io.h>
13#include <asm/psci.h>
14#include <asm/system.h>
15#include <dm/device.h>
16#include <dm/pinctrl.h>
17#include <dm/uclass-internal.h>
18#include <dm/read.h>
19#include <power/regulator.h>
20#include <env.h>
21#include <init.h>
22#include <linux/arm-smccc.h>
23#include <linux/bug.h>
24#include <linux/psci.h>
25#include <linux/sizes.h>
26#include <lmb.h>
27#include <malloc.h>
28#include <fdt_support.h>
29#include <usb.h>
30#include <sort.h>
31
32#include "qcom-priv.h"
33
34DECLARE_GLOBAL_DATA_PTR;
35
36static struct mm_region rbx_mem_map[CONFIG_NR_DRAM_BANKS + 2] = { { 0 } };
37
38struct mm_region *mem_map = rbx_mem_map;
39
/**
 * dram_init() - Set up the DRAM base and size
 *
 * All of the work is delegated to fdtdec_setup_mem_size_base().
 *
 * Return: the value returned by fdtdec_setup_mem_size_base()
 */
int dram_init(void)
{
	int ret;

	ret = fdtdec_setup_mem_size_base();

	return ret;
}
44
45static int ddr_bank_cmp(const void *v1, const void *v2)
46{
47	const struct {
48		phys_addr_t start;
49		phys_size_t size;
50	} *res1 = v1, *res2 = v2;
51
52	if (!res1->size)
53		return 1;
54	if (!res2->size)
55		return -1;
56
57	return (res1->start >> 24) - (res2->start >> 24);
58}
59
60int dram_init_banksize(void)
61{
62	int ret;
63
64	ret = fdtdec_setup_memory_banksize();
65	if (ret < 0)
66		return ret;
67
68	if (CONFIG_NR_DRAM_BANKS < 2)
69		return 0;
70
71	/* Sort our RAM banks -_- */
72	qsort(gd->bd->bi_dram, CONFIG_NR_DRAM_BANKS, sizeof(gd->bd->bi_dram[0]), ddr_bank_cmp);
73
74	return 0;
75}
76
77static void show_psci_version(void)
78{
79	struct arm_smccc_res res;
80
81	arm_smccc_smc(ARM_PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0, 0, 0, 0, 0, &res);
82
83	debug("PSCI:  v%ld.%ld\n",
84	      PSCI_VERSION_MAJOR(res.a0),
85	      PSCI_VERSION_MINOR(res.a0));
86}
87
88void *board_fdt_blob_setup(int *err)
89{
90	phys_addr_t fdt;
91	/* Return DTB pointer passed by ABL */
92	*err = 0;
93	fdt = get_prev_bl_fdt_addr();
94
95	/*
96	 * If we bail then the board will simply not boot, instead let's
97	 * try and use the FDT built into U-Boot if there is one...
98	 * This avoids having a hard dependency on the previous stage bootloader
99	 */
100
101	if (IS_ENABLED(CONFIG_OF_SEPARATE) && (!fdt || fdt != ALIGN(fdt, SZ_4K) ||
102					       fdt_check_header((void *)fdt))) {
103		debug("%s: Using built in FDT, bootloader gave us %#llx\n", __func__, fdt);
104		return (void *)gd->fdt_blob;
105	}
106
107	return (void *)fdt;
108}
109
/**
 * reset_cpu() - Reset the system
 *
 * Hands the request straight to psci_system_reset().
 */
void reset_cpu(void)
{
	/* The reset itself is delegated to PSCI */
	psci_system_reset();
}
114
115/*
116 * Some Qualcomm boards require GPIO configuration when switching USB modes.
117 * Support setting this configuration via pinctrl state.
118 */
119int board_usb_init(int index, enum usb_init_type init)
120{
121	struct udevice *usb;
122	int ret = 0;
123
124	/* USB device */
125	ret = uclass_find_device_by_seq(UCLASS_USB, index, &usb);
126	if (ret) {
127		printf("Cannot find USB device\n");
128		return ret;
129	}
130
131	ret = dev_read_stringlist_search(usb, "pinctrl-names",
132					 "device");
133	/* No "device" pinctrl state, so just bail */
134	if (ret < 0)
135		return 0;
136
137	/* Select "default" or "device" pinctrl */
138	switch (init) {
139	case USB_INIT_HOST:
140		pinctrl_select_state(usb, "default");
141		break;
142	case USB_INIT_DEVICE:
143		pinctrl_select_state(usb, "device");
144		break;
145	default:
146		debug("Unknown usb_init_type %d\n", init);
147		break;
148	}
149
150	return 0;
151}
152
153/*
154 * Some boards still need board specific init code, they can implement that by
155 * overriding this function.
156 *
157 * FIXME: get rid of board specific init code
158 */
159void __weak qcom_board_init(void)
160{
161}
162
163int board_init(void)
164{
165	regulators_enable_boot_on(false);
166	show_psci_version();
167	qcom_of_fixup_nodes();
168	qcom_board_init();
169	return 0;
170}
171
172/* Sets up the "board", and "soc" environment variables as well as constructing the devicetree
173 * path, with a few quirks to handle non-standard dtb filenames. This is not meant to be a
174 * comprehensive solution to automatically picking the DTB, but aims to be correct for the
175 * majority case. For most devices it should be possible to make this algorithm work by
176 * adjusting the root compatible property in the U-Boot DTS. Handling devices with multiple
177 * variants that are all supported by a single U-Boot image will require implementing device-
178 * specific detection.
179 */
180static void configure_env(void)
181{
182	const char *first_compat, *last_compat;
183	char *tmp;
184	char buf[32] = { 0 };
185	/*
186	 * Most DTB filenames follow the scheme: qcom/<soc>-[vendor]-<board>.dtb
187	 * The vendor is skipped when it's a Qualcomm reference board, or the
188	 * db845c.
189	 */
190	char dt_path[64] = { 0 };
191	int compat_count, ret;
192	ofnode root;
193
194	root = ofnode_root();
195	/* This is almost always 2, but be explicit that we want the first and last compatibles
196	 * not the first and second.
197	 */
198	compat_count = ofnode_read_string_count(root, "compatible");
199	if (compat_count < 2) {
200		log_warning("%s: only one root compatible bailing!\n", __func__);
201		return;
202	}
203
204	/* The most specific device compatible (e.g. "thundercomm,db845c") */
205	ret = ofnode_read_string_index(root, "compatible", 0, &first_compat);
206	if (ret < 0) {
207		log_warning("Can't read first compatible\n");
208		return;
209	}
210
211	/* The last compatible is always the SoC compatible */
212	ret = ofnode_read_string_index(root, "compatible", compat_count - 1, &last_compat);
213	if (ret < 0) {
214		log_warning("Can't read second compatible\n");
215		return;
216	}
217
218	/* Copy the second compat (e.g. "qcom,sdm845") into buf */
219	strlcpy(buf, last_compat, sizeof(buf) - 1);
220	tmp = buf;
221
222	/* strsep() is destructive, it replaces the comma with a \0 */
223	if (!strsep(&tmp, ",")) {
224		log_warning("second compatible '%s' has no ','\n", buf);
225		return;
226	}
227
228	/* tmp now points to just the "sdm845" part of the string */
229	env_set("soc", tmp);
230
231	/* Now figure out the "board" part from the first compatible */
232	memset(buf, 0, sizeof(buf));
233	strlcpy(buf, first_compat, sizeof(buf) - 1);
234	tmp = buf;
235
236	/* The Qualcomm reference boards (RBx, HDK, etc)  */
237	if (!strncmp("qcom", buf, strlen("qcom"))) {
238		/*
239		 * They all have the first compatible as "qcom,<soc>-<board>"
240		 * (e.g. "qcom,qrb5165-rb5"). We extract just the part after
241		 * the dash.
242		 */
243		if (!strsep(&tmp, "-")) {
244			log_warning("compatible '%s' has no '-'\n", buf);
245			return;
246		}
247		/* tmp is now "rb5" */
248		env_set("board", tmp);
249	} else {
250		if (!strsep(&tmp, ",")) {
251			log_warning("compatible '%s' has no ','\n", buf);
252			return;
253		}
254		/* for thundercomm we just want the bit after the comma (e.g. "db845c"),
255		 * for all other boards we replace the comma with a '-' and take both
256		 * (e.g. "oneplus-enchilada")
257		 */
258		if (!strncmp("thundercomm", buf, strlen("thundercomm"))) {
259			env_set("board", tmp);
260		} else {
261			*(tmp - 1) = '-';
262			env_set("board", buf);
263		}
264	}
265
266	/* Now build the full path name */
267	snprintf(dt_path, sizeof(dt_path), "qcom/%s-%s.dtb",
268		 env_get("soc"), env_get("board"));
269	env_set("fdtfile", dt_path);
270}
271
272void __weak qcom_late_init(void)
273{
274}
275
276#define KERNEL_COMP_SIZE	SZ_64M
277
278#define addr_alloc(lmb, size) lmb_alloc(lmb, size, SZ_2M)
279
280/* Stolen from arch/arm/mach-apple/board.c */
281int board_late_init(void)
282{
283	struct lmb lmb;
284	u32 status = 0;
285
286	lmb_init_and_reserve(&lmb, gd->bd, (void *)gd->fdt_blob);
287
288	/* We need to be fairly conservative here as we support boards with just 1G of TOTAL RAM */
289	status |= env_set_hex("kernel_addr_r", addr_alloc(&lmb, SZ_128M));
290	status |= env_set_hex("ramdisk_addr_r", addr_alloc(&lmb, SZ_128M));
291	status |= env_set_hex("kernel_comp_addr_r", addr_alloc(&lmb, KERNEL_COMP_SIZE));
292	status |= env_set_hex("kernel_comp_size", KERNEL_COMP_SIZE);
293	status |= env_set_hex("scriptaddr", addr_alloc(&lmb, SZ_4M));
294	status |= env_set_hex("pxefile_addr_r", addr_alloc(&lmb, SZ_4M));
295	status |= env_set_hex("fdt_addr_r", addr_alloc(&lmb, SZ_2M));
296
297	if (status)
298		log_warning("%s: Failed to set run time variables\n", __func__);
299
300	configure_env();
301	qcom_late_init();
302
303	return 0;
304}
305
306static void build_mem_map(void)
307{
308	int i, j;
309
310	/*
311	 * Ensure the peripheral block is sized to correctly cover the address range
312	 * up to the first memory bank.
313	 * Don't map the first page to ensure that we actually trigger an abort on a
314	 * null pointer access rather than just hanging.
315	 * FIXME: we should probably split this into more precise regions
316	 */
317	mem_map[0].phys = 0x1000;
318	mem_map[0].virt = mem_map[0].phys;
319	mem_map[0].size = gd->bd->bi_dram[0].start - mem_map[0].phys;
320	mem_map[0].attrs = PTE_BLOCK_MEMTYPE(MT_DEVICE_NGNRNE) |
321			 PTE_BLOCK_NON_SHARE |
322			 PTE_BLOCK_PXN | PTE_BLOCK_UXN;
323
324	for (i = 1, j = 0; i < ARRAY_SIZE(rbx_mem_map) - 1 && gd->bd->bi_dram[j].size; i++, j++) {
325		mem_map[i].phys = gd->bd->bi_dram[j].start;
326		mem_map[i].virt = mem_map[i].phys;
327		mem_map[i].size = gd->bd->bi_dram[j].size;
328		mem_map[i].attrs = PTE_BLOCK_MEMTYPE(MT_NORMAL) | \
329				   PTE_BLOCK_INNER_SHARE;
330	}
331
332	mem_map[i].phys = UINT64_MAX;
333	mem_map[i].size = 0;
334
335#ifdef DEBUG
336	debug("Configured memory map:\n");
337	for (i = 0; mem_map[i].size; i++)
338		debug("  0x%016llx - 0x%016llx: entry %d\n",
339		      mem_map[i].phys, mem_map[i].phys + mem_map[i].size, i);
340#endif
341}
342
343u64 get_page_table_size(void)
344{
345	return SZ_64K;
346}
347
348static int fdt_cmp_res(const void *v1, const void *v2)
349{
350	const struct fdt_resource *res1 = v1, *res2 = v2;
351
352	return res1->start - res2->start;
353}
354
355#define N_RESERVED_REGIONS 32
356
357/* Mark all no-map regions as PTE_TYPE_FAULT to prevent speculative access.
358 * On some platforms this is enough to trigger a security violation and trap
359 * to EL3.
360 */
361static void carve_out_reserved_memory(void)
362{
363	static struct fdt_resource res[N_RESERVED_REGIONS] = { 0 };
364	int parent, rmem, count, i = 0;
365	phys_addr_t start;
366	size_t size;
367
368	/* Some reserved nodes must be carved out, as the cache-prefetcher may otherwise
369	 * attempt to access them, causing a security exception.
370	 */
371	parent = fdt_path_offset(gd->fdt_blob, "/reserved-memory");
372	if (parent <= 0) {
373		log_err("No reserved memory regions found\n");
374		return;
375	}
376
377	/* Collect the reserved memory regions */
378	fdt_for_each_subnode(rmem, gd->fdt_blob, parent) {
379		const fdt32_t *ptr;
380		int len;
381		if (!fdt_getprop(gd->fdt_blob, rmem, "no-map", NULL))
382			continue;
383
384		if (i == N_RESERVED_REGIONS) {
385			log_err("Too many reserved regions!\n");
386			break;
387		}
388
389		/* Read the address and size out from the reg property. Doing this "properly" with
390		 * fdt_get_resource() takes ~70ms on SDM845, but open-coding the happy path here
391		 * takes <1ms... Oh the woes of no dcache.
392		 */
393		ptr = fdt_getprop(gd->fdt_blob, rmem, "reg", &len);
394		if (ptr) {
395			/* Qualcomm devices use #address/size-cells = <2> but all reserved regions are within
396			 * the 32-bit address space. So we can cheat here for speed.
397			 */
398			res[i].start = fdt32_to_cpu(ptr[1]);
399			res[i].end = res[i].start + fdt32_to_cpu(ptr[3]);
400			i++;
401		}
402	}
403
404	/* Sort the reserved memory regions by address */
405	count = i;
406	qsort(res, count, sizeof(struct fdt_resource), fdt_cmp_res);
407
408	/* Now set the right attributes for them. Often a lot of the regions are tightly packed together
409	 * so we can optimise the number of calls to mmu_change_region_attr() by combining adjacent
410	 * regions.
411	 */
412	start = ALIGN_DOWN(res[0].start, SZ_2M);
413	size = ALIGN(res[0].end - start, SZ_2M);
414	for (i = 1; i <= count; i++) {
415		/* We ideally want to 2M align everything for more efficient pagetables, but we must avoid
416		 * overwriting reserved memory regions which shouldn't be mapped as FAULT (like those with
417		 * compatible properties).
418		 * If within 2M of the previous region, bump the size to include this region. Otherwise
419		 * start a new region.
420		 */
421		if (i == count || start + size < res[i].start - SZ_2M) {
422			debug("  0x%016llx - 0x%016llx: reserved\n",
423			      start, start + size);
424			mmu_change_region_attr(start, size, PTE_TYPE_FAULT);
425			/* If this is the final region then quit here before we index
426			 * out of bounds...
427			 */
428			if (i == count)
429				break;
430			start = ALIGN_DOWN(res[i].start, SZ_2M);
431			size = ALIGN(res[i].end - start, SZ_2M);
432		} else {
433			/* Bump size if this region is immediately after the previous one */
434			size = ALIGN(res[i].end - start, SZ_2M);
435		}
436	}
437}
438
439/* This function open-codes setup_all_pgtables() so that we can
440 * insert additional mappings *before* turning on the MMU.
441 */
442void enable_caches(void)
443{
444	u64 tlb_addr = gd->arch.tlb_addr;
445	u64 tlb_size = gd->arch.tlb_size;
446	u64 pt_size;
447	ulong carveout_start;
448
449	gd->arch.tlb_fillptr = tlb_addr;
450
451	build_mem_map();
452
453	icache_enable();
454
455	/* Create normal system page tables */
456	setup_pgtables();
457
458	pt_size = (uintptr_t)gd->arch.tlb_fillptr -
459		  (uintptr_t)gd->arch.tlb_addr;
460	debug("Primary pagetable size: %lluKiB\n", pt_size / 1024);
461
462	/* Create emergency page tables */
463	gd->arch.tlb_size -= pt_size;
464	gd->arch.tlb_addr = gd->arch.tlb_fillptr;
465	setup_pgtables();
466	gd->arch.tlb_emerg = gd->arch.tlb_addr;
467	gd->arch.tlb_addr = tlb_addr;
468	gd->arch.tlb_size = tlb_size;
469
470	/* We do the carveouts only for QCS404, for now. */
471	if (fdt_node_check_compatible(gd->fdt_blob, 0, "qcom,qcs404") == 0) {
472		carveout_start = get_timer(0);
473		/* Takes ~20-50ms on SDM845 */
474		carve_out_reserved_memory();
475		debug("carveout time: %lums\n", get_timer(carveout_start));
476	}
477	dcache_enable();
478}
479