/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2005 Olivier Houchard.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Since we are compiled outside of the normal kernel build process, we
 * need to include opt_global.h manually.
 */
#include "opt_global.h"
#include "opt_kernname.h"

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <machine/asm.h>
#include <sys/param.h>
#include <sys/elf32.h>
#include <sys/inflate.h>
#include <machine/elf.h>
#include <machine/pte-v4.h>
#include <machine/cpufunc.h>
#include <machine/armreg.h>
#include <machine/cpu.h>
#include <machine/vmparam.h>	/* For KERNVIRTADDR */

#if __ARM_ARCH >= 6
#error "elf_trampoline is not supported on ARMv6/v7 platforms"
#endif
extern char kernel_start[];
extern char kernel_end[];

extern void *_end;

void _start(void);
void __start(void);
void _startC(unsigned r0, unsigned r1, unsigned r2, unsigned r3);

extern void do_call(void *, void *, void *, int);

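/* Size of the fixed portion of a gzip header (RFC 1952). */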
#define GZ_HEAD	0xa

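/*
 * Cache maintenance hooks: select write-back/invalidate routines for
 * the CPU/SoC this trampoline is built for; the L2 hook is a no-op
 * when there is no SoC L2 cache to maintain.
 */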
#if defined(CPU_ARM9E)
#define cpu_idcache_wbinv_all	armv5_ec_idcache_wbinv_all
extern void armv5_ec_idcache_wbinv_all(void);
#endif
#if defined(SOC_MV_KIRKWOOD) || defined(SOC_MV_DISCOVERY)
#define cpu_l2cache_wbinv_all	sheeva_l2cache_wbinv_all
extern void sheeva_l2cache_wbinv_all(void);
#else
#define cpu_l2cache_wbinv_all()
#endif

/*
 * Boot parameters
 */
static struct arm_boot_params s_boot_params;

static __inline void *
memcpy(void *dst, const void *src, int len)
{
	const char *s = src;
	char *d = dst;

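	/*
	 * Note: the "0 &&" below disables the word-at-a-time path, so
	 * every copy currently takes the byte loop.
	 */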
	while (len) {
		if (0 && len >= 4 && !((vm_offset_t)d & 3) &&
		    !((vm_offset_t)s & 3)) {
			*(uint32_t *)d = *(uint32_t *)s;
			s += 4;
			d += 4;
			len -= 4;
		} else {
			*d++ = *s++;
			len--;
		}
	}
	return (dst);
}

static __inline void
bzero(void *addr, int count)
{
	char *tmp = (char *)addr;

	while (count > 0) {
		if (count >= 4 && !((vm_offset_t)tmp & 3)) {
			*(uint32_t *)tmp = 0;
			tmp += 4;
			count -= 4;
		} else {
			*tmp = 0;
			tmp++;
			count--;
		}
	}
}

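/*
 * First C entry point: stash the boot loader's r0-r3, find the
 * physical address we were loaded at, set up a temporary stack,
 * enable the caches with the MMU off, and continue in __start().
 */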
void
_startC(unsigned r0, unsigned r1, unsigned r2, unsigned r3)
{
	int tmp1;
	unsigned int sp = ((unsigned int)&_end & ~3) + 4;
	unsigned int pc, kernphysaddr;

	s_boot_params.abp_r0 = r0;
	s_boot_params.abp_r1 = r1;
	s_boot_params.abp_r2 = r2;
	s_boot_params.abp_r3 = r3;

	/*
	 * Figure out the physical address the kernel was loaded at.  This
	 * assumes the entry point (this code right here) is in the first page,
	 * which will always be the case for this trampoline code.
	 */
	__asm __volatile("mov %0, pc\n"
	    : "=r" (pc));
	kernphysaddr = pc & ~PAGE_MASK;

#if defined(FLASHADDR) && defined(PHYSADDR) && defined(LOADERRAMADDR)
	if ((FLASHADDR > LOADERRAMADDR && pc >= FLASHADDR) ||
	    (FLASHADDR < LOADERRAMADDR && pc < LOADERRAMADDR)) {
		/*
		 * We're running from flash, so just copy the whole thing
		 * from flash to memory.
		 * This is far from optimal: we could do the relocation or
		 * the unzipping directly from flash to memory to avoid this
		 * needless copy, but that would require knowing the flash
		 * physical address.
		 */
		unsigned int target_addr;
		unsigned int tmp_sp;
		uint32_t src_addr = (uint32_t)&_start - PHYSADDR + FLASHADDR
		    + (pc - FLASHADDR - ((uint32_t)&_startC - PHYSADDR)) & 0xfffff000;

		target_addr = (unsigned int)&_start - PHYSADDR + LOADERRAMADDR;
		tmp_sp = target_addr + 0x100000 +
		    (unsigned int)&_end - (unsigned int)&_start;
		memcpy((char *)target_addr, (char *)src_addr,
		    (unsigned int)&_end - (unsigned int)&_start);
		/* Temporarily set the sp and jump to the new location. */
		__asm __volatile(
		    "mov sp, %1\n"
		    "mov r0, %2\n"
		    "mov r1, %3\n"
		    "mov r2, %4\n"
		    "mov r3, %5\n"
		    "mov pc, %0\n"
		    : : "r" (target_addr), "r" (tmp_sp),
		    "r" (s_boot_params.abp_r0), "r" (s_boot_params.abp_r1),
		    "r" (s_boot_params.abp_r2), "r" (s_boot_params.abp_r3)
		    : "r0", "r1", "r2", "r3");
	}
#endif
#ifdef KZIP
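	/*
	 * Leave room above _end for the inflated kernel image and two
	 * L1 page tables before placing the temporary stack.
	 */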
	sp += KERNSIZE + 0x100;
	sp &= ~(L1_TABLE_SIZE - 1);
	sp += 2 * L1_TABLE_SIZE;
#endif
	sp += 1024 * 1024; /* Should be enough for a stack */

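	/*
	 * Jump to the physical alias of label "2:" with the MMU still
	 * off, enabling the I/D caches, the write buffer and branch
	 * prediction on the way, then switch to the new stack.
	 */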
	__asm __volatile("adr %0, 2f\n"
			 "bic %0, %0, #0xff000000\n"
			 "and %1, %1, #0xff000000\n"
			 "orr %0, %0, %1\n"
			 "mrc p15, 0, %1, c1, c0, 0\n" /* CP15_SCTLR(%1)*/
			 "bic %1, %1, #1\n" /* Disable MMU */
			 "orr %1, %1, #(4 | 8)\n" /* Add DC enable,
						     WBUF enable */
			 "orr %1, %1, #0x1000\n" /* Add IC enable */
			 "orr %1, %1, #(0x800)\n" /* BPRD enable */

			 "mcr p15, 0, %1, c1, c0, 0\n" /* CP15_SCTLR(%1)*/
			 "nop\n"
			 "nop\n"
			 "nop\n"
			 "mov pc, %0\n"
			 "2: nop\n"
			 "mov sp, %2\n"
			 : "=r" (tmp1), "+r" (kernphysaddr), "+r" (sp));
	__start();
}

#ifdef KZIP
static unsigned char *orig_input, *i_input, *i_output;

static u_int memcnt;		/* Memory allocated: blocks */
static size_t memtot;		/* Memory allocated: bytes */
/*
 * Library functions required by inflate().
 */

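/* Size of the static arena backing kzipmalloc(). */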
#define MEMSIZ 0x8000

/*
 * Allocate memory block.
 */
unsigned char *
kzipmalloc(int size)
{
	void *ptr;
	static u_char mem[MEMSIZ];

	if (memtot + size > MEMSIZ)
		return NULL;
	ptr = mem + memtot;
	memtot += size;
	memcnt++;
	return ptr;
}

/*
 * Free allocated memory block.
 */
void
kzipfree(void *ptr)
{
	memcnt--;
	if (!memcnt)
		memtot = 0;
}

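/* Output stub for the inflate code; there is no console at this point. */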
void
putstr(char *dummy)
{
}

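/* Input callback for inflate(): next compressed byte, or GZ_EOF. */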
static int
input(void *dummy)
{
	if ((size_t)(i_input - orig_input) >= KERNCOMPSIZE) {
		return (GZ_EOF);
	}
	return *i_input++;
}

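/* Output callback for inflate(): append the inflated bytes at i_output. */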
static int
output(void *dummy, unsigned char *ptr, unsigned long len)
{

	memcpy(i_output, ptr, len);
	i_output += len;
	return (0);
}

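/*
 * Inflate the gzip'ed image at "kernel" to "startaddr": skip the
 * fixed gzip header and the optional NUL-terminated name/comment
 * field, run inflate(), and return the word-aligned address just
 * past the inflated data.
 */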
static void *
inflate_kernel(void *kernel, void *startaddr)
{
	struct inflate infl;
	unsigned char slide[GZ_WSIZE];

	orig_input = kernel;
	memcnt = memtot = 0;
	i_input = (unsigned char *)kernel + GZ_HEAD;
	if (((char *)kernel)[3] & 0x18) {
		while (*i_input)
			i_input++;
		i_input++;
	}
	i_output = startaddr;
	bzero(&infl, sizeof(infl));
	infl.gz_input = input;
	infl.gz_output = output;
	infl.gz_slide = slide;
	inflate(&infl);
	return ((char *)(((vm_offset_t)i_output & ~3) + 4));
}

#endif

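/*
 * Load the ELF image at kstart to run at curaddr.  When d == 0 this
 * is a dry run that only returns the highest address the loaded image
 * (plus symbol tables) will occupy; when d != 0 the PT_LOAD segments
 * are copied into place, the symbol and string tables are appended
 * after the image, and control jumps to the kernel entry point.
 */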
void *
load_kernel(unsigned int kstart, unsigned int curaddr, unsigned int func_end,
    int d)
{
	Elf32_Ehdr *eh;
	Elf32_Phdr phdr[64] /* XXX */;
	Elf32_Shdr shdr[64] /* XXX */;
	int i, j;
	void *entry_point;
	int symtabindex = -1;
	int symstrindex = -1;
	vm_offset_t lastaddr = 0;
	Elf_Addr ssym = 0;
	struct arm_boot_params local_boot_params;

	eh = (Elf32_Ehdr *)kstart;
	entry_point = (void *)eh->e_entry;
	memcpy(phdr, (void *)(kstart + eh->e_phoff),
	    eh->e_phnum * sizeof(phdr[0]));

	/* Determine lastaddr. */
	for (i = 0; i < eh->e_phnum; i++) {
		if (lastaddr < (phdr[i].p_vaddr - KERNVIRTADDR + curaddr
		    + phdr[i].p_memsz))
			lastaddr = phdr[i].p_vaddr - KERNVIRTADDR +
			    curaddr + phdr[i].p_memsz;
	}

	/* Save the symbol tables, as they're about to be scratched. */
	memcpy(shdr, (void *)(kstart + eh->e_shoff),
	    sizeof(*shdr) * eh->e_shnum);
	if (eh->e_shnum * eh->e_shentsize != 0 &&
	    eh->e_shoff != 0) {
		for (i = 0; i < eh->e_shnum; i++) {
			if (shdr[i].sh_type == SHT_SYMTAB) {
				for (j = 0; j < eh->e_phnum; j++) {
					if (phdr[j].p_type == PT_LOAD &&
					    shdr[i].sh_offset >=
					    phdr[j].p_offset &&
					    (shdr[i].sh_offset +
					     shdr[i].sh_size <=
					     phdr[j].p_offset +
					     phdr[j].p_filesz)) {
						shdr[i].sh_offset = 0;
						shdr[i].sh_size = 0;
						j = eh->e_phnum;
					}
				}
				if (shdr[i].sh_offset != 0 &&
				    shdr[i].sh_size != 0) {
					symtabindex = i;
					symstrindex = shdr[i].sh_link;
				}
			}
		}
		func_end = roundup(func_end, sizeof(long));
		if (symtabindex >= 0 && symstrindex >= 0) {
			ssym = lastaddr;
			if (d) {
				memcpy((void *)func_end, (void *)(
				    shdr[symtabindex].sh_offset + kstart),
				    shdr[symtabindex].sh_size);
				memcpy((void *)(func_end +
				    shdr[symtabindex].sh_size),
				    (void *)(shdr[symstrindex].sh_offset +
				    kstart), shdr[symstrindex].sh_size);
			} else {
				lastaddr += shdr[symtabindex].sh_size;
				lastaddr = roundup(lastaddr,
				    sizeof(shdr[symtabindex].sh_size));
				lastaddr += sizeof(shdr[symstrindex].sh_size);
				lastaddr += shdr[symstrindex].sh_size;
				lastaddr = roundup(lastaddr,
				    sizeof(shdr[symstrindex].sh_size));
			}
		}
	}
	if (!d)
		return ((void *)lastaddr);

	/*
	 * Now that the stack is fixed, copy the boot params
	 * before they are overwritten.
	 */
	memcpy(&local_boot_params, &s_boot_params, sizeof(local_boot_params));

	j = eh->e_phnum;
	for (i = 0; i < j; i++) {
		if (phdr[i].p_type != PT_LOAD)
			continue;
		memcpy((void *)(phdr[i].p_vaddr - KERNVIRTADDR + curaddr),
		    (void *)(kstart + phdr[i].p_offset), phdr[i].p_filesz);
		/* Clean space from oversized segments, e.g. bss. */
		if (phdr[i].p_filesz < phdr[i].p_memsz)
			bzero((void *)(phdr[i].p_vaddr - KERNVIRTADDR +
			    curaddr + phdr[i].p_filesz), phdr[i].p_memsz -
			    phdr[i].p_filesz);
	}
	/* Now grab the symbol tables. */
	if (symtabindex >= 0 && symstrindex >= 0) {
		*(Elf_Size *)lastaddr =
		    shdr[symtabindex].sh_size;
		lastaddr += sizeof(shdr[symtabindex].sh_size);
		memcpy((void *)lastaddr,
		    (void *)func_end,
		    shdr[symtabindex].sh_size);
		lastaddr += shdr[symtabindex].sh_size;
		lastaddr = roundup(lastaddr,
		    sizeof(shdr[symtabindex].sh_size));
		*(Elf_Size *)lastaddr =
		    shdr[symstrindex].sh_size;
		lastaddr += sizeof(shdr[symstrindex].sh_size);
		memcpy((void *)lastaddr,
		    (void *)(func_end +
			    shdr[symtabindex].sh_size),
		    shdr[symstrindex].sh_size);
		lastaddr += shdr[symstrindex].sh_size;
		lastaddr = roundup(lastaddr,
		    sizeof(shdr[symstrindex].sh_size));
		*(Elf_Addr *)curaddr = MAGIC_TRAMP_NUMBER;
		*((Elf_Addr *)curaddr + 1) = ssym - curaddr + KERNVIRTADDR;
		*((Elf_Addr *)curaddr + 2) = lastaddr - curaddr + KERNVIRTADDR;
	} else
		*(Elf_Addr *)curaddr = 0;
	/* Invalidate the instruction cache and drain the write buffer. */
	__asm __volatile("mcr p15, 0, %0, c7, c5, 0\n"
			 "mcr p15, 0, %0, c7, c10, 4\n"
			 : : "r" (curaddr));
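	/* Disable the MMU so the kernel is entered with translation off. */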
	__asm __volatile("mrc p15, 0, %0, c1, c0, 0\n" /* CP15_SCTLR(%0)*/
	    "bic %0, %0, #1\n" /* MMU_DISABLE */
	    "mcr p15, 0, %0, c1, c0, 0\n" /* CP15_SCTLR(%0)*/
	    : "=r" (ssym));
	/* Jump to the entry point. */
	((void(*)(unsigned, unsigned, unsigned, unsigned))
	    (entry_point - KERNVIRTADDR + curaddr))
	    (local_boot_params.abp_r0, local_boot_params.abp_r1,
	    local_boot_params.abp_r2, local_boot_params.abp_r3);
	__asm __volatile(".globl func_end\n"
	    "func_end:");

	/* NOTREACHED */
	return NULL;
}
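/*
 * Asm label planted at the end of load_kernel(); __start() uses it to
 * size the code it relocates above the new stack.
 */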
extern char func_end[];

#define PMAP_DOMAIN_KERNEL	0 /*
				   * Just define it instead of including the
				   * whole VM headers set.
				   */
int __hack;
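/*
 * Build an L1 table of 1MB sections identity-mapping
 * [physstart, physend), install it and turn the MMU on.
 */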
static __inline void
setup_pagetables(unsigned int pt_addr, vm_paddr_t physstart, vm_paddr_t physend,
    int write_back)
{
	unsigned int *pd = (unsigned int *)pt_addr;
	vm_paddr_t addr;
	int domain = (DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) | DOMAIN_CLIENT;
	int tmp;

	bzero(pd, L1_TABLE_SIZE);
	for (addr = physstart; addr < physend; addr += L1_S_SIZE) {
		pd[addr >> L1_S_SHIFT] = L1_TYPE_S|L1_S_C|L1_S_AP(AP_KRW)|
		    L1_S_DOM(PMAP_DOMAIN_KERNEL) | addr;
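		/*
		 * Note: the "&& 0" below keeps the write-back bit clear
		 * even when write_back is requested.
		 */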
		if (write_back && 0)
			pd[addr >> L1_S_SHIFT] |= L1_S_B;
	}
	/* XXX: See below */
	if (0xfff00000 < physstart || 0xfff00000 > physend)
		pd[0xfff00000 >> L1_S_SHIFT] = L1_TYPE_S|L1_S_AP(AP_KRW)|
		    L1_S_DOM(PMAP_DOMAIN_KERNEL)|physstart;
	__asm __volatile("mcr p15, 0, %1, c2, c0, 0\n" /* set TTB */
			 "mcr p15, 0, %1, c8, c7, 0\n" /* Flush the TLB */
			 "mcr p15, 0, %2, c3, c0, 0\n" /* Set the DACR */
			 "mrc p15, 0, %0, c1, c0, 0\n" /* CP15_SCTLR(%0)*/
			 "orr %0, %0, #1\n" /* MMU_ENABLE */
			 "mcr p15, 0, %0, c1, c0, 0\n" /* CP15_SCTLR(%0)*/
			 "mrc p15, 0, %0, c2, c0, 0\n" /* CPWAIT */
			 "mov r0, r0\n"
			 "sub pc, pc, #4\n" :
			 "=r" (tmp) : "r" (pd), "r" (domain));

	/*
	 * XXX: This is the most stupid workaround I've ever written.
	 * For some reason, the KB9202 won't boot the kernel unless
	 * we access an address which is not in the
	 * 0x20000000 - 0x20ffffff range. I hope I'll understand
	 * what's going on later.
	 */
	__hack = *(volatile int *)0xfffff21c;
}
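/*
 * Stage two, running with caches on: inflate the kernel if it is
 * gzip'ed, build the page tables, relocate load_kernel() above the
 * new stack and invoke the copy via do_call() to load and start the
 * ELF kernel.
 */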
void
__start(void)
{
	void *curaddr;
	void *dst, *altdst;
	char *kernel = (char *)&kernel_start;
	int sp;
	int pt_addr;

	__asm __volatile("mov %0, pc" :
	    "=r" (curaddr));
	curaddr = (void *)((unsigned int)curaddr & 0xfff00000);
#ifdef KZIP
	if (*kernel == 0x1f && kernel[1] == 0x8b) {
		pt_addr = L1_TABLE_SIZE +
		    rounddown2((int)&_end + KERNSIZE + 0x100, L1_TABLE_SIZE);

		setup_pagetables(pt_addr, (vm_paddr_t)curaddr,
		    (vm_paddr_t)curaddr + 0x10000000, 1);
		/* Gzipped kernel */
		dst = inflate_kernel(kernel, &_end);
		kernel = (char *)&_end;
		altdst = 4 + load_kernel((unsigned int)kernel,
		    (unsigned int)curaddr,
		    (unsigned int)&func_end + 800, 0);
		if (altdst > dst)
			dst = altdst;

		/*
		 * Disable MMU.  Otherwise, the setup_pagetables call below
		 * might overwrite the L1 table we are currently using.
		 */
		cpu_idcache_wbinv_all();
		cpu_l2cache_wbinv_all();
		__asm __volatile("mrc p15, 0, %0, c1, c0, 0\n" /* CP15_SCTLR(%0)*/
		    "bic %0, %0, #1\n" /* MMU_DISABLE */
		    "mcr p15, 0, %0, c1, c0, 0\n" /* CP15_SCTLR(%0)*/
		    : "=r" (pt_addr));
	} else
#endif
		dst = 4 + load_kernel((unsigned int)&kernel_start,
		    (unsigned int)curaddr,
		    (unsigned int)&func_end, 0);
	dst = (void *)((vm_offset_t)dst & ~3);
	pt_addr = L1_TABLE_SIZE + rounddown2((unsigned int)dst, L1_TABLE_SIZE);
	setup_pagetables(pt_addr, (vm_paddr_t)curaddr,
	    (vm_paddr_t)curaddr + 0x10000000, 0);
	sp = pt_addr + L1_TABLE_SIZE + 8192;
	sp = sp & ~3;
	dst = (void *)(sp + 4);
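	/*
	 * Copy load_kernel() (plus some slack) above the new stack so
	 * the code survives while it overwrites the currently running
	 * image.
	 */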
	memcpy((void *)dst, (void *)&load_kernel, (unsigned int)&func_end -
	    (unsigned int)&load_kernel + 800);
	do_call(dst, kernel, dst + (unsigned int)(&func_end) -
	    (unsigned int)(&load_kernel) + 800, sp);
}

/* We need to provide these functions but never call them. */
void __aeabi_unwind_cpp_pr0(void);
void __aeabi_unwind_cpp_pr1(void);
void __aeabi_unwind_cpp_pr2(void);

__strong_reference(__aeabi_unwind_cpp_pr0, __aeabi_unwind_cpp_pr1);
__strong_reference(__aeabi_unwind_cpp_pr0, __aeabi_unwind_cpp_pr2);

void
__aeabi_unwind_cpp_pr0(void)
{
}