elf_trampoline.c revision 327658
/*-
 * Copyright (c) 2005 Olivier Houchard.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Since we are compiled outside of the normal kernel build process, we
 * need to include opt_global.h manually.
 */
#include "opt_global.h"
#include "opt_kernname.h"

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/arm/arm/elf_trampoline.c 327658 2018-01-07 00:04:13Z ian $");
#include <machine/asm.h>
#include <sys/param.h>
#include <sys/elf32.h>
#include <sys/inflate.h>
#include <machine/elf.h>
#include <machine/pte-v4.h>
#include <machine/cpufunc.h>
#include <machine/armreg.h>
#include <machine/vmparam.h>	/* For KERNVIRTADDR */

extern char kernel_start[];
extern char kernel_end[];

extern void *_end;

void _start(void);
void __start(void);
void __startC(void);

extern unsigned int cpu_ident(void);
extern void armv6_idcache_wbinv_all(void);
extern void armv7_idcache_wbinv_all(void);
extern void do_call(void *, void *, void *, int);

#define GZ_HEAD	0xa

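/*
 * The trampoline is built for exactly one CPU configuration, so pick the
 * matching cache write-back/invalidate routines at compile time.  Boards
 * without a separately managed L2 cache get an empty cpu_l2cache_wbinv_all().
 */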
#if defined(CPU_ARM9)
#define cpu_idcache_wbinv_all	arm9_idcache_wbinv_all
extern void arm9_idcache_wbinv_all(void);
#elif defined(CPU_FA526)
#define cpu_idcache_wbinv_all	fa526_idcache_wbinv_all
extern void fa526_idcache_wbinv_all(void);
#elif defined(CPU_ARM9E)
#define cpu_idcache_wbinv_all	armv5_ec_idcache_wbinv_all
extern void armv5_ec_idcache_wbinv_all(void);
#elif defined(CPU_ARM1176)
#define cpu_idcache_wbinv_all	armv6_idcache_wbinv_all
#elif defined(CPU_XSCALE_PXA2X0) || defined(CPU_XSCALE_IXP425)
#define cpu_idcache_wbinv_all	xscale_cache_purgeID
extern void xscale_cache_purgeID(void);
#elif defined(CPU_XSCALE_81342)
#define cpu_idcache_wbinv_all	xscalec3_cache_purgeID
extern void xscalec3_cache_purgeID(void);
#elif defined(CPU_MV_PJ4B)
#if !defined(SOC_MV_ARMADAXP)
#define cpu_idcache_wbinv_all	armv6_idcache_wbinv_all
extern void armv6_idcache_wbinv_all(void);
#else
#define cpu_idcache_wbinv_all()	armadaxp_idcache_wbinv_all()
#endif
#endif /* CPU_MV_PJ4B */
#ifdef CPU_XSCALE_81342
#define cpu_l2cache_wbinv_all	xscalec3_l2cache_purge
extern void xscalec3_l2cache_purge(void);
#elif defined(SOC_MV_KIRKWOOD) || defined(SOC_MV_DISCOVERY)
#define cpu_l2cache_wbinv_all	sheeva_l2cache_wbinv_all
extern void sheeva_l2cache_wbinv_all(void);
#elif defined(CPU_CORTEXA) || defined(CPU_KRAIT)
#define cpu_idcache_wbinv_all	armv7_idcache_wbinv_all
#define cpu_l2cache_wbinv_all()
#else
#define cpu_l2cache_wbinv_all()
#endif

static void armadaxp_idcache_wbinv_all(void);

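/*
 * Cache geometry, discovered at runtime by get_cachetype_cp15().  These
 * duplicate globals normally provided by the kernel's cpufunc code, since
 * this file is built standalone, outside the normal kernel build.
 */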
int     arm_picache_size;
int     arm_picache_line_size;
int     arm_picache_ways;

int     arm_pdcache_size;       /* and unified */
int     arm_pdcache_line_size = 32;
int     arm_pdcache_ways;

int     arm_pcache_type;
int     arm_pcache_unified;

int     arm_dcache_align;
int     arm_dcache_align_mask;

int     arm_dcache_min_line_size = 32;
int     arm_icache_min_line_size = 32;
int     arm_idcache_min_line_size = 32;

u_int	arm_cache_level;
u_int	arm_cache_type[14];
u_int	arm_cache_loc;

/*
 * Additional cache information local to this file.  Log2 of some of the
 * above numbers.
 */
static int      arm_dcache_l2_nsets;
static int      arm_dcache_l2_assoc;
static int      arm_dcache_l2_linesize;


extern int arm9_dcache_sets_inc;
extern int arm9_dcache_sets_max;
extern int arm9_dcache_index_max;
extern int arm9_dcache_index_inc;

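/*
 * Minimal freestanding memcpy() and bzero().  The trampoline cannot use
 * libkern/libc, so it carries its own implementations.
 */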
static __inline void *
memcpy(void *dst, const void *src, int len)
{
	const char *s = src;
	char *d = dst;

	while (len) {
		/* The word-at-a-time path is deliberately disabled ("0 &&"). */
		if (0 && len >= 4 && !((vm_offset_t)d & 3) &&
		    !((vm_offset_t)s & 3)) {
			*(uint32_t *)d = *(uint32_t *)s;
			s += 4;
			d += 4;
			len -= 4;
		} else {
			*d++ = *s++;
			len--;
		}
	}
	return (dst);
}

static __inline void
bzero(void *addr, int count)
{
	char *tmp = (char *)addr;

	while (count > 0) {
		if (count >= 4 && !((vm_offset_t)tmp & 3)) {
			*(uint32_t *)tmp = 0;
			tmp += 4;
			count -= 4;
		} else {
			*tmp = 0;
			tmp++;
			count--;
		}
	}
}

static void arm9_setup(void);

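/*
 * C entry point of the trampoline.  Determine the physical address the
 * image was loaded at, copy it from flash to RAM if that is where it is
 * running, set up a temporary stack, disable the MMU while enabling the
 * caches and write buffer, and then call __start().
 */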
void
_startC(void)
{
	int tmp1;
	unsigned int sp = ((unsigned int)&_end & ~3) + 4;
	unsigned int pc, kernphysaddr;

	/*
	 * Figure out the physical address the kernel was loaded at.  This
	 * assumes the entry point (this code right here) is in the first page,
	 * which will always be the case for this trampoline code.
	 */
	__asm __volatile("mov %0, pc\n"
	    : "=r" (pc));
	kernphysaddr = pc & ~PAGE_MASK;

#if defined(FLASHADDR) && defined(PHYSADDR) && defined(LOADERRAMADDR)
	if ((FLASHADDR > LOADERRAMADDR && pc >= FLASHADDR) ||
	    (FLASHADDR < LOADERRAMADDR && pc < LOADERRAMADDR)) {
		/*
		 * We're running from flash, so just copy the whole thing
		 * from flash to memory.
		 * This is far from optimal; we could do the relocation or
		 * the unzipping directly from flash to memory to avoid this
		 * needless copy, but that would require knowing the flash
		 * physical address.
		 */
		unsigned int target_addr;
		unsigned int tmp_sp;
		uint32_t src_addr = (uint32_t)&_start - PHYSADDR + FLASHADDR
		    + (pc - FLASHADDR - ((uint32_t)&_startC - PHYSADDR)) & 0xfffff000;

		target_addr = (unsigned int)&_start - PHYSADDR + LOADERRAMADDR;
		tmp_sp = target_addr + 0x100000 +
		    (unsigned int)&_end - (unsigned int)&_start;
		memcpy((char *)target_addr, (char *)src_addr,
		    (unsigned int)&_end - (unsigned int)&_start);
		/* Temporarily set the sp and jump to the new location. */
		__asm __volatile(
		    "mov sp, %1\n"
		    "mov pc, %0\n"
		    : : "r" (target_addr), "r" (tmp_sp));

	}
#endif
#ifdef KZIP
	sp += KERNSIZE + 0x100;
	sp &= ~(L1_TABLE_SIZE - 1);
	sp += 2 * L1_TABLE_SIZE;
#endif
	sp += 1024 * 1024; /* Should be enough for a stack */

	__asm __volatile("adr %0, 2f\n"
			 "bic %0, %0, #0xff000000\n"
			 "and %1, %1, #0xff000000\n"
			 "orr %0, %0, %1\n"
			 "mrc p15, 0, %1, c1, c0, 0\n" /* CP15_SCTLR(%1) */
			 "bic %1, %1, #1\n" /* Disable MMU */
			 "orr %1, %1, #(4 | 8)\n" /* Add DC enable,
						     WBUF enable */
			 "orr %1, %1, #0x1000\n" /* Add IC enable */
			 "orr %1, %1, #(0x800)\n" /* BPRD enable */

			 "mcr p15, 0, %1, c1, c0, 0\n" /* CP15_SCTLR(%1) */
			 "nop\n"
			 "nop\n"
			 "nop\n"
			 "mov pc, %0\n"
			 "2: nop\n"
			 "mov sp, %2\n"
			 : "=r" (tmp1), "+r" (kernphysaddr), "+r" (sp));
#ifndef KZIP
#ifdef CPU_ARM9
	/* So that idcache_wbinv works. */
	if ((cpu_ident() & 0x0000f000) == 0x00009000)
		arm9_setup();
#endif
#endif
	__start();
}

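/*
 * Read the CP15 cache type registers and fill in the cache geometry
 * globals above.  ARMv7-style CPUs are probed through CLIDR/CCSIDR;
 * older CPUs are decoded from the pre-ARMv7 cache type register layout.
 */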
static void
get_cachetype_cp15(void)
{
	u_int ctype, isize, dsize, cpuid;
	u_int clevel, csize, i, sel;
	u_int multiplier;
	u_char type;

	__asm __volatile("mrc p15, 0, %0, c0, c0, 1"
		: "=r" (ctype));

	cpuid = cpu_ident();
	/*
	 * ...and thus spake the ARM ARM:
	 *
	 * If an <opcode2> value corresponding to an unimplemented or
	 * reserved ID register is encountered, the System Control
	 * processor returns the value of the main ID register.
	 */
	if (ctype == cpuid)
		goto out;

	if (CPU_CT_FORMAT(ctype) == CPU_CT_ARMV7) {
		/* Resolve minimal cache line sizes */
		arm_dcache_min_line_size = 1 << (CPU_CT_DMINLINE(ctype) + 2);
		arm_icache_min_line_size = 1 << (CPU_CT_IMINLINE(ctype) + 2);
		arm_idcache_min_line_size =
		    (arm_dcache_min_line_size > arm_icache_min_line_size ?
		    arm_icache_min_line_size : arm_dcache_min_line_size);

		__asm __volatile("mrc p15, 1, %0, c0, c0, 1"
		    : "=r" (clevel));
		arm_cache_level = clevel;
		arm_cache_loc = CPU_CLIDR_LOC(arm_cache_level) + 1;
		i = 0;
		while ((type = (clevel & 0x7)) && i < 7) {
			if (type == CACHE_DCACHE || type == CACHE_UNI_CACHE ||
			    type == CACHE_SEP_CACHE) {
				sel = i << 1;
				__asm __volatile("mcr p15, 2, %0, c0, c0, 0"
				    : : "r" (sel));
				__asm __volatile("mrc p15, 1, %0, c0, c0, 0"
				    : "=r" (csize));
				arm_cache_type[sel] = csize;
			}
			if (type == CACHE_ICACHE || type == CACHE_SEP_CACHE) {
				sel = (i << 1) | 1;
				__asm __volatile("mcr p15, 2, %0, c0, c0, 0"
				    : : "r" (sel));
				__asm __volatile("mrc p15, 1, %0, c0, c0, 0"
				    : "=r" (csize));
				arm_cache_type[sel] = csize;
			}
			i++;
			clevel >>= 3;
		}
	} else {
		if ((ctype & CPU_CT_S) == 0)
			arm_pcache_unified = 1;

		/*
		 * If you want to know how this code works, go read the ARM ARM.
		 */

		arm_pcache_type = CPU_CT_CTYPE(ctype);

		if (arm_pcache_unified == 0) {
			isize = CPU_CT_ISIZE(ctype);
			multiplier = (isize & CPU_CT_xSIZE_M) ? 3 : 2;
			arm_picache_line_size = 1U << (CPU_CT_xSIZE_LEN(isize) + 3);
			if (CPU_CT_xSIZE_ASSOC(isize) == 0) {
				if (isize & CPU_CT_xSIZE_M)
					arm_picache_line_size = 0; /* not present */
				else
					arm_picache_ways = 1;
			} else {
				arm_picache_ways = multiplier <<
				    (CPU_CT_xSIZE_ASSOC(isize) - 1);
			}
			arm_picache_size = multiplier << (CPU_CT_xSIZE_SIZE(isize) + 8);
		}

		dsize = CPU_CT_DSIZE(ctype);
		multiplier = (dsize & CPU_CT_xSIZE_M) ? 3 : 2;
		arm_pdcache_line_size = 1U << (CPU_CT_xSIZE_LEN(dsize) + 3);
		if (CPU_CT_xSIZE_ASSOC(dsize) == 0) {
			if (dsize & CPU_CT_xSIZE_M)
				arm_pdcache_line_size = 0; /* not present */
			else
				arm_pdcache_ways = 1;
		} else {
			arm_pdcache_ways = multiplier <<
			    (CPU_CT_xSIZE_ASSOC(dsize) - 1);
		}
		arm_pdcache_size = multiplier << (CPU_CT_xSIZE_SIZE(dsize) + 8);

		arm_dcache_align = arm_pdcache_line_size;

		arm_dcache_l2_assoc = CPU_CT_xSIZE_ASSOC(dsize) + multiplier - 2;
		arm_dcache_l2_linesize = CPU_CT_xSIZE_LEN(dsize) + 3;
		arm_dcache_l2_nsets = 6 + CPU_CT_xSIZE_SIZE(dsize) -
		    CPU_CT_xSIZE_ASSOC(dsize) - CPU_CT_xSIZE_LEN(dsize);

	out:
		arm_dcache_align_mask = arm_dcache_align - 1;
	}
}

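/*
 * Derive the set/index iteration parameters consumed by the arm9 dcache
 * maintenance routines from the geometry probed above.
 */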
static void
arm9_setup(void)
{

	get_cachetype_cp15();
	arm9_dcache_sets_inc = 1U << arm_dcache_l2_linesize;
	arm9_dcache_sets_max = (1U << (arm_dcache_l2_linesize +
	    arm_dcache_l2_nsets)) - arm9_dcache_sets_inc;
	arm9_dcache_index_inc = 1U << (32 - arm_dcache_l2_assoc);
	arm9_dcache_index_max = 0U - arm9_dcache_index_inc;
}

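/*
 * Armada XP (CPU_MV_PJ4B/SOC_MV_ARMADAXP) picks the cache wbinv routine
 * at runtime: the ThumbEE field of ID_PFR0 is used to tell ARMv7-style
 * cores from ARMv6-style ones.
 */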
static void
armadaxp_idcache_wbinv_all(void)
{
	uint32_t feat;

	__asm __volatile("mrc p15, 0, %0, c0, c1, 0" : "=r" (feat));
	if (feat & ARM_PFR0_THUMBEE_MASK)
		armv7_idcache_wbinv_all();
	else
		armv6_idcache_wbinv_all();
}
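
/*
 * KZIP support: glue for the in-kernel inflate() routine.  These hooks
 * provide the allocator, input, and output callbacks inflate() needs to
 * unpack the embedded gzip'd kernel image.
 */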
#ifdef KZIP
static unsigned char *orig_input, *i_input, *i_output;

static u_int memcnt;		/* Memory allocated: blocks */
static size_t memtot;		/* Memory allocated: bytes */
/*
 * Library functions required by inflate().
 */

#define MEMSIZ 0x8000

/*
 * Allocate memory block.
 */
unsigned char *
kzipmalloc(int size)
{
	void *ptr;
	static u_char mem[MEMSIZ];

	if (memtot + size > MEMSIZ)
		return NULL;
	ptr = mem + memtot;
	memtot += size;
	memcnt++;
	return ptr;
}

/*
 * Free allocated memory block.
 */
void
kzipfree(void *ptr)
{
	memcnt--;
	if (!memcnt)
		memtot = 0;
}

void
putstr(char *dummy)
{
}

static int
input(void *dummy)
{
	if ((size_t)(i_input - orig_input) >= KERNCOMPSIZE) {
		return (GZ_EOF);
	}
	return *i_input++;
}

static int
output(void *dummy, unsigned char *ptr, unsigned long len)
{

	memcpy(i_output, ptr, len);
	i_output += len;
	return (0);
}

static void *
inflate_kernel(void *kernel, void *startaddr)
{
	struct inflate infl;
	unsigned char slide[GZ_WSIZE];

	orig_input = kernel;
	memcnt = memtot = 0;
	i_input = (unsigned char *)kernel + GZ_HEAD;
	if (((char *)kernel)[3] & 0x18) {
		while (*i_input)
			i_input++;
		i_input++;
	}
	i_output = startaddr;
	bzero(&infl, sizeof(infl));
	infl.gz_input = input;
	infl.gz_output = output;
	infl.gz_slide = slide;
	inflate(&infl);
	return ((char *)(((vm_offset_t)i_output & ~3) + 4));
}

#endif

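/*
 * Parse the ELF image at kstart, which will execute at physical address
 * curaddr.  With d == 0 only the final size is computed (the address just
 * past the loaded image and relocated symbol tables); with d != 0 the
 * PT_LOAD segments are copied into place, the symbol and string tables
 * are appended in the layout ddb expects (tagged with MAGIC_TRAMP_NUMBER
 * at curaddr), the MMU is turned off, and control jumps to the kernel
 * entry point.  func_end points to scratch space used to stash the symbol
 * tables while the segments are being copied over them.
 */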
void *
load_kernel(unsigned int kstart, unsigned int curaddr, unsigned int func_end,
    int d)
{
	Elf32_Ehdr *eh;
	Elf32_Phdr phdr[64] /* XXX */, *php;
	Elf32_Shdr shdr[64] /* XXX */;
	int i, j;
	void *entry_point;
	int symtabindex = -1;
	int symstrindex = -1;
	vm_offset_t lastaddr = 0;
	Elf_Addr ssym = 0;
	Elf_Dyn *dp;

	eh = (Elf32_Ehdr *)kstart;
	ssym = 0;
	entry_point = (void*)eh->e_entry;
	memcpy(phdr, (void *)(kstart + eh->e_phoff),
	    eh->e_phnum * sizeof(phdr[0]));

	/* Determine lastaddr. */
	for (i = 0; i < eh->e_phnum; i++) {
		if (lastaddr < (phdr[i].p_vaddr - KERNVIRTADDR + curaddr
		    + phdr[i].p_memsz))
			lastaddr = phdr[i].p_vaddr - KERNVIRTADDR +
			    curaddr + phdr[i].p_memsz;
	}

	/* Save the symbol tables, as they're about to be scratched. */
	memcpy(shdr, (void *)(kstart + eh->e_shoff),
	    sizeof(*shdr) * eh->e_shnum);
	if (eh->e_shnum * eh->e_shentsize != 0 &&
	    eh->e_shoff != 0) {
		for (i = 0; i < eh->e_shnum; i++) {
			if (shdr[i].sh_type == SHT_SYMTAB) {
				for (j = 0; j < eh->e_phnum; j++) {
					if (phdr[j].p_type == PT_LOAD &&
					    shdr[i].sh_offset >=
					    phdr[j].p_offset &&
					    (shdr[i].sh_offset +
					     shdr[i].sh_size <=
					     phdr[j].p_offset +
					     phdr[j].p_filesz)) {
						shdr[i].sh_offset = 0;
						shdr[i].sh_size = 0;
						j = eh->e_phnum;
					}
				}
				if (shdr[i].sh_offset != 0 &&
				    shdr[i].sh_size != 0) {
					symtabindex = i;
					symstrindex = shdr[i].sh_link;
				}
			}
		}
		func_end = roundup(func_end, sizeof(long));
		if (symtabindex >= 0 && symstrindex >= 0) {
			ssym = lastaddr;
			if (d) {
				memcpy((void *)func_end, (void *)(
				    shdr[symtabindex].sh_offset + kstart),
				    shdr[symtabindex].sh_size);
				memcpy((void *)(func_end +
				    shdr[symtabindex].sh_size),
				    (void *)(shdr[symstrindex].sh_offset +
				    kstart), shdr[symstrindex].sh_size);
			} else {
				lastaddr += shdr[symtabindex].sh_size;
				lastaddr = roundup(lastaddr,
				    sizeof(shdr[symtabindex].sh_size));
				lastaddr += sizeof(shdr[symstrindex].sh_size);
				lastaddr += shdr[symstrindex].sh_size;
				lastaddr = roundup(lastaddr,
				    sizeof(shdr[symstrindex].sh_size));
			}

		}
	}
	if (!d)
		return ((void *)lastaddr);

	j = eh->e_phnum;
	for (i = 0; i < j; i++) {
		volatile char c;

		if (phdr[i].p_type != PT_LOAD)
			continue;
		memcpy((void *)(phdr[i].p_vaddr - KERNVIRTADDR + curaddr),
		    (void*)(kstart + phdr[i].p_offset), phdr[i].p_filesz);
		/* Clean space from oversized segments, e.g. bss. */
		if (phdr[i].p_filesz < phdr[i].p_memsz)
			bzero((void *)(phdr[i].p_vaddr - KERNVIRTADDR +
			    curaddr + phdr[i].p_filesz), phdr[i].p_memsz -
			    phdr[i].p_filesz);
	}
	/* Now grab the symbol tables. */
	if (symtabindex >= 0 && symstrindex >= 0) {
		*(Elf_Size *)lastaddr =
		    shdr[symtabindex].sh_size;
		lastaddr += sizeof(shdr[symtabindex].sh_size);
		memcpy((void*)lastaddr,
		    (void *)func_end,
		    shdr[symtabindex].sh_size);
		lastaddr += shdr[symtabindex].sh_size;
		lastaddr = roundup(lastaddr,
		    sizeof(shdr[symtabindex].sh_size));
		*(Elf_Size *)lastaddr =
		    shdr[symstrindex].sh_size;
		lastaddr += sizeof(shdr[symstrindex].sh_size);
		memcpy((void*)lastaddr,
		    (void*)(func_end +
			    shdr[symtabindex].sh_size),
		    shdr[symstrindex].sh_size);
		lastaddr += shdr[symstrindex].sh_size;
		lastaddr = roundup(lastaddr,
		    sizeof(shdr[symstrindex].sh_size));
		*(Elf_Addr *)curaddr = MAGIC_TRAMP_NUMBER;
		*((Elf_Addr *)curaddr + 1) = ssym - curaddr + KERNVIRTADDR;
		*((Elf_Addr *)curaddr + 2) = lastaddr - curaddr + KERNVIRTADDR;
	} else
		*(Elf_Addr *)curaddr = 0;
	/* Invalidate the instruction cache and drain the write buffer. */
	__asm __volatile("mcr p15, 0, %0, c7, c5, 0\n"
			 "mcr p15, 0, %0, c7, c10, 4\n"
			 : : "r" (curaddr));
	__asm __volatile("mrc p15, 0, %0, c1, c0, 0\n" /* CP15_SCTLR(%0) */
	    "bic %0, %0, #1\n" /* Disable MMU */
	    "mcr p15, 0, %0, c1, c0, 0\n" /* CP15_SCTLR(%0) */
	    : "=r" (ssym));
	/* Jump to the entry point. */
	((void(*)(void))(entry_point - KERNVIRTADDR + curaddr))();
	__asm __volatile(".globl func_end\n"
	    "func_end:");

	/* NOTREACHED */
	return NULL;
}

extern char func_end[];

#define PMAP_DOMAIN_KERNEL	0 /*
				   * Just define it instead of including the
				   * whole VM headers set.
				   */
int __hack;
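
/*
 * Build a minimal set of page tables at pt_addr: a single L1 table of
 * 1MB section mappings covering [physstart, physend) 1:1, cacheable and
 * kernel read/write, then load the TTB and domain register and turn the
 * MMU on.
 */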
static __inline void
setup_pagetables(unsigned int pt_addr, vm_paddr_t physstart, vm_paddr_t physend,
    int write_back)
{
	unsigned int *pd = (unsigned int *)pt_addr;
	vm_paddr_t addr;
	int domain = (DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) | DOMAIN_CLIENT;
	int tmp;

	bzero(pd, L1_TABLE_SIZE);
	for (addr = physstart; addr < physend; addr += L1_S_SIZE) {
		pd[addr >> L1_S_SHIFT] = L1_TYPE_S|L1_S_C|L1_S_AP(AP_KRW)|
		    L1_S_DOM(PMAP_DOMAIN_KERNEL) | addr;
		/* The write-back (bufferable) variant is deliberately disabled. */
		if (write_back && 0)
			pd[addr >> L1_S_SHIFT] |= L1_S_B;
	}
	/* XXX: See below */
	if (0xfff00000 < physstart || 0xfff00000 > physend)
		pd[0xfff00000 >> L1_S_SHIFT] = L1_TYPE_S|L1_S_AP(AP_KRW)|
		    L1_S_DOM(PMAP_DOMAIN_KERNEL)|physstart;
	__asm __volatile("mcr p15, 0, %1, c2, c0, 0\n" /* set TTB */
			 "mcr p15, 0, %1, c8, c7, 0\n" /* Flush TLB */
			 "mcr p15, 0, %2, c3, c0, 0\n" /* Set DACR */
			 "mrc p15, 0, %0, c1, c0, 0\n" /* CP15_SCTLR(%0) */
			 "orr %0, %0, #1\n" /* MMU_ENABLE */
			 "mcr p15, 0, %0, c1, c0, 0\n" /* CP15_SCTLR(%0) */
			 "mrc p15, 0, %0, c2, c0, 0\n" /* CPWAIT */
			 "mov r0, r0\n"
			 "sub pc, pc, #4\n" :
			 "=r" (tmp) : "r" (pd), "r" (domain));

	/*
	 * XXX: This is the most stupid workaround I've ever written.
	 * For some reason, the KB9202 won't boot the kernel unless
	 * we access an address which is not in the
	 * 0x20000000 - 0x20ffffff range. I hope I'll understand
	 * what's going on later.
	 */
	__hack = *(volatile int *)0xfffff21c;
}

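/*
 * Main trampoline logic.  If the embedded kernel is gzip'd (KZIP), build
 * temporary page tables so decompression can run with the caches on,
 * inflate it past _end, and then tear the MMU back down.  In either case,
 * compute where the relocated kernel will end, build the final page tables
 * above it, copy the load_kernel()..func_end code beyond that so it cannot
 * overwrite itself while relocating the kernel, and call it via do_call().
 */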
void
__start(void)
{
	void *curaddr;
	void *dst, *altdst;
	char *kernel = (char *)&kernel_start;
	int sp;
	int pt_addr;

	__asm __volatile("mov %0, pc"  :
	    "=r" (curaddr));
	curaddr = (void*)((unsigned int)curaddr & 0xfff00000);
#ifdef KZIP
	if (*kernel == 0x1f && kernel[1] == 0x8b) {
		pt_addr = L1_TABLE_SIZE +
		    rounddown2((int)&_end + KERNSIZE + 0x100, L1_TABLE_SIZE);

#ifdef CPU_ARM9
		/* So that idcache_wbinv works. */
		if ((cpu_ident() & 0x0000f000) == 0x00009000)
			arm9_setup();
#endif
		setup_pagetables(pt_addr, (vm_paddr_t)curaddr,
		    (vm_paddr_t)curaddr + 0x10000000, 1);
		/* Gzipped kernel */
		dst = inflate_kernel(kernel, &_end);
		kernel = (char *)&_end;
		altdst = 4 + load_kernel((unsigned int)kernel,
		    (unsigned int)curaddr,
		    (unsigned int)&func_end + 800, 0);
		if (altdst > dst)
			dst = altdst;

		/*
		 * Disable the MMU.  Otherwise, the setup_pagetables call
		 * below might overwrite the L1 table we are currently using.
		 */
		cpu_idcache_wbinv_all();
		cpu_l2cache_wbinv_all();
		__asm __volatile("mrc p15, 0, %0, c1, c0, 0\n" /* CP15_SCTLR(%0) */
		  "bic %0, %0, #1\n" /* MMU_DISABLE */
		  "mcr p15, 0, %0, c1, c0, 0\n" /* CP15_SCTLR(%0) */
		  :"=r" (pt_addr));
	} else
#endif
		dst = 4 + load_kernel((unsigned int)&kernel_start,
		    (unsigned int)curaddr,
		    (unsigned int)&func_end, 0);
	dst = (void *)((vm_offset_t)dst & ~3);
	pt_addr = L1_TABLE_SIZE + rounddown2((unsigned int)dst, L1_TABLE_SIZE);
	setup_pagetables(pt_addr, (vm_paddr_t)curaddr,
	    (vm_paddr_t)curaddr + 0x10000000, 0);
	sp = pt_addr + L1_TABLE_SIZE + 8192;
	sp = sp & ~3;
	dst = (void *)(sp + 4);
	memcpy((void *)dst, (void *)&load_kernel, (unsigned int)&func_end -
	    (unsigned int)&load_kernel + 800);
	do_call(dst, kernel, dst + (unsigned int)(&func_end) -
	    (unsigned int)(&load_kernel) + 800, sp);
}

/* We need to provide these functions but never call them. */
void __aeabi_unwind_cpp_pr0(void);
void __aeabi_unwind_cpp_pr1(void);
void __aeabi_unwind_cpp_pr2(void);

__strong_reference(__aeabi_unwind_cpp_pr0, __aeabi_unwind_cpp_pr1);
__strong_reference(__aeabi_unwind_cpp_pr0, __aeabi_unwind_cpp_pr2);
void
__aeabi_unwind_cpp_pr0(void)
{
}
737