locore.S revision 295270
/*-
 * Copyright (c) 2012-2014 Andrew Turner
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/arm64/arm64/locore.S 295270 2016-02-04 17:22:15Z andrew $
 */

#include "assym.s"
#include "opt_kstack_pages.h"
#include <sys/syscall.h>
#include <machine/asm.h>
#include <machine/armreg.h>
#include <machine/hypervisor.h>
#include <machine/param.h>
#include <machine/pte.h>

#define	VIRT_BITS	39

	.globl	kernbase
	.set	kernbase, KERNBASE

#define	DEVICE_MEM	0
#define	NORMAL_UNCACHED	1
#define	NORMAL_MEM	2

/*
 * We assume:
 *  MMU      on with an identity map, or off
 *  D-Cache: off
 *  I-Cache: on or off
 *  We are loaded at a 2MiB aligned address
 */
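
/*
 * Annotation (not in the original source): the 2MiB alignment requirement
 * follows from the page tables built below -- the kernel is mapped with
 * 2MiB L2 block entries, so a load address that is not 2MiB aligned could
 * not be described by those block mappings.
 */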

	.text
	.globl _start
_start:
	/* Drop to EL1 */
	bl	drop_to_el1

	/*
	 * Disable the MMU. We may have entered the kernel with it on and
	 * will need to update the tables later. If this has been set up
	 * with anything other than a VA == PA map then this will fail,
	 * but in this case the code to find where we are running from
	 * would have also failed.
	 */
	dsb	sy
	mrs	x2, sctlr_el1
	bic	x2, x2, SCTLR_M
	msr	sctlr_el1, x2
	isb

	/* Set the context id */
	msr	contextidr_el1, xzr

	/* Get the virt -> phys offset */
	bl	get_virt_delta

	/*
	 * At this point:
	 * x29 = PA - VA
	 * x28 = Our physical load address
	 */

	/* Create the page tables */
	bl	create_pagetables

	/*
	 * At this point:
	 * x27 = TTBR0 table
	 * x26 = TTBR1 table
	 */

	/* Enable the mmu */
	bl	start_mmu

	/* Jump to the virtual address space */
	ldr	x15, .Lvirtdone
	br	x15
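
	/*
	 * Annotation (not in the original source): .Lvirtdone is a literal
	 * pool entry holding the link-time (virtual) address of virtdone,
	 * so the ldr above fetches a VA where an adr would have produced
	 * our current PA.  Both the identity map (TTBR0) and the kernel
	 * map (TTBR1) are live at this point, so the branch moves execution
	 * from the physical alias to the virtual one.
	 */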

virtdone:
	/*
	 * Now that we are in virtual address space,
	 * we don't need the identity mapping in TTBR0 and
	 * can set the TCR to a more useful value.
	 */
	ldr	x2, tcr
	mrs	x3, id_aa64mmfr0_el1
	bfi	x2, x3, #32, #3
	msr	tcr_el1, x2
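
	/*
	 * Annotation (not in the original source): the bfi above copies the
	 * low three bits of ID_AA64MMFR0_EL1 (the bottom of its PARange
	 * field) into bits 34:32 of the TCR value (the IPS field), so the
	 * intermediate physical address size matches what this CPU
	 * implements.
	 */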

	/* Set up the stack */
	adr	x25, initstack_end
	mov	sp, x25
	sub	sp, sp, #PCB_SIZE

	/* Zero the BSS */
	ldr	x15, .Lbss
	ldr	x14, .Lend
1:
	str	xzr, [x15], #8
	cmp	x15, x14
	b.lo	1b
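
	/*
	 * Annotation (not in the original source): the loop stores eight
	 * bytes at a time and only checks the bound after the store, so it
	 * assumes __bss_start.._end is non-empty and 8-byte aligned (the
	 * linker script is expected to guarantee both).
	 */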

	/* Backup the module pointer */
	mov	x1, x0

	/* Make the page table base a virtual address */
	sub	x26, x26, x29

	sub	sp, sp, #(64 * 4)
	mov	x0, sp

	/* Negate the delta so it is VA -> PA */
	neg	x29, x29

	str	x1,  [x0]	/* modulep */
	str	x26, [x0, 8]	/* kern_l1pt */
	str	x29, [x0, 16]	/* kern_delta */
	str	x25, [x0, 24]	/* kern_stack */
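
	/*
	 * Annotation (not in the original source): these four slots appear
	 * to correspond to the fields of struct arm64_bootparams (modulep,
	 * kern_l1pt, kern_delta, kern_stack) that initarm() takes as its
	 * argument; x0 points at the structure carved out on the stack.
	 */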

	/* trace back starts here */
	mov	fp, #0
	/* Branch to C code */
	bl	initarm
	bl	mi_startup

	/* We should not get here */
	brk	0

	.align 3
.Lvirtdone:
	.quad	virtdone
.Lbss:
	.quad	__bss_start
.Lend:
	.quad	_end

#ifdef SMP
/*
 * mpentry(unsigned long)
 *
 * Called by a core when it is being brought online.
 * The data in x0 is passed straight to init_secondary.
 */
ENTRY(mpentry)
	/* Disable interrupts */
	msr	daifset, #2

	/* Drop to EL1 */
	bl	drop_to_el1

	/* Set the context id */
	msr	contextidr_el1, x1

	/* Load the kernel page table */
	adr	x26, pagetable_l1_ttbr1
	/* Load the identity page table */
	adr	x27, pagetable_l0_ttbr0

	/* Enable the mmu */
	bl	start_mmu

	/* Jump to the virtual address space */
	ldr	x15, =mp_virtdone
	br	x15

mp_virtdone:
	/*
	 * Now that we are in virtual address space,
	 * we don't need the identity mapping in TTBR0 and
	 * can set the TCR to a more useful value.
	 */
	ldr	x2, tcr
	mrs	x3, id_aa64mmfr0_el1
	bfi	x2, x3, #32, #3
	msr	tcr_el1, x2

	ldr	x4, =secondary_stacks
	mov	x5, #(PAGE_SIZE * KSTACK_PAGES)
	mul	x5, x0, x5
	add	sp, x4, x5
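
	/*
	 * Annotation (not in the original source): the identifier passed
	 * in x0 selects this core's stack by scaling it by the per-CPU
	 * kernel stack size within the secondary_stacks array.
	 */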

	b	init_secondary
END(mpentry)
#endif

/*
 * If we are started in EL2, configure the required hypervisor
 * registers and drop to EL1.
 */
drop_to_el1:
	mrs	x1, CurrentEL
	lsr	x1, x1, #2
	cmp	x1, #0x2
	b.eq	1f
	ret
1:
	/* Configure the Hypervisor */
	mov	x2, #(HCR_RW)
	msr	hcr_el2, x2

	/* Load the Virtualization Process ID Register */
	mrs	x2, midr_el1
	msr	vpidr_el2, x2

	/* Load the Virtualization Multiprocess ID Register */
	mrs	x2, mpidr_el1
	msr	vmpidr_el2, x2

	/* Set the bits that need to be 1 in sctlr_el1 */
	ldr	x2, .Lsctlr_res1
	msr	sctlr_el1, x2

	/* Don't trap to EL2 for exceptions */
	mov	x2, #CPTR_RES1
	msr	cptr_el2, x2

	/* Don't trap to EL2 for CP15 traps */
	msr	hstr_el2, xzr

	/* Enable access to the physical timers at EL1 */
	mrs	x2, cnthctl_el2
	orr	x2, x2, #(CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN)
	msr	cnthctl_el2, x2

	/* Set the counter offset to a known value */
	msr	cntvoff_el2, xzr

	/* Hypervisor trap functions */
	adr	x2, hyp_vectors
	msr	vbar_el2, x2

	mov	x2, #(PSR_F | PSR_I | PSR_A | PSR_D | PSR_M_EL1h)
	msr	spsr_el2, x2

	/* Configure GICv3 CPU interface */
	mrs	x2, id_aa64pfr0_el1
	/* Extract GIC bits from the register */
	ubfx	x2, x2, #ID_AA64PFR0_GIC_SHIFT, #ID_AA64PFR0_GIC_BITS
	/* GIC[3:0] == 0001 - GIC CPU interface via special regs. supported */
	cmp	x2, #(ID_AA64PFR0_GIC_CPUIF_EN >> ID_AA64PFR0_GIC_SHIFT)
	b.ne	2f

	mrs	x2, icc_sre_el2
	orr	x2, x2, #ICC_SRE_EL2_EN	/* Enable access from non-secure EL1 */
	msr	icc_sre_el2, x2
2:

	/* Set the address to return to our return address */
	msr	elr_el2, x30
	isb

	eret
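
	/*
	 * Annotation (not in the original source): the eret returns to the
	 * address loaded into elr_el2 (the caller's x30) in EL1h, with all
	 * of D, A, I and F masked, as set up in spsr_el2 above.
	 */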

	.align 3
.Lsctlr_res1:
	.quad SCTLR_RES1

#define	VECT_EMPTY	\
	.align 7;	\
	1:	b	1b

	.align 11
hyp_vectors:
	VECT_EMPTY	/* Synchronous EL2t */
	VECT_EMPTY	/* IRQ EL2t */
	VECT_EMPTY	/* FIQ EL2t */
	VECT_EMPTY	/* Error EL2t */

	VECT_EMPTY	/* Synchronous EL2h */
	VECT_EMPTY	/* IRQ EL2h */
	VECT_EMPTY	/* FIQ EL2h */
	VECT_EMPTY	/* Error EL2h */

	VECT_EMPTY	/* Synchronous 64-bit EL1 */
	VECT_EMPTY	/* IRQ 64-bit EL1 */
	VECT_EMPTY	/* FIQ 64-bit EL1 */
	VECT_EMPTY	/* Error 64-bit EL1 */

	VECT_EMPTY	/* Synchronous 32-bit EL1 */
	VECT_EMPTY	/* IRQ 32-bit EL1 */
	VECT_EMPTY	/* FIQ 32-bit EL1 */
	VECT_EMPTY	/* Error 32-bit EL1 */
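
/*
 * Annotation (not in the original source): an ARMv8 vector table is 2KiB
 * aligned (.align 11) with sixteen 128-byte entries (.align 7), four for
 * each of the EL2t, EL2h, lower-EL AArch64 and lower-EL AArch32 groups.
 * VECT_EMPTY fills each slot with an infinite loop, as no exception is
 * expected to reach EL2 this early in boot.
 */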

/*
 * Get the delta between the physical address we were loaded to and the
 * virtual address we expect to run from. This is used when building the
 * initial page table.
 */
get_virt_delta:
	/* Load the physical address of virt_map */
	adr	x29, virt_map
	/* Load the virtual address of virt_map stored in virt_map */
	ldr	x28, [x29]
	/* Find PA - VA as PA' = VA' - VA + PA = VA' + (PA - VA) = VA' + x29 */
	sub	x29, x29, x28
	/* Find the load address for the kernel */
	mov	x28, #(KERNBASE)
	add	x28, x28, x29
	ret
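
	/*
	 * Annotation (not in the original source), with purely illustrative
	 * numbers: if virt_map was linked at, say, VA 0xffffff8000000890
	 * but adr yields PA 0x0000000080000890, then x29 = PA - VA, and
	 * x28 = KERNBASE + (PA - VA) is the physical address the kernel
	 * image was loaded at.
	 */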

	.align 3
virt_map:
	.quad	virt_map

/*
 * This builds the page tables containing the identity map, and the kernel
 * virtual map.
 *
 * It relies on:
 *  We were loaded to an address that is on a 2MiB boundary
 *  All the memory must not cross a 1GiB boundary
 *  x28 contains the physical address we were loaded from
 *
 * TODO: This is out of date.
 *  There are at least 5 pages before that address for the page tables
 *   The pages used are:
 *    - The identity (PA = VA) table (TTBR0)
 *    - The Kernel L1 table          (TTBR1)(not yet)
 *    -  The PA != VA L2 table to jump into (not yet)
 *    -  The FDT L2 table                   (not yet)
 */
create_pagetables:
	/* Save the Link register */
	mov	x5, x30

	/* Clean the page table */
	adr	x6, pagetable
	mov	x26, x6
	adr	x27, pagetable_end
1:
	stp	xzr, xzr, [x6], #16
	stp	xzr, xzr, [x6], #16
	stp	xzr, xzr, [x6], #16
	stp	xzr, xzr, [x6], #16
	cmp	x6, x27
	b.lo	1b
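
	/*
	 * Annotation (not in the original source): the loop zeroes the
	 * pages between pagetable and pagetable_end, 64 bytes per iteration
	 * (four stp instructions of two 8-byte registers each).
	 */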

	/*
	 * Build the TTBR1 maps.
	 */

	/* Find the size of the kernel */
	mov	x6, #(KERNBASE)
	ldr	x7, .Lend
	/* Find the end - begin */
	sub	x8, x7, x6
	/* Get the number of 2MiB L2 entries to allocate, rounded down */
	lsr	x10, x8, #(L2_SHIFT)
	/* Add 8 MiB for any rounding above and the module data */
	add	x10, x10, #4
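
	/*
	 * Annotation (not in the original source): with a 4KiB granule,
	 * L2_SHIFT is 21, so each L2 block entry covers 2MiB; the four
	 * extra entries give 8MiB of headroom for the rounded-down
	 * division above and for the module metadata loaded after the
	 * kernel.
	 */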

	/* Create the kernel space L2 table */
	mov	x6, x26
	mov	x7, #NORMAL_MEM
	mov	x8, #(KERNBASE & L2_BLOCK_MASK)
	mov	x9, x28
	bl	build_l2_block_pagetable

	/* Move to the l1 table */
	add	x26, x26, #PAGE_SIZE

	/* Link the l1 -> l2 table */
	mov	x9, x6
	mov	x6, x26
	bl	link_l1_pagetable


	/*
	 * Build the TTBR0 maps.
	 */
	add	x27, x26, #PAGE_SIZE

	mov	x6, x27		/* The initial page table */
#if defined(SOCDEV_PA) && defined(SOCDEV_VA)
	/* Create a table for the UART */
	mov	x7, #DEVICE_MEM
	mov	x8, #(SOCDEV_VA)	/* VA start */
	mov	x9, #(SOCDEV_PA)	/* PA start */
	mov	x10, #1
	bl	build_l1_block_pagetable
#endif

	/* Create the VA = PA map */
	mov	x7, #NORMAL_UNCACHED /* Uncached as it's only needed early on */
	mov	x9, x27
	mov	x8, x9		/* VA start (== PA start) */
	mov	x10, #1
	bl	build_l1_block_pagetable

	/* Move to the l0 table */
	add	x27, x27, #PAGE_SIZE

	/* Link the l0 -> l1 table */
	mov	x9, x6
	mov	x6, x27
	bl	link_l0_pagetable

	/* Restore the Link register */
	mov	x30, x5
	ret

/*
 * Builds an L0 -> L1 table descriptor
 *
 * This is a link for a 512GiB block of memory with up to 1GiB regions mapped
 * within it by build_l1_block_pagetable.
 *
 *  x6  = L0 table
 *  x8  = Virtual Address
 *  x9  = L1 PA (trashed)
 *  x11, x12 and x13 are trashed
 */
link_l0_pagetable:
	/*
	 * Link an L0 -> L1 table entry.
	 */
	/* Find the table index */
	lsr	x11, x8, #L0_SHIFT
	and	x11, x11, #Ln_ADDR_MASK

	/* Build the L0 block entry */
	mov	x12, #L0_TABLE

	/* Only use the output address bits */
	lsr	x9, x9, #12
	orr	x12, x12, x9, lsl #12

	/* Store the entry */
	str	x12, [x6, x11, lsl #3]

	ret
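
	/*
	 * Annotation (not in the original source): with a 4KiB granule,
	 * L0_SHIFT is 39 and Ln_ADDR_MASK is 0x1ff, so x11 is the VA's
	 * 9-bit index into the 512-entry L0 table.  The lsr/orr pair
	 * truncates the L1 table's PA to a 4KiB boundary before merging it
	 * with the L0_TABLE descriptor bits.
	 */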

/*
 * Builds an L1 -> L2 table descriptor
 *
 * This is a link for a 1GiB block of memory with up to 2MiB regions mapped
 * within it by build_l2_block_pagetable.
 *
 *  x6  = L1 table
 *  x8  = Virtual Address
 *  x9  = L2 PA (trashed)
 *  x11, x12 and x13 are trashed
 */
link_l1_pagetable:
	/*
	 * Link an L1 -> L2 table entry.
	 */
	/* Find the table index */
	lsr	x11, x8, #L1_SHIFT
	and	x11, x11, #Ln_ADDR_MASK

	/* Build the L1 block entry */
	mov	x12, #L1_TABLE

	/* Only use the output address bits */
	lsr	x9, x9, #12
	orr	x12, x12, x9, lsl #12

	/* Store the entry */
	str	x12, [x6, x11, lsl #3]

	ret

/*
 * Builds count 1 GiB page table entries
 *  x6  = L1 table
 *  x7  = Type (DEVICE_MEM, NORMAL_UNCACHED or NORMAL_MEM)
 *  x8  = VA start
 *  x9  = PA start (trashed)
 *  x10 = Entry count (TODO)
 *  x11, x12 and x13 are trashed
 */
build_l1_block_pagetable:
	/*
	 * Build the L1 table entry.
	 */
	/* Find the table index */
	lsr	x11, x8, #L1_SHIFT
	and	x11, x11, #Ln_ADDR_MASK

	/* Build the L1 block entry */
	lsl	x12, x7, #2
	orr	x12, x12, #L1_BLOCK
	orr	x12, x12, #(ATTR_AF)
#ifdef SMP
	orr	x12, x12, ATTR_SH(ATTR_SH_IS)
#endif

	/* Only use the output address bits */
	lsr	x9, x9, #L1_SHIFT

	/* Set the physical address for this virtual address */
1:	orr	x12, x12, x9, lsl #L1_SHIFT

	/* Store the entry */
	str	x12, [x6, x11, lsl #3]

	/* Clear the address bits */
	and	x12, x12, #ATTR_MASK_L

	sub	x10, x10, #1
	add	x11, x11, #1
	add	x9, x9, #1
	cbnz	x10, 1b

2:	ret
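
	/*
	 * Annotation (not in the original source): each pass maps one 1GiB
	 * block, advancing the table index (x11) and the PA (x9, in 1GiB
	 * units) together; the and with ATTR_MASK_L keeps only the
	 * descriptor/attribute bits so the next orr installs a fresh
	 * output address.  The lsl #2 places the type in the descriptor's
	 * AttrIndx field, selecting one of the MAIR attributes set up in
	 * start_mmu.
	 */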

/*
 * Builds count 2 MiB page table entries
 *  x6  = L2 table
 *  x7  = Type (DEVICE_MEM, NORMAL_UNCACHED or NORMAL_MEM)
 *  x8  = VA start
 *  x9  = PA start (trashed)
 *  x10 = Entry count (TODO)
 *  x11, x12 and x13 are trashed
 */
build_l2_block_pagetable:
	/*
	 * Build the L2 table entry.
	 */
	/* Find the table index */
	lsr	x11, x8, #L2_SHIFT
	and	x11, x11, #Ln_ADDR_MASK

	/* Build the L2 block entry */
	lsl	x12, x7, #2
	orr	x12, x12, #L2_BLOCK
	orr	x12, x12, #(ATTR_AF)
#ifdef SMP
	orr	x12, x12, ATTR_SH(ATTR_SH_IS)
#endif

	/* Only use the output address bits */
	lsr	x9, x9, #L2_SHIFT

	/* Set the physical address for this virtual address */
1:	orr	x12, x12, x9, lsl #L2_SHIFT

	/* Store the entry */
	str	x12, [x6, x11, lsl #3]

	/* Clear the address bits */
	and	x12, x12, #ATTR_MASK_L

	sub	x10, x10, #1
	add	x11, x11, #1
	add	x9, x9, #1
	cbnz	x10, 1b

2:	ret

start_mmu:
	dsb	sy

	/* Load the exception vectors */
	ldr	x2, =exception_vectors
	msr	vbar_el1, x2

	/* Load ttbr0 and ttbr1 */
	msr	ttbr0_el1, x27
	msr	ttbr1_el1, x26
	isb

	/* Clear the Monitor Debug System control register */
	msr	mdscr_el1, xzr

	/* Invalidate the TLB */
	tlbi	vmalle1is

	ldr	x2, mair
	msr	mair_el1, x2

	/*
	 * Setup TCR according to PARange bits from ID_AA64MMFR0_EL1.
	 * Some machines have physical memory mapped >512GiB, which cannot
	 * be identity-mapped using the default 39 VA bits. Thus, use
	 * 48 VA bits for now and switch back to 39 after the VA jump.
	 */
	ldr	x2, tcr_early
	mrs	x3, id_aa64mmfr0_el1
	bfi	x2, x3, #32, #3
	msr	tcr_el1, x2

	/* Setup SCTLR */
	ldr	x2, sctlr_set
	ldr	x3, sctlr_clear
	mrs	x1, sctlr_el1
	bic	x1, x1, x3	/* Clear the required bits */
	orr	x1, x1, x2	/* Set the required bits */
	msr	sctlr_el1, x1
	isb

	ret

	.align 3
mair:
		/* Device            Normal, no cache     Normal, write-back */
	.quad	MAIR_ATTR(0x00, 0) | MAIR_ATTR(0x44, 1) | MAIR_ATTR(0xff, 2)
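	/*
	 * Annotation (not in the original source): MAIR_ATTR(attr, idx)
	 * places the 8-bit attribute in byte idx of MAIR_EL1.  0x00 is
	 * Device-nGnRnE, 0x44 is Normal Inner/Outer Non-cacheable and 0xff
	 * is Normal Inner/Outer Write-back, matching the DEVICE_MEM,
	 * NORMAL_UNCACHED and NORMAL_MEM indices defined at the top of
	 * the file.
	 */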
tcr:
	.quad (TCR_TxSZ(64 - VIRT_BITS) | TCR_ASID_16 | TCR_TG1_4K | \
	    TCR_CACHE_ATTRS | TCR_SMP_ATTRS)
tcr_early:
	.quad (TCR_T1SZ(64 - VIRT_BITS) | TCR_T0SZ(64 - 48) | \
	    TCR_ASID_16 | TCR_TG1_4K | TCR_CACHE_ATTRS | TCR_SMP_ATTRS)
sctlr_set:
	/* Bits to set */
	.quad (SCTLR_UCI | SCTLR_nTWE | SCTLR_nTWI | SCTLR_UCT | SCTLR_DZE | \
	    SCTLR_I | SCTLR_SED | SCTLR_SA0 | SCTLR_SA | SCTLR_C | SCTLR_M)
sctlr_clear:
	/* Bits to clear */
	.quad (SCTLR_EE | SCTLR_EOE | SCTLR_WXN | SCTLR_UMA | SCTLR_ITD | \
	    SCTLR_THEE | SCTLR_CP15BEN | SCTLR_A)

	.globl abort
abort:
	b abort

	//.section .init_pagetable
	.align 12 /* 4KiB aligned */
	/*
	 * 4 initial tables (in the following order):
	 *           L2 for kernel (High addresses)
	 *           L1 for kernel
	 *           L1 for user   (Low addresses)
	 *           L0 for user
	 */
pagetable:
	.space	PAGE_SIZE
pagetable_l1_ttbr1:
	.space	PAGE_SIZE
pagetable_l1_ttbr0:
	.space	PAGE_SIZE
pagetable_l0_ttbr0:
	.space	PAGE_SIZE
pagetable_end:

el2_pagetable:
	.space	PAGE_SIZE

	.globl init_pt_va
init_pt_va:
	.quad pagetable		/* XXX: Keep page tables VA */

	.align	4
initstack:
	.space	(PAGE_SIZE * KSTACK_PAGES)
initstack_end:


ENTRY(sigcode)
	mov	x0, sp
	add	x0, x0, #SF_UC

1:
	mov	x8, #SYS_sigreturn
	svc	0

	/* sigreturn failed, exit */
	mov	x8, #SYS_exit
	svc	0

	b	1b
END(sigcode)
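	/*
	 * Annotation (not in the original source): on FreeBSD/arm64 the
	 * system call number travels in x8, and SF_UC (generated into
	 * assym.s) is the offset of the ucontext within the signal frame,
	 * so x0 points at the context that sigreturn should restore.
	 */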
	/* This may be copied to the stack, keep it 16-byte aligned */
	.align	3
esigcode:

	.data
	.align	3
	.global	szsigcode
szsigcode:
	.quad	esigcode - sigcode