locore.s revision 285686
/*-
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)locore.s	7.3 (Berkeley) 5/13/91
 * $FreeBSD: head/sys/i386/i386/locore.s 285686 2015-07-19 10:45:58Z kib $
 *
 *		originally from: locore.s, by William F. Jolitz
 *
 *		Substantially rewritten by David Greenman, Rod Grimes,
 *			Bruce Evans, Wolfgang Solfrank, Poul-Henning Kamp
 *			and many others.
 */
41
42#include "opt_bootp.h"
43#include "opt_compat.h"
44#include "opt_nfsroot.h"
45#include "opt_pmap.h"
46
47#include <sys/syscall.h>
48#include <sys/reboot.h>
49
50#include <machine/asmacros.h>
51#include <machine/cputypes.h>
52#include <machine/psl.h>
53#include <machine/pmap.h>
54#include <machine/specialreg.h>
55
56#include "assym.s"
57
/*
 *	XXX
 *
 * Note: This version has been greatly munged to avoid various assembler
 * errors that may be fixed in newer versions of gas.  Perhaps newer
 * versions will have a more pleasant appearance.
 */
65
/*
 * PTmap is the recursive page map at the top of the virtual address space.
 * Within PTmap, the page directory can be found (third indirection).
 *
 * All three symbols are assemble-time constants derived from PTDPTDI, the
 * page-directory index of the recursive entry:
 *	PTmap  - virtual address selected by directory slot PTDPTDI
 *	PTD    - the page inside PTmap at page index PTDPTDI
 *	PTDpde - the entry at index PTDPTDI inside PTD
 */
	.globl	PTmap,PTD,PTDpde
	.set	PTmap,(PTDPTDI << PDRSHIFT)
	.set	PTD,PTmap + (PTDPTDI * PAGE_SIZE)
	.set	PTDpde,PTD + (PTDPTDI * PDESIZE)

/*
 * Compiled KERNBASE location and the kernel load address, exported as
 * absolute symbols so that they can be referenced by name.
 */
	.globl	kernbase
	.set	kernbase,KERNBASE
	.globl	kernload
	.set	kernload,KERNLOAD
82
/*
 * Globals
 *
 * Boot-time bookkeeping filled in by recover_bootinfo and create_pagetables
 * below.  While paging is still off these are accessed through R().
 */
	.data
	ALIGN_DATA			/* just to be sure */

	.space	0x2000			/* space for tmpstk - temporary stack */
tmpstk:					/* label follows the space: initial %esp (top of stack) */

	.globl	bootinfo
bootinfo:	.space	BOOTINFO_SIZE	/* bootinfo that we can handle */

		.globl KERNend
KERNend:	.long	0		/* phys addr end of kernel, as rounded up by create_pagetables */
physfree:	.long	0		/* phys addr of next free page (bumped by ALLOCPAGES) */

	.globl	IdlePTD
IdlePTD:	.long	0		/* phys addr of kernel PTD */

#if defined(PAE) || defined(PAE_TABLES)
	.globl	IdlePDPT
IdlePDPT:	.long	0		/* phys addr of kernel PDPT */
#endif

	.globl	KPTmap
KPTmap:		.long	0		/* address of kernel page tables */

	.globl	KPTphys
KPTphys:	.long	0		/* phys addr of kernel page tables */

	.globl	proc0kstack
proc0kstack:	.long	0		/* address of proc 0 kstack space */
p0kpa:		.long	0		/* phys addr of proc0's STACK */

vm86phystk:	.long	0		/* PA of vm86/bios stack */

	.globl	vm86paddr, vm86pa
vm86paddr:	.long	0		/* address of vm86 region */
vm86pa:		.long	0		/* phys addr of vm86 region */

#ifdef PC98
	.globl	pc98_system_parameter
pc98_system_parameter:
	.space	0x240			/* filled from 0xa1400 by btext */
#endif
128
/**********************************************************************
 *
 * Some handy macros
 *
 */

/*
 * R(foo)
 *	Subtract KERNBASE from a symbol's link-time address.  Used for every
 *	symbol reference made before paging is enabled.
 */
#define R(foo) ((foo)-KERNBASE)

/*
 * ALLOCPAGES(foo)
 *	Carve "foo" pages off the front of physfree and zero them.
 *
 *	On exit:
 *	  %esi = physical address of the first allocated page
 *	  physfree has been advanced by foo * PAGE_SIZE
 *	Clobbers %eax (left 0), %ecx (left 0), %edi and the direction flag
 *	(cleared).
 */
#define ALLOCPAGES(foo) \
	movl	R(physfree), %esi ; \
	movl	$((foo)*PAGE_SIZE), %eax ; \
	addl	%esi, %eax ; \
	movl	%eax, R(physfree) ; \
	movl	%esi, %edi ; \
	movl	$((foo)*PAGE_SIZE),%ecx ; \
	xorl	%eax,%eax ; \
	cld ; \
	rep ; \
	stosb

/*
 * fillkpt
 *	eax = page frame address
 *	ebx = index into page table
 *	ecx = how many pages to map
 * 	base = base address of page dir/table
 *	prot = protection bits
 *
 * "base" and "prot" are used verbatim as addl/orl source operands, so
 * callers pass a memory operand such as R(KPTphys) and an immediate such
 * as $PG_RW.  Clobbers %eax, %ebx and %ecx.
 *
 * %ecx must be non-zero on entry: the "loop" instruction decrements before
 * testing, so a zero count would wrap around instead of mapping nothing.
 *
 * The macro defines numeric local label 1, so a caller's own "1f"/"1b"
 * reference must not span an invocation of it.
 */
#define	fillkpt(base, prot)		  \
	shll	$PTESHIFT,%ebx		; \
	addl	base,%ebx		; \
	orl	$PG_V,%eax		; \
	orl	prot,%eax		; \
1:	movl	%eax,(%ebx)		; \
	addl	$PAGE_SIZE,%eax		; /* increment physical address */ \
	addl	$PTESIZE,%ebx		; /* next pte */ \
	loop	1b

/*
 * fillkptphys(prot)
 *	eax = physical address
 *	ecx = how many pages to map
 *	prot = protection bits
 *
 * Derives the page-table index from the physical address itself
 * (%eax >> PAGE_SHIFT) and fills the tables at KPTphys.  Same clobbers and
 * %ecx requirement as fillkpt.
 */
#define	fillkptphys(prot)		  \
	movl	%eax, %ebx		; \
	shrl	$PAGE_SHIFT, %ebx	; \
	fillkpt(R(KPTphys), prot)
177
178	.text
179/**********************************************************************
180 *
181 * This is where the bootblocks start us, set the ball rolling...
182 *
183 */
184NON_GPROF_ENTRY(btext)
185
186#ifdef PC98
187	/* save SYSTEM PARAMETER for resume (NS/T or other) */
188	movl	$0xa1400,%esi
189	movl	$R(pc98_system_parameter),%edi
190	movl	$0x0240,%ecx
191	cld
192	rep
193	movsb
194#else	/* IBM-PC */
195/* Tell the bios to warmboot next time */
196	movw	$0x1234,0x472
197#endif	/* PC98 */
198
199/* Set up a real frame in case the double return in newboot is executed. */
200	pushl	%ebp
201	movl	%esp, %ebp
202
203/* Don't trust what the BIOS gives for eflags. */
204	pushl	$PSL_KERNEL
205	popfl
206
207/*
208 * Don't trust what the BIOS gives for %fs and %gs.  Trust the bootstrap
209 * to set %cs, %ds, %es and %ss.
210 */
211	mov	%ds, %ax
212	mov	%ax, %fs
213	mov	%ax, %gs
214
215/*
216 * Clear the bss.  Not all boot programs do it, and it is our job anyway.
217 *
218 * XXX we don't check that there is memory for our bss and page tables
219 * before using it.
220 *
221 * Note: we must be careful to not overwrite an active gdt or idt.  They
222 * inactive from now until we switch to new ones, since we don't load any
223 * more segment registers or permit interrupts until after the switch.
224 */
225	movl	$R(end),%ecx
226	movl	$R(edata),%edi
227	subl	%edi,%ecx
228	xorl	%eax,%eax
229	cld
230	rep
231	stosb
232
233	call	recover_bootinfo
234
235/* Get onto a stack that we can trust. */
236/*
237 * XXX this step is delayed in case recover_bootinfo needs to return via
238 * the old stack, but it need not be, since recover_bootinfo actually
239 * returns via the old frame.
240 */
241	movl	$R(tmpstk),%esp
242
243#ifdef PC98
244	/* pc98_machine_type & M_EPSON_PC98 */
245	testb	$0x02,R(pc98_system_parameter)+220
246	jz	3f
247	/* epson_machine_id <= 0x0b */
248	cmpb	$0x0b,R(pc98_system_parameter)+224
249	ja	3f
250
251	/* count up memory */
252	movl	$0x100000,%eax		/* next, talley remaining memory */
253	movl	$0xFFF-0x100,%ecx
2541:	movl	0(%eax),%ebx		/* save location to check */
255	movl	$0xa55a5aa5,0(%eax)	/* write test pattern */
256	cmpl	$0xa55a5aa5,0(%eax)	/* does not check yet for rollover */
257	jne	2f
258	movl	%ebx,0(%eax)		/* restore memory */
259	addl	$PAGE_SIZE,%eax
260	loop	1b
2612:	subl	$0x100000,%eax
262	shrl	$17,%eax
263	movb	%al,R(pc98_system_parameter)+1
2643:
265
266	movw	R(pc98_system_parameter+0x86),%ax
267	movw	%ax,R(cpu_id)
268#endif
269
270	call	identify_cpu
271	call	create_pagetables
272
273/*
274 * If the CPU has support for VME, turn it on.
275 */
276	testl	$CPUID_VME, R(cpu_feature)
277	jz	1f
278	movl	%cr4, %eax
279	orl	$CR4_VME, %eax
280	movl	%eax, %cr4
2811:
282
283/* Now enable paging */
284#if defined(PAE) || defined(PAE_TABLES)
285	movl	R(IdlePDPT), %eax
286	movl	%eax, %cr3
287	movl	%cr4, %eax
288	orl	$CR4_PAE, %eax
289	movl	%eax, %cr4
290#else
291	movl	R(IdlePTD), %eax
292	movl	%eax,%cr3		/* load ptd addr into mmu */
293#endif
294	movl	%cr0,%eax		/* get control word */
295	orl	$CR0_PE|CR0_PG,%eax	/* enable paging */
296	movl	%eax,%cr0		/* and let's page NOW! */
297
298	pushl	$begin			/* jump to high virtualized address */
299	ret
300
301/* now running relocated at KERNBASE where the system is linked to run */
302begin:
303	/* set up bootstrap stack */
304	movl	proc0kstack,%eax	/* location of in-kernel stack */
305
306	/*
307	 * Only use bottom page for init386().  init386() calculates the
308	 * PCB + FPU save area size and returns the true top of stack.
309	 */
310	leal	PAGE_SIZE(%eax),%esp
311
312	xorl	%ebp,%ebp		/* mark end of frames */
313
314	pushl	physfree		/* value of first for init386(first) */
315	call	init386			/* wire 386 chip for unix operation */
316
317	/*
318	 * Clean up the stack in a way that db_numargs() understands, so
319	 * that backtraces in ddb don't underrun the stack.  Traps for
320	 * inaccessible memory are more fatal than usual this early.
321	 */
322	addl	$4,%esp
323
324	/* Switch to true top of stack. */
325	movl	%eax,%esp
326
327	call	mi_startup		/* autoconfiguration, mountroot etc */
328	/* NOTREACHED */
329	addl	$0,%esp			/* for db_numargs() again */
330
/*
 * Signal trampoline, copied to top of user stack
 *
 * Each variant below does the same thing against a different frame layout:
 * call the handler through the pointer at SIGF_HANDLER(%esp), push the
 * address of the saved context as the syscall argument, reload %gs from
 * that context unless the saved eflags have PSL_VM set, and issue the
 * matching sigreturn system call through int $0x80.  If the system call
 * ever returns, the trampoline spins in place.
 */
NON_GPROF_ENTRY(sigcode)
	calll	*SIGF_HANDLER(%esp)
	leal	SIGF_UC(%esp),%eax	/* get ucontext */
	pushl	%eax
	testl	$PSL_VM,UC_EFLAGS(%eax)
	jne	1f
	mov	UC_GS(%eax),%gs		/* restore %gs */
1:
	movl	$SYS_sigreturn,%eax
	pushl	%eax			/* junk to fake return addr. */
	int	$0x80			/* enter kernel with args */
					/* on stack */
1:
	jmp	1b

#ifdef COMPAT_FREEBSD4
	ALIGN_TEXT
freebsd4_sigcode:
	calll	*SIGF_HANDLER(%esp)
	leal	SIGF_UC4(%esp),%eax	/* get ucontext */
	pushl	%eax
	testl	$PSL_VM,UC4_EFLAGS(%eax)
	jne	1f
	mov	UC4_GS(%eax),%gs	/* restore %gs */
1:
	movl	$344,%eax		/* 4.x SYS_sigreturn */
	pushl	%eax			/* junk to fake return addr. */
	int	$0x80			/* enter kernel with args */
					/* on stack */
1:
	jmp	1b
#endif

#ifdef COMPAT_43
	ALIGN_TEXT
osigcode:
	call	*SIGF_HANDLER(%esp)	/* call signal handler */
	lea	SIGF_SC(%esp),%eax	/* get sigcontext */
	pushl	%eax
	testl	$PSL_VM,SC_PS(%eax)
	jne	9f
	mov	SC_GS(%eax),%gs		/* restore %gs */
9:
	movl	$103,%eax		/* 3.x SYS_sigreturn */
	pushl	%eax			/* junk to fake return addr. */
	int	$0x80			/* enter kernel with args */
0:	jmp	0b
#endif /* COMPAT_43 */

	ALIGN_TEXT
esigcode:

/*
 * Sizes of the trampolines.  Each one is measured from its own entry label
 * up to esigcode, so a size covers that trampoline and every one after it.
 */
	.data
	.globl	szsigcode
szsigcode:
	.long	esigcode-sigcode
#ifdef COMPAT_FREEBSD4
	.globl	szfreebsd4_sigcode
szfreebsd4_sigcode:
	.long	esigcode-freebsd4_sigcode
#endif
#ifdef COMPAT_43
	.globl	szosigcode
szosigcode:
	.long	esigcode-osigcode
#endif
	.text
401
/**********************************************************************
 *
 * Recover the bootinfo passed to us from the boot program
 *
 * Called from btext with %ebp pointing at the frame btext built, so the
 * boot program's return address is at 4(%ebp) and its arguments start at
 * 8(%ebp).  Stores boothowto and bootdev, and for the new boot code also
 * kernelname, bootinfo and (optionally) nfs_diskless.
 *
 * Clobbers %eax, %ebx, %ecx, %esi, %edi and clears the direction flag.
 */
recover_bootinfo:
	/*
	 * This code is called in different ways depending on what loaded
	 * and started the kernel.  This is used to detect how we get the
	 * arguments from the other code and what we do with them.
	 *
	 * Old disk boot blocks:
	 *	(*btext)(howto, bootdev, cyloffset, esym);
	 *	[return address == 0, and can NOT be returned to]
	 *	[cyloffset was not supported by the FreeBSD boot code
	 *	 and always passed in as 0]
	 *	[esym is also known as total in the boot code, and
	 *	 was never properly supported by the FreeBSD boot code]
	 *
	 * Old diskless netboot code:
	 *	(*btext)(0,0,0,0,&nfsdiskless,0,0,0);
	 *	[return address != 0, and can NOT be returned to]
	 *	If we are being booted by this code it will NOT work,
	 *	so we are just going to halt if we find this case.
	 *
	 * New uniform boot code:
	 *	(*btext)(howto, bootdev, 0, 0, 0, &bootinfo)
	 *	[return address != 0, and can be returned to]
	 *
	 * There may seem to be a lot of wasted arguments in here, but
	 * that is so the newer boot code can still load very old kernels
	 * and old boot code can load new kernels.
	 */

	/*
	 * The old style disk boot blocks fake a frame on the stack and
	 * did an lret to get here.  The frame on the stack has a return
	 * address of 0.
	 */
	cmpl	$0,4(%ebp)
	je	olddiskboot

	/*
	 * We have some form of return address, so this is either the
	 * old diskless netboot code, or the new uniform code.  That can
	 * be detected by looking at the 5th argument, if it is 0
	 * we are being booted by the new uniform boot code.
	 */
	cmpl	$0,24(%ebp)
	je	newboot

	/*
	 * Seems we have been loaded by the old diskless boot code, we
	 * don't stand a chance of running as the diskless structure
	 * changed considerably between the two, so just halt.
	 *
	 * Halt in a loop: if anything wakes the processor from hlt,
	 * execution must not fall through into newboot with arguments
	 * that are not in the new-boot layout.
	 */
9:	hlt
	jmp	9b

	/*
	 * We have been loaded by the new uniform boot code.
	 * Let's check the bootinfo version, and if we do not understand
	 * it we return to the loader with a status of 1 to indicate this error
	 */
newboot:
	movl	28(%ebp),%ebx		/* &bootinfo.version */
	movl	BI_VERSION(%ebx),%eax
	cmpl	$1,%eax			/* We only understand version 1 */
	je	1f
	movl	$1,%eax			/* Return status */
	leave
	/*
	 * XXX this returns to our caller's caller (as is required) since
	 * we didn't set up a frame and our caller did.
	 */
	ret

1:
	/*
	 * If we have a kernelname copy it in
	 */
	movl	BI_KERNELNAME(%ebx),%esi
	testl	%esi,%esi
	je	2f			/* No kernelname */
	movl	$MAXPATHLEN,%ecx	/* Brute force!!! */
	movl	$R(kernelname),%edi
	cmpb	$'/',(%esi)		/* Make sure it starts with a slash */
	je	1f
	movb	$'/',(%edi)		/* prepend one, and copy one byte fewer */
	incl	%edi
	decl	%ecx
1:
	cld
	rep
	movsb

2:
	/*
	 * Determine the size of the boot loader's copy of the bootinfo
	 * struct.  This is impossible to do properly because old versions
	 * of the struct don't contain a size field and there are 2 old
	 * versions with the same version number.
	 */
	movl	$BI_ENDCOMMON,%ecx	/* prepare for sizeless version */
	testl	$RB_BOOTINFO,8(%ebp)	/* bi_size (and bootinfo) valid? */
	je	got_bi_size		/* no, sizeless version */
	movl	BI_SIZE(%ebx),%ecx
got_bi_size:

	/*
	 * Copy the common part of the bootinfo struct, clamped (unsigned)
	 * to the BOOTINFO_SIZE bytes reserved for our copy.
	 */
	movl	%ebx,%esi
	movl	$R(bootinfo),%edi
	cmpl	$BOOTINFO_SIZE,%ecx
	jbe	got_common_bi_size
	movl	$BOOTINFO_SIZE,%ecx
got_common_bi_size:
	cld
	rep
	movsb

#ifdef NFS_ROOT
#ifndef BOOTP_NFSV3
	/*
	 * If we have a nfs_diskless structure copy it in
	 */
	movl	BI_NFS_DISKLESS(%ebx),%esi
	testl	%esi,%esi
	je	olddiskboot
	movl	$R(nfs_diskless),%edi
	movl	$NFSDISKLESS_SIZE,%ecx
	cld
	rep
	movsb
	movl	$R(nfs_diskless_valid),%edi
	movl	$1,(%edi)
#endif
#endif

	/*
	 * The old style disk boot.
	 *	(*btext)(howto, bootdev, cyloffset, esym);
	 * Note that the newer boot code just falls into here to pick
	 * up howto and bootdev, cyloffset and esym are no longer used
	 */
olddiskboot:
	movl	8(%ebp),%eax
	movl	%eax,R(boothowto)
	movl	12(%ebp),%eax
	movl	%eax,R(bootdev)

	ret
554
555
/**********************************************************************
 *
 * Identify the CPU and initialize anything special about it
 *
 * Stores the CPU class in "cpu" and, depending on the path taken, fills
 * in cpu_vendor, cpu_high, cpu_id, cpu_procinfo, cpu_feature and
 * cpu_feature2.  eflags is restored after each probe.
 *
 * Clobbers %eax, %ecx, %edx, and on the cpuid path %ebx as well.
 */
identify_cpu:

	/* Try to toggle alignment check flag; does not exist on 386. */
	pushfl
	popl	%eax
	movl	%eax,%ecx		/* %ecx = original eflags */
	orl	$PSL_AC,%eax
	pushl	%eax
	popfl
	pushfl
	popl	%eax
	xorl	%ecx,%eax
	andl	$PSL_AC,%eax		/* non-zero iff PSL_AC changed */
	pushl	%ecx
	popfl				/* put the original eflags back */

	testl	%eax,%eax
	jnz	try486

	/*
	 * NexGen CPU does not have alignment check flag.
	 *
	 * The xorl below leaves ZF set and nothing before the divl changes
	 * it; the branch is taken when ZF is still set after the divide.
	 */
	pushfl
	movl	$0x5555, %eax
	xorl	%edx, %edx
	movl	$2, %ecx
	clc
	divl	%ecx
	jz	trynexgen
	popfl
	movl	$CPU_386,R(cpu)
	jmp	3f

trynexgen:
	popfl
	movl	$CPU_NX586,R(cpu)
	movl	$0x4778654e,R(cpu_vendor)	# store vendor string "NexG"
	movl	$0x72446e65,R(cpu_vendor+4)	# "enDr"
	movl	$0x6e657669,R(cpu_vendor+8)	# "iven"
	movl	$0,R(cpu_vendor+12)
	jmp	3f

try486:	/* Try to toggle identification flag; does not exist on early 486s. */
	pushfl
	popl	%eax
	movl	%eax,%ecx		/* %ecx = original eflags */
	xorl	$PSL_ID,%eax
	pushl	%eax
	popfl
	pushfl
	popl	%eax
	xorl	%ecx,%eax
	andl	$PSL_ID,%eax		/* non-zero iff PSL_ID changed */
	pushl	%ecx
	popfl				/* put the original eflags back */

	testl	%eax,%eax
	jnz	trycpuid
	movl	$CPU_486,R(cpu)

	/*
	 * Check Cyrix CPU
	 * Cyrix CPUs do not change the undefined flags following
	 * execution of the divide instruction (here 0x5555 divided by 2).
	 * The carry flag is cleared first, so finding it still clear
	 * afterwards selects the Cyrix path.
	 *
	 * Note: CPUID is enabled on M2, so it passes another way.
	 */
	pushfl
	movl	$0x5555, %eax
	xorl	%edx, %edx
	movl	$2, %ecx
	clc
	divl	%ecx
	jnc	trycyrix
	popfl
	jmp	3f		/* You may use Intel CPU. */

trycyrix:
	popfl
	/*
	 * IBM Blue Lightning CPU also doesn't change the undefined flags.
	 * Because IBM doesn't disclose the information for Blue Lightning
	 * CPU, we couldn't distinguish it from Cyrix's (including IBM
	 * brand of Cyrix CPUs).
	 */
	movl	$0x69727943,R(cpu_vendor)	# store vendor string "Cyri"
	movl	$0x736e4978,R(cpu_vendor+4)	# "xIns"
	movl	$0x64616574,R(cpu_vendor+8)	# "tead"
	movb	$0,R(cpu_vendor+12)		# terminate, as the other paths do
	jmp	3f

trycpuid:	/* Use the `cpuid' instruction. */
	xorl	%eax,%eax
	cpuid					# cpuid 0
	movl	%eax,R(cpu_high)		# highest capability
	movl	%ebx,R(cpu_vendor)		# store vendor string
	movl	%edx,R(cpu_vendor+4)
	movl	%ecx,R(cpu_vendor+8)
	movb	$0,R(cpu_vendor+12)

	movl	$1,%eax
	cpuid					# cpuid 1
	movl	%eax,R(cpu_id)			# store cpu_id
	movl	%ebx,R(cpu_procinfo)		# store cpu_procinfo
	movl	%edx,R(cpu_feature)		# store cpu_feature
	movl	%ecx,R(cpu_feature2)		# store cpu_feature2
	rorl	$8,%eax				# extract family type
	andl	$15,%eax			# (bits 8-11 of cpu_id)
	cmpl	$5,%eax
	jae	1f

	/* less than Pentium; must be 486 */
	movl	$CPU_486,R(cpu)
	jmp	3f
1:
	/* a Pentium? */
	cmpl	$5,%eax
	jne	2f
	movl	$CPU_586,R(cpu)
	jmp	3f
2:
	/* Greater than Pentium...call it a Pentium Pro */
	movl	$CPU_686,R(cpu)
3:
	ret
683
684
/**********************************************************************
 *
 * Create the first page directory and its page tables.
 *
 * Runs with paging off.  Determines the end of the kernel, allocates and
 * zeroes the boot-time pages directly after it (kernel page tables, page
 * directory, proc0 kstack, vm86 pages), records their addresses in the
 * globals above and fills in the page tables and page directory.
 *
 * Reads bootinfo and cpu_feature, so recover_bootinfo and identify_cpu
 * must already have run.  Clobbers %eax, %ebx, %ecx, %esi and %edi.
 */

create_pagetables:

/* Find end of kernel image (rounded up to a page boundary). */
	movl	$R(_end),%esi

/* Include symbols, if any. */
	movl	R(bootinfo+BI_ESYMTAB),%edi
	testl	%edi,%edi
	je	over_symalloc
	movl	%edi,%esi
	movl	$KERNBASE,%edi
	addl	%edi,R(bootinfo+BI_SYMTAB)	/* relocate both by KERNBASE */
	addl	%edi,R(bootinfo+BI_ESYMTAB)
over_symalloc:

/* If we are told where the end of the kernel space is, believe it. */
	movl	R(bootinfo+BI_KERNEND),%edi
	testl	%edi,%edi
	je	no_kernend
	movl	%edi,%esi
no_kernend:

	addl	$PDRMASK,%esi		/* Play conservative for now, and */
	andl	$~PDRMASK,%esi		/*   ... wrap to next 4M (2M for PAE). */
	movl	%esi,R(KERNend)		/* save end of kernel */
	movl	%esi,R(physfree)	/* next free page is at end of kernel */

/* Allocate Kernel Page Tables */
	ALLOCPAGES(NKPT)
	movl	%esi,R(KPTphys)
	addl	$(KERNBASE-(KPTDI<<(PDRSHIFT-PAGE_SHIFT+PTESHIFT))),%esi
	movl	%esi,R(KPTmap)

/* Allocate Page Table Directory */
#if defined(PAE) || defined(PAE_TABLES)
	/* XXX only need 32 bytes (easier for now) */
	ALLOCPAGES(1)
	movl	%esi,R(IdlePDPT)
#endif
	ALLOCPAGES(NPGPTD)
	movl	%esi,R(IdlePTD)

/* Allocate KSTACK */
	ALLOCPAGES(KSTACK_PAGES)
	movl	%esi,R(p0kpa)
	addl	$KERNBASE, %esi
	movl	%esi, R(proc0kstack)

	ALLOCPAGES(1)			/* vm86/bios stack */
	movl	%esi,R(vm86phystk)

	ALLOCPAGES(3)			/* pgtable + ext + IOPAGES */
	movl	%esi,R(vm86pa)
	addl	$KERNBASE, %esi
	movl	%esi, R(vm86paddr)

/*
 * Enable PSE and PGE.
 */
#ifndef DISABLE_PSE
	testl	$CPUID_PSE, R(cpu_feature)
	jz	1f
	movl	$PG_PS, R(pseflag)
	movl	%cr4, %eax
	orl	$CR4_PSE, %eax
	movl	%eax, %cr4
1:
#endif
#ifndef DISABLE_PG_G
	testl	$CPUID_PGE, R(cpu_feature)
	jz	2f
	movl	$PG_G, R(pgeflag)
	movl	%cr4, %eax
	orl	$CR4_PGE, %eax
	movl	%eax, %cr4
2:
#endif

/*
 * Initialize page table pages mapping physical address zero through the
 * end of the kernel.  All of the page table entries allow read and write
 * access.  Write access to the first physical page is required by bios32
 * calls, and write access to the first 1 MB of physical memory is required
 * by ACPI for implementing suspend and resume.  We do this even
 * if we've enabled PSE above, we'll just switch the corresponding kernel
 * PDEs before we turn on paging.
 *
 * XXX: We waste some pages here in the PSE case!
 */
	xorl	%eax, %eax
	movl	R(KERNend),%ecx
	shrl	$PAGE_SHIFT,%ecx
	fillkptphys($PG_RW)

/* Map page table pages. */
	movl	R(KPTphys),%eax
	movl	$NKPT,%ecx
	fillkptphys($PG_RW)

/* Map page directory. */
#if defined(PAE) || defined(PAE_TABLES)
	movl	R(IdlePDPT), %eax
	movl	$1, %ecx
	fillkptphys($PG_RW)
#endif

	movl	R(IdlePTD), %eax
	movl	$NPGPTD, %ecx
	fillkptphys($PG_RW)

/* Map proc0's KSTACK in the physical way ... */
	movl	R(p0kpa), %eax
	movl	$(KSTACK_PAGES), %ecx
	fillkptphys($PG_RW)

/* Map ISA hole */
	movl	$ISA_HOLE_START, %eax
	movl	$ISA_HOLE_LENGTH>>PAGE_SHIFT, %ecx
	fillkptphys($PG_RW)

/*
 * Map space for the vm86 region: the vm86/bios stack page plus the three
 * pages allocated immediately after it above, hence 4 pages starting at
 * vm86phystk.
 */
	movl	R(vm86phystk), %eax
	movl	$4, %ecx
	fillkptphys($PG_RW)

/* Map page 0 into the vm86 page table */
	xorl	%eax, %eax
	xorl	%ebx, %ebx
	movl	$1, %ecx
	fillkpt(R(vm86pa), $PG_RW|PG_U)

/* ...likewise for the ISA hole */
	movl	$ISA_HOLE_START, %eax
	movl	$ISA_HOLE_START>>PAGE_SHIFT, %ebx
	movl	$ISA_HOLE_LENGTH>>PAGE_SHIFT, %ecx
	fillkpt(R(vm86pa), $PG_RW|PG_U)

/*
 * Create an identity mapping for low physical memory, including the kernel.
 * The part of this mapping that covers the first 1 MB of physical memory
 * becomes a permanent part of the kernel's address space.  The rest of this
 * mapping is destroyed in pmap_bootstrap().  Ordinarily, the same page table
 * pages are shared by the identity mapping and the kernel's native mapping.
 * However, the permanent identity mapping cannot contain PG_G mappings.
 * Thus, if the kernel is loaded within the permanent identity mapping, that
 * page table page must be duplicated and not shared.
 *
 * N.B. Due to errata concerning large pages and physical address zero,
 * a PG_PS mapping is not used.
 */
	movl	R(KPTphys), %eax
	xorl	%ebx, %ebx
	movl	$NKPT, %ecx
	fillkpt(R(IdlePTD), $PG_RW)
#if KERNLOAD < (1 << PDRSHIFT)
	testl	$PG_G, R(pgeflag)
	jz	1f
	ALLOCPAGES(1)
	movl	%esi, %edi		/* %edi = the freshly allocated page */
	movl	R(IdlePTD), %eax
	movl	(%eax), %esi		/* current first directory entry */
	movl	%edi, (%eax)		/* replace it with the new page */
	movl	$PAGE_SIZE, %ecx
	cld
	rep
	movsb
1:
#endif

/*
 * For the non-PSE case, install PDEs for PTs covering the KVA.
 * For the PSE case, do the same, but clobber the ones corresponding
 * to the kernel (from btext to KERNend) with 4M (2M for PAE) ('PS')
 * PDEs immediately after.
 */
	movl	R(KPTphys), %eax
	movl	$KPTDI, %ebx
	movl	$NKPT, %ecx
	fillkpt(R(IdlePTD), $PG_RW)
	cmpl	$0,R(pseflag)
	je	done_pde

	movl	R(KERNend), %ecx
	movl	$KERNLOAD, %eax
	subl	%eax, %ecx
	shrl	$PDRSHIFT, %ecx		/* number of superpages to install */
	jz	done_pde		/* none: "loop" would wrap a zero count */
	movl	$(KPTDI+(KERNLOAD/(1 << PDRSHIFT))), %ebx
	shll	$PDESHIFT, %ebx
	addl	R(IdlePTD), %ebx
	orl	$(PG_V|PG_RW|PG_PS), %eax
1:	movl	%eax, (%ebx)
	addl	$(1 << PDRSHIFT), %eax
	addl	$PDESIZE, %ebx
	loop	1b

done_pde:
/* install a pde recursively mapping page directory as a page table */
	movl	R(IdlePTD), %eax
	movl	$PTDPTDI, %ebx
	movl	$NPGPTD,%ecx
	fillkpt(R(IdlePTD), $PG_RW)

#if defined(PAE) || defined(PAE_TABLES)
	/* Point the PDPT entries at the page directory pages. */
	movl	R(IdlePTD), %eax
	xorl	%ebx, %ebx
	movl	$NPGPTD, %ecx
	fillkpt(R(IdlePDPT), $0x0)
#endif

	ret
901
#ifdef XENHVM
/*
 * Xen Hypercall page
 *
 * Reserves 0x1000 bytes of page-aligned text, pre-filled with 0x90 (nop).
 */
	.text
.p2align PAGE_SHIFT, 0x90	/* Hypercall_page needs to be PAGE aligned */

NON_GPROF_ENTRY(hypercall_page)
	.skip	0x1000, 0x90	/* Fill with "nop"s */
#endif
910