locore.s revision 201716
1/*-
2 * Copyright (c) 1990 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * William Jolitz.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 *	from: @(#)locore.s	7.3 (Berkeley) 5/13/91
33 * $FreeBSD: head/sys/i386/i386/locore.s 201716 2010-01-07 04:47:09Z alc $
34 *
35 *		originally from: locore.s, by William F. Jolitz
36 *
37 *		Substantially rewritten by David Greenman, Rod Grimes,
38 *			Bruce Evans, Wolfgang Solfrank, Poul-Henning Kamp
39 *			and many others.
40 */
41
42#include "opt_bootp.h"
43#include "opt_compat.h"
44#include "opt_nfsroot.h"
45#include "opt_pmap.h"
46
47#include <sys/syscall.h>
48#include <sys/reboot.h>
49
50#include <machine/asmacros.h>
51#include <machine/cputypes.h>
52#include <machine/psl.h>
53#include <machine/pmap.h>
54#include <machine/specialreg.h>
55
56#include "assym.s"
57
58/*
59 *	XXX
60 *
61 * Note: This version was greatly munged to avoid various assembler errors
62 * that may be fixed in newer versions of gas. Perhaps newer versions
63 * will have a more pleasant appearance.
64 */
65
66/*
67 * PTmap is recursive pagemap at top of virtual address space.
68 * Within PTmap, the page directory can be found (third indirection).
 *
 * All three symbols are assembly-time constants derived from the
 * page-directory index PTDPTDI; create_pagetables (below) installs the
 * matching self-referencing PDE(s) at that index of the page directory.
69 */
70	.globl	PTmap,PTD,PTDpde
71	.set	PTmap,(PTDPTDI << PDRSHIFT)		/* VA reached through PDE slot PTDPTDI */
72	.set	PTD,PTmap + (PTDPTDI * PAGE_SIZE)	/* page number PTDPTDI inside PTmap */
73	.set	PTDpde,PTD + (PTDPTDI * PDESIZE)	/* entry number PTDPTDI inside PTD */
74
75/*
76 * Compiled KERNBASE location and the kernel load address
 *
 * The preprocessor constants are republished as global absolute symbols
 * (kernbase, kernload) so they are visible outside this file.
77 */
78	.globl	kernbase
79	.set	kernbase,KERNBASE
80	.globl	kernload
81	.set	kernload,KERNLOAD
82
83/*
84 * Globals
 *
 * Boot-time bookkeeping written by btext, recover_bootinfo and
 * create_pagetables below.  Until paging is enabled the code in this
 * file reaches these variables through the R() macro.
85 */
86	.data
87	ALIGN_DATA			/* just to be sure */
88
89	.space	0x2000			/* space for tmpstk - temporary stack */
90tmpstk:				/* label sits after the space: btext loads it into %esp */
91
92	.globl	bootinfo
93bootinfo:	.space	BOOTINFO_SIZE	/* bootinfo that we can handle */
94
95		.globl KERNend
96KERNend:	.long	0		/* phys addr end of kernel (just after bss) */
97physfree:	.long	0		/* phys addr of next free page */
98
99	.globl	IdlePTD
100IdlePTD:	.long	0		/* phys addr of kernel PTD */
101
102#ifdef PAE
103	.globl	IdlePDPT
104IdlePDPT:	.long	0		/* phys addr of kernel PDPT */
105#endif
106
107#ifdef SMP
108	.globl	KPTphys			/* exported only when SMP is defined */
109#endif
110KPTphys:	.long	0		/* phys addr of kernel page tables */
111
112	.globl	proc0kstack
113proc0kstack:	.long	0		/* address of proc 0 kstack space */
114p0kpa:		.long	0		/* phys addr of proc0's STACK */
115
116vm86phystk:	.long	0		/* PA of vm86/bios stack */
117
118	.globl	vm86paddr, vm86pa
119vm86paddr:	.long	0		/* address of vm86 region */
120vm86pa:		.long	0		/* phys addr of vm86 region */
121
122#ifdef PC98
123	.globl	pc98_system_parameter
124pc98_system_parameter:
125	.space	0x240			/* filled by btext with 0x240 bytes copied from 0xa1400 */
126#endif
127
128/**********************************************************************
129 *
130 * Some handy macros
131 *
132 */
133
/*
 * R(foo) turns the linked address of foo into the address used while
 * paging is still off, by subtracting KERNBASE.  All code in this file
 * that runs before `begin' references symbols through R().
 */
134#define R(foo) ((foo)-KERNBASE)
135
/*
 * ALLOCPAGES(foo)
 *	Take foo pages from the boot-time free pointer and zero them.
 *	R(physfree) is read, then advanced by foo * PAGE_SIZE.
 *
 *	On exit:
 *	esi = address of the first allocated page (the callers below store
 *	      %esi immediately after invoking the macro)
 *	eax = 0, ecx = 0, edi = address just past the allocation
 *	direction flag is cleared
 */
136#define ALLOCPAGES(foo) \
137	movl	R(physfree), %esi ; \
138	movl	$((foo)*PAGE_SIZE), %eax ; \
139	addl	%esi, %eax ; \
140	movl	%eax, R(physfree) ; \
141	movl	%esi, %edi ; \
142	movl	$((foo)*PAGE_SIZE),%ecx ; \
143	xorl	%eax,%eax ; \
144	cld ; \
145	rep ; \
146	stosb
147
148/*
149 * fillkpt
150 *	eax = page frame address
151 *	ebx = index into page table
152 *	ecx = how many pages to map
153 * 	base = base address of page dir/table
154 *	prot = protection bits
 *
 *	Stores ecx consecutive entries starting at base + (ebx << PTESHIFT).
 *	Each entry is the running address in eax with PG_V and prot or'ed in;
 *	eax advances by PAGE_SIZE and ebx by PTESIZE per entry.
 *	ecx must be non-zero on entry: the `loop' instruction decrements
 *	before it tests, so a zero count would not stop after one pass.
 *	Clobbers eax, ebx and ecx; defines numeric local label 1.
155 */
156#define	fillkpt(base, prot)		  \
157	shll	$PTESHIFT,%ebx		; \
158	addl	base,%ebx		; \
159	orl	$PG_V,%eax		; \
160	orl	prot,%eax		; \
1611:	movl	%eax,(%ebx)		; \
162	addl	$PAGE_SIZE,%eax		; /* increment physical address */ \
163	addl	$PTESIZE,%ebx		; /* next pte */ \
164	loop	1b
165
166/*
167 * fillkptphys(prot)
168 *	eax = physical address
169 *	ecx = how many pages to map
170 *	prot = protection bits
 *
 *	Enters the pages into the kernel page tables at R(KPTphys), using
 *	the page number of the physical address (eax >> PAGE_SHIFT) as the
 *	entry index.  Clobbers eax, ebx and ecx (see fillkpt).
171 */
172#define	fillkptphys(prot)		  \
173	movl	%eax, %ebx		; \
174	shrl	$PAGE_SHIFT, %ebx	; \
175	fillkpt(R(KPTphys), prot)
176
177	.text
178/**********************************************************************
179 *
180 * This is where the bootblocks start us, set the ball rolling...
181 *
 * Order of work: record BIOS state, build a frame, sanitize eflags and
 * %fs/%gs, clear the bss, recover the boot arguments, move to tmpstk,
 * identify the CPU, build the initial page tables, enable paging and
 * transfer to `begin' at its linked address.  Paging is off throughout,
 * so every symbol is referenced through R().
182 */
183NON_GPROF_ENTRY(btext)
184
185#ifdef PC98
186	/* save SYSTEM PARAMETER for resume (NS/T or other) */
187	movl	$0xa1400,%esi
188	movl	$R(pc98_system_parameter),%edi
189	movl	$0x0240,%ecx		/* same size as the pc98_system_parameter buffer */
190	cld
191	rep
192	movsb
193#else	/* IBM-PC */
194/* Tell the bios to warmboot next time */
195	movw	$0x1234,0x472
196#endif	/* PC98 */
197
198/* Set up a real frame in case the double return in newboot is executed. */
199	pushl	%ebp
200	movl	%esp, %ebp		/* recover_bootinfo reads the boot arguments via %ebp */
201
202/* Don't trust what the BIOS gives for eflags. */
203	pushl	$PSL_KERNEL
204	popfl
205
206/*
207 * Don't trust what the BIOS gives for %fs and %gs.  Trust the bootstrap
208 * to set %cs, %ds, %es and %ss.
209 */
210	mov	%ds, %ax
211	mov	%ax, %fs
212	mov	%ax, %gs
213
214/*
215 * Clear the bss.  Not all boot programs do it, and it is our job anyway.
216 *
217 * XXX we don't check that there is memory for our bss and page tables
218 * before using it.
219 *
220 * Note: we must be careful to not overwrite an active gdt or idt.  They
221 * are inactive from now until we switch to new ones, since we don't load
222 * any more segment registers or permit interrupts until after the switch.
223 */
224	movl	$R(end),%ecx
225	movl	$R(edata),%edi
226	subl	%edi,%ecx		/* byte count = end - edata */
227	xorl	%eax,%eax
228	cld
229	rep
230	stosb
231
232	call	recover_bootinfo
233
234/* Get onto a stack that we can trust. */
235/*
236 * XXX this step is delayed in case recover_bootinfo needs to return via
237 * the old stack, but it need not be, since recover_bootinfo actually
238 * returns via the old frame.
239 */
240	movl	$R(tmpstk),%esp
241
242#ifdef PC98
243	/* pc98_machine_type & M_EPSON_PC98 */
244	testb	$0x02,R(pc98_system_parameter)+220
245	jz	3f
246	/* epson_machine_id <= 0x0b */
247	cmpb	$0x0b,R(pc98_system_parameter)+224
248	ja	3f
249
250	/* count up memory */
251	movl	$0x100000,%eax		/* next, tally remaining memory */
252	movl	$0xFFF-0x100,%ecx	/* upper bound on the number of pages probed */
2531:	movl	0(%eax),%ebx		/* save location to check */
254	movl	$0xa55a5aa5,0(%eax)	/* write test pattern */
255	cmpl	$0xa55a5aa5,0(%eax)	/* does not check yet for rollover */
256	jne	2f
257	movl	%ebx,0(%eax)		/* restore memory */
258	addl	$PAGE_SIZE,%eax
259	loop	1b
2602:	subl	$0x100000,%eax		/* bytes found above the starting address */
261	shrl	$17,%eax		/* scale to 128 KB (1 << 17) units */
262	movb	%al,R(pc98_system_parameter)+1
2633:
264
265	movw	R(pc98_system_parameter+0x86),%ax
266	movw	%ax,R(cpu_id)
267#endif
268
269	call	identify_cpu
270	call	create_pagetables
271
272/*
273 * If the CPU has support for VME, turn it on.
274 */
275	testl	$CPUID_VME, R(cpu_feature)
276	jz	1f
277	movl	%cr4, %eax
278	orl	$CR4_VME, %eax
279	movl	%eax, %cr4
2801:
281
282/* Now enable paging */
283#ifdef PAE
284	movl	R(IdlePDPT), %eax
285	movl	%eax, %cr3
286	movl	%cr4, %eax
287	orl	$CR4_PAE, %eax
288	movl	%eax, %cr4
289#else
290	movl	R(IdlePTD), %eax
291	movl	%eax,%cr3		/* load ptd addr into mmu */
292#endif
293	movl	%cr0,%eax		/* get control word */
294	orl	$CR0_PE|CR0_PG,%eax	/* enable paging */
295	movl	%eax,%cr0		/* and let's page NOW! */
296
297	pushl	$begin			/* jump to high virtualized address */
298	ret				/* pops the linked address of begin into %eip */
299
300/* now running relocated at KERNBASE where the system is linked to run */
/*
 * From here on symbols are used at their linked addresses (no R()).
 * Switches to proc0's kernel stack, records the page-directory physical
 * address at offset PCB_CR3 in the PCB_SIZE bytes kept above the stack
 * pointer, then calls init386(physfree) followed by mi_startup().
 */
301begin:
302	/* set up bootstrap stack */
303	movl	proc0kstack,%eax	/* location of in-kernel stack */
304			/* bootstrap stack end location */
305	leal	(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE)(%eax),%esp
306
307	xorl	%ebp,%ebp		/* mark end of frames */
308
309#ifdef PAE
310	movl	IdlePDPT,%esi
311#else
312	movl	IdlePTD,%esi
313#endif
314	movl	%esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax)
315
316	pushl	physfree		/* value of first for init386(first) */
317	call	init386			/* wire 386 chip for unix operation */
318
319	/*
320	 * Clean up the stack in a way that db_numargs() understands, so
321	 * that backtraces in ddb don't underrun the stack.  Traps for
322	 * inaccessible memory are more fatal than usual this early.
323	 */
324	addl	$4,%esp			/* drop the single argument pushed for init386 */
325
326	call	mi_startup		/* autoconfiguration, mountroot etc */
327	/* NOTREACHED */
328	addl	$0,%esp			/* for db_numargs() again */
329
330/*
331 * Signal trampoline, copied to top of user stack
 *
 * Calls the handler whose address is at SIGF_HANDLER(%esp), then issues
 * the SYS_sigreturn system call with a pointer to the ucontext found at
 * SIGF_UC(%esp).  Spins forever if the system call ever returns.
332 */
333NON_GPROF_ENTRY(sigcode)
334	calll	*SIGF_HANDLER(%esp)
335	leal	SIGF_UC(%esp),%eax	/* get ucontext */
336	pushl	%eax			/* argument for sigreturn */
337	testl	$PSL_VM,UC_EFLAGS(%eax)
338	jne	1f			/* PSL_VM set in saved eflags: leave %gs alone */
339	mov	UC_GS(%eax),%gs		/* restore %gs */
3401:
341	movl	$SYS_sigreturn,%eax
342	pushl	%eax			/* junk to fake return addr. */
343	int	$0x80			/* enter kernel with args */
344					/* on stack */
3451:
346	jmp	1b			/* only reached if the system call returns */
347
348#ifdef COMPAT_FREEBSD4
/*
 * Same shape as sigcode, but uses the SIGF_UC4/UC4_* offsets and the
 * 4.x sigreturn system call number.  Only built with COMPAT_FREEBSD4.
 */
349	ALIGN_TEXT
350freebsd4_sigcode:
351	calll	*SIGF_HANDLER(%esp)
352	leal	SIGF_UC4(%esp),%eax	/* get ucontext */
353	pushl	%eax			/* argument for sigreturn */
354	testl	$PSL_VM,UC4_EFLAGS(%eax)
355	jne	1f			/* PSL_VM set in saved eflags: leave %gs alone */
356	mov	UC4_GS(%eax),%gs	/* restore %gs */
3571:
358	movl	$344,%eax		/* 4.x SYS_sigreturn */
359	pushl	%eax			/* junk to fake return addr. */
360	int	$0x80			/* enter kernel with args */
361					/* on stack */
3621:
363	jmp	1b			/* only reached if the system call returns */
364#endif
365
366#ifdef COMPAT_43
/*
 * Same shape as sigcode, but passes the sigcontext at SIGF_SC(%esp),
 * tests the saved flags at SC_PS and uses system call number 103.
 * Only built with COMPAT_43.
 */
367	ALIGN_TEXT
368osigcode:
369	call	*SIGF_HANDLER(%esp)	/* call signal handler */
370	lea	SIGF_SC(%esp),%eax	/* get sigcontext */
371	pushl	%eax			/* argument for sigreturn */
372	testl	$PSL_VM,SC_PS(%eax)
373	jne	9f			/* PSL_VM set in saved flags: leave %gs alone */
374	mov	SC_GS(%eax),%gs		/* restore %gs */
3759:
376	movl	$103,%eax		/* 3.x SYS_sigreturn */
377	pushl	%eax			/* junk to fake return addr. */
378	int	$0x80			/* enter kernel with args */
3790:	jmp	0b			/* only reached if the system call returns */
380#endif /* COMPAT_43 */
381
382	ALIGN_TEXT
383esigcode:			/* end marker for all of the trampolines above */
384
/*
 * Exported trampoline sizes.  Each is the distance from that trampoline's
 * start to esigcode, so a size covers everything assembled between the
 * trampoline's label and esigcode, including any later trampolines.
 */
385	.data
386	.globl	szsigcode
387szsigcode:
388	.long	esigcode-sigcode
389#ifdef COMPAT_FREEBSD4
390	.globl	szfreebsd4_sigcode
391szfreebsd4_sigcode:
392	.long	esigcode-freebsd4_sigcode
393#endif
394#ifdef COMPAT_43
395	.globl	szosigcode
396szosigcode:
397	.long	esigcode-osigcode
398#endif
399	.text
400
401/**********************************************************************
402 *
403 * Recover the bootinfo passed to us from the boot program
404 *
 * In:	%ebp = frame built by btext, so that
 *	 4(%ebp) = return address supplied by the boot program
 *	 8(%ebp) = howto, 12(%ebp) = bootdev
 *	24(%ebp) = 5th argument, 28(%ebp) = &bootinfo (new boot code)
 * Out:	R(boothowto), R(bootdev); for the new boot code also R(bootinfo),
 *	R(kernelname) and, with NFS_ROOT && !BOOTP_NFSV3, R(nfs_diskless)
 *	and R(nfs_diskless_valid).
 * Clobbers: %eax, %ebx, %ecx, %esi, %edi; clears the direction flag on
 *	the copying paths.
 * Does not return for the old diskless netboot case (halts).  For an
 * unsupported bootinfo version it returns to the boot program with
 * %eax = 1 instead of to btext.
405 */
406recover_bootinfo:
407	/*
408	 * This code is called in different ways depending on what loaded
409	 * and started the kernel.  This is used to detect how we get the
410	 * arguments from the other code and what we do with them.
411	 *
412	 * Old disk boot blocks:
413	 *	(*btext)(howto, bootdev, cyloffset, esym);
414	 *	[return address == 0, and can NOT be returned to]
415	 *	[cyloffset was not supported by the FreeBSD boot code
416	 *	 and always passed in as 0]
417	 *	[esym is also known as total in the boot code, and
418	 *	 was never properly supported by the FreeBSD boot code]
419	 *
420	 * Old diskless netboot code:
421	 *	(*btext)(0,0,0,0,&nfsdiskless,0,0,0);
422	 *	[return address != 0, and can NOT be returned to]
423	 *	If we are being booted by this code it will NOT work,
424	 *	so we are just going to halt if we find this case.
425	 *
426	 * New uniform boot code:
427	 *	(*btext)(howto, bootdev, 0, 0, 0, &bootinfo)
428	 *	[return address != 0, and can be returned to]
429	 *
430	 * There may seem to be a lot of wasted arguments in here, but
431	 * that is so the newer boot code can still load very old kernels
432	 * and old boot code can load new kernels.
433	 */
434
435	/*
436	 * The old style disk boot blocks fake a frame on the stack and
437	 * did an lret to get here.  The frame on the stack has a return
438	 * address of 0.
439	 */
440	cmpl	$0,4(%ebp)
441	je	olddiskboot
442
443	/*
444	 * We have some form of return address, so this is either the
445	 * old diskless netboot code, or the new uniform code.  That can
446	 * be detected by looking at the 5th argument, if it is 0
447	 * we are being booted by the new uniform boot code.
448	 */
449	cmpl	$0,24(%ebp)
450	je	newboot
451
452	/*
453	 * Seems we have been loaded by the old diskless boot code, we
454	 * don't stand a chance of running as the diskless structure
455	 * changed considerably between the two, so just halt.
	 * Halt in a loop: hlt can be resumed by an NMI or SMI even with
	 * interrupts disabled, and execution must not fall into newboot.
456	 */
4573:	 hlt
458	jmp	3b
459	/*
460	 * We have been loaded by the new uniform boot code.
461	 * Let's check the bootinfo version, and if we do not understand
462	 * it we return to the loader with a status of 1 to indicate this error
463	 */
464newboot:
465	movl	28(%ebp),%ebx		/* &bootinfo.version */
466	movl	BI_VERSION(%ebx),%eax
467	cmpl	$1,%eax			/* We only understand version 1 */
468	je	1f
469	movl	$1,%eax			/* Return status */
470	leave
471	/*
472	 * XXX this returns to our caller's caller (as is required) since
473	 * we didn't set up a frame and our caller did.
474	 */
475	ret
476
4771:
478	/*
479	 * If we have a kernelname copy it in
480	 */
481	movl	BI_KERNELNAME(%ebx),%esi
482	cmpl	$0,%esi
483	je	2f			/* No kernelname */
484	movl	$MAXPATHLEN,%ecx	/* Brute force!!! */
485	movl	$R(kernelname),%edi
486	cmpb	$'/',(%esi)		/* Make sure it starts with a slash */
487	je	1f
488	movb	$'/',(%edi)		/* prepend the missing slash ... */
489	incl	%edi
490	decl	%ecx			/* ... and copy one byte less */
4911:
492	cld
493	rep
494	movsb
495
4962:
497	/*
498	 * Determine the size of the boot loader's copy of the bootinfo
499	 * struct.  This is impossible to do properly because old versions
500	 * of the struct don't contain a size field and there are 2 old
501	 * versions with the same version number.
502	 */
503	movl	$BI_ENDCOMMON,%ecx	/* prepare for sizeless version */
504	testl	$RB_BOOTINFO,8(%ebp)	/* bi_size (and bootinfo) valid? */
505	je	got_bi_size		/* no, sizeless version */
506	movl	BI_SIZE(%ebx),%ecx
507got_bi_size:
508
509	/*
510	 * Copy the common part of the bootinfo struct
	 * (at most BOOTINFO_SIZE bytes, the size of our own buffer).
511	 */
512	movl	%ebx,%esi
513	movl	$R(bootinfo),%edi
514	cmpl	$BOOTINFO_SIZE,%ecx
515	jbe	got_common_bi_size
516	movl	$BOOTINFO_SIZE,%ecx
517got_common_bi_size:
518	cld
519	rep
520	movsb
521
522#ifdef NFS_ROOT
523#ifndef BOOTP_NFSV3
524	/*
525	 * If we have a nfs_diskless structure copy it in
526	 */
527	movl	BI_NFS_DISKLESS(%ebx),%esi
528	cmpl	$0,%esi
529	je	olddiskboot
530	movl	$R(nfs_diskless),%edi
531	movl	$NFSDISKLESS_SIZE,%ecx
532	cld
533	rep
534	movsb
535	movl	$R(nfs_diskless_valid),%edi
536	movl	$1,(%edi)
537#endif
538#endif
539
540	/*
541	 * The old style disk boot.
542	 *	(*btext)(howto, bootdev, cyloffset, esym);
543	 * Note that the newer boot code just falls into here to pick
544	 * up howto and bootdev, cyloffset and esym are no longer used
545	 */
546olddiskboot:
547	movl	8(%ebp),%eax
548	movl	%eax,R(boothowto)
549	movl	12(%ebp),%eax
550	movl	%eax,R(bootdev)
551
552	ret
553
554
555/**********************************************************************
556 *
557 * Identify the CPU and initialize anything special about it
558 *
 * Out:	R(cpu) is set on every path.  R(cpu_vendor) is set on the NexGen,
 *	Cyrix and cpuid paths.  The cpuid path also sets R(cpu_high),
 *	R(cpu_id), R(cpu_procinfo), R(cpu_feature) and R(cpu_feature2).
 * Clobbers: %eax, %ecx, %edx, and %ebx on the cpuid path.  The eflags
 *	value saved before each probe is put back afterwards.
559 */
560identify_cpu:
561
562	/* Try to toggle alignment check flag; does not exist on 386. */
563	pushfl
564	popl	%eax
565	movl	%eax,%ecx		/* %ecx = original eflags */
566	orl	$PSL_AC,%eax
567	pushl	%eax
568	popfl
569	pushfl
570	popl	%eax
571	xorl	%ecx,%eax
572	andl	$PSL_AC,%eax		/* non-zero iff the AC bit changed */
573	pushl	%ecx
574	popfl				/* restore original eflags */
575
576	testl	%eax,%eax
577	jnz	try486
578
579	/* NexGen CPU does not have alignment check flag. */
580	pushfl
581	movl	$0x5555, %eax
582	xorl	%edx, %edx
583	movl	$2, %ecx
584	clc
585	divl	%ecx
586	jz	trynexgen		/* ZF set after the divide: treat as NexGen */
587	popfl
588	movl	$CPU_386,R(cpu)
589	jmp	3f
590
591trynexgen:
592	popfl
593	movl	$CPU_NX586,R(cpu)
594	movl	$0x4778654e,R(cpu_vendor)	# store vendor string
595	movl	$0x72446e65,R(cpu_vendor+4)
596	movl	$0x6e657669,R(cpu_vendor+8)	/* bytes spell "NexGenDriven" */
597	movl	$0,R(cpu_vendor+12)
598	jmp	3f
599
600try486:	/* Try to toggle identification flag; does not exist on early 486s. */
601	pushfl
602	popl	%eax
603	movl	%eax,%ecx		/* %ecx = original eflags */
604	xorl	$PSL_ID,%eax
605	pushl	%eax
606	popfl
607	pushfl
608	popl	%eax
609	xorl	%ecx,%eax
610	andl	$PSL_ID,%eax		/* non-zero iff the ID bit changed */
611	pushl	%ecx
612	popfl				/* restore original eflags */
613
614	testl	%eax,%eax
615	jnz	trycpuid
616	movl	$CPU_486,R(cpu)
617
618	/*
619	 * Check Cyrix CPU
620	 * Cyrix CPUs do not change the undefined flags following
621	 * execution of the divide instruction which divides 0x5555 by 2.
622	 *
623	 * Note: CPUID is enabled on M2, so it passes another way.
624	 */
625	pushfl
626	movl	$0x5555, %eax
627	xorl	%edx, %edx
628	movl	$2, %ecx
629	clc
630	divl	%ecx
631	jnc	trycyrix		/* carry still clear after the divide */
632	popfl
633	jmp	3f		/* You may use Intel CPU. */
634
635trycyrix:
636	popfl
637	/*
638	 * IBM Blue Lightning CPU also doesn't change the undefined flags.
639	 * Because IBM doesn't disclose the information for Blue Lightning
640	 * CPU, we couldn't distinguish it from Cyrix's (including IBM
641	 * brand of Cyrix CPUs).
642	 */
643	movl	$0x69727943,R(cpu_vendor)	# store vendor string
644	movl	$0x736e4978,R(cpu_vendor+4)
645	movl	$0x64616574,R(cpu_vendor+8)	/* bytes spell "CyrixInstead" */
646	jmp	3f
647
648trycpuid:	/* Use the `cpuid' instruction. */
649	xorl	%eax,%eax
650	cpuid					# cpuid 0
651	movl	%eax,R(cpu_high)		# highest capability
652	movl	%ebx,R(cpu_vendor)		# store vendor string
653	movl	%edx,R(cpu_vendor+4)
654	movl	%ecx,R(cpu_vendor+8)
655	movb	$0,R(cpu_vendor+12)
656
657	movl	$1,%eax
658	cpuid					# cpuid 1
659	movl	%eax,R(cpu_id)			# store cpu_id
660	movl	%ebx,R(cpu_procinfo)		# store cpu_procinfo
661	movl	%edx,R(cpu_feature)		# store cpu_feature
662	movl	%ecx,R(cpu_feature2)		# store cpu_feature2
663	rorl	$8,%eax				# extract family type
664	andl	$15,%eax			/* family = bits 8-11 of cpu_id */
665	cmpl	$5,%eax
666	jae	1f
667
668	/* less than Pentium; must be 486 */
669	movl	$CPU_486,R(cpu)
670	jmp	3f
6711:
672	/* a Pentium? */
673	cmpl	$5,%eax
674	jne	2f
675	movl	$CPU_586,R(cpu)
676	jmp	3f
6772:
678	/* Greater than Pentium...call it a Pentium Pro */
679	movl	$CPU_686,R(cpu)
6803:
681	ret
682
683
684/**********************************************************************
685 *
686 * Create the first page directory and its page tables.
687 *
 * Runs with paging off.  Allocates everything from R(physfree), starting
 * at the rounded-up end of the kernel.
 * Out:	R(KERNend), R(physfree), R(KPTphys), R(IdlePTD), R(IdlePDPT) (PAE),
 *	R(p0kpa), R(proc0kstack), R(vm86phystk), R(vm86pa), R(vm86paddr);
 *	R(pseflag)/R(pgeflag) and CR4_PSE/CR4_PGE when the CPU has them and
 *	they are not disabled at build time.
 * Clobbers: %eax, %ebx, %ecx, %esi, %edi; direction flag cleared.
688 */
689
690create_pagetables:
691
692/* Find end of kernel image (rounded up to a page boundary). */
693	movl	$R(_end),%esi
694
695/* Include symbols, if any. */
696	movl	R(bootinfo+BI_ESYMTAB),%edi
697	testl	%edi,%edi
698	je	over_symalloc
699	movl	%edi,%esi		/* end of symbols becomes the end of the kernel */
700	movl	$KERNBASE,%edi
701	addl	%edi,R(bootinfo+BI_SYMTAB)	/* rebase both symbol pointers by KERNBASE */
702	addl	%edi,R(bootinfo+BI_ESYMTAB)
703over_symalloc:
704
705/* If we are told where the end of the kernel space is, believe it. */
706	movl	R(bootinfo+BI_KERNEND),%edi
707	testl	%edi,%edi
708	je	no_kernend
709	movl	%edi,%esi
710no_kernend:
711
712	addl	$PDRMASK,%esi		/* Play conservative for now, and */
713	andl	$~PDRMASK,%esi		/*   ... round up to a PDR boundary. */
714	movl	%esi,R(KERNend)		/* save end of kernel */
715	movl	%esi,R(physfree)	/* next free page is at end of kernel */
716
717/* Allocate Kernel Page Tables */
718	ALLOCPAGES(NKPT)
719	movl	%esi,R(KPTphys)
720
721/* Allocate Page Table Directory */
722#ifdef PAE
723	/* XXX only need 32 bytes (easier for now) */
724	ALLOCPAGES(1)
725	movl	%esi,R(IdlePDPT)
726#endif
727	ALLOCPAGES(NPGPTD)
728	movl	%esi,R(IdlePTD)
729
730/* Allocate KSTACK */
731	ALLOCPAGES(KSTACK_PAGES)
732	movl	%esi,R(p0kpa)
733	addl	$KERNBASE, %esi
734	movl	%esi, R(proc0kstack)
735
736	ALLOCPAGES(1)			/* vm86/bios stack */
737	movl	%esi,R(vm86phystk)
738
739	ALLOCPAGES(3)			/* pgtable + ext + IOPAGES */
740	movl	%esi,R(vm86pa)
741	addl	$KERNBASE, %esi
742	movl	%esi, R(vm86paddr)
743
744/*
745 * Enable PSE and PGE.
746 */
747#ifndef DISABLE_PSE
748	testl	$CPUID_PSE, R(cpu_feature)
749	jz	1f
750	movl	$PG_PS, R(pseflag)
751	movl	%cr4, %eax
752	orl	$CR4_PSE, %eax
753	movl	%eax, %cr4
7541:
755#endif
756#ifndef DISABLE_PG_G
757	testl	$CPUID_PGE, R(cpu_feature)
758	jz	2f
759	movl	$PG_G, R(pgeflag)
760	movl	%cr4, %eax
761	orl	$CR4_PGE, %eax
762	movl	%eax, %cr4
7632:
764#endif
765
766/*
767 * Initialize page table pages mapping physical address zero through the
768 * end of the kernel.  All of the page table entries allow read and write
769 * access.  Write access to the first physical page is required by bios32
770 * calls, and write access to the first 1 MB of physical memory is required
771 * by ACPI for implementing suspend and resume.  We do this even
772 * if we've enabled PSE above, we'll just switch the corresponding kernel
773 * PDEs before we turn on paging.
774 *
775 * XXX: We waste some pages here in the PSE case!  DON'T BLINDLY REMOVE
776 * THIS!  SMP needs the page table to be there to map the kernel P==V.
777 */
778	xorl	%eax, %eax
779	movl	R(KERNend),%ecx
780	shrl	$PAGE_SHIFT,%ecx
781	fillkptphys($PG_RW)
782
783/* Map page directory. */
784#ifdef PAE
785	movl	R(IdlePDPT), %eax
786	movl	$1, %ecx
787	fillkptphys($PG_RW)
788#endif
789
790	movl	R(IdlePTD), %eax
791	movl	$NPGPTD, %ecx
792	fillkptphys($PG_RW)
793
794/* Map proc0's KSTACK in the physical way ... */
795	movl	R(p0kpa), %eax
796	movl	$(KSTACK_PAGES), %ecx
797	fillkptphys($PG_RW)
798
799/* Map ISA hole */
800	movl	$ISA_HOLE_START, %eax
801	movl	$ISA_HOLE_LENGTH>>PAGE_SHIFT, %ecx
802	fillkptphys($PG_RW)
803
804/* Map space for the vm86 region */
805	movl	R(vm86phystk), %eax
806	movl	$4, %ecx		/* 1 stack page + the 3 vm86 pages allocated right after it */
807	fillkptphys($PG_RW)
808
809/* Map page 0 into the vm86 page table */
810	movl	$0, %eax
811	movl	$0, %ebx
812	movl	$1, %ecx
813	fillkpt(R(vm86pa), $PG_RW|PG_U)
814
815/* ...likewise for the ISA hole */
816	movl	$ISA_HOLE_START, %eax
817	movl	$ISA_HOLE_START>>PAGE_SHIFT, %ebx
818	movl	$ISA_HOLE_LENGTH>>PAGE_SHIFT, %ecx
819	fillkpt(R(vm86pa), $PG_RW|PG_U)
820
821/*
822 * Create an identity mapping for low physical memory, including the kernel.
823 * The part of this mapping that covers the first 1 MB of physical memory
824 * becomes a permanent part of the kernel's address space.  The rest of this
825 * mapping is destroyed in pmap_bootstrap().  Ordinarily, the same page table
826 * pages are shared by the identity mapping and the kernel's native mapping.
827 * However, the permanent identity mapping cannot contain PG_G mappings.
828 * Thus, if the kernel is loaded within the permanent identity mapping, that
829 * page table page must be duplicated and not shared.
830 *
831 * N.B. Due to errata concerning large pages and physical address zero,
832 * a PG_PS mapping is not used.
833 */
834	movl	R(KPTphys), %eax
835	xorl	%ebx, %ebx
836	movl	$NKPT, %ecx
837	fillkpt(R(IdlePTD), $PG_RW)
838#if KERNLOAD < (1 << PDRSHIFT)
839	testl	$PG_G, R(pgeflag)
840	jz	1f
	/*
	 * Duplicate the first page table page and point PDE 0 at the copy.
	 * The copy source is the page table itself (R(KPTphys), which the
	 * fillkpt above just entered in PDE 0), not the PDE value, which
	 * carries flag bits in its low bits.  The replacement PDE gets the
	 * same PG_V|PG_RW bits as the entry it replaces.
	 */
841	ALLOCPAGES(1)
842	movl	%esi, %edi		/* copy destination: the new page */
843	movl	R(IdlePTD), %eax
844	movl	R(KPTphys), %esi	/* copy source: page table behind PDE 0 */
845	movl	%edi, %ecx
	orl	$(PG_V|PG_RW), %ecx
	movl	%ecx, (%eax)		/* PDE 0 now maps through the private copy */
846	movl	$PAGE_SIZE, %ecx
847	cld
848	rep
849	movsb
8501:
851#endif
852
853/*
854 * For the non-PSE case, install PDEs for PTs covering the KVA.
855 * For the PSE case, do the same, but clobber the ones corresponding
856 * to the kernel (from btext to KERNend) with 4M (2M for PAE) ('PS')
857 * PDEs immediately after.
858 */
859	movl	R(KPTphys), %eax
860	movl	$KPTDI, %ebx
861	movl	$NKPT, %ecx
862	fillkpt(R(IdlePTD), $PG_RW)
863	cmpl	$0,R(pseflag)
864	je	done_pde
865
	/*
	 * NOTE(review): the loop below assumes (KERNend - KERNLOAD) >>
	 * PDRSHIFT is at least 1 and that KERNLOAD is PDR-aligned; confirm
	 * for configurations with KERNLOAD < (1 << PDRSHIFT).
	 */
866	movl	R(KERNend), %ecx
867	movl	$KERNLOAD, %eax
868	subl	%eax, %ecx
869	shrl	$PDRSHIFT, %ecx		/* number of large-page PDEs to write */
870	movl	$(KPTDI+(KERNLOAD/(1 << PDRSHIFT))), %ebx
871	shll	$PDESHIFT, %ebx
872	addl	R(IdlePTD), %ebx	/* address of the first PDE to overwrite */
873	orl	$(PG_V|PG_RW|PG_PS), %eax
8741:	movl	%eax, (%ebx)
875	addl	$(1 << PDRSHIFT), %eax
876	addl	$PDESIZE, %ebx
877	loop	1b
878
879done_pde:
880/* install a pde recursively mapping page directory as a page table */
881	movl	R(IdlePTD), %eax
882	movl	$PTDPTDI, %ebx
883	movl	$NPGPTD,%ecx
884	fillkpt(R(IdlePTD), $PG_RW)
885
886#ifdef PAE
	/* point the PDPT entries at the NPGPTD page directory pages */
887	movl	R(IdlePTD), %eax
888	xorl	%ebx, %ebx
889	movl	$NPGPTD, %ecx
890	fillkpt(R(IdlePDPT), $0x0)
891#endif
892
893	ret
894