pmap.c revision 5580
1258945Sroberto/*
2258945Sroberto * Copyright (c) 1991 Regents of the University of California.
3258945Sroberto * All rights reserved.
4258945Sroberto * Copyright (c) 1994 John S. Dyson
5280849Scy * All rights reserved.
6258945Sroberto * Copyright (c) 1994 David Greenman
7280849Scy * All rights reserved.
8280849Scy *
9280849Scy * This code is derived from software contributed to Berkeley by
10280849Scy * the Systems Programming Group of the University of Utah Computer
11258945Sroberto * Science Department and William Jolitz of UUNET Technologies Inc.
12258945Sroberto *
13290000Sglebius * Redistribution and use in source and binary forms, with or without
14258945Sroberto * modification, are permitted provided that the following conditions
15258945Sroberto * are met:
16258945Sroberto * 1. Redistributions of source code must retain the above copyright
17258945Sroberto *    notice, this list of conditions and the following disclaimer.
18258945Sroberto * 2. Redistributions in binary form must reproduce the above copyright
19258945Sroberto *    notice, this list of conditions and the following disclaimer in the
20258945Sroberto *    documentation and/or other materials provided with the distribution.
21258945Sroberto * 3. All advertising materials mentioning features or use of this software
22258945Sroberto *    must display the following acknowledgement:
23258945Sroberto *	This product includes software developed by the University of
24258945Sroberto *	California, Berkeley and its contributors.
25280849Scy * 4. Neither the name of the University nor the names of its contributors
26258945Sroberto *    may be used to endorse or promote products derived from this software
27280849Scy *    without specific prior written permission.
28280849Scy *
29280849Scy * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30258945Sroberto * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31258945Sroberto * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32258945Sroberto * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33290000Sglebius * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34258945Sroberto * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35258945Sroberto * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36258945Sroberto * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37258945Sroberto * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38258945Sroberto * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39258945Sroberto * SUCH DAMAGE.
40258945Sroberto *
41258945Sroberto *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
42258945Sroberto *	$Id: pmap.c,v 1.40 1995/01/09 16:04:38 davidg Exp $
43258945Sroberto */
44258945Sroberto
45258945Sroberto/*
46258945Sroberto * Derived from hp300 version by Mike Hibler, this version by William
47258945Sroberto * Jolitz uses a recursive map [a pde points to the page directory] to
48258945Sroberto * map the page tables using the pagetables themselves. This is done to
49258945Sroberto * reduce the impact on kernel virtual memory for lots of sparse address
50258945Sroberto * space, and to reduce the cost of memory to each process.
51258945Sroberto *
52258945Sroberto *	Derived from: hp300/@(#)pmap.c	7.1 (Berkeley) 12/5/90
53258945Sroberto */
54258945Sroberto/*
55258945Sroberto * Major modifications by John S. Dyson primarily to support
56258945Sroberto * pageable page tables, eliminating pmap_attributes,
57258945Sroberto * discontiguous memory pages, and using more efficient string
58258945Sroberto * instructions. Jan 13, 1994.  Further modifications on Mar 2, 1994,
59258945Sroberto * general clean-up and efficiency mods.
60258945Sroberto */
61258945Sroberto
62258945Sroberto/*
63258945Sroberto *	Manages physical address maps.
64258945Sroberto *
65258945Sroberto *	In addition to hardware address maps, this
66290000Sglebius *	module is called upon to provide software-use-only
67290000Sglebius *	maps which may or may not be stored in the same
68258945Sroberto *	form as hardware maps.  These pseudo-maps are
69258945Sroberto *	used to store intermediate results from copy
70258945Sroberto *	operations to and from address spaces.
71258945Sroberto *
72258945Sroberto *	Since the information managed by this module is
73258945Sroberto *	also stored by the logical address mapping module,
74280849Scy *	this module may throw away valid virtual-to-physical
75258945Sroberto *	mappings at almost any time.  However, invalidations
76280849Scy *	of virtual-to-physical mappings must be done as
77258945Sroberto *	requested.
78258945Sroberto *
79258945Sroberto *	In order to cope with hardware architectures which
80258945Sroberto *	make virtual-to-physical map invalidates expensive,
81258945Sroberto *	this module may delay invalidate or reduced protection
82280849Scy *	operations until such time as they are actually
83280849Scy *	necessary.  This module is given full information as
84280849Scy *	to which processors are currently using which maps,
85258945Sroberto *	and to when physical maps must be made correct.
86258945Sroberto */
87258945Sroberto
88258945Sroberto#include <sys/param.h>
89280849Scy#include <sys/systm.h>
90280849Scy#include <sys/proc.h>
91258945Sroberto#include <sys/malloc.h>
92258945Sroberto#include <sys/user.h>
93280849Scy
94258945Sroberto#include <vm/vm.h>
95258945Sroberto#include <vm/vm_kern.h>
96258945Sroberto#include <vm/vm_page.h>
97258945Sroberto
98258945Sroberto#include <i386/include/cputypes.h>
99280849Scy
100258945Sroberto#include <i386/isa/isa.h>
101258945Sroberto
/*
 * Allocate various and sundry SYSMAPs used in the days of old VM
 * and not yet converted.  XXX.
 *
 * When non-zero this enables the CMAP1/CMAP2/ptmmap/msgbufmap
 * declarations in this file and the SYSMAP() allocations performed
 * by pmap_bootstrap().
 */
#define BSDVM_COMPAT	1
107258945Sroberto
/*
 * Get PDEs and PTEs for user/kernel address space
 *
 * pmap_pde(m, v) yields the address of the page directory entry of
 * pmap m that covers virtual address v; pdir_pde(m, v) indexes a raw
 * page directory array m the same way and can be used as an lvalue.
 *
 * Every macro argument is parenthesized so that expressions such as
 * "pte + 1" can be passed safely.
 */
#define	pmap_pde(m, v)	(&((m)->pm_pdir[((vm_offset_t)(v) >> PD_SHIFT)&1023]))
#define pdir_pde(m, v) ((m)[((vm_offset_t)(v) >> PD_SHIFT)&1023])

/* Frame bits (PG_FRAME) of the entry that pte points to. */
#define pmap_pte_pa(pte)	(*(int *)(pte) & PG_FRAME)

/* Predicates testing a single flag bit of the entry that pte points to. */
#define pmap_pde_v(pte)		((*(int *)(pte) & PG_V) != 0)
#define pmap_pte_w(pte)		((*(int *)(pte) & PG_W) != 0)
#define pmap_pte_m(pte)		((*(int *)(pte) & PG_M) != 0)
#define pmap_pte_u(pte)		((*(int *)(pte) & PG_U) != 0)
#define pmap_pte_v(pte)		((*(int *)(pte) & PG_V) != 0)

/* Set or clear PG_W, and replace the PG_PROT bits, of the entry at pte. */
#define pmap_pte_set_w(pte, v)		((v)?(*(int *)(pte) |= PG_W):(*(int *)(pte) &= ~PG_W))
#define pmap_pte_set_prot(pte, v)	((*(int *)(pte) &= ~PG_PROT), (*(int *)(pte) |= (v)))
124258945Sroberto
125258945Sroberto/*
 * Given a map and a machine independent protection code,
 * convert to an i386 protection code.
128258945Sroberto */
/* Table lookup only; the map argument m is not used. */
#define pte_prot(m, p)	(protection_codes[p])
int	protection_codes[8];	/* indexed by protection code; see i386_protection_init() */

struct pmap	kernel_pmap_store;	/* storage for the kernel pmap */
pmap_t		kernel_pmap;		/* set to &kernel_pmap_store in pmap_bootstrap() */

vm_offset_t	phys_avail[6];		/* (start, end) pairs ended by a zero end: 2 entries + 1 null */
vm_offset_t    	avail_start;		/* PA of first available physical page */
vm_offset_t	avail_end;		/* PA of last available physical page */
vm_size_t	mem_size;		/* memory size in bytes */
vm_offset_t	virtual_avail; 		/* VA of first avail page (after kernel bss)*/
vm_offset_t	virtual_end;		/* VA of last avail page (end of kernel AS) */
int		i386pagesperpage;	/* PAGE_SIZE / NBPG, set in pmap_bootstrap() */
boolean_t	pmap_initialized = FALSE; /* Has pmap_init completed? */
vm_offset_t	vm_first_phys, vm_last_phys;

/* Forward declarations of helpers defined in this file. */
static inline int		pmap_is_managed();
static inline void *		vm_get_pmap();
static inline void		vm_put_pmap();
static void			i386_protection_init();
static void			pmap_alloc_pv_entry();
static inline pv_entry_t	get_pv_entry();
static inline void		pmap_use_pt();
static inline void		pmap_unuse_pt();
/* Count of kernel page table pages: NKPT at bootstrap, updated by pmap_growkernel(). */
int nkpt;


extern vm_offset_t clean_sva, clean_eva;
extern int cpu_class;

#if BSDVM_COMPAT
#include <sys/msgbuf.h>

/*
 * All those kernel PT submaps that BSD is so fond of
 *
 * Each pte pointer below is paired with the kernel virtual address it
 * maps; the pairs are assigned by the SYSMAP() calls in pmap_bootstrap().
 */
pt_entry_t *CMAP1, *CMAP2, *ptmmap;
caddr_t		CADDR1, CADDR2, ptvmmap;
pt_entry_t *msgbufmap;
struct msgbuf	*msgbufp;
#endif

void init_pv_entries(int) ;
172
173/*
174 *	Routine:	pmap_pte
175 *	Function:
176 *		Extract the page table entry associated
177 *		with the given map/virtual_address pair.
178 * [ what about induced faults -wfj]
179 */
180
181inline pt_entry_t *
182const pmap_pte(pmap, va)
183	register pmap_t	pmap;
184	vm_offset_t va;
185{
186
187	if (pmap && *pmap_pde(pmap, va)) {
188		vm_offset_t frame = (int) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
189		/* are we current address space or kernel? */
190		if ( (pmap == kernel_pmap) || (frame == ((int) PTDpde & PG_FRAME)))
191			return ((pt_entry_t *) vtopte(va));
192		/* otherwise, we are alternate address space */
193		else {
194			if ( frame != ((int) APTDpde & PG_FRAME) ) {
195				APTDpde = pmap->pm_pdir[PTDPTDI];
196				pmap_update();
197			}
198			return((pt_entry_t *) avtopte(va));
199		}
200	}
201	return(0);
202}
203
204/*
205 *	Routine:	pmap_extract
206 *	Function:
207 *		Extract the physical page address associated
208 *		with the given map/virtual_address pair.
209 */
210
211vm_offset_t
212pmap_extract(pmap, va)
213	register pmap_t	pmap;
214	vm_offset_t va;
215{
216	vm_offset_t pa;
217
218	if (pmap && *pmap_pde(pmap, va)) {
219		vm_offset_t frame = (int) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
220		/* are we current address space or kernel? */
221		if ( (pmap == kernel_pmap)
222			|| (frame == ((int) PTDpde & PG_FRAME)) ) {
223			pa = *(int *) vtopte(va);
224		/* otherwise, we are alternate address space */
225		} else {
226			if ( frame != ((int) APTDpde & PG_FRAME)) {
227				APTDpde = pmap->pm_pdir[PTDPTDI];
228				pmap_update();
229			}
230			pa = *(int *) avtopte(va);
231		}
232		return ((pa & PG_FRAME) | (va & ~PG_FRAME));
233	}
234	return 0;
235
236}
237
238/*
239 * determine if a page is managed (memory vs. device)
240 */
241static inline int
242pmap_is_managed(pa)
243	vm_offset_t pa;
244{
245	int i;
246
247	if (!pmap_initialized)
248		return 0;
249
250	for (i = 0; phys_avail[i + 1]; i += 2) {
251		if (pa >= phys_avail[i] && pa < phys_avail[i + 1])
252			return 1;
253	}
254	return 0;
255}
256
257/*
258 * find the vm_page_t of a pte (only) given va of pte and pmap
259 */
260__inline vm_page_t
261pmap_pte_vm_page(pmap, pt)
262	pmap_t pmap;
263	vm_offset_t pt;
264{
265	pt = i386_trunc_page( pt);
266	pt = (pt - UPT_MIN_ADDRESS) / NBPG;
267	pt = ((vm_offset_t) pmap->pm_pdir[pt]) & PG_FRAME;
268	return PHYS_TO_VM_PAGE(pt);
269}
270
271/*
272 * Wire a page table page
273 */
274inline void
275pmap_use_pt(pmap, va)
276	pmap_t pmap;
277	vm_offset_t va;
278{
279	vm_offset_t pt;
280
281	if (va >= VM_MAX_ADDRESS || !pmap_initialized)
282		return;
283
284	pt = (vm_offset_t) vtopte(va);
285	vm_page_hold( pmap_pte_vm_page(pmap, pt));
286}
287
288/*
289 * Unwire a page table page
290 */
291inline void
292pmap_unuse_pt(pmap, va)
293	pmap_t pmap;
294	vm_offset_t va;
295{
296	vm_offset_t pt;
297
298	if (va >= VM_MAX_ADDRESS || !pmap_initialized)
299		return;
300
301	pt = (vm_offset_t) vtopte(va);
302	vm_page_unhold( pmap_pte_vm_page(pmap, pt));
303}
304
/*
 * Function form of the PMAP_ACTIVATE macro: expands it for the given
 * pmap and pcb and does nothing else.
 */
/* [ macro again?, should I force kstack into user map here? -wfj ] */
void
pmap_activate(pmap, pcbp)
	register pmap_t pmap;
	struct pcb *pcbp;
{
	PMAP_ACTIVATE(pmap, pcbp);
}
313
314/*
315 *	Bootstrap the system enough to run with virtual memory.
316 *	Map the kernel's code and data, and allocate the system page table.
317 *
318 *	On the I386 this is called after mapping has already been enabled
319 *	and just syncs the pmap module with what has already been done.
320 *	[We can't call it easily with mapping off since the kernel is not
321 *	mapped with PA == VA, hence we would have to relocate every address
322 *	from the linked base (virtual) address "KERNBASE" to the actual
323 *	(physical) address starting relative to 0]
324 */
325
326#define DMAPAGES 8
327void
328pmap_bootstrap(firstaddr, loadaddr)
329	vm_offset_t firstaddr;
330	vm_offset_t loadaddr;
331{
332#if BSDVM_COMPAT
333	vm_offset_t va;
334	pt_entry_t *pte;
335#endif
336
337	avail_start = firstaddr + DMAPAGES*NBPG;
338
339	virtual_avail = (vm_offset_t) KERNBASE + avail_start;
340	virtual_end = VM_MAX_KERNEL_ADDRESS;
341	i386pagesperpage = PAGE_SIZE / NBPG;
342
343	/*
344	 * Initialize protection array.
345	 */
346	i386_protection_init();
347
348	/*
349	 * The kernel's pmap is statically allocated so we don't
350	 * have to use pmap_create, which is unlikely to work
351	 * correctly at this part of the boot sequence.
352	 */
353	kernel_pmap = &kernel_pmap_store;
354
355	kernel_pmap->pm_pdir = (pd_entry_t *)(KERNBASE + IdlePTD);
356
357	simple_lock_init(&kernel_pmap->pm_lock);
358	kernel_pmap->pm_count = 1;
359	nkpt = NKPT;
360
361#if BSDVM_COMPAT
362	/*
363	 * Allocate all the submaps we need
364	 */
365#define	SYSMAP(c, p, v, n)	\
366	v = (c)va; va += ((n)*NBPG); p = pte; pte += (n);
367
368	va = virtual_avail;
369	pte = pmap_pte(kernel_pmap, va);
370
371	SYSMAP(caddr_t		,CMAP1		,CADDR1	   ,1		)
372	SYSMAP(caddr_t		,CMAP2		,CADDR2	   ,1		)
373	SYSMAP(caddr_t		,ptmmap		,ptvmmap	   ,1		)
374	SYSMAP(struct msgbuf *	,msgbufmap	,msgbufp   ,1		)
375	virtual_avail = va;
376#endif
377	/*
378	 * Reserve special hunk of memory for use by bus dma as a bounce
379	 * buffer (contiguous virtual *and* physical memory).
380	 */
381	{
382		extern vm_offset_t isaphysmem;
383		isaphysmem = va;
384
385		virtual_avail = pmap_map(va, firstaddr,
386				firstaddr + DMAPAGES*NBPG, VM_PROT_ALL);
387	}
388
389	*(int *)CMAP1 = *(int *)CMAP2 = *(int *)PTD = 0;
390	pmap_update();
391
392}
393
394/*
395 *	Initialize the pmap module.
396 *	Called by vm_init, to initialize any structures that the pmap
397 *	system needs to map virtual memory.
398 *	pmap_init has been enhanced to support in a fairly consistant
399 *	way, discontiguous physical memory.
400 */
401void
402pmap_init(phys_start, phys_end)
403	vm_offset_t	phys_start, phys_end;
404{
405	vm_offset_t	addr;
406	vm_size_t	npg, s;
407	int i;
408
409	/*
410	 * Now that kernel map has been allocated, we can mark as
411	 * unavailable regions which we have mapped in locore.
412	 */
413	addr = atdevbase;
414	(void) vm_map_find(kernel_map, NULL, (vm_offset_t) 0,
415			   &addr, (0x100000-0xa0000), FALSE);
416
417	addr = (vm_offset_t) KERNBASE + IdlePTD;
418	vm_object_reference(kernel_object);
419	(void) vm_map_find(kernel_map, kernel_object, addr,
420			   &addr, (4 + NKPDE) * NBPG, FALSE);
421
422	/*
423	 * calculate the number of pv_entries needed
424	 */
425	vm_first_phys = phys_avail[0];
426	for (i = 0; phys_avail[i + 1]; i += 2) ;
427	npg = (phys_avail[(i - 2) + 1] - vm_first_phys) / NBPG;
428
429	/*
430	 * Allocate memory for random pmap data structures.  Includes the
431	 * pv_head_table.
432	 */
433	s = (vm_size_t) (sizeof(struct pv_entry) * npg);
434	s = i386_round_page(s);
435	addr = (vm_offset_t) kmem_alloc(kernel_map, s);
436	pv_table = (pv_entry_t) addr;
437
438	/*
439	 * init the pv free list
440	 */
441	init_pv_entries(npg);
442	/*
443	 * Now it is safe to enable pv_table recording.
444	 */
445	pmap_initialized = TRUE;
446}
447
448/*
449 *	Used to map a range of physical addresses into kernel
450 *	virtual address space.
451 *
452 *	For now, VM is already on, we only need to map the
453 *	specified memory.
454 */
455vm_offset_t
456pmap_map(virt, start, end, prot)
457	vm_offset_t	virt;
458	vm_offset_t	start;
459	vm_offset_t	end;
460	int		prot;
461{
462	while (start < end) {
463		pmap_enter(kernel_pmap, virt, start, prot, FALSE);
464		virt += PAGE_SIZE;
465		start += PAGE_SIZE;
466	}
467	return(virt);
468}
469
470/*
471 *	Create and return a physical map.
472 *
473 *	If the size specified for the map
474 *	is zero, the map is an actual physical
475 *	map, and may be referenced by the
476 *	hardware.
477 *
478 *	If the size specified is non-zero,
479 *	the map will be used in software only, and
480 *	is bounded by that size.
481 *
482 * [ just allocate a ptd and mark it uninitialize -- should we track
483 *   with a table which process has which ptd? -wfj ]
484 */
485
486pmap_t
487pmap_create(size)
488	vm_size_t	size;
489{
490	register pmap_t pmap;
491
492	/*
493	 * Software use map does not need a pmap
494	 */
495	if (size)
496		return(NULL);
497
498	pmap = (pmap_t) malloc(sizeof *pmap, M_VMPMAP, M_WAITOK);
499	bzero(pmap, sizeof(*pmap));
500	pmap_pinit(pmap);
501	return (pmap);
502}
503
504
/*
 * Type used for page directory pages handed to vm_get_pmap() and
 * vm_put_pmap().
 * NOTE(review): nothing in the visible code links pages through
 * "next"; confirm whether a free list of these is still intended.
 */
struct pmaplist {
	struct pmaplist *next;
};
508
509static inline void *
510vm_get_pmap()
511{
512	struct pmaplist *rtval;
513
514	rtval = (struct pmaplist *)kmem_alloc(kernel_map, ctob(1));
515	bzero(rtval, ctob(1));
516	return rtval;
517}
518
519static inline void
520vm_put_pmap(up)
521	struct pmaplist *up;
522{
523	kmem_free(kernel_map, (vm_offset_t)up, ctob(1));
524}
525
526/*
527 * Initialize a preallocated and zeroed pmap structure,
528 * such as one in a vmspace structure.
529 */
530void
531pmap_pinit(pmap)
532	register struct pmap *pmap;
533{
534	/*
535	 * No need to allocate page table space yet but we do need a
536	 * valid page directory table.
537	 */
538	pmap->pm_pdir = (pd_entry_t *) vm_get_pmap();
539
540	/* wire in kernel global address entries */
541	bcopy(PTD+KPTDI, pmap->pm_pdir+KPTDI, nkpt*PTESIZE);
542
543	/* install self-referential address mapping entry */
544	*(int *)(pmap->pm_pdir+PTDPTDI) =
545		((int)pmap_kextract((vm_offset_t)pmap->pm_pdir)) | PG_V | PG_KW;
546
547	pmap->pm_count = 1;
548	simple_lock_init(&pmap->pm_lock);
549}
550
551/*
552 * grow the number of kernel page table entries, if needed
553 */
554
555vm_page_t nkpg;
556vm_offset_t kernel_vm_end;
557
558void
559pmap_growkernel(vm_offset_t addr) {
560	struct proc *p;
561	struct pmap *pmap;
562	int s;
563
564	s = splhigh();
565	if (kernel_vm_end == 0) {
566		kernel_vm_end = KERNBASE;
567		nkpt = 0;
568		while(pdir_pde(PTD, kernel_vm_end)) {
569			kernel_vm_end = (kernel_vm_end + NBPG*NPTEPG) & ~(NBPG*NPTEPG-1);
570			++nkpt;
571		}
572	}
573
574	addr = (addr + NBPG*NPTEPG) & ~(NBPG*NPTEPG-1);
575	while( kernel_vm_end < addr) {
576		if( pdir_pde( PTD, kernel_vm_end)) {
577			kernel_vm_end = (kernel_vm_end + NBPG*NPTEPG) & ~(NBPG*NPTEPG-1);
578			continue;
579		}
580
581		++nkpt;
582		if( !nkpg) {
583			nkpg = vm_page_alloc(kernel_object, 0, TRUE);
584			vm_page_remove(nkpg);
585			pmap_zero_page(VM_PAGE_TO_PHYS(nkpg));
586			if( !nkpg)
587				panic("pmap_growkernel: no memory to grow kernel");
588		}
589		pdir_pde( PTD, kernel_vm_end) = (pd_entry_t) (VM_PAGE_TO_PHYS(nkpg) | PG_V | PG_KW);
590		nkpg = NULL;
591
592		for (p = (struct proc *)allproc; p != NULL; p = p->p_next) {
593			if( p->p_vmspace) {
594				pmap = &p->p_vmspace->vm_pmap;
595				*pmap_pde( pmap, kernel_vm_end) = pdir_pde( PTD, kernel_vm_end);
596			}
597		}
598		kernel_vm_end = (kernel_vm_end + NBPG*NPTEPG) & ~(NBPG*NPTEPG-1);
599	}
600#if 0
601	if( !nkpg) {
602		nkpg = vm_page_alloc(kernel_object, 0, TRUE);
603		vm_page_remove(nkpg);
604		pmap_zero_page(VM_PAGE_TO_PHYS(nkpg));
605	}
606#endif
607	splx(s);
608}
609
610/*
611 *	Retire the given physical map from service.
612 *	Should only be called if the map contains
613 *	no valid mappings.
614 */
615void
616pmap_destroy(pmap)
617	register pmap_t pmap;
618{
619	int count;
620
621	if (pmap == NULL)
622		return;
623
624	simple_lock(&pmap->pm_lock);
625	count = --pmap->pm_count;
626	simple_unlock(&pmap->pm_lock);
627	if (count == 0) {
628		pmap_release(pmap);
629		free((caddr_t)pmap, M_VMPMAP);
630	}
631}
632
633/*
634 * Release any resources held by the given physical map.
635 * Called when a pmap initialized by pmap_pinit is being released.
636 * Should only be called if the map contains no valid mappings.
637 */
638void
639pmap_release(pmap)
640	register struct pmap *pmap;
641{
642	vm_put_pmap((struct pmaplist *) pmap->pm_pdir);
643}
644
645/*
646 *	Add a reference to the specified pmap.
647 */
648void
649pmap_reference(pmap)
650	pmap_t	pmap;
651{
652	if (pmap != NULL) {
653		simple_lock(&pmap->pm_lock);
654		pmap->pm_count++;
655		simple_unlock(&pmap->pm_lock);
656	}
657}
658
/*
 * Low-water mark for the pv free list: half the number of pv_entry
 * structures that fit in NBPG bytes.
 */
#define PV_FREELIST_MIN ((NBPG / sizeof (struct pv_entry)) / 2)

/*
 * Data for the pv entry allocation mechanism
 */
int pv_freelistcnt;	/* entries on pv_freelist (temporarily inflated inside pmap_alloc_pv_entry) */
pv_entry_t pv_freelist;	/* head of the free list, linked through pv_next */
vm_offset_t pvva;	/* kernel VA at which the next page of pv entries is mapped */
int npvvapg;		/* pages of reserved pv-entry VA not yet backed by memory */
668
669/*
670 * free the pv_entry back to the free list
671 */
672inline static void
673free_pv_entry(pv)
674	pv_entry_t pv;
675{
676	if (!pv) return;
677	++pv_freelistcnt;
678	pv->pv_next = pv_freelist;
679	pv_freelist = pv;
680}
681
682/*
683 * get a new pv_entry, allocating a block from the system
684 * when needed.
685 * the memory allocation is performed bypassing the malloc code
686 * because of the possibility of allocations at interrupt time.
687 */
688static inline pv_entry_t
689get_pv_entry()
690{
691	pv_entry_t tmp;
692
693	/*
694	 * get more pv_entry pages if needed
695	 */
696	while (pv_freelistcnt < PV_FREELIST_MIN || pv_freelist == 0) {
697		pmap_alloc_pv_entry();
698	}
699
700	/*
701	 * get a pv_entry off of the free list
702	 */
703	--pv_freelistcnt;
704	tmp = pv_freelist;
705	pv_freelist = tmp->pv_next;
706	tmp->pv_pmap = 0;
707	tmp->pv_va = 0;
708	tmp->pv_next = 0;
709	return tmp;
710}
711
712/*
713 * this *strange* allocation routine *statistically* eliminates the
714 * *possibility* of a malloc failure (*FATAL*) for a pv_entry_t data structure.
715 * also -- this code is MUCH MUCH faster than the malloc equiv...
716 */
717static void
718pmap_alloc_pv_entry()
719{
720	/*
721	 * do we have any pre-allocated map-pages left?
722	 */
723	if (npvvapg) {
724		vm_page_t m;
725		/*
726		 * we do this to keep recursion away
727		 */
728		pv_freelistcnt += PV_FREELIST_MIN;
729		/*
730		 * allocate a physical page out of the vm system
731		 */
732		m = vm_page_alloc(kernel_object, pvva-vm_map_min(kernel_map), TRUE);
733		if (m) {
734			int newentries;
735			int i;
736			pv_entry_t entry;
737			newentries = (NBPG/sizeof (struct pv_entry));
738			/*
739			 * wire the page
740			 */
741			vm_page_wire(m);
742			m->flags &= ~PG_BUSY;
743			/*
744			 * let the kernel see it
745			 */
746			pmap_kenter(pvva, VM_PAGE_TO_PHYS(m));
747
748			entry = (pv_entry_t) pvva;
749			/*
750			 * update the allocation pointers
751			 */
752			pvva += NBPG;
753			--npvvapg;
754
755			/*
756			 * free the entries into the free list
757			 */
758			for (i = 0; i < newentries; i++) {
759				free_pv_entry(entry);
760				entry++;
761			}
762		}
763		pv_freelistcnt -= PV_FREELIST_MIN;
764	}
765	if (!pv_freelist)
766		panic("get_pv_entry: cannot get a pv_entry_t");
767}
768
769
770
771/*
772 * init the pv_entry allocation system
773 */
774#define PVSPERPAGE 64
775void
776init_pv_entries(npg)
777	int npg;
778{
779	/*
780	 * allocate enough kvm space for PVSPERPAGE entries per page (lots)
781	 * kvm space is fairly cheap, be generous!!!  (the system can panic
782	 * if this is too small.)
783	 */
784	npvvapg = ((npg*PVSPERPAGE) * sizeof(struct pv_entry) + NBPG - 1)/NBPG;
785	pvva = kmem_alloc_pageable(kernel_map, npvvapg * NBPG);
786	/*
787	 * get the first batch of entries
788	 */
789	free_pv_entry(get_pv_entry());
790}
791
792static pt_entry_t *
793get_pt_entry(pmap)
794	pmap_t pmap;
795{
796	vm_offset_t frame = (int) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
797
798	/* are we current address space or kernel? */
799	if (pmap == kernel_pmap || frame == ((int) PTDpde & PG_FRAME)) {
800		return PTmap;
801	}
802	/* otherwise, we are alternate address space */
803	if ( frame != ((int) APTDpde & PG_FRAME)) {
804		APTDpde = pmap->pm_pdir[PTDPTDI];
805		pmap_update();
806	}
807	return APTmap;
808}
809
810/*
811 * If it is the first entry on the list, it is actually
812 * in the header and we must copy the following entry up
813 * to the header.  Otherwise we must search the list for
814 * the entry.  In either case we free the now unused entry.
815 */
816void
817pmap_remove_entry(pmap, pv, va)
818	struct pmap *pmap;
819	pv_entry_t pv;
820	vm_offset_t va;
821{
822	pv_entry_t npv;
823	int s;
824	s = splhigh();
825	if (pmap == pv->pv_pmap && va == pv->pv_va) {
826		npv = pv->pv_next;
827		if (npv) {
828			*pv = *npv;
829			free_pv_entry(npv);
830		} else {
831			pv->pv_pmap = NULL;
832		}
833	} else {
834		for (npv = pv->pv_next; npv; npv = npv->pv_next) {
835			if (pmap == npv->pv_pmap && va == npv->pv_va) {
836				break;
837			}
838			pv = npv;
839		}
840		if (npv) {
841			pv->pv_next = npv->pv_next;
842			free_pv_entry(npv);
843		}
844	}
845	splx(s);
846}
847
848/*
849 *	Remove the given range of addresses from the specified map.
850 *
851 *	It is assumed that the start and end are properly
852 *	rounded to the page size.
853 */
854void
855pmap_remove(pmap, sva, eva)
856	struct pmap *pmap;
857	register vm_offset_t sva;
858	register vm_offset_t eva;
859{
860	register pt_entry_t *ptp,*ptq;
861	vm_offset_t pa;
862	register pv_entry_t pv;
863	vm_offset_t va;
864	vm_page_t m;
865	pt_entry_t oldpte;
866
867	if (pmap == NULL)
868		return;
869
870	ptp = get_pt_entry(pmap);
871
872/*
873 * special handling of removing one page.  a very
874 * common operation and easy to short circuit some
875 * code.
876 */
877	if( (sva + NBPG) == eva) {
878
879		if( *pmap_pde( pmap, sva) == 0)
880			return;
881
882		ptq = ptp + i386_btop(sva);
883
884		if( !*ptq)
885			return;
886		/*
887		 * Update statistics
888		 */
889		if (pmap_pte_w(ptq))
890			pmap->pm_stats.wired_count--;
891		pmap->pm_stats.resident_count--;
892
893		pa = pmap_pte_pa(ptq);
894		oldpte = *ptq;
895		*ptq = 0;
896
897		if (pmap_is_managed(pa)) {
898			if ((int) oldpte & PG_M) {
899				if ((sva < USRSTACK || sva > UPT_MAX_ADDRESS) ||
900				    (sva >= USRSTACK && sva < USRSTACK+(UPAGES*NBPG))) {
901					if (sva < clean_sva || sva >= clean_eva) {
902						PHYS_TO_VM_PAGE(pa)->dirty |= VM_PAGE_BITS_ALL;
903					}
904				}
905			}
906
907			pv = pa_to_pvh(pa);
908			pmap_remove_entry(pmap, pv, sva);
909			pmap_unuse_pt(pmap, sva);
910		}
911		pmap_update();
912		return;
913	}
914
915	sva = i386_btop(sva);
916	eva = i386_btop(eva);
917
918	while (sva < eva) {
919		/*
920		 * Weed out invalid mappings.
921		 * Note: we assume that the page directory table is
922	 	 * always allocated, and in kernel virtual.
923		 */
924
925		if ( *pmap_pde(pmap, i386_ptob(sva)) == 0 ) {
926			/* We can race ahead here, straight to next pde.. */
927			sva = ((sva + NPTEPG) & ~(NPTEPG - 1));
928			continue;
929		}
930
931		ptq = ptp + sva;
932
933		/*
934		 * search for page table entries, use string operations
935		 * that are much faster than
936		 * explicitly scanning when page tables are not fully
937		 * populated.
938		 */
939		if ( *ptq == 0) {
940			vm_offset_t pdnxt = ((sva + NPTEPG) & ~(NPTEPG - 1));
941			vm_offset_t nscan = pdnxt - sva;
942			int found = 0;
943
944			if ((nscan + sva) > eva)
945				nscan = eva - sva;
946
947			asm("xorl %%eax,%%eax;cld;repe;scasl;jz 1f;incl %%eax;1:;"
948				:"=D"(ptq),"=a"(found)
949				:"c"(nscan),"0"(ptq)
950				:"cx");
951
952			if( !found) {
953				sva = pdnxt;
954				continue;
955			}
956			ptq -= 1;
957
958			sva = ptq - ptp;
959		}
960
961		/*
962		 * Update statistics
963		 */
964		oldpte = *ptq;
965		if (((int)oldpte) & PG_W)
966			pmap->pm_stats.wired_count--;
967		pmap->pm_stats.resident_count--;
968
969		/*
970		 * Invalidate the PTEs.
971		 * XXX: should cluster them up and invalidate as many
972		 * as possible at once.
973		 */
974		*ptq = 0;
975
976		va = i386_ptob(sva);
977
978		/*
979		 * Remove from the PV table (raise IPL since we
980		 * may be called at interrupt time).
981		 */
982		pa = ((int)oldpte) & PG_FRAME;
983		if (!pmap_is_managed(pa)) {
984			++sva;
985			continue;
986		}
987
988		if ((int) oldpte & PG_M) {
989			if ((va < USRSTACK || va > UPT_MAX_ADDRESS) ||
990			    (va >= USRSTACK && va < USRSTACK+(UPAGES*NBPG))) {
991				if (va < clean_sva || va >= clean_eva) {
992					PHYS_TO_VM_PAGE(pa)->dirty |= VM_PAGE_BITS_ALL;
993				}
994			}
995		}
996
997		pv = pa_to_pvh(pa);
998		pmap_remove_entry(pmap, pv, va);
999		pmap_unuse_pt(pmap, va);
1000		++sva;
1001	}
1002	pmap_update();
1003}
1004
1005/*
1006 *	Routine:	pmap_remove_all
1007 *	Function:
1008 *		Removes this physical page from
1009 *		all physical maps in which it resides.
1010 *		Reflects back modify bits to the pager.
1011 *
1012 *	Notes:
1013 *		Original versions of this routine were very
1014 *		inefficient because they iteratively called
1015 *		pmap_remove (slow...)
1016 */
1017void
1018pmap_remove_all(pa)
1019	vm_offset_t pa;
1020{
1021	register pv_entry_t pv, npv;
1022	register pt_entry_t *pte, *ptp;
1023	vm_offset_t va;
1024	struct pmap *pmap;
1025	vm_page_t m;
1026	int s;
1027	int anyvalid = 0;
1028
1029	/*
1030	 * Not one of ours
1031	 */
1032	if (!pmap_is_managed(pa))
1033		return;
1034
1035	pa = i386_trunc_page(pa);
1036	pv = pa_to_pvh(pa);
1037	m = PHYS_TO_VM_PAGE(pa);
1038
1039	s = splhigh();
1040	while (pv->pv_pmap != NULL) {
1041		pmap = pv->pv_pmap;
1042		ptp = get_pt_entry(pmap);
1043		va = pv->pv_va;
1044		pte = ptp + i386_btop(va);
1045		if (pmap_pte_w(pte))
1046			pmap->pm_stats.wired_count--;
1047		if (*pte) {
1048			pmap->pm_stats.resident_count--;
1049			anyvalid++;
1050
1051			/*
1052			 * Update the vm_page_t clean and reference bits.
1053			 */
1054			if ((int) *pte & PG_M) {
1055				if ((va < USRSTACK || va > UPT_MAX_ADDRESS) ||
1056				    (va >= USRSTACK && va < USRSTACK+(UPAGES*NBPG))) {
1057					if (va < clean_sva || va >= clean_eva) {
1058						PHYS_TO_VM_PAGE(pa)->dirty |= VM_PAGE_BITS_ALL;
1059					}
1060				}
1061			}
1062
1063			*pte = 0;
1064		}
1065		pmap_unuse_pt(pmap, va);
1066
1067		npv = pv->pv_next;
1068		if (npv) {
1069			*pv = *npv;
1070			free_pv_entry(npv);
1071		} else {
1072			pv->pv_pmap = NULL;
1073		}
1074	}
1075	splx(s);
1076	if (anyvalid)
1077		pmap_update();
1078}
1079
1080
1081/*
1082 *	Set the physical protection on the
1083 *	specified range of this map as requested.
1084 */
1085void
1086pmap_protect(pmap, sva, eva, prot)
1087	register pmap_t	pmap;
1088	vm_offset_t	sva, eva;
1089	vm_prot_t	prot;
1090{
1091	register pt_entry_t *pte;
1092	register vm_offset_t va;
1093	int i386prot;
1094	register pt_entry_t *ptp;
1095	int evap = i386_btop(eva);
1096	int anyvalid = 0;;
1097
1098	if (pmap == NULL)
1099		return;
1100
1101	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1102		pmap_remove(pmap, sva, eva);
1103		return;
1104	}
1105	if (prot & VM_PROT_WRITE)
1106		return;
1107
1108	ptp = get_pt_entry(pmap);
1109
1110	va = sva;
1111	while (va < eva) {
1112		int found=0;
1113		int svap;
1114		vm_offset_t nscan;
1115		/*
1116		 * Page table page is not allocated.
1117		 * Skip it, we don't want to force allocation
1118		 * of unnecessary PTE pages just to set the protection.
1119		 */
1120		if (! *pmap_pde(pmap, va)) {
1121			/* XXX: avoid address wrap around */
1122nextpde:
1123			if (va >= i386_trunc_pdr((vm_offset_t)-1))
1124				break;
1125			va = i386_round_pdr(va + PAGE_SIZE);
1126			continue;
1127		}
1128
1129		pte = ptp + i386_btop(va);
1130
1131		if( *pte == 0) {
1132		/*
1133		 * scan for a non-empty pte
1134		 */
1135			svap = pte - ptp;
1136			nscan = ((svap + NPTEPG) & ~(NPTEPG - 1)) - svap;
1137
1138			if (nscan + svap > evap)
1139				nscan = evap - svap;
1140
1141			found = 0;
1142			if (nscan)
1143				asm("xorl %%eax,%%eax;cld;repe;scasl;jz 1f;incl %%eax;1:;"
1144					:"=D"(pte),"=a"(found)
1145					:"c"(nscan),"0"(pte):"cx");
1146
1147			if( !found)
1148				goto nextpde;
1149
1150			pte -= 1;
1151			svap = pte - ptp;
1152
1153			va = i386_ptob(svap);
1154		}
1155
1156		anyvalid++;
1157
1158		i386prot = pte_prot(pmap, prot);
1159		if (va < UPT_MAX_ADDRESS) {
1160			i386prot |= PG_u;
1161			if( va >= UPT_MIN_ADDRESS)
1162				i386prot |= PG_RW;
1163		}
1164		pmap_pte_set_prot(pte, i386prot);
1165		va += PAGE_SIZE;
1166	}
1167	if (anyvalid)
1168		pmap_update();
1169}
1170
1171/*
1172 *	Insert the given physical page (p) at
1173 *	the specified virtual address (v) in the
1174 *	target physical map with the protection requested.
1175 *
1176 *	If specified, the page will be wired down, meaning
1177 *	that the related pte can not be reclaimed.
1178 *
1179 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1180 *	or lose information.  That is, this routine must actually
1181 *	insert this page into the given map NOW.
1182 */
1183void
1184pmap_enter(pmap, va, pa, prot, wired)
1185	register pmap_t pmap;
1186	vm_offset_t va;
1187	register vm_offset_t pa;
1188	vm_prot_t prot;
1189	boolean_t wired;
1190{
1191	register pt_entry_t *pte;
1192	register pt_entry_t npte;
1193	vm_offset_t opa;
1194	int cacheable=1;
1195	int ptevalid = 0;
1196
1197	if (pmap == NULL)
1198		return;
1199
1200	va = i386_trunc_page(va);
1201	pa = i386_trunc_page(pa);
1202	if (va > VM_MAX_KERNEL_ADDRESS)
1203		panic("pmap_enter: toobig");
1204
1205	/*
1206	 * Page Directory table entry not valid, we need a new PT page
1207	 */
1208	if (*pmap_pde(pmap, va) == 0) {
1209		printf("kernel page directory invalid pdir=0x%x, va=0x%x\n", pmap->pm_pdir[PTDPTDI], va);
1210		panic("invalid kernel page directory");
1211	}
1212
1213	pte = pmap_pte(pmap, va);
1214	opa = pmap_pte_pa(pte);
1215
1216	/*
1217	 * Mapping has not changed, must be protection or wiring change.
1218	 */
1219	if (opa == pa) {
1220		/*
1221		 * Wiring change, just update stats.
1222		 * We don't worry about wiring PT pages as they remain
1223		 * resident as long as there are valid mappings in them.
1224		 * Hence, if a user page is wired, the PT page will be also.
1225		 */
1226		if (wired && !pmap_pte_w(pte))
1227			pmap->pm_stats.wired_count++;
1228		else if (!wired && pmap_pte_w(pte))
1229			pmap->pm_stats.wired_count--;
1230
1231		goto validate;
1232	}
1233
1234	/*
1235	 * Mapping has changed, invalidate old range and fall through to
1236	 * handle validating new mapping.
1237	 */
1238	if (opa) {
1239		pmap_remove(pmap, va, va + PAGE_SIZE);
1240	}
1241
1242	/*
1243	 * Enter on the PV list if part of our managed memory
1244	 * Note that we raise IPL while manipulating pv_table
1245	 * since pmap_enter can be called at interrupt time.
1246	 */
1247	if (pmap_is_managed(pa)) {
1248		register pv_entry_t pv, npv;
1249		int s;
1250
1251		pv = pa_to_pvh(pa);
1252		s = splhigh();
1253		/*
1254		 * No entries yet, use header as the first entry
1255		 */
1256		if (pv->pv_pmap == NULL) {
1257			pv->pv_va = va;
1258			pv->pv_pmap = pmap;
1259			pv->pv_next = NULL;
1260		}
1261		/*
1262		 * There is at least one other VA mapping this page.
1263		 * Place this entry after the header.
1264		 */
1265		else {
1266			npv = get_pv_entry();
1267			npv->pv_va = va;
1268			npv->pv_pmap = pmap;
1269			npv->pv_next = pv->pv_next;
1270			pv->pv_next = npv;
1271		}
1272		splx(s);
1273		cacheable = 1;
1274	} else {
1275		cacheable = 0;
1276	}
1277
1278	pmap_use_pt(pmap, va);
1279
1280	/*
1281	 * Increment counters
1282	 */
1283	pmap->pm_stats.resident_count++;
1284	if (wired)
1285		pmap->pm_stats.wired_count++;
1286
1287validate:
1288	/*
1289	 * Now validate mapping with desired protection/wiring.
1290	 */
1291	npte = (pt_entry_t) ( (int) (pa | pte_prot(pmap, prot) | PG_V));
1292	/*
1293	 * for correctness:
1294	 */
1295	if( !cacheable)
1296		(int) npte |= PG_N;
1297
1298	/*
1299	 * When forking (copy-on-write, etc):
1300	 * A process will turn off write permissions for any of its writable
1301	 * pages.  If the data (object) is only referred to by one process, the
1302	 * processes map is modified directly as opposed to using the
1303	 * object manipulation routine.  When using pmap_protect, the
1304	 * modified bits are not kept in the vm_page_t data structure.
1305	 * Therefore, when using pmap_enter in vm_fault to bring back
1306	 * writability of a page, there has been no memory of the
1307	 * modified or referenced bits except at the pte level.
1308	 * this clause supports the carryover of the modified and
1309	 * used (referenced) bits.
1310	 */
1311	if (pa == opa)
1312		(int) npte |= (int) *pte & (PG_M|PG_U);
1313
1314
1315	if (wired)
1316		(int) npte |= PG_W;
1317	if (va < UPT_MIN_ADDRESS)
1318		(int) npte |= PG_u;
1319	else if (va < UPT_MAX_ADDRESS)
1320		(int) npte |= PG_u | PG_RW;
1321
1322	if(*pte != npte) {
1323		if (*pte)
1324			ptevalid++;
1325		*pte = npte;
1326	}
1327	if (ptevalid)
1328		pmap_update();
1329}
1330
1331/*
1332 * Add a list of wired pages to the kva
1333 * this routine is only used for temporary
1334 * kernel mappings that do not need to have
1335 * page modification or references recorded.
1336 * Note that old mappings are simply written
1337 * over.  The page *must* be wired.
1338 */
1339void
1340pmap_qenter(va, m, count)
1341	vm_offset_t va;
1342	vm_page_t *m;
1343	int count;
1344{
1345	int i;
1346	int anyvalid = 0;
1347	register pt_entry_t *pte;
1348
1349	for(i=0;i<count;i++) {
1350		pte = vtopte(va + i * NBPG);
1351		if (*pte)
1352			anyvalid++;
1353		*pte = (pt_entry_t) ( (int) (VM_PAGE_TO_PHYS(m[i]) | PG_RW | PG_V | PG_W));
1354	}
1355	if (anyvalid)
1356		pmap_update();
1357}
1358/*
1359 * this routine jerks page mappings from the
1360 * kernel -- it is meant only for temporary mappings.
1361 */
1362void
1363pmap_qremove(va, count)
1364	vm_offset_t va;
1365	int count;
1366{
1367	int i;
1368	register pt_entry_t *pte;
1369	for (i=0;i<count;i++) {
1370		pte = vtopte(va + i * NBPG);
1371		*pte = 0;
1372	}
1373	pmap_update();
1374}
1375
1376/*
1377 * add a wired page to the kva
1378 * note that in order for the mapping to take effect -- you
1379 * should do a pmap_update after doing the pmap_kenter...
1380 */
1381void
1382pmap_kenter(va, pa)
1383	vm_offset_t va;
1384	register vm_offset_t pa;
1385{
1386	register pt_entry_t *pte;
1387	int wasvalid = 0;
1388
1389	pte = vtopte(va);
1390
1391	if (*pte)
1392		wasvalid++;
1393
1394	*pte = (pt_entry_t) ( (int) (pa | PG_RW | PG_V | PG_W));
1395
1396	if (wasvalid)
1397		pmap_update();
1398}
1399
1400/*
1401 * remove a page from the kernel pagetables
1402 */
1403void
1404pmap_kremove( va)
1405	vm_offset_t va;
1406{
1407	register pt_entry_t *pte;
1408	pte = vtopte(va);
1409
1410	*pte = (pt_entry_t) 0;
1411	pmap_update();
1412}
1413
1414/*
1415 * this code makes some *MAJOR* assumptions:
1416 * 1. Current pmap & pmap exists.
1417 * 2. Not wired.
1418 * 3. Read access.
1419 * 4. No page table pages.
1420 * 5. Tlbflush is deferred to calling procedure.
1421 * 6. Page IS managed.
1422 * but is *MUCH* faster than pmap_enter...
1423 */
1424
1425static inline void
1426pmap_enter_quick(pmap, va, pa)
1427	register pmap_t pmap;
1428	vm_offset_t va;
1429	register vm_offset_t pa;
1430{
1431	register pt_entry_t *pte;
1432	register pv_entry_t pv, npv;
1433	int s;
1434
1435	/*
1436	 * Enter on the PV list if part of our managed memory
1437	 * Note that we raise IPL while manipulating pv_table
1438	 * since pmap_enter can be called at interrupt time.
1439	 */
1440
1441	pte = vtopte(va);
1442
1443	/* a fault on the page table might occur here */
1444	if (*pte) {
1445		pmap_remove(pmap, va, va + PAGE_SIZE);
1446	}
1447
1448	pv = pa_to_pvh(pa);
1449	s = splhigh();
1450	/*
1451	 * No entries yet, use header as the first entry
1452	 */
1453	if (pv->pv_pmap == NULL) {
1454		pv->pv_pmap = pmap;
1455		pv->pv_va = va;
1456		pv->pv_next = NULL;
1457	}
1458	/*
1459	 * There is at least one other VA mapping this page.
1460	 * Place this entry after the header.
1461	 */
1462	else {
1463		npv = get_pv_entry();
1464		npv->pv_va = va;
1465		npv->pv_pmap = pmap;
1466		npv->pv_next = pv->pv_next;
1467		pv->pv_next = npv;
1468	}
1469	splx(s);
1470
1471	pmap_use_pt(pmap, va);
1472
1473	/*
1474	 * Increment counters
1475	 */
1476	pmap->pm_stats.resident_count++;
1477
1478	/*
1479	 * Now validate mapping with desired protection/wiring.
1480	 */
1481	*pte = (pt_entry_t) ( (int) (pa | PG_V | PG_u));
1482
1483	return;
1484}
1485
1486/*
1487 * pmap_object_init_pt preloads the ptes for a given object
1488 * into the specified pmap.  This eliminates the blast of soft
1489 * faults on process startup and immediately after an mmap.
1490 */
1491void
1492pmap_object_init_pt(pmap, addr, object, offset, size)
1493	pmap_t pmap;
1494	vm_offset_t addr;
1495	vm_object_t object;
1496	vm_offset_t offset;
1497	vm_offset_t size;
1498{
1499	vm_offset_t tmpoff;
1500	vm_page_t p;
1501	vm_offset_t objbytes;
1502	int bits;
1503
1504	if (!pmap)
1505		return;
1506
1507	if (!vm_object_lock_try(object))
1508		return;
1509
1510	/*
1511	 * if we are processing a major portion of the object, then
1512	 * scan the entire thing.
1513	 */
1514	if (size > (object->size >> 1)) {
1515		objbytes = size;
1516		p = object->memq.tqh_first;
1517		while ((p != NULL) && (objbytes != 0)) {
1518			tmpoff = p->offset;
1519			if( tmpoff < offset) {
1520				p = p->listq.tqe_next;
1521				continue;
1522			}
1523			tmpoff -= offset;
1524			if( tmpoff >= size) {
1525				p = p->listq.tqe_next;
1526				continue;
1527			}
1528
1529			if ((p->bmapped == 0) &&
1530			    (p->busy == 0) &&
1531				((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
1532			    (p->flags & (PG_BUSY|PG_FICTITIOUS|PG_CACHE)) == 0 ) {
1533				vm_page_hold(p);
1534				pmap_enter_quick(pmap, addr+tmpoff, VM_PAGE_TO_PHYS(p));
1535				vm_page_unhold(p);
1536			}
1537			p = p->listq.tqe_next;
1538			objbytes -= NBPG;
1539		}
1540	} else {
1541	/*
1542	 * else lookup the pages one-by-one.
1543	 */
1544		for (tmpoff = 0; tmpoff < size; tmpoff += NBPG) {
1545			p = vm_page_lookup(object, tmpoff + offset);
1546			if (p) {
1547				if ((p->bmapped == 0) &&
1548				    (p->busy == 0) &&
1549					((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
1550				    (p->flags & (PG_BUSY|PG_FICTITIOUS|PG_CACHE)) == 0) {
1551					vm_page_hold(p);
1552					pmap_enter_quick(pmap, addr+tmpoff, VM_PAGE_TO_PHYS(p));
1553					vm_page_unhold(p);
1554				}
1555			}
1556		}
1557	}
1558	vm_object_unlock(object);
1559}
1560
1561/*
1562 *	Routine:	pmap_change_wiring
1563 *	Function:	Change the wiring attribute for a map/virtual-address
1564 *			pair.
1565 *	In/out conditions:
1566 *			The mapping must already exist in the pmap.
1567 */
1568void
1569pmap_change_wiring(pmap, va, wired)
1570	register pmap_t	pmap;
1571	vm_offset_t	va;
1572	boolean_t	wired;
1573{
1574	register pt_entry_t *pte;
1575
1576	if (pmap == NULL)
1577		return;
1578
1579	pte = pmap_pte(pmap, va);
1580
1581	if (wired && !pmap_pte_w(pte))
1582		pmap->pm_stats.wired_count++;
1583	else if (!wired && pmap_pte_w(pte))
1584		pmap->pm_stats.wired_count--;
1585
1586	/*
1587	 * Wiring is not a hardware characteristic so there is no need
1588	 * to invalidate TLB.
1589	 */
1590	pmap_pte_set_w(pte, wired);
1591	/*
1592 	 * When unwiring, set the modified bit in the pte -- could have
1593	 * been changed by the kernel
1594 	 */
1595	if (!wired)
1596		(int) *pte |= PG_M;
1597}
1598
1599
1600
1601/*
1602 *	Copy the range specified by src_addr/len
1603 *	from the source map to the range dst_addr/len
1604 *	in the destination map.
1605 *
1606 *	This routine is only advisory and need not do anything.
1607 */
1608void
1609pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
1610	pmap_t dst_pmap, src_pmap;
1611	vm_offset_t	dst_addr;
1612	vm_size_t	len;
1613	vm_offset_t	src_addr;
1614{
1615}
1616
1617/*
1618 *	Routine:	pmap_kernel
1619 *	Function:
1620 *		Returns the physical map handle for the kernel.
1621 */
1622pmap_t
1623pmap_kernel()
1624{
1625    	return (kernel_pmap);
1626}
1627
1628/*
1629 *	pmap_zero_page zeros the specified (machine independent)
1630 *	page by mapping the page into virtual memory and using
1631 *	bzero to clear its contents, one machine dependent page
1632 *	at a time.
1633 */
1634void
1635pmap_zero_page(phys)
1636	vm_offset_t phys;
1637{
1638	if (*(int *)CMAP2)
1639		panic("pmap_zero_page: CMAP busy");
1640
1641	*(int *)CMAP2 = PG_V | PG_KW | i386_trunc_page(phys);
1642	bzero(CADDR2,NBPG);
1643
1644	*(int *)CMAP2 = 0;
1645	pmap_update();
1646}
1647
1648/*
1649 *	pmap_copy_page copies the specified (machine independent)
1650 *	page by mapping the page into virtual memory and using
1651 *	bcopy to copy the page, one machine dependent page at a
1652 *	time.
1653 */
1654void
1655pmap_copy_page(src, dst)
1656	vm_offset_t src;
1657	vm_offset_t dst;
1658{
1659	if (*(int *)CMAP1 || *(int *)CMAP2)
1660		panic("pmap_copy_page: CMAP busy");
1661
1662	*(int *)CMAP1 = PG_V | PG_KW | i386_trunc_page(src);
1663	*(int *)CMAP2 = PG_V | PG_KW | i386_trunc_page(dst);
1664
1665#if __GNUC__ > 1
1666	memcpy(CADDR2, CADDR1, NBPG);
1667#else
1668	bcopy(CADDR1, CADDR2, NBPG);
1669#endif
1670	*(int *)CMAP1 = 0;
1671	*(int *)CMAP2 = 0;
1672	pmap_update();
1673}
1674
1675
1676/*
1677 *	Routine:	pmap_pageable
1678 *	Function:
1679 *		Make the specified pages (by pmap, offset)
1680 *		pageable (or not) as requested.
1681 *
1682 *		A page which is not pageable may not take
1683 *		a fault; therefore, its page table entry
1684 *		must remain valid for the duration.
1685 *
1686 *		This routine is merely advisory; pmap_enter
1687 *		will specify that these pages are to be wired
1688 *		down (or not) as appropriate.
1689 */
1690void
1691pmap_pageable(pmap, sva, eva, pageable)
1692	pmap_t		pmap;
1693	vm_offset_t	sva, eva;
1694	boolean_t	pageable;
1695{
1696}
1697
1698/*
1699 * this routine returns true if a physical page resides
1700 * in the given pmap.
1701 */
1702boolean_t
1703pmap_page_exists(pmap, pa)
1704	pmap_t pmap;
1705	vm_offset_t pa;
1706{
1707	register pv_entry_t pv;
1708	int s;
1709
1710	if (!pmap_is_managed(pa))
1711		return FALSE;
1712
1713	pv = pa_to_pvh(pa);
1714	s = splhigh();
1715
1716	/*
1717	 * Not found, check current mappings returning
1718	 * immediately if found.
1719	 */
1720	if (pv->pv_pmap != NULL) {
1721		for (; pv; pv = pv->pv_next) {
1722			if (pv->pv_pmap == pmap) {
1723				splx(s);
1724				return TRUE;
1725			}
1726		}
1727	}
1728	splx(s);
1729	return(FALSE);
1730}
1731
1732/*
1733 * pmap_testbit tests bits in pte's
1734 * note that the testbit/changebit routines are inline,
1735 * and a lot of things compile-time evaluate.
1736 */
1737boolean_t
1738pmap_testbit(pa, bit)
1739	register vm_offset_t pa;
1740	int bit;
1741{
1742	register pv_entry_t pv;
1743	pt_entry_t *pte;
1744	int s;
1745
1746	if (!pmap_is_managed(pa))
1747		return FALSE;
1748
1749	pv = pa_to_pvh(pa);
1750	s = splhigh();
1751
1752	/*
1753	 * Not found, check current mappings returning
1754	 * immediately if found.
1755	 */
1756	if (pv->pv_pmap != NULL) {
1757		for (; pv; pv = pv->pv_next) {
1758			/*
1759			 * if the bit being tested is the modified bit,
1760			 * then mark UPAGES as always modified, and
1761			 * ptes as never modified.
1762			 */
1763			if (bit & PG_U ) {
1764				if ((pv->pv_va >= clean_sva) && (pv->pv_va < clean_eva)) {
1765					continue;
1766				}
1767			}
1768			if (bit & PG_M ) {
1769				if (pv->pv_va >= USRSTACK) {
1770					if (pv->pv_va >= clean_sva && pv->pv_va < clean_eva) {
1771						continue;
1772					}
1773					if (pv->pv_va < USRSTACK+(UPAGES*NBPG)) {
1774						splx(s);
1775						return TRUE;
1776					}
1777					else if (pv->pv_va < UPT_MAX_ADDRESS) {
1778						splx(s);
1779						return FALSE;
1780					}
1781				}
1782			}
1783			if( !pv->pv_pmap) {
1784				printf("Null pmap (tb) at va: 0x%lx\n", pv->pv_va);
1785				continue;
1786			}
1787			pte = pmap_pte(pv->pv_pmap, pv->pv_va);
1788			if ((int) *pte & bit) {
1789				splx(s);
1790				return TRUE;
1791			}
1792		}
1793	}
1794	splx(s);
1795	return(FALSE);
1796}
1797
1798/*
1799 * this routine is used to modify bits in ptes
1800 */
1801void
1802pmap_changebit(pa, bit, setem)
1803	vm_offset_t pa;
1804	int bit;
1805	boolean_t setem;
1806{
1807	register pv_entry_t pv;
1808	register pt_entry_t *pte, npte;
1809	vm_offset_t va;
1810	int s;
1811
1812	if (!pmap_is_managed(pa))
1813		return;
1814
1815	pv = pa_to_pvh(pa);
1816	s = splhigh();
1817
1818	/*
1819	 * Loop over all current mappings setting/clearing as appropos
1820	 * If setting RO do we need to clear the VAC?
1821	 */
1822	if (pv->pv_pmap != NULL) {
1823		for (; pv; pv = pv->pv_next) {
1824			va = pv->pv_va;
1825
1826			/*
1827			 * don't write protect pager mappings
1828			 */
1829			if (!setem && (bit == PG_RW)) {
1830				if (va >= clean_sva && va < clean_eva)
1831					continue;
1832			}
1833
1834			if( !pv->pv_pmap) {
1835				printf("Null pmap (cb) at va: 0x%lx\n", va);
1836				continue;
1837			}
1838			pte = pmap_pte(pv->pv_pmap, va);
1839			if (setem)
1840				(int) npte = (int) *pte | bit;
1841			else
1842				(int) npte = (int) *pte & ~bit;
1843			*pte = npte;
1844		}
1845	}
1846	splx(s);
1847	pmap_update();
1848}
1849
1850/*
1851 *      pmap_page_protect:
1852 *
1853 *      Lower the permission for all mappings to a given page.
1854 */
1855void
1856pmap_page_protect(phys, prot)
1857        vm_offset_t     phys;
1858        vm_prot_t       prot;
1859{
1860	if ((prot & VM_PROT_WRITE) == 0) {
1861		if (prot & (VM_PROT_READ | VM_PROT_EXECUTE))
1862			pmap_changebit(phys, PG_RW, FALSE);
1863		else
1864			pmap_remove_all(phys);
1865	}
1866}
1867
1868vm_offset_t
1869pmap_phys_address(ppn)
1870	int ppn;
1871{
1872	return(i386_ptob(ppn));
1873}
1874
1875/*
1876 * Miscellaneous support routines follow
1877 */
1878
1879void
1880i386_protection_init()
1881{
1882	register int *kp, prot;
1883
1884	kp = protection_codes;
1885	for (prot = 0; prot < 8; prot++) {
1886		switch (prot) {
1887		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
1888		/*
1889		 * Read access is also 0. There isn't any execute
1890		 * bit, so just make it readable.
1891		 */
1892		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
1893		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
1894		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
1895			*kp++ = 0;
1896			break;
1897		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
1898		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
1899		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
1900		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
1901			*kp++ = PG_RW;
1902			break;
1903		}
1904	}
1905}
1906
1907/*
1908 * Map a set of physical memory pages into the kernel virtual
1909 * address space. Return a pointer to where it is mapped. This
1910 * routine is intended to be used for mapping device memory,
1911 * NOT real memory. The non-cacheable bits are set on each
1912 * mapped page.
1913 */
1914void *
1915pmap_mapdev(pa, size)
1916	vm_offset_t pa;
1917	vm_size_t size;
1918{
1919	vm_offset_t va, tmpva;
1920	pt_entry_t *pte;
1921
1922	pa = trunc_page(pa);
1923	size = roundup(size, PAGE_SIZE);
1924
1925	va = kmem_alloc_pageable(kernel_map, size);
1926	if (!va)
1927		panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
1928
1929	for (tmpva = va; size > 0;) {
1930		pte = vtopte(tmpva);
1931		*pte = (pt_entry_t) ((int) (pa | PG_RW | PG_V | PG_N));
1932		size -= PAGE_SIZE;
1933		tmpva += PAGE_SIZE;
1934		pa += PAGE_SIZE;
1935	}
1936	pmap_update();
1937
1938	return ((void *)va);
1939}
1940
1941#ifdef DEBUG
1942/* print address space of pmap*/
1943void
1944pads(pm)
1945	pmap_t pm;
1946{
1947	unsigned va, i, j;
1948	pt_entry_t *ptep;
1949
1950	if (pm == kernel_pmap) return;
1951	for (i = 0; i < 1024; i++)
1952		if (pm->pm_pdir[i])
1953			for (j = 0; j < 1024 ; j++) {
1954				va = (i<<PD_SHIFT)+(j<<PG_SHIFT);
1955				if (pm == kernel_pmap && va < KERNBASE)
1956						continue;
1957				if (pm != kernel_pmap && va > UPT_MAX_ADDRESS)
1958						continue;
1959				ptep = pmap_pte(pm, va);
1960				if (pmap_pte_v(ptep))
1961					printf("%x:%x ", va, *(int *)ptep);
1962			} ;
1963
1964}
1965
1966void
1967pmap_pvdump(pa)
1968	vm_offset_t pa;
1969{
1970	register pv_entry_t pv;
1971
1972	printf("pa %x", pa);
1973	for (pv = pa_to_pvh(pa); pv; pv = pv->pv_next) {
1974#ifdef used_to_be
1975		printf(" -> pmap %x, va %x, flags %x",
1976		       pv->pv_pmap, pv->pv_va, pv->pv_flags);
1977#endif
1978		printf(" -> pmap %x, va %x",
1979		       pv->pv_pmap, pv->pv_va);
1980		pads(pv->pv_pmap);
1981	}
1982	printf(" ");
1983}
1984#endif
1985
1986