/*	$OpenBSD: uvm_page.c,v 1.30 2001/11/10 18:42:31 art Exp $	*/
/*	$NetBSD: uvm_page.c,v 1.44 2000/11/27 08:40:04 chs Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Charles D. Cranor,
 *      Washington University, the University of California, Berkeley and
 *      its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_page.c   8.3 (Berkeley) 3/21/94
 * from: Id: uvm_page.c,v 1.1.2.18 1998/02/06 05:24:42 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * uvm_page.c: page ops.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/sched.h>
#include <sys/kernel.h>

#define UVM_PAGE                /* pull in uvm_page.h functions */
#include <uvm/uvm.h>

/*
 * global vars... XXXCDC: move to uvm. structure.
 */

/*
 * physical memory config is stored in vm_physmem.
 */

struct vm_physseg vm_physmem[VM_PHYSSEG_MAX];	/* XXXCDC: uvm.physmem */
int vm_nphysseg = 0;				/* XXXCDC: uvm.nphysseg */

/*
 * Some supported CPUs in a given architecture don't support all
 * of the things necessary to do idle page zero'ing efficiently.
 * We therefore provide a way to disable it from machdep code here.
 */

/*
 * XXX disabled until we can find a way to do this without causing
 * problems for either cpu caches or DMA latency.
 */
boolean_t vm_page_zero_enable = FALSE;

#ifdef UBC
u_long uvm_pgcnt_anon;
u_long uvm_pgcnt_vnode;
extern struct uvm_pagerops uvm_vnodeops;
#endif

/*
 * local variables
 */

/*
 * these variables record the range of kernel virtual space handed to us
 * by the pmap during bootstrap.  uvm_pageboot_alloc() allocates from this
 * range and uvm_page_init() passes what remains up to the rest of the VM;
 * we also keep them around for debugging purposes.
 */

static vaddr_t      virtual_space_start;
static vaddr_t      virtual_space_end;

/*
 * we use a hash table with only one bucket during bootup.  we will
 * later rehash (resize) the hash table once the allocator is ready.
 * we statically allocate the one bootstrap bucket below...
 */

static struct pglist uvm_bootbucket;

/*
 * local prototypes
 */

static void uvm_pageinsert __P((struct vm_page *));
static void uvm_pageremove __P((struct vm_page *));

/*
 * inline functions
 */

/*
 * uvm_pageinsert: insert a page in the object and the hash table
 *
 * => caller must lock object
 * => caller must lock page queues
 * => caller should have already set pg's object and offset pointers
 *    and bumped the version counter
 */

__inline static void
uvm_pageinsert(pg)
	struct vm_page *pg;
{
	struct pglist *buck;
	int s;

#ifdef DIAGNOSTIC
	if (pg->flags & PG_TABLED)
		panic("uvm_pageinsert: already inserted");
#endif

	buck = &uvm.page_hash[uvm_pagehash(pg->uobject,pg->offset)];
	s = splimp();
	simple_lock(&uvm.hashlock);
	TAILQ_INSERT_TAIL(buck, pg, hashq);	/* put in hash */
	simple_unlock(&uvm.hashlock);
	splx(s);

	TAILQ_INSERT_TAIL(&pg->uobject->memq, pg, listq); /* put in object */
	pg->flags |= PG_TABLED;
	pg->uobject->uo_npages++;
}

/*
 * uvm_pageremove: remove page from object and hash
 *
 * => caller must lock object
 * => caller must lock page queues
 */

static __inline void
uvm_pageremove(pg)
	struct vm_page *pg;
{
	struct pglist *buck;
	int s;

	KASSERT(pg->flags & PG_TABLED);
	buck = &uvm.page_hash[uvm_pagehash(pg->uobject,pg->offset)];
	s = splimp();
	simple_lock(&uvm.hashlock);
	TAILQ_REMOVE(buck, pg, hashq);
	simple_unlock(&uvm.hashlock);
	splx(s);

#ifdef UBC
	if (pg->uobject->pgops == &uvm_vnodeops) {
		uvm_pgcnt_vnode--;
	}
#endif

	/* object should be locked */
	TAILQ_REMOVE(&pg->uobject->memq, pg, listq);

	pg->flags &= ~PG_TABLED;
	pg->uobject->uo_npages--;
	pg->uobject = NULL;
	pg->version++;
}

/*
 * uvm_page_init: init the page system.   called from uvm_init().
 *
 * => we return the range of kernel virtual memory in kvm_startp/kvm_endp
 */

void
uvm_page_init(kvm_startp, kvm_endp)
	vaddr_t *kvm_startp, *kvm_endp;
{
	vsize_t freepages, pagecount, n;
	vm_page_t pagearray;
	int lcv, i;
	paddr_t paddr;

	/*
	 * step 1: init the page queues and page queue locks
	 */
	for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
		for (i = 0; i < PGFL_NQUEUES; i++)
			TAILQ_INIT(&uvm.page_free[lcv].pgfl_queues[i]);
	}
	TAILQ_INIT(&uvm.page_active);
	TAILQ_INIT(&uvm.page_inactive_swp);
	TAILQ_INIT(&uvm.page_inactive_obj);
	simple_lock_init(&uvm.pageqlock);
	simple_lock_init(&uvm.fpageqlock);

	/*
	 * step 2: init the <obj,offset> => <page> hash table. for now
	 * we just have one bucket (the bootstrap bucket).   later on we
	 * will allocate new buckets as we dynamically resize the hash table.
	 */

	uvm.page_nhash = 1;			/* 1 bucket */
	uvm.page_hashmask = 0;			/* mask for hash function */
	uvm.page_hash = &uvm_bootbucket;	/* install bootstrap bucket */
	TAILQ_INIT(uvm.page_hash);		/* init hash table */
	simple_lock_init(&uvm.hashlock);	/* init hash table lock */

	/*
	 * step 3: allocate vm_page structures.
	 */

	/*
	 * sanity check:
	 * before calling this function the MD code is expected to register
	 * some free RAM with the uvm_page_physload() function.   our job
	 * now is to allocate vm_page structures for this memory.
	 */

	if (vm_nphysseg == 0)
		panic("uvm_page_init: no memory pre-allocated");

	/*
	 * first calculate the number of free pages...
	 *
	 * note that we use start/end rather than avail_start/avail_end.
	 * this allows us to allocate extra vm_page structures in case we
	 * want to return some memory to the pool after booting.
	 */

	freepages = 0;
	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
		freepages += (vm_physmem[lcv].end - vm_physmem[lcv].start);

	/*
	 * we now know we have (PAGE_SIZE * freepages) bytes of memory we can
	 * use.   for each page of memory we use we need a vm_page structure.
	 * thus, the total number of pages we can use is the total size of
	 * the memory divided by the PAGE_SIZE plus the size of the vm_page
	 * structure.   we add one to freepages as a fudge factor to avoid
	 * truncation errors (since we can only allocate in terms of whole
	 * pages).
	 */

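	/*
	 * worked example (illustrative numbers only, not a requirement of
	 * this code): with 4KB pages and a hypothetical 96-byte struct
	 * vm_page, freepages == 16384 (64MB) gives
	 * pagecount = ((16384 + 1) * 4096) / (4096 + 96) ~= 16009,
	 * i.e. roughly 2% of memory ends up holding the vm_page array.
	 */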
	pagecount = ((freepages + 1) << PAGE_SHIFT) /
	    (PAGE_SIZE + sizeof(struct vm_page));
	pagearray = (vm_page_t)uvm_pageboot_alloc(pagecount *
	    sizeof(struct vm_page));
	memset(pagearray, 0, pagecount * sizeof(struct vm_page));

	/*
	 * step 4: init the vm_page structures and put them in the correct
	 * place...
	 */

	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {
		n = vm_physmem[lcv].end - vm_physmem[lcv].start;
		if (n > pagecount) {
			printf("uvm_page_init: lost %ld page(s) in init\n",
			    (long)(n - pagecount));
			panic("uvm_page_init");  /* XXXCDC: shouldn't happen? */
			/* n = pagecount; */
		}
		/* set up page array pointers */
		vm_physmem[lcv].pgs = pagearray;
		pagearray += n;
		pagecount -= n;
		vm_physmem[lcv].lastpg = vm_physmem[lcv].pgs + (n - 1);

		/* init and free vm_pages (we've already zeroed them) */
		paddr = ptoa(vm_physmem[lcv].start);
		for (i = 0 ; i < n ; i++, paddr += PAGE_SIZE) {
			vm_physmem[lcv].pgs[i].phys_addr = paddr;
			if (atop(paddr) >= vm_physmem[lcv].avail_start &&
			    atop(paddr) <= vm_physmem[lcv].avail_end) {
				uvmexp.npages++;
				/* add page to free pool */
				uvm_pagefree(&vm_physmem[lcv].pgs[i]);
			}
		}
	}

	/*
	 * step 5: pass up the values of virtual_space_start and
	 * virtual_space_end (obtained by uvm_pageboot_alloc) to the upper
	 * layers of the VM.
	 */

	*kvm_startp = round_page(virtual_space_start);
	*kvm_endp = trunc_page(virtual_space_end);

	/*
	 * step 6: init locks for kernel threads
	 */

	simple_lock_init(&uvm.pagedaemon_lock);
	simple_lock_init(&uvm.aiodoned_lock);

	/*
	 * step 7: init reserve thresholds
	 * XXXCDC - values may need adjusting
	 */
	uvmexp.reserve_pagedaemon = 4;
	uvmexp.reserve_kernel = 6;

	/*
	 * step 8: determine if we should zero pages in the idle
	 * loop.
	 */
	uvm.page_idle_zero = vm_page_zero_enable;

	/*
	 * done!
	 */

	uvm.page_init_done = TRUE;
}

/*
 * uvm_setpagesize: set the page size
 *
 * => sets page_shift and page_mask from uvmexp.pagesize.
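 *
 *    e.g. a pagesize of 4096 yields pagemask == 0xfff and pageshift == 12.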
 */

void
uvm_setpagesize()
{
	if (uvmexp.pagesize == 0)
		uvmexp.pagesize = DEFAULT_PAGE_SIZE;
	uvmexp.pagemask = uvmexp.pagesize - 1;
	if ((uvmexp.pagemask & uvmexp.pagesize) != 0)
		panic("uvm_setpagesize: page size not a power of two");
	for (uvmexp.pageshift = 0; ; uvmexp.pageshift++)
		if ((1 << uvmexp.pageshift) == uvmexp.pagesize)
			break;
}

/*
 * uvm_pageboot_alloc: steal memory from physmem for bootstrapping
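 *
 * example (hypothetical MD bootstrap code, not an interface requirement):
 * a pmap that needs room for its pv_entry table before the page allocator
 * is running might do something like
 *
 *	pv_table = (struct pv_entry *)
 *	    uvm_pageboot_alloc(npages * sizeof(struct pv_entry));
 *
 * where "pv_table" and "npages" are illustrative MD names.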
 */

vaddr_t
uvm_pageboot_alloc(size)
	vsize_t size;
{
#if defined(PMAP_STEAL_MEMORY)
	vaddr_t addr;

	/*
	 * defer bootstrap allocation to MD code (it may want to allocate
	 * from a direct-mapped segment).  pmap_steal_memory should round
	 * off virtual_space_start/virtual_space_end.
	 */

	addr = pmap_steal_memory(size, &virtual_space_start,
	    &virtual_space_end);

	return(addr);

#else /* !PMAP_STEAL_MEMORY */

	static boolean_t initialized = FALSE;
	vaddr_t addr, vaddr;
	paddr_t paddr;

	/* round to page size */
	size = round_page(size);

	/*
	 * on first call to this function, initialize ourselves.
	 */
	if (initialized == FALSE) {
		pmap_virtual_space(&virtual_space_start, &virtual_space_end);

		/* round it the way we like it */
		virtual_space_start = round_page(virtual_space_start);
		virtual_space_end = trunc_page(virtual_space_end);

		initialized = TRUE;
	}

	/*
	 * allocate virtual memory for this request
	 */
	if (virtual_space_start == virtual_space_end ||
	    (virtual_space_end - virtual_space_start) < size)
		panic("uvm_pageboot_alloc: out of virtual space");

	addr = virtual_space_start;

#ifdef PMAP_GROWKERNEL
	/*
	 * If the kernel pmap can't map the requested space,
	 * then allocate more resources for it.
	 */
	if (uvm_maxkaddr < (addr + size)) {
		uvm_maxkaddr = pmap_growkernel(addr + size);
		if (uvm_maxkaddr < (addr + size))
			panic("uvm_pageboot_alloc: pmap_growkernel() failed");
	}
#endif

	virtual_space_start += size;

	/*
	 * allocate and mapin physical pages to back new virtual pages
	 */

	for (vaddr = round_page(addr) ; vaddr < addr + size ;
	    vaddr += PAGE_SIZE) {

		if (!uvm_page_physget(&paddr))
			panic("uvm_pageboot_alloc: out of memory");

		/*
		 * Note this memory is no longer managed, so using
		 * pmap_kenter is safe.
		 */
		pmap_kenter_pa(vaddr, paddr, VM_PROT_READ|VM_PROT_WRITE);
	}
	return(addr);
#endif	/* PMAP_STEAL_MEMORY */
}

#if !defined(PMAP_STEAL_MEMORY)
/*
 * uvm_page_physget: "steal" one page from the vm_physmem structure.
 *
 * => attempt to allocate it off the end of a segment in which the "avail"
 *    values match the start/end values.   if we can't do that, then we
 *    will advance both values (making them equal, and removing some
 *    vm_page structures from the non-avail area).
 * => return false if out of memory.
 */

/* subroutine: try to allocate from memory chunks on the specified freelist */
static boolean_t uvm_page_physget_freelist __P((paddr_t *, int));

static boolean_t
uvm_page_physget_freelist(paddrp, freelist)
	paddr_t *paddrp;
	int freelist;
{
	int lcv, x;

	/* pass 1: try allocating from a matching end */
#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) || \
	(VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)
	for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--)
#else
	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
#endif
	{

		if (uvm.page_init_done == TRUE)
			panic("uvm_page_physget: called _after_ bootstrap");

		if (vm_physmem[lcv].free_list != freelist)
			continue;

		/* try from front */
		if (vm_physmem[lcv].avail_start == vm_physmem[lcv].start &&
		    vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) {
			*paddrp = ptoa(vm_physmem[lcv].avail_start);
			vm_physmem[lcv].avail_start++;
			vm_physmem[lcv].start++;
			/* nothing left?   nuke it */
			if (vm_physmem[lcv].avail_start ==
			    vm_physmem[lcv].end) {
				if (vm_nphysseg == 1)
				    panic("uvm_page_physget: out of memory!");
				vm_nphysseg--;
				for (x = lcv ; x < vm_nphysseg ; x++)
					/* structure copy */
					vm_physmem[x] = vm_physmem[x+1];
			}
			return (TRUE);
		}

		/* try from rear */
		if (vm_physmem[lcv].avail_end == vm_physmem[lcv].end &&
		    vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) {
			*paddrp = ptoa(vm_physmem[lcv].avail_end - 1);
			vm_physmem[lcv].avail_end--;
			vm_physmem[lcv].end--;
			/* nothing left?   nuke it */
			if (vm_physmem[lcv].avail_end ==
			    vm_physmem[lcv].start) {
				if (vm_nphysseg == 1)
				    panic("uvm_page_physget: out of memory!");
				vm_nphysseg--;
				for (x = lcv ; x < vm_nphysseg ; x++)
					/* structure copy */
					vm_physmem[x] = vm_physmem[x+1];
			}
			return (TRUE);
		}
	}

	/* pass 2: forget about matching ends, just allocate something */
#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) || \
	(VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)
	for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--)
#else
	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
#endif
	{

		/* any room in this bank? */
		if (vm_physmem[lcv].avail_start >= vm_physmem[lcv].avail_end)
			continue;  /* nope */

		*paddrp = ptoa(vm_physmem[lcv].avail_start);
		vm_physmem[lcv].avail_start++;
		/* truncate! */
		vm_physmem[lcv].start = vm_physmem[lcv].avail_start;

		/* nothing left?   nuke it */
		if (vm_physmem[lcv].avail_start == vm_physmem[lcv].end) {
			if (vm_nphysseg == 1)
				panic("uvm_page_physget: out of memory!");
			vm_nphysseg--;
			for (x = lcv ; x < vm_nphysseg ; x++)
				/* structure copy */
				vm_physmem[x] = vm_physmem[x+1];
		}
		return (TRUE);
	}

	return (FALSE);        /* whoops! */
}

boolean_t
uvm_page_physget(paddrp)
	paddr_t *paddrp;
{
	int i;

	/* try in the order of freelist preference */
	for (i = 0; i < VM_NFREELIST; i++)
		if (uvm_page_physget_freelist(paddrp, i) == TRUE)
			return (TRUE);
	return (FALSE);
}
#endif /* PMAP_STEAL_MEMORY */

/*
 * uvm_page_physload: load physical memory into VM system
 *
 * => all args are PFs (page frame numbers)
 * => all pages in start/end get vm_page structures
 * => areas marked by avail_start/avail_end get added to the free page pool
 * => we are limited to VM_PHYSSEG_MAX physical memory segments
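 *
 * example (illustrative only): MD startup code that found one RAM bank
 * running from 0x00100000 to 0x04000000, with everything above the loaded
 * kernel free, might register it roughly like
 *
 *	uvm_page_physload(atop(0x00100000), atop(0x04000000),
 *	    atop(first_avail), atop(0x04000000), VM_FREELIST_DEFAULT);
 *
 * where "first_avail" is a hypothetical MD variable, not part of this API.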
 */

void
uvm_page_physload(start, end, avail_start, avail_end, free_list)
	paddr_t start, end, avail_start, avail_end;
	int free_list;
{
	int preload, lcv;
	psize_t npages;
	struct vm_page *pgs;
	struct vm_physseg *ps;

	if (uvmexp.pagesize == 0)
		panic("uvm_page_physload: page size not set!");

	if (free_list >= VM_NFREELIST || free_list < VM_FREELIST_DEFAULT)
		panic("uvm_page_physload: bad free list %d\n", free_list);

	if (start >= end)
		panic("uvm_page_physload: start >= end");

	/*
	 * do we have room?
	 */
	if (vm_nphysseg == VM_PHYSSEG_MAX) {
		printf("uvm_page_physload: unable to load physical memory "
		    "segment\n");
		printf("\t%d segments allocated, ignoring 0x%llx -> 0x%llx\n",
		    VM_PHYSSEG_MAX, (long long)start, (long long)end);
		printf("\tincrease VM_PHYSSEG_MAX\n");
		return;
	}

	/*
	 * check to see if this is a "preload" (i.e. uvm_mem_init hasn't been
	 * called yet, so malloc is not available).
	 */
	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {
		if (vm_physmem[lcv].pgs)
			break;
	}
	preload = (lcv == vm_nphysseg);

	/*
	 * if VM is already running, attempt to malloc() vm_page structures
	 */
	if (!preload) {
#if defined(VM_PHYSSEG_NOADD)
		panic("uvm_page_physload: tried to add RAM after vm_mem_init");
#else
		/* XXXCDC: need some sort of lockout for this case */
		paddr_t paddr;
		npages = end - start;  /* # of pages */
		pgs = malloc(sizeof(struct vm_page) * npages,
		    M_VMPAGE, M_NOWAIT);
		if (pgs == NULL) {
			printf("uvm_page_physload: can not malloc vm_page "
			    "structs for segment\n");
			printf("\tignoring 0x%lx -> 0x%lx\n", start, end);
			return;
		}
		/* zero data, init phys_addr and free_list, and free pages */
		memset(pgs, 0, sizeof(struct vm_page) * npages);
		for (lcv = 0, paddr = ptoa(start) ;
				 lcv < npages ; lcv++, paddr += PAGE_SIZE) {
			pgs[lcv].phys_addr = paddr;
			pgs[lcv].free_list = free_list;
			if (atop(paddr) >= avail_start &&
			    atop(paddr) <= avail_end)
				uvm_pagefree(&pgs[lcv]);
		}
		/* XXXCDC: incomplete: need to update uvmexp.free, what else? */
		/* XXXCDC: need hook to tell pmap to rebuild pv_list, etc... */
#endif
	} else {

		/* gcc complains if these don't get init'd */
		pgs = NULL;
		npages = 0;

	}

	/*
	 * now insert us in the proper place in vm_physmem[]
	 */

#if (VM_PHYSSEG_STRAT == VM_PSTRAT_RANDOM)

	/* random: put it at the end (easy!) */
	ps = &vm_physmem[vm_nphysseg];

#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)

	{
		int x;
		/* sort by address for binary search */
		for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
			if (start < vm_physmem[lcv].start)
				break;
		ps = &vm_physmem[lcv];
		/* move back other entries, if necessary ... */
		for (x = vm_nphysseg ; x > lcv ; x--)
			/* structure copy */
			vm_physmem[x] = vm_physmem[x - 1];
	}

#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)

	{
		int x;
		/* sort by largest segment first */
		for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
			if ((end - start) >
			    (vm_physmem[lcv].end - vm_physmem[lcv].start))
				break;
		ps = &vm_physmem[lcv];
		/* move back other entries, if necessary ... */
		for (x = vm_nphysseg ; x > lcv ; x--)
			/* structure copy */
			vm_physmem[x] = vm_physmem[x - 1];
	}

#else

	panic("uvm_page_physload: unknown physseg strategy selected!");

#endif

	ps->start = start;
	ps->end = end;
	ps->avail_start = avail_start;
	ps->avail_end = avail_end;
	if (preload) {
		ps->pgs = NULL;
	} else {
		ps->pgs = pgs;
		ps->lastpg = pgs + npages - 1;
	}
	ps->free_list = free_list;
	vm_nphysseg++;

	/*
	 * done!
	 */

	if (!preload)
		uvm_page_rehash();

	return;
}

/*
 * uvm_page_rehash: reallocate hash table based on number of free pages.
 */

void
uvm_page_rehash()
{
	int freepages, lcv, bucketcount, s, oldcount;
	struct pglist *newbuckets, *oldbuckets;
	struct vm_page *pg;
	size_t newsize, oldsize;

	/*
	 * compute number of pages that can go in the free pool
	 */

	freepages = 0;
	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
		freepages +=
		    (vm_physmem[lcv].avail_end - vm_physmem[lcv].avail_start);

	/*
	 * compute number of buckets needed for this number of pages
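	 * (we round the count up to a power of two because page_hashmask
	 * below assumes a power-of-two table size; e.g. 24000 free pages
	 * would give 32768 buckets)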
	 */

	bucketcount = 1;
	while (bucketcount < freepages)
		bucketcount = bucketcount * 2;

	/*
	 * compute the size of the current table and new table.
	 */

	oldbuckets = uvm.page_hash;
	oldcount = uvm.page_nhash;
	oldsize = round_page(sizeof(struct pglist) * oldcount);
	newsize = round_page(sizeof(struct pglist) * bucketcount);

	/*
	 * allocate the new buckets
	 */

	newbuckets = (struct pglist *) uvm_km_alloc(kernel_map, newsize);
	if (newbuckets == NULL) {
		printf("uvm_page_rehash: WARNING: could not grow page "
		    "hash table\n");
		return;
	}
	for (lcv = 0 ; lcv < bucketcount ; lcv++)
		TAILQ_INIT(&newbuckets[lcv]);

	/*
	 * now replace the old buckets with the new ones and rehash everything
	 */

	s = splimp();
	simple_lock(&uvm.hashlock);
	uvm.page_hash = newbuckets;
	uvm.page_nhash = bucketcount;
	uvm.page_hashmask = bucketcount - 1;  /* power of 2 */

	/* ... and rehash */
	for (lcv = 0 ; lcv < oldcount ; lcv++) {
		while ((pg = oldbuckets[lcv].tqh_first) != NULL) {
			TAILQ_REMOVE(&oldbuckets[lcv], pg, hashq);
			TAILQ_INSERT_TAIL(
			  &uvm.page_hash[uvm_pagehash(pg->uobject, pg->offset)],
			  pg, hashq);
		}
	}
	simple_unlock(&uvm.hashlock);
	splx(s);

	/*
	 * free old bucket array if it is not the boot-time table
	 */

	if (oldbuckets != &uvm_bootbucket)
		uvm_km_free(kernel_map, (vaddr_t) oldbuckets, oldsize);

	/*
	 * done
	 */
	return;
}


#if 1 /* XXXCDC: TMP TMP TMP DEBUG DEBUG DEBUG */

void uvm_page_physdump __P((void)); /* SHUT UP GCC */

/* call from DDB */
void
uvm_page_physdump()
{
	int lcv;

	printf("rehash: physical memory config [segs=%d of %d]:\n",
				 vm_nphysseg, VM_PHYSSEG_MAX);
	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
		printf("0x%llx->0x%llx [0x%llx->0x%llx]\n",
		    (long long)vm_physmem[lcv].start,
		    (long long)vm_physmem[lcv].end,
		    (long long)vm_physmem[lcv].avail_start,
		    (long long)vm_physmem[lcv].avail_end);
	printf("STRATEGY = ");
	switch (VM_PHYSSEG_STRAT) {
	case VM_PSTRAT_RANDOM: printf("RANDOM\n"); break;
	case VM_PSTRAT_BSEARCH: printf("BSEARCH\n"); break;
	case VM_PSTRAT_BIGFIRST: printf("BIGFIRST\n"); break;
	default: printf("<<UNKNOWN>>!!!!\n");
	}
	printf("number of buckets = %d\n", uvm.page_nhash);
}
#endif

/*
 * uvm_pagealloc_strat: allocate vm_page from a particular free list.
 *
 * => return null if no pages free
 * => wake up pagedaemon if number of free pages drops below low water mark
 * => if obj != NULL, obj must be locked (to put in hash)
 * => if anon != NULL, anon must be locked (to put in anon)
 * => only one of obj or anon can be non-null
 * => caller must activate/deactivate page if it is not wired.
 * => free_list is ignored if strat == UVM_PGA_STRAT_NORMAL.
 * => policy decision: it is more important to pull a page off of the
 *	appropriate priority free list than it is to get a zero'd or
 *	unknown contents page.  This is because we live with the
 *	consequences of a bad free list decision for the entire
 *	lifetime of the page, e.g. if the page comes from memory that
 *	is slower to access.
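 *
 * example (illustrative only): a caller that holds the lock on object
 * "uobj" and wants a zero-filled page at offset "off" might do
 *
 *	pg = uvm_pagealloc_strat(uobj, off, NULL, UVM_PGA_ZERO,
 *	    UVM_PGA_STRAT_NORMAL, 0);
 *	if (pg == NULL)
 *		... sleep (e.g. uvm_wait) and retry, or fail ...
 *
 * most callers go through the uvm_pagealloc() wrapper rather than calling
 * this function directly.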
 */

struct vm_page *
uvm_pagealloc_strat(obj, off, anon, flags, strat, free_list)
	struct uvm_object *obj;
	voff_t off;
	int flags;
	struct vm_anon *anon;
	int strat, free_list;
{
	int lcv, try1, try2, s, zeroit = 0;
	struct vm_page *pg;
	struct pglist *freeq;
	struct pgfreelist *pgfl;
	boolean_t use_reserve;

	KASSERT(obj == NULL || anon == NULL);
	KASSERT(off == trunc_page(off));
	s = uvm_lock_fpageq();

	/*
	 * check to see if we need to generate some free pages by waking
	 * the pagedaemon.
	 */

#ifdef UBC
	if (uvmexp.free + uvmexp.paging < uvmexp.freemin ||
	    (uvmexp.free + uvmexp.paging < uvmexp.freetarg &&
	     uvmexp.inactive < uvmexp.inactarg)) {
		wakeup(&uvm.pagedaemon);
	}
#else
	if (uvmexp.free < uvmexp.freemin || (uvmexp.free < uvmexp.freetarg &&
	    uvmexp.inactive < uvmexp.inactarg))
		wakeup(&uvm.pagedaemon);
#endif

	/*
	 * fail if any of these conditions is true:
	 * [1]  there really are no free pages, or
	 * [2]  only kernel "reserved" pages remain and
	 *        the page isn't being allocated to a kernel object.
	 * [3]  only pagedaemon "reserved" pages remain and
	 *        the requestor isn't the pagedaemon.
	 */

	use_reserve = (flags & UVM_PGA_USERESERVE) ||
		(obj && UVM_OBJ_IS_KERN_OBJECT(obj));
	if ((uvmexp.free <= uvmexp.reserve_kernel && !use_reserve) ||
	    (uvmexp.free <= uvmexp.reserve_pagedaemon &&
	     !(use_reserve && (curproc == uvm.pagedaemon_proc ||
				curproc == syncerproc))))
		goto fail;

#if PGFL_NQUEUES != 2
#error uvm_pagealloc_strat needs to be updated
#endif

	/*
	 * If we want a zero'd page, try the ZEROS queue first, otherwise
	 * we try the UNKNOWN queue first.
	 */
	if (flags & UVM_PGA_ZERO) {
		try1 = PGFL_ZEROS;
		try2 = PGFL_UNKNOWN;
	} else {
		try1 = PGFL_UNKNOWN;
		try2 = PGFL_ZEROS;
	}

 again:
	switch (strat) {
	case UVM_PGA_STRAT_NORMAL:
		/* Check all freelists in descending priority order. */
		for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
			pgfl = &uvm.page_free[lcv];
			if ((pg = TAILQ_FIRST((freeq =
			      &pgfl->pgfl_queues[try1]))) != NULL ||
			    (pg = TAILQ_FIRST((freeq =
			      &pgfl->pgfl_queues[try2]))) != NULL)
				goto gotit;
		}

		/* No pages free! */
		goto fail;

	case UVM_PGA_STRAT_ONLY:
	case UVM_PGA_STRAT_FALLBACK:
		/* Attempt to allocate from the specified free list. */
		KASSERT(free_list >= 0 && free_list < VM_NFREELIST);
		pgfl = &uvm.page_free[free_list];
		if ((pg = TAILQ_FIRST((freeq =
		      &pgfl->pgfl_queues[try1]))) != NULL ||
		    (pg = TAILQ_FIRST((freeq =
		      &pgfl->pgfl_queues[try2]))) != NULL)
			goto gotit;

		/* Fall back, if possible. */
		if (strat == UVM_PGA_STRAT_FALLBACK) {
			strat = UVM_PGA_STRAT_NORMAL;
			goto again;
		}

		/* No pages free! */
		goto fail;

	default:
		panic("uvm_pagealloc_strat: bad strat %d", strat);
		/* NOTREACHED */
	}

 gotit:
	TAILQ_REMOVE(freeq, pg, pageq);
	uvmexp.free--;

	/* update zero'd page count */
	if (pg->flags & PG_ZERO)
		uvmexp.zeropages--;

	/*
	 * update allocation statistics and remember if we have to
	 * zero the page
	 */
	if (flags & UVM_PGA_ZERO) {
		if (pg->flags & PG_ZERO) {
			uvmexp.pga_zerohit++;
			zeroit = 0;
		} else {
			uvmexp.pga_zeromiss++;
			zeroit = 1;
		}
	}

	uvm_unlock_fpageq(s);		/* unlock free page queue */

	pg->offset = off;
	pg->uobject = obj;
	pg->uanon = anon;
	pg->flags = PG_BUSY|PG_CLEAN|PG_FAKE;
	pg->version++;
	if (anon) {
		anon->u.an_page = pg;
		pg->pqflags = PQ_ANON;
#ifdef UBC
		uvm_pgcnt_anon++;
#endif
	} else {
		if (obj)
			uvm_pageinsert(pg);
		pg->pqflags = 0;
	}
#if defined(UVM_PAGE_TRKOWN)
	pg->owner_tag = NULL;
#endif
	UVM_PAGE_OWN(pg, "new alloc");

	if (flags & UVM_PGA_ZERO) {
		/*
		 * A zero'd page is not clean.  If we got a page not already
		 * zero'd, then we have to zero it ourselves.
		 */
		pg->flags &= ~PG_CLEAN;
		if (zeroit)
			pmap_zero_page(VM_PAGE_TO_PHYS(pg));
	}

	return(pg);

 fail:
	uvm_unlock_fpageq(s);
	return (NULL);
}

/*
 * uvm_pagealloc_contig: allocate contiguous memory.
 *
 * XXX - fix comment.
 */

vaddr_t
uvm_pagealloc_contig(size, low, high, alignment)
	vaddr_t size;
	vaddr_t low, high;
	vaddr_t alignment;
{
	struct pglist pglist;
	struct vm_page *pg;
	vaddr_t addr, temp_addr;

	size = round_page(size);

	TAILQ_INIT(&pglist);
	if (uvm_pglistalloc(size, low, high, alignment, 0,
			    &pglist, 1, FALSE))
		return 0;
	addr = vm_map_min(kernel_map);
	if (uvm_map(kernel_map, &addr, size, NULL, UVM_UNKNOWN_OFFSET, 0,
		    UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE,
				UVM_ADV_RANDOM, 0)) != KERN_SUCCESS) {
		uvm_pglistfree(&pglist);
		return 0;
	}
	temp_addr = addr;
	for (pg = TAILQ_FIRST(&pglist); pg != NULL;
	     pg = TAILQ_NEXT(pg, pageq)) {
		pg->uobject = uvm.kernel_object;
		pg->offset = temp_addr - vm_map_min(kernel_map);
		uvm_pageinsert(pg);
		uvm_pagewire(pg);
		pmap_kenter_pa(temp_addr, VM_PAGE_TO_PHYS(pg),
			       VM_PROT_READ|VM_PROT_WRITE);
		temp_addr += PAGE_SIZE;
	}
	return addr;
}

/*
 * uvm_pagerealloc: reallocate a page from one object to another
 *
 * => both objects must be locked
 */

void
uvm_pagerealloc(pg, newobj, newoff)
	struct vm_page *pg;
	struct uvm_object *newobj;
	voff_t newoff;
{
	/*
	 * remove it from the old object
	 */

	if (pg->uobject) {
		uvm_pageremove(pg);
	}

	/*
	 * put it in the new object
	 */

	if (newobj) {
		pg->uobject = newobj;
		pg->offset = newoff;
		pg->version++;
		uvm_pageinsert(pg);
	}
}


/*
 * uvm_pagefree: free page
 *
 * => erase page's identity (i.e. remove from hash/object)
 * => put page on free list
 * => caller must lock owning object (either anon or uvm_object)
 * => caller must lock page queues
 * => assumes all valid mappings of pg are gone
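 *
 * example (illustrative only): an object-owned page is typically freed
 * with both locks held, roughly
 *
 *	simple_lock(&uobj->vmobjlock);
 *	uvm_lock_pageq();
 *	uvm_pagefree(pg);
 *	uvm_unlock_pageq();
 *	simple_unlock(&uobj->vmobjlock);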
 */

void
uvm_pagefree(pg)
	struct vm_page *pg;
{
	int s;
	int saved_loan_count = pg->loan_count;

#ifdef DEBUG
	if (pg->uobject == (void *)0xdeadbeef &&
	    pg->uanon == (void *)0xdeadbeef) {
		panic("uvm_pagefree: freeing free page %p\n", pg);
	}
#endif

	/*
	 * if the page was an object page (and thus "TABLED"), remove it
	 * from the object.
	 */

	if (pg->flags & PG_TABLED) {

		/*
		 * if the object page is on loan we are going to drop ownership.
		 * it is possible that an anon will take over as owner for this
		 * page later on.   the anon will want a !PG_CLEAN page so that
		 * it knows it needs to allocate swap if it wants to page the
		 * page out.
		 */

		if (saved_loan_count)
			pg->flags &= ~PG_CLEAN;	/* in case an anon takes over */
		uvm_pageremove(pg);

		/*
		 * if our page was on loan, then we just lost control over it
		 * (in fact, if it was loaned to an anon, the anon may have
		 * already taken over ownership of the page by now and thus
		 * changed the loan_count [e.g. in uvmfault_anonget()]), so we
		 * just return (when the last loan is dropped, the page can be
		 * freed by whatever was holding the last loan).
		 */

		if (saved_loan_count)
			return;
	} else if (saved_loan_count && (pg->pqflags & PQ_ANON)) {

		/*
		 * if our page is owned by an anon and is loaned out to the
		 * kernel then we just want to drop ownership and return.
		 * the kernel must free the page when all its loans clear ...
		 * note that the kernel can't change the loan status of our
		 * page as long as we are holding PQ lock.
		 */

		pg->pqflags &= ~PQ_ANON;
		pg->uanon = NULL;
		return;
	}
	KASSERT(saved_loan_count == 0);

	/*
	 * now remove the page from the queues
	 */

	if (pg->pqflags & PQ_ACTIVE) {
		TAILQ_REMOVE(&uvm.page_active, pg, pageq);
		pg->pqflags &= ~PQ_ACTIVE;
		uvmexp.active--;
	}
	if (pg->pqflags & PQ_INACTIVE) {
		if (pg->pqflags & PQ_SWAPBACKED)
			TAILQ_REMOVE(&uvm.page_inactive_swp, pg, pageq);
		else
			TAILQ_REMOVE(&uvm.page_inactive_obj, pg, pageq);
		pg->pqflags &= ~PQ_INACTIVE;
		uvmexp.inactive--;
	}

	/*
	 * if the page was wired, unwire it now.
	 */

	if (pg->wire_count) {
		pg->wire_count = 0;
		uvmexp.wired--;
	}
#ifdef UBC
	if (pg->uanon) {
		uvm_pgcnt_anon--;
	}
#endif

	/*
	 * and put on free queue
	 */

	pg->flags &= ~PG_ZERO;

	s = uvm_lock_fpageq();
	TAILQ_INSERT_TAIL(&uvm.page_free[
	    uvm_page_lookup_freelist(pg)].pgfl_queues[PGFL_UNKNOWN], pg, pageq);
	pg->pqflags = PQ_FREE;
#ifdef DEBUG
	pg->uobject = (void *)0xdeadbeef;
	pg->offset = 0xdeadbeef;
	pg->uanon = (void *)0xdeadbeef;
#endif
	uvmexp.free++;

	if (uvmexp.zeropages < UVM_PAGEZERO_TARGET)
		uvm.page_idle_zero = vm_page_zero_enable;

	uvm_unlock_fpageq(s);
}

/*
 * uvm_page_unbusy: unbusy an array of pages.
 *
 * => pages must either all belong to the same object, or all belong to anons.
 * => if pages are object-owned, object must be locked.
 * => if pages are anon-owned, anons must be unlocked and have 0 refcount.
 */

void
uvm_page_unbusy(pgs, npgs)
	struct vm_page **pgs;
	int npgs;
{
	struct vm_page *pg;
	struct uvm_object *uobj;
	int i;
	UVMHIST_FUNC("uvm_page_unbusy"); UVMHIST_CALLED(ubchist);

	for (i = 0; i < npgs; i++) {
		pg = pgs[i];

		if (pg == NULL) {
			continue;
		}
		if (pg->flags & PG_WANTED) {
			wakeup(pg);
		}
		if (pg->flags & PG_RELEASED) {
			UVMHIST_LOG(ubchist, "releasing pg %p", pg,0,0,0);
			uobj = pg->uobject;
			if (uobj != NULL) {
				uobj->pgops->pgo_releasepg(pg, NULL);
			} else {
				pg->flags &= ~(PG_BUSY);
				UVM_PAGE_OWN(pg, NULL);
				uvm_anfree(pg->uanon);
			}
		} else {
			UVMHIST_LOG(ubchist, "unbusying pg %p", pg,0,0,0);
			pg->flags &= ~(PG_WANTED|PG_BUSY);
			UVM_PAGE_OWN(pg, NULL);
		}
	}
}

#if defined(UVM_PAGE_TRKOWN)
/*
 * uvm_page_own: set or release page ownership
 *
 * => this is a debugging function that keeps track of who sets PG_BUSY
 *	and where they do it.   it can be used to track down problems
 *	such as a process setting "PG_BUSY" and never releasing it.
 * => page's object [if any] must be locked
 * => if "tag" is NULL then we are releasing page ownership
 */
void
uvm_page_own(pg, tag)
	struct vm_page *pg;
	char *tag;
{
	/* gain ownership? */
	if (tag) {
		if (pg->owner_tag) {
			printf("uvm_page_own: page %p already owned "
			    "by proc %d [%s]\n", pg,
			     pg->owner, pg->owner_tag);
			panic("uvm_page_own");
		}
		pg->owner = (curproc) ? curproc->p_pid : (pid_t) -1;
		pg->owner_tag = tag;
		return;
	}

	/* drop ownership */
	if (pg->owner_tag == NULL) {
		printf("uvm_page_own: dropping ownership of a non-owned "
		    "page (%p)\n", pg);
		panic("uvm_page_own");
	}
	pg->owner_tag = NULL;
	return;
}
#endif

/*
 * uvm_pageidlezero: zero free pages while the system is idle.
 *
 * => we do at least one iteration per call, if we are below the target.
 * => we loop until we either reach the target or whichqs indicates that
 *	there is a process ready to run.
 */
void
uvm_pageidlezero()
{
	struct vm_page *pg;
	struct pgfreelist *pgfl;
	int free_list, s;

	do {
		s = uvm_lock_fpageq();

		if (uvmexp.zeropages >= UVM_PAGEZERO_TARGET) {
			uvm.page_idle_zero = FALSE;
			uvm_unlock_fpageq(s);
			return;
		}

		for (free_list = 0; free_list < VM_NFREELIST; free_list++) {
			pgfl = &uvm.page_free[free_list];
			if ((pg = TAILQ_FIRST(&pgfl->pgfl_queues[
			    PGFL_UNKNOWN])) != NULL)
				break;
		}

		if (pg == NULL) {
			/*
			 * No non-zero'd pages; don't bother trying again
			 * until we know we have non-zero'd pages free.
			 */
			uvm.page_idle_zero = FALSE;
			uvm_unlock_fpageq(s);
			return;
		}

		TAILQ_REMOVE(&pgfl->pgfl_queues[PGFL_UNKNOWN], pg, pageq);
		uvmexp.free--;
		uvm_unlock_fpageq(s);

#ifdef PMAP_PAGEIDLEZERO
		if (PMAP_PAGEIDLEZERO(VM_PAGE_TO_PHYS(pg)) == FALSE) {
			/*
			 * The machine-dependent code detected some
			 * reason for us to abort zeroing pages,
			 * probably because there is a process now
			 * ready to run.
			 */
			s = uvm_lock_fpageq();
			TAILQ_INSERT_HEAD(&pgfl->pgfl_queues[PGFL_UNKNOWN],
			    pg, pageq);
			uvmexp.free++;
			uvmexp.zeroaborts++;
			uvm_unlock_fpageq(s);
			return;
		}
#else
		/*
		 * XXX This will toast the cache unless the pmap_zero_page()
		 * XXX implementation does uncached access.
		 */
		pmap_zero_page(VM_PAGE_TO_PHYS(pg));
#endif
		pg->flags |= PG_ZERO;

		s = uvm_lock_fpageq();
		TAILQ_INSERT_HEAD(&pgfl->pgfl_queues[PGFL_ZEROS], pg, pageq);
		uvmexp.free++;
		uvmexp.zeropages++;
		uvm_unlock_fpageq(s);
	} while (whichqs == 0);
}
1416