1/*	$OpenBSD: uvm_page.c,v 1.95 2009/08/06 15:28:14 oga Exp $	*/
2/*	$NetBSD: uvm_page.c,v 1.44 2000/11/27 08:40:04 chs Exp $	*/
3
4/*
5 * Copyright (c) 1997 Charles D. Cranor and Washington University.
6 * Copyright (c) 1991, 1993, The Regents of the University of California.
7 *
8 * All rights reserved.
9 *
10 * This code is derived from software contributed to Berkeley by
11 * The Mach Operating System project at Carnegie-Mellon University.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 *    notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 *    notice, this list of conditions and the following disclaimer in the
20 *    documentation and/or other materials provided with the distribution.
21 * 3. All advertising materials mentioning features or use of this software
22 *    must display the following acknowledgement:
23 *	This product includes software developed by Charles D. Cranor,
24 *      Washington University, the University of California, Berkeley and
25 *      its contributors.
26 * 4. Neither the name of the University nor the names of its contributors
27 *    may be used to endorse or promote products derived from this software
28 *    without specific prior written permission.
29 *
30 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
34 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40 * SUCH DAMAGE.
41 *
42 *	@(#)vm_page.c   8.3 (Berkeley) 3/21/94
43 * from: Id: uvm_page.c,v 1.1.2.18 1998/02/06 05:24:42 chs Exp
44 *
45 *
46 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
47 * All rights reserved.
48 *
49 * Permission to use, copy, modify and distribute this software and
50 * its documentation is hereby granted, provided that both the copyright
51 * notice and this permission notice appear in all copies of the
52 * software, derivative works or modified versions, and any portions
53 * thereof, and that both notices appear in supporting documentation.
54 *
55 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
56 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
57 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
58 *
59 * Carnegie Mellon requests users of this software to return to
60 *
61 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
62 *  School of Computer Science
63 *  Carnegie Mellon University
64 *  Pittsburgh PA 15213-3890
65 *
66 * any improvements or extensions that they make and grant Carnegie the
67 * rights to redistribute these changes.
68 */
69
70/*
71 * uvm_page.c: page ops.
72 */
73
74#include <sys/param.h>
75#include <sys/systm.h>
76#include <sys/malloc.h>
77#include <sys/sched.h>
78#include <sys/kernel.h>
79#include <sys/vnode.h>
80#include <sys/mount.h>
81
82#include <uvm/uvm.h>
83
84/*
85 * for object trees
86 */
87RB_GENERATE(uvm_objtree, vm_page, objt, uvm_pagecmp);
88
89int
90uvm_pagecmp(struct vm_page *a, struct vm_page *b)
91{
92	return (a->offset < b->offset ? -1 : a->offset > b->offset);
93}
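
/*
 * uvm_pagecmp() orders pages by pg->offset only, so an object's "memt"
 * red-black tree is an index of that object's pages keyed by offset;
 * uvm_pagelookup() below is simply an RB_FIND() with such a key.
 */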
94
95/*
96 * global vars... XXXCDC: move to uvm. structure.
97 */
98
99/*
100 * physical memory config is stored in vm_physmem.
101 */
102
103struct vm_physseg vm_physmem[VM_PHYSSEG_MAX];	/* XXXCDC: uvm.physmem */
104int vm_nphysseg = 0;				/* XXXCDC: uvm.nphysseg */
105
106/*
107 * Some supported CPUs in a given architecture don't support all
108 * of the things necessary to do idle page zero'ing efficiently.
109 * We therefore provide a way to disable it from machdep code here.
110 */
111
112/*
113 * XXX disabled until we can find a way to do this without causing
114 * problems for either cpu caches or DMA latency.
115 */
116boolean_t vm_page_zero_enable = FALSE;
117
118/*
119 * local variables
120 */
121
122/*
123 * these variables record the range of kernel virtual space reported by
124 * the pmap at bootstrap; uvm_page_init() hands it back to its caller and
125 * the implementation of uvm_pageboot_alloc here also uses it internally.
126 */
127
128static vaddr_t      virtual_space_start;
129static vaddr_t      virtual_space_end;
130
131/*
132 * History
133 */
134UVMHIST_DECL(pghist);
135
136/*
137 * local prototypes
138 */
139
140static void uvm_pageinsert(struct vm_page *);
141static void uvm_pageremove(struct vm_page *);
142
143/*
144 * inline functions
145 */
146
147/*
148 * uvm_pageinsert: insert a page in the object
149 *
150 * => caller must lock object
151 * => caller must lock page queues XXX questionable
152 * => caller should have already set pg's object and offset pointers
153 *    and bumped the version counter
154 */
155
156__inline static void
157uvm_pageinsert(struct vm_page *pg)
158{
159	UVMHIST_FUNC("uvm_pageinsert"); UVMHIST_CALLED(pghist);
160
161	KASSERT((pg->pg_flags & PG_TABLED) == 0);
162	/* XXX should we check duplicates? */
163	RB_INSERT(uvm_objtree, &pg->uobject->memt, pg);
164	atomic_setbits_int(&pg->pg_flags, PG_TABLED);
165	pg->uobject->uo_npages++;
166}
167
168/*
169 * uvm_page_remove: remove page from object
170 *
171 * => caller must lock object
172 * => caller must lock page queues
173 */
174
175static __inline void
176uvm_pageremove(struct vm_page *pg)
177{
178	UVMHIST_FUNC("uvm_pageremove"); UVMHIST_CALLED(pghist);
179
180	KASSERT(pg->pg_flags & PG_TABLED);
181	RB_REMOVE(uvm_objtree, &pg->uobject->memt, pg);
182
183	atomic_clearbits_int(&pg->pg_flags, PG_TABLED);
184	pg->uobject->uo_npages--;
185	pg->uobject = NULL;
186	pg->pg_version++;
187}
188
189/*
190 * uvm_page_init: init the page system.   called from uvm_init().
191 *
192 * => we return the range of kernel virtual memory in kvm_startp/kvm_endp
193 */
194
195void
196uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp)
197{
198	vsize_t freepages, pagecount, n;
199	vm_page_t pagearray;
200	int lcv, i;
201	paddr_t paddr;
202#if defined(UVMHIST)
203	static struct uvm_history_ent pghistbuf[100];
204#endif
205
206	UVMHIST_FUNC("uvm_page_init");
207	UVMHIST_INIT_STATIC(pghist, pghistbuf);
208	UVMHIST_CALLED(pghist);
209
210	/*
211	 * init the page queues and page queue locks
212	 */
213
214	for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
215		for (i = 0; i < PGFL_NQUEUES; i++)
216			TAILQ_INIT(&uvm.page_free[lcv].pgfl_queues[i]);
217	}
218	TAILQ_INIT(&uvm.page_active);
219	TAILQ_INIT(&uvm.page_inactive_swp);
220	TAILQ_INIT(&uvm.page_inactive_obj);
221	simple_lock_init(&uvm.pageqlock);
222	mtx_init(&uvm.fpageqlock, IPL_VM);
223
224	/*
225	 * allocate vm_page structures.
226	 */
227
228	/*
229	 * sanity check:
230	 * before calling this function the MD code is expected to register
231	 * some free RAM with the uvm_page_physload() function.   our job
232	 * now is to allocate vm_page structures for this memory.
233	 */
234
235	if (vm_nphysseg == 0)
236		panic("uvm_page_init: no memory pre-allocated");
237
238	/*
239	 * first calculate the number of free pages...
240	 *
241	 * note that we use start/end rather than avail_start/avail_end.
242	 * this allows us to allocate extra vm_page structures in case we
243	 * want to return some memory to the pool after booting.
244	 */
245
246	freepages = 0;
247	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
248		freepages += (vm_physmem[lcv].end - vm_physmem[lcv].start);
249
250	/*
251	 * we now know we have (PAGE_SIZE * freepages) bytes of memory we can
252	 * use.   for each page of memory we use we need a vm_page structure.
253	 * thus, the total number of pages we can use is the total size of
254	 * the memory divided by (PAGE_SIZE plus the size of the vm_page
255	 * structure).   we add one to freepages as a fudge factor to avoid
256	 * truncation errors (since we can only allocate in terms of whole
257	 * pages).
258	 */
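	/*
	 * worked example (hypothetical sizes): with 4096-byte pages and a
	 * 128-byte struct vm_page, 1000 free pages give
	 * pagecount = (1001 * 4096) / (4096 + 128) = 970; the difference
	 * is roughly the room taken up by the vm_page array itself.
	 */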
259
260	pagecount = (((paddr_t)freepages + 1) << PAGE_SHIFT) /
261	    (PAGE_SIZE + sizeof(struct vm_page));
262	pagearray = (vm_page_t)uvm_pageboot_alloc(pagecount *
263	    sizeof(struct vm_page));
264	memset(pagearray, 0, pagecount * sizeof(struct vm_page));
265
266	/*
267	 * init the vm_page structures and put them in the correct place.
268	 */
269
270	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {
271		n = vm_physmem[lcv].end - vm_physmem[lcv].start;
272		if (n > pagecount) {
273			printf("uvm_page_init: lost %ld page(s) in init\n",
274			    (long)(n - pagecount));
275			panic("uvm_page_init");  /* XXXCDC: shouldn't happen? */
276			/* n = pagecount; */
277		}
278
279		/* set up page array pointers */
280		vm_physmem[lcv].pgs = pagearray;
281		pagearray += n;
282		pagecount -= n;
283		vm_physmem[lcv].lastpg = vm_physmem[lcv].pgs + (n - 1);
284
285		/* init and free vm_pages (we've already zeroed them) */
286		paddr = ptoa(vm_physmem[lcv].start);
287		for (i = 0 ; i < n ; i++, paddr += PAGE_SIZE) {
288			vm_physmem[lcv].pgs[i].phys_addr = paddr;
289#ifdef __HAVE_VM_PAGE_MD
290			VM_MDPAGE_INIT(&vm_physmem[lcv].pgs[i]);
291#endif
292			if (atop(paddr) >= vm_physmem[lcv].avail_start &&
293			    atop(paddr) <= vm_physmem[lcv].avail_end) {
294				uvmexp.npages++;
295				/* add page to free pool */
296				uvm_pagefree(&vm_physmem[lcv].pgs[i]);
297			}
298		}
299	}
300
301	/*
302	 * pass up the values of virtual_space_start and
303	 * virtual_space_end (obtained by uvm_pageboot_alloc) to the upper
304	 * layers of the VM.
305	 */
306
307	*kvm_startp = round_page(virtual_space_start);
308	*kvm_endp = trunc_page(virtual_space_end);
309
310	/*
311	 * init locks for kernel threads
312	 */
313	mtx_init(&uvm.aiodoned_lock, IPL_BIO);
314
315	/*
316	 * init reserve thresholds
317	 * XXXCDC - values may need adjusting
318	 */
319	uvmexp.reserve_pagedaemon = 4;
320	uvmexp.reserve_kernel = 6;
321	uvmexp.anonminpct = 10;
322	uvmexp.vnodeminpct = 10;
323	uvmexp.vtextminpct = 5;
324	uvmexp.anonmin = uvmexp.anonminpct * 256 / 100;
325	uvmexp.vnodemin = uvmexp.vnodeminpct * 256 / 100;
326	uvmexp.vtextmin = uvmexp.vtextminpct * 256 / 100;
327
328  	/*
329	 * determine if we should zero pages in the idle loop.
330	 */
331
332	uvm.page_idle_zero = vm_page_zero_enable;
333
334	/*
335	 * done!
336	 */
337
338	uvm.page_init_done = TRUE;
339}
340
341/*
342 * uvm_setpagesize: set the page size
343 *
344 * => sets page_shift and page_mask from uvmexp.pagesize.
345 */
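
/*
 * for example, uvmexp.pagesize == 4096 yields pagemask 0xfff and
 * pageshift 12; a pagesize that is not a power of two trips the
 * (pagemask & pagesize) check below and panics.
 */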
346
347void
348uvm_setpagesize(void)
349{
350	if (uvmexp.pagesize == 0)
351		uvmexp.pagesize = DEFAULT_PAGE_SIZE;
352	uvmexp.pagemask = uvmexp.pagesize - 1;
353	if ((uvmexp.pagemask & uvmexp.pagesize) != 0)
354		panic("uvm_setpagesize: page size not a power of two");
355	for (uvmexp.pageshift = 0; ; uvmexp.pageshift++)
356		if ((1 << uvmexp.pageshift) == uvmexp.pagesize)
357			break;
358}
359
360/*
361 * uvm_pageboot_alloc: steal memory from physmem for bootstrapping
362 */
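
/*
 * the main consumer in this file is uvm_page_init() above, which steals
 * space for the vm_page array before the normal allocators exist:
 *
 *	pagearray = (vm_page_t)uvm_pageboot_alloc(pagecount *
 *	    sizeof(struct vm_page));
 */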
363
364vaddr_t
365uvm_pageboot_alloc(vsize_t size)
366{
367#if defined(PMAP_STEAL_MEMORY)
368	vaddr_t addr;
369
370	/*
371	 * defer bootstrap allocation to MD code (it may want to allocate
372	 * from a direct-mapped segment).  pmap_steal_memory should round
373	 * off virtual_space_start/virtual_space_end.
374	 */
375
376	addr = pmap_steal_memory(size, &virtual_space_start,
377	    &virtual_space_end);
378
379	return(addr);
380
381#else /* !PMAP_STEAL_MEMORY */
382
383	static boolean_t initialized = FALSE;
384	vaddr_t addr, vaddr;
385	paddr_t paddr;
386
387	/* round to page size */
388	size = round_page(size);
389
390	/*
391	 * on first call to this function, initialize ourselves.
392	 */
393	if (initialized == FALSE) {
394		pmap_virtual_space(&virtual_space_start, &virtual_space_end);
395
396		/* round it the way we like it */
397		virtual_space_start = round_page(virtual_space_start);
398		virtual_space_end = trunc_page(virtual_space_end);
399
400		initialized = TRUE;
401	}
402
403	/*
404	 * allocate virtual memory for this request
405	 */
406	if (virtual_space_start == virtual_space_end ||
407	    (virtual_space_end - virtual_space_start) < size)
408		panic("uvm_pageboot_alloc: out of virtual space");
409
410	addr = virtual_space_start;
411
412#ifdef PMAP_GROWKERNEL
413	/*
414	 * If the kernel pmap can't map the requested space,
415	 * then allocate more resources for it.
416	 */
417	if (uvm_maxkaddr < (addr + size)) {
418		uvm_maxkaddr = pmap_growkernel(addr + size);
419		if (uvm_maxkaddr < (addr + size))
420			panic("uvm_pageboot_alloc: pmap_growkernel() failed");
421	}
422#endif
423
424	virtual_space_start += size;
425
426	/*
427	 * allocate and mapin physical pages to back new virtual pages
428	 */
429
430	for (vaddr = round_page(addr) ; vaddr < addr + size ;
431	    vaddr += PAGE_SIZE) {
432
433		if (!uvm_page_physget(&paddr))
434			panic("uvm_pageboot_alloc: out of memory");
435
436		/*
437		 * Note this memory is no longer managed, so using
438		 * pmap_kenter is safe.
439		 */
440		pmap_kenter_pa(vaddr, paddr, VM_PROT_READ|VM_PROT_WRITE);
441	}
442	pmap_update(pmap_kernel());
443	return(addr);
444#endif	/* PMAP_STEAL_MEMORY */
445}
446
447#if !defined(PMAP_STEAL_MEMORY)
448/*
449 * uvm_page_physget: "steal" one page from the vm_physmem structure.
450 *
451 * => attempt to allocate it off the end of a segment in which the "avail"
452 *    values match the start/end values.   if we can't do that, then we
453 *    will advance both values (making them equal, and removing some
454 *    vm_page structures from the non-avail area).
455 * => return false if out of memory.
456 */
457
458/* subroutine: try to allocate from memory chunks on the specified freelist */
459static boolean_t uvm_page_physget_freelist(paddr_t *, int);
460
461static boolean_t
462uvm_page_physget_freelist(paddr_t *paddrp, int freelist)
463{
464	int lcv, x;
465	UVMHIST_FUNC("uvm_page_physget_freelist"); UVMHIST_CALLED(pghist);
466
467	/* pass 1: try allocating from a matching end */
468#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) || \
469	(VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)
470	for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--)
471#else
472	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
473#endif
474	{
475
476		if (uvm.page_init_done == TRUE)
477			panic("uvm_page_physget: called _after_ bootstrap");
478
479		if (vm_physmem[lcv].free_list != freelist)
480			continue;
481
482		/* try from front */
483		if (vm_physmem[lcv].avail_start == vm_physmem[lcv].start &&
484		    vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) {
485			*paddrp = ptoa(vm_physmem[lcv].avail_start);
486			vm_physmem[lcv].avail_start++;
487			vm_physmem[lcv].start++;
488			/* nothing left?   nuke it */
489			if (vm_physmem[lcv].avail_start ==
490			    vm_physmem[lcv].end) {
491				if (vm_nphysseg == 1)
492				    panic("uvm_page_physget: out of memory!");
493				vm_nphysseg--;
494				for (x = lcv ; x < vm_nphysseg ; x++)
495					/* structure copy */
496					vm_physmem[x] = vm_physmem[x+1];
497			}
498			return (TRUE);
499		}
500
501		/* try from rear */
502		if (vm_physmem[lcv].avail_end == vm_physmem[lcv].end &&
503		    vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) {
504			*paddrp = ptoa(vm_physmem[lcv].avail_end - 1);
505			vm_physmem[lcv].avail_end--;
506			vm_physmem[lcv].end--;
507			/* nothing left?   nuke it */
508			if (vm_physmem[lcv].avail_end ==
509			    vm_physmem[lcv].start) {
510				if (vm_nphysseg == 1)
511				    panic("uvm_page_physget: out of memory!");
512				vm_nphysseg--;
513				for (x = lcv ; x < vm_nphysseg ; x++)
514					/* structure copy */
515					vm_physmem[x] = vm_physmem[x+1];
516			}
517			return (TRUE);
518		}
519	}
520
521	/* pass 2: forget about matching ends, just allocate something */
522#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) || \
523	(VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)
524	for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--)
525#else
526	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
527#endif
528	{
529
530		/* any room in this bank? */
531		if (vm_physmem[lcv].avail_start >= vm_physmem[lcv].avail_end)
532			continue;  /* nope */
533
534		*paddrp = ptoa(vm_physmem[lcv].avail_start);
535		vm_physmem[lcv].avail_start++;
536		/* truncate! */
537		vm_physmem[lcv].start = vm_physmem[lcv].avail_start;
538
539		/* nothing left?   nuke it */
540		if (vm_physmem[lcv].avail_start == vm_physmem[lcv].end) {
541			if (vm_nphysseg == 1)
542				panic("uvm_page_physget: out of memory!");
543			vm_nphysseg--;
544			for (x = lcv ; x < vm_nphysseg ; x++)
545				/* structure copy */
546				vm_physmem[x] = vm_physmem[x+1];
547		}
548		return (TRUE);
549	}
550
551	return (FALSE);        /* whoops! */
552}
553
554boolean_t
555uvm_page_physget(paddr_t *paddrp)
556{
557	int i;
558	UVMHIST_FUNC("uvm_page_physget"); UVMHIST_CALLED(pghist);
559
560	/* try in the order of freelist preference */
561	for (i = 0; i < VM_NFREELIST; i++)
562		if (uvm_page_physget_freelist(paddrp, i) == TRUE)
563			return (TRUE);
564	return (FALSE);
565}
566#endif /* PMAP_STEAL_MEMORY */
567
568/*
569 * uvm_page_physload: load physical memory into VM system
570 *
571 * => all args are PFs
572 * => all pages in start/end get vm_page structures
573 * => areas marked by avail_start/avail_end get added to the free page pool
574 * => we are limited to VM_PHYSSEG_MAX physical memory segments
575 */
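
/*
 * illustrative sketch (hypothetical names, not taken from any particular
 * port): MD startup code converts byte addresses to page frames with
 * atop() before loading a segment, e.g.
 *
 *	uvm_page_physload(atop(seg_start), atop(seg_end),
 *	    atop(avail_start), atop(avail_end), VM_FREELIST_DEFAULT);
 */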
576
577void
578uvm_page_physload(paddr_t start, paddr_t end, paddr_t avail_start,
579    paddr_t avail_end, int free_list)
580{
581	int preload, lcv;
582	psize_t npages;
583	struct vm_page *pgs;
584	struct vm_physseg *ps;
585
586	if (uvmexp.pagesize == 0)
587		panic("uvm_page_physload: page size not set!");
588
589	if (free_list >= VM_NFREELIST || free_list < VM_FREELIST_DEFAULT)
590		panic("uvm_page_physload: bad free list %d", free_list);
591
592	if (start >= end)
593		panic("uvm_page_physload: start >= end");
594
595	/*
596	 * do we have room?
597	 */
598	if (vm_nphysseg == VM_PHYSSEG_MAX) {
599		printf("uvm_page_physload: unable to load physical memory "
600		    "segment\n");
601		printf("\t%d segments allocated, ignoring 0x%llx -> 0x%llx\n",
602		    VM_PHYSSEG_MAX, (long long)start, (long long)end);
603		printf("\tincrease VM_PHYSSEG_MAX\n");
604		return;
605	}
606
607	/*
608	 * check to see if this is a "preload" (i.e. uvm_mem_init hasn't been
609	 * called yet, so malloc is not available).
610	 */
611	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {
612		if (vm_physmem[lcv].pgs)
613			break;
614	}
615	preload = (lcv == vm_nphysseg);
616
617	/*
618	 * if VM is already running, attempt to malloc() vm_page structures
619	 */
620	if (!preload) {
621#if defined(VM_PHYSSEG_NOADD)
622		panic("uvm_page_physload: tried to add RAM after vm_mem_init");
623#else
624		/* XXXCDC: need some sort of lockout for this case */
625		paddr_t paddr;
626		npages = end - start;  /* # of pages */
627		pgs = (struct vm_page *)uvm_km_zalloc(kernel_map,
628		    sizeof(struct vm_page) * npages);
629		if (pgs == NULL) {
630			printf("uvm_page_physload: can not malloc vm_page "
631			    "structs for segment\n");
632			printf("\tignoring 0x%lx -> 0x%lx\n", start, end);
633			return;
634		}
635		/* init phys_addr and free_list, and free pages */
636		for (lcv = 0, paddr = ptoa(start) ;
637				 lcv < npages ; lcv++, paddr += PAGE_SIZE) {
638			pgs[lcv].phys_addr = paddr;
639			pgs[lcv].free_list = free_list;
640			if (atop(paddr) >= avail_start &&
641			    atop(paddr) <= avail_end)
642				uvm_pagefree(&pgs[lcv]);
643		}
644		/* XXXCDC: incomplete: need to update uvmexp.free, what else? */
645		/* XXXCDC: need hook to tell pmap to rebuild pv_list, etc... */
646#endif
647	} else {
648
649		/* gcc complains if these don't get init'd */
650		pgs = NULL;
651		npages = 0;
652
653	}
654
655	/*
656	 * now insert us in the proper place in vm_physmem[]
657	 */
658
659#if (VM_PHYSSEG_STRAT == VM_PSTRAT_RANDOM)
660
661	/* random: put it at the end (easy!) */
662	ps = &vm_physmem[vm_nphysseg];
663
664#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)
665
666	{
667		int x;
668		/* sort by address for binary search */
669		for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
670			if (start < vm_physmem[lcv].start)
671				break;
672		ps = &vm_physmem[lcv];
673		/* move back other entries, if necessary ... */
674		for (x = vm_nphysseg ; x > lcv ; x--)
675			/* structure copy */
676			vm_physmem[x] = vm_physmem[x - 1];
677	}
678
679#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
680
681	{
682		int x;
683		/* sort by largest segment first */
684		for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
685			if ((end - start) >
686			    (vm_physmem[lcv].end - vm_physmem[lcv].start))
687				break;
688		ps = &vm_physmem[lcv];
689		/* move back other entries, if necessary ... */
690		for (x = vm_nphysseg ; x > lcv ; x--)
691			/* structure copy */
692			vm_physmem[x] = vm_physmem[x - 1];
693	}
694
695#else
696
697	panic("uvm_page_physload: unknown physseg strategy selected!");
698
699#endif
700
701	ps->start = start;
702	ps->end = end;
703	ps->avail_start = avail_start;
704	ps->avail_end = avail_end;
705	if (preload) {
706		ps->pgs = NULL;
707	} else {
708		ps->pgs = pgs;
709		ps->lastpg = pgs + npages - 1;
710	}
711	ps->free_list = free_list;
712	vm_nphysseg++;
713
714	/*
715	 * done!
716	 */
717
718	return;
719}
720
721#ifdef DDB /* XXXCDC: TMP TMP TMP DEBUG DEBUG DEBUG */
722
723void uvm_page_physdump(void); /* SHUT UP GCC */
724
725/* call from DDB */
726void
727uvm_page_physdump(void)
728{
729	int lcv;
730
731	printf("uvm_page_physdump: physical memory config [segs=%d of %d]:\n",
732	    vm_nphysseg, VM_PHYSSEG_MAX);
733	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
734		printf("0x%llx->0x%llx [0x%llx->0x%llx]\n",
735		    (long long)vm_physmem[lcv].start,
736		    (long long)vm_physmem[lcv].end,
737		    (long long)vm_physmem[lcv].avail_start,
738		    (long long)vm_physmem[lcv].avail_end);
739	printf("STRATEGY = ");
740	switch (VM_PHYSSEG_STRAT) {
741	case VM_PSTRAT_RANDOM: printf("RANDOM\n"); break;
742	case VM_PSTRAT_BSEARCH: printf("BSEARCH\n"); break;
743	case VM_PSTRAT_BIGFIRST: printf("BIGFIRST\n"); break;
744	default: printf("<<UNKNOWN>>!!!!\n");
745	}
746}
747#endif
748
749void
750uvm_shutdown(void)
751{
752#ifdef UVM_SWAP_ENCRYPT
753	uvm_swap_finicrypt_all();
754#endif
755}
756
757/*
758 * uvm_pagealloc_strat: allocate vm_page from a particular free list.
759 *
760 * => return null if no pages free
761 * => wake up pagedaemon if number of free pages drops below low water mark
762 * => if obj != NULL, obj must be locked (to put in tree)
763 * => if anon != NULL, anon must be locked (to put in anon)
764 * => only one of obj or anon can be non-null
765 * => caller must activate/deactivate page if it is not wired.
766 * => free_list is ignored if strat == UVM_PGA_STRAT_NORMAL.
767 * => policy decision: it is more important to pull a page off of the
768 *	appropriate priority free list than it is to get a zero'd or
769 *	unknown contents page.  This is because we live with the
770 *	consequences of a bad free list decision for the entire
771 *	lifetime of the page, e.g. if the page comes from memory that
772 *	is slower to access.
773 */
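
/*
 * illustrative caller sketch (hypothetical names; assumes the object is
 * locked and that the caller may sleep in the usual uvm_wait() helper):
 *
 *	pg = uvm_pagealloc_strat(uobj, off, NULL, UVM_PGA_ZERO,
 *	    UVM_PGA_STRAT_NORMAL, 0);
 *	if (pg == NULL)
 *		uvm_wait("pgalloc");	(then retry the allocation)
 */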
774
775struct vm_page *
776uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon,
777    int flags, int strat, int free_list)
778{
779	int lcv, try1, try2, zeroit = 0;
780	struct vm_page *pg;
781	struct pglist *freeq;
782	struct pgfreelist *pgfl;
783	boolean_t use_reserve;
784	UVMHIST_FUNC("uvm_pagealloc_strat"); UVMHIST_CALLED(pghist);
785
786	KASSERT(obj == NULL || anon == NULL);
787	KASSERT(off == trunc_page(off));
788
789	uvm_lock_fpageq();
790
791	/*
792	 * check to see if we need to generate some free pages by waking
793	 * the pagedaemon.
794	 */
795	if ((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freemin ||
796	    ((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freetarg &&
797	     uvmexp.inactive < uvmexp.inactarg))
798		wakeup(&uvm.pagedaemon);
799
800	/*
801	 * fail if any of these conditions is true:
802	 * [1]  there really are no free pages, or
803	 * [2]  only kernel "reserved" pages remain and
804	 *        the page isn't being allocated to a kernel object.
805	 * [3]  only pagedaemon "reserved" pages remain and
806	 *        the requestor isn't the pagedaemon.
807	 */
808
809	use_reserve = (flags & UVM_PGA_USERESERVE) ||
810		(obj && UVM_OBJ_IS_KERN_OBJECT(obj));
811	if ((uvmexp.free <= uvmexp.reserve_kernel && !use_reserve) ||
812	    (uvmexp.free <= uvmexp.reserve_pagedaemon &&
813	     !((curproc == uvm.pagedaemon_proc) ||
814	      (curproc == syncerproc))))
815		goto fail;
816
817#if PGFL_NQUEUES != 2
818#error uvm_pagealloc_strat needs to be updated
819#endif
820
821	/*
822	 * If we want a zero'd page, try the ZEROS queue first, otherwise
823	 * we try the UNKNOWN queue first.
824	 */
825	if (flags & UVM_PGA_ZERO) {
826		try1 = PGFL_ZEROS;
827		try2 = PGFL_UNKNOWN;
828	} else {
829		try1 = PGFL_UNKNOWN;
830		try2 = PGFL_ZEROS;
831	}
832
833	UVMHIST_LOG(pghist, "obj=%p off=%lx anon=%p flags=%lx",
834	    obj, (u_long)off, anon, flags);
835	UVMHIST_LOG(pghist, "strat=%ld free_list=%ld", strat, free_list, 0, 0);
836 again:
837	switch (strat) {
838	case UVM_PGA_STRAT_NORMAL:
839		/* Check all freelists in descending priority order. */
840		for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
841			pgfl = &uvm.page_free[lcv];
842			if ((pg = TAILQ_FIRST((freeq =
843			      &pgfl->pgfl_queues[try1]))) != NULL ||
844			    (pg = TAILQ_FIRST((freeq =
845			      &pgfl->pgfl_queues[try2]))) != NULL)
846				goto gotit;
847		}
848
849		/* No pages free! */
850		goto fail;
851
852	case UVM_PGA_STRAT_ONLY:
853	case UVM_PGA_STRAT_FALLBACK:
854		/* Attempt to allocate from the specified free list. */
855		KASSERT(free_list >= 0 && free_list < VM_NFREELIST);
856		pgfl = &uvm.page_free[free_list];
857		if ((pg = TAILQ_FIRST((freeq =
858		      &pgfl->pgfl_queues[try1]))) != NULL ||
859		    (pg = TAILQ_FIRST((freeq =
860		      &pgfl->pgfl_queues[try2]))) != NULL)
861			goto gotit;
862
863		/* Fall back, if possible. */
864		if (strat == UVM_PGA_STRAT_FALLBACK) {
865			strat = UVM_PGA_STRAT_NORMAL;
866			goto again;
867		}
868
869		/* No pages free! */
870		goto fail;
871
872	default:
873		panic("uvm_pagealloc_strat: bad strat %d", strat);
874		/* NOTREACHED */
875	}
876
877 gotit:
878	TAILQ_REMOVE(freeq, pg, pageq);
879	uvmexp.free--;
880
881	/* update zero'd page count */
882	if (pg->pg_flags & PG_ZERO)
883		uvmexp.zeropages--;
884
885	/*
886	 * update allocation statistics and remember if we have to
887	 * zero the page
888	 */
889	if (flags & UVM_PGA_ZERO) {
890		if (pg->pg_flags & PG_ZERO) {
891			uvmexp.pga_zerohit++;
892			zeroit = 0;
893		} else {
894			uvmexp.pga_zeromiss++;
895			zeroit = 1;
896		}
897	}
898
899	uvm_unlock_fpageq();		/* unlock free page queue */
900
901	pg->offset = off;
902	pg->uobject = obj;
903	pg->uanon = anon;
904	pg->pg_flags = PG_BUSY|PG_CLEAN|PG_FAKE;
905	pg->pg_version++;
906	if (anon) {
907		anon->an_page = pg;
908		atomic_setbits_int(&pg->pg_flags, PQ_ANON);
909#ifdef UBC
910		uvm_pgcnt_anon++;
911#endif
912	} else {
913		if (obj)
914			uvm_pageinsert(pg);
915	}
916#if defined(UVM_PAGE_TRKOWN)
917	pg->owner_tag = NULL;
918#endif
919	UVM_PAGE_OWN(pg, "new alloc");
920
921	if (flags & UVM_PGA_ZERO) {
922		/*
923		 * A zero'd page is not clean.  If we got a page not already
924		 * zero'd, then we have to zero it ourselves.
925		 */
926		atomic_clearbits_int(&pg->pg_flags, PG_CLEAN);
927		if (zeroit)
928			pmap_zero_page(pg);
929	}
930
931	UVMHIST_LOG(pghist, "allocated pg %p/%lx", pg,
932	    (u_long)VM_PAGE_TO_PHYS(pg), 0, 0);
933	return(pg);
934
935 fail:
936	uvm_unlock_fpageq();
937	UVMHIST_LOG(pghist, "failed!", 0, 0, 0, 0);
938	return (NULL);
939}
940
941/*
942 * uvm_pagerealloc: reallocate a page from one object to another
943 *
944 * => both objects must be locked
945 */
946
947void
948uvm_pagerealloc(struct vm_page *pg, struct uvm_object *newobj, voff_t newoff)
949{
950
951	UVMHIST_FUNC("uvm_pagerealloc"); UVMHIST_CALLED(pghist);
952
953	/*
954	 * remove it from the old object
955	 */
956
957	if (pg->uobject) {
958		uvm_pageremove(pg);
959	}
960
961	/*
962	 * put it in the new object
963	 */
964
965	if (newobj) {
966		pg->uobject = newobj;
967		pg->offset = newoff;
968		pg->pg_version++;
969		uvm_pageinsert(pg);
970	}
971}
972
973
974/*
975 * uvm_pagefree: free page
976 *
977 * => erase page's identity (i.e. remove from object)
978 * => put page on free list
979 * => caller must lock owning object (either anon or uvm_object)
980 * => caller must lock page queues
981 * => assumes all valid mappings of pg are gone
982 */
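
/*
 * the usual calling pattern appears in uvm_page_unbusy() below: lock the
 * page queues, remove any remaining mappings with
 * pmap_page_protect(pg, VM_PROT_NONE), call uvm_pagefree(pg), then unlock
 * the page queues.
 */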
983
984void
985uvm_pagefree(struct vm_page *pg)
986{
987	int saved_loan_count = pg->loan_count;
988	UVMHIST_FUNC("uvm_pagefree"); UVMHIST_CALLED(pghist);
989
990#ifdef DEBUG
991	if (pg->uobject == (void *)0xdeadbeef &&
992	    pg->uanon == (void *)0xdeadbeef) {
993		panic("uvm_pagefree: freeing free page %p", pg);
994	}
995#endif
996
997	UVMHIST_LOG(pghist, "freeing pg %p/%lx", pg,
998	    (u_long)VM_PAGE_TO_PHYS(pg), 0, 0);
999
1000	/*
1001	 * if the page was an object page (and thus "TABLED"), remove it
1002	 * from the object.
1003	 */
1004
1005	if (pg->pg_flags & PG_TABLED) {
1006
1007		/*
1008		 * if the object page is on loan we are going to drop ownership.
1009		 * it is possible that an anon will take over as owner for this
1010		 * page later on.   the anon will want a !PG_CLEAN page so that
1011		 * it knows it needs to allocate swap if it wants to page the
1012		 * page out.
1013		 */
1014
1015		/* in case an anon takes over */
1016		if (saved_loan_count)
1017			atomic_clearbits_int(&pg->pg_flags, PG_CLEAN);
1018		uvm_pageremove(pg);
1019
1020		/*
1021		 * if our page was on loan, then we just lost control over it
1022		 * (in fact, if it was loaned to an anon, the anon may have
1023		 * already taken over ownership of the page by now and thus
1024		 * changed the loan_count [e.g. in uvmfault_anonget()]) we just
1025		 * return (when the last loan is dropped, then the page can be
1026		 * freed by whatever was holding the last loan).
1027		 */
1028
1029		if (saved_loan_count)
1030			return;
1031	} else if (saved_loan_count && pg->uanon) {
1032		/*
1033		 * if our page is owned by an anon and is loaned out to the
1034		 * kernel then we just want to drop ownership and return.
1035		 * the kernel must free the page when all its loans clear ...
1036		 * note that the kernel can't change the loan status of our
1037		 * page as long as we are holding PQ lock.
1038		 */
1039		atomic_clearbits_int(&pg->pg_flags, PQ_ANON);
1040		pg->uanon->an_page = NULL;
1041		pg->uanon = NULL;
1042		return;
1043	}
1044	KASSERT(saved_loan_count == 0);
1045
1046	/*
1047	 * now remove the page from the queues
1048	 */
1049
1050	if (pg->pg_flags & PQ_ACTIVE) {
1051		TAILQ_REMOVE(&uvm.page_active, pg, pageq);
1052		atomic_clearbits_int(&pg->pg_flags, PQ_ACTIVE);
1053		uvmexp.active--;
1054	}
1055	if (pg->pg_flags & PQ_INACTIVE) {
1056		if (pg->pg_flags & PQ_SWAPBACKED)
1057			TAILQ_REMOVE(&uvm.page_inactive_swp, pg, pageq);
1058		else
1059			TAILQ_REMOVE(&uvm.page_inactive_obj, pg, pageq);
1060		atomic_clearbits_int(&pg->pg_flags, PQ_INACTIVE);
1061		uvmexp.inactive--;
1062	}
1063
1064	/*
1065	 * if the page was wired, unwire it now.
1066	 */
1067
1068	if (pg->wire_count) {
1069		pg->wire_count = 0;
1070		uvmexp.wired--;
1071	}
1072	if (pg->uanon) {
1073		pg->uanon->an_page = NULL;
1074#ifdef UBC
1075		uvm_pgcnt_anon--;
1076#endif
1077	}
1078
1079	/*
1080	 * and put on free queue
1081	 */
1082
1083	atomic_clearbits_int(&pg->pg_flags, PG_ZERO);
1084
1085	uvm_lock_fpageq();
1086	TAILQ_INSERT_TAIL(&uvm.page_free[
1087	    uvm_page_lookup_freelist(pg)].pgfl_queues[PGFL_UNKNOWN], pg, pageq);
1088	atomic_clearbits_int(&pg->pg_flags, PQ_MASK);
1089	atomic_setbits_int(&pg->pg_flags, PQ_FREE);
1090#ifdef DEBUG
1091	pg->uobject = (void *)0xdeadbeef;
1092	pg->offset = 0xdeadbeef;
1093	pg->uanon = (void *)0xdeadbeef;
1094#endif
1095	uvmexp.free++;
1096
1097	if (uvmexp.zeropages < UVM_PAGEZERO_TARGET)
1098		uvm.page_idle_zero = vm_page_zero_enable;
1099
1100	uvm_unlock_fpageq();
1101}
1102
1103/*
1104 * uvm_page_unbusy: unbusy an array of pages.
1105 *
1106 * => pages must either all belong to the same object, or all belong to anons.
1107 * => if pages are object-owned, object must be locked.
1108 * => if pages are anon-owned, anons must be unlocked and have 0 refcount.
1109 */
1110
1111void
1112uvm_page_unbusy(struct vm_page **pgs, int npgs)
1113{
1114	struct vm_page *pg;
1115	struct uvm_object *uobj;
1116	int i;
1117	UVMHIST_FUNC("uvm_page_unbusy"); UVMHIST_CALLED(pdhist);
1118
1119	for (i = 0; i < npgs; i++) {
1120		pg = pgs[i];
1121
1122		if (pg == NULL || pg == PGO_DONTCARE) {
1123			continue;
1124		}
1125		if (pg->pg_flags & PG_WANTED) {
1126			wakeup(pg);
1127		}
1128		if (pg->pg_flags & PG_RELEASED) {
1129			UVMHIST_LOG(pdhist, "releasing pg %p", pg,0,0,0);
1130			uobj = pg->uobject;
1131			if (uobj != NULL) {
1132				uvm_lock_pageq();
1133				pmap_page_protect(pg, VM_PROT_NONE);
1134				/* XXX won't happen right now */
1135				if (pg->pg_flags & PQ_ANON)
1136					uao_dropswap(uobj,
1137					    pg->offset >> PAGE_SHIFT);
1138				uvm_pagefree(pg);
1139				uvm_unlock_pageq();
1140			} else {
1141				atomic_clearbits_int(&pg->pg_flags, PG_BUSY);
1142				UVM_PAGE_OWN(pg, NULL);
1143				uvm_anfree(pg->uanon);
1144			}
1145		} else {
1146			UVMHIST_LOG(pdhist, "unbusying pg %p", pg,0,0,0);
1147			atomic_clearbits_int(&pg->pg_flags, PG_WANTED|PG_BUSY);
1148			UVM_PAGE_OWN(pg, NULL);
1149		}
1150	}
1151}
1152
1153#if defined(UVM_PAGE_TRKOWN)
1154/*
1155 * uvm_page_own: set or release page ownership
1156 *
1157 * => this is a debugging function that keeps track of who sets PG_BUSY
1158 *	and where they do it.   it can be used to track down problems
1159 *	such as a process setting "PG_BUSY" and never releasing it.
1160 * => page's object [if any] must be locked
1161 * => if "tag" is NULL then we are releasing page ownership
1162 */
1163void
1164uvm_page_own(struct vm_page *pg, char *tag)
1165{
1166	/* gain ownership? */
1167	if (tag) {
1168		if (pg->owner_tag) {
1169			printf("uvm_page_own: page %p already owned "
1170			    "by proc %d [%s]\n", pg,
1171			     pg->owner, pg->owner_tag);
1172			panic("uvm_page_own");
1173		}
1174		pg->owner = (curproc) ? curproc->p_pid :  (pid_t) -1;
1175		pg->owner_tag = tag;
1176		return;
1177	}
1178
1179	/* drop ownership */
1180	if (pg->owner_tag == NULL) {
1181		printf("uvm_page_own: dropping ownership of a non-owned "
1182		    "page (%p)\n", pg);
1183		panic("uvm_page_own");
1184	}
1185	pg->owner_tag = NULL;
1186	return;
1187}
1188#endif
1189
1190/*
1191 * uvm_pageidlezero: zero free pages while the system is idle.
1192 *
1193 * => we do at least one iteration per call, if we are below the target.
1194 * => we loop until we either reach the target or curcpu_is_idle()
1195 *	reports that there is a process ready to run.
1196 */
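
/*
 * ports may define PMAP_PAGEIDLEZERO to zero pages with an MD routine
 * that can abort early (see the #ifdef below); otherwise the generic
 * pmap_zero_page() is used, at the cost of dirtying the cache.
 */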
1197void
1198uvm_pageidlezero(void)
1199{
1200	struct vm_page *pg;
1201	struct pgfreelist *pgfl;
1202	int free_list;
1203	UVMHIST_FUNC("uvm_pageidlezero"); UVMHIST_CALLED(pghist);
1204
1205	do {
1206		uvm_lock_fpageq();
1207
1208		if (uvmexp.zeropages >= UVM_PAGEZERO_TARGET) {
1209			uvm.page_idle_zero = FALSE;
1210			uvm_unlock_fpageq();
1211			return;
1212		}
1213
1214		for (free_list = 0; free_list < VM_NFREELIST; free_list++) {
1215			pgfl = &uvm.page_free[free_list];
1216			if ((pg = TAILQ_FIRST(&pgfl->pgfl_queues[
1217			    PGFL_UNKNOWN])) != NULL)
1218				break;
1219		}
1220
1221		if (pg == NULL) {
1222			/*
1223			 * No non-zero'd pages; don't bother trying again
1224			 * until we know we have non-zero'd pages free.
1225			 */
1226			uvm.page_idle_zero = FALSE;
1227			uvm_unlock_fpageq();
1228			return;
1229		}
1230
1231		TAILQ_REMOVE(&pgfl->pgfl_queues[PGFL_UNKNOWN], pg, pageq);
1232		uvmexp.free--;
1233		uvm_unlock_fpageq();
1234
1235#ifdef PMAP_PAGEIDLEZERO
1236		if (PMAP_PAGEIDLEZERO(pg) == FALSE) {
1237			/*
1238			 * The machine-dependent code detected some
1239			 * reason for us to abort zeroing pages,
1240			 * probably because there is a process now
1241			 * ready to run.
1242			 */
1243			uvm_lock_fpageq();
1244			TAILQ_INSERT_HEAD(&pgfl->pgfl_queues[PGFL_UNKNOWN],
1245			    pg, pageq);
1246			uvmexp.free++;
1247			uvmexp.zeroaborts++;
1248			uvm_unlock_fpageq();
1249			return;
1250		}
1251#else
1252		/*
1253		 * XXX This will toast the cache unless the pmap_zero_page()
1254		 * XXX implementation does uncached access.
1255		 */
1256		pmap_zero_page(pg);
1257#endif
1258		atomic_setbits_int(&pg->pg_flags, PG_ZERO);
1259
1260		uvm_lock_fpageq();
1261		TAILQ_INSERT_HEAD(&pgfl->pgfl_queues[PGFL_ZEROS], pg, pageq);
1262		uvmexp.free++;
1263		uvmexp.zeropages++;
1264		uvm_unlock_fpageq();
1265	} while (curcpu_is_idle());
1266}
1267
1268/*
1269 * when VM_PHYSSEG_MAX is 1, we can simplify these functions
1270 */
1271
1272#if VM_PHYSSEG_MAX > 1
1273/*
1274 * vm_physseg_find: find vm_physseg structure that belongs to a PA
1275 */
1276int
1277vm_physseg_find(paddr_t pframe, int *offp)
1278{
1279
1280#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)
1281	/* binary search for it */
1282	int	start, len, try;
1283
1284	/*
1285	 * if try is too large (thus target is less than try) we reduce
1286	 * the length to trunc(len/2) [i.e. everything smaller than "try"]
1287	 *
1288	 * if the try is too small (thus target is greater than try) then
1289	 * we set the new start to be (try + 1).   this means we need to
1290	 * reduce the length to (round(len/2) - 1).
1291	 *
1292	 * note "adjust" below which takes advantage of the fact that
1293	 *  (round(len/2) - 1) == trunc((len - 1) / 2)
1294	 * for any value of len we may have
1295	 */
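	/*
	 * worked instance: with len == 5, round(len/2) - 1 == 3 - 1 == 2
	 * and trunc((len - 1) / 2) == trunc(4 / 2) == 2, so decrementing
	 * len before the loop's "len = len / 2" step yields the correct
	 * upper-half length.
	 */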
1296
1297	for (start = 0, len = vm_nphysseg ; len != 0 ; len = len / 2) {
1298		try = start + (len / 2);	/* try in the middle */
1299
1300		/* start past our try? */
1301		if (pframe >= vm_physmem[try].start) {
1302			/* was try correct? */
1303			if (pframe < vm_physmem[try].end) {
1304				if (offp)
1305					*offp = pframe - vm_physmem[try].start;
1306				return(try);            /* got it */
1307			}
1308			start = try + 1;	/* next time, start here */
1309			len--;			/* "adjust" */
1310		} else {
1311			/*
1312			 * pframe before try, just reduce length of
1313			 * region, done in "for" loop
1314			 */
1315		}
1316	}
1317	return(-1);
1318
1319#else
1320	/* linear search for it */
1321	int	lcv;
1322
1323	for (lcv = 0; lcv < vm_nphysseg; lcv++) {
1324		if (pframe >= vm_physmem[lcv].start &&
1325		    pframe < vm_physmem[lcv].end) {
1326			if (offp)
1327				*offp = pframe - vm_physmem[lcv].start;
1328			return(lcv);		   /* got it */
1329		}
1330	}
1331	return(-1);
1332
1333#endif
1334}
1335
1336/*
1337 * PHYS_TO_VM_PAGE: find vm_page for a PA.   used by MI code to get vm_pages
1338 * back from an I/O mapping (ugh!).   used in some MD code as well.
1339 */
1340struct vm_page *
1341PHYS_TO_VM_PAGE(paddr_t pa)
1342{
1343	paddr_t pf = atop(pa);
1344	int	off;
1345	int	psi;
1346
1347	psi = vm_physseg_find(pf, &off);
1348
1349	return ((psi == -1) ? NULL : &vm_physmem[psi].pgs[off]);
1350}
1351#endif /* VM_PHYSSEG_MAX > 1 */
1352
1353/*
1354 * uvm_pagelookup: look up a page
1355 *
1356 * => caller should lock object to keep someone from pulling the page
1357 *	out from under it
1358 */
1359struct vm_page *
1360uvm_pagelookup(struct uvm_object *obj, voff_t off)
1361{
1362	/* XXX if stack is too much, handroll */
1363	struct vm_page pg;
1364
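	/*
	 * "pg" is only a search key: uvm_pagecmp() compares nothing but
	 * pg->offset, so the rest of the dummy page may stay uninitialized.
	 */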
1365	pg.offset = off;
1366	return (RB_FIND(uvm_objtree, &obj->memt, &pg));
1367}
1368
1369/*
1370 * uvm_pagewire: wire the page, thus removing it from the daemon's grasp
1371 *
1372 * => caller must lock page queues
1373 */
1374void
1375uvm_pagewire(struct vm_page *pg)
1376{
1377	if (pg->wire_count == 0) {
1378		if (pg->pg_flags & PQ_ACTIVE) {
1379			TAILQ_REMOVE(&uvm.page_active, pg, pageq);
1380			atomic_clearbits_int(&pg->pg_flags, PQ_ACTIVE);
1381			uvmexp.active--;
1382		}
1383		if (pg->pg_flags & PQ_INACTIVE) {
1384			if (pg->pg_flags & PQ_SWAPBACKED)
1385				TAILQ_REMOVE(&uvm.page_inactive_swp, pg, pageq);
1386			else
1387				TAILQ_REMOVE(&uvm.page_inactive_obj, pg, pageq);
1388			atomic_clearbits_int(&pg->pg_flags, PQ_INACTIVE);
1389			uvmexp.inactive--;
1390		}
1391		uvmexp.wired++;
1392	}
1393	pg->wire_count++;
1394}
1395
1396/*
1397 * uvm_pageunwire: unwire the page.
1398 *
1399 * => activate if wire count goes to zero.
1400 * => caller must lock page queues
1401 */
1402void
1403uvm_pageunwire(struct vm_page *pg)
1404{
1405	pg->wire_count--;
1406	if (pg->wire_count == 0) {
1407		TAILQ_INSERT_TAIL(&uvm.page_active, pg, pageq);
1408		uvmexp.active++;
1409		atomic_setbits_int(&pg->pg_flags, PQ_ACTIVE);
1410		uvmexp.wired--;
1411	}
1412}
1413
1414/*
1415 * uvm_pagedeactivate: deactivate page -- no pmaps have access to page
1416 *
1417 * => caller must lock page queues
1418 * => caller must check to make sure page is not wired
1419 * => object that page belongs to must be locked (so we can adjust pg->flags)
1420 */
1421void
1422uvm_pagedeactivate(struct vm_page *pg)
1423{
1424	if (pg->pg_flags & PQ_ACTIVE) {
1425		TAILQ_REMOVE(&uvm.page_active, pg, pageq);
1426		atomic_clearbits_int(&pg->pg_flags, PQ_ACTIVE);
1427		uvmexp.active--;
1428	}
1429	if ((pg->pg_flags & PQ_INACTIVE) == 0) {
1430		KASSERT(pg->wire_count == 0);
1431		if (pg->pg_flags & PQ_SWAPBACKED)
1432			TAILQ_INSERT_TAIL(&uvm.page_inactive_swp, pg, pageq);
1433		else
1434			TAILQ_INSERT_TAIL(&uvm.page_inactive_obj, pg, pageq);
1435		atomic_setbits_int(&pg->pg_flags, PQ_INACTIVE);
1436		uvmexp.inactive++;
1437		pmap_clear_reference(pg);
1438		/*
1439		 * update the "clean" bit.  this isn't 100%
1440		 * accurate, and doesn't have to be.  we'll
1441		 * re-sync it after we zap all mappings when
1442		 * scanning the inactive list.
1443		 */
1444		if ((pg->pg_flags & PG_CLEAN) != 0 &&
1445		    pmap_is_modified(pg))
1446			atomic_clearbits_int(&pg->pg_flags, PG_CLEAN);
1447	}
1448}
1449
1450/*
1451 * uvm_pageactivate: activate page
1452 *
1453 * => caller must lock page queues
1454 */
1455void
1456uvm_pageactivate(struct vm_page *pg)
1457{
1458	if (pg->pg_flags & PQ_INACTIVE) {
1459		if (pg->pg_flags & PQ_SWAPBACKED)
1460			TAILQ_REMOVE(&uvm.page_inactive_swp, pg, pageq);
1461		else
1462			TAILQ_REMOVE(&uvm.page_inactive_obj, pg, pageq);
1463		atomic_clearbits_int(&pg->pg_flags, PQ_INACTIVE);
1464		uvmexp.inactive--;
1465	}
1466	if (pg->wire_count == 0) {
1467
1468		/*
1469		 * if page is already active, remove it from list so we
1470		 * can put it at tail.  if it wasn't active, then mark
1471		 * it active and bump active count
1472		 */
1473		if (pg->pg_flags & PQ_ACTIVE)
1474			TAILQ_REMOVE(&uvm.page_active, pg, pageq);
1475		else {
1476			atomic_setbits_int(&pg->pg_flags, PQ_ACTIVE);
1477			uvmexp.active++;
1478		}
1479
1480		TAILQ_INSERT_TAIL(&uvm.page_active, pg, pageq);
1481	}
1482}
1483
1484/*
1485 * uvm_pagezero: zero fill a page
1486 *
1487 * => if page is part of an object then the object should be locked
1488 *	to protect pg->flags.
1489 */
1490void
1491uvm_pagezero(struct vm_page *pg)
1492{
1493	atomic_clearbits_int(&pg->pg_flags, PG_CLEAN);
1494	pmap_zero_page(pg);
1495}
1496
1497/*
1498 * uvm_pagecopy: copy a page
1499 *
1500 * => if page is part of an object then the object should be locked
1501 *	to protect pg->flags.
1502 */
1503void
1504uvm_pagecopy(struct vm_page *src, struct vm_page *dst)
1505{
1506	atomic_clearbits_int(&dst->pg_flags, PG_CLEAN);
1507	pmap_copy_page(src, dst);
1508}
1509
1510/*
1511 * uvm_page_lookup_freelist: look up the free list for the specified page
1512 */
1513int
1514uvm_page_lookup_freelist(struct vm_page *pg)
1515{
1516#if VM_PHYSSEG_MAX == 1
1517	return (vm_physmem[0].free_list);
1518#else
1519	int lcv;
1520
1521	lcv = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), NULL);
1522	KASSERT(lcv != -1);
1523	return (vm_physmem[lcv].free_list);
1524#endif
1525}
1526