uvm_page.c revision 1.96
1/*	$OpenBSD: uvm_page.c,v 1.96 2009/08/13 15:29:59 deraadt Exp $	*/
2/*	$NetBSD: uvm_page.c,v 1.44 2000/11/27 08:40:04 chs Exp $	*/
3
4/*
5 * Copyright (c) 1997 Charles D. Cranor and Washington University.
6 * Copyright (c) 1991, 1993, The Regents of the University of California.
7 *
8 * All rights reserved.
9 *
10 * This code is derived from software contributed to Berkeley by
11 * The Mach Operating System project at Carnegie-Mellon University.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 *    notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 *    notice, this list of conditions and the following disclaimer in the
20 *    documentation and/or other materials provided with the distribution.
21 * 3. All advertising materials mentioning features or use of this software
22 *    must display the following acknowledgement:
23 *	This product includes software developed by Charles D. Cranor,
24 *      Washington University, the University of California, Berkeley and
25 *      its contributors.
26 * 4. Neither the name of the University nor the names of its contributors
27 *    may be used to endorse or promote products derived from this software
28 *    without specific prior written permission.
29 *
30 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
34 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40 * SUCH DAMAGE.
41 *
42 *	@(#)vm_page.c   8.3 (Berkeley) 3/21/94
43 * from: Id: uvm_page.c,v 1.1.2.18 1998/02/06 05:24:42 chs Exp
44 *
45 *
46 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
47 * All rights reserved.
48 *
49 * Permission to use, copy, modify and distribute this software and
50 * its documentation is hereby granted, provided that both the copyright
51 * notice and this permission notice appear in all copies of the
52 * software, derivative works or modified versions, and any portions
53 * thereof, and that both notices appear in supporting documentation.
54 *
55 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
56 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
57 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
58 *
59 * Carnegie Mellon requests users of this software to return to
60 *
61 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
62 *  School of Computer Science
63 *  Carnegie Mellon University
64 *  Pittsburgh PA 15213-3890
65 *
66 * any improvements or extensions that they make and grant Carnegie the
67 * rights to redistribute these changes.
68 */
69
70/*
71 * uvm_page.c: page ops.
72 */
73
74#include <sys/param.h>
75#include <sys/systm.h>
76#include <sys/malloc.h>
77#include <sys/sched.h>
78#include <sys/kernel.h>
79#include <sys/vnode.h>
80#include <sys/mount.h>
81
82#include <uvm/uvm.h>
83
84/*
85 * for object trees
86 */
87RB_GENERATE(uvm_objtree, vm_page, objt, uvm_pagecmp);
88
89int
90uvm_pagecmp(struct vm_page *a, struct vm_page *b)
91{
92	return (a->offset < b->offset ? -1 : a->offset > b->offset);
93}
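
/*
 * Note on the comparator above: when a->offset < b->offset it returns -1,
 * otherwise it returns the value of (a->offset > b->offset), i.e. 0 for
 * equal offsets and 1 when a sorts after b.  The RB_* macros only look at
 * the sign, so this is equivalent to the usual three-way compare:
 *
 *	if (a->offset < b->offset)
 *		return (-1);
 *	if (a->offset > b->offset)
 *		return (1);
 *	return (0);
 */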
94
95/*
96 * global vars... XXXCDC: move to uvm. structure.
97 */
98
99/*
100 * physical memory config is stored in vm_physmem.
101 */
102
103struct vm_physseg vm_physmem[VM_PHYSSEG_MAX];	/* XXXCDC: uvm.physmem */
104int vm_nphysseg = 0;				/* XXXCDC: uvm.nphysseg */
105
106/*
107 * Some supported CPUs in a given architecture don't support all
108 * of the things necessary to do idle page zero'ing efficiently.
109 * We therefore provide a way to disable it from machdep code here.
110 */
111
112/*
113 * XXX disabled until we can find a way to do this without causing
114 * problems for either cpu caches or DMA latency.
115 */
116boolean_t vm_page_zero_enable = FALSE;
117
118/*
119 * local variables
120 */
121
122/*
123 * these variables record the values returned by vm_page_bootstrap,
124 * for debugging purposes.  The implementation of uvm_pageboot_alloc
125 * and pmap_startup here also uses them internally.
126 */
127
128static vaddr_t      virtual_space_start;
129static vaddr_t      virtual_space_end;
130
131/*
132 * History
133 */
134UVMHIST_DECL(pghist);
135
136/*
137 * local prototypes
138 */
139
140static void uvm_pageinsert(struct vm_page *);
141static void uvm_pageremove(struct vm_page *);
142
143/*
144 * inline functions
145 */
146
147/*
148 * uvm_pageinsert: insert a page in the object
149 *
150 * => caller must lock object
151 * => caller must lock page queues XXX questionable
152 * => caller should have already set pg's object and offset pointers
153 *    and bumped the version counter
154 */
155
156__inline static void
157uvm_pageinsert(struct vm_page *pg)
158{
159	UVMHIST_FUNC("uvm_pageinsert"); UVMHIST_CALLED(pghist);
160
161	KASSERT((pg->pg_flags & PG_TABLED) == 0);
162	/* XXX should we check duplicates? */
163	RB_INSERT(uvm_objtree, &pg->uobject->memt, pg);
164	atomic_setbits_int(&pg->pg_flags, PG_TABLED);
165	pg->uobject->uo_npages++;
166}
167
168/*
169 * uvm_page_remove: remove page from object
170 *
171 * => caller must lock object
172 * => caller must lock page queues
173 */
174
175static __inline void
176uvm_pageremove(struct vm_page *pg)
177{
178	UVMHIST_FUNC("uvm_pageremove"); UVMHIST_CALLED(pghist);
179
180	KASSERT(pg->pg_flags & PG_TABLED);
181	RB_REMOVE(uvm_objtree, &pg->uobject->memt, pg);
182
183	atomic_clearbits_int(&pg->pg_flags, PG_TABLED);
184	pg->uobject->uo_npages--;
185	pg->uobject = NULL;
186	pg->pg_version++;
187}
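
/*
 * Taken together, uvm_pageinsert() and uvm_pageremove() maintain the
 * invariant that PG_TABLED is set exactly when a page is linked into its
 * object's memt tree, and that uo_npages counts the pages in that tree.
 * The KASSERTs above catch double inserts and removal of untabled pages.
 */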
188
189/*
190 * uvm_page_init: init the page system.   called from uvm_init().
191 *
192 * => we return the range of kernel virtual memory in kvm_startp/kvm_endp
193 */
194
195void
196uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp)
197{
198	vsize_t freepages, pagecount, n;
199	vm_page_t pagearray;
200	int lcv, i;
201	paddr_t paddr;
202#if defined(UVMHIST)
203	static struct uvm_history_ent pghistbuf[100];
204#endif
205
206	UVMHIST_FUNC("uvm_page_init");
207	UVMHIST_INIT_STATIC(pghist, pghistbuf);
208	UVMHIST_CALLED(pghist);
209
210	/*
211	 * init the page queues and page queue locks
212	 */
213
214	for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
215		for (i = 0; i < PGFL_NQUEUES; i++)
216			TAILQ_INIT(&uvm.page_free[lcv].pgfl_queues[i]);
217	}
218	TAILQ_INIT(&uvm.page_active);
219	TAILQ_INIT(&uvm.page_inactive_swp);
220	TAILQ_INIT(&uvm.page_inactive_obj);
221	simple_lock_init(&uvm.pageqlock);
222	mtx_init(&uvm.fpageqlock, IPL_VM);
223
224	/*
225	 * allocate vm_page structures.
226	 */
227
228	/*
229	 * sanity check:
230	 * before calling this function the MD code is expected to register
231	 * some free RAM with the uvm_page_physload() function.   our job
232	 * now is to allocate vm_page structures for this memory.
233	 */
234
235	if (vm_nphysseg == 0)
236		panic("uvm_page_bootstrap: no memory pre-allocated");
237
238	/*
239	 * first calculate the number of free pages...
240	 *
241	 * note that we use start/end rather than avail_start/avail_end.
242	 * this allows us to allocate extra vm_page structures in case we
243	 * want to return some memory to the pool after booting.
244	 */
245
246	freepages = 0;
247	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
248		freepages += (vm_physmem[lcv].end - vm_physmem[lcv].start);
249
250	/*
251	 * we now know we have (PAGE_SIZE * freepages) bytes of memory we can
252	 * use.   for each page of memory we use we need a vm_page structure.
253	 * thus, the total number of pages we can use is the total size of
254	 * the memory divided by the PAGE_SIZE plus the size of the vm_page
255	 * structure.   we add one to freepages as a fudge factor to avoid
256	 * truncation errors (since we can only allocate in terms of whole
257	 * pages).
258	 */
259
260	pagecount = (((paddr_t)freepages + 1) << PAGE_SHIFT) /
261	    (PAGE_SIZE + sizeof(struct vm_page));
262	pagearray = (vm_page_t)uvm_pageboot_alloc(pagecount *
263	    sizeof(struct vm_page));
264	memset(pagearray, 0, pagecount * sizeof(struct vm_page));
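
	/*
	 * Worked example of the pagecount formula (numbers purely
	 * illustrative): assuming PAGE_SIZE == 4096, a 128 byte
	 * struct vm_page and freepages == 1000,
	 *
	 *	pagecount = ((1000 + 1) << 12) / (4096 + 128)
	 *		  = 4100096 / 4224
	 *		  = 970
	 *
	 * so roughly 3% of the free pages end up holding the vm_page
	 * array itself.
	 */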
265
266	/*
267	 * init the vm_page structures and put them in the correct place.
268	 */
269
270	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {
271		n = vm_physmem[lcv].end - vm_physmem[lcv].start;
272		if (n > pagecount) {
273			printf("uvm_page_init: lost %ld page(s) in init\n",
274			    (long)(n - pagecount));
275			panic("uvm_page_init");  /* XXXCDC: shouldn't happen? */
276			/* n = pagecount; */
277		}
278
279		/* set up page array pointers */
280		vm_physmem[lcv].pgs = pagearray;
281		pagearray += n;
282		pagecount -= n;
283		vm_physmem[lcv].lastpg = vm_physmem[lcv].pgs + (n - 1);
284
285		/* init and free vm_pages (we've already zeroed them) */
286		paddr = ptoa(vm_physmem[lcv].start);
287		for (i = 0 ; i < n ; i++, paddr += PAGE_SIZE) {
288			vm_physmem[lcv].pgs[i].phys_addr = paddr;
289#ifdef __HAVE_VM_PAGE_MD
290			VM_MDPAGE_INIT(&vm_physmem[lcv].pgs[i]);
291#endif
292			if (atop(paddr) >= vm_physmem[lcv].avail_start &&
293			    atop(paddr) <= vm_physmem[lcv].avail_end) {
294				uvmexp.npages++;
295				/* add page to free pool */
296				uvm_pagefree(&vm_physmem[lcv].pgs[i]);
297			}
298		}
299	}
300
301	/*
302	 * pass up the values of virtual_space_start and
303	 * virtual_space_end (obtained by uvm_pageboot_alloc) to the upper
304	 * layers of the VM.
305	 */
306
307	*kvm_startp = round_page(virtual_space_start);
308	*kvm_endp = trunc_page(virtual_space_end);
309
310	/*
311	 * init locks for kernel threads
312	 */
313	mtx_init(&uvm.aiodoned_lock, IPL_BIO);
314
315	/*
316	 * init reserve thresholds
317	 * XXXCDC - values may need adjusting
318	 */
319	uvmexp.reserve_pagedaemon = 4;
320	uvmexp.reserve_kernel = 6;
321	uvmexp.anonminpct = 10;
322	uvmexp.vnodeminpct = 10;
323	uvmexp.vtextminpct = 5;
324	uvmexp.anonmin = uvmexp.anonminpct * 256 / 100;
325	uvmexp.vnodemin = uvmexp.vnodeminpct * 256 / 100;
326	uvmexp.vtextmin = uvmexp.vtextminpct * 256 / 100;
327
328  	/*
329	 * determine if we should zero pages in the idle loop.
330	 */
331
332	uvm.page_idle_zero = vm_page_zero_enable;
333
334	/*
335	 * done!
336	 */
337
338	uvm.page_init_done = TRUE;
339}
340
341/*
342 * uvm_setpagesize: set the page size
343 *
344 * => sets page_shift and page_mask from uvmexp.pagesize.
345 */
346
347void
348uvm_setpagesize(void)
349{
350	if (uvmexp.pagesize == 0)
351		uvmexp.pagesize = DEFAULT_PAGE_SIZE;
352	uvmexp.pagemask = uvmexp.pagesize - 1;
353	if ((uvmexp.pagemask & uvmexp.pagesize) != 0)
354		panic("uvm_setpagesize: page size not a power of two");
355	for (uvmexp.pageshift = 0; ; uvmexp.pageshift++)
356		if ((1 << uvmexp.pageshift) == uvmexp.pagesize)
357			break;
358}
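
/*
 * Example: a port that leaves uvmexp.pagesize at 4096 ends up with
 * pagemask == 0xfff and pageshift == 12.  A size that is not a power of
 * two, say 0x1800, fails the (pagemask & pagesize) check and panics.
 */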
359
360/*
361 * uvm_pageboot_alloc: steal memory from physmem for bootstrapping
362 */
363
364vaddr_t
365uvm_pageboot_alloc(vsize_t size)
366{
367#if defined(PMAP_STEAL_MEMORY)
368	vaddr_t addr;
369
370	/*
371	 * defer bootstrap allocation to MD code (it may want to allocate
372	 * from a direct-mapped segment).  pmap_steal_memory should round
373	 * off virtual_space_start/virtual_space_end.
374	 */
375
376	addr = pmap_steal_memory(size, &virtual_space_start,
377	    &virtual_space_end);
378
379	return(addr);
380
381#else /* !PMAP_STEAL_MEMORY */
382
383	static boolean_t initialized = FALSE;
384	vaddr_t addr, vaddr;
385	paddr_t paddr;
386
387	/* round to page size */
388	size = round_page(size);
389
390	/*
391	 * on first call to this function, initialize ourselves.
392	 */
393	if (initialized == FALSE) {
394		pmap_virtual_space(&virtual_space_start, &virtual_space_end);
395
396		/* round it the way we like it */
397		virtual_space_start = round_page(virtual_space_start);
398		virtual_space_end = trunc_page(virtual_space_end);
399
400		initialized = TRUE;
401	}
402
403	/*
404	 * allocate virtual memory for this request
405	 */
406	if (virtual_space_start == virtual_space_end ||
407	    (virtual_space_end - virtual_space_start) < size)
408		panic("uvm_pageboot_alloc: out of virtual space");
409
410	addr = virtual_space_start;
411
412#ifdef PMAP_GROWKERNEL
413	/*
414	 * If the kernel pmap can't map the requested space,
415	 * then allocate more resources for it.
416	 */
417	if (uvm_maxkaddr < (addr + size)) {
418		uvm_maxkaddr = pmap_growkernel(addr + size);
419		if (uvm_maxkaddr < (addr + size))
420			panic("uvm_pageboot_alloc: pmap_growkernel() failed");
421	}
422#endif
423
424	virtual_space_start += size;
425
426	/*
427	 * allocate and mapin physical pages to back new virtual pages
428	 */
429
430	for (vaddr = round_page(addr) ; vaddr < addr + size ;
431	    vaddr += PAGE_SIZE) {
432
433		if (!uvm_page_physget(&paddr))
434			panic("uvm_pageboot_alloc: out of memory");
435
436		/*
437		 * Note this memory is no longer managed, so using
438		 * pmap_kenter is safe.
439		 */
440		pmap_kenter_pa(vaddr, paddr, VM_PROT_READ|VM_PROT_WRITE);
441	}
442	pmap_update(pmap_kernel());
443	return(addr);
444#endif	/* PMAP_STEAL_MEMORY */
445}
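
/*
 * The one caller of uvm_pageboot_alloc() in this file is the vm_page
 * array allocation in uvm_page_init() above; other early bootstrap code
 * can use it the same way, e.g. (illustrative sketch only, "buf" and
 * "bufsize" are made-up names):
 *
 *	buf = (char *)uvm_pageboot_alloc(bufsize);
 *
 * Memory obtained this way is stolen before the page system is up and is
 * not returned to the free page pool later.
 */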
446
447#if !defined(PMAP_STEAL_MEMORY)
448/*
449 * uvm_page_physget: "steal" one page from the vm_physmem structure.
450 *
451 * => attempt to allocate it off the end of a segment in which the "avail"
452 *    values match the start/end values.   if we can't do that, then we
453 *    will advance both values (making them equal, and removing some
454 *    vm_page structures from the non-avail area).
455 * => return false if out of memory.
456 */
457
458/* subroutine: try to allocate from memory chunks on the specified freelist */
459static boolean_t uvm_page_physget_freelist(paddr_t *, int);
460
461static boolean_t
462uvm_page_physget_freelist(paddr_t *paddrp, int freelist)
463{
464	int lcv, x;
465	UVMHIST_FUNC("uvm_page_physget_freelist"); UVMHIST_CALLED(pghist);
466
467	/* pass 1: try allocating from a matching end */
468#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) || \
469	(VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)
470	for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--)
471#else
472	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
473#endif
474	{
475
476		if (uvm.page_init_done == TRUE)
477			panic("uvm_page_physget: called _after_ bootstrap");
478
479		if (vm_physmem[lcv].free_list != freelist)
480			continue;
481
482		/* try from front */
483		if (vm_physmem[lcv].avail_start == vm_physmem[lcv].start &&
484		    vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) {
485			*paddrp = ptoa(vm_physmem[lcv].avail_start);
486			vm_physmem[lcv].avail_start++;
487			vm_physmem[lcv].start++;
488			/* nothing left?   nuke it */
489			if (vm_physmem[lcv].avail_start ==
490			    vm_physmem[lcv].end) {
491				if (vm_nphysseg == 1)
492				    panic("uvm_page_physget: out of memory!");
493				vm_nphysseg--;
494				for (x = lcv ; x < vm_nphysseg ; x++)
495					/* structure copy */
496					vm_physmem[x] = vm_physmem[x+1];
497			}
498			return (TRUE);
499		}
500
501		/* try from rear */
502		if (vm_physmem[lcv].avail_end == vm_physmem[lcv].end &&
503		    vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) {
504			*paddrp = ptoa(vm_physmem[lcv].avail_end - 1);
505			vm_physmem[lcv].avail_end--;
506			vm_physmem[lcv].end--;
507			/* nothing left?   nuke it */
508			if (vm_physmem[lcv].avail_end ==
509			    vm_physmem[lcv].start) {
510				if (vm_nphysseg == 1)
511				    panic("uvm_page_physget: out of memory!");
512				vm_nphysseg--;
513				for (x = lcv ; x < vm_nphysseg ; x++)
514					/* structure copy */
515					vm_physmem[x] = vm_physmem[x+1];
516			}
517			return (TRUE);
518		}
519	}
520
521	/* pass2: forget about matching ends, just allocate something */
522#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) || \
523	(VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)
524	for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--)
525#else
526	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
527#endif
528	{
529
530		/* any room in this bank? */
531		if (vm_physmem[lcv].avail_start >= vm_physmem[lcv].avail_end)
532			continue;  /* nope */
533
534		*paddrp = ptoa(vm_physmem[lcv].avail_start);
535		vm_physmem[lcv].avail_start++;
536		/* truncate! */
537		vm_physmem[lcv].start = vm_physmem[lcv].avail_start;
538
539		/* nothing left?   nuke it */
540		if (vm_physmem[lcv].avail_start == vm_physmem[lcv].end) {
541			if (vm_nphysseg == 1)
542				panic("uvm_page_physget: out of memory!");
543			vm_nphysseg--;
544			for (x = lcv ; x < vm_nphysseg ; x++)
545				/* structure copy */
546				vm_physmem[x] = vm_physmem[x+1];
547		}
548		return (TRUE);
549	}
550
551	return (FALSE);        /* whoops! */
552}
553
554boolean_t
555uvm_page_physget(paddr_t *paddrp)
556{
557	int i;
558	UVMHIST_FUNC("uvm_page_physget"); UVMHIST_CALLED(pghist);
559
560	/* try in the order of freelist preference */
561	for (i = 0; i < VM_NFREELIST; i++)
562		if (uvm_page_physget_freelist(paddrp, i) == TRUE)
563			return (TRUE);
564	return (FALSE);
565}
566#endif /* PMAP_STEAL_MEMORY */
567
568/*
569 * uvm_page_physload: load physical memory into VM system
570 *
571 * => all args are PFs
572 * => all pages in start/end get vm_page structures
573 * => areas marked by avail_start/avail_end get added to the free page pool
574 * => we are limited to VM_PHYSSEG_MAX physical memory segments
575 */
576
577void
578uvm_page_physload(paddr_t start, paddr_t end, paddr_t avail_start,
579    paddr_t avail_end, int free_list)
580{
581	int preload, lcv;
582	psize_t npages;
583	struct vm_page *pgs;
584	struct vm_physseg *ps;
585
586	if (uvmexp.pagesize == 0)
587		panic("uvm_page_physload: page size not set!");
588
589	if (free_list >= VM_NFREELIST || free_list < VM_FREELIST_DEFAULT)
590		panic("uvm_page_physload: bad free list %d", free_list);
591
592	if (start >= end)
593		panic("uvm_page_physload: start >= end");
594
595	/*
596	 * do we have room?
597	 */
598	if (vm_nphysseg == VM_PHYSSEG_MAX) {
599		printf("uvm_page_physload: unable to load physical memory "
600		    "segment\n");
601		printf("\t%d segments allocated, ignoring 0x%llx -> 0x%llx\n",
602		    VM_PHYSSEG_MAX, (long long)start, (long long)end);
603		printf("\tincrease VM_PHYSSEG_MAX\n");
604		return;
605	}
606
607	/*
608	 * check to see if this is a "preload" (i.e. uvm_mem_init hasn't been
609	 * called yet, so malloc is not available).
610	 */
611	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {
612		if (vm_physmem[lcv].pgs)
613			break;
614	}
615	preload = (lcv == vm_nphysseg);
616
617	/*
618	 * if VM is already running, attempt to malloc() vm_page structures
619	 */
620	if (!preload) {
621#if defined(VM_PHYSSEG_NOADD)
622		panic("uvm_page_physload: tried to add RAM after vm_mem_init");
623#else
624		/* XXXCDC: need some sort of lockout for this case */
625		paddr_t paddr;
626		npages = end - start;  /* # of pages */
627		pgs = (struct vm_page *)uvm_km_zalloc(kernel_map,
628		    sizeof(struct vm_page) * npages);
629		if (pgs == NULL) {
630			printf("uvm_page_physload: can not malloc vm_page "
631			    "structs for segment\n");
632			printf("\tignoring 0x%lx -> 0x%lx\n", start, end);
633			return;
634		}
635		/* init phys_addr and free_list, and free pages */
636		for (lcv = 0, paddr = ptoa(start) ;
637				 lcv < npages ; lcv++, paddr += PAGE_SIZE) {
638			pgs[lcv].phys_addr = paddr;
639			pgs[lcv].free_list = free_list;
640			if (atop(paddr) >= avail_start &&
641			    atop(paddr) <= avail_end)
642				uvm_pagefree(&pgs[lcv]);
643		}
644		/* XXXCDC: incomplete: need to update uvmexp.free, what else? */
645		/* XXXCDC: need hook to tell pmap to rebuild pv_list, etc... */
646#endif
647	} else {
648
649		/* gcc complains if these don't get init'd */
650		pgs = NULL;
651		npages = 0;
652
653	}
654
655	/*
656	 * now insert us in the proper place in vm_physmem[]
657	 */
658
659#if (VM_PHYSSEG_STRAT == VM_PSTRAT_RANDOM)
660
661	/* random: put it at the end (easy!) */
662	ps = &vm_physmem[vm_nphysseg];
663
664#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)
665
666	{
667		int x;
668		/* sort by address for binary search */
669		for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
670			if (start < vm_physmem[lcv].start)
671				break;
672		ps = &vm_physmem[lcv];
673		/* move back other entries, if necessary ... */
674		for (x = vm_nphysseg ; x > lcv ; x--)
675			/* structure copy */
676			vm_physmem[x] = vm_physmem[x - 1];
677	}
678
679#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
680
681	{
682		int x;
683		/* sort by largest segment first */
684		for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
685			if ((end - start) >
686			    (vm_physmem[lcv].end - vm_physmem[lcv].start))
687				break;
688		ps = &vm_physmem[lcv];
689		/* move back other entries, if necessary ... */
690		for (x = vm_nphysseg ; x > lcv ; x--)
691			/* structure copy */
692			vm_physmem[x] = vm_physmem[x - 1];
693	}
694
695#else
696
697	panic("uvm_page_physload: unknown physseg strategy selected!");
698
699#endif
700
701	ps->start = start;
702	ps->end = end;
703	ps->avail_start = avail_start;
704	ps->avail_end = avail_end;
705	if (preload) {
706		ps->pgs = NULL;
707	} else {
708		ps->pgs = pgs;
709		ps->lastpg = pgs + npages - 1;
710	}
711	ps->free_list = free_list;
712	vm_nphysseg++;
713
714	/*
715	 * done!
716	 */
717
718	return;
719}
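
/*
 * Typical use from machine-dependent startup code (illustrative sketch
 * only; the real calls live in each port's pmap/machdep bootstrap):
 *
 *	uvm_page_physload(atop(seg_start), atop(seg_end),
 *	    atop(avail_start), atop(avail_end), VM_FREELIST_DEFAULT);
 *
 * i.e. the first four arguments are page frame numbers, not byte
 * addresses.  Segments loaded before uvm_page_init() take the cheap
 * "preload" path above; adding RAM later relies on the incomplete
 * XXXCDC path and panics outright if VM_PHYSSEG_NOADD is defined.
 */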
720
721#ifdef DDB /* XXXCDC: TMP TMP TMP DEBUG DEBUG DEBUG */
722
723void uvm_page_physdump(void); /* SHUT UP GCC */
724
725/* call from DDB */
726void
727uvm_page_physdump(void)
728{
729	int lcv;
730
731	printf("uvm_page_physdump: physical memory config [segs=%d of %d]:\n",
732	    vm_nphysseg, VM_PHYSSEG_MAX);
733	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
734		printf("0x%llx->0x%llx [0x%llx->0x%llx]\n",
735		    (long long)vm_physmem[lcv].start,
736		    (long long)vm_physmem[lcv].end,
737		    (long long)vm_physmem[lcv].avail_start,
738		    (long long)vm_physmem[lcv].avail_end);
739	printf("STRATEGY = ");
740	switch (VM_PHYSSEG_STRAT) {
741	case VM_PSTRAT_RANDOM: printf("RANDOM\n"); break;
742	case VM_PSTRAT_BSEARCH: printf("BSEARCH\n"); break;
743	case VM_PSTRAT_BIGFIRST: printf("BIGFIRST\n"); break;
744	default: printf("<<UNKNOWN>>!!!!\n");
745	}
746}
747#endif
748
749void
750uvm_shutdown(void)
751{
752#ifdef UVM_SWAP_ENCRYPT
753	uvm_swap_finicrypt_all();
754#endif
755}
756
757/*
758 * uvm_pagealloc_strat: allocate vm_page from a particular free list.
759 *
760 * => return null if no pages free
761 * => wake up pagedaemon if number of free pages drops below low water mark
762 * => if obj != NULL, obj must be locked (to put in tree)
763 * => if anon != NULL, anon must be locked (to put in anon)
764 * => only one of obj or anon can be non-null
765 * => caller must activate/deactivate page if it is not wired.
766 * => free_list is ignored if strat == UVM_PGA_STRAT_NORMAL.
767 * => policy decision: it is more important to pull a page off of the
768 *	appropriate priority free list than it is to get a zero'd or
769 *	unknown contents page.  This is because we live with the
770 *	consequences of a bad free list decision for the entire
771 *	lifetime of the page, e.g. if the page comes from memory that
772 *	is slower to access.
773 */
774
775struct vm_page *
776uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon,
777    int flags, int strat, int free_list)
778{
779	int lcv, try1, try2, zeroit = 0;
780	struct vm_page *pg;
781	struct pglist *freeq;
782	struct pgfreelist *pgfl;
783	boolean_t use_reserve;
784	UVMHIST_FUNC("uvm_pagealloc_strat"); UVMHIST_CALLED(pghist);
785
786	KASSERT(obj == NULL || anon == NULL);
787	KASSERT(off == trunc_page(off));
788
789	uvm_lock_fpageq();
790
791	/*
792	 * check to see if we need to generate some free pages by waking
793	 * the pagedaemon.
794	 */
795	if ((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freemin ||
796	    ((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freetarg &&
797	     uvmexp.inactive < uvmexp.inactarg))
798		wakeup(&uvm.pagedaemon);
799
800	/*
801	 * fail if any of these conditions is true:
802	 * [1]  there really are no free pages, or
803	 * [2]  only kernel "reserved" pages remain and
804	 *        the page isn't being allocated to a kernel object.
805	 * [3]  only pagedaemon "reserved" pages remain and
806	 *        the requestor isn't the pagedaemon.
807	 */
808
809	use_reserve = (flags & UVM_PGA_USERESERVE) ||
810		(obj && UVM_OBJ_IS_KERN_OBJECT(obj));
811	if ((uvmexp.free <= uvmexp.reserve_kernel && !use_reserve) ||
812	    (uvmexp.free <= uvmexp.reserve_pagedaemon &&
813	     !((curproc == uvm.pagedaemon_proc) ||
814	      (curproc == syncerproc))))
815		goto fail;
816
817#if PGFL_NQUEUES != 2
818#error uvm_pagealloc_strat needs to be updated
819#endif
820
821	/*
822	 * If we want a zero'd page, try the ZEROS queue first, otherwise
823	 * we try the UNKNOWN queue first.
824	 */
825	if (flags & UVM_PGA_ZERO) {
826		try1 = PGFL_ZEROS;
827		try2 = PGFL_UNKNOWN;
828	} else {
829		try1 = PGFL_UNKNOWN;
830		try2 = PGFL_ZEROS;
831	}
832
833	UVMHIST_LOG(pghist, "obj=%p off=%lx anon=%p flags=%lx",
834	    obj, (u_long)off, anon, flags);
835	UVMHIST_LOG(pghist, "strat=%ld free_list=%ld", strat, free_list, 0, 0);
836 again:
837	switch (strat) {
838	case UVM_PGA_STRAT_NORMAL:
839		/* Check all freelists in descending priority order. */
840		for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
841			pgfl = &uvm.page_free[lcv];
842			if ((pg = TAILQ_FIRST((freeq =
843			      &pgfl->pgfl_queues[try1]))) != NULL ||
844			    (pg = TAILQ_FIRST((freeq =
845			      &pgfl->pgfl_queues[try2]))) != NULL)
846				goto gotit;
847		}
848
849		/* No pages free! */
850		goto fail;
851
852	case UVM_PGA_STRAT_ONLY:
853	case UVM_PGA_STRAT_FALLBACK:
854		/* Attempt to allocate from the specified free list. */
855		KASSERT(free_list >= 0 && free_list < VM_NFREELIST);
856		pgfl = &uvm.page_free[free_list];
857		if ((pg = TAILQ_FIRST((freeq =
858		      &pgfl->pgfl_queues[try1]))) != NULL ||
859		    (pg = TAILQ_FIRST((freeq =
860		      &pgfl->pgfl_queues[try2]))) != NULL)
861			goto gotit;
862
863		/* Fall back, if possible. */
864		if (strat == UVM_PGA_STRAT_FALLBACK) {
865			strat = UVM_PGA_STRAT_NORMAL;
866			goto again;
867		}
868
869		/* No pages free! */
870		goto fail;
871
872	default:
873		panic("uvm_pagealloc_strat: bad strat %d", strat);
874		/* NOTREACHED */
875	}
876
877 gotit:
878	TAILQ_REMOVE(freeq, pg, pageq);
879	uvmexp.free--;
880
881	/* update zero'd page count */
882	if (pg->pg_flags & PG_ZERO)
883		uvmexp.zeropages--;
884
885	/*
886	 * update allocation statistics and remember if we have to
887	 * zero the page
888	 */
889	if (flags & UVM_PGA_ZERO) {
890		if (pg->pg_flags & PG_ZERO) {
891			uvmexp.pga_zerohit++;
892			zeroit = 0;
893		} else {
894			uvmexp.pga_zeromiss++;
895			zeroit = 1;
896		}
897	}
898
899	uvm_unlock_fpageq();		/* unlock free page queue */
900
901	pg->offset = off;
902	pg->uobject = obj;
903	pg->uanon = anon;
904	pg->pg_flags = PG_BUSY|PG_CLEAN|PG_FAKE;
905	pg->pg_version++;
906	if (anon) {
907		anon->an_page = pg;
908		atomic_setbits_int(&pg->pg_flags, PQ_ANON);
909#ifdef UBC
910		uvm_pgcnt_anon++;
911#endif
912	} else {
913		if (obj)
914			uvm_pageinsert(pg);
915	}
916#if defined(UVM_PAGE_TRKOWN)
917	pg->owner_tag = NULL;
918#endif
919	UVM_PAGE_OWN(pg, "new alloc");
920
921	if (flags & UVM_PGA_ZERO) {
922		/*
923		 * A zero'd page is not clean.  If we got a page not already
924		 * zero'd, then we have to zero it ourselves.
925		 */
926		atomic_clearbits_int(&pg->pg_flags, PG_CLEAN);
927		if (zeroit)
928			pmap_zero_page(pg);
929	}
930
931	UVMHIST_LOG(pghist, "allocated pg %p/%lx", pg,
932	    (u_long)VM_PAGE_TO_PHYS(pg), 0, 0);
933	return(pg);
934
935 fail:
936	uvm_unlock_fpageq();
937	UVMHIST_LOG(pghist, "failed!", 0, 0, 0, 0);
938	return (NULL);
939}
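
/*
 * Most callers do not use uvm_pagealloc_strat() directly; they go through
 * the uvm_pagealloc() wrapper macro, which simply supplies
 * UVM_PGA_STRAT_NORMAL.  A typical allocation loop looks roughly like
 * this (illustrative sketch only):
 *
 *	struct vm_page *pg;
 *
 *	while ((pg = uvm_pagealloc(uobj, off, NULL, UVM_PGA_ZERO)) == NULL)
 *		uvm_wait("pgalloc");
 *
 * where uvm_wait() sleeps until the pagedaemon has freed some pages.
 * The page comes back PG_BUSY|PG_CLEAN|PG_FAKE and on no paging queue;
 * the caller is expected to clear PG_BUSY when done and to activate or
 * deactivate the page itself unless it is wired.
 */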
940
941/*
942 * uvm_pagerealloc: reallocate a page from one object to another
943 *
944 * => both objects must be locked
945 */
946
947void
948uvm_pagerealloc(struct vm_page *pg, struct uvm_object *newobj, voff_t newoff)
949{
950
951	UVMHIST_FUNC("uvm_pagerealloc"); UVMHIST_CALLED(pghist);
952
953	/*
954	 * remove it from the old object
955	 */
956
957	if (pg->uobject) {
958		uvm_pageremove(pg);
959	}
960
961	/*
962	 * put it in the new object
963	 */
964
965	if (newobj) {
966		pg->uobject = newobj;
967		pg->offset = newoff;
968		pg->pg_version++;
969		uvm_pageinsert(pg);
970	}
971}
972
973
974/*
975 * uvm_pagefree: free page
976 *
977 * => erase page's identity (i.e. remove from object)
978 * => put page on free list
979 * => caller must lock owning object (either anon or uvm_object)
980 * => caller must lock page queues
981 * => assumes all valid mappings of pg are gone
982 */
983
984void
985uvm_pagefree(struct vm_page *pg)
986{
987	int saved_loan_count = pg->loan_count;
988	UVMHIST_FUNC("uvm_pagefree"); UVMHIST_CALLED(pghist);
989
990#ifdef DEBUG
991	if (pg->uobject == (void *)0xdeadbeef &&
992	    pg->uanon == (void *)0xdeadbeef) {
993		panic("uvm_pagefree: freeing free page %p", pg);
994	}
995#endif
996
997	UVMHIST_LOG(pghist, "freeing pg %p/%lx", pg,
998	    (u_long)VM_PAGE_TO_PHYS(pg), 0, 0);
999
1000	/*
1001	 * if the page was an object page (and thus "TABLED"), remove it
1002	 * from the object.
1003	 */
1004
1005	if (pg->pg_flags & PG_TABLED) {
1006
1007		/*
1008		 * if the object page is on loan we are going to drop ownership.
1009		 * it is possible that an anon will take over as owner for this
1010		 * page later on.   the anon will want a !PG_CLEAN page so that
1011		 * it knows it needs to allocate swap if it wants to page the
1012		 * page out.
1013		 */
1014
1015		/* in case an anon takes over */
1016		if (saved_loan_count)
1017			atomic_clearbits_int(&pg->pg_flags, PG_CLEAN);
1018		uvm_pageremove(pg);
1019
1020		/*
1021		 * if our page was on loan, then we just lost control over it
1022		 * (in fact, if it was loaned to an anon, the anon may have
1023		 * already taken over ownership of the page by now and thus
1024		 * changed the loan_count [e.g. in uvmfault_anonget()]) we just
1025		 * return (when the last loan is dropped, then the page can be
1026		 * freed by whatever was holding the last loan).
1027		 */
1028
1029		if (saved_loan_count)
1030			return;
1031	} else if (saved_loan_count && pg->uanon) {
1032		/*
1033		 * if our page is owned by an anon and is loaned out to the
1034		 * kernel then we just want to drop ownership and return.
1035		 * the kernel must free the page when all its loans clear ...
1036		 * note that the kernel can't change the loan status of our
1037		 * page as long as we are holding PQ lock.
1038		 */
1039		atomic_clearbits_int(&pg->pg_flags, PQ_ANON);
1040		pg->uanon->an_page = NULL;
1041		pg->uanon = NULL;
1042		return;
1043	}
1044	KASSERT(saved_loan_count == 0);
1045
1046	/*
1047	 * now remove the page from the queues
1048	 */
1049
1050	if (pg->pg_flags & PQ_ACTIVE) {
1051		TAILQ_REMOVE(&uvm.page_active, pg, pageq);
1052		atomic_clearbits_int(&pg->pg_flags, PQ_ACTIVE);
1053		uvmexp.active--;
1054	}
1055	if (pg->pg_flags & PQ_INACTIVE) {
1056		if (pg->pg_flags & PQ_SWAPBACKED)
1057			TAILQ_REMOVE(&uvm.page_inactive_swp, pg, pageq);
1058		else
1059			TAILQ_REMOVE(&uvm.page_inactive_obj, pg, pageq);
1060		atomic_clearbits_int(&pg->pg_flags, PQ_INACTIVE);
1061		uvmexp.inactive--;
1062	}
1063
1064	/*
1065	 * if the page was wired, unwire it now.
1066	 */
1067
1068	if (pg->wire_count) {
1069		pg->wire_count = 0;
1070		uvmexp.wired--;
1071	}
1072	if (pg->uanon) {
1073		pg->uanon->an_page = NULL;
1074#ifdef UBC
1075		uvm_pgcnt_anon--;
1076#endif
1077	}
1078
1079	/*
1080	 * and put on free queue
1081	 */
1082
1083	atomic_clearbits_int(&pg->pg_flags, PG_ZERO);
1084
1085	uvm_lock_fpageq();
1086#ifdef PAGEFASTRECYCLE
1087	TAILQ_INSERT_HEAD(&uvm.page_free[
1088	    uvm_page_lookup_freelist(pg)].pgfl_queues[PGFL_UNKNOWN], pg, pageq);
1089#else
1090	TAILQ_INSERT_TAIL(&uvm.page_free[
1091	    uvm_page_lookup_freelist(pg)].pgfl_queues[PGFL_UNKNOWN], pg, pageq);
1092#endif
1093	atomic_clearbits_int(&pg->pg_flags, PQ_MASK);
1094	atomic_setbits_int(&pg->pg_flags, PQ_FREE);
1095#ifdef DEBUG
1096	pg->uobject = (void *)0xdeadbeef;
1097	pg->offset = 0xdeadbeef;
1098	pg->uanon = (void *)0xdeadbeef;
1099#endif
1100	uvmexp.free++;
1101
1102	if (uvmexp.zeropages < UVM_PAGEZERO_TARGET)
1103		uvm.page_idle_zero = vm_page_zero_enable;
1104
1105	uvm_unlock_fpageq();
1106}
1107
1108/*
1109 * uvm_page_unbusy: unbusy an array of pages.
1110 *
1111 * => pages must either all belong to the same object, or all belong to anons.
1112 * => if pages are object-owned, object must be locked.
1113 * => if pages are anon-owned, anons must be unlocked and have 0 refcount.
1114 */
1115
1116void
1117uvm_page_unbusy(struct vm_page **pgs, int npgs)
1118{
1119	struct vm_page *pg;
1120	struct uvm_object *uobj;
1121	int i;
1122	UVMHIST_FUNC("uvm_page_unbusy"); UVMHIST_CALLED(pdhist);
1123
1124	for (i = 0; i < npgs; i++) {
1125		pg = pgs[i];
1126
1127		if (pg == NULL || pg == PGO_DONTCARE) {
1128			continue;
1129		}
1130		if (pg->pg_flags & PG_WANTED) {
1131			wakeup(pg);
1132		}
1133		if (pg->pg_flags & PG_RELEASED) {
1134			UVMHIST_LOG(pdhist, "releasing pg %p", pg,0,0,0);
1135			uobj = pg->uobject;
1136			if (uobj != NULL) {
1137				uvm_lock_pageq();
1138				pmap_page_protect(pg, VM_PROT_NONE);
1139				/* XXX won't happen right now */
1140				if (pg->pg_flags & PQ_ANON)
1141					uao_dropswap(uobj,
1142					    pg->offset >> PAGE_SHIFT);
1143				uvm_pagefree(pg);
1144				uvm_unlock_pageq();
1145			} else {
1146				atomic_clearbits_int(&pg->pg_flags, PG_BUSY);
1147				UVM_PAGE_OWN(pg, NULL);
1148				uvm_anfree(pg->uanon);
1149			}
1150		} else {
1151			UVMHIST_LOG(pdhist, "unbusying pg %p", pg,0,0,0);
1152			atomic_clearbits_int(&pg->pg_flags, PG_WANTED|PG_BUSY);
1153			UVM_PAGE_OWN(pg, NULL);
1154		}
1155	}
1156}
1157
1158#if defined(UVM_PAGE_TRKOWN)
1159/*
1160 * uvm_page_own: set or release page ownership
1161 *
1162 * => this is a debugging function that keeps track of who sets PG_BUSY
1163 *	and where they do it.   it can be used to track down problems
1164 *	such as a process setting "PG_BUSY" and never releasing it.
1165 * => page's object [if any] must be locked
1166 * => if "tag" is NULL then we are releasing page ownership
1167 */
1168void
1169uvm_page_own(struct vm_page *pg, char *tag)
1170{
1171	/* gain ownership? */
1172	if (tag) {
1173		if (pg->owner_tag) {
1174			printf("uvm_page_own: page %p already owned "
1175			    "by proc %d [%s]\n", pg,
1176			     pg->owner, pg->owner_tag);
1177			panic("uvm_page_own");
1178		}
1179		pg->owner = (curproc) ? curproc->p_pid :  (pid_t) -1;
1180		pg->owner_tag = tag;
1181		return;
1182	}
1183
1184	/* drop ownership */
1185	if (pg->owner_tag == NULL) {
1186		printf("uvm_page_own: dropping ownership of a non-owned "
1187		    "page (%p)\n", pg);
1188		panic("uvm_page_own");
1189	}
1190	pg->owner_tag = NULL;
1191	return;
1192}
1193#endif
1194
1195/*
1196 * uvm_pageidlezero: zero free pages while the system is idle.
1197 *
1198 * => we do at least one iteration per call, if we are below the target.
1199 * => we loop until we either reach the target or the cpu is no longer
1200 *	idle (i.e. there is a process ready to run).
1201 */
1202void
1203uvm_pageidlezero(void)
1204{
1205	struct vm_page *pg;
1206	struct pgfreelist *pgfl;
1207	int free_list;
1208	UVMHIST_FUNC("uvm_pageidlezero"); UVMHIST_CALLED(pghist);
1209
1210	do {
1211		uvm_lock_fpageq();
1212
1213		if (uvmexp.zeropages >= UVM_PAGEZERO_TARGET) {
1214			uvm.page_idle_zero = FALSE;
1215			uvm_unlock_fpageq();
1216			return;
1217		}
1218
1219		for (free_list = 0; free_list < VM_NFREELIST; free_list++) {
1220			pgfl = &uvm.page_free[free_list];
1221			if ((pg = TAILQ_FIRST(&pgfl->pgfl_queues[
1222			    PGFL_UNKNOWN])) != NULL)
1223				break;
1224		}
1225
1226		if (pg == NULL) {
1227			/*
1228			 * No non-zero'd pages; don't bother trying again
1229			 * until we know we have non-zero'd pages free.
1230			 */
1231			uvm.page_idle_zero = FALSE;
1232			uvm_unlock_fpageq();
1233			return;
1234		}
1235
1236		TAILQ_REMOVE(&pgfl->pgfl_queues[PGFL_UNKNOWN], pg, pageq);
1237		uvmexp.free--;
1238		uvm_unlock_fpageq();
1239
1240#ifdef PMAP_PAGEIDLEZERO
1241		if (PMAP_PAGEIDLEZERO(pg) == FALSE) {
1242			/*
1243			 * The machine-dependent code detected some
1244			 * reason for us to abort zeroing pages,
1245			 * probably because there is a process now
1246			 * ready to run.
1247			 */
1248			uvm_lock_fpageq();
1249			TAILQ_INSERT_HEAD(&pgfl->pgfl_queues[PGFL_UNKNOWN],
1250			    pg, pageq);
1251			uvmexp.free++;
1252			uvmexp.zeroaborts++;
1253			uvm_unlock_fpageq();
1254			return;
1255		}
1256#else
1257		/*
1258		 * XXX This will toast the cache unless the pmap_zero_page()
1259		 * XXX implementation does uncached access.
1260		 */
1261		pmap_zero_page(pg);
1262#endif
1263		atomic_setbits_int(&pg->pg_flags, PG_ZERO);
1264
1265		uvm_lock_fpageq();
1266		TAILQ_INSERT_HEAD(&pgfl->pgfl_queues[PGFL_ZEROS], pg, pageq);
1267		uvmexp.free++;
1268		uvmexp.zeropages++;
1269		uvm_unlock_fpageq();
1270	} while (curcpu_is_idle());
1271}
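
/*
 * Note how this cooperates with the rest of this file: uvm_pagefree()
 * clears PG_ZERO and always frees onto the PGFL_UNKNOWN queue, the idle
 * loop above moves pages to PGFL_ZEROS once they have been zeroed, and
 * uvm_pagealloc_strat() tries PGFL_ZEROS first for UVM_PGA_ZERO requests,
 * accounting the outcome in uvmexp.pga_zerohit / uvmexp.pga_zeromiss.
 */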
1272
1273/*
1274 * when VM_PHYSSEG_MAX is 1, we can simplify these functions
1275 */
1276
1277#if VM_PHYSSEG_MAX > 1
1278/*
1279 * vm_physseg_find: find vm_physseg structure that belongs to a PA
1280 */
1281int
1282vm_physseg_find(paddr_t pframe, int *offp)
1283{
1284
1285#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)
1286	/* binary search for it */
1287	int	start, len, try;
1288
1289	/*
1290	 * if try is too large (thus target is less than try) we reduce
1291	 * the length to trunc(len/2) [i.e. everything smaller than "try"]
1292	 *
1293	 * if the try is too small (thus target is greater than try) then
1294	 * we set the new start to be (try + 1).   this means we need to
1295	 * reduce the length to (round(len/2) - 1).
1296	 *
1297	 * note "adjust" below which takes advantage of the fact that
1298	 *  (round(len/2) - 1) == trunc((len - 1) / 2)
1299	 * for any value of len we may have
1300	 */
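	/*
	 * Example with len == 5: try = start + 2.  Going right (pframe
	 * beyond try's segment) sets start = try + 1 and does len--, so
	 * the loop update leaves len = 4 / 2 = 2 == round(5/2) - 1.
	 * Going left keeps start and the update gives len = 5 / 2 = 2
	 * == trunc(5/2).
	 */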
1301
1302	for (start = 0, len = vm_nphysseg ; len != 0 ; len = len / 2) {
1303		try = start + (len / 2);	/* try in the middle */
1304
1305		/* start past our try? */
1306		if (pframe >= vm_physmem[try].start) {
1307			/* was try correct? */
1308			if (pframe < vm_physmem[try].end) {
1309				if (offp)
1310					*offp = pframe - vm_physmem[try].start;
1311				return(try);            /* got it */
1312			}
1313			start = try + 1;	/* next time, start here */
1314			len--;			/* "adjust" */
1315		} else {
1316			/*
1317			 * pframe before try, just reduce length of
1318			 * region, done in "for" loop
1319			 */
1320		}
1321	}
1322	return(-1);
1323
1324#else
1325	/* linear search for it */
1326	int	lcv;
1327
1328	for (lcv = 0; lcv < vm_nphysseg; lcv++) {
1329		if (pframe >= vm_physmem[lcv].start &&
1330		    pframe < vm_physmem[lcv].end) {
1331			if (offp)
1332				*offp = pframe - vm_physmem[lcv].start;
1333			return(lcv);		   /* got it */
1334		}
1335	}
1336	return(-1);
1337
1338#endif
1339}
1340
1341/*
1342 * PHYS_TO_VM_PAGE: find vm_page for a PA.   used by MI code to get vm_pages
1343 * back from an I/O mapping (ugh!).   used in some MD code as well.
1344 */
1345struct vm_page *
1346PHYS_TO_VM_PAGE(paddr_t pa)
1347{
1348	paddr_t pf = atop(pa);
1349	int	off;
1350	int	psi;
1351
1352	psi = vm_physseg_find(pf, &off);
1353
1354	return ((psi == -1) ? NULL : &vm_physmem[psi].pgs[off]);
1355}
1356#endif /* VM_PHYSSEG_MAX > 1 */
1357
1358/*
1359 * uvm_pagelookup: look up a page
1360 *
1361 * => caller should lock object to keep someone from pulling the page
1362 *	out from under it
1363 */
1364struct vm_page *
1365uvm_pagelookup(struct uvm_object *obj, voff_t off)
1366{
1367	/* XXX if stack is too much, handroll */
1368	struct vm_page pg;
1369
1370	pg.offset = off;
1371	return (RB_FIND(uvm_objtree, &obj->memt, &pg));
1372}
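
/*
 * Note that uvm_pagelookup() builds a struct vm_page on the stack purely
 * as a search key: only pg.offset is filled in, and uvm_pagecmp() (the
 * tree's comparator) looks at nothing else, so RB_FIND never touches the
 * uninitialized fields of the dummy page.
 */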
1373
1374/*
1375 * uvm_pagewire: wire the page, thus removing it from the daemon's grasp
1376 *
1377 * => caller must lock page queues
1378 */
1379void
1380uvm_pagewire(struct vm_page *pg)
1381{
1382	if (pg->wire_count == 0) {
1383		if (pg->pg_flags & PQ_ACTIVE) {
1384			TAILQ_REMOVE(&uvm.page_active, pg, pageq);
1385			atomic_clearbits_int(&pg->pg_flags, PQ_ACTIVE);
1386			uvmexp.active--;
1387		}
1388		if (pg->pg_flags & PQ_INACTIVE) {
1389			if (pg->pg_flags & PQ_SWAPBACKED)
1390				TAILQ_REMOVE(&uvm.page_inactive_swp, pg, pageq);
1391			else
1392				TAILQ_REMOVE(&uvm.page_inactive_obj, pg, pageq);
1393			atomic_clearbits_int(&pg->pg_flags, PQ_INACTIVE);
1394			uvmexp.inactive--;
1395		}
1396		uvmexp.wired++;
1397	}
1398	pg->wire_count++;
1399}
1400
1401/*
1402 * uvm_pageunwire: unwire the page.
1403 *
1404 * => activate if wire count goes to zero.
1405 * => caller must lock page queues
1406 */
1407void
1408uvm_pageunwire(struct vm_page *pg)
1409{
1410	pg->wire_count--;
1411	if (pg->wire_count == 0) {
1412		TAILQ_INSERT_TAIL(&uvm.page_active, pg, pageq);
1413		uvmexp.active++;
1414		atomic_setbits_int(&pg->pg_flags, PQ_ACTIVE);
1415		uvmexp.wired--;
1416	}
1417}
1418
1419/*
1420 * uvm_pagedeactivate: deactivate page -- no pmaps have access to page
1421 *
1422 * => caller must lock page queues
1423 * => caller must check to make sure page is not wired
1424 * => object that page belongs to must be locked (so we can adjust pg->flags)
1425 */
1426void
1427uvm_pagedeactivate(struct vm_page *pg)
1428{
1429	if (pg->pg_flags & PQ_ACTIVE) {
1430		TAILQ_REMOVE(&uvm.page_active, pg, pageq);
1431		atomic_clearbits_int(&pg->pg_flags, PQ_ACTIVE);
1432		uvmexp.active--;
1433	}
1434	if ((pg->pg_flags & PQ_INACTIVE) == 0) {
1435		KASSERT(pg->wire_count == 0);
1436		if (pg->pg_flags & PQ_SWAPBACKED)
1437			TAILQ_INSERT_TAIL(&uvm.page_inactive_swp, pg, pageq);
1438		else
1439			TAILQ_INSERT_TAIL(&uvm.page_inactive_obj, pg, pageq);
1440		atomic_setbits_int(&pg->pg_flags, PQ_INACTIVE);
1441		uvmexp.inactive++;
1442		pmap_clear_reference(pg);
1443		/*
1444		 * update the "clean" bit.  this isn't 100%
1445		 * accurate, and doesn't have to be.  we'll
1446		 * re-sync it after we zap all mappings when
1447		 * scanning the inactive list.
1448		 */
1449		if ((pg->pg_flags & PG_CLEAN) != 0 &&
1450		    pmap_is_modified(pg))
1451			atomic_clearbits_int(&pg->pg_flags, PG_CLEAN);
1452	}
1453}
1454
1455/*
1456 * uvm_pageactivate: activate page
1457 *
1458 * => caller must lock page queues
1459 */
1460void
1461uvm_pageactivate(struct vm_page *pg)
1462{
1463	if (pg->pg_flags & PQ_INACTIVE) {
1464		if (pg->pg_flags & PQ_SWAPBACKED)
1465			TAILQ_REMOVE(&uvm.page_inactive_swp, pg, pageq);
1466		else
1467			TAILQ_REMOVE(&uvm.page_inactive_obj, pg, pageq);
1468		atomic_clearbits_int(&pg->pg_flags, PQ_INACTIVE);
1469		uvmexp.inactive--;
1470	}
1471	if (pg->wire_count == 0) {
1472
1473		/*
1474		 * if page is already active, remove it from list so we
1475		 * can put it at tail.  if it wasn't active, then mark
1476		 * it active and bump active count
1477		 */
1478		if (pg->pg_flags & PQ_ACTIVE)
1479			TAILQ_REMOVE(&uvm.page_active, pg, pageq);
1480		else {
1481			atomic_setbits_int(&pg->pg_flags, PQ_ACTIVE);
1482			uvmexp.active++;
1483		}
1484
1485		TAILQ_INSERT_TAIL(&uvm.page_active, pg, pageq);
1486	}
1487}
1488
1489/*
1490 * uvm_pagezero: zero fill a page
1491 *
1492 * => if page is part of an object then the object should be locked
1493 *	to protect pg->flags.
1494 */
1495void
1496uvm_pagezero(struct vm_page *pg)
1497{
1498	atomic_clearbits_int(&pg->pg_flags, PG_CLEAN);
1499	pmap_zero_page(pg);
1500}
1501
1502/*
1503 * uvm_pagecopy: copy a page
1504 *
1505 * => if page is part of an object then the object should be locked
1506 *	to protect pg->flags.
1507 */
1508void
1509uvm_pagecopy(struct vm_page *src, struct vm_page *dst)
1510{
1511	atomic_clearbits_int(&dst->pg_flags, PG_CLEAN);
1512	pmap_copy_page(src, dst);
1513}
1514
1515/*
1516 * uvm_page_lookup_freelist: look up the free list for the specified page
1517 */
1518int
1519uvm_page_lookup_freelist(struct vm_page *pg)
1520{
1521#if VM_PHYSSEG_MAX == 1
1522	return (vm_physmem[0].free_list);
1523#else
1524	int lcv;
1525
1526	lcv = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), NULL);
1527	KASSERT(lcv != -1);
1528	return (vm_physmem[lcv].free_list);
1529#endif
1530}
1531