uvm_page.c revision 1.3
/*	$OpenBSD: uvm_page.c,v 1.3 1999/07/23 14:47:06 ho Exp $	*/
/*	$NetBSD: uvm_page.c,v 1.15 1998/10/18 23:50:00 chs Exp $	*/

/*
 * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
 *         >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
 */
/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Charles D. Cranor,
 *      Washington University, the University of California, Berkeley and
 *      its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_page.c   8.3 (Berkeley) 3/21/94
 * from: Id: uvm_page.c,v 1.1.2.18 1998/02/06 05:24:42 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * uvm_page.c: page ops.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/proc.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>

#define UVM_PAGE                /* pull in uvm_page.h functions */
#include <uvm/uvm.h>

/*
 * global vars... XXXCDC: move to uvm. structure.
 */

/*
 * physical memory config is stored in vm_physmem.
 */

struct vm_physseg vm_physmem[VM_PHYSSEG_MAX];	/* XXXCDC: uvm.physmem */
int vm_nphysseg = 0;				/* XXXCDC: uvm.nphysseg */

/*
 * local variables
 */

/*
 * these variables record the values returned by vm_page_bootstrap,
 * for debugging purposes.  The implementation of uvm_pageboot_alloc
 * and pmap_startup here also uses them internally.
 */

static vaddr_t      virtual_space_start;
static vaddr_t      virtual_space_end;

/*
 * we use a hash table with only one bucket during bootup.  we will
 * later rehash (resize) the hash table once malloc() is ready.
 * we statically allocate the bootstrap bucket below...
 */

static struct pglist uvm_bootbucket;

/*
 * local prototypes
 */

static void uvm_pageinsert __P((struct vm_page *));


/*
 * inline functions
 */

/*
 * uvm_pageinsert: insert a page in the object and the hash table
 *
 * => caller must lock object
 * => caller must lock page queues
 * => caller should have already set pg's object and offset pointers
 *    and bumped the version counter
 */

__inline static void
uvm_pageinsert(pg)
	struct vm_page *pg;
{
	struct pglist *buck;
	int s;

#ifdef DIAGNOSTIC
	if (pg->flags & PG_TABLED)
		panic("uvm_pageinsert: already inserted");
#endif

	buck = &uvm.page_hash[uvm_pagehash(pg->uobject,pg->offset)];
	s = splimp();
	simple_lock(&uvm.hashlock);
	TAILQ_INSERT_TAIL(buck, pg, hashq);	/* put in hash */
	simple_unlock(&uvm.hashlock);
	splx(s);

	TAILQ_INSERT_TAIL(&pg->uobject->memq, pg, listq); /* put in object */
	pg->flags |= PG_TABLED;
	pg->uobject->uo_npages++;

}

/*
 * uvm_pageremove: remove page from object and hash
 *
 * => caller must lock object
 * => caller must lock page queues
 */

void __inline
uvm_pageremove(pg)
	struct vm_page *pg;
{
	struct pglist *buck;
	int s;

#ifdef DIAGNOSTIC
	if ((pg->flags & (PG_FAULTING)) != 0)
		panic("uvm_pageremove: page is faulting");
#endif

	if ((pg->flags & PG_TABLED) == 0)
		return;				/* XXX: log */

	buck = &uvm.page_hash[uvm_pagehash(pg->uobject,pg->offset)];
	s = splimp();
	simple_lock(&uvm.hashlock);
	TAILQ_REMOVE(buck, pg, hashq);
	simple_unlock(&uvm.hashlock);
	splx(s);

	/* object should be locked */
	TAILQ_REMOVE(&pg->uobject->memq, pg, listq);

	pg->flags &= ~PG_TABLED;
	pg->uobject->uo_npages--;
	pg->uobject = NULL;
	pg->version++;

}

/*
 * uvm_page_init: init the page system.   called from uvm_init().
 *
 * => we return the range of kernel virtual memory in kvm_startp/kvm_endp
 */

void
uvm_page_init(kvm_startp, kvm_endp)
	vaddr_t *kvm_startp, *kvm_endp;
{
	int freepages, pagecount;
	vm_page_t pagearray;
	int lcv, n, i;
	paddr_t paddr;


	/*
	 * step 1: init the page queues and page queue locks
	 */
	for (lcv = 0; lcv < VM_NFREELIST; lcv++)
	  TAILQ_INIT(&uvm.page_free[lcv]);
	TAILQ_INIT(&uvm.page_active);
	TAILQ_INIT(&uvm.page_inactive_swp);
	TAILQ_INIT(&uvm.page_inactive_obj);
	simple_lock_init(&uvm.pageqlock);
	simple_lock_init(&uvm.fpageqlock);

	/*
	 * step 2: init the <obj,offset> => <page> hash table. for now
	 * we just have one bucket (the bootstrap bucket).   later on we
	 * will malloc() new buckets as we dynamically resize the hash table.
	 */

	uvm.page_nhash = 1;			/* 1 bucket */
	uvm.page_hashmask = 0;		/* mask for hash function */
	uvm.page_hash = &uvm_bootbucket;	/* install bootstrap bucket */
	TAILQ_INIT(uvm.page_hash);		/* init hash table */
	simple_lock_init(&uvm.hashlock);	/* init hash table lock */

	/*
	 * step 3: allocate vm_page structures.
	 */

	/*
	 * sanity check:
	 * before calling this function the MD code is expected to register
	 * some free RAM with the uvm_page_physload() function.   our job
	 * now is to allocate vm_page structures for this memory.
	 */

	if (vm_nphysseg == 0)
		panic("vm_page_bootstrap: no memory pre-allocated");

	/*
	 * first calculate the number of free pages...
	 *
	 * note that we use start/end rather than avail_start/avail_end.
	 * this allows us to allocate extra vm_page structures in case we
	 * want to return some memory to the pool after booting.
	 */

	freepages = 0;
	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
		freepages += (vm_physmem[lcv].end - vm_physmem[lcv].start);

	/*
	 * we now know we have (PAGE_SIZE * freepages) bytes of memory we can
	 * use.   for each page of memory we use we need a vm_page structure.
	 * thus, the total number of pages we can use is the total size of
	 * the memory divided by the PAGE_SIZE plus the size of the vm_page
	 * structure.   we add one to freepages as a fudge factor to avoid
	 * truncation errors (since we can only allocate in terms of whole
	 * pages).
	 */
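	/*
	 * illustrative example (sizes are port dependent, figures here are
	 * only an assumption): with a 4096 byte page and, say, a 64 byte
	 * struct vm_page, each managed page really costs 4160 bytes of the
	 * raw memory, so pagecount comes out slightly smaller than freepages
	 * and roughly 1.5% of the registered RAM goes to the vm_page array.
	 */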

	pagecount = ((freepages + 1) << PAGE_SHIFT) /
	    (PAGE_SIZE + sizeof(struct vm_page));
	pagearray = (vm_page_t)uvm_pageboot_alloc(pagecount *
	    sizeof(struct vm_page));
	bzero(pagearray, pagecount * sizeof(struct vm_page));

	/*
	 * step 4: init the vm_page structures and put them in the correct
	 * place...
	 */

	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {

		n = vm_physmem[lcv].end - vm_physmem[lcv].start;
		if (n > pagecount) {
			printf("uvm_page_init: lost %d page(s) in init\n",
			    n - pagecount);
			panic("uvm_page_init");  /* XXXCDC: shouldn't happen? */
			/* n = pagecount; */
		}
		/* set up page array pointers */
		vm_physmem[lcv].pgs = pagearray;
		pagearray += n;
		pagecount -= n;
		vm_physmem[lcv].lastpg = vm_physmem[lcv].pgs + (n - 1);

		/* init and free vm_pages (we've already zeroed them) */
		paddr = ptoa(vm_physmem[lcv].start);
		for (i = 0 ; i < n ; i++, paddr += PAGE_SIZE) {
			vm_physmem[lcv].pgs[i].phys_addr = paddr;
			if (atop(paddr) >= vm_physmem[lcv].avail_start &&
			    atop(paddr) <= vm_physmem[lcv].avail_end) {
				uvmexp.npages++;
				/* add page to free pool */
				uvm_pagefree(&vm_physmem[lcv].pgs[i]);
			}
		}
	}
	/*
	 * step 5: pass up the values of virtual_space_start and
	 * virtual_space_end (obtained by uvm_pageboot_alloc) to the upper
	 * layers of the VM.
	 */

	*kvm_startp = round_page(virtual_space_start);
	*kvm_endp = trunc_page(virtual_space_end);

	/*
	 * step 6: init pagedaemon lock
	 */

	simple_lock_init(&uvm.pagedaemon_lock);

	/*
	 * step 7: init reserve thresholds
	 * XXXCDC - values may need adjusting
	 */
	uvmexp.reserve_pagedaemon = 1;
	uvmexp.reserve_kernel = 5;

	/*
	 * done!
	 */

}

/*
 * uvm_setpagesize: set the page size
 *
 * => sets page_shift and page_mask from uvmexp.pagesize.
 * => XXXCDC: move global vars.
 */

void
uvm_setpagesize()
{
	if (uvmexp.pagesize == 0)
		uvmexp.pagesize = DEFAULT_PAGE_SIZE;
	uvmexp.pagemask = uvmexp.pagesize - 1;
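	/*
	 * a power of two has no bits in common with (itself - 1), so a
	 * non-zero result below means pagesize is not a power of two.
	 */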
	if ((uvmexp.pagemask & uvmexp.pagesize) != 0)
		panic("uvm_setpagesize: page size not a power of two");
	for (uvmexp.pageshift = 0; ; uvmexp.pageshift++)
		if ((1 << uvmexp.pageshift) == uvmexp.pagesize)
			break;
}

/*
 * uvm_pageboot_alloc: steal memory from physmem for bootstrapping
 */

vaddr_t
uvm_pageboot_alloc(size)
	vsize_t size;
{
#if defined(PMAP_STEAL_MEMORY)
	vaddr_t addr;

	/*
	 * defer bootstrap allocation to MD code (it may want to allocate
	 * from a direct-mapped segment).  pmap_steal_memory should round
	 * off virtual_space_start/virtual_space_end.
	 */

	addr = pmap_steal_memory(size, &virtual_space_start,
	    &virtual_space_end);

	return(addr);

#else /* !PMAP_STEAL_MEMORY */

	vaddr_t addr, vaddr;
	paddr_t paddr;

	/* round to page size */
	size = round_page(size);

	/*
	 * on first call to this function init ourselves.   we detect this
	 * by checking virtual_space_start/end which are in the zero'd BSS area.
	 */

	if (virtual_space_start == virtual_space_end) {
		pmap_virtual_space(&virtual_space_start, &virtual_space_end);

		/* round it the way we like it */
		virtual_space_start = round_page(virtual_space_start);
		virtual_space_end = trunc_page(virtual_space_end);
	}

	/*
	 * allocate virtual memory for this request
	 */

	addr = virtual_space_start;
	virtual_space_start += size;

	/*
	 * allocate and mapin physical pages to back new virtual pages
	 */

	for (vaddr = round_page(addr) ; vaddr < addr + size ;
	    vaddr += PAGE_SIZE) {

		if (!uvm_page_physget(&paddr))
			panic("uvm_pageboot_alloc: out of memory");

		/* XXX: should be wired, but some pmaps don't like that ... */
#if defined(PMAP_NEW)
		pmap_kenter_pa(vaddr, paddr, VM_PROT_READ|VM_PROT_WRITE);
#else
		pmap_enter(pmap_kernel(), vaddr, paddr,
		    VM_PROT_READ|VM_PROT_WRITE, FALSE);
#endif

	}
	return(addr);
#endif	/* PMAP_STEAL_MEMORY */
}

#if !defined(PMAP_STEAL_MEMORY)
/*
 * uvm_page_physget: "steal" one page from the vm_physmem structure.
 *
 * => attempt to allocate it off the end of a segment in which the "avail"
 *    values match the start/end values.   if we can't do that, then we
 *    will advance both values (making them equal, and removing some
 *    vm_page structures from the non-avail area).
 * => return false if out of memory.
 */

boolean_t
uvm_page_physget(paddrp)
	paddr_t *paddrp;
{
	int lcv, x;

	/* pass 1: try allocating from a matching end */
#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
	for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--)
#else
	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
#endif
	{

		if (vm_physmem[lcv].pgs)
			panic("vm_page_physget: called _after_ bootstrap");

		/* try from front */
		if (vm_physmem[lcv].avail_start == vm_physmem[lcv].start &&
		    vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) {
			*paddrp = ptoa(vm_physmem[lcv].avail_start);
			vm_physmem[lcv].avail_start++;
			vm_physmem[lcv].start++;
			/* nothing left?   nuke it */
			if (vm_physmem[lcv].avail_start ==
			    vm_physmem[lcv].end) {
				if (vm_nphysseg == 1)
				    panic("vm_page_physget: out of memory!");
				vm_nphysseg--;
				for (x = lcv ; x < vm_nphysseg ; x++)
					/* structure copy */
					vm_physmem[x] = vm_physmem[x+1];
			}
			return (TRUE);
		}

		/* try from rear */
		if (vm_physmem[lcv].avail_end == vm_physmem[lcv].end &&
		    vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) {
			*paddrp = ptoa(vm_physmem[lcv].avail_end - 1);
			vm_physmem[lcv].avail_end--;
			vm_physmem[lcv].end--;
			/* nothing left?   nuke it */
			if (vm_physmem[lcv].avail_end ==
			    vm_physmem[lcv].start) {
				if (vm_nphysseg == 1)
				    panic("vm_page_physget: out of memory!");
				vm_nphysseg--;
				for (x = lcv ; x < vm_nphysseg ; x++)
					/* structure copy */
					vm_physmem[x] = vm_physmem[x+1];
			}
			return (TRUE);
		}
	}

	/* pass2: forget about matching ends, just allocate something */
#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
	for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--)
#else
	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
#endif
	{

		/* any room in this bank? */
		if (vm_physmem[lcv].avail_start >= vm_physmem[lcv].avail_end)
			continue;  /* nope */

		*paddrp = ptoa(vm_physmem[lcv].avail_start);
		vm_physmem[lcv].avail_start++;
		/* truncate! */
		vm_physmem[lcv].start = vm_physmem[lcv].avail_start;

		/* nothing left?   nuke it */
		if (vm_physmem[lcv].avail_start == vm_physmem[lcv].end) {
			if (vm_nphysseg == 1)
				panic("vm_page_physget: out of memory!");
			vm_nphysseg--;
			for (x = lcv ; x < vm_nphysseg ; x++)
				/* structure copy */
				vm_physmem[x] = vm_physmem[x+1];
		}
		return (TRUE);
	}

	return (FALSE);        /* whoops! */
}
#endif /* PMAP_STEAL_MEMORY */

/*
 * uvm_page_physload: load physical memory into VM system
 *
 * => all args are PFs (page frame numbers)
 * => all pages in start/end get vm_page structures
 * => areas marked by avail_start/avail_end get added to the free page pool
 * => we are limited to VM_PHYSSEG_MAX physical memory segments
 */
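/*
 * e.g. a port's bootstrap code might hand each chunk of managed RAM to
 * UVM with something along the lines of
 *
 *	uvm_page_physload(atop(seg_start), atop(seg_end),
 *	    atop(avail_start), atop(avail_end), VM_FREELIST_DEFAULT);
 *
 * (the segment symbols above are machine dependent placeholders; this is
 * only an illustration of the calling convention, not code from any port)
 */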

void
uvm_page_physload(start, end, avail_start, avail_end, free_list)
	vaddr_t start, end, avail_start, avail_end;
	int free_list;
{
	int preload, lcv;
	psize_t npages;
	struct vm_page *pgs;
	struct vm_physseg *ps;

	if (uvmexp.pagesize == 0)
		panic("vm_page_physload: page size not set!");

	if (free_list >= VM_NFREELIST || free_list < VM_FREELIST_DEFAULT)
		panic("uvm_page_physload: bad free list %d\n", free_list);

	/*
	 * do we have room?
	 */
	if (vm_nphysseg == VM_PHYSSEG_MAX) {
		printf("vm_page_physload: unable to load physical memory "
		    "segment\n");
		printf("\t%d segments allocated, ignoring 0x%lx -> 0x%lx\n",
		    VM_PHYSSEG_MAX, start, end);
		return;
	}

	/*
	 * check to see if this is a "preload" (i.e. uvm_mem_init hasn't been
	 * called yet, so malloc is not available).
	 */
	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {
		if (vm_physmem[lcv].pgs)
			break;
	}
	preload = (lcv == vm_nphysseg);

	/*
	 * if VM is already running, attempt to malloc() vm_page structures
	 */
	if (!preload) {
#if defined(VM_PHYSSEG_NOADD)
		panic("vm_page_physload: tried to add RAM after vm_mem_init");
#else
		/* XXXCDC: need some sort of lockout for this case */
		paddr_t paddr;
		npages = end - start;  /* # of pages */
		MALLOC(pgs, struct vm_page *, sizeof(struct vm_page) * npages,
					 M_VMPAGE, M_NOWAIT);
		if (pgs == NULL) {
			printf("vm_page_physload: can not malloc vm_page "
			    "structs for segment\n");
			printf("\tignoring 0x%lx -> 0x%lx\n", start, end);
			return;
		}
		/* zero data, init phys_addr and free_list, and free pages */
		bzero(pgs, sizeof(struct vm_page) * npages);
		for (lcv = 0, paddr = ptoa(start) ;
				 lcv < npages ; lcv++, paddr += PAGE_SIZE) {
			pgs[lcv].phys_addr = paddr;
			pgs[lcv].free_list = free_list;
			if (atop(paddr) >= avail_start &&
			    atop(paddr) <= avail_end)
				uvm_pagefree(&pgs[lcv]);
		}
		/* XXXCDC: incomplete: need to update uvmexp.free, what else? */
		/* XXXCDC: need hook to tell pmap to rebuild pv_list, etc... */
#endif
	} else {

		/* gcc complains if these don't get init'd */
		pgs = NULL;
		npages = 0;

	}

	/*
	 * now insert us in the proper place in vm_physmem[]
	 */

#if (VM_PHYSSEG_STRAT == VM_PSTRAT_RANDOM)

	/* random: put it at the end (easy!) */
	ps = &vm_physmem[vm_nphysseg];

#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)

	{
		int x;
		/* sort by address for binary search */
		for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
			if (start < vm_physmem[lcv].start)
				break;
		ps = &vm_physmem[lcv];
		/* move back other entries, if necessary ... */
		for (x = vm_nphysseg ; x > lcv ; x--)
			/* structure copy */
			vm_physmem[x] = vm_physmem[x - 1];
	}

#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)

	{
		int x;
		/* sort by largest segment first */
		for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
			if ((end - start) >
			    (vm_physmem[lcv].end - vm_physmem[lcv].start))
				break;
		ps = &vm_physmem[lcv];
		/* move back other entries, if necessary ... */
		for (x = vm_nphysseg ; x > lcv ; x--)
			/* structure copy */
			vm_physmem[x] = vm_physmem[x - 1];
	}

#else

	panic("vm_page_physload: unknown physseg strategy selected!");

#endif

	ps->start = start;
	ps->end = end;
	ps->avail_start = avail_start;
	ps->avail_end = avail_end;
	if (preload) {
		ps->pgs = NULL;
	} else {
		ps->pgs = pgs;
		ps->lastpg = pgs + npages - 1;
	}
	ps->free_list = free_list;
	vm_nphysseg++;

	/*
	 * done!
	 */

	if (!preload)
		uvm_page_rehash();

	return;
}

/*
 * uvm_page_rehash: reallocate hash table based on number of free pages.
 */

void
uvm_page_rehash()
{
	int freepages, lcv, bucketcount, s, oldcount;
	struct pglist *newbuckets, *oldbuckets;
	struct vm_page *pg;

	/*
	 * compute number of pages that can go in the free pool
	 */

	freepages = 0;
	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
		freepages +=
		    (vm_physmem[lcv].avail_end - vm_physmem[lcv].avail_start);

	/*
	 * compute number of buckets needed for this number of pages
	 */
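	/*
	 * i.e. round up to the next power of two so that page_hashmask
	 * (bucketcount - 1) works as a simple bit mask; this gives roughly
	 * one hash bucket per free page.
	 */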

	bucketcount = 1;
	while (bucketcount < freepages)
		bucketcount = bucketcount * 2;

	/*
	 * malloc new buckets
	 */

	MALLOC(newbuckets, struct pglist *, sizeof(struct pglist) * bucketcount,
					 M_VMPBUCKET, M_NOWAIT);
	if (newbuckets == NULL) {
		printf("vm_page_physrehash: WARNING: could not grow page "
		    "hash table\n");
		return;
	}
	for (lcv = 0 ; lcv < bucketcount ; lcv++)
		TAILQ_INIT(&newbuckets[lcv]);

	/*
	 * now replace the old buckets with the new ones and rehash everything
	 */

	s = splimp();
	simple_lock(&uvm.hashlock);
	/* swap old for new ... */
	oldbuckets = uvm.page_hash;
	oldcount = uvm.page_nhash;
	uvm.page_hash = newbuckets;
	uvm.page_nhash = bucketcount;
	uvm.page_hashmask = bucketcount - 1;  /* power of 2 */

	/* ... and rehash */
	for (lcv = 0 ; lcv < oldcount ; lcv++) {
		while ((pg = oldbuckets[lcv].tqh_first) != NULL) {
			TAILQ_REMOVE(&oldbuckets[lcv], pg, hashq);
			TAILQ_INSERT_TAIL(
			  &uvm.page_hash[uvm_pagehash(pg->uobject, pg->offset)],
			  pg, hashq);
		}
	}
	simple_unlock(&uvm.hashlock);
	splx(s);

	/*
	 * free old bucket array if we malloc'd it previously
	 */

	if (oldbuckets != &uvm_bootbucket)
		FREE(oldbuckets, M_VMPBUCKET);

	/*
	 * done
	 */
	return;
}


#if 1 /* XXXCDC: TMP TMP TMP DEBUG DEBUG DEBUG */

void uvm_page_physdump __P((void)); /* SHUT UP GCC */

/* call from DDB */
void
uvm_page_physdump()
{
	int lcv;

	printf("rehash: physical memory config [segs=%d of %d]:\n",
				 vm_nphysseg, VM_PHYSSEG_MAX);
	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
		printf("0x%lx->0x%lx [0x%lx->0x%lx]\n", vm_physmem[lcv].start,
		    vm_physmem[lcv].end, vm_physmem[lcv].avail_start,
		    vm_physmem[lcv].avail_end);
	printf("STRATEGY = ");
	switch (VM_PHYSSEG_STRAT) {
	case VM_PSTRAT_RANDOM: printf("RANDOM\n"); break;
	case VM_PSTRAT_BSEARCH: printf("BSEARCH\n"); break;
	case VM_PSTRAT_BIGFIRST: printf("BIGFIRST\n"); break;
	default: printf("<<UNKNOWN>>!!!!\n");
	}
	printf("number of buckets = %d\n", uvm.page_nhash);
}
#endif

/*
 * uvm_pagealloc_strat: allocate vm_page from a particular free list.
 *
 * => return null if no pages free
 * => wake up pagedaemon if number of free pages drops below low water mark
 * => if obj != NULL, obj must be locked (to put in hash)
 * => if anon != NULL, anon must be locked (to put in anon)
 * => only one of obj or anon can be non-null
 * => caller must activate/deactivate page if it is not wired.
 * => free_list is ignored if strat == UVM_PGA_STRAT_NORMAL.
 */
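/*
 * note: ordinary callers are expected to go through the uvm_pagealloc()
 * wrapper (see uvm_page.h), which should boil down to calling this with
 * UVM_PGA_STRAT_NORMAL and free_list 0, rather than calling this directly.
 */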

struct vm_page *
uvm_pagealloc_strat(obj, off, anon, strat, free_list)
	struct uvm_object *obj;
	vaddr_t off;
	struct vm_anon *anon;
	int strat, free_list;
{
	int lcv, s;
	struct vm_page *pg;
	struct pglist *freeq;

#ifdef DIAGNOSTIC
	/* sanity check */
	if (obj && anon)
		panic("uvm_pagealloc: obj and anon != NULL");
#endif

	s = splimp();

	uvm_lock_fpageq();		/* lock free page queue */

	/*
	 * check to see if we need to wake the pagedaemon to generate
	 * some free pages.
	 */

	if (uvmexp.free < uvmexp.freemin || (uvmexp.free < uvmexp.freetarg &&
	    uvmexp.inactive < uvmexp.inactarg))
		thread_wakeup(&uvm.pagedaemon);

	/*
	 * fail if any of these conditions is true:
	 * [1]  there really are no free pages, or
	 * [2]  only kernel "reserved" pages remain and
	 *        the page isn't being allocated to a kernel object.
	 * [3]  only pagedaemon "reserved" pages remain and
	 *        the requestor isn't the pagedaemon.
	 */

	if ((uvmexp.free <= uvmexp.reserve_kernel &&
	     !(obj && obj->uo_refs == UVM_OBJ_KERN)) ||
	    (uvmexp.free <= uvmexp.reserve_pagedaemon &&
	     !(obj == uvmexp.kmem_object && curproc == uvm.pagedaemon_proc)))
		goto fail;

 again:
	switch (strat) {
	case UVM_PGA_STRAT_NORMAL:
		/* Check all freelists in descending priority order. */
		for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
			freeq = &uvm.page_free[lcv];
			if ((pg = freeq->tqh_first) != NULL)
				goto gotit;
		}

		/* No pages free! */
		goto fail;

	case UVM_PGA_STRAT_ONLY:
	case UVM_PGA_STRAT_FALLBACK:
		/* Attempt to allocate from the specified free list. */
#ifdef DIAGNOSTIC
		if (free_list >= VM_NFREELIST || free_list < 0)
			panic("uvm_pagealloc_strat: bad free list %d",
			    free_list);
#endif
		freeq = &uvm.page_free[free_list];
		if ((pg = freeq->tqh_first) != NULL)
			goto gotit;

		/* Fall back, if possible. */
		if (strat == UVM_PGA_STRAT_FALLBACK) {
			strat = UVM_PGA_STRAT_NORMAL;
			goto again;
		}

		/* No pages free! */
		goto fail;

	default:
		panic("uvm_pagealloc_strat: bad strat %d", strat);
		/* NOTREACHED */
	}

 gotit:
	TAILQ_REMOVE(freeq, pg, pageq);
	uvmexp.free--;

	uvm_unlock_fpageq();		/* unlock free page queue */
	splx(s);

	pg->offset = off;
	pg->uobject = obj;
	pg->uanon = anon;
	pg->flags = PG_BUSY|PG_CLEAN|PG_FAKE;
	pg->version++;
	pg->wire_count = 0;
	pg->loan_count = 0;
	if (anon) {
		anon->u.an_page = pg;
		pg->pqflags = PQ_ANON;
	} else {
		if (obj)
			uvm_pageinsert(pg);
		pg->pqflags = 0;
	}
#if defined(UVM_PAGE_TRKOWN)
	pg->owner_tag = NULL;
#endif
	UVM_PAGE_OWN(pg, "new alloc");

	return(pg);

 fail:
	uvm_unlock_fpageq();
	splx(s);
	return (NULL);
}

/*
 * uvm_pagealloc_contig: allocate contiguous memory.
 *
 * XXX - fix comment.
 */

vaddr_t
uvm_pagealloc_contig(size, low, high, alignment)
	vaddr_t size;
	vaddr_t low, high;
	vaddr_t alignment;
{
	struct pglist pglist;
	struct vm_page *pg;
	vaddr_t addr, temp_addr;

	size = round_page(size);

	TAILQ_INIT(&pglist);
	if (uvm_pglistalloc(size, low, high, alignment, 0,
			    &pglist, 1, FALSE))
	        return 0;
	addr = vm_map_min(kernel_map);
	if (uvm_map(kernel_map, &addr, size, NULL,
		    UVM_UNKNOWN_OFFSET, TRUE) != KERN_SUCCESS)
	        addr = 0;
	temp_addr = addr;
	for (pg = TAILQ_FIRST(&pglist); pg != NULL;
	     pg = TAILQ_NEXT(pg, pageq)) {
	        pg->uobject = uvm.kernel_object;
		pg->offset = temp_addr - vm_map_min(kernel_map);
		uvm_pageinsert(pg);
	        uvm_pagewire(pg);
#if defined(PMAP_NEW)
		pmap_kenter_pa(temp_addr, VM_PAGE_TO_PHYS(pg),
			       VM_PROT_READ|VM_PROT_WRITE);
#else
		pmap_enter(kernel_map->pmap, temp_addr, VM_PAGE_TO_PHYS(pg),
		    UVM_PROT_READ|UVM_PROT_WRITE, TRUE);
#endif
		temp_addr += PAGE_SIZE;
	}
	return addr;
}

/*
 * uvm_pagerealloc: reallocate a page from one object to another
 *
 * => both objects must be locked
 */

void
uvm_pagerealloc(pg, newobj, newoff)
	struct vm_page *pg;
	struct uvm_object *newobj;
	vaddr_t newoff;
{
	/*
	 * remove it from the old object
	 */

	if (pg->uobject) {
		uvm_pageremove(pg);
	}

	/*
	 * put it in the new object
	 */

	if (newobj) {
		pg->uobject = newobj;
		pg->offset = newoff;
		pg->version++;
		uvm_pageinsert(pg);
	}

	return;
}


/*
 * uvm_pagefree: free page
 *
 * => erase page's identity (i.e. remove from hash/object)
 * => put page on free list
 * => caller must lock owning object (either anon or uvm_object)
 * => caller must lock page queues
 * => assumes all valid mappings of pg are gone
 */

void
uvm_pagefree(pg)
	struct vm_page *pg;
{
	int s;
	int saved_loan_count = pg->loan_count;

	/*
	 * if the page was an object page (and thus "TABLED"), remove it
	 * from the object.
	 */

	if (pg->flags & PG_TABLED) {

		/*
		 * if the object page is on loan we are going to drop ownership.
		 * it is possible that an anon will take over as owner for this
		 * page later on.   the anon will want a !PG_CLEAN page so that
		 * it knows it needs to allocate swap if it wants to page the
		 * page out.
		 */

		if (saved_loan_count)
			pg->flags &= ~PG_CLEAN;	/* in case an anon takes over */

		uvm_pageremove(pg);

		/*
		 * if our page was on loan, then we just lost control over it
		 * (in fact, if it was loaned to an anon, the anon may have
		 * already taken over ownership of the page by now and thus
		 * changed the loan_count [e.g. in uvmfault_anonget()]) we just
		 * return (when the last loan is dropped, then the page can be
		 * freed by whatever was holding the last loan).
		 */
		if (saved_loan_count)
			return;

	} else if (saved_loan_count && (pg->pqflags & PQ_ANON)) {

		/*
		 * if our page is owned by an anon and is loaned out to the
		 * kernel then we just want to drop ownership and return.
		 * the kernel must free the page when all its loans clear ...
		 * note that the kernel can't change the loan status of our
		 * page as long as we are holding PQ lock.
		 */
		pg->pqflags &= ~PQ_ANON;
		pg->uanon = NULL;
		return;
	}

#ifdef DIAGNOSTIC
	if (saved_loan_count) {
		printf("uvm_pagefree: warning: freeing page with a loan "
		    "count of %d\n", saved_loan_count);
		panic("uvm_pagefree: loan count");
	}
#endif


	/*
	 * now remove the page from the queues
	 */

	if (pg->pqflags & PQ_ACTIVE) {
		TAILQ_REMOVE(&uvm.page_active, pg, pageq);
		pg->pqflags &= ~PQ_ACTIVE;
		uvmexp.active--;
	}
	if (pg->pqflags & PQ_INACTIVE) {
		if (pg->pqflags & PQ_SWAPBACKED)
			TAILQ_REMOVE(&uvm.page_inactive_swp, pg, pageq);
		else
			TAILQ_REMOVE(&uvm.page_inactive_obj, pg, pageq);
		pg->pqflags &= ~PQ_INACTIVE;
		uvmexp.inactive--;
	}

	/*
	 * if the page was wired, unwire it now.
	 */
	if (pg->wire_count) {
		pg->wire_count = 0;
		uvmexp.wired--;
	}

	/*
	 * and put on free queue
	 */

	s = splimp();
	uvm_lock_fpageq();
	TAILQ_INSERT_TAIL(&uvm.page_free[uvm_page_lookup_freelist(pg)],
	    pg, pageq);
	pg->pqflags = PQ_FREE;
#ifdef DEBUG
	pg->uobject = (void *)0xdeadbeef;
	pg->offset = 0xdeadbeef;
	pg->uanon = (void *)0xdeadbeef;
#endif
	uvmexp.free++;
	uvm_unlock_fpageq();
	splx(s);
}

#if defined(UVM_PAGE_TRKOWN)
/*
 * uvm_page_own: set or release page ownership
 *
 * => this is a debugging function that keeps track of who sets PG_BUSY
 *	and where they do it.   it can be used to track down problems
 *	such as a process setting "PG_BUSY" and never releasing it.
 * => page's object [if any] must be locked
 * => if "tag" is NULL then we are releasing page ownership
 */
void
uvm_page_own(pg, tag)
	struct vm_page *pg;
	char *tag;
{
	/* gain ownership? */
	if (tag) {
		if (pg->owner_tag) {
			printf("uvm_page_own: page %p already owned "
			    "by proc %d [%s]\n", pg,
			     pg->owner, pg->owner_tag);
			panic("uvm_page_own");
		}
		pg->owner = (curproc) ? curproc->p_pid : (pid_t) -1;
		pg->owner_tag = tag;
		return;
	}

	/* drop ownership */
	if (pg->owner_tag == NULL) {
		printf("uvm_page_own: dropping ownership of a non-owned "
		    "page (%p)\n", pg);
		panic("uvm_page_own");
	}
	pg->owner_tag = NULL;
	return;
}
#endif
