/*	$NetBSD: uvm_loan.c,v 1.57 2005/12/24 20:45:10 perry Exp $	*/

/*
 *
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Charles D. Cranor and
 *      Washington University.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * from: Id: uvm_loan.c,v 1.1.6.4 1998/02/06 05:08:43 chs Exp
 */

/*
 * uvm_loan.c: page loanout handler
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.57 2005/12/24 20:45:10 perry Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/mman.h>

#include <uvm/uvm.h>

/*
 * "loaned" pages are pages which are (read-only, copy-on-write) loaned
 * from the VM system to other parts of the kernel.   this allows page
 * copying to be avoided (e.g. you can loan pages from objs/anons to
 * the mbuf system).
 *
 * there are 3 types of loans possible:
 *  O->K  uvm_object page to wired kernel page (e.g. mbuf data area)
 *  A->K  anon page to wired kernel page (e.g. mbuf data area)
 *  O->A  uvm_object to anon loan (e.g. vnode page to an anon)
 * note that it is possible to have an O page loaned to both an A and K
 * at the same time.
 *
 * loans are tracked by pg->loan_count.  an O->A page will have both
 * a uvm_object and a vm_anon, but PQ_ANON will not be set.   this sort
 * of page is considered "owned" by the uvm_object (not the anon).
 *
 * each loan of a page to the kernel bumps the pg->wire_count.  the
 * kernel mappings for these pages will be read-only and wired.  since
 * the page will also be wired, it will not be a candidate for pageout,
 * and thus will never be pmap_page_protect()'d with VM_PROT_NONE.  a
 * write fault in the kernel to one of these pages will not cause
 * copy-on-write.  instead, the page fault is considered fatal.  this
 * is because the kernel mapping will have no way to look up the
 * object/anon which the page is owned by.  this is a good side-effect,
 * since a kernel write to a loaned page is an error.
 *
 * owners that want to free their pages and discover that they are
 * loaned out simply "disown" them (the page becomes an orphan).  these
 * pages should be freed when the last loan is dropped.   in some cases
 * an anon may "adopt" an orphaned page.
 *
 * locking: to read pg->loan_count either the owner or the page queues
 * must be locked.   to modify pg->loan_count, both the owner of the page
 * and the PQs must be locked.   pg->flags is (as always) locked by
 * the owner of the page.
 *
 * note that locking from the "loaned" side is tricky since the object
 * getting the loaned page has no reference to the page's owner and thus
 * the owner could "die" at any time.   in order to prevent the owner
 * from dying the page queues should be locked.   this forces us to sometimes
 * use "try" locking.
 *
 * loans are typically broken by the following events:
 *  1. user-level write fault to a loaned page
 *  2. pageout of clean+inactive O->A loaned page
 *  3. owner frees page (e.g. pager flush)
 *
 * note that loaning a page causes all mappings of the page to become
 * read-only (via pmap_page_protect).   this could have an unexpected
 * effect on normal "wired" pages if one is not careful (XXX).
 */
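
/*
 * example sketch of a typical O->K loan and its release; "map", "va",
 * "npages" and MAXPAGES are placeholders for caller-supplied values,
 * not names from this file:
 *
 *	struct vm_page *pgs[MAXPAGES];
 *	int error;
 *
 *	error = uvm_loan(map, va, npages << PAGE_SHIFT, pgs,
 *	    UVM_LOAN_TOPAGE);
 *	if (error == 0) {
 *		... access the wired, read-only loaned pages ...
 *		uvm_unloan(pgs, npages, UVM_LOAN_TOPAGE);
 *	}
 *
 * on error, uvm_loan() itself drops any partial loans it made, so the
 * caller need not call uvm_unloan() in that case.
 */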

/*
 * local prototypes
 */

static int	uvm_loananon(struct uvm_faultinfo *, void ***,
			     int, struct vm_anon *);
static int	uvm_loanuobj(struct uvm_faultinfo *, void ***,
			     int, vaddr_t);
static int	uvm_loanzero(struct uvm_faultinfo *, void ***, int);
static void	uvm_unloananon(struct vm_anon **, int);
static void	uvm_unloanpage(struct vm_page **, int);
static int	uvm_loanpage(struct vm_page **, int);


/*
 * inlines
 */

/*
 * uvm_loanentry: loan out pages in a map entry (helper fn for uvm_loan())
 *
 * => "ufi" is the result of a successful map lookup (meaning that
 *	on entry the map is locked by the caller)
 * => we may unlock and then relock the map if needed (for I/O)
 * => we put our output result in "output"
 * => we always return with the map unlocked
 * => possible return values:
 *	-1 == error, map is unlocked
 *	 0 == map relock error (try again!), map is unlocked
 *	>0 == number of pages we loaned, map is unlocked
 *
 * NOTE: We can live with this being an inline, because it is only called
 * from one place.
 */

static inline int
uvm_loanentry(struct uvm_faultinfo *ufi, void ***output, int flags)
{
	vaddr_t curaddr = ufi->orig_rvaddr;
	vsize_t togo = ufi->size;
	struct vm_aref *aref = &ufi->entry->aref;
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct vm_anon *anon;
	int rv, result = 0;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * lock us the rest of the way down (we unlock before return)
	 */
	if (aref->ar_amap)
		amap_lock(aref->ar_amap);
	if (uobj)
		simple_lock(&uobj->vmobjlock);

	/*
	 * loop until done
	 */
	while (togo) {

		/*
		 * find the page we want.   check the anon layer first.
		 */

		if (aref->ar_amap) {
			anon = amap_lookup(aref, curaddr - ufi->entry->start);
		} else {
			anon = NULL;
		}

		/* locked: map, amap, uobj */
		if (anon) {
			rv = uvm_loananon(ufi, output, flags, anon);
		} else if (uobj) {
			rv = uvm_loanuobj(ufi, output, flags, curaddr);
		} else if (UVM_ET_ISCOPYONWRITE(ufi->entry)) {
			rv = uvm_loanzero(ufi, output, flags);
		} else {
			uvmfault_unlockall(ufi, aref->ar_amap, uobj, NULL);
			rv = -1;
		}
		/* locked: if (rv > 0) => map, amap, uobj  [o.w. unlocked] */
		LOCK_ASSERT(rv > 0 || aref->ar_amap == NULL ||
		    !simple_lock_held(&aref->ar_amap->am_l));
		LOCK_ASSERT(rv > 0 || uobj == NULL ||
		    !simple_lock_held(&uobj->vmobjlock));

		/* total failure */
		if (rv < 0) {
			UVMHIST_LOG(loanhist, "failure %d", rv, 0,0,0);
			return (-1);
		}

		/* relock failed, need to do another lookup */
		if (rv == 0) {
			UVMHIST_LOG(loanhist, "relock failure %d", result,
			    0,0,0);
			return (result);
		}

		/*
		 * got it... advance to next page
		 */

		result++;
		togo -= PAGE_SIZE;
		curaddr += PAGE_SIZE;
	}

	/*
	 * unlock what we locked, unlock the maps and return
	 */

	if (aref->ar_amap)
		amap_unlock(aref->ar_amap);
	if (uobj)
		simple_unlock(&uobj->vmobjlock);
	uvmfault_unlockmaps(ufi, FALSE);
	UVMHIST_LOG(loanhist, "done %d", result, 0,0,0);
	return (result);
}

/*
 * normal functions
 */

/*
 * uvm_loan: loan pages in a map out to anons or to the kernel
 *
 * => map should be unlocked
 * => start and len should be multiples of PAGE_SIZE
 * => result is either an array of anons or vm_pages (depending on flags)
 * => flag values: UVM_LOAN_TOANON - loan to anons
 *                 UVM_LOAN_TOPAGE - loan to wired kernel page
 *    one and only one of these flags must be set!
 * => returns 0 (success), or an appropriate error number
 */

int
uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags)
{
	struct uvm_faultinfo ufi;
	void **result, **output;
	int rv, error;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * ensure that one and only one of the flags is set
	 */

	KASSERT(((flags & UVM_LOAN_TOANON) == 0) ^
		((flags & UVM_LOAN_TOPAGE) == 0));
	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);

	/*
	 * "output" is a pointer to the current place to put the loaned page.
	 */

	result = v;
	output = &result[0];	/* start at the beginning ... */

	/*
	 * while we've got pages to do
	 */

	while (len > 0) {

		/*
		 * fill in params for a call to uvmfault_lookup
		 */

		ufi.orig_map = map;
		ufi.orig_rvaddr = start;
		ufi.orig_size = len;

		/*
		 * do the lookup, the only time this will fail is if we hit on
		 * an unmapped region (an error)
		 */

		if (!uvmfault_lookup(&ufi, FALSE)) {
			error = ENOENT;
			goto fail;
		}

		/*
		 * map now locked.  now do the loanout...
		 */

		rv = uvm_loanentry(&ufi, &output, flags);
		if (rv < 0) {
			/* all unlocked due to error */
			error = EINVAL;
			goto fail;
		}

		/*
		 * done!  the map is unlocked.  advance, if possible.
		 *
		 * XXXCDC: could be recoded to hold the map lock with
		 *	   smarter code (but it only happens on map entry
		 *	   boundaries, so it isn't that bad).
		 */

		if (rv) {
			rv <<= PAGE_SHIFT;
			len -= rv;
			start += rv;
		}
	}
	UVMHIST_LOG(loanhist, "success", 0,0,0,0);
	return 0;

fail:
	/*
	 * failed to complete loans.  drop any loans and return failure code.
	 * map is already unlocked.
	 */

	if (output - result) {
		if (flags & UVM_LOAN_TOANON) {
			uvm_unloananon((struct vm_anon **)result,
			    output - result);
		} else {
			uvm_unloanpage((struct vm_page **)result,
			    output - result);
		}
	}
	UVMHIST_LOG(loanhist, "error %d", error,0,0,0);
	return (error);
}

/*
 * uvm_loananon: loan a page from an anon out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

int
uvm_loananon(struct uvm_faultinfo *ufi, void ***output, int flags,
    struct vm_anon *anon)
{
	struct vm_page *pg;
	int error;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * if we are loaning to "another" anon then it is easy, we just
	 * bump the reference count on the current anon and return a
	 * pointer to it (it becomes copy-on-write shared).
	 */

	if (flags & UVM_LOAN_TOANON) {
		simple_lock(&anon->an_lock);
		pg = anon->an_page;
		if (pg && (pg->pqflags & PQ_ANON) != 0 && anon->an_ref == 1) {
			if (pg->wire_count > 0) {
				UVMHIST_LOG(loanhist, "->A wired %p", pg,0,0,0);
				uvmfault_unlockall(ufi,
				    ufi->entry->aref.ar_amap,
				    ufi->entry->object.uvm_obj, anon);
				return (-1);
			}
			pmap_page_protect(pg, VM_PROT_READ);
		}
		anon->an_ref++;
		**output = anon;
		(*output)++;
		simple_unlock(&anon->an_lock);
		UVMHIST_LOG(loanhist, "->A done", 0,0,0,0);
		return (1);
	}

	/*
	 * we are loaning to a kernel-page.   we need to get the page
	 * resident so we can wire it.   uvmfault_anonget will handle
	 * this for us.
	 */

	simple_lock(&anon->an_lock);
	error = uvmfault_anonget(ufi, ufi->entry->aref.ar_amap, anon);

	/*
	 * if we were unable to get the anon, then uvmfault_anonget has
	 * unlocked everything and returned an error code.
	 */

	if (error) {
		UVMHIST_LOG(loanhist, "error %d", error,0,0,0);

		/* need to refault (i.e. refresh our lookup) ? */
		if (error == ERESTART) {
			return (0);
		}

		/* "try again"?   sleep a bit and retry ... */
		if (error == EAGAIN) {
			tsleep(&lbolt, PVM, "loanagain", 0);
			return (0);
		}

		/* otherwise flag it as an error */
		return (-1);
	}

	/*
	 * we have the page and its owner locked: do the loan now.
	 */

	pg = anon->an_page;
	uvm_lock_pageq();
	if (pg->wire_count > 0) {
		uvm_unlock_pageq();
		UVMHIST_LOG(loanhist, "->K wired %p", pg,0,0,0);
		KASSERT(pg->uobject == NULL);
		uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap,
		    ufi->entry->object.uvm_obj, anon);
		return (-1);
	}
	if (pg->loan_count == 0) {
		pmap_page_protect(pg, VM_PROT_READ);
	}
	pg->loan_count++;
	uvm_pagedequeue(pg);
	uvm_unlock_pageq();
	**output = pg;
	(*output)++;

	/* unlock anon and return success */
	if (pg->uobject)	/* XXXCDC: what if this is our uobj? bad */
		simple_unlock(&pg->uobject->vmobjlock);
	simple_unlock(&anon->an_lock);
	UVMHIST_LOG(loanhist, "->K done", 0,0,0,0);
	return (1);
}

/*
 * uvm_loanpage: loan out pages to kernel (->K)
 *
 * => pages should be object-owned and the object should be locked.
 * => in the case of error, the object might be unlocked and relocked.
 * => caller should busy the pages beforehand.
 * => pages will be unbusied.
 * => fail with EBUSY if we meet a wired page.
 */
static int
uvm_loanpage(struct vm_page **pgpp, int npages)
{
	int i;
	int error = 0;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	for (i = 0; i < npages; i++) {
		struct vm_page *pg = pgpp[i];

		KASSERT(pg->uobject != NULL);
		KASSERT(pg->uobject == pgpp[0]->uobject);
		KASSERT(!(pg->flags & (PG_RELEASED|PG_PAGEOUT)));
		LOCK_ASSERT(simple_lock_held(&pg->uobject->vmobjlock));
		KASSERT(pg->flags & PG_BUSY);

		uvm_lock_pageq();
		if (pg->wire_count > 0) {
			uvm_unlock_pageq();
			UVMHIST_LOG(loanhist, "wired %p", pg,0,0,0);
			error = EBUSY;
			break;
		}
		if (pg->loan_count == 0) {
			pmap_page_protect(pg, VM_PROT_READ);
		}
		pg->loan_count++;
		uvm_pagedequeue(pg);
		uvm_unlock_pageq();
	}

	uvm_page_unbusy(pgpp, npages);

	if (error) {
		/*
		 * backout what we've done
		 */
		struct simplelock *slock = &pgpp[0]->uobject->vmobjlock;

		simple_unlock(slock);
		uvm_unloan(pgpp, i, UVM_LOAN_TOPAGE);
		simple_lock(slock);
	}

	UVMHIST_LOG(loanhist, "done %d", error,0,0,0);
	return error;
}

/*
 * XXX UBC temp limit
 * number of pages to get at once.
 * should be <= MAX_READ_AHEAD in genfs_vnops.c
 */
#define	UVM_LOAN_GET_CHUNK	16

/*
 * uvm_loanuobjpages: loan pages from a uobj out (O->K)
 *
 * => uobj shouldn't be locked.  (we'll lock it)
 * => fail with EBUSY if we meet a wired page.
 */
int
uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int orignpages,
    struct vm_page **origpgpp)
{
	int ndone; /* # of pages loaned out */
	struct vm_page **pgpp;
	int error;
	int i;
	struct simplelock *slock;

	pgpp = origpgpp;
	for (ndone = 0; ndone < orignpages; ) {
		int npages;
		/* npendloan: # of pages busied but not loaned out yet. */
		int npendloan = 0xdead; /* XXX gcc */
reget:
		npages = MIN(UVM_LOAN_GET_CHUNK, orignpages - ndone);
		simple_lock(&uobj->vmobjlock);
		error = (*uobj->pgops->pgo_get)(uobj,
		    pgoff + (ndone << PAGE_SHIFT), pgpp, &npages, 0,
		    VM_PROT_READ, 0, PGO_SYNCIO);
		if (error == EAGAIN) {
			tsleep(&lbolt, PVM, "nfsread", 0);
			continue;
		}
		if (error)
			goto fail;

		KASSERT(npages > 0);

		/* loan and unbusy pages */
		slock = NULL;
		for (i = 0; i < npages; i++) {
			struct simplelock *nextslock; /* slock for next page */
			struct vm_page *pg = *pgpp;

			/* XXX assuming that the page is owned by uobj */
			KASSERT(pg->uobject != NULL);
			nextslock = &pg->uobject->vmobjlock;

			if (slock != nextslock) {
				if (slock) {
					KASSERT(npendloan > 0);
					error = uvm_loanpage(pgpp - npendloan,
					    npendloan);
					simple_unlock(slock);
					if (error)
						goto fail;
					ndone += npendloan;
					KASSERT(origpgpp + ndone == pgpp);
				}
				slock = nextslock;
				npendloan = 0;
				simple_lock(slock);
			}

			if ((pg->flags & PG_RELEASED) != 0) {
				/*
				 * release pages and try again.
				 */
				simple_unlock(slock);
				for (; i < npages; i++) {
					pg = pgpp[i];
					slock = &pg->uobject->vmobjlock;

					simple_lock(slock);
					uvm_lock_pageq();
					uvm_page_unbusy(&pg, 1);
					uvm_unlock_pageq();
					simple_unlock(slock);
				}
				goto reget;
			}

			npendloan++;
			pgpp++;
			KASSERT(origpgpp + ndone + npendloan == pgpp);
		}
		KASSERT(slock != NULL);
		KASSERT(npendloan > 0);
		error = uvm_loanpage(pgpp - npendloan, npendloan);
		simple_unlock(slock);
		if (error)
			goto fail;
		ndone += npendloan;
		KASSERT(origpgpp + ndone == pgpp);
	}

	return 0;

fail:
	uvm_unloan(origpgpp, ndone, UVM_LOAN_TOPAGE);

	return error;
}

/*
 * uvm_loanuobj: loan a page from a uobj out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

static int
uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va)
{
	struct vm_amap *amap = ufi->entry->aref.ar_amap;
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct vm_page *pg;
	struct vm_anon *anon;
	int error, npages;
	boolean_t locked;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * first we must make sure the page is resident.
	 *
	 * XXXCDC: duplicate code with uvm_fault().
	 */

	if (uobj->pgops->pgo_get) {	/* try locked pgo_get */
		npages = 1;
		pg = NULL;
		error = (*uobj->pgops->pgo_get)(uobj,
		    va - ufi->entry->start + ufi->entry->offset,
		    &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_LOCKED);
	} else {
		error = EIO;		/* must have pgo_get op */
	}

	/*
	 * check the result of the locked pgo_get.  if there is a problem,
	 * then we fail the loan.
	 */

	if (error && error != EBUSY) {
		uvmfault_unlockall(ufi, amap, uobj, NULL);
		return (-1);
	}

	/*
	 * if we need to unlock for I/O, do so now.
	 */

	if (error == EBUSY) {
		uvmfault_unlockall(ufi, amap, NULL, NULL);

		/* locked: uobj */
		npages = 1;
		error = (*uobj->pgops->pgo_get)(uobj,
		    va - ufi->entry->start + ufi->entry->offset,
		    &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_SYNCIO);
		/* locked: <nothing> */

		if (error) {
			if (error == EAGAIN) {
				tsleep(&lbolt, PVM, "fltagain2", 0);
				return (0);
			}
			return (-1);
		}

		/*
		 * pgo_get was a success.   attempt to relock everything.
		 */

		locked = uvmfault_relock(ufi);
		if (locked && amap)
			amap_lock(amap);
		simple_lock(&uobj->vmobjlock);

		/*
		 * verify that the page has not been released and re-verify
		 * that amap slot is still free.   if there is a problem we
		 * drop our lock (thus force a lookup refresh/retry).
		 */

		if ((pg->flags & PG_RELEASED) != 0 ||
		    (locked && amap && amap_lookup(&ufi->entry->aref,
		    ufi->orig_rvaddr - ufi->entry->start))) {
			if (locked)
				uvmfault_unlockall(ufi, amap, NULL, NULL);
			locked = FALSE;
		}

		/*
		 * didn't get the lock?   release the page and retry.
		 */

		if (locked == FALSE) {
			if (pg->flags & PG_WANTED) {
				wakeup(pg);
			}
			if (pg->flags & PG_RELEASED) {
				uvm_lock_pageq();
				uvm_pagefree(pg);
				uvm_unlock_pageq();
				return (0);
			}
			uvm_lock_pageq();
			uvm_pageactivate(pg);
			uvm_unlock_pageq();
			pg->flags &= ~(PG_BUSY|PG_WANTED);
			UVM_PAGE_OWN(pg, NULL);
			simple_unlock(&uobj->vmobjlock);
			return (0);
		}
	}

	/*
	 * at this point we have the page we want ("pg") marked PG_BUSY for us
	 * and we have all data structures locked.  do the loanout.  page can
	 * not be PG_RELEASED (we caught this above).
	 */

	if ((flags & UVM_LOAN_TOANON) == 0) {
		if (uvm_loanpage(&pg, 1)) {
			uvmfault_unlockall(ufi, amap, uobj, NULL);
			return (-1);
		}
		**output = pg;
		(*output)++;
		return (1);
	}

	/*
	 * must be a loan to an anon.   check to see if there is already
	 * an anon associated with this page.  if so, then just return
	 * a reference to this anon.   the page should already be
	 * mapped read-only because it is already on loan.
	 */

	if (pg->uanon) {
		anon = pg->uanon;
		simple_lock(&anon->an_lock);
		anon->an_ref++;
		simple_unlock(&anon->an_lock);
		if (pg->flags & PG_WANTED) {
			wakeup(pg);
		}
		pg->flags &= ~(PG_WANTED|PG_BUSY);
		UVM_PAGE_OWN(pg, NULL);
		**output = anon;
		(*output)++;
		return (1);
	}

	/*
	 * need to allocate a new anon
	 */

	anon = uvm_analloc();
	if (anon == NULL) {
		goto fail;
	}
	anon->an_page = pg;
	pg->uanon = anon;
	uvm_lock_pageq();
	if (pg->wire_count > 0) {
		uvm_unlock_pageq();
		UVMHIST_LOG(loanhist, "wired %p", pg,0,0,0);
		pg->uanon = NULL;
		anon->an_page = NULL;
		anon->an_ref--;
		simple_unlock(&anon->an_lock);
		uvm_anfree(anon);
		goto fail;
	}
	if (pg->loan_count == 0) {
		pmap_page_protect(pg, VM_PROT_READ);
	}
	pg->loan_count++;
	uvm_pageactivate(pg);
	uvm_unlock_pageq();
	if (pg->flags & PG_WANTED) {
		wakeup(pg);
	}
	pg->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(pg, NULL);
	simple_unlock(&anon->an_lock);
	**output = anon;
	(*output)++;
	return (1);

fail:
	UVMHIST_LOG(loanhist, "fail", 0,0,0,0);
	/*
	 * unlock everything and bail out.
	 */
	if (pg->flags & PG_WANTED) {
		wakeup(pg);
	}
	pg->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(pg, NULL);
	uvmfault_unlockall(ufi, amap, uobj, NULL);
	return (-1);
}

/*
 * uvm_loanzero: loan a zero-fill page out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

static struct uvm_object uvm_loanzero_object;

static int
uvm_loanzero(struct uvm_faultinfo *ufi, void ***output, int flags)
{
	struct vm_anon *anon;
	struct vm_page *pg;
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct vm_amap *amap = ufi->entry->aref.ar_amap;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);
again:
	simple_lock(&uvm_loanzero_object.vmobjlock);

	/*
	 * first, get ahold of our single zero page.
	 */

	if (__predict_false((pg =
			     TAILQ_FIRST(&uvm_loanzero_object.memq)) == NULL)) {
		while ((pg = uvm_pagealloc(&uvm_loanzero_object, 0, NULL,
					   UVM_PGA_ZERO)) == NULL) {
			simple_unlock(&uvm_loanzero_object.vmobjlock);
			uvmfault_unlockall(ufi, amap, uobj, NULL);
			uvm_wait("loanzero");
			if (!uvmfault_relock(ufi)) {
				return (0);
			}
			if (amap) {
				amap_lock(amap);
			}
			if (uobj) {
				simple_lock(&uobj->vmobjlock);
			}
			goto again;
		}

		/* got a zero'd page. */
		pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
		pg->flags |= PG_RDONLY;
		uvm_lock_pageq();
		uvm_pageactivate(pg);
		uvm_unlock_pageq();
		UVM_PAGE_OWN(pg, NULL);
	}

	if ((flags & UVM_LOAN_TOANON) == 0) {	/* loaning to kernel-page */
		uvm_lock_pageq();
		pg->loan_count++;
		uvm_pagedequeue(pg);
		uvm_unlock_pageq();
		simple_unlock(&uvm_loanzero_object.vmobjlock);
		**output = pg;
		(*output)++;
		return (1);
	}

	/*
	 * loaning to an anon.  check to see if there is already an anon
	 * associated with this page.  if so, then just return a reference
	 * to this anon.
	 */

	if (pg->uanon) {
		anon = pg->uanon;
		simple_lock(&anon->an_lock);
		anon->an_ref++;
		simple_unlock(&anon->an_lock);
		simple_unlock(&uvm_loanzero_object.vmobjlock);
		**output = anon;
		(*output)++;
		return (1);
	}

	/*
	 * need to allocate a new anon
	 */

	anon = uvm_analloc();
	if (anon == NULL) {
		/* out of swap causes us to fail */
		simple_unlock(&uvm_loanzero_object.vmobjlock);
		uvmfault_unlockall(ufi, amap, uobj, NULL);
		return (-1);
	}
	anon->an_page = pg;
	pg->uanon = anon;
	uvm_lock_pageq();
	pg->loan_count++;
	uvm_pageactivate(pg);
	uvm_unlock_pageq();
	simple_unlock(&uvm_loanzero_object.vmobjlock);
	**output = anon;
	(*output)++;
	return (1);
}


/*
 * uvm_unloananon: kill loans on anons (basically a normal ref drop)
 *
 * => we expect all our resources to be unlocked
 */

static void
uvm_unloananon(struct vm_anon **aloans, int nanons)
{
	struct vm_anon *anon;

	while (nanons-- > 0) {
		int refs;

		anon = *aloans++;
		simple_lock(&anon->an_lock);
		refs = --anon->an_ref;
		simple_unlock(&anon->an_lock);

		if (refs == 0) {
			uvm_anfree(anon);
		}
	}
}

/*
 * uvm_unloanpage: kill loans on pages loaned out to the kernel
 *
 * => we expect all our resources to be unlocked
 */

static void
uvm_unloanpage(struct vm_page **ploans, int npages)
{
	struct vm_page *pg;
	struct simplelock *slock;

	uvm_lock_pageq();
	while (npages-- > 0) {
		pg = *ploans++;

		/*
		 * do a little dance to acquire the object or anon lock
		 * as appropriate.  we are locking in the wrong order,
		 * so we have to do a try-lock here.
		 */

		slock = NULL;
		while (pg->uobject != NULL || pg->uanon != NULL) {
			if (pg->uobject != NULL) {
				slock = &pg->uobject->vmobjlock;
			} else {
				slock = &pg->uanon->an_lock;
			}
			if (simple_lock_try(slock)) {
				break;
			}
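			/*
			 * try-lock failed: drop and retake the page
			 * queue lock so the owner (who takes the locks
			 * in the opposite order) can make progress,
			 * then retry.
			 */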
			uvm_unlock_pageq();
			uvm_lock_pageq();
			slock = NULL;
		}

		/*
		 * drop our loan.  if page is owned by an anon but
		 * PQ_ANON is not set, the page was loaned to the anon
		 * from an object which dropped ownership, so resolve
		 * this by turning the anon's loan into real ownership
		 * (ie. decrement loan_count again and set PQ_ANON).
		 * after all this, if there are no loans left, put the
		 * page back on a paging queue (if the page is owned by
		 * an anon) or free it (if the page is now unowned).
		 */

		KASSERT(pg->loan_count > 0);
		pg->loan_count--;
		if (pg->uobject == NULL && pg->uanon != NULL &&
		    (pg->pqflags & PQ_ANON) == 0) {
			KASSERT(pg->loan_count > 0);
			pg->loan_count--;
			pg->pqflags |= PQ_ANON;
		}
		if (pg->loan_count == 0) {
			if (pg->uobject == NULL && pg->uanon == NULL) {
				KASSERT((pg->flags & PG_BUSY) == 0);
				uvm_pagefree(pg);
			} else {
				uvm_pageactivate(pg);
			}
		} else if (pg->loan_count == 1 && pg->uobject != NULL &&
			   pg->uanon != NULL) {
			uvm_pageactivate(pg);
		}
		if (slock != NULL) {
			simple_unlock(slock);
		}
	}
	uvm_unlock_pageq();
}

/*
 * uvm_unloan: kill loans on pages or anons.
 */

void
uvm_unloan(void *v, int npages, int flags)
{
	if (flags & UVM_LOAN_TOANON) {
		uvm_unloananon(v, npages);
	} else {
		uvm_unloanpage(v, npages);
	}
}

/*
 * Minimal pager for uvm_loanzero_object.  We need to provide a "put"
 * method, because the page can end up on a paging queue, and the
 * page daemon will want to call pgo_put when it encounters the page
 * on the inactive list.
 */

static int
ulz_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
{
	struct vm_page *pg;

	KDASSERT(uobj == &uvm_loanzero_object);

	/*
	 * Don't need to do any work here if we're not freeing pages.
	 */

	if ((flags & PGO_FREE) == 0) {
		simple_unlock(&uobj->vmobjlock);
		return 0;
	}

	/*
	 * we don't actually want to ever free the uvm_loanzero_page, so
	 * just reactivate or dequeue it.
	 */

	pg = TAILQ_FIRST(&uobj->memq);
	KASSERT(pg != NULL);
	KASSERT(TAILQ_NEXT(pg, listq) == NULL);

	uvm_lock_pageq();
	if (pg->uanon)
		uvm_pageactivate(pg);
	else
		uvm_pagedequeue(pg);
	uvm_unlock_pageq();

	simple_unlock(&uobj->vmobjlock);
	return 0;
}

static struct uvm_pagerops ulz_pager = {
	NULL,		/* init */
	NULL,		/* reference */
	NULL,		/* detach */
	NULL,		/* fault */
	NULL,		/* get */
	ulz_put,	/* put */
};

/*
 * uvm_loan_init(): initialize the uvm_loan() facility.
 */

void
uvm_loan_init(void)
{

	simple_lock_init(&uvm_loanzero_object.vmobjlock);
	TAILQ_INIT(&uvm_loanzero_object.memq);
	uvm_loanzero_object.pgops = &ulz_pager;

	UVMHIST_INIT(loanhist, 300);
}

/*
 * uvm_loanbreak: break loan on a uobj page
 *
 * => called with uobj locked
 * => the page should be busy
 * => return value:
 *	newly allocated page if succeeded, NULL if the allocation failed
 */
struct vm_page *
uvm_loanbreak(struct vm_page *uobjpage)
{
	struct vm_page *pg;
#ifdef DIAGNOSTIC
	struct uvm_object *uobj = uobjpage->uobject;
#endif

	KASSERT(uobj != NULL);
	LOCK_ASSERT(simple_lock_held(&uobj->vmobjlock));
	KASSERT(uobjpage->flags & PG_BUSY);

	/* alloc new un-owned page */
	pg = uvm_pagealloc(NULL, 0, NULL, 0);
	if (pg == NULL)
		return NULL;

	/*
	 * copy the data from the old page to the new
	 * one and clear the fake/clean flags on the
	 * new page (keep it busy).  force a reload
	 * of the old page by clearing it from all
	 * pmaps.  then lock the page queues to
	 * rename the pages.
	 */

	uvm_pagecopy(uobjpage, pg);	/* old -> new */
	pg->flags &= ~(PG_FAKE|PG_CLEAN);
	pmap_page_protect(uobjpage, VM_PROT_NONE);
	if (uobjpage->flags & PG_WANTED)
		wakeup(uobjpage);
	/* uobj still locked */
	uobjpage->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(uobjpage, NULL);

	uvm_lock_pageq();

	/*
	 * replace uobjpage with new page.
	 */

	uvm_pagereplace(uobjpage, pg);

	/*
	 * if the page is no longer referenced by
	 * an anon (i.e. we are breaking an O->K
	 * loan), then remove it from any pageq's.
	 */
	if (uobjpage->uanon == NULL)
		uvm_pagedequeue(uobjpage);

	/*
	 * at this point we have absolutely no
	 * control over uobjpage
	 */

	/* install new page */
	uvm_pageactivate(pg);
	uvm_unlock_pageq();

	/*
	 * done!  loan is broken and "pg" is
	 * PG_BUSY.   it can now replace uobjpage.
	 */

	return pg;
}

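/*
 * usage sketch for uvm_loanbreak(); the surrounding fault-handler
 * context (ufi, amap, uobj, the ReFault label and the wait message)
 * is assumed here for illustration, in the style of uvm_fault():
 *
 *	if (uobjpage->loan_count) {
 *		pg = uvm_loanbreak(uobjpage);
 *		if (pg == NULL) {
 *			uvmfault_unlockall(&ufi, amap, uobj, NULL);
 *			uvm_wait("flt_noram4");
 *			goto ReFault;
 *		}
 *		uobjpage = pg;
 *	}
 */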