/*	$NetBSD: uvm_loan.c,v 1.69 2007/12/01 10:40:28 yamt Exp $	*/

/*
 *
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Charles D. Cranor and
 *      Washington University.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * from: Id: uvm_loan.c,v 1.1.6.4 1998/02/06 05:08:43 chs Exp
 */

/*
 * uvm_loan.c: page loanout handler
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.69 2007/12/01 10:40:28 yamt Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/mman.h>

#include <uvm/uvm.h>

/*
 * "loaned" pages are pages which are (read-only, copy-on-write) loaned
 * from the VM system to other parts of the kernel.   this allows page
 * copying to be avoided (e.g. you can loan pages from objs/anons to
 * the mbuf system).
 *
 * there are 3 types of loans possible:
 *  O->K  uvm_object page to wired kernel page (e.g. mbuf data area)
 *  A->K  anon page to wired kernel page (e.g. mbuf data area)
 *  O->A  uvm_object to anon loan (e.g. vnode page to an anon)
 * note that it is possible to have an O page loaned to both an A and a K
 * at the same time.
 *
 * loans are tracked by pg->loan_count.  an O->A page will have both
 * a uvm_object and a vm_anon, but PQ_ANON will not be set.   this sort
 * of page is considered "owned" by the uvm_object (not the anon).
 *
 * each loan of a page to the kernel bumps the pg->wire_count.  the
 * kernel mappings for these pages will be read-only and wired.  since
 * the page will also be wired, it will not be a candidate for pageout,
 * and thus will never be pmap_page_protect()'d with VM_PROT_NONE.  a
 * write fault in the kernel to one of these pages will not cause
 * copy-on-write.  instead, the page fault is considered fatal.  this
 * is because the kernel mapping will have no way to look up the
 * object/anon which the page is owned by.  this is a good side-effect,
 * since a kernel write to a loaned page is an error.
 *
 * owners that want to free their pages and discover that they are
 * loaned out simply "disown" them (each such page becomes an orphan).
 * these pages should be freed when the last loan is dropped.   in some
 * cases an anon may "adopt" an orphaned page.
 *
 * locking: to read pg->loan_count either the owner or the page queues
 * must be locked.   to modify pg->loan_count, both the owner of the page
 * and the PQs must be locked.   pg->flags is (as always) locked by
 * the owner of the page.
 *
 * note that locking from the "loaned" side is tricky since the object
 * getting the loaned page has no reference to the page's owner and thus
 * the owner could "die" at any time.   in order to prevent the owner
 * from dying the page queues should be locked.   this forces us to sometimes
 * use "try" locking.
 *
 * loans are typically broken by the following events:
 *  1. user-level write fault to a loaned page
 *  2. pageout of clean+inactive O->A loaned page
 *  3. owner frees page (e.g. pager flush)
 *
 * note that loaning a page causes all mappings of the page to become
 * read-only (via pmap_page_protect).   this could have an unexpected
 * effect on normal "wired" pages if one is not careful (XXX).
 */
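
/*
 * A minimal usage sketch of the public interface defined below
 * (illustrative only: the map, the page-aligned starting address and
 * the page count are assumptions, not code from this file).  an O->K
 * loan wires the pages, and the caller must eventually return them
 * with uvm_unloan():
 *
 *	struct vm_page *pgs[4];
 *	int error;
 *
 *	error = uvm_loan(&curproc->p_vmspace->vm_map, start,
 *	    4 << PAGE_SHIFT, pgs, UVM_LOAN_TOPAGE);
 *	if (error == 0) {
 *		... read the loaned, read-only pages ...
 *		uvm_unloan(pgs, 4, UVM_LOAN_TOPAGE);
 *	}
 */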

/*
 * local prototypes
 */

static int	uvm_loananon(struct uvm_faultinfo *, void ***,
			     int, struct vm_anon *);
static int	uvm_loanuobj(struct uvm_faultinfo *, void ***,
			     int, vaddr_t);
static int	uvm_loanzero(struct uvm_faultinfo *, void ***, int);
static void	uvm_unloananon(struct vm_anon **, int);
static void	uvm_unloanpage(struct vm_page **, int);
static int	uvm_loanpage(struct vm_page **, int);


/*
 * inlines
 */

/*
 * uvm_loanentry: loan out pages in a map entry (helper fn for uvm_loan())
 *
 * => "ufi" is the result of a successful map lookup (meaning that
 *	on entry the map is locked by the caller)
 * => we may unlock and then relock the map if needed (for I/O)
 * => we put our output result in "output"
 * => we always return with the map unlocked
 * => possible return values:
 *	-1 == error, map is unlocked
 *	 0 == map relock error (try again!), map is unlocked
 *	>0 == number of pages we loaned, map is unlocked
 *
 * NOTE: We can live with this being an inline, because it is only called
 * from one place.
 */

static inline int
uvm_loanentry(struct uvm_faultinfo *ufi, void ***output, int flags)
{
	vaddr_t curaddr = ufi->orig_rvaddr;
	vsize_t togo = ufi->size;
	struct vm_aref *aref = &ufi->entry->aref;
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct vm_anon *anon;
	int rv, result = 0;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * lock us the rest of the way down (we unlock before return)
	 */
	if (aref->ar_amap)
		amap_lock(aref->ar_amap);

	/*
	 * loop until done
	 */
	while (togo) {

		/*
		 * find the page we want.   check the anon layer first.
		 */

		if (aref->ar_amap) {
			anon = amap_lookup(aref, curaddr - ufi->entry->start);
		} else {
			anon = NULL;
		}

		/* locked: map, amap, uobj */
		if (anon) {
			rv = uvm_loananon(ufi, output, flags, anon);
		} else if (uobj) {
			rv = uvm_loanuobj(ufi, output, flags, curaddr);
		} else if (UVM_ET_ISCOPYONWRITE(ufi->entry)) {
			rv = uvm_loanzero(ufi, output, flags);
		} else {
			uvmfault_unlockall(ufi, aref->ar_amap, uobj, NULL);
			rv = -1;
		}
		/* locked: if (rv > 0) => map, amap, uobj  [o.w. unlocked] */
		KASSERT(rv > 0 || aref->ar_amap == NULL ||
		    !mutex_owned(&aref->ar_amap->am_l));

		/* total failure */
		if (rv < 0) {
			UVMHIST_LOG(loanhist, "failure %d", rv, 0,0,0);
			return (-1);
		}

		/* relock failed, need to do another lookup */
		if (rv == 0) {
			UVMHIST_LOG(loanhist, "relock failure %d", result,
			    0,0,0);
			return (result);
		}

		/*
		 * got it... advance to next page
		 */

		result++;
		togo -= PAGE_SIZE;
		curaddr += PAGE_SIZE;
	}

	/*
	 * unlock what we locked, unlock the maps and return
	 */

	if (aref->ar_amap)
		amap_unlock(aref->ar_amap);
	uvmfault_unlockmaps(ufi, false);
	UVMHIST_LOG(loanhist, "done %d", result, 0,0,0);
	return (result);
}

/*
 * normal functions
 */

/*
 * uvm_loan: loan pages in a map out to anons or to the kernel
 *
 * => map should be unlocked
 * => start and len should be multiples of PAGE_SIZE
 * => result is either an array of anon's or vm_pages (depending on flags)
 * => flag values: UVM_LOAN_TOANON - loan to anons
 *                 UVM_LOAN_TOPAGE - loan to wired kernel page
 *    one and only one of these flags must be set!
 * => returns 0 (success), or an appropriate error number
 */

int
uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags)
{
	struct uvm_faultinfo ufi;
	void **result, **output;
	int rv, error;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * ensure that one and only one of the flags is set
	 */

	KASSERT(((flags & UVM_LOAN_TOANON) == 0) ^
		((flags & UVM_LOAN_TOPAGE) == 0));
	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);

	/*
	 * "output" is a pointer to the current place to put the loaned page.
	 */

	result = v;
	output = &result[0];	/* start at the beginning ... */

	/*
	 * while we've got pages to do
	 */

	while (len > 0) {

		/*
		 * fill in params for a call to uvmfault_lookup
		 */

		ufi.orig_map = map;
		ufi.orig_rvaddr = start;
		ufi.orig_size = len;

		/*
		 * do the lookup, the only time this will fail is if we hit on
		 * an unmapped region (an error)
		 */

		if (!uvmfault_lookup(&ufi, false)) {
			error = ENOENT;
			goto fail;
		}

		/*
		 * map now locked.  now do the loanout...
		 */

		rv = uvm_loanentry(&ufi, &output, flags);
		if (rv < 0) {
			/* all unlocked due to error */
			error = EINVAL;
			goto fail;
		}

		/*
		 * done!  the map is unlocked.  advance, if possible.
		 *
		 * XXXCDC: could be recoded to hold the map lock with
		 *	   smarter code (but it only happens on map entry
		 *	   boundaries, so it isn't that bad).
		 */

		if (rv) {
			rv <<= PAGE_SHIFT;
			len -= rv;
			start += rv;
		}
	}
	UVMHIST_LOG(loanhist, "success", 0,0,0,0);
	return 0;

fail:
	/*
	 * failed to complete loans.  drop any loans and return failure code.
	 * map is already unlocked.
	 */

	if (output - result) {
		if (flags & UVM_LOAN_TOANON) {
			uvm_unloananon((struct vm_anon **)result,
			    output - result);
		} else {
			uvm_unloanpage((struct vm_page **)result,
			    output - result);
		}
	}
	UVMHIST_LOG(loanhist, "error %d", error,0,0,0);
	return (error);
}

/*
 * uvm_loananon: loan a page from an anon out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

int
uvm_loananon(struct uvm_faultinfo *ufi, void ***output, int flags,
    struct vm_anon *anon)
{
	struct vm_page *pg;
	int error;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * if we are loaning to "another" anon then it is easy, we just
	 * bump the reference count on the current anon and return a
	 * pointer to it (it becomes copy-on-write shared).
	 */

	if (flags & UVM_LOAN_TOANON) {
		simple_lock(&anon->an_lock);
		pg = anon->an_page;
		if (pg && (pg->pqflags & PQ_ANON) != 0 && anon->an_ref == 1) {
			if (pg->wire_count > 0) {
				UVMHIST_LOG(loanhist, "->A wired %p", pg,0,0,0);
				uvmfault_unlockall(ufi,
				    ufi->entry->aref.ar_amap,
				    ufi->entry->object.uvm_obj, anon);
				return (-1);
			}
			pmap_page_protect(pg, VM_PROT_READ);
		}
		anon->an_ref++;
		**output = anon;
		(*output)++;
		simple_unlock(&anon->an_lock);
		UVMHIST_LOG(loanhist, "->A done", 0,0,0,0);
		return (1);
	}

	/*
	 * we are loaning to a kernel-page.   we need to get the page
	 * resident so we can wire it.   uvmfault_anonget will handle
	 * this for us.
	 */

	simple_lock(&anon->an_lock);
	error = uvmfault_anonget(ufi, ufi->entry->aref.ar_amap, anon);

	/*
	 * if we were unable to get the anon, then uvmfault_anonget has
	 * unlocked everything and returned an error code.
	 */

	if (error) {
		UVMHIST_LOG(loanhist, "error %d", error,0,0,0);

		/* need to refault (i.e. refresh our lookup) ? */
		if (error == ERESTART) {
			return (0);
		}

		/* "try again"?   sleep a bit and retry ... */
		if (error == EAGAIN) {
			tsleep(&lbolt, PVM, "loanagain", 0);
			return (0);
		}

		/* otherwise flag it as an error */
		return (-1);
	}

	/*
	 * we have the page and its owner locked: do the loan now.
	 */

	pg = anon->an_page;
	uvm_lock_pageq();
	if (pg->wire_count > 0) {
		uvm_unlock_pageq();
		UVMHIST_LOG(loanhist, "->K wired %p", pg,0,0,0);
		KASSERT(pg->uobject == NULL);
		uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap,
		    NULL, anon);
		return (-1);
	}
	if (pg->loan_count == 0) {
		pmap_page_protect(pg, VM_PROT_READ);
	}
	pg->loan_count++;
	uvm_pageactivate(pg);
	uvm_unlock_pageq();
	**output = pg;
	(*output)++;

	/* unlock anon and return success */
	if (pg->uobject)
		simple_unlock(&pg->uobject->vmobjlock);
	simple_unlock(&anon->an_lock);
	UVMHIST_LOG(loanhist, "->K done", 0,0,0,0);
	return (1);
}

/*
 * uvm_loanpage: loan out pages to kernel (->K)
 *
 * => pages should be object-owned and the object should be locked.
 * => in the case of error, the object might be unlocked and relocked.
 * => caller should busy the pages beforehand.
 * => pages will be unbusied.
 * => fail with EBUSY if we meet a wired page.
 */
static int
uvm_loanpage(struct vm_page **pgpp, int npages)
{
	int i;
	int error = 0;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	for (i = 0; i < npages; i++) {
		struct vm_page *pg = pgpp[i];

		KASSERT(pg->uobject != NULL);
		KASSERT(pg->uobject == pgpp[0]->uobject);
		KASSERT(!(pg->flags & (PG_RELEASED|PG_PAGEOUT)));
		LOCK_ASSERT(simple_lock_held(&pg->uobject->vmobjlock));
		KASSERT(pg->flags & PG_BUSY);

		uvm_lock_pageq();
		if (pg->wire_count > 0) {
			uvm_unlock_pageq();
			UVMHIST_LOG(loanhist, "wired %p", pg,0,0,0);
			error = EBUSY;
			break;
		}
		if (pg->loan_count == 0) {
			pmap_page_protect(pg, VM_PROT_READ);
		}
		pg->loan_count++;
		uvm_pageactivate(pg);
		uvm_unlock_pageq();
	}

	uvm_page_unbusy(pgpp, npages);

	if (error) {
		/*
		 * backout what we've done
		 */
		struct simplelock *slock = &pgpp[0]->uobject->vmobjlock;

		simple_unlock(slock);
		uvm_unloan(pgpp, i, UVM_LOAN_TOPAGE);
		simple_lock(slock);
	}

	UVMHIST_LOG(loanhist, "done %d", error,0,0,0);
	return error;
}

/*
 * XXX UBC temp limit
 * number of pages to get at once.
 * should be <= MAX_READ_AHEAD in genfs_vnops.c
 */
#define	UVM_LOAN_GET_CHUNK	16

/*
 * uvm_loanuobjpages: loan pages from a uobj out (O->K)
 *
 * => uobj shouldn't be locked.  (we'll lock it)
 * => fail with EBUSY if we meet a wired page.
 */
int
uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int orignpages,
    struct vm_page **origpgpp)
{
	int ndone; /* # of pages loaned out */
	struct vm_page **pgpp;
	int error;
	int i;
	struct simplelock *slock;

	pgpp = origpgpp;
	for (ndone = 0; ndone < orignpages; ) {
		int npages;
		/* npendloan: # of pages busied but not loaned out yet. */
		int npendloan = 0xdead; /* XXX gcc */
reget:
		npages = MIN(UVM_LOAN_GET_CHUNK, orignpages - ndone);
		simple_lock(&uobj->vmobjlock);
		error = (*uobj->pgops->pgo_get)(uobj,
		    pgoff + (ndone << PAGE_SHIFT), pgpp, &npages, 0,
		    VM_PROT_READ, 0, PGO_SYNCIO);
		if (error == EAGAIN) {
			tsleep(&lbolt, PVM, "nfsread", 0);
			continue;
		}
		if (error)
			goto fail;

		KASSERT(npages > 0);

		/* loan and unbusy pages */
		slock = NULL;
		for (i = 0; i < npages; i++) {
			struct simplelock *nextslock; /* slock for next page */
			struct vm_page *pg = *pgpp;

			/* XXX assuming that the page is owned by uobj */
			KASSERT(pg->uobject != NULL);
			nextslock = &pg->uobject->vmobjlock;

			if (slock != nextslock) {
				if (slock) {
					KASSERT(npendloan > 0);
					error = uvm_loanpage(pgpp - npendloan,
					    npendloan);
					simple_unlock(slock);
					if (error)
						goto fail;
					ndone += npendloan;
					KASSERT(origpgpp + ndone == pgpp);
				}
				slock = nextslock;
				npendloan = 0;
				simple_lock(slock);
			}

			if ((pg->flags & PG_RELEASED) != 0) {
				/*
				 * release pages and try again.
				 */
				simple_unlock(slock);
				for (; i < npages; i++) {
					pg = pgpp[i];
					slock = &pg->uobject->vmobjlock;

					simple_lock(slock);
					uvm_lock_pageq();
					uvm_page_unbusy(&pg, 1);
					uvm_unlock_pageq();
					simple_unlock(slock);
				}
				goto reget;
			}

			npendloan++;
			pgpp++;
			KASSERT(origpgpp + ndone + npendloan == pgpp);
		}
		KASSERT(slock != NULL);
		KASSERT(npendloan > 0);
		error = uvm_loanpage(pgpp - npendloan, npendloan);
		simple_unlock(slock);
		if (error)
			goto fail;
		ndone += npendloan;
		KASSERT(origpgpp + ndone == pgpp);
	}

	return 0;

fail:
	uvm_unloan(origpgpp, ndone, UVM_LOAN_TOPAGE);

	return error;
}
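
/*
 * A sketch of a typical uvm_loanuobjpages() call (illustrative only;
 * the vnode "vp" and the counts are assumptions, not code from this
 * file).  this is the kind of O->K loan a zero-copy read path would
 * make, returning the pages via uvm_unloan() when the consumer is
 * done with them:
 *
 *	struct vm_page *pgs[2];
 *	int error;
 *
 *	error = uvm_loanuobjpages(&vp->v_uobj, 0, 2, pgs);
 *	if (error == 0) {
 *		... hand the wired, read-only pages to the consumer ...
 *		uvm_unloan(pgs, 2, UVM_LOAN_TOPAGE);
 *	}
 */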

/*
 * uvm_loanuobj: loan a page from a uobj out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

static int
uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va)
{
	struct vm_amap *amap = ufi->entry->aref.ar_amap;
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct vm_page *pg;
	struct vm_anon *anon;
	int error, npages;
	bool locked;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * first we must make sure the page is resident.
	 *
	 * XXXCDC: duplicate code with uvm_fault().
	 */

	simple_lock(&uobj->vmobjlock);
	if (uobj->pgops->pgo_get) {	/* try locked pgo_get */
		npages = 1;
		pg = NULL;
		error = (*uobj->pgops->pgo_get)(uobj,
		    va - ufi->entry->start + ufi->entry->offset,
		    &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_LOCKED);
	} else {
		error = EIO;		/* must have pgo_get op */
	}

	/*
	 * check the result of the locked pgo_get.  if there is a problem,
	 * then we fail the loan.
	 */

	if (error && error != EBUSY) {
		uvmfault_unlockall(ufi, amap, uobj, NULL);
		return (-1);
	}

	/*
	 * if we need to unlock for I/O, do so now.
	 */

	if (error == EBUSY) {
		uvmfault_unlockall(ufi, amap, NULL, NULL);

		/* locked: uobj */
		npages = 1;
		error = (*uobj->pgops->pgo_get)(uobj,
		    va - ufi->entry->start + ufi->entry->offset,
		    &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_SYNCIO);
		/* locked: <nothing> */

		if (error) {
			if (error == EAGAIN) {
				tsleep(&lbolt, PVM, "fltagain2", 0);
				return (0);
			}
			return (-1);
		}

		/*
		 * pgo_get was a success.   attempt to relock everything.
		 */

		locked = uvmfault_relock(ufi);
		if (locked && amap)
			amap_lock(amap);
		uobj = pg->uobject;
		simple_lock(&uobj->vmobjlock);

		/*
		 * verify that the page has not been released and re-verify
		 * that the amap slot is still free.   if there is a problem we
		 * drop our lock (thus force a lookup refresh/retry).
		 */

		if ((pg->flags & PG_RELEASED) != 0 ||
		    (locked && amap && amap_lookup(&ufi->entry->aref,
		    ufi->orig_rvaddr - ufi->entry->start))) {
			if (locked)
				uvmfault_unlockall(ufi, amap, NULL, NULL);
			locked = false;
		}

		/*
		 * didn't get the lock?   release the page and retry.
		 */

		if (locked == false) {
			if (pg->flags & PG_WANTED) {
				wakeup(pg);
			}
			if (pg->flags & PG_RELEASED) {
				uvm_lock_pageq();
				uvm_pagefree(pg);
				uvm_unlock_pageq();
				simple_unlock(&uobj->vmobjlock);
				return (0);
			}
			uvm_lock_pageq();
			uvm_pageactivate(pg);
			uvm_unlock_pageq();
			pg->flags &= ~(PG_BUSY|PG_WANTED);
			UVM_PAGE_OWN(pg, NULL);
			simple_unlock(&uobj->vmobjlock);
			return (0);
		}
	}

	KASSERT(uobj == pg->uobject);

	/*
	 * at this point we have the page we want ("pg") marked PG_BUSY for us
	 * and we have all data structures locked.  do the loanout.  page can
	 * not be PG_RELEASED (we caught this above).
	 */

	if ((flags & UVM_LOAN_TOANON) == 0) {
		if (uvm_loanpage(&pg, 1)) {
			uvmfault_unlockall(ufi, amap, uobj, NULL);
			return (-1);
		}
		simple_unlock(&uobj->vmobjlock);
		**output = pg;
		(*output)++;
		return (1);
	}

	/*
	 * must be a loan to an anon.   check to see if there is already
	 * an anon associated with this page.  if so, then just return
	 * a reference to this object.   the page should already be
	 * mapped read-only because it is already on loan.
	 */

	if (pg->uanon) {
		anon = pg->uanon;
		simple_lock(&anon->an_lock);
		anon->an_ref++;
		simple_unlock(&anon->an_lock);
		if (pg->flags & PG_WANTED) {
			wakeup(pg);
		}
		pg->flags &= ~(PG_WANTED|PG_BUSY);
		UVM_PAGE_OWN(pg, NULL);
		simple_unlock(&uobj->vmobjlock);
		**output = anon;
		(*output)++;
		return (1);
	}

	/*
	 * need to allocate a new anon
	 */

	anon = uvm_analloc();
	if (anon == NULL) {
		goto fail;
	}
	anon->an_page = pg;
	pg->uanon = anon;
	uvm_lock_pageq();
	if (pg->wire_count > 0) {
		uvm_unlock_pageq();
		UVMHIST_LOG(loanhist, "wired %p", pg,0,0,0);
		pg->uanon = NULL;
		anon->an_page = NULL;
		anon->an_ref--;
		simple_unlock(&anon->an_lock);
		uvm_anfree(anon);
		goto fail;
	}
	if (pg->loan_count == 0) {
		pmap_page_protect(pg, VM_PROT_READ);
	}
	pg->loan_count++;
	uvm_pageactivate(pg);
	uvm_unlock_pageq();
	if (pg->flags & PG_WANTED) {
		wakeup(pg);
	}
	pg->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(pg, NULL);
	simple_unlock(&uobj->vmobjlock);
	simple_unlock(&anon->an_lock);
	**output = anon;
	(*output)++;
	return (1);

fail:
	UVMHIST_LOG(loanhist, "fail", 0,0,0,0);
	/*
	 * unlock everything and bail out.
	 */
	if (pg->flags & PG_WANTED) {
		wakeup(pg);
	}
	pg->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(pg, NULL);
	uvmfault_unlockall(ufi, amap, uobj, NULL);
	return (-1);
}

/*
 * uvm_loanzero: loan a zero-fill page out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

static struct uvm_object uvm_loanzero_object;

static int
uvm_loanzero(struct uvm_faultinfo *ufi, void ***output, int flags)
{
	struct vm_anon *anon;
	struct vm_page *pg;
	struct vm_amap *amap = ufi->entry->aref.ar_amap;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);
again:
	simple_lock(&uvm_loanzero_object.vmobjlock);

	/*
	 * first, get ahold of our single zero page.
	 */

	if (__predict_false((pg =
			     TAILQ_FIRST(&uvm_loanzero_object.memq)) == NULL)) {
		while ((pg = uvm_pagealloc(&uvm_loanzero_object, 0, NULL,
					   UVM_PGA_ZERO)) == NULL) {
			simple_unlock(&uvm_loanzero_object.vmobjlock);
			uvmfault_unlockall(ufi, amap, NULL, NULL);
			uvm_wait("loanzero");
			if (!uvmfault_relock(ufi)) {
				return (0);
			}
			if (amap) {
				amap_lock(amap);
			}
			goto again;
		}

		/* got a zero'd page. */
		pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
		pg->flags |= PG_RDONLY;
		uvm_lock_pageq();
		uvm_pageactivate(pg);
		uvm_unlock_pageq();
		UVM_PAGE_OWN(pg, NULL);
	}

	if ((flags & UVM_LOAN_TOANON) == 0) {	/* loaning to kernel-page */
		uvm_lock_pageq();
		pg->loan_count++;
		uvm_unlock_pageq();
		simple_unlock(&uvm_loanzero_object.vmobjlock);
		**output = pg;
		(*output)++;
		return (1);
	}

	/*
	 * loaning to an anon.  check to see if there is already an anon
	 * associated with this page.  if so, then just return a reference
	 * to this object.
	 */

	if (pg->uanon) {
		anon = pg->uanon;
		simple_lock(&anon->an_lock);
		anon->an_ref++;
		simple_unlock(&anon->an_lock);
		simple_unlock(&uvm_loanzero_object.vmobjlock);
		**output = anon;
		(*output)++;
		return (1);
	}

	/*
	 * need to allocate a new anon
	 */

	anon = uvm_analloc();
	if (anon == NULL) {
		/* out of swap causes us to fail */
		simple_unlock(&uvm_loanzero_object.vmobjlock);
		uvmfault_unlockall(ufi, amap, NULL, NULL);
		return (-1);
	}
	anon->an_page = pg;
	pg->uanon = anon;
	uvm_lock_pageq();
	pg->loan_count++;
	uvm_pageactivate(pg);
	uvm_unlock_pageq();
	simple_unlock(&anon->an_lock);
	simple_unlock(&uvm_loanzero_object.vmobjlock);
	**output = anon;
	(*output)++;
	return (1);
}


/*
 * uvm_unloananon: kill loans on anons (basically a normal ref drop)
 *
 * => we expect all our resources to be unlocked
 */

static void
uvm_unloananon(struct vm_anon **aloans, int nanons)
{
	struct vm_anon *anon;

	while (nanons-- > 0) {
		int refs;

		anon = *aloans++;
		simple_lock(&anon->an_lock);
		refs = --anon->an_ref;
		simple_unlock(&anon->an_lock);

		if (refs == 0) {
			uvm_anfree(anon);
		}
	}
}

/*
 * uvm_unloanpage: kill loans on pages loaned out to the kernel
 *
 * => we expect all our resources to be unlocked
 */

static void
uvm_unloanpage(struct vm_page **ploans, int npages)
{
	struct vm_page *pg;
	struct simplelock *slock;

	uvm_lock_pageq();
	while (npages-- > 0) {
		pg = *ploans++;

		/*
		 * do a little dance to acquire the object or anon lock
		 * as appropriate.  we are locking in the wrong order,
		 * so we have to do a try-lock here.
		 */

		slock = NULL;
		while (pg->uobject != NULL || pg->uanon != NULL) {
			if (pg->uobject != NULL) {
				slock = &pg->uobject->vmobjlock;
			} else {
				slock = &pg->uanon->an_lock;
			}
			if (simple_lock_try(slock)) {
				break;
			}
			uvm_unlock_pageq();
			uvm_lock_pageq();
			slock = NULL;
		}

		/*
		 * drop our loan.  if page is owned by an anon but
		 * PQ_ANON is not set, the page was loaned to the anon
		 * from an object which dropped ownership, so resolve
		 * this by turning the anon's loan into real ownership
		 * (ie. decrement loan_count again and set PQ_ANON).
		 * after all this, if there are no loans left, put the
		 * page back on a paging queue (if the page is owned by
		 * an anon) or free it (if the page is now unowned).
		 */

		KASSERT(pg->loan_count > 0);
		pg->loan_count--;
		if (pg->uobject == NULL && pg->uanon != NULL &&
		    (pg->pqflags & PQ_ANON) == 0) {
			KASSERT(pg->loan_count > 0);
			pg->loan_count--;
			pg->pqflags |= PQ_ANON;
		}
		if (pg->loan_count == 0 && pg->uobject == NULL &&
		    pg->uanon == NULL) {
			KASSERT((pg->flags & PG_BUSY) == 0);
			uvm_pagefree(pg);
		}
		if (slock != NULL) {
			simple_unlock(slock);
		}
	}
	uvm_unlock_pageq();
}

/*
 * uvm_unloan: kill loans on pages or anons.
 */

void
uvm_unloan(void *v, int npages, int flags)
{
	if (flags & UVM_LOAN_TOANON) {
		uvm_unloananon(v, npages);
	} else {
		uvm_unloanpage(v, npages);
	}
}

/*
 * Minimal pager for uvm_loanzero_object.  We need to provide a "put"
 * method, because the page can end up on a paging queue, and the
 * page daemon will want to call pgo_put when it encounters the page
 * on the inactive list.
 */

static int
ulz_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
{
	struct vm_page *pg;

	KDASSERT(uobj == &uvm_loanzero_object);

	/*
	 * Don't need to do any work here if we're not freeing pages.
	 */

	if ((flags & PGO_FREE) == 0) {
		simple_unlock(&uobj->vmobjlock);
		return 0;
	}

	/*
	 * we don't actually want to ever free the uvm_loanzero_page, so
	 * just reactivate or dequeue it.
	 */

	pg = TAILQ_FIRST(&uobj->memq);
	KASSERT(pg != NULL);
	KASSERT(TAILQ_NEXT(pg, listq) == NULL);

	uvm_lock_pageq();
	if (pg->uanon)
		uvm_pageactivate(pg);
	else
		uvm_pagedequeue(pg);
	uvm_unlock_pageq();

	simple_unlock(&uobj->vmobjlock);
	return 0;
}

static const struct uvm_pagerops ulz_pager = {
	.pgo_put = ulz_put,
};

/*
 * uvm_loan_init(): initialize the uvm_loan() facility.
 */

void
uvm_loan_init(void)
{

	simple_lock_init(&uvm_loanzero_object.vmobjlock);
	TAILQ_INIT(&uvm_loanzero_object.memq);
	uvm_loanzero_object.pgops = &ulz_pager;

	UVMHIST_INIT(loanhist, 300);
}

/*
 * uvm_loanbreak: break loan on a uobj page
 *
 * => called with uobj locked
 * => the page should be busy
 * => return value:
 *	newly allocated page if succeeded, NULL if the allocation failed
 */
struct vm_page *
uvm_loanbreak(struct vm_page *uobjpage)
{
	struct vm_page *pg;
#ifdef DIAGNOSTIC
	struct uvm_object *uobj = uobjpage->uobject;
#endif

	KASSERT(uobj != NULL);
	LOCK_ASSERT(simple_lock_held(&uobj->vmobjlock));
	KASSERT(uobjpage->flags & PG_BUSY);

	/* alloc new un-owned page */
	pg = uvm_pagealloc(NULL, 0, NULL, 0);
	if (pg == NULL)
		return NULL;

	/*
	 * copy the data from the old page to the new
	 * one and clear the fake flags on the new page (keep it busy).
	 * force a reload of the old page by clearing it from all
	 * pmaps.
	 * transfer dirtiness of the old page to the new page.
	 * then lock the page queues to rename the pages.
	 */

	uvm_pagecopy(uobjpage, pg);	/* old -> new */
	pg->flags &= ~PG_FAKE;
	pmap_page_protect(uobjpage, VM_PROT_NONE);
	if ((uobjpage->flags & PG_CLEAN) != 0 && !pmap_clear_modify(uobjpage)) {
		pmap_clear_modify(pg);
		pg->flags |= PG_CLEAN;
	} else {
		/* uvm_pagecopy marked it dirty */
		KASSERT((pg->flags & PG_CLEAN) == 0);
		/* an object with a dirty page should be dirty. */
		KASSERT(!UVM_OBJ_IS_CLEAN(uobj));
	}
	if (uobjpage->flags & PG_WANTED)
		wakeup(uobjpage);
	/* uobj still locked */
	uobjpage->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(uobjpage, NULL);

	uvm_lock_pageq();

	/*
	 * replace uobjpage with new page.
	 */

	uvm_pagereplace(uobjpage, pg);

	/*
	 * if the page is no longer referenced by
	 * an anon (i.e. we are breaking an O->K
	 * loan), then remove it from any pageq's.
	 */
	if (uobjpage->uanon == NULL)
		uvm_pagedequeue(uobjpage);

	/*
	 * at this point we have absolutely no
	 * control over uobjpage
	 */

	/* install new page */
	uvm_pageactivate(pg);
	uvm_unlock_pageq();

	/*
	 * done!  loan is broken and "pg" is
	 * PG_BUSY.   it can now replace uobjpage.
	 */

	return pg;
}

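/*
 * A sketch of the caller pattern for uvm_loanbreak() (illustrative
 * only, modeled on a write-fault style caller; the unlock/wait/retry
 * policy here is an assumption, not code from this file).  the caller
 * holds the object lock and has "uobjpage" marked PG_BUSY:
 *
 *	pg = uvm_loanbreak(uobjpage);
 *	if (pg == NULL) {
 *		... unlock everything, wait for memory with uvm_wait(),
 *		    and retry the fault ...
 *	}
 *	... "pg" is now the object's copy of the page, still PG_BUSY,
 *	    and no longer loaned ...
 */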