/*	$NetBSD: uvm_loan.c,v 1.78 2011/02/02 15:13:34 chuck Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * from: Id: uvm_loan.c,v 1.1.6.4 1998/02/06 05:08:43 chs Exp
 */

/*
 * uvm_loan.c: page loanout handler
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.78 2011/02/02 15:13:34 chuck Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/mman.h>

#include <uvm/uvm.h>

/*
 * "loaned" pages are pages which are (read-only, copy-on-write) loaned
 * from the VM system to other parts of the kernel.   this allows page
 * copying to be avoided (e.g. you can loan pages from objs/anons to
 * the mbuf system).
 *
 * there are 3 types of loans possible:
 *  O->K  uvm_object page to wired kernel page (e.g. mbuf data area)
 *  A->K  anon page to wired kernel page (e.g. mbuf data area)
 *  O->A  uvm_object to anon loan (e.g. vnode page to an anon)
 * note that it is possible to have an O page loaned to both an A and K
 * at the same time.
 *
 * loans are tracked by pg->loan_count.  an O->A page will have both
 * a uvm_object and a vm_anon, but PQ_ANON will not be set.   this sort
 * of page is considered "owned" by the uvm_object (not the anon).
 *
 * each loan of a page to the kernel bumps the pg->wire_count.  the
 * kernel mappings for these pages will be read-only and wired.  since
 * the page will also be wired, it will not be a candidate for pageout,
 * and thus will never be pmap_page_protect()'d with VM_PROT_NONE.  a
 * write fault in the kernel to one of these pages will not cause
 * copy-on-write.  instead, the page fault is considered fatal.  this
 * is because the kernel mapping will have no way to look up the
 * object/anon which the page is owned by.  this is a good side-effect,
 * since a kernel write to a loaned page is an error.
 *
 * owners that want to free their pages and discover that they are
 * loaned out simply "disown" them (the page becomes an orphan).  these
 * pages should be freed when the last loan is dropped.   in some cases
 * an anon may "adopt" an orphaned page.
 *
 * locking: to read pg->loan_count either the owner or the page queues
 * must be locked.   to modify pg->loan_count, both the owner of the page
 * and the PQs must be locked.   pg->flags is (as always) locked by
 * the owner of the page.
 *
 * note that locking from the "loaned" side is tricky since the object
 * getting the loaned page has no reference to the page's owner and thus
 * the owner could "die" at any time.   in order to prevent the owner
 * from dying the page queues should be locked.   this forces us to sometimes
 * use "try" locking.
 *
 * loans are typically broken by the following events:
 *  1. user-level write fault to a loaned page
 *  2. pageout of clean+inactive O->A loaned page
 *  3. owner frees page (e.g. pager flush)
 *
 * note that loaning a page causes all mappings of the page to become
 * read-only (via pmap_page_protect).   this could have an unexpected
 * effect on normal "wired" pages if one is not careful (XXX).
 */
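
/*
 * illustrative sketch (editor's example, not compiled): applying the
 * rules above to a vnode page that is loaned first O->A and then O->K,
 * the expected bookkeeping is:
 *
 *	pg->loan_count == 2	one count per outstanding loan
 *	pg->wire_count		bumped only by the O->K (kernel) loan
 *	pg->uobject != NULL and pg->uanon != NULL, but PQ_ANON is clear,
 *	    so the uvm_object remains the page's owner
 *
 * dropping both loans brings loan_count back to 0; if the owner
 * "disowned" the page in the meantime, dropping the last loan frees it.
 */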

/*
 * local prototypes
 */

static int	uvm_loananon(struct uvm_faultinfo *, void ***,
			     int, struct vm_anon *);
static int	uvm_loanuobj(struct uvm_faultinfo *, void ***,
			     int, vaddr_t);
static int	uvm_loanzero(struct uvm_faultinfo *, void ***, int);
static void	uvm_unloananon(struct vm_anon **, int);
static void	uvm_unloanpage(struct vm_page **, int);
static int	uvm_loanpage(struct vm_page **, int);


/*
 * inlines
 */

/*
 * uvm_loanentry: loan out pages in a map entry (helper fn for uvm_loan())
 *
 * => "ufi" is the result of a successful map lookup (meaning that
 *	on entry the map is locked by the caller)
 * => we may unlock and then relock the map if needed (for I/O)
 * => we put our output result in "output"
 * => we always return with the map unlocked
 * => possible return values:
 *	-1 == error, map is unlocked
 *	 0 == map relock error (try again!), map is unlocked
 *	>0 == number of pages we loaned, map is unlocked
 *
 * NOTE: We can live with this being an inline, because it is only called
 * from one place.
 */

static inline int
uvm_loanentry(struct uvm_faultinfo *ufi, void ***output, int flags)
{
	vaddr_t curaddr = ufi->orig_rvaddr;
	vsize_t togo = ufi->size;
	struct vm_aref *aref = &ufi->entry->aref;
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct vm_anon *anon;
	int rv, result = 0;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * lock us the rest of the way down (we unlock before return)
	 */
	if (aref->ar_amap)
		amap_lock(aref->ar_amap);

	/*
	 * loop until done
	 */
	while (togo) {

		/*
		 * find the page we want.   check the anon layer first.
		 */

		if (aref->ar_amap) {
			anon = amap_lookup(aref, curaddr - ufi->entry->start);
		} else {
			anon = NULL;
		}

		/* locked: map, amap, uobj */
		if (anon) {
			rv = uvm_loananon(ufi, output, flags, anon);
		} else if (uobj) {
			rv = uvm_loanuobj(ufi, output, flags, curaddr);
		} else if (UVM_ET_ISCOPYONWRITE(ufi->entry)) {
			rv = uvm_loanzero(ufi, output, flags);
		} else {
			uvmfault_unlockall(ufi, aref->ar_amap, uobj, NULL);
			rv = -1;
		}
		/* locked: if (rv > 0) => map, amap, uobj  [o.w. unlocked] */
		KASSERT(rv > 0 || aref->ar_amap == NULL ||
		    !mutex_owned(&aref->ar_amap->am_l));
		KASSERT(rv > 0 || uobj == NULL ||
		    !mutex_owned(&uobj->vmobjlock));

		/* total failure */
		if (rv < 0) {
			UVMHIST_LOG(loanhist, "failure %d", rv, 0,0,0);
			return (-1);
		}

		/* relock failed, need to do another lookup */
		if (rv == 0) {
			UVMHIST_LOG(loanhist, "relock failure %d", result
			    ,0,0,0);
			return (result);
		}

		/*
		 * got it... advance to next page
		 */

		result++;
		togo -= PAGE_SIZE;
		curaddr += PAGE_SIZE;
	}

	/*
	 * unlock what we locked, unlock the maps and return
	 */

	if (aref->ar_amap)
		amap_unlock(aref->ar_amap);
	uvmfault_unlockmaps(ufi, false);
	UVMHIST_LOG(loanhist, "done %d", result, 0,0,0);
	return (result);
}

/*
 * normal functions
 */

/*
 * uvm_loan: loan pages in a map out to anons or to the kernel
 *
 * => map should be unlocked
 * => start and len should be multiples of PAGE_SIZE
 * => result is either an array of anons or vm_pages (depending on flags)
 * => flag values: UVM_LOAN_TOANON - loan to anons
 *                 UVM_LOAN_TOPAGE - loan to wired kernel page
 *    one and only one of these flags must be set!
 * => returns 0 (success), or an appropriate error number
 */

int
uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags)
{
	struct uvm_faultinfo ufi;
	void **result, **output;
	int rv, error;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * ensure that one and only one of the flags is set
	 */

	KASSERT(((flags & UVM_LOAN_TOANON) == 0) ^
		((flags & UVM_LOAN_TOPAGE) == 0));
	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);

	/*
	 * "output" is a pointer to the current place to put the loaned page.
	 */

	result = v;
	output = &result[0];	/* start at the beginning ... */

	/*
	 * while we've got pages to do
	 */

	while (len > 0) {

		/*
		 * fill in params for a call to uvmfault_lookup
		 */

		ufi.orig_map = map;
		ufi.orig_rvaddr = start;
		ufi.orig_size = len;

		/*
		 * do the lookup, the only time this will fail is if we hit on
		 * an unmapped region (an error)
		 */

		if (!uvmfault_lookup(&ufi, false)) {
			error = ENOENT;
			goto fail;
		}

		/*
		 * map now locked.  now do the loanout...
		 */

		rv = uvm_loanentry(&ufi, &output, flags);
		if (rv < 0) {
			/* all unlocked due to error */
			error = EINVAL;
			goto fail;
		}

		/*
		 * done!  the map is unlocked.  advance, if possible.
		 *
		 * XXXCDC: could be recoded to hold the map lock with
		 *	   smarter code (but it only happens on map entry
		 *	   boundaries, so it isn't that bad).
		 */

		if (rv) {
			rv <<= PAGE_SHIFT;
			len -= rv;
			start += rv;
		}
	}
	UVMHIST_LOG(loanhist, "success", 0,0,0,0);
	return 0;

fail:
	/*
	 * failed to complete loans.  drop any loans and return failure code.
	 * map is already unlocked.
	 */

	if (output - result) {
		if (flags & UVM_LOAN_TOANON) {
			uvm_unloananon((struct vm_anon **)result,
			    output - result);
		} else {
			uvm_unloanpage((struct vm_page **)result,
			    output - result);
		}
	}
	UVMHIST_LOG(loanhist, "error %d", error,0,0,0);
	return (error);
}
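
/*
 * Example (editor's sketch, not compiled): a typical O->K/A->K consumer
 * loans a page-aligned user buffer into wired kernel pages with
 * UVM_LOAN_TOPAGE and drops the loan with uvm_unloan() when done.
 * "uva" and "npages" are assumed to be supplied by the caller with uva
 * page-aligned; the malloc(9) bookkeeping here is illustrative only.
 */
#if 0
static int
example_loan_to_kernel(struct vm_map *map, vaddr_t uva, int npages)
{
	struct vm_page **pgs;
	int error;

	KASSERT((uva & PAGE_MASK) == 0);
	pgs = malloc(npages * sizeof(*pgs), M_TEMP, M_WAITOK);

	/* on success, pgs[0..npages-1] hold read-only, wired pages */
	error = uvm_loan(map, uva, (vsize_t)npages << PAGE_SHIFT,
	    pgs, UVM_LOAN_TOPAGE);
	if (error == 0) {
		/* ... map and read the loaned pages here ... */
		uvm_unloan(pgs, npages, UVM_LOAN_TOPAGE);
	}
	free(pgs, M_TEMP);
	return error;
}
#endif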

/*
 * uvm_loananon: loan a page from an anon out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

int
uvm_loananon(struct uvm_faultinfo *ufi, void ***output, int flags,
    struct vm_anon *anon)
{
	struct vm_page *pg;
	int error;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * if we are loaning to "another" anon then it is easy, we just
	 * bump the reference count on the current anon and return a
	 * pointer to it (it becomes copy-on-write shared).
	 */

	if (flags & UVM_LOAN_TOANON) {
		mutex_enter(&anon->an_lock);
		pg = anon->an_page;
		if (pg && (pg->pqflags & PQ_ANON) != 0 && anon->an_ref == 1) {
			if (pg->wire_count > 0) {
				UVMHIST_LOG(loanhist, "->A wired %p", pg,0,0,0);
				uvmfault_unlockall(ufi,
				    ufi->entry->aref.ar_amap,
				    ufi->entry->object.uvm_obj, anon);
				return (-1);
			}
			pmap_page_protect(pg, VM_PROT_READ);
		}
		anon->an_ref++;
		**output = anon;
		(*output)++;
		mutex_exit(&anon->an_lock);
		UVMHIST_LOG(loanhist, "->A done", 0,0,0,0);
		return (1);
	}

	/*
	 * we are loaning to a kernel-page.   we need to get the page
	 * resident so we can wire it.   uvmfault_anonget will handle
	 * this for us.
	 */

	mutex_enter(&anon->an_lock);
	error = uvmfault_anonget(ufi, ufi->entry->aref.ar_amap, anon);

	/*
	 * if we were unable to get the anon, then uvmfault_anonget has
	 * unlocked everything and returned an error code.
	 */

	if (error) {
		UVMHIST_LOG(loanhist, "error %d", error,0,0,0);

		/* need to refault (i.e. refresh our lookup) ? */
		if (error == ERESTART) {
			return (0);
		}

		/* "try again"?   sleep a bit and retry ... */
		if (error == EAGAIN) {
			kpause("loanagain", false, hz/2, NULL);
			return (0);
		}

		/* otherwise flag it as an error */
		return (-1);
	}

	/*
	 * we have the page and its owner locked: do the loan now.
	 */

	pg = anon->an_page;
	mutex_enter(&uvm_pageqlock);
	if (pg->wire_count > 0) {
		mutex_exit(&uvm_pageqlock);
		UVMHIST_LOG(loanhist, "->K wired %p", pg,0,0,0);
		KASSERT(pg->uobject == NULL);
		uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap,
		    NULL, anon);
		return (-1);
	}
	if (pg->loan_count == 0) {
		pmap_page_protect(pg, VM_PROT_READ);
	}
	pg->loan_count++;
	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);
	**output = pg;
	(*output)++;

	/* unlock anon and return success */
	if (pg->uobject)
		mutex_exit(&pg->uobject->vmobjlock);
	mutex_exit(&anon->an_lock);
	UVMHIST_LOG(loanhist, "->K done", 0,0,0,0);
	return (1);
}

/*
 * uvm_loanpage: loan out pages to kernel (->K)
 *
 * => pages should be object-owned and the object should be locked.
 * => in the case of error, the object might be unlocked and relocked.
 * => caller should busy the pages beforehand.
 * => pages will be unbusied.
 * => fail with EBUSY if we meet a wired page.
 */
static int
uvm_loanpage(struct vm_page **pgpp, int npages)
{
	int i;
	int error = 0;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	for (i = 0; i < npages; i++) {
		struct vm_page *pg = pgpp[i];

		KASSERT(pg->uobject != NULL);
		KASSERT(pg->uobject == pgpp[0]->uobject);
		KASSERT(!(pg->flags & (PG_RELEASED|PG_PAGEOUT)));
		KASSERT(mutex_owned(&pg->uobject->vmobjlock));
		KASSERT(pg->flags & PG_BUSY);

		mutex_enter(&uvm_pageqlock);
		if (pg->wire_count > 0) {
			mutex_exit(&uvm_pageqlock);
			UVMHIST_LOG(loanhist, "wired %p", pg,0,0,0);
			error = EBUSY;
			break;
		}
		if (pg->loan_count == 0) {
			pmap_page_protect(pg, VM_PROT_READ);
		}
		pg->loan_count++;
		uvm_pageactivate(pg);
		mutex_exit(&uvm_pageqlock);
	}

	uvm_page_unbusy(pgpp, npages);

	if (error) {
		/*
		 * backout what we've done
		 */
		kmutex_t *slock = &pgpp[0]->uobject->vmobjlock;

		mutex_exit(slock);
		uvm_unloan(pgpp, i, UVM_LOAN_TOPAGE);
		mutex_enter(slock);
	}

	UVMHIST_LOG(loanhist, "done %d", error,0,0,0);
	return error;
}

/*
 * XXX UBC temp limit
 * number of pages to get at once.
 * should be <= MAX_READ_AHEAD in genfs_vnops.c
 */
#define	UVM_LOAN_GET_CHUNK	16

/*
 * uvm_loanuobjpages: loan pages from a uobj out (O->K)
 *
 * => uobj shouldn't be locked.  (we'll lock it)
 * => fail with EBUSY if we meet a wired page.
 */
int
uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int orignpages,
    struct vm_page **origpgpp)
{
	int ndone; /* # of pages loaned out */
	struct vm_page **pgpp;
	int error;
	int i;
	kmutex_t *slock;

	pgpp = origpgpp;
	for (ndone = 0; ndone < orignpages; ) {
		int npages;
		/* npendloan: # of pages busied but not loaned out yet. */
		int npendloan = 0xdead; /* XXX gcc */
reget:
		npages = MIN(UVM_LOAN_GET_CHUNK, orignpages - ndone);
		mutex_enter(&uobj->vmobjlock);
		error = (*uobj->pgops->pgo_get)(uobj,
		    pgoff + (ndone << PAGE_SHIFT), pgpp, &npages, 0,
		    VM_PROT_READ, 0, PGO_SYNCIO);
		if (error == EAGAIN) {
			kpause("loanuopg", false, hz/2, NULL);
			continue;
		}
		if (error)
			goto fail;

		KASSERT(npages > 0);

		/* loan and unbusy pages */
		slock = NULL;
		for (i = 0; i < npages; i++) {
			kmutex_t *nextslock; /* slock for next page */
			struct vm_page *pg = *pgpp;

			/* XXX assuming that the page is owned by uobj */
			KASSERT(pg->uobject != NULL);
			nextslock = &pg->uobject->vmobjlock;

			if (slock != nextslock) {
				if (slock) {
					KASSERT(npendloan > 0);
					error = uvm_loanpage(pgpp - npendloan,
					    npendloan);
					mutex_exit(slock);
					if (error)
						goto fail;
					ndone += npendloan;
					KASSERT(origpgpp + ndone == pgpp);
				}
				slock = nextslock;
				npendloan = 0;
				mutex_enter(slock);
			}

			if ((pg->flags & PG_RELEASED) != 0) {
				/*
				 * release pages and try again.
				 */
				mutex_exit(slock);
				for (; i < npages; i++) {
					pg = pgpp[i];
					slock = &pg->uobject->vmobjlock;

					mutex_enter(slock);
					mutex_enter(&uvm_pageqlock);
					uvm_page_unbusy(&pg, 1);
					mutex_exit(&uvm_pageqlock);
					mutex_exit(slock);
				}
				goto reget;
			}

			npendloan++;
			pgpp++;
			KASSERT(origpgpp + ndone + npendloan == pgpp);
		}
		KASSERT(slock != NULL);
		KASSERT(npendloan > 0);
		error = uvm_loanpage(pgpp - npendloan, npendloan);
		mutex_exit(slock);
		if (error)
			goto fail;
		ndone += npendloan;
		KASSERT(origpgpp + ndone == pgpp);
	}

	return 0;

fail:
	uvm_unloan(origpgpp, ndone, UVM_LOAN_TOPAGE);

	return error;
}
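
/*
 * Example (editor's sketch, not compiled): loaning the first "npages"
 * pages of a vnode's uvm_object to the kernel via uvm_loanuobjpages().
 * "vp" is assumed to be a referenced vnode supplied by the caller; the
 * pages come back read-only and are returned with uvm_unloan().
 */
#if 0
static int
example_loan_vnode_pages(struct vnode *vp, struct vm_page **pgs, int npages)
{
	int error;

	error = uvm_loanuobjpages(&vp->v_uobj, 0, npages, pgs);
	if (error)		/* e.g. EBUSY on a wired page */
		return error;
	/* ... use the loaned pages read-only ... */
	uvm_unloan(pgs, npages, UVM_LOAN_TOPAGE);
	return 0;
}
#endif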

/*
 * uvm_loanuobj: loan a page from a uobj out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

static int
uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va)
{
	struct vm_amap *amap = ufi->entry->aref.ar_amap;
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct vm_page *pg;
	struct vm_anon *anon;
	int error, npages;
	bool locked;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * first we must make sure the page is resident.
	 *
	 * XXXCDC: duplicate code with uvm_fault().
	 */

	/* locked: maps(read), amap(if there) */
	mutex_enter(&uobj->vmobjlock);
	/* locked: maps(read), amap(if there), uobj */

	if (uobj->pgops->pgo_get) {	/* try locked pgo_get */
		npages = 1;
		pg = NULL;
		error = (*uobj->pgops->pgo_get)(uobj,
		    va - ufi->entry->start + ufi->entry->offset,
		    &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_LOCKED);
	} else {
		error = EIO;		/* must have pgo_get op */
	}

	/*
	 * check the result of the locked pgo_get.  if there is a problem,
	 * then we fail the loan.
	 */

	if (error && error != EBUSY) {
		uvmfault_unlockall(ufi, amap, uobj, NULL);
		return (-1);
	}

	/*
	 * if we need to unlock for I/O, do so now.
	 */

	if (error == EBUSY) {
		uvmfault_unlockall(ufi, amap, NULL, NULL);

		/* locked: uobj */
		npages = 1;
		error = (*uobj->pgops->pgo_get)(uobj,
		    va - ufi->entry->start + ufi->entry->offset,
		    &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_SYNCIO);
		/* locked: <nothing> */

		if (error) {
			if (error == EAGAIN) {
				kpause("fltagain2", false, hz/2, NULL);
				return (0);
			}
			return (-1);
		}

		/*
		 * pgo_get was a success.   attempt to relock everything.
		 */

		locked = uvmfault_relock(ufi);
		if (locked && amap)
			amap_lock(amap);
		uobj = pg->uobject;
		mutex_enter(&uobj->vmobjlock);

		/*
		 * verify that the page has not been released and re-verify
		 * that the amap slot is still free.   if there is a problem we
		 * drop our lock (thus force a lookup refresh/retry).
		 */

		if ((pg->flags & PG_RELEASED) != 0 ||
		    (locked && amap && amap_lookup(&ufi->entry->aref,
		    ufi->orig_rvaddr - ufi->entry->start))) {
			if (locked)
				uvmfault_unlockall(ufi, amap, NULL, NULL);
			locked = false;
		}

		/*
		 * didn't get the lock?   release the page and retry.
		 */

		if (locked == false) {
			if (pg->flags & PG_WANTED) {
				wakeup(pg);
			}
			if (pg->flags & PG_RELEASED) {
				mutex_enter(&uvm_pageqlock);
				uvm_pagefree(pg);
				mutex_exit(&uvm_pageqlock);
				mutex_exit(&uobj->vmobjlock);
				return (0);
			}
			mutex_enter(&uvm_pageqlock);
			uvm_pageactivate(pg);
			mutex_exit(&uvm_pageqlock);
			pg->flags &= ~(PG_BUSY|PG_WANTED);
			UVM_PAGE_OWN(pg, NULL);
			mutex_exit(&uobj->vmobjlock);
			return (0);
		}
	}

	KASSERT(uobj == pg->uobject);

	/*
	 * at this point we have the page we want ("pg") marked PG_BUSY for us
	 * and we have all data structures locked.  do the loanout.  page can
	 * not be PG_RELEASED (we caught this above).
	 */

	if ((flags & UVM_LOAN_TOANON) == 0) {
		if (uvm_loanpage(&pg, 1)) {
			uvmfault_unlockall(ufi, amap, uobj, NULL);
			return (-1);
		}
		mutex_exit(&uobj->vmobjlock);
		**output = pg;
		(*output)++;
		return (1);
	}

	/*
	 * must be a loan to an anon.   check to see if there is already
	 * an anon associated with this page.  if so, then just return
	 * a reference to that anon.   the page should already be
	 * mapped read-only because it is already on loan.
	 */

	if (pg->uanon) {
		anon = pg->uanon;
		mutex_enter(&anon->an_lock);
		anon->an_ref++;
		mutex_exit(&anon->an_lock);
		if (pg->flags & PG_WANTED) {
			wakeup(pg);
		}
		pg->flags &= ~(PG_WANTED|PG_BUSY);
		UVM_PAGE_OWN(pg, NULL);
		mutex_exit(&uobj->vmobjlock);
		**output = anon;
		(*output)++;
		return (1);
	}

	/*
	 * need to allocate a new anon
	 */

	anon = uvm_analloc();
	if (anon == NULL) {
		goto fail;
	}
	anon->an_page = pg;
	pg->uanon = anon;
	mutex_enter(&uvm_pageqlock);
	if (pg->wire_count > 0) {
		mutex_exit(&uvm_pageqlock);
		UVMHIST_LOG(loanhist, "wired %p", pg,0,0,0);
		pg->uanon = NULL;
		anon->an_page = NULL;
		anon->an_ref--;
		mutex_exit(&anon->an_lock);
		uvm_anfree(anon);
		goto fail;
	}
	if (pg->loan_count == 0) {
		pmap_page_protect(pg, VM_PROT_READ);
	}
	pg->loan_count++;
	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);
	if (pg->flags & PG_WANTED) {
		wakeup(pg);
	}
	pg->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(pg, NULL);
	mutex_exit(&uobj->vmobjlock);
	mutex_exit(&anon->an_lock);
	**output = anon;
	(*output)++;
	return (1);

fail:
	UVMHIST_LOG(loanhist, "fail", 0,0,0,0);
	/*
	 * unlock everything and bail out.
	 */
	if (pg->flags & PG_WANTED) {
		wakeup(pg);
	}
	pg->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(pg, NULL);
	uvmfault_unlockall(ufi, amap, uobj, NULL);
	return (-1);
}

/*
 * uvm_loanzero: loan a zero-fill page out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

static struct uvm_object uvm_loanzero_object;

static int
uvm_loanzero(struct uvm_faultinfo *ufi, void ***output, int flags)
{
	struct vm_anon *anon;
	struct vm_page *pg;
	struct vm_amap *amap = ufi->entry->aref.ar_amap;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);
again:
	mutex_enter(&uvm_loanzero_object.vmobjlock);

	/*
	 * first, get ahold of our single zero page.
	 */

	if (__predict_false((pg =
			     TAILQ_FIRST(&uvm_loanzero_object.memq)) == NULL)) {
		while ((pg = uvm_pagealloc(&uvm_loanzero_object, 0, NULL,
					   UVM_PGA_ZERO)) == NULL) {
			mutex_exit(&uvm_loanzero_object.vmobjlock);
			uvmfault_unlockall(ufi, amap, NULL, NULL);
			uvm_wait("loanzero");
			if (!uvmfault_relock(ufi)) {
				return (0);
			}
			if (amap) {
				amap_lock(amap);
			}
			goto again;
		}

		/* got a zero'd page. */
		pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
		pg->flags |= PG_RDONLY;
		mutex_enter(&uvm_pageqlock);
		uvm_pageactivate(pg);
		mutex_exit(&uvm_pageqlock);
		UVM_PAGE_OWN(pg, NULL);
	}

	if ((flags & UVM_LOAN_TOANON) == 0) {	/* loaning to kernel-page */
		mutex_enter(&uvm_pageqlock);
		pg->loan_count++;
		mutex_exit(&uvm_pageqlock);
		mutex_exit(&uvm_loanzero_object.vmobjlock);
		**output = pg;
		(*output)++;
		return (1);
	}

	/*
	 * loaning to an anon.  check to see if there is already an anon
	 * associated with this page.  if so, then just return a reference
	 * to that anon.
	 */

	if (pg->uanon) {
		anon = pg->uanon;
		mutex_enter(&anon->an_lock);
		anon->an_ref++;
		mutex_exit(&anon->an_lock);
		mutex_exit(&uvm_loanzero_object.vmobjlock);
		**output = anon;
		(*output)++;
		return (1);
	}

	/*
	 * need to allocate a new anon
	 */

	anon = uvm_analloc();
	if (anon == NULL) {
		/* out of swap causes us to fail */
		mutex_exit(&uvm_loanzero_object.vmobjlock);
		uvmfault_unlockall(ufi, amap, NULL, NULL);
		return (-1);
	}
	anon->an_page = pg;
	pg->uanon = anon;
	mutex_enter(&uvm_pageqlock);
	pg->loan_count++;
	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);
	mutex_exit(&anon->an_lock);
	mutex_exit(&uvm_loanzero_object.vmobjlock);
	**output = anon;
	(*output)++;
	return (1);
}


/*
 * uvm_unloananon: kill loans on anons (basically a normal ref drop)
 *
 * => we expect all our resources to be unlocked
 */

static void
uvm_unloananon(struct vm_anon **aloans, int nanons)
{
	struct vm_anon *anon;

	while (nanons-- > 0) {
		int refs;

		anon = *aloans++;
		mutex_enter(&anon->an_lock);
		refs = --anon->an_ref;
		mutex_exit(&anon->an_lock);

		if (refs == 0) {
			uvm_anfree(anon);
		}
	}
}

/*
 * uvm_unloanpage: kill loans on pages loaned out to the kernel
 *
 * => we expect all our resources to be unlocked
 */

static void
uvm_unloanpage(struct vm_page **ploans, int npages)
{
	struct vm_page *pg;
	kmutex_t *slock;

	mutex_enter(&uvm_pageqlock);
	while (npages-- > 0) {
		pg = *ploans++;

		/*
		 * do a little dance to acquire the object or anon lock
		 * as appropriate.  we are locking in the wrong order,
		 * so we have to do a try-lock here.
		 */

		slock = NULL;
		while (pg->uobject != NULL || pg->uanon != NULL) {
			if (pg->uobject != NULL) {
				slock = &pg->uobject->vmobjlock;
			} else {
				slock = &pg->uanon->an_lock;
			}
			if (mutex_tryenter(slock)) {
				break;
			}
			mutex_exit(&uvm_pageqlock);
			/* XXX Better than yielding but inadequate. */
			kpause("livelock", false, 1, NULL);
			mutex_enter(&uvm_pageqlock);
			slock = NULL;
		}

		/*
		 * drop our loan.  if page is owned by an anon but
		 * PQ_ANON is not set, the page was loaned to the anon
		 * from an object which dropped ownership, so resolve
		 * this by turning the anon's loan into real ownership
		 * (ie. decrement loan_count again and set PQ_ANON).
		 * after all this, if there are no loans left, put the
		 * page back on a paging queue (if the page is owned by
		 * an anon) or free it (if the page is now unowned).
		 */

		KASSERT(pg->loan_count > 0);
		pg->loan_count--;
		if (pg->uobject == NULL && pg->uanon != NULL &&
		    (pg->pqflags & PQ_ANON) == 0) {
			KASSERT(pg->loan_count > 0);
			pg->loan_count--;
			pg->pqflags |= PQ_ANON;
		}
		if (pg->loan_count == 0 && pg->uobject == NULL &&
		    pg->uanon == NULL) {
			KASSERT((pg->flags & PG_BUSY) == 0);
			uvm_pagefree(pg);
		}
		if (slock != NULL) {
			mutex_exit(slock);
		}
	}
	mutex_exit(&uvm_pageqlock);
}

/*
 * uvm_unloan: kill loans on pages or anons.
 */

void
uvm_unloan(void *v, int npages, int flags)
{
	if (flags & UVM_LOAN_TOANON) {
		uvm_unloananon(v, npages);
	} else {
		uvm_unloanpage(v, npages);
	}
}

/*
 * Minimal pager for uvm_loanzero_object.  We need to provide a "put"
 * method, because the page can end up on a paging queue, and the
 * page daemon will want to call pgo_put when it encounters the page
 * on the inactive list.
 */

static int
ulz_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
{
	struct vm_page *pg;

	KDASSERT(uobj == &uvm_loanzero_object);

	/*
	 * Don't need to do any work here if we're not freeing pages.
	 */

	if ((flags & PGO_FREE) == 0) {
		mutex_exit(&uobj->vmobjlock);
		return 0;
	}

	/*
	 * we don't actually want to ever free the uvm_loanzero_page, so
	 * just reactivate or dequeue it.
	 */

	pg = TAILQ_FIRST(&uobj->memq);
	KASSERT(pg != NULL);
	KASSERT(TAILQ_NEXT(pg, listq.queue) == NULL);

	mutex_enter(&uvm_pageqlock);
	if (pg->uanon)
		uvm_pageactivate(pg);
	else
		uvm_pagedequeue(pg);
	mutex_exit(&uvm_pageqlock);

	mutex_exit(&uobj->vmobjlock);
	return 0;
}

static const struct uvm_pagerops ulz_pager = {
	.pgo_put = ulz_put,
};

/*
 * uvm_loan_init(): initialize the uvm_loan() facility.
 */

void
uvm_loan_init(void)
{

	UVM_OBJ_INIT(&uvm_loanzero_object, &ulz_pager, 0);

	UVMHIST_INIT(loanhist, 300);
}

/*
 * uvm_loanbreak: break loan on a uobj page
 *
 * => called with uobj locked
 * => the page should be busy
 * => return value:
 *	the newly allocated page on success, NULL on failure
 */
struct vm_page *
uvm_loanbreak(struct vm_page *uobjpage)
{
	struct vm_page *pg;
#ifdef DIAGNOSTIC
	struct uvm_object *uobj = uobjpage->uobject;
#endif

	KASSERT(uobj != NULL);
	KASSERT(mutex_owned(&uobj->vmobjlock));
	KASSERT(uobjpage->flags & PG_BUSY);

	/* alloc new un-owned page */
	pg = uvm_pagealloc(NULL, 0, NULL, 0);
	if (pg == NULL)
		return NULL;

	/*
	 * copy the data from the old page to the new
	 * one and clear the fake flags on the new page (keep it busy).
	 * force a reload of the old page by clearing it from all
	 * pmaps.
	 * transfer dirtiness of the old page to the new page.
	 * then lock the page queues to rename the pages.
	 */

	uvm_pagecopy(uobjpage, pg);	/* old -> new */
	pg->flags &= ~PG_FAKE;
	pmap_page_protect(uobjpage, VM_PROT_NONE);
	if ((uobjpage->flags & PG_CLEAN) != 0 && !pmap_clear_modify(uobjpage)) {
		pmap_clear_modify(pg);
		pg->flags |= PG_CLEAN;
	} else {
		/* uvm_pagecopy marked it dirty */
		KASSERT((pg->flags & PG_CLEAN) == 0);
		/* an object with a dirty page should be dirty. */
		KASSERT(!UVM_OBJ_IS_CLEAN(uobj));
	}
	if (uobjpage->flags & PG_WANTED)
		wakeup(uobjpage);
	/* uobj still locked */
	uobjpage->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(uobjpage, NULL);

	mutex_enter(&uvm_pageqlock);

	/*
	 * replace uobjpage with new page.
	 */

	uvm_pagereplace(uobjpage, pg);

	/*
	 * if the page is no longer referenced by
	 * an anon (i.e. we are breaking an O->K
	 * loan), then remove it from any pageq's.
	 */
	if (uobjpage->uanon == NULL)
		uvm_pagedequeue(uobjpage);

	/*
	 * at this point we have absolutely no
	 * control over uobjpage
	 */

	/* install new page */
	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);

	/*
	 * done!  loan is broken and "pg" is
	 * PG_BUSY.   it can now replace uobjpage.
	 */

	return pg;
}
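
/*
 * Example (editor's sketch, not compiled): how a fault-handler-style
 * caller might use uvm_loanbreak() before writing to a loaned object
 * page.  Per the contract above, "uobj" is locked and "uobjpage" is
 * PG_BUSY; "ufi", "amap" and the refault convention are assumed
 * caller-side state, not defined by this file.
 */
#if 0
	if (uobjpage->loan_count) {
		pg = uvm_loanbreak(uobjpage);
		if (pg == NULL) {
			/* out of memory: back out, wait, and refault */
			uvmfault_unlockall(ufi, amap, uobj, NULL);
			uvm_wait("loanbrk");
			return ERESTART;
		}
		uobjpage = pg;	/* the new page replaces the loaned one */
	}
#endif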

int
uvm_loanbreak_anon(struct vm_anon *anon, struct uvm_object *uobj)
{
	struct vm_page *pg;

	KASSERT(mutex_owned(&anon->an_lock));
	KASSERT(uobj == NULL || mutex_owned(&uobj->vmobjlock));

	/* get new un-owned replacement page */
	pg = uvm_pagealloc(NULL, 0, NULL, 0);
	if (pg == NULL) {
		return ENOMEM;
	}

	/*
	 * copy data, kill loan, and drop uobj lock (if any)
	 */
	/* copy old -> new */
	uvm_pagecopy(anon->an_page, pg);

	/* force reload */
	pmap_page_protect(anon->an_page, VM_PROT_NONE);
	mutex_enter(&uvm_pageqlock);	  /* KILL loan */

	anon->an_page->uanon = NULL;
	/* in case we owned */
	anon->an_page->pqflags &= ~PQ_ANON;

	if (uobj) {
		/* if we were the receiver of the loan */
		anon->an_page->loan_count--;
	} else {
		/*
		 * we were the lender (A->K); need to remove the page from
		 * pageq's.
		 */
		uvm_pagedequeue(anon->an_page);
	}

	if (uobj) {
		mutex_exit(&uobj->vmobjlock);
	}

	/* install new page in anon */
	anon->an_page = pg;
	pg->uanon = anon;
	pg->pqflags |= PQ_ANON;

	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);

	pg->flags &= ~(PG_BUSY|PG_FAKE);
	UVM_PAGE_OWN(pg, NULL);

	/* done! */

	return 0;
}