/*	$NetBSD: uvm_loan.c,v 1.71 2008/06/04 13:23:30 ad Exp $	*/

/*
 *
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Charles D. Cranor and
 *      Washington University.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * from: Id: uvm_loan.c,v 1.1.6.4 1998/02/06 05:08:43 chs Exp
 */

/*
 * uvm_loan.c: page loanout handler
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.71 2008/06/04 13:23:30 ad Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/mman.h>

#include <uvm/uvm.h>

/*
 * "loaned" pages are pages which are (read-only, copy-on-write) loaned
 * from the VM system to other parts of the kernel.   this allows page
 * copying to be avoided (e.g. you can loan pages from objs/anons to
 * the mbuf system).
 *
 * there are 3 types of loans possible:
 *  O->K  uvm_object page to wired kernel page (e.g. mbuf data area)
 *  A->K  anon page to wired kernel page (e.g. mbuf data area)
 *  O->A  uvm_object to anon loan (e.g. vnode page to an anon)
 * note that it is possible to have an O page loaned to both an A and K
 * at the same time.
 *
 * loans are tracked by pg->loan_count.  an O->A page will have both
 * a uvm_object and a vm_anon, but PQ_ANON will not be set.   this sort
 * of page is considered "owned" by the uvm_object (not the anon).
 *
 * each loan of a page to the kernel bumps the pg->wire_count.  the
 * kernel mappings for these pages will be read-only and wired.  since
 * the page will also be wired, it will not be a candidate for pageout,
 * and thus will never be pmap_page_protect()'d with VM_PROT_NONE.  a
 * write fault in the kernel to one of these pages will not cause
 * copy-on-write.  instead, the page fault is considered fatal.  this
 * is because the kernel mapping will have no way to look up the
 * object/anon which the page is owned by.  this is a good side-effect,
 * since a kernel write to a loaned page is an error.
 *
 * owners that want to free their pages and discover that they are
 * loaned out simply "disown" them (the page becomes an orphan).  these
 * pages should be freed when the last loan is dropped.   in some cases
 * an anon may "adopt" an orphaned page.
 *
 * locking: to read pg->loan_count either the owner or the page queues
 * must be locked.   to modify pg->loan_count, both the owner of the page
 * and the PQs must be locked.   pg->flags is (as always) locked by
 * the owner of the page.
 *
 * note that locking from the "loaned" side is tricky since the object
 * getting the loaned page has no reference to the page's owner and thus
 * the owner could "die" at any time.   in order to prevent the owner
 * from dying the page queues should be locked.   this forces us to sometimes
 * use "try" locking.
 *
 * loans are typically broken by the following events:
 *  1. user-level write fault to a loaned page
 *  2. pageout of clean+inactive O->A loaned page
 *  3. owner frees page (e.g. pager flush)
 *
 * note that loaning a page causes all mappings of the page to become
 * read-only (via pmap_page_protect).   this could have an unexpected
 * effect on normal "wired" pages if one is not careful (XXX).
 */
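
/*
 * Example (an illustrative sketch, not part of the original source):
 * the O->K case from a hypothetical consumer's point of view.  "map",
 * "uva" (page-aligned) and "npages" are assumed to be supplied by the
 * caller; error handling is abbreviated.
 *
 *	struct vm_page *pgs[8];
 *	int error;
 *
 *	// loan npages (<= 8) of read-only pages backing [uva, uva+len)
 *	error = uvm_loan(map, uva, (vsize_t)npages << PAGE_SHIFT,
 *	    pgs, UVM_LOAN_TOPAGE);
 *	if (error)
 *		return error;
 *	// ... read the loaned pages; writing them is an error ...
 *	uvm_unloan(pgs, npages, UVM_LOAN_TOPAGE);
 */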

/*
 * local prototypes
 */

static int	uvm_loananon(struct uvm_faultinfo *, void ***,
			     int, struct vm_anon *);
static int	uvm_loanuobj(struct uvm_faultinfo *, void ***,
			     int, vaddr_t);
static int	uvm_loanzero(struct uvm_faultinfo *, void ***, int);
static void	uvm_unloananon(struct vm_anon **, int);
static void	uvm_unloanpage(struct vm_page **, int);
static int	uvm_loanpage(struct vm_page **, int);


/*
 * inlines
 */

/*
 * uvm_loanentry: loan out pages in a map entry (helper fn for uvm_loan())
 *
 * => "ufi" is the result of a successful map lookup (meaning that
 *	on entry the map is locked by the caller)
 * => we may unlock and then relock the map if needed (for I/O)
 * => we put our output result in "output"
 * => we always return with the map unlocked
 * => possible return values:
 *	-1 == error, map is unlocked
 *	 0 == map relock error (try again!), map is unlocked
 *	>0 == number of pages we loaned, map is unlocked
 *
 * NOTE: We can live with this being an inline, because it is only called
 * from one place.
 */

static inline int
uvm_loanentry(struct uvm_faultinfo *ufi, void ***output, int flags)
{
	vaddr_t curaddr = ufi->orig_rvaddr;
	vsize_t togo = ufi->size;
	struct vm_aref *aref = &ufi->entry->aref;
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct vm_anon *anon;
	int rv, result = 0;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * lock us the rest of the way down (we unlock before return)
	 */
	if (aref->ar_amap)
		amap_lock(aref->ar_amap);

	/*
	 * loop until done
	 */
	while (togo) {

		/*
		 * find the page we want.   check the anon layer first.
		 */

		if (aref->ar_amap) {
			anon = amap_lookup(aref, curaddr - ufi->entry->start);
		} else {
			anon = NULL;
		}

		/* locked: map, amap, uobj */
		if (anon) {
			rv = uvm_loananon(ufi, output, flags, anon);
		} else if (uobj) {
			rv = uvm_loanuobj(ufi, output, flags, curaddr);
		} else if (UVM_ET_ISCOPYONWRITE(ufi->entry)) {
			rv = uvm_loanzero(ufi, output, flags);
		} else {
			uvmfault_unlockall(ufi, aref->ar_amap, uobj, NULL);
			rv = -1;
		}
		/* locked: if (rv > 0) => map, amap, uobj  [o.w. unlocked] */
		KASSERT(rv > 0 || aref->ar_amap == NULL ||
		    !mutex_owned(&aref->ar_amap->am_l));
		KASSERT(rv > 0 || uobj == NULL ||
		    !mutex_owned(&uobj->vmobjlock));

		/* total failure */
		if (rv < 0) {
			UVMHIST_LOG(loanhist, "failure %d", rv, 0,0,0);
			return (-1);
		}

		/* relock failed, need to do another lookup */
		if (rv == 0) {
			UVMHIST_LOG(loanhist, "relock failure %d", result,
			    0,0,0);
			return (result);
		}

		/*
		 * got it... advance to next page
		 */

		result++;
		togo -= PAGE_SIZE;
		curaddr += PAGE_SIZE;
	}

	/*
	 * unlock what we locked, unlock the maps and return
	 */

	if (aref->ar_amap)
		amap_unlock(aref->ar_amap);
	uvmfault_unlockmaps(ufi, false);
	UVMHIST_LOG(loanhist, "done %d", result, 0,0,0);
	return (result);
}

/*
 * normal functions
 */

/*
 * uvm_loan: loan pages in a map out to anons or to the kernel
 *
 * => map should be unlocked
 * => start and len should be multiples of PAGE_SIZE
 * => result is either an array of anon's or vm_pages (depending on flags)
 * => flag values: UVM_LOAN_TOANON - loan to anons
 *                 UVM_LOAN_TOPAGE - loan to wired kernel page
 *    one and only one of these flags must be set!
 * => returns 0 (success), or an appropriate error number
 */

int
uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags)
{
	struct uvm_faultinfo ufi;
	void **result, **output;
	int rv, error;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * ensure that one and only one of the flags is set
	 */

	KASSERT(((flags & UVM_LOAN_TOANON) == 0) ^
		((flags & UVM_LOAN_TOPAGE) == 0));
	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);

	/*
	 * "output" is a pointer to the current place to put the loaned page.
	 */

	result = v;
	output = &result[0];	/* start at the beginning ... */

	/*
	 * while we've got pages to do
	 */

	while (len > 0) {

		/*
		 * fill in params for a call to uvmfault_lookup
		 */

		ufi.orig_map = map;
		ufi.orig_rvaddr = start;
		ufi.orig_size = len;

		/*
		 * do the lookup, the only time this will fail is if we hit on
		 * an unmapped region (an error)
		 */

		if (!uvmfault_lookup(&ufi, false)) {
			error = ENOENT;
			goto fail;
		}

		/*
		 * map now locked.  now do the loanout...
		 */

		rv = uvm_loanentry(&ufi, &output, flags);
		if (rv < 0) {
			/* all unlocked due to error */
			error = EINVAL;
			goto fail;
		}

		/*
		 * done!  the map is unlocked.  advance, if possible.
		 *
		 * XXXCDC: could be recoded to hold the map lock with
		 *	   smarter code (but it only happens on map entry
		 *	   boundaries, so it isn't that bad).
		 */

		if (rv) {
			rv <<= PAGE_SHIFT;
			len -= rv;
			start += rv;
		}
	}
	UVMHIST_LOG(loanhist, "success", 0,0,0,0);
	return 0;

fail:
	/*
	 * failed to complete loans.  drop any loans and return failure code.
	 * map is already unlocked.
	 */

	if (output - result) {
		if (flags & UVM_LOAN_TOANON) {
			uvm_unloananon((struct vm_anon **)result,
			    output - result);
		} else {
			uvm_unloanpage((struct vm_page **)result,
			    output - result);
		}
	}
	UVMHIST_LOG(loanhist, "error %d", error,0,0,0);
	return (error);
}
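
/*
 * Example (an illustrative sketch, not part of the original source):
 * the same entry point with UVM_LOAN_TOANON fills the result array
 * with referenced anons instead of wired pages.  "map" and "uva" are
 * assumed to come from the caller.
 *
 *	struct vm_anon *anons[4];
 *
 *	if (uvm_loan(map, uva, 4 << PAGE_SHIFT, anons,
 *	    UVM_LOAN_TOANON) == 0) {
 *		// ... use the anons (e.g. install them in another
 *		// amap, copy-on-write shared) ...
 *		uvm_unloan(anons, 4, UVM_LOAN_TOANON);
 *	}
 */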

/*
 * uvm_loananon: loan a page from an anon out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

int
uvm_loananon(struct uvm_faultinfo *ufi, void ***output, int flags,
    struct vm_anon *anon)
{
	struct vm_page *pg;
	int error;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * if we are loaning to "another" anon then it is easy, we just
	 * bump the reference count on the current anon and return a
	 * pointer to it (it becomes copy-on-write shared).
	 */

	if (flags & UVM_LOAN_TOANON) {
		mutex_enter(&anon->an_lock);
		pg = anon->an_page;
		if (pg && (pg->pqflags & PQ_ANON) != 0 && anon->an_ref == 1) {
			if (pg->wire_count > 0) {
				UVMHIST_LOG(loanhist, "->A wired %p", pg,0,0,0);
				uvmfault_unlockall(ufi,
				    ufi->entry->aref.ar_amap,
				    ufi->entry->object.uvm_obj, anon);
				return (-1);
			}
			pmap_page_protect(pg, VM_PROT_READ);
		}
		anon->an_ref++;
		**output = anon;
		(*output)++;
		mutex_exit(&anon->an_lock);
		UVMHIST_LOG(loanhist, "->A done", 0,0,0,0);
		return (1);
	}

	/*
	 * we are loaning to a kernel-page.   we need to get the page
	 * resident so we can wire it.   uvmfault_anonget will handle
	 * this for us.
	 */

	mutex_enter(&anon->an_lock);
	error = uvmfault_anonget(ufi, ufi->entry->aref.ar_amap, anon);

	/*
	 * if we were unable to get the anon, then uvmfault_anonget has
	 * unlocked everything and returned an error code.
	 */

	if (error) {
		UVMHIST_LOG(loanhist, "error %d", error,0,0,0);

		/* need to refault (i.e. refresh our lookup) ? */
		if (error == ERESTART) {
			return (0);
		}

		/* "try again"?   sleep a bit and retry ... */
		if (error == EAGAIN) {
			tsleep(&lbolt, PVM, "loanagain", 0);
			return (0);
		}

		/* otherwise flag it as an error */
		return (-1);
	}

	/*
	 * we have the page and its owner locked: do the loan now.
	 */

	pg = anon->an_page;
	mutex_enter(&uvm_pageqlock);
	if (pg->wire_count > 0) {
		mutex_exit(&uvm_pageqlock);
		UVMHIST_LOG(loanhist, "->K wired %p", pg,0,0,0);
		KASSERT(pg->uobject == NULL);
		uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap,
		    NULL, anon);
		return (-1);
	}
	if (pg->loan_count == 0) {
		pmap_page_protect(pg, VM_PROT_READ);
	}
	pg->loan_count++;
	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);
	**output = pg;
	(*output)++;

	/* unlock anon and return success */
	if (pg->uobject)
		mutex_exit(&pg->uobject->vmobjlock);
	mutex_exit(&anon->an_lock);
	UVMHIST_LOG(loanhist, "->K done", 0,0,0,0);
	return (1);
}

/*
 * uvm_loanpage: loan out pages to kernel (->K)
 *
 * => pages should be object-owned and the object should be locked.
 * => in the case of error, the object might be unlocked and relocked.
 * => caller should busy the pages beforehand.
 * => pages will be unbusied.
 * => fail with EBUSY if we meet a wired page.
 */
static int
uvm_loanpage(struct vm_page **pgpp, int npages)
{
	int i;
	int error = 0;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	for (i = 0; i < npages; i++) {
		struct vm_page *pg = pgpp[i];

		KASSERT(pg->uobject != NULL);
		KASSERT(pg->uobject == pgpp[0]->uobject);
		KASSERT(!(pg->flags & (PG_RELEASED|PG_PAGEOUT)));
		KASSERT(mutex_owned(&pg->uobject->vmobjlock));
		KASSERT(pg->flags & PG_BUSY);

		mutex_enter(&uvm_pageqlock);
		if (pg->wire_count > 0) {
			mutex_exit(&uvm_pageqlock);
			UVMHIST_LOG(loanhist, "wired %p", pg,0,0,0);
			error = EBUSY;
			break;
		}
		if (pg->loan_count == 0) {
			pmap_page_protect(pg, VM_PROT_READ);
		}
		pg->loan_count++;
		uvm_pageactivate(pg);
		mutex_exit(&uvm_pageqlock);
	}

	uvm_page_unbusy(pgpp, npages);

	if (error) {
		/*
		 * backout what we've done
		 */
		kmutex_t *slock = &pgpp[0]->uobject->vmobjlock;

		mutex_exit(slock);
		uvm_unloan(pgpp, i, UVM_LOAN_TOPAGE);
		mutex_enter(slock);
	}

	UVMHIST_LOG(loanhist, "done %d", error,0,0,0);
	return error;
}

/*
 * XXX UBC temp limit
 * number of pages to get at once.
 * should be <= MAX_READ_AHEAD in genfs_vnops.c
 */
#define	UVM_LOAN_GET_CHUNK	16

/*
 * uvm_loanuobjpages: loan pages from a uobj out (O->K)
 *
 * => uobj shouldn't be locked.  (we'll lock it)
 * => fail with EBUSY if we meet a wired page.
 */
int
uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int orignpages,
    struct vm_page **origpgpp)
{
	int ndone; /* # of pages loaned out */
	struct vm_page **pgpp;
	int error;
	int i;
	kmutex_t *slock;

	pgpp = origpgpp;
	for (ndone = 0; ndone < orignpages; ) {
		int npages;
		/* npendloan: # of pages busied but not loaned out yet. */
		int npendloan = 0xdead; /* XXX gcc */
reget:
		npages = MIN(UVM_LOAN_GET_CHUNK, orignpages - ndone);
		mutex_enter(&uobj->vmobjlock);
		error = (*uobj->pgops->pgo_get)(uobj,
		    pgoff + (ndone << PAGE_SHIFT), pgpp, &npages, 0,
		    VM_PROT_READ, 0, PGO_SYNCIO);
		if (error == EAGAIN) {
			tsleep(&lbolt, PVM, "nfsread", 0);
			continue;
		}
		if (error)
			goto fail;

		KASSERT(npages > 0);

		/* loan and unbusy pages */
		slock = NULL;
		for (i = 0; i < npages; i++) {
			kmutex_t *nextslock; /* slock for next page */
			struct vm_page *pg = *pgpp;

			/* XXX assuming that the page is owned by uobj */
			KASSERT(pg->uobject != NULL);
			nextslock = &pg->uobject->vmobjlock;

			if (slock != nextslock) {
				if (slock) {
					KASSERT(npendloan > 0);
					error = uvm_loanpage(pgpp - npendloan,
					    npendloan);
					mutex_exit(slock);
					if (error)
						goto fail;
					ndone += npendloan;
					KASSERT(origpgpp + ndone == pgpp);
				}
				slock = nextslock;
				npendloan = 0;
				mutex_enter(slock);
			}

			if ((pg->flags & PG_RELEASED) != 0) {
				/*
				 * release pages and try again.
				 */
				mutex_exit(slock);
				for (; i < npages; i++) {
					pg = pgpp[i];
					slock = &pg->uobject->vmobjlock;

					mutex_enter(slock);
					mutex_enter(&uvm_pageqlock);
					uvm_page_unbusy(&pg, 1);
					mutex_exit(&uvm_pageqlock);
					mutex_exit(slock);
				}
				goto reget;
			}

			npendloan++;
			pgpp++;
			KASSERT(origpgpp + ndone + npendloan == pgpp);
		}
		KASSERT(slock != NULL);
		KASSERT(npendloan > 0);
		error = uvm_loanpage(pgpp - npendloan, npendloan);
		mutex_exit(slock);
		if (error)
			goto fail;
		ndone += npendloan;
		KASSERT(origpgpp + ndone == pgpp);
	}

	return 0;

fail:
	uvm_unloan(origpgpp, ndone, UVM_LOAN_TOPAGE);

	return error;
}
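
/*
 * Example (an illustrative sketch, not part of the original source):
 * a hypothetical caller loaning the first "npages" pages (<= 16 here,
 * only because of the local array size) of a vnode's uvm_object.  "vp"
 * is an assumed vnode held by the caller.
 *
 *	struct vm_page *pgs[16];
 *	struct uvm_object *uobj = &vp->v_uobj;
 *	int error;
 *
 *	error = uvm_loanuobjpages(uobj, 0, npages, pgs);
 *	if (error == 0) {
 *		// ... map and read the loaned pages (read-only) ...
 *		uvm_unloan(pgs, npages, UVM_LOAN_TOPAGE);
 *	}
 */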

/*
 * uvm_loanuobj: loan a page from a uobj out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

static int
uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va)
{
	struct vm_amap *amap = ufi->entry->aref.ar_amap;
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct vm_page *pg;
	struct vm_anon *anon;
	int error, npages;
	bool locked;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * first we must make sure the page is resident.
	 *
	 * XXXCDC: duplicate code with uvm_fault().
	 */

	mutex_enter(&uobj->vmobjlock);
	if (uobj->pgops->pgo_get) {	/* try locked pgo_get */
		npages = 1;
		pg = NULL;
		error = (*uobj->pgops->pgo_get)(uobj,
		    va - ufi->entry->start + ufi->entry->offset,
		    &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_LOCKED);
	} else {
		error = EIO;		/* must have pgo_get op */
	}

	/*
	 * check the result of the locked pgo_get.  if there is a problem,
	 * then we fail the loan.
	 */

	if (error && error != EBUSY) {
		uvmfault_unlockall(ufi, amap, uobj, NULL);
		return (-1);
	}

	/*
	 * if we need to unlock for I/O, do so now.
	 */

	if (error == EBUSY) {
		uvmfault_unlockall(ufi, amap, NULL, NULL);

		/* locked: uobj */
		npages = 1;
		error = (*uobj->pgops->pgo_get)(uobj,
		    va - ufi->entry->start + ufi->entry->offset,
		    &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_SYNCIO);
		/* locked: <nothing> */

		if (error) {
			if (error == EAGAIN) {
				tsleep(&lbolt, PVM, "fltagain2", 0);
				return (0);
			}
			return (-1);
		}

		/*
		 * pgo_get was a success.   attempt to relock everything.
		 */

		locked = uvmfault_relock(ufi);
		if (locked && amap)
			amap_lock(amap);
		uobj = pg->uobject;
		mutex_enter(&uobj->vmobjlock);

		/*
		 * verify that the page has not been released and re-verify
		 * that the amap slot is still free.   if there is a problem,
		 * we drop our lock (thus forcing a lookup refresh/retry).
		 */

		if ((pg->flags & PG_RELEASED) != 0 ||
		    (locked && amap && amap_lookup(&ufi->entry->aref,
		    ufi->orig_rvaddr - ufi->entry->start))) {
			if (locked)
				uvmfault_unlockall(ufi, amap, NULL, NULL);
			locked = false;
		}

		/*
		 * didn't get the lock?   release the page and retry.
		 */

		if (locked == false) {
			if (pg->flags & PG_WANTED) {
				wakeup(pg);
			}
			if (pg->flags & PG_RELEASED) {
				mutex_enter(&uvm_pageqlock);
				uvm_pagefree(pg);
				mutex_exit(&uvm_pageqlock);
				mutex_exit(&uobj->vmobjlock);
				return (0);
			}
			mutex_enter(&uvm_pageqlock);
			uvm_pageactivate(pg);
			mutex_exit(&uvm_pageqlock);
			pg->flags &= ~(PG_BUSY|PG_WANTED);
			UVM_PAGE_OWN(pg, NULL);
			mutex_exit(&uobj->vmobjlock);
			return (0);
		}
	}

	KASSERT(uobj == pg->uobject);

	/*
	 * at this point we have the page we want ("pg") marked PG_BUSY for us
	 * and we have all data structures locked.  do the loanout.  page can
	 * not be PG_RELEASED (we caught this above).
	 */

	if ((flags & UVM_LOAN_TOANON) == 0) {
		if (uvm_loanpage(&pg, 1)) {
			uvmfault_unlockall(ufi, amap, uobj, NULL);
			return (-1);
		}
		mutex_exit(&uobj->vmobjlock);
		**output = pg;
		(*output)++;
		return (1);
	}

	/*
	 * must be a loan to an anon.   check to see if there is already
	 * an anon associated with this page.  if so, then just return
	 * a reference to this object.   the page should already be
	 * mapped read-only because it is already on loan.
	 */

	if (pg->uanon) {
		anon = pg->uanon;
		mutex_enter(&anon->an_lock);
		anon->an_ref++;
		mutex_exit(&anon->an_lock);
		if (pg->flags & PG_WANTED) {
			wakeup(pg);
		}
		pg->flags &= ~(PG_WANTED|PG_BUSY);
		UVM_PAGE_OWN(pg, NULL);
		mutex_exit(&uobj->vmobjlock);
		**output = anon;
		(*output)++;
		return (1);
	}

	/*
	 * need to allocate a new anon
	 */

	anon = uvm_analloc();
	if (anon == NULL) {
		goto fail;
	}
	anon->an_page = pg;
	pg->uanon = anon;
	mutex_enter(&uvm_pageqlock);
	if (pg->wire_count > 0) {
		mutex_exit(&uvm_pageqlock);
		UVMHIST_LOG(loanhist, "wired %p", pg,0,0,0);
		pg->uanon = NULL;
		anon->an_page = NULL;
		anon->an_ref--;
		mutex_exit(&anon->an_lock);
		uvm_anfree(anon);
		goto fail;
	}
	if (pg->loan_count == 0) {
		pmap_page_protect(pg, VM_PROT_READ);
	}
	pg->loan_count++;
	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);
	if (pg->flags & PG_WANTED) {
		wakeup(pg);
	}
	pg->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(pg, NULL);
	mutex_exit(&uobj->vmobjlock);
	mutex_exit(&anon->an_lock);
	**output = anon;
	(*output)++;
	return (1);

fail:
	UVMHIST_LOG(loanhist, "fail", 0,0,0,0);
	/*
	 * unlock everything and bail out.
	 */
	if (pg->flags & PG_WANTED) {
		wakeup(pg);
	}
	pg->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(pg, NULL);
	uvmfault_unlockall(ufi, amap, uobj, NULL);
	return (-1);
}

/*
 * uvm_loanzero: loan a zero-fill page out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

static struct uvm_object uvm_loanzero_object;

static int
uvm_loanzero(struct uvm_faultinfo *ufi, void ***output, int flags)
{
	struct vm_anon *anon;
	struct vm_page *pg;
	struct vm_amap *amap = ufi->entry->aref.ar_amap;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);
again:
	mutex_enter(&uvm_loanzero_object.vmobjlock);

	/*
	 * first, get ahold of our single zero page.
	 */

	if (__predict_false((pg =
			     TAILQ_FIRST(&uvm_loanzero_object.memq)) == NULL)) {
		while ((pg = uvm_pagealloc(&uvm_loanzero_object, 0, NULL,
					   UVM_PGA_ZERO)) == NULL) {
			mutex_exit(&uvm_loanzero_object.vmobjlock);
			uvmfault_unlockall(ufi, amap, NULL, NULL);
			uvm_wait("loanzero");
			if (!uvmfault_relock(ufi)) {
				return (0);
			}
			if (amap) {
				amap_lock(amap);
			}
			goto again;
		}

		/* got a zero'd page. */
		pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
		pg->flags |= PG_RDONLY;
		mutex_enter(&uvm_pageqlock);
		uvm_pageactivate(pg);
		mutex_exit(&uvm_pageqlock);
		UVM_PAGE_OWN(pg, NULL);
	}

	if ((flags & UVM_LOAN_TOANON) == 0) {	/* loaning to kernel-page */
		mutex_enter(&uvm_pageqlock);
		pg->loan_count++;
		mutex_exit(&uvm_pageqlock);
		mutex_exit(&uvm_loanzero_object.vmobjlock);
		**output = pg;
		(*output)++;
		return (1);
	}

	/*
	 * loaning to an anon.  check to see if there is already an anon
	 * associated with this page.  if so, then just return a reference
	 * to this object.
	 */

	if (pg->uanon) {
		anon = pg->uanon;
		mutex_enter(&anon->an_lock);
		anon->an_ref++;
		mutex_exit(&anon->an_lock);
		mutex_exit(&uvm_loanzero_object.vmobjlock);
		**output = anon;
		(*output)++;
		return (1);
	}

	/*
	 * need to allocate a new anon
	 */

	anon = uvm_analloc();
	if (anon == NULL) {
		/* out of swap causes us to fail */
		mutex_exit(&uvm_loanzero_object.vmobjlock);
		uvmfault_unlockall(ufi, amap, NULL, NULL);
		return (-1);
	}
	anon->an_page = pg;
	pg->uanon = anon;
	mutex_enter(&uvm_pageqlock);
	pg->loan_count++;
	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);
	mutex_exit(&anon->an_lock);
	mutex_exit(&uvm_loanzero_object.vmobjlock);
	**output = anon;
	(*output)++;
	return (1);
}


/*
 * uvm_unloananon: kill loans on anons (basically a normal ref drop)
 *
 * => we expect all our resources to be unlocked
 */

static void
uvm_unloananon(struct vm_anon **aloans, int nanons)
{
	struct vm_anon *anon;

	while (nanons-- > 0) {
		int refs;

		anon = *aloans++;
		mutex_enter(&anon->an_lock);
		refs = --anon->an_ref;
		mutex_exit(&anon->an_lock);

		if (refs == 0) {
			uvm_anfree(anon);
		}
	}
}

/*
 * uvm_unloanpage: kill loans on pages loaned out to the kernel
 *
 * => we expect all our resources to be unlocked
 */

static void
uvm_unloanpage(struct vm_page **ploans, int npages)
{
	struct vm_page *pg;
	kmutex_t *slock;

	mutex_enter(&uvm_pageqlock);
	while (npages-- > 0) {
		pg = *ploans++;

		/*
		 * do a little dance to acquire the object or anon lock
		 * as appropriate.  we are locking in the wrong order,
		 * so we have to do a try-lock here.
		 */

		slock = NULL;
		while (pg->uobject != NULL || pg->uanon != NULL) {
			if (pg->uobject != NULL) {
				slock = &pg->uobject->vmobjlock;
			} else {
				slock = &pg->uanon->an_lock;
			}
			if (mutex_tryenter(slock)) {
				break;
			}
			mutex_exit(&uvm_pageqlock);
			/* XXX Better than yielding but inadequate. */
			kpause("livelock", false, 1, NULL);
			mutex_enter(&uvm_pageqlock);
			slock = NULL;
		}

		/*
		 * drop our loan.  if the page is owned by an anon but
		 * PQ_ANON is not set, the page was loaned to the anon
		 * from an object which dropped ownership, so resolve
		 * this by turning the anon's loan into real ownership
		 * (ie. decrement loan_count again and set PQ_ANON).
		 * after all this, if there are no loans left, put the
		 * page back on a paging queue (if the page is owned by
		 * an anon) or free it (if the page is now unowned).
		 */

		KASSERT(pg->loan_count > 0);
		pg->loan_count--;
		if (pg->uobject == NULL && pg->uanon != NULL &&
		    (pg->pqflags & PQ_ANON) == 0) {
			KASSERT(pg->loan_count > 0);
			pg->loan_count--;
			pg->pqflags |= PQ_ANON;
		}
		if (pg->loan_count == 0 && pg->uobject == NULL &&
		    pg->uanon == NULL) {
			KASSERT((pg->flags & PG_BUSY) == 0);
			uvm_pagefree(pg);
		}
		if (slock != NULL) {
			mutex_exit(slock);
		}
	}
	mutex_exit(&uvm_pageqlock);
}

/*
 * uvm_unloan: kill loans on pages or anons.
 */

void
uvm_unloan(void *v, int npages, int flags)
{
	if (flags & UVM_LOAN_TOANON) {
		uvm_unloananon(v, npages);
	} else {
		uvm_unloanpage(v, npages);
	}
}

/*
 * Minimal pager for uvm_loanzero_object.  We need to provide a "put"
 * method, because the page can end up on a paging queue, and the
 * page daemon will want to call pgo_put when it encounters the page
 * on the inactive list.
 */

static int
ulz_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
{
	struct vm_page *pg;

	KDASSERT(uobj == &uvm_loanzero_object);

	/*
	 * Don't need to do any work here if we're not freeing pages.
	 */

	if ((flags & PGO_FREE) == 0) {
		mutex_exit(&uobj->vmobjlock);
		return 0;
	}

	/*
	 * we don't actually want to ever free the uvm_loanzero_page, so
	 * just reactivate or dequeue it.
	 */

	pg = TAILQ_FIRST(&uobj->memq);
	KASSERT(pg != NULL);
	KASSERT(TAILQ_NEXT(pg, listq.queue) == NULL);

	mutex_enter(&uvm_pageqlock);
	if (pg->uanon)
		uvm_pageactivate(pg);
	else
		uvm_pagedequeue(pg);
	mutex_exit(&uvm_pageqlock);

	mutex_exit(&uobj->vmobjlock);
	return 0;
}

static const struct uvm_pagerops ulz_pager = {
	.pgo_put = ulz_put,
};

/*
 * uvm_loan_init(): initialize the uvm_loan() facility.
 */

void
uvm_loan_init(void)
{

	mutex_init(&uvm_loanzero_object.vmobjlock, MUTEX_DEFAULT, IPL_NONE);
	TAILQ_INIT(&uvm_loanzero_object.memq);
	uvm_loanzero_object.pgops = &ulz_pager;

	UVMHIST_INIT(loanhist, 300);
}

/*
 * uvm_loanbreak: break loan on a uobj page
 *
 * => called with uobj locked
 * => the page should be busy
 * => return value:
 *	newly allocated page if succeeded, NULL if no memory was available
 */
struct vm_page *
uvm_loanbreak(struct vm_page *uobjpage)
{
	struct vm_page *pg;
#ifdef DIAGNOSTIC
	struct uvm_object *uobj = uobjpage->uobject;
#endif

	KASSERT(uobj != NULL);
	KASSERT(mutex_owned(&uobj->vmobjlock));
	KASSERT(uobjpage->flags & PG_BUSY);

	/* alloc new un-owned page */
	pg = uvm_pagealloc(NULL, 0, NULL, 0);
	if (pg == NULL)
		return NULL;

	/*
	 * copy the data from the old page to the new
	 * one and clear the fake flags on the new page (keep it busy).
	 * force a reload of the old page by clearing it from all
	 * pmaps.
	 * transfer dirtiness of the old page to the new page.
	 * then lock the page queues to rename the pages.
	 */

	uvm_pagecopy(uobjpage, pg);	/* old -> new */
	pg->flags &= ~PG_FAKE;
	pmap_page_protect(uobjpage, VM_PROT_NONE);
	if ((uobjpage->flags & PG_CLEAN) != 0 && !pmap_clear_modify(uobjpage)) {
		pmap_clear_modify(pg);
		pg->flags |= PG_CLEAN;
	} else {
		/* uvm_pagecopy marked it dirty */
		KASSERT((pg->flags & PG_CLEAN) == 0);
		/* an object with a dirty page should be dirty. */
		KASSERT(!UVM_OBJ_IS_CLEAN(uobj));
	}
	if (uobjpage->flags & PG_WANTED)
		wakeup(uobjpage);
	/* uobj still locked */
	uobjpage->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(uobjpage, NULL);

	mutex_enter(&uvm_pageqlock);

	/*
	 * replace uobjpage with new page.
	 */

	uvm_pagereplace(uobjpage, pg);

	/*
	 * if the page is no longer referenced by
	 * an anon (i.e. we are breaking an O->K
	 * loan), then remove it from any pageq's.
	 */
	if (uobjpage->uanon == NULL)
		uvm_pagedequeue(uobjpage);

	/*
	 * at this point we have absolutely no
	 * control over uobjpage
	 */

	/* install new page */
	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);

	/*
	 * done!  loan is broken and "pg" is
	 * PG_BUSY.   it can now replace uobjpage.
	 */

	return pg;
}

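/*
 * Example (an illustrative sketch, not part of the original source):
 * the typical caller pattern for uvm_loanbreak(), modeled on how a
 * write fault breaks an O-side loan.  "uobj" and "uobjpage" are assumed
 * to be set up and locked as described above; the unlock/retry logic is
 * abbreviated.
 *
 *	// uobj locked, uobjpage busy, uobjpage->loan_count > 0
 *	pg = uvm_loanbreak(uobjpage);
 *	if (pg == NULL) {
 *		// out of memory: unlock, wait for a free page, refault
 *		mutex_exit(&uobj->vmobjlock);
 *		uvm_wait("loanbrk");
 *		// ... restart the fault ...
 *	} else {
 *		// "pg" is the new, unloaned copy: still PG_BUSY, with
 *		// uobj still locked.  continue the fault using "pg".
 *	}
 */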