uvm_loan.c revision 1.90
/*	$NetBSD: uvm_loan.c,v 1.90 2019/12/14 15:08:45 ad Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * from: Id: uvm_loan.c,v 1.1.6.4 1998/02/06 05:08:43 chs Exp
 */

/*
 * uvm_loan.c: page loanout handler
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.90 2019/12/14 15:08:45 ad Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/mman.h>

#include <uvm/uvm.h>

#ifdef UVMHIST
UVMHIST_DEFINE(loanhist);
#endif
/*
 * "loaned" pages are pages which are (read-only, copy-on-write) loaned
 * from the VM system to other parts of the kernel.   this allows page
 * copying to be avoided (e.g. you can loan pages from objs/anons to
 * the mbuf system).
 *
 * there are 3 types of loans possible:
 *  O->K  uvm_object page to wired kernel page (e.g. mbuf data area)
 *  A->K  anon page to wired kernel page (e.g. mbuf data area)
 *  O->A  uvm_object to anon loan (e.g. vnode page to an anon)
 * note that it is possible to have an O page loaned to both an A and K
 * at the same time.
 *
 * loans are tracked by pg->loan_count.  an O->A page will have both
 * a uvm_object and a vm_anon, but PG_ANON will not be set.   this sort
 * of page is considered "owned" by the uvm_object (not the anon).
 *
 * each loan of a page to the kernel bumps the pg->wire_count.  the
 * kernel mappings for these pages will be read-only and wired.  since
 * the page will also be wired, it will not be a candidate for pageout,
 * and thus will never be pmap_page_protect()'d with VM_PROT_NONE.  a
 * write fault in the kernel to one of these pages will not cause
 * copy-on-write.  instead, the page fault is considered fatal.  this
 * is because the kernel mapping will have no way to look up the
 * object/anon which the page is owned by.  this is a good side-effect,
 * since a kernel write to a loaned page is an error.
 *
 * owners that want to free their pages and discover that they are
 * loaned out simply "disown" them (the page becomes an orphan).  these
 * pages should be freed when the last loan is dropped.   in some cases
 * an anon may "adopt" an orphaned page.
 *
 * locking: to read pg->loan_count either the owner or pg->interlock
 * must be locked.   to modify pg->loan_count, both the owner of the page
 * and pg->interlock must be locked.   pg->flags is (as always) locked by
 * the owner of the page.
 *
 * note that locking from the "loaned" side is tricky since the object
 * getting the loaned page has no reference to the page's owner and thus
 * the owner could "die" at any time.   in order to prevent the owner
 * from dying pg->interlock should be locked.   this forces us to sometimes
 * use "try" locking.
 *
 * loans are typically broken by the following events:
 *  1. user-level write fault to a loaned page
 *  2. pageout of clean+inactive O->A loaned page
 *  3. owner frees page (e.g. pager flush)
 *
 * note that loaning a page causes all mappings of the page to become
 * read-only (via pmap_page_protect).   this could have an unexpected
 * effect on normal "wired" pages if one is not careful (XXX).
 */
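
/*
 * Illustrative sketch (hypothetical caller; not built): a typical O->K
 * loan takes pages from a map, uses them read-only while loaned, and
 * then drops the loan with uvm_unloan().  The helper name, "uva" and
 * "npages" are assumptions for illustration; uva must be page-aligned.
 */
#ifdef notdef
static int
example_loan_to_kernel(struct vm_map *map, vaddr_t uva, int npages)
{
	struct vm_page *pgs[16];
	int error;

	KASSERT(npages <= 16);

	/* loan npages starting at uva; on success they are read-only */
	error = uvm_loan(map, uva, (vsize_t)npages << PAGE_SHIFT, pgs,
	    UVM_LOAN_TOPAGE);
	if (error)
		return error;

	/* ... read from the loaned pages here; never write them ... */

	/* drop the loans; orphaned pages are freed on the last unloan */
	uvm_unloan(pgs, npages, UVM_LOAN_TOPAGE);
	return 0;
}
#endif	/* notdef */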

/*
 * local prototypes
 */

static int	uvm_loananon(struct uvm_faultinfo *, void ***,
			     int, struct vm_anon *);
static int	uvm_loanuobj(struct uvm_faultinfo *, void ***,
			     int, vaddr_t);
static int	uvm_loanzero(struct uvm_faultinfo *, void ***, int);
static void	uvm_unloananon(struct vm_anon **, int);
static void	uvm_unloanpage(struct vm_page **, int);
static int	uvm_loanpage(struct vm_page **, int);


/*
 * inlines
 */

/*
 * uvm_loanentry: loan out pages in a map entry (helper fn for uvm_loan())
 *
 * => "ufi" is the result of a successful map lookup (meaning that
 *	on entry the map is locked by the caller)
 * => we may unlock and then relock the map if needed (for I/O)
 * => we put our output result in "output"
 * => we always return with the map unlocked
 * => possible return values:
 *	-1 == error, map is unlocked
 *	 0 == map relock error (try again!), map is unlocked
 *	>0 == number of pages we loaned, map is unlocked
 *
 * NOTE: We can live with this being an inline, because it is only called
 * from one place.
 */

static inline int
uvm_loanentry(struct uvm_faultinfo *ufi, void ***output, int flags)
{
	vaddr_t curaddr = ufi->orig_rvaddr;
	vsize_t togo = ufi->size;
	struct vm_aref *aref = &ufi->entry->aref;
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct vm_anon *anon;
	int rv, result = 0;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * lock us the rest of the way down (we unlock before return)
	 */
	if (aref->ar_amap) {
		amap_lock(aref->ar_amap);
	}

	/*
	 * loop until done
	 */
	while (togo) {

		/*
		 * find the page we want.   check the anon layer first.
		 */

		if (aref->ar_amap) {
			anon = amap_lookup(aref, curaddr - ufi->entry->start);
		} else {
			anon = NULL;
		}

		/* locked: map, amap, uobj */
		if (anon) {
			rv = uvm_loananon(ufi, output, flags, anon);
		} else if (uobj) {
			rv = uvm_loanuobj(ufi, output, flags, curaddr);
		} else if (UVM_ET_ISCOPYONWRITE(ufi->entry)) {
			rv = uvm_loanzero(ufi, output, flags);
		} else {
			uvmfault_unlockall(ufi, aref->ar_amap, uobj);
			rv = -1;
		}
		/* locked: if (rv > 0) => map, amap, uobj  [o.w. unlocked] */
		KASSERT(rv > 0 || aref->ar_amap == NULL ||
		    !mutex_owned(aref->ar_amap->am_lock));
		KASSERT(rv > 0 || uobj == NULL ||
		    !mutex_owned(uobj->vmobjlock));

		/* total failure */
		if (rv < 0) {
			UVMHIST_LOG(loanhist, "failure %jd", rv, 0,0,0);
			return (-1);
		}

		/* relock failed, need to do another lookup */
		if (rv == 0) {
			UVMHIST_LOG(loanhist, "relock failure %jd", result,
			    0,0,0);
			return (result);
		}

		/*
		 * got it... advance to next page
		 */

		result++;
		togo -= PAGE_SIZE;
		curaddr += PAGE_SIZE;
	}

	/*
	 * unlock what we locked, unlock the maps and return
	 */

	if (aref->ar_amap) {
		amap_unlock(aref->ar_amap);
	}
	uvmfault_unlockmaps(ufi, false);
	UVMHIST_LOG(loanhist, "done %jd", result, 0,0,0);
	return (result);
}

/*
 * normal functions
 */

/*
 * uvm_loan: loan pages in a map out to anons or to the kernel
 *
 * => map should be unlocked
 * => start and len should be multiples of PAGE_SIZE
 * => result is either an array of anon's or vm_pages (depending on flags)
 * => flag values: UVM_LOAN_TOANON - loan to anons
 *                 UVM_LOAN_TOPAGE - loan to wired kernel page
 *    one and only one of these flags must be set!
 * => returns 0 (success), or an appropriate error number
 */

int
uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags)
{
	struct uvm_faultinfo ufi;
	void **result, **output;
	int rv, error;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * ensure that one and only one of the flags is set
	 */

	KASSERT(((flags & UVM_LOAN_TOANON) == 0) ^
		((flags & UVM_LOAN_TOPAGE) == 0));

	/*
	 * "output" is a pointer to the current place to put the loaned page.
	 */

	result = v;
	output = &result[0];	/* start at the beginning ... */

	/*
	 * while we've got pages to do
	 */

	while (len > 0) {

		/*
		 * fill in params for a call to uvmfault_lookup
		 */

		ufi.orig_map = map;
		ufi.orig_rvaddr = start;
		ufi.orig_size = len;

		/*
		 * do the lookup, the only time this will fail is if we hit on
		 * an unmapped region (an error)
		 */

		if (!uvmfault_lookup(&ufi, false)) {
			error = ENOENT;
			goto fail;
		}

		/*
		 * map now locked.  now do the loanout...
		 */

		rv = uvm_loanentry(&ufi, &output, flags);
		if (rv < 0) {
			/* all unlocked due to error */
			error = EINVAL;
			goto fail;
		}

		/*
		 * done!  the map is unlocked.  advance, if possible.
		 *
		 * XXXCDC: could be recoded to hold the map lock with
		 *	   smarter code (but it only happens on map entry
		 *	   boundaries, so it isn't that bad).
		 */

		if (rv) {
			rv <<= PAGE_SHIFT;
			len -= rv;
			start += rv;
		}
	}
	UVMHIST_LOG(loanhist, "success", 0,0,0,0);
	return 0;

fail:
	/*
	 * failed to complete loans.  drop any loans and return failure code.
	 * map is already unlocked.
	 */

	if (output - result) {
		if (flags & UVM_LOAN_TOANON) {
			uvm_unloananon((struct vm_anon **)result,
			    output - result);
		} else {
			uvm_unloanpage((struct vm_page **)result,
			    output - result);
		}
	}
	UVMHIST_LOG(loanhist, "error %jd", error,0,0,0);
	return (error);
}

/*
 * uvm_loananon: loan a page from an anon out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

int
uvm_loananon(struct uvm_faultinfo *ufi, void ***output, int flags,
    struct vm_anon *anon)
{
	struct vm_page *pg;
	int error;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * if we are loaning to "another" anon then it is easy, we just
	 * bump the reference count on the current anon and return a
	 * pointer to it (it becomes copy-on-write shared).
	 */

	if (flags & UVM_LOAN_TOANON) {
		KASSERT(mutex_owned(anon->an_lock));
		pg = anon->an_page;
		if (pg && (pg->flags & PG_ANON) != 0 && anon->an_ref == 1) {
			if (pg->wire_count > 0) {
				UVMHIST_LOG(loanhist, "->A wired %#jx",
				    (uintptr_t)pg, 0, 0, 0);
				uvmfault_unlockall(ufi,
				    ufi->entry->aref.ar_amap,
				    ufi->entry->object.uvm_obj);
				return (-1);
			}
			pmap_page_protect(pg, VM_PROT_READ);
		}
		anon->an_ref++;
		**output = anon;
		(*output)++;
		UVMHIST_LOG(loanhist, "->A done", 0,0,0,0);
		return (1);
	}

	/*
	 * we are loaning to a kernel-page.   we need to get the page
	 * resident so we can wire it.   uvmfault_anonget will handle
	 * this for us.
	 */

	KASSERT(mutex_owned(anon->an_lock));
	error = uvmfault_anonget(ufi, ufi->entry->aref.ar_amap, anon);

	/*
	 * if we were unable to get the anon, then uvmfault_anonget has
	 * unlocked everything and returned an error code.
	 */

	if (error) {
		UVMHIST_LOG(loanhist, "error %jd", error,0,0,0);

		/* need to refault (i.e. refresh our lookup) ? */
		if (error == ERESTART) {
			return (0);
		}

		/* "try again"?   sleep a bit and retry ... */
		if (error == EAGAIN) {
			kpause("loanagain", false, hz/2, NULL);
			return (0);
		}

		/* otherwise flag it as an error */
		return (-1);
	}

	/*
	 * we have the page and its owner locked: do the loan now.
	 */

	pg = anon->an_page;
	if (pg->wire_count > 0) {
		UVMHIST_LOG(loanhist, "->K wired %#jx", (uintptr_t)pg, 0, 0, 0);
		KASSERT(pg->uobject == NULL);
		uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, NULL);
		return (-1);
	}
	if (pg->loan_count == 0) {
		pmap_page_protect(pg, VM_PROT_READ);
	}
	mutex_enter(&pg->interlock);
	pg->loan_count++;
	KASSERT(pg->loan_count > 0);	/* detect wrap-around */
	mutex_exit(&pg->interlock);
	uvm_pageactivate(pg);
	**output = pg;
	(*output)++;

	/* unlock and return success */
	if (pg->uobject)
		mutex_exit(pg->uobject->vmobjlock);
	UVMHIST_LOG(loanhist, "->K done", 0,0,0,0);
	return (1);
}

/*
 * uvm_loanpage: loan out pages to kernel (->K)
 *
 * => pages should be object-owned and the object should be locked.
 * => in the case of error, the object might be unlocked and relocked.
 * => caller should busy the pages beforehand.
 * => pages will be unbusied.
 * => fail with EBUSY if we meet a wired page.
 */
static int
uvm_loanpage(struct vm_page **pgpp, int npages)
{
	int i;
	int error = 0;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	for (i = 0; i < npages; i++) {
		struct vm_page *pg = pgpp[i];

		KASSERT(pg->uobject != NULL);
		KASSERT(pg->uobject == pgpp[0]->uobject);
		KASSERT(!(pg->flags & (PG_RELEASED|PG_PAGEOUT)));
		KASSERT(mutex_owned(pg->uobject->vmobjlock));
		KASSERT(pg->flags & PG_BUSY);

		if (pg->wire_count > 0) {
			UVMHIST_LOG(loanhist, "wired %#jx", (uintptr_t)pg,
			    0, 0, 0);
			error = EBUSY;
			break;
		}
		if (pg->loan_count == 0) {
			pmap_page_protect(pg, VM_PROT_READ);
		}
		mutex_enter(&pg->interlock);
		pg->loan_count++;
		KASSERT(pg->loan_count > 0);	/* detect wrap-around */
		mutex_exit(&pg->interlock);
		uvm_pageactivate(pg);
	}

	uvm_page_unbusy(pgpp, npages);

	if (error) {
		/*
		 * backout what we've done
		 */
		kmutex_t *slock = pgpp[0]->uobject->vmobjlock;

		mutex_exit(slock);
		uvm_unloan(pgpp, i, UVM_LOAN_TOPAGE);
		mutex_enter(slock);
	}

	UVMHIST_LOG(loanhist, "done %jd", error, 0, 0, 0);
	return error;
}

/*
 * XXX UBC temp limit
 * number of pages to get at once.
 * should be <= MAX_READ_AHEAD in genfs_vnops.c
 */
#define	UVM_LOAN_GET_CHUNK	16

/*
 * uvm_loanuobjpages: loan pages from a uobj out (O->K)
 *
 * => uobj shouldn't be locked.  (we'll lock it)
 * => fail with EBUSY if we meet a wired page.
 */
int
uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int orignpages,
    struct vm_page **origpgpp)
{
	int ndone; /* # of pages loaned out */
	struct vm_page **pgpp;
	int error;
	int i;
	kmutex_t *slock;

	pgpp = origpgpp;
	for (ndone = 0; ndone < orignpages; ) {
		int npages;
		/* npendloan: # of pages busied but not loaned out yet. */
		int npendloan = 0xdead; /* XXX gcc */
reget:
		npages = MIN(UVM_LOAN_GET_CHUNK, orignpages - ndone);
		mutex_enter(uobj->vmobjlock);
		error = (*uobj->pgops->pgo_get)(uobj,
		    pgoff + (ndone << PAGE_SHIFT), pgpp, &npages, 0,
		    VM_PROT_READ, 0, PGO_SYNCIO);
		if (error == EAGAIN) {
			kpause("loanuopg", false, hz/2, NULL);
			continue;
		}
		if (error)
			goto fail;

		KASSERT(npages > 0);

		/* loan and unbusy pages */
		slock = NULL;
		for (i = 0; i < npages; i++) {
			kmutex_t *nextslock; /* slock for next page */
			struct vm_page *pg = *pgpp;

			/* XXX assuming that the page is owned by uobj */
			KASSERT(pg->uobject != NULL);
			nextslock = pg->uobject->vmobjlock;

			if (slock != nextslock) {
				if (slock) {
					KASSERT(npendloan > 0);
					error = uvm_loanpage(pgpp - npendloan,
					    npendloan);
					mutex_exit(slock);
					if (error)
						goto fail;
					ndone += npendloan;
					KASSERT(origpgpp + ndone == pgpp);
				}
				slock = nextslock;
				npendloan = 0;
				mutex_enter(slock);
			}

			if ((pg->flags & PG_RELEASED) != 0) {
				/*
				 * release pages and try again.
				 */
				mutex_exit(slock);
				for (; i < npages; i++) {
					pg = pgpp[i];
					slock = pg->uobject->vmobjlock;

					mutex_enter(slock);
					uvm_page_unbusy(&pg, 1);
					mutex_exit(slock);
				}
				goto reget;
			}

			npendloan++;
			pgpp++;
			KASSERT(origpgpp + ndone + npendloan == pgpp);
		}
		KASSERT(slock != NULL);
		KASSERT(npendloan > 0);
		error = uvm_loanpage(pgpp - npendloan, npendloan);
		mutex_exit(slock);
		if (error)
			goto fail;
		ndone += npendloan;
		KASSERT(origpgpp + ndone == pgpp);
	}

	return 0;

fail:
	uvm_unloan(origpgpp, ndone, UVM_LOAN_TOPAGE);

	return error;
}
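
/*
 * Illustrative sketch (hypothetical caller; not built): loaning the
 * pages backing a uvm_object range out to the kernel, e.g. for
 * zero-copy I/O.  The helper name and its parameters are assumptions
 * for illustration; pgoff must be page-aligned and pgpp must have
 * room for npages entries.
 */
#ifdef notdef
static int
example_loan_object_pages(struct uvm_object *uobj, voff_t pgoff, int npages,
    struct vm_page **pgpp)
{
	int error;

	/* uobj must be unlocked; fails with EBUSY on a wired page */
	error = uvm_loanuobjpages(uobj, pgoff, npages, pgpp);
	if (error)
		return error;

	/* ... use the loaned, read-only pages ... */

	uvm_unloan(pgpp, npages, UVM_LOAN_TOPAGE);
	return 0;
}
#endif	/* notdef */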

/*
 * uvm_loanuobj: loan a page from a uobj out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

static int
uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va)
{
	struct vm_amap *amap = ufi->entry->aref.ar_amap;
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct vm_page *pg;
	int error, npages;
	bool locked;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * first we must make sure the page is resident.
	 *
	 * XXXCDC: duplicate code with uvm_fault().
	 */

	/* locked: maps(read), amap(if there) */
	mutex_enter(uobj->vmobjlock);
	/* locked: maps(read), amap(if there), uobj */

	if (uobj->pgops->pgo_get) {	/* try locked pgo_get */
		npages = 1;
		pg = NULL;
		error = (*uobj->pgops->pgo_get)(uobj,
		    va - ufi->entry->start + ufi->entry->offset,
		    &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_LOCKED);
	} else {
		error = EIO;		/* must have pgo_get op */
	}

	/*
	 * check the result of the locked pgo_get.  if there is a problem,
	 * then we fail the loan.
	 */

	if (error && error != EBUSY) {
		uvmfault_unlockall(ufi, amap, uobj);
		return (-1);
	}

	/*
	 * if we need to unlock for I/O, do so now.
	 */

	if (error == EBUSY) {
		uvmfault_unlockall(ufi, amap, NULL);

		/* locked: uobj */
		npages = 1;
		error = (*uobj->pgops->pgo_get)(uobj,
		    va - ufi->entry->start + ufi->entry->offset,
		    &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_SYNCIO);
		/* locked: <nothing> */

		if (error) {
			if (error == EAGAIN) {
				kpause("fltagain2", false, hz/2, NULL);
				return (0);
			}
			return (-1);
		}

		/*
		 * pgo_get was a success.   attempt to relock everything.
		 */

		locked = uvmfault_relock(ufi);
		if (locked && amap)
			amap_lock(amap);
		uobj = pg->uobject;
		mutex_enter(uobj->vmobjlock);

		/*
		 * verify that the page has not been released and re-verify
		 * that amap slot is still free.   if there is a problem we
		 * drop our lock (thus force a lookup refresh/retry).
		 */

		if ((pg->flags & PG_RELEASED) != 0 ||
		    (locked && amap && amap_lookup(&ufi->entry->aref,
		    ufi->orig_rvaddr - ufi->entry->start))) {
			if (locked)
				uvmfault_unlockall(ufi, amap, NULL);
			locked = false;
		}

		/*
		 * didn't get the lock?   release the page and retry.
		 */

		if (locked == false) {
			if (pg->flags & PG_WANTED) {
				wakeup(pg);
			}
			if (pg->flags & PG_RELEASED) {
				uvm_pagefree(pg);
				mutex_exit(uobj->vmobjlock);
				return (0);
			}
			uvm_pageactivate(pg);
			pg->flags &= ~(PG_BUSY|PG_WANTED);
			UVM_PAGE_OWN(pg, NULL);
			mutex_exit(uobj->vmobjlock);
			return (0);
		}
	}

	KASSERT(uobj == pg->uobject);

	/*
	 * at this point we have the page we want ("pg") marked PG_BUSY for us
	 * and we have all data structures locked.  do the loanout.  page can
	 * not be PG_RELEASED (we caught this above).
	 */

	if ((flags & UVM_LOAN_TOANON) == 0) {
		if (uvm_loanpage(&pg, 1)) {
			uvmfault_unlockall(ufi, amap, uobj);
			return (-1);
		}
		mutex_exit(uobj->vmobjlock);
		**output = pg;
		(*output)++;
		return (1);
	}

#ifdef notdef
	/*
	 * must be a loan to an anon.   check to see if there is already
	 * an anon associated with this page.  if so, then just return
	 * a reference to this object.   the page should already be
	 * mapped read-only because it is already on loan.
	 */

	if (pg->uanon) {
		/* XXX: locking */
		anon = pg->uanon;
		anon->an_ref++;
		if (pg->flags & PG_WANTED) {
			wakeup(pg);
		}
		pg->flags &= ~(PG_WANTED|PG_BUSY);
		UVM_PAGE_OWN(pg, NULL);
		mutex_exit(uobj->vmobjlock);
		**output = anon;
		(*output)++;
		return (1);
	}

	/*
	 * need to allocate a new anon
	 */

	anon = uvm_analloc();
	if (anon == NULL) {
		goto fail;
	}
	if (pg->wire_count > 0) {
		UVMHIST_LOG(loanhist, "wired %#jx", (uintptr_t)pg, 0, 0, 0);
		goto fail;
	}
	if (pg->loan_count == 0) {
		pmap_page_protect(pg, VM_PROT_READ);
	}
	mutex_enter(&pg->interlock);
	pg->loan_count++;
	KASSERT(pg->loan_count > 0);	/* detect wrap-around */
	pg->uanon = anon;
	anon->an_page = pg;
	anon->an_lock = /* TODO: share amap lock */
	mutex_exit(&pg->interlock);
	uvm_pageactivate(pg);
	if (pg->flags & PG_WANTED) {
		wakeup(pg);
	}
	pg->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(pg, NULL);
	mutex_exit(uobj->vmobjlock);
	mutex_exit(&anon->an_lock);
	**output = anon;
	(*output)++;
	return (1);

fail:
	UVMHIST_LOG(loanhist, "fail", 0,0,0,0);
	/*
	 * unlock everything and bail out.
	 */
	if (pg->flags & PG_WANTED) {
		wakeup(pg);
	}
	pg->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(pg, NULL);
	uvmfault_unlockall(ufi, amap, uobj);
	if (anon) {
		anon->an_ref--;
		uvm_anon_free(anon);
	}
#endif	/* notdef */
	return (-1);
}

/*
 * uvm_loanzero: loan a zero-fill page out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

static struct uvm_object uvm_loanzero_object;
static kmutex_t uvm_loanzero_lock __cacheline_aligned;

static int
uvm_loanzero(struct uvm_faultinfo *ufi, void ***output, int flags)
{
	struct vm_page *pg;
	struct vm_amap *amap = ufi->entry->aref.ar_amap;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);
again:
	mutex_enter(uvm_loanzero_object.vmobjlock);

	/*
	 * first, get ahold of our single zero page.
	 */

	if (__predict_false((pg =
			     TAILQ_FIRST(&uvm_loanzero_object.memq)) == NULL)) {
		while ((pg = uvm_pagealloc(&uvm_loanzero_object, 0, NULL,
					   UVM_PGA_ZERO)) == NULL) {
			mutex_exit(uvm_loanzero_object.vmobjlock);
			uvmfault_unlockall(ufi, amap, NULL);
			uvm_wait("loanzero");
			if (!uvmfault_relock(ufi)) {
				return (0);
			}
			if (amap) {
				amap_lock(amap);
			}
			goto again;
		}

		/* got a zero'd page. */
		pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
		pg->flags |= PG_RDONLY;
		uvm_pageactivate(pg);
		UVM_PAGE_OWN(pg, NULL);
	}

	if ((flags & UVM_LOAN_TOANON) == 0) {	/* loaning to kernel-page */
		mutex_enter(&pg->interlock);
		pg->loan_count++;
		KASSERT(pg->loan_count > 0);	/* detect wrap-around */
		mutex_exit(&pg->interlock);
		mutex_exit(uvm_loanzero_object.vmobjlock);
		**output = pg;
		(*output)++;
		return (1);
	}

#ifdef notdef
	/*
	 * loaning to an anon.  check to see if there is already an anon
	 * associated with this page.  if so, then just return a reference
	 * to this object.
	 */

	if (pg->uanon) {
		anon = pg->uanon;
		mutex_enter(&anon->an_lock);
		anon->an_ref++;
		mutex_exit(&anon->an_lock);
		mutex_exit(uvm_loanzero_object.vmobjlock);
		**output = anon;
		(*output)++;
		return (1);
	}

	/*
	 * need to allocate a new anon
	 */

	anon = uvm_analloc();
	if (anon == NULL) {
		/* out of swap causes us to fail */
		mutex_exit(uvm_loanzero_object.vmobjlock);
		uvmfault_unlockall(ufi, amap, NULL);
		return (-1);
	}
	anon->an_page = pg;
	pg->uanon = anon;
	mutex_enter(&pg->interlock);
	pg->loan_count++;
	KASSERT(pg->loan_count > 0);	/* detect wrap-around */
	mutex_exit(&pg->interlock);
	uvm_pageactivate(pg);
	mutex_exit(&anon->an_lock);
	mutex_exit(uvm_loanzero_object.vmobjlock);
	**output = anon;
	(*output)++;
	return (1);
#else
	return (-1);
#endif
}


/*
 * uvm_unloananon: kill loans on anons (basically a normal ref drop)
 *
 * => we expect all our resources to be unlocked
 */

static void
uvm_unloananon(struct vm_anon **aloans, int nanons)
{
#ifdef notdef
	struct vm_anon *anon, *to_free = NULL;

	/* TODO: locking */
	amap_lock(amap);
	while (nanons-- > 0) {
		anon = *aloans++;
		if (--anon->an_ref == 0) {
			anon->an_link = to_free;
			to_free = anon;
		}
	}
	uvm_anon_freelst(amap, to_free);
#endif	/* notdef */
}

/*
 * uvm_unloanpage: kill loans on pages loaned out to the kernel
 *
 * => we expect all our resources to be unlocked
 */

static void
uvm_unloanpage(struct vm_page **ploans, int npages)
{
	struct vm_page *pg;
	kmutex_t *slock;

	while (npages-- > 0) {
		pg = *ploans++;

		/*
		 * do a little dance to acquire the object or anon lock
		 * as appropriate.  we are locking in the wrong order,
		 * so we have to do a try-lock here.
		 */

		mutex_enter(&pg->interlock);
		slock = NULL;
		while (pg->uobject != NULL || pg->uanon != NULL) {
			if (pg->uobject != NULL) {
				slock = pg->uobject->vmobjlock;
			} else {
				slock = pg->uanon->an_lock;
			}
			if (mutex_tryenter(slock)) {
				break;
			}
			/* XXX Better than yielding but inadequate. */
			kpause("livelock", false, 1, &pg->interlock);
			slock = NULL;
		}

		/*
		 * drop our loan.  if page is owned by an anon but
		 * PG_ANON is not set, the page was loaned to the anon
		 * from an object which dropped ownership, so resolve
		 * this by turning the anon's loan into real ownership
		 * (ie. decrement loan_count again and set PG_ANON).
		 * after all this, if there are no loans left, put the
		 * page back on a paging queue (if the page is owned by
		 * an anon) or free it (if the page is now unowned).
		 */

		KASSERT(pg->loan_count > 0);
		pg->loan_count--;
		if (pg->uobject == NULL && pg->uanon != NULL &&
		    (pg->flags & PG_ANON) == 0) {
			KASSERT(pg->loan_count > 0);
			pg->loan_count--;
			pg->flags |= PG_ANON;
		}
		mutex_exit(&pg->interlock);
		if (pg->loan_count == 0 && pg->uobject == NULL &&
		    pg->uanon == NULL) {
			KASSERT((pg->flags & PG_BUSY) == 0);
			uvm_pagefree(pg);
		}
		if (slock != NULL) {
			mutex_exit(slock);
		}
	}
}

/*
 * uvm_unloan: kill loans on pages or anons.
 */

void
uvm_unloan(void *v, int npages, int flags)
{
	if (flags & UVM_LOAN_TOANON) {
		uvm_unloananon(v, npages);
	} else {
		uvm_unloanpage(v, npages);
	}
}

/*
 * Minimal pager for uvm_loanzero_object.  We need to provide a "put"
 * method, because the page can end up on a paging queue, and the
 * page daemon will want to call pgo_put when it encounters the page
 * on the inactive list.
 */

static int
ulz_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
{
	struct vm_page *pg;

	KDASSERT(uobj == &uvm_loanzero_object);

	/*
	 * Don't need to do any work here if we're not freeing pages.
	 */

	if ((flags & PGO_FREE) == 0) {
		mutex_exit(uobj->vmobjlock);
		return 0;
	}

	/*
	 * we don't actually want to ever free the uvm_loanzero_page, so
	 * just reactivate or dequeue it.
	 */

	pg = TAILQ_FIRST(&uobj->memq);
	KASSERT(pg != NULL);
	KASSERT(TAILQ_NEXT(pg, listq.queue) == NULL);

	if (pg->uanon) {
		uvm_pageactivate(pg);
	} else {
		uvm_pagedequeue(pg);
	}

	mutex_exit(uobj->vmobjlock);
	return 0;
}

static const struct uvm_pagerops ulz_pager = {
	.pgo_put = ulz_put,
};

/*
 * uvm_loan_init(): initialize the uvm_loan() facility.
 */

void
uvm_loan_init(void)
{

	mutex_init(&uvm_loanzero_lock, MUTEX_DEFAULT, IPL_NONE);
	uvm_obj_init(&uvm_loanzero_object, &ulz_pager, false, 0);
	uvm_obj_setlock(&uvm_loanzero_object, &uvm_loanzero_lock);

	UVMHIST_INIT(loanhist, 300);
}

/*
 * uvm_loanbreak: break loan on a uobj page
 *
 * => called with uobj locked
 * => the page should be busy
 * => return value:
 *	newly allocated page if succeeded, or NULL if allocating the
 *	replacement page failed
 */
1105uvm_loanbreak(struct vm_page *uobjpage)
1106{
1107	struct vm_page *pg;
1108	struct uvm_object *uobj __diagused = uobjpage->uobject;
1109
1110	KASSERT(uobj != NULL);
1111	KASSERT(mutex_owned(uobj->vmobjlock));
1112	KASSERT(uobjpage->flags & PG_BUSY);
1113
1114	/* alloc new un-owned page */
1115	pg = uvm_pagealloc(NULL, 0, NULL, 0);
1116	if (pg == NULL)
1117		return NULL;
1118
1119	/*
1120	 * copy the data from the old page to the new
1121	 * one and clear the fake flags on the new page (keep it busy).
1122	 * force a reload of the old page by clearing it from all
1123	 * pmaps.
1124	 * transfer dirtiness of the old page to the new page.
1125	 * then lock pg->interlock to rename the pages.
1126	 */
1127
1128	uvm_pagecopy(uobjpage, pg);	/* old -> new */
1129	pg->flags &= ~PG_FAKE;
1130	pmap_page_protect(uobjpage, VM_PROT_NONE);
1131	if ((uobjpage->flags & PG_CLEAN) != 0 && !pmap_clear_modify(uobjpage)) {
1132		pmap_clear_modify(pg);
1133		pg->flags |= PG_CLEAN;
1134	} else {
1135		/* uvm_pagecopy marked it dirty */
1136		KASSERT((pg->flags & PG_CLEAN) == 0);
		/* an object with a dirty page should be dirty. */
		KASSERT(!UVM_OBJ_IS_CLEAN(uobj));
	}
	if (uobjpage->flags & PG_WANTED)
		wakeup(uobjpage);
	/* uobj still locked */
	uobjpage->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(uobjpage, NULL);

	/*
	 * replace uobjpage with new page.
	 */

	mutex_enter(&uobjpage->interlock);
	uvm_pagereplace(uobjpage, pg);
	mutex_exit(&uobjpage->interlock);

	/*
	 * if the page is no longer referenced by
	 * an anon (i.e. we are breaking an O->K
	 * loan), then remove it from any pageq's.
	 */
	if (uobjpage->uanon == NULL)
		uvm_pagedequeue(uobjpage);

	/*
	 * at this point we have absolutely no
	 * control over uobjpage
	 */

	uvm_pageactivate(pg);

	/*
	 * done!  loan is broken and "pg" is
	 * PG_BUSY.   it can now replace uobjpage.
	 */

	return pg;
}
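
/*
 * Illustrative sketch (hypothetical caller; not built): how a fault
 * handler might use uvm_loanbreak() before writing to a loaned uobj
 * page.  The helper name is an assumption; per the contract above,
 * uobjpage must be busy and its owning object locked on entry.
 */
#ifdef notdef
static struct vm_page *
example_break_loan(struct vm_page *uobjpage)
{
	struct vm_page *pg;

	/* replace uobjpage with a fresh copy owned by the object */
	pg = uvm_loanbreak(uobjpage);
	if (pg == NULL) {
		/* out of memory: caller should unlock, uvm_wait, retry */
		return NULL;
	}

	/* pg is PG_BUSY and writable by its owner; the loan is broken */
	return pg;
}
#endif	/* notdef */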

int
uvm_loanbreak_anon(struct vm_anon *anon, struct uvm_object *uobj)
{
	struct vm_page *pg, *oldpg, *dequeuepg;

	KASSERT(mutex_owned(anon->an_lock));
	KASSERT(uobj == NULL || mutex_owned(uobj->vmobjlock));

	/* get new un-owned replacement page */
	pg = uvm_pagealloc(NULL, 0, NULL, 0);
	if (pg == NULL) {
		return ENOMEM;
	}

	/* copy old -> new */
	oldpg = anon->an_page;
	uvm_pagecopy(oldpg, pg);

	/* force reload */
	pmap_page_protect(oldpg, VM_PROT_NONE);
	if (pg < oldpg) {
		mutex_enter(&pg->interlock);
		mutex_enter(&oldpg->interlock);
	} else {
		mutex_enter(&oldpg->interlock);
		mutex_enter(&pg->interlock);
	}
	oldpg->uanon = NULL;
	/* in case we owned */
	oldpg->flags &= ~PG_ANON;

	if (uobj) {
		/* if we were receiver of loan */
		oldpg->loan_count--;
		dequeuepg = NULL;
	} else {
		/*
		 * we were the lender (A->K); need to remove the page from
		 * pageq's.
		 */
		dequeuepg = oldpg;
	}

	/* install new page in anon */
	anon->an_page = pg;
	pg->uanon = anon;
	pg->flags |= PG_ANON;

	/* unlock via oldpg: anon->an_page now points at pg */
	mutex_exit(&pg->interlock);
	mutex_exit(&oldpg->interlock);
	uvm_pageactivate(pg);
	if (dequeuepg != NULL) {
		uvm_pagedequeue(dequeuepg);
	}

	pg->flags &= ~(PG_BUSY|PG_FAKE);
	UVM_PAGE_OWN(pg, NULL);

	if (uobj) {
		mutex_exit(uobj->vmobjlock);
	}

	/* done! */

	return 0;
}