uvm_anon.c revision 1.26
1/*	$NetBSD: uvm_anon.c,v 1.26 2003/08/28 13:12:17 pk Exp $	*/
2
3/*
4 *
5 * Copyright (c) 1997 Charles D. Cranor and Washington University.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *      This product includes software developed by Charles D. Cranor and
19 *      Washington University.
20 * 4. The name of the author may not be used to endorse or promote products
21 *    derived from this software without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
24 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
27 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
28 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
32 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 */
34
35/*
36 * uvm_anon.c: uvm anon ops
37 */
38
39#include <sys/cdefs.h>
40__KERNEL_RCSID(0, "$NetBSD: uvm_anon.c,v 1.26 2003/08/28 13:12:17 pk Exp $");
41
42#include "opt_uvmhist.h"
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/proc.h>
47#include <sys/malloc.h>
48#include <sys/pool.h>
49#include <sys/kernel.h>
50
51#include <uvm/uvm.h>
52#include <uvm/uvm_swap.h>
53
54/*
55 * anonblock_list: global list of anon blocks,
56 * locked by swap_syscall_lock (since we never remove
57 * anything from this list and we only add to it via swapctl(2)).
58 */
59
60struct uvm_anonblock {
61	LIST_ENTRY(uvm_anonblock) list;
62	int count;
63	struct vm_anon *anons;
64};
65static LIST_HEAD(anonlist, uvm_anonblock) anonblock_list;
66
67
68static boolean_t anon_pagein __P((struct vm_anon *));
69
70
71/*
72 * allocate anons
73 */
74void
75uvm_anon_init()
76{
77	int nanon = uvmexp.free - (uvmexp.free / 16); /* XXXCDC ??? */
78
79	simple_lock_init(&uvm.afreelock);
80	LIST_INIT(&anonblock_list);
81
82	/*
83	 * Allocate the initial anons.
84	 */
85	uvm_anon_add(nanon);
86}
87
88/*
89 * add some more anons to the free pool.  called when we add
90 * more swap space.
91 *
92 * => swap_syscall_lock should be held (protects anonblock_list).
93 */
94int
95uvm_anon_add(count)
96	int	count;
97{
98	struct uvm_anonblock *anonblock;
99	struct vm_anon *anon;
100	int lcv, needed;
101
102	simple_lock(&uvm.afreelock);
103	uvmexp.nanonneeded += count;
104	needed = uvmexp.nanonneeded - uvmexp.nanon;
105	simple_unlock(&uvm.afreelock);
106
107	if (needed <= 0) {
108		return 0;
109	}
110	anon = (void *)uvm_km_alloc(kernel_map, sizeof(*anon) * needed);
111	if (anon == NULL) {
112		simple_lock(&uvm.afreelock);
113		uvmexp.nanonneeded -= count;
114		simple_unlock(&uvm.afreelock);
115		return ENOMEM;
116	}
117	MALLOC(anonblock, void *, sizeof(*anonblock), M_UVMAMAP, M_WAITOK);
118
119	anonblock->count = needed;
120	anonblock->anons = anon;
121	LIST_INSERT_HEAD(&anonblock_list, anonblock, list);
122	memset(anon, 0, sizeof(*anon) * needed);
123
124	simple_lock(&uvm.afreelock);
125	uvmexp.nanon += needed;
126	uvmexp.nfreeanon += needed;
127	for (lcv = 0; lcv < needed; lcv++) {
128		simple_lock_init(&anon->an_lock);
129		anon[lcv].u.an_nxt = uvm.afree;
130		uvm.afree = &anon[lcv];
131		simple_lock_init(&uvm.afree->an_lock);
132	}
133	simple_unlock(&uvm.afreelock);
134	return 0;
135}
136
137/*
138 * remove anons from the free pool.
139 */
140void
141uvm_anon_remove(count)
142	int count;
143{
144	/*
145	 * we never actually free any anons, to avoid allocation overhead.
146	 * XXX someday we might want to try to free anons.
147	 */
148
149	simple_lock(&uvm.afreelock);
150	uvmexp.nanonneeded -= count;
151	simple_unlock(&uvm.afreelock);
152}
153
154/*
155 * allocate an anon
156 *
157 * => new anon is returned locked!
158 */
159struct vm_anon *
160uvm_analloc()
161{
162	struct vm_anon *a;
163
164	simple_lock(&uvm.afreelock);
165	a = uvm.afree;
166	if (a) {
167		uvm.afree = a->u.an_nxt;
168		uvmexp.nfreeanon--;
169		a->an_ref = 1;
170		a->an_swslot = 0;
171		a->u.an_page = NULL;		/* so we can free quickly */
172		LOCK_ASSERT(simple_lock_held(&a->an_lock) == 0);
173		simple_lock(&a->an_lock);
174	}
175	simple_unlock(&uvm.afreelock);
176	return(a);
177}
178
179/*
180 * uvm_anfree: free a single anon structure
181 *
182 * => caller must remove anon from its amap before calling (if it was in
183 *	an amap).
184 * => anon must be unlocked and have a zero reference count.
185 * => we may lock the pageq's.
186 */
187
188void
189uvm_anfree(anon)
190	struct vm_anon *anon;
191{
192	struct vm_page *pg;
193	UVMHIST_FUNC("uvm_anfree"); UVMHIST_CALLED(maphist);
194	UVMHIST_LOG(maphist,"(anon=0x%x)", anon, 0,0,0);
195
196	KASSERT(anon->an_ref == 0);
197	LOCK_ASSERT(!simple_lock_held(&anon->an_lock));
198
199	/*
200	 * get page
201	 */
202
203	pg = anon->u.an_page;
204
205	/*
206	 * if there is a resident page and it is loaned, then anon may not
207	 * own it.   call out to uvm_anon_lockpage() to ensure the real owner
208 	 * of the page has been identified and locked.
209	 */
210
211	if (pg && pg->loan_count) {
212		simple_lock(&anon->an_lock);
213		pg = uvm_anon_lockloanpg(anon);
214		simple_unlock(&anon->an_lock);
215	}
216
217	/*
218	 * if we have a resident page, we must dispose of it before freeing
219	 * the anon.
220	 */
221
222	if (pg) {
223
224		/*
225		 * if the page is owned by a uobject (now locked), then we must
226		 * kill the loan on the page rather than free it.
227		 */
228
229		if (pg->uobject) {
230			uvm_lock_pageq();
231			KASSERT(pg->loan_count > 0);
232			pg->loan_count--;
233			pg->uanon = NULL;
234			uvm_unlock_pageq();
235			simple_unlock(&pg->uobject->vmobjlock);
236		} else {
237
238			/*
239			 * page has no uobject, so we must be the owner of it.
240			 * if page is busy then we wait until it is not busy,
241			 * and then free it.
242			 */
243
244			KASSERT((pg->flags & PG_RELEASED) == 0);
245			simple_lock(&anon->an_lock);
246			pmap_page_protect(pg, VM_PROT_NONE);
247			while ((pg = anon->u.an_page) &&
248			       (pg->flags & PG_BUSY) != 0) {
249				pg->flags |= PG_WANTED;
250				UVM_UNLOCK_AND_WAIT(pg, &anon->an_lock, 0,
251				    "anfree", 0);
252				simple_lock(&anon->an_lock);
253			}
254			if (pg) {
255				uvm_lock_pageq();
256				uvm_pagefree(pg);
257				uvm_unlock_pageq();
258			}
259			simple_unlock(&anon->an_lock);
260			UVMHIST_LOG(maphist, "anon 0x%x, page 0x%x: "
261				    "freed now!", anon, pg, 0, 0);
262		}
263	}
264	if (pg == NULL && anon->an_swslot > 0) {
265		/* this page is no longer only in swap. */
266		simple_lock(&uvm.swap_data_lock);
267		KASSERT(uvmexp.swpgonly > 0);
268		uvmexp.swpgonly--;
269		simple_unlock(&uvm.swap_data_lock);
270	}
271
272	/*
273	 * free any swap resources.
274	 */
275
276	uvm_anon_dropswap(anon);
277
278	/*
279	 * now that we've stripped the data areas from the anon,
280	 * free the anon itself.
281	 */
282
283	simple_lock(&uvm.afreelock);
284	anon->u.an_nxt = uvm.afree;
285	uvm.afree = anon;
286	uvmexp.nfreeanon++;
287	simple_unlock(&uvm.afreelock);
288	UVMHIST_LOG(maphist,"<- done!",0,0,0,0);
289}
290
291/*
292 * uvm_anon_dropswap:  release any swap resources from this anon.
293 *
294 * => anon must be locked or have a reference count of 0.
295 */
296void
297uvm_anon_dropswap(anon)
298	struct vm_anon *anon;
299{
300	UVMHIST_FUNC("uvm_anon_dropswap"); UVMHIST_CALLED(maphist);
301
302	if (anon->an_swslot == 0)
303		return;
304
305	UVMHIST_LOG(maphist,"freeing swap for anon %p, paged to swslot 0x%x",
306		    anon, anon->an_swslot, 0, 0);
307	uvm_swap_free(anon->an_swslot, 1);
308	anon->an_swslot = 0;
309}
310
311/*
312 * uvm_anon_lockloanpg: given a locked anon, lock its resident page
313 *
314 * => anon is locked by caller
315 * => on return: anon is locked
316 *		 if there is a resident page:
317 *			if it has a uobject, it is locked by us
318 *			if it is ownerless, we take over as owner
319 *		 we return the resident page (it can change during
320 *		 this function)
321 * => note that the only time an anon has an ownerless resident page
322 *	is if the page was loaned from a uvm_object and the uvm_object
323 *	disowned it
324 * => this only needs to be called when you want to do an operation
325 *	on an anon's resident page and that page has a non-zero loan
326 *	count.
327 */
328struct vm_page *
329uvm_anon_lockloanpg(anon)
330	struct vm_anon *anon;
331{
332	struct vm_page *pg;
333	boolean_t locked = FALSE;
334
335	LOCK_ASSERT(simple_lock_held(&anon->an_lock));
336
337	/*
338	 * loop while we have a resident page that has a non-zero loan count.
339	 * if we successfully get our lock, we will "break" the loop.
340	 * note that the test for pg->loan_count is not protected -- this
341	 * may produce false positive results.   note that a false positive
342	 * result may cause us to do more work than we need to, but it will
343	 * not produce an incorrect result.
344	 */
345
346	while (((pg = anon->u.an_page) != NULL) && pg->loan_count != 0) {
347
348		/*
349		 * quickly check to see if the page has an object before
350		 * bothering to lock the page queues.   this may also produce
351		 * a false positive result, but that's ok because we do a real
352		 * check after that.
353		 */
354
355		if (pg->uobject) {
356			uvm_lock_pageq();
357			if (pg->uobject) {
358				locked =
359				    simple_lock_try(&pg->uobject->vmobjlock);
360			} else {
361				/* object disowned before we got PQ lock */
362				locked = TRUE;
363			}
364			uvm_unlock_pageq();
365
366			/*
367			 * if we didn't get a lock (try lock failed), then we
368			 * toggle our anon lock and try again
369			 */
370
371			if (!locked) {
372				simple_unlock(&anon->an_lock);
373
374				/*
375				 * someone locking the object has a chance to
376				 * lock us right now
377				 */
378
379				simple_lock(&anon->an_lock);
380				continue;
381			}
382		}
383
384		/*
385		 * if page is un-owned [i.e. the object dropped its ownership],
386		 * then we can take over as owner!
387		 */
388
389		if (pg->uobject == NULL && (pg->pqflags & PQ_ANON) == 0) {
390			uvm_lock_pageq();
391			pg->pqflags |= PQ_ANON;
392			pg->loan_count--;
393			uvm_unlock_pageq();
394		}
395		break;
396	}
397	return(pg);
398}
399
400
401
402/*
403 * page in every anon that is paged out to a range of swslots.
404 *
405 * swap_syscall_lock should be held (protects anonblock_list).
406 */
407
408boolean_t
409anon_swap_off(startslot, endslot)
410	int startslot, endslot;
411{
412	struct uvm_anonblock *anonblock;
413
414	LIST_FOREACH(anonblock, &anonblock_list, list) {
415		int i;
416
417		/*
418		 * loop thru all the anons in the anonblock,
419		 * paging in where needed.
420		 */
421
422		for (i = 0; i < anonblock->count; i++) {
423			struct vm_anon *anon = &anonblock->anons[i];
424			int slot;
425
426			/*
427			 * lock anon to work on it.
428			 */
429
430			simple_lock(&anon->an_lock);
431
432			/*
433			 * is this anon's swap slot in range?
434			 */
435
436			slot = anon->an_swslot;
437			if (slot >= startslot && slot < endslot) {
438				boolean_t rv;
439
440				/*
441				 * yup, page it in.
442				 */
443
444				/* locked: anon */
445				rv = anon_pagein(anon);
446				/* unlocked: anon */
447
448				if (rv) {
449					return rv;
450				}
451			} else {
452
453				/*
454				 * nope, unlock and proceed.
455				 */
456
457				simple_unlock(&anon->an_lock);
458			}
459		}
460	}
461	return FALSE;
462}
463
464
465/*
466 * fetch an anon's page.
467 *
468 * => anon must be locked, and is unlocked upon return.
469 * => returns TRUE if pagein was aborted due to lack of memory.
470 */
471
472static boolean_t
473anon_pagein(anon)
474	struct vm_anon *anon;
475{
476	struct vm_page *pg;
477	struct uvm_object *uobj;
478	int rv;
479
480	/* locked: anon */
481	LOCK_ASSERT(simple_lock_held(&anon->an_lock));
482
483	rv = uvmfault_anonget(NULL, NULL, anon);
484
485	/*
486	 * if rv == 0, anon is still locked, else anon
487	 * is unlocked
488	 */
489
490	switch (rv) {
491	case 0:
492		break;
493
494	case EIO:
495	case ERESTART:
496
497		/*
498		 * nothing more to do on errors.
499		 * ERESTART can only mean that the anon was freed,
500		 * so again there's nothing to do.
501		 */
502
503		return FALSE;
504
505	default:
506		return TRUE;
507	}
508
509	/*
510	 * ok, we've got the page now.
511	 * mark it as dirty, clear its swslot and un-busy it.
512	 */
513
514	pg = anon->u.an_page;
515	uobj = pg->uobject;
516	if (anon->an_swslot > 0)
517		uvm_swap_free(anon->an_swslot, 1);
518	anon->an_swslot = 0;
519	pg->flags &= ~(PG_CLEAN);
520
521	/*
522	 * deactivate the page (to put it on a page queue)
523	 */
524
525	pmap_clear_reference(pg);
526	uvm_lock_pageq();
527	if (pg->wire_count == 0)
528		uvm_pagedeactivate(pg);
529	uvm_unlock_pageq();
530
531	if (pg->flags & PG_WANTED) {
532		wakeup(pg);
533		pg->flags &= ~(PG_WANTED);
534	}
535
536	/*
537	 * unlock the anon and we're done.
538	 */
539
540	simple_unlock(&anon->an_lock);
541	if (uobj) {
542		simple_unlock(&uobj->vmobjlock);
543	}
544	return FALSE;
545}
546