swap_pager.c (12779, deleted) vs. swap_pager.c (12819, added)
1/*
2 * Copyright (c) 1994 John S. Dyson
3 * Copyright (c) 1990 University of Utah.
4 * Copyright (c) 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * the Systems Programming Group of the University of Utah Computer
9 * Science Department.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 *
39 * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
40 *
41 * @(#)swap_pager.c 8.9 (Berkeley) 3/21/94
42 * $Id: swap_pager.c,v 1.54 1995/12/11 04:58:02 dyson Exp $	(deleted)
42 * $Id: swap_pager.c,v 1.55 1995/12/11 15:43:33 dyson Exp $	(added)
43 */
44
45/*
46 * Quick hack to page to dedicated partition(s).
47 * TODO:
48 * Add multiprocessor locks
49 * Deal with async writes in a better fashion
50 */
51
52#include <sys/param.h>
53#include <sys/systm.h>
54#include <sys/kernel.h>
55#include <sys/proc.h>
56#include <sys/buf.h>
57#include <sys/vnode.h>
58#include <sys/malloc.h>
59#include <sys/vmmeter.h>
60
61#include <miscfs/specfs/specdev.h>
62#include <sys/rlist.h>
63
64#include <vm/vm.h>
65#include <vm/vm_param.h>
66#include <vm/vm_prot.h>
67#include <vm/vm_object.h>
68#include <vm/vm_page.h>
69#include <vm/vm_pager.h>
70#include <vm/vm_pageout.h>
71#include <vm/swap_pager.h>
72#include <vm/vm_kern.h>
73#include <vm/vm_extern.h>
74
75#ifndef NPENDINGIO
76#define NPENDINGIO 10
77#endif
78
79int nswiodone;
80int swap_pager_full;
81extern int vm_swap_size;
82static int no_swap_space = 1;
83struct rlist *swaplist;
84int nswaplist;
85
86#define MAX_PAGEOUT_CLUSTER 16
87
88TAILQ_HEAD(swpclean, swpagerclean);
89
90typedef struct swpagerclean *swp_clean_t;
91
92struct swpagerclean {
93 TAILQ_ENTRY(swpagerclean) spc_list;
94 int spc_flags;
95 struct buf *spc_bp;
96 vm_object_t spc_object;
97 vm_offset_t spc_kva;
98 int spc_count;
99 vm_page_t spc_m[MAX_PAGEOUT_CLUSTER];
100} swcleanlist[NPENDINGIO];
101
102
103/* spc_flags values */
104#define SPC_ERROR 0x01
105
106#define SWB_EMPTY (-1)
107
108struct swpclean swap_pager_done; /* list of completed page cleans */
109struct swpclean swap_pager_inuse; /* list of pending page cleans */
110struct swpclean swap_pager_free; /* list of free pager clean structs */
111struct pagerlst swap_pager_object_list; /* list of "named" anon region objects */
112struct pagerlst swap_pager_un_object_list; /* list of "unnamed" anon region objects */
113
114#define SWAP_FREE_NEEDED 0x1 /* need a swap block */
115#define SWAP_FREE_NEEDED_BY_PAGEOUT 0x2
116int swap_pager_needflags;
117
118static struct pagerlst *swp_qs[] = {
119 &swap_pager_object_list, &swap_pager_un_object_list, (struct pagerlst *) 0
120};
121
122/*
123 * pagerops for OBJT_SWAP - "swap pager".
124 */
125static vm_object_t
126 swap_pager_alloc __P((void *handle, vm_size_t size,
127 vm_prot_t prot, vm_ooffset_t offset));
128static void swap_pager_dealloc __P((vm_object_t object));
129static boolean_t
130 swap_pager_haspage __P((vm_object_t object, vm_pindex_t pindex,
131 int *before, int *after));
132static void swap_pager_init __P((void));
133struct pagerops swappagerops = {
134 swap_pager_init,
135 swap_pager_alloc,
136 swap_pager_dealloc,
137 swap_pager_getpages,
138 swap_pager_putpages,
139 swap_pager_haspage,
140 swap_pager_sync
141};
142
143static int npendingio = NPENDINGIO;
144int dmmin, dmmax;
145
146static __pure int
147 swap_pager_block_index __P((vm_offset_t offset)) __pure2;
148static __pure int
149 swap_pager_block_offset __P((vm_offset_t offset)) __pure2;
150static daddr_t *swap_pager_diskaddr __P((vm_object_t object,
151 vm_pindex_t pindex, int *valid));
152static void swap_pager_finish __P((swp_clean_t spc));
153static void swap_pager_freepage __P((vm_page_t m));
154static void swap_pager_free_swap __P((vm_object_t object));
155static void swap_pager_freeswapspace __P((vm_object_t object,
156 unsigned int from,
157 unsigned int to));
158static int swap_pager_getswapspace __P((vm_object_t object,
159 unsigned int amount,
160 daddr_t *rtval));
161static void swap_pager_iodone __P((struct buf *));
162static void swap_pager_iodone1 __P((struct buf *bp));
163static int swap_pager_ready __P((void));	(deleted)
164static void swap_pager_reclaim __P((void));
165static void swap_pager_ridpages __P((vm_page_t *m, int count,
166 int reqpage));
167static void swap_pager_setvalid __P((vm_object_t object,
168 vm_offset_t offset, int valid));
169static void swapsizecheck __P((void));
170
171static inline void
172swapsizecheck()
173{
174 if (vm_swap_size < 128 * btodb(PAGE_SIZE)) {
175 if (swap_pager_full == 0)
176 printf("swap_pager: out of space\n");
177 swap_pager_full = 1;
178 } else if (vm_swap_size > 192 * btodb(PAGE_SIZE))
179 swap_pager_full = 0;
180}
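
/*
 * Worked example of the thresholds above (assuming, for illustration,
 * PAGE_SIZE == 4096 and DEV_BSIZE == 512, so btodb(PAGE_SIZE) == 8):
 * swap_pager_full is raised when fewer than 128 pages (1024 disk
 * blocks, 512KB) of swap remain, and is cleared only once 192 pages
 * (1536 blocks, 768KB) are free again.  The 64-page gap provides
 * hysteresis so the flag does not flap near the boundary.
 */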
181
182static void
183swap_pager_init()
184{
185 TAILQ_INIT(&swap_pager_object_list);
186 TAILQ_INIT(&swap_pager_un_object_list);
187
188 /*
189 * Initialize clean lists
190 */
191 TAILQ_INIT(&swap_pager_inuse);
192 TAILQ_INIT(&swap_pager_done);
193 TAILQ_INIT(&swap_pager_free);
194
195 /*
196 * Calculate the swap allocation constants.
197 */
198 dmmin = CLBYTES / DEV_BSIZE;
199 dmmax = btodb(SWB_NPAGES * PAGE_SIZE) * 2;
200}
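
/*
 * Sizing example for the constants above (assuming, for illustration,
 * CLBYTES == PAGE_SIZE == 4096, DEV_BSIZE == 512, SWB_NPAGES == 8):
 * dmmin = 4096/512 = 8 blocks, and dmmax = btodb(8 * 4096) * 2 =
 * 64 * 2 = 128 blocks.  The clustering code below never lets a single
 * swap I/O span more than one dmmax-sized region of the swap area.
 */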
201
202void
203swap_pager_swap_init()
204{
205 swp_clean_t spc;
206 struct buf *bp;
207 int i;
208
209 /*
210 * KVAs are allocated here so that we don't need to keep calling
211 * kmem_alloc_pageable() at runtime.
212 */
213 for (i = 0, spc = swcleanlist; i < npendingio; i++, spc++) {
214 spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE * MAX_PAGEOUT_CLUSTER);
215 if (!spc->spc_kva) {
216 break;
217 }
218 spc->spc_bp = malloc(sizeof(*bp), M_TEMP, M_KERNEL);
219 if (!spc->spc_bp) {
220 kmem_free_wakeup(pager_map, spc->spc_kva, PAGE_SIZE);
221 break;
222 }
223 spc->spc_flags = 0;
224 TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
225 }
226}
227
228int
229swap_pager_swp_alloc(object, wait)
230 vm_object_t object;
231 int wait;
232{
233 sw_blk_t swb;
234 int nblocks;
235 int i, j;
236
237 nblocks = (object->size + SWB_NPAGES - 1) / SWB_NPAGES;
238 swb = malloc(nblocks * sizeof(*swb), M_VMPGDATA, wait);
239 if (swb == NULL)
240 return 1;
241
242 for (i = 0; i < nblocks; i++) {
243 swb[i].swb_valid = 0;
244 swb[i].swb_locked = 0;
245 for (j = 0; j < SWB_NPAGES; j++)
246 swb[i].swb_block[j] = SWB_EMPTY;
247 }
248
249 object->un_pager.swp.swp_nblocks = nblocks;
250 object->un_pager.swp.swp_allocsize = 0;
251 object->un_pager.swp.swp_blocks = swb;
252 object->un_pager.swp.swp_poip = 0;
253
254 if (object->handle != NULL) {
255 TAILQ_INSERT_TAIL(&swap_pager_object_list, object, pager_object_list);
256 } else {
257 TAILQ_INSERT_TAIL(&swap_pager_un_object_list, object, pager_object_list);
258 }
259
260 return 0;
261}
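
/*
 * Sizing sketch for the allocation above: nblocks is the usual
 * ceiling-division idiom, so with SWB_NPAGES == 8 an object of 19
 * pages gets (19 + 7) / 8 == 3 sw_blk_t entries; the partial last
 * block is rounded up rather than lost.
 */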
262
263/*
264 * Allocate an object and associated resources.
265 * Note that if we are called from the pageout daemon (handle == NULL)
266 * we should not wait for memory, as doing so could result in a deadlock.
267 */
268static vm_object_t
269swap_pager_alloc(handle, size, prot, offset)
270 void *handle;
271 register vm_size_t size;
272 vm_prot_t prot;
273 vm_ooffset_t offset;
274{
275 vm_object_t object;
276
277 /*
278 * If this is a "named" anonymous region, look it up and use the
279 * object if it exists, otherwise allocate a new one.
280 */
281 if (handle) {
282 object = vm_pager_object_lookup(&swap_pager_object_list, handle);
283 if (object != NULL) {
284 vm_object_reference(object);
285 } else {
286 /*
287 * XXX - there is a race condition here. Two processes
288 * can request the same named object simultaneously,
289 * and if one blocks for memory, the result is a disaster.
290 * Probably quite rare, but it is yet another reason to just
291 * rip support of "named anonymous regions" out altogether.
292 */
293 object = vm_object_allocate(OBJT_SWAP,
294 OFF_TO_IDX(offset+ PAGE_SIZE - 1 + size));
295 object->handle = handle;
296 (void) swap_pager_swp_alloc(object, M_WAITOK);
297 }
298 } else {
299 object = vm_object_allocate(OBJT_SWAP,
300 OFF_TO_IDX(offset + PAGE_SIZE - 1 + size));
301 (void) swap_pager_swp_alloc(object, M_WAITOK);
302 }
303
304 return (object);
305}
306
307/*
308 * Returns the disk block associated with a pager and offset;
309 * additionally, as a side effect, returns a flag indicating
310 * whether the block has been written.
311 */
312
313inline static daddr_t *
314swap_pager_diskaddr(object, pindex, valid)
315 vm_object_t object;
316 vm_pindex_t pindex;
317 int *valid;
318{
319 register sw_blk_t swb;
320 int ix;
321
322 if (valid)
323 *valid = 0;
324 ix = pindex / SWB_NPAGES;
325 if ((ix >= object->un_pager.swp.swp_nblocks) ||
326 (pindex >= object->size)) {
327 return (FALSE);
328 }
329 swb = &object->un_pager.swp.swp_blocks[ix];
330 ix = pindex % SWB_NPAGES;
331 if (valid)
332 *valid = swb->swb_valid & (1 << ix);
333 return &swb->swb_block[ix];
334}
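
/*
 * Illustrative sketch (hypothetical helper, not part of this revision):
 * the two-level lookup above reduces to plain index arithmetic.  With
 * SWB_NPAGES == 8, pindex 19 lands in swp_blocks[2] at slot 3.
 */
#if 0
static __inline int
swap_pager_example_index(vm_pindex_t pindex, int *slot)
{
	*slot = pindex % SWB_NPAGES;	/* offset within the sw_blk_t */
	return (pindex / SWB_NPAGES);	/* which sw_blk_t covers pindex */
}
#endif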
335
336/*
337 * Utility routine to set the valid (written) bit for
338 * a block associated with a pager and offset
339 */
340static void
341swap_pager_setvalid(object, offset, valid)
342 vm_object_t object;
343 vm_offset_t offset;
344 int valid;
345{
346 register sw_blk_t swb;
347 int ix;
348
349 ix = offset / SWB_NPAGES;
350 if (ix >= object->un_pager.swp.swp_nblocks)
351 return;
352
353 swb = &object->un_pager.swp.swp_blocks[ix];
354 ix = offset % SWB_NPAGES;
355 if (valid)
356 swb->swb_valid |= (1 << ix);
357 else
358 swb->swb_valid &= ~(1 << ix);
359 return;
360}
361
362/*
363 * this routine allocates swap space with a fragmentation
364 * minimization policy.
365 */
366static int
367swap_pager_getswapspace(object, amount, rtval)
368 vm_object_t object;
369 unsigned int amount;
370 daddr_t *rtval;
371{
372 unsigned location;
373 vm_swap_size -= amount;
374 if (!rlist_alloc(&swaplist, amount, &location)) {
375 vm_swap_size += amount;
376 return 0;
377 } else {
378 swapsizecheck();
379 object->un_pager.swp.swp_allocsize += amount;
380 *rtval = location;
381 return 1;
382 }
383}
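
/*
 * Hypothetical usage sketch (not part of this revision): a caller
 * reserving one page worth of swap, with a single reclaim-and-retry
 * fallback in the style of swap_pager_putpages() below.
 */
#if 0
	daddr_t blk;

	if (!swap_pager_getswapspace(object, btodb(PAGE_SIZE), &blk)) {
		swap_pager_reclaim();	/* free allocated-but-unwritten blocks */
		if (!swap_pager_getswapspace(object, btodb(PAGE_SIZE), &blk))
			return (VM_PAGER_AGAIN);
	}
#endif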
384
385/*
386 * this routine frees swap space with a fragmentation
387 * minimization policy.
388 */
389static void
390swap_pager_freeswapspace(object, from, to)
391 vm_object_t object;
392 unsigned int from;
393 unsigned int to;
394{
395 rlist_free(&swaplist, from, to);
396 vm_swap_size += (to - from) + 1;
397 object->un_pager.swp.swp_allocsize -= (to - from) + 1;
398 swapsizecheck();
399}
400/*
401 * this routine frees swap blocks from a specified pager
402 */
403void
404swap_pager_freespace(object, start, size)
405 vm_object_t object;
406 vm_pindex_t start;
407 vm_size_t size;
408{
409 vm_pindex_t i;
410 int s;
411
412 s = splbio();
413 for (i = start; i < start + size; i += 1) {
414 int valid;
415 daddr_t *addr = swap_pager_diskaddr(object, i, &valid);
416
417 if (addr && *addr != SWB_EMPTY) {
418 swap_pager_freeswapspace(object, *addr, *addr + btodb(PAGE_SIZE) - 1);
419 if (valid) {
420 swap_pager_setvalid(object, i, 0);
421 }
422 *addr = SWB_EMPTY;
423 }
424 }
425 splx(s);
426}
427
428static void
429swap_pager_free_swap(object)
430 vm_object_t object;
431{
432 register int i, j;
433 register sw_blk_t swb;
434 int first_block=0, block_count=0;
435 int s;
436 /*
437 * Free left over swap blocks
438 */
439 s = splbio();
440 for (i = 0, swb = object->un_pager.swp.swp_blocks;
441 i < object->un_pager.swp.swp_nblocks; i++, swb++) {
442 for (j = 0; j < SWB_NPAGES; j++) {
443 if (swb->swb_block[j] != SWB_EMPTY) {
444 /*
445 * initially the length of the run is zero
446 */
447 if (block_count == 0) {
448 first_block = swb->swb_block[j];
449 block_count = btodb(PAGE_SIZE);
450 swb->swb_block[j] = SWB_EMPTY;
451 /*
452 * if the new block can be included into the current run
453 */
454 } else if (swb->swb_block[j] == first_block + block_count) {
455 block_count += btodb(PAGE_SIZE);
456 swb->swb_block[j] = SWB_EMPTY;
457 /*
458 * terminate the previous run, and start a new one
459 */
460 } else {
461 swap_pager_freeswapspace(object, first_block,
462 (unsigned) first_block + block_count - 1);
463 first_block = swb->swb_block[j];
464 block_count = btodb(PAGE_SIZE);
465 swb->swb_block[j] = SWB_EMPTY;
466 }
467 }
468 }
469 }
470
471 if (block_count) {
472 swap_pager_freeswapspace(object, first_block,
473 (unsigned) first_block + block_count - 1);
474 }
475 splx(s);
476}
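
/*
 * The loop above is an instance of a general run-coalescing pattern:
 * accumulate consecutive disk blocks into [first_block, first_block +
 * block_count) and flush whenever continuity breaks.  A minimal
 * standalone restatement (hypothetical helper, not part of this
 * revision; assumes every entry of "blocks" is a valid block number):
 */
#if 0
static void
example_coalesce_free(vm_object_t object, daddr_t *blocks, int n)
{
	int i, count = 0;
	daddr_t first = 0;

	for (i = 0; i < n; i++) {
		if (count && blocks[i] == first + count) {
			count += btodb(PAGE_SIZE);	/* extend current run */
			continue;
		}
		if (count)	/* flush the finished run */
			swap_pager_freeswapspace(object, first, first + count - 1);
		first = blocks[i];
		count = btodb(PAGE_SIZE);
	}
	if (count)	/* flush the final run */
		swap_pager_freeswapspace(object, first, first + count - 1);
}
#endif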
477
478
479/*
480 * swap_pager_reclaim frees up over-allocated space from all pagers;
481 * this eliminates internal fragmentation due to allocation of space
482 * for segments that are never swapped to. It has been written so that
483 * it does not block until the rlist_free operation occurs; it keeps
484 * the queues consistent.
485 */
486
487/*
488 * Maximum number of blocks (pages) to reclaim per pass
489 */
490#define MAXRECLAIM 128
491
492static void
493swap_pager_reclaim()
494{
495 vm_object_t object;
496 int i, j, k;
497 int s;
498 int reclaimcount;
499 static struct {
500 int address;
501 vm_object_t object;
502 } reclaims[MAXRECLAIM];
503 static int in_reclaim;
504
505 /*
506 * allow only one process to be in the swap_pager_reclaim subroutine
507 */
508 s = splbio();
509 if (in_reclaim) {
510 tsleep(&in_reclaim, PSWP, "swrclm", 0);
511 splx(s);
512 return;
513 }
514 in_reclaim = 1;
515 reclaimcount = 0;
516
517 /* for each pager queue */
518 for (k = 0; swp_qs[k]; k++) {
519
520 object = swp_qs[k]->tqh_first;
521 while (object && (reclaimcount < MAXRECLAIM)) {
522
523 /*
524 * see if any blocks associated with a pager have been
525 * allocated but not used (written)
526 */
527 if (object->paging_in_progress == 0) {
528 for (i = 0; i < object->un_pager.swp.swp_nblocks; i++) {
529 sw_blk_t swb = &object->un_pager.swp.swp_blocks[i];
530
531 if (swb->swb_locked)
532 continue;
533 for (j = 0; j < SWB_NPAGES; j++) {
534 if (swb->swb_block[j] != SWB_EMPTY &&
535 (swb->swb_valid & (1 << j)) == 0) {
536 reclaims[reclaimcount].address = swb->swb_block[j];
537 reclaims[reclaimcount++].object = object;
538 swb->swb_block[j] = SWB_EMPTY;
539 if (reclaimcount >= MAXRECLAIM)
540 goto rfinished;
541 }
542 }
543 }
544 }
545 object = object->pager_object_list.tqe_next;
546 }
547 }
548
549rfinished:
550
551 /*
552 * free the blocks that have been added to the reclaim list
553 */
554 for (i = 0; i < reclaimcount; i++) {
555 swap_pager_freeswapspace(reclaims[i].object,
556 reclaims[i].address, reclaims[i].address + btodb(PAGE_SIZE) - 1);
557 }
558 splx(s);
559 in_reclaim = 0;
560 wakeup(&in_reclaim);
561}
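
/*
 * The in_reclaim handshake above is the classic pre-mutex sleep-lock
 * idiom: a flag guarded by splbio(), with tsleep()/wakeup() for
 * blocking and release.  A generic sketch of the pattern (hypothetical
 * names, not part of this revision):
 */
#if 0
	s = splbio();
	while (resource_busy)			/* the would-be lock word */
		tsleep(&resource_busy, PSWP, "excl", 0);
	resource_busy = 1;
	splx(s);
	/* critical section runs here */
	resource_busy = 0;
	wakeup(&resource_busy);			/* release any waiters */
#endif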
562
563
564/*
565 * swap_pager_copy copies blocks from one pager to another and
566 * destroys the source pager
567 */
568
569void
570swap_pager_copy(srcobject, srcoffset, dstobject, dstoffset, offset)
571 vm_object_t srcobject;
572 vm_pindex_t srcoffset;
573 vm_object_t dstobject;
574 vm_pindex_t dstoffset;
575 vm_pindex_t offset;
576{
577 vm_pindex_t i;
578 int origsize;
579 int s;
580
581 if (vm_swap_size)
582 no_swap_space = 0;
583
584 origsize = srcobject->un_pager.swp.swp_allocsize;
585
586 /*
587 * remove the source object from the swap_pager internal queue
588 */
589 if (srcobject->handle == NULL) {
590 TAILQ_REMOVE(&swap_pager_un_object_list, srcobject, pager_object_list);
591 } else {
592 TAILQ_REMOVE(&swap_pager_object_list, srcobject, pager_object_list);
593 }
594
595 s = splbio();
596 while (srcobject->un_pager.swp.swp_poip) {
597 tsleep(srcobject, PVM, "spgout", 0);
598 }
599 splx(s);
600
601 /*
602 * clean all of the pages that are currently active and finished
603 */
604 swap_pager_sync();
605
606 s = splbio();
607 /*
608 * transfer source to destination
609 */
610 for (i = 0; i < dstobject->size; i += 1) {
611 int srcvalid, dstvalid;
612 daddr_t *srcaddrp = swap_pager_diskaddr(srcobject, i + offset + srcoffset,
613 &srcvalid);
614 daddr_t *dstaddrp;
615
616 /*
617 * see if the source has space allocated
618 */
619 if (srcaddrp && *srcaddrp != SWB_EMPTY) {
620 /*
621 * if the source is valid and the dest has no space,
622 * then copy the allocation from the source to the
623 * dest.
624 */
625 if (srcvalid) {
626 dstaddrp = swap_pager_diskaddr(dstobject, i + dstoffset,
627 &dstvalid);
628 /*
629 * if the dest already has a valid block,
630 * deallocate the source block without
631 * copying.
632 */
633 if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) {
634 swap_pager_freeswapspace(dstobject, *dstaddrp,
635 *dstaddrp + btodb(PAGE_SIZE) - 1);
636 *dstaddrp = SWB_EMPTY;
637 }
638 if (dstaddrp && *dstaddrp == SWB_EMPTY) {
639 *dstaddrp = *srcaddrp;
640 *srcaddrp = SWB_EMPTY;
641 dstobject->un_pager.swp.swp_allocsize += btodb(PAGE_SIZE);
642 srcobject->un_pager.swp.swp_allocsize -= btodb(PAGE_SIZE);
643 swap_pager_setvalid(dstobject, i + dstoffset, 1);
644 }
645 }
646 /*
647 * if the source is not empty at this point, then
648 * deallocate the space.
649 */
650 if (*srcaddrp != SWB_EMPTY) {
651 swap_pager_freeswapspace(srcobject, *srcaddrp,
652 *srcaddrp + btodb(PAGE_SIZE) - 1);
653 *srcaddrp = SWB_EMPTY;
654 }
655 }
656 }
657 splx(s);
658
659 /*
660 * Free left over swap blocks
661 */
662 swap_pager_free_swap(srcobject);
663
664 if (srcobject->un_pager.swp.swp_allocsize) {
665 printf("swap_pager_copy: *warning* pager with %d blocks (orig: %d)\n",
666 srcobject->un_pager.swp.swp_allocsize, origsize);
667 }
668
669 free(srcobject->un_pager.swp.swp_blocks, M_VMPGDATA);
670 srcobject->un_pager.swp.swp_blocks = NULL;
671
672 return;
673}
674
675static void
676swap_pager_dealloc(object)
677 vm_object_t object;
678{
679 int s;
680
681 /*
682 * Remove from list right away so lookups will fail if we block for
683 * pageout completion.
684 */
685 if (object->handle == NULL) {
686 TAILQ_REMOVE(&swap_pager_un_object_list, object, pager_object_list);
687 } else {
688 TAILQ_REMOVE(&swap_pager_object_list, object, pager_object_list);
689 }
690
691 /*
692 * Wait for all pageouts to finish and remove all entries from
693 * cleaning list.
694 */
695
696 s = splbio();
697 while (object->un_pager.swp.swp_poip) {
698 tsleep(object, PVM, "swpout", 0);
699 }
700 splx(s);
701
702
703 swap_pager_sync();
704
705 /*
706 * Free left over swap blocks
707 */
708 swap_pager_free_swap(object);
709
710 if (object->un_pager.swp.swp_allocsize) {
711 printf("swap_pager_dealloc: *warning* freeing pager with %d blocks\n",
712 object->un_pager.swp.swp_allocsize);
713 }
714 /*
715 * Free swap management resources
716 */
717 free(object->un_pager.swp.swp_blocks, M_VMPGDATA);
718 object->un_pager.swp.swp_blocks = NULL;
719}
720
721static inline __pure int
722swap_pager_block_index(pindex)
723 vm_pindex_t pindex;
724{
725 return (pindex / SWB_NPAGES);
726}
727
728static inline __pure int
729swap_pager_block_offset(pindex)
730 vm_pindex_t pindex;
731{
732 return (pindex % SWB_NPAGES);
733}
734
735/*
736 * swap_pager_haspage returns TRUE if the pager has data that has
737 * been written out.
738 */
739static boolean_t
740swap_pager_haspage(object, pindex, before, after)
741 vm_object_t object;
742 vm_pindex_t pindex;
743 int *before;
744 int *after;
745{
746 register sw_blk_t swb;
747 int ix;
748
749 if (before != NULL)
750 *before = 0;
751 if (after != NULL)
752 *after = 0;
753 ix = pindex / SWB_NPAGES;
754 if (ix >= object->un_pager.swp.swp_nblocks) {
755 return (FALSE);
756 }
757 swb = &object->un_pager.swp.swp_blocks[ix];
758 ix = pindex % SWB_NPAGES;
759
760 if (swb->swb_block[ix] != SWB_EMPTY) {
761
762 if (swb->swb_valid & (1 << ix)) {
763 int tix;
764 if (before) {
765 for(tix = ix - 1; tix >= 0; --tix) {
766 if ((swb->swb_valid & (1 << tix)) == 0)
767 break;
768 if ((swb->swb_block[tix] +
769 (ix - tix) * (PAGE_SIZE/DEV_BSIZE)) !=
770 swb->swb_block[ix])
771 break;
772 (*before)++;
773 }
774 }
775
776 if (after) {
777 for(tix = ix + 1; tix < SWB_NPAGES; tix++) {
778 if ((swb->swb_valid & (1 << tix)) == 0)
779 break;
780 if ((swb->swb_block[tix] -
781 (tix - ix) * (PAGE_SIZE/DEV_BSIZE)) !=
782 swb->swb_block[ix])
783 break;
784 (*after)++;
785 }
786 }
787
788 return TRUE;
789 }
790 }
791 return (FALSE);
792}
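
/*
 * Worked example of the contiguity test above (assuming PAGE_SIZE ==
 * 4096 and DEV_BSIZE == 512, so one page spans 8 disk blocks): slots
 * holding block numbers 96, 104, 112 around the requested page yield
 * *before == 1 and *after == 1, while a forward neighbor at block 120
 * instead of 112 breaks the run and stops the scan.
 */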
793
794/*
795 * swap_pager_freepage is a convenience routine that clears the busy
796 * bit and deallocates a page.
797 */
798static void
799swap_pager_freepage(m)
800 vm_page_t m;
801{
802 PAGE_WAKEUP(m);
803 vm_page_free(m);
804}
805
806/*
807 * swap_pager_ridpages is a convenience routine that deallocates all
808 * but the required page. This is usually used in error returns that
809 * need to invalidate the "extra" readahead pages.
810 */
811static void
812swap_pager_ridpages(m, count, reqpage)
813 vm_page_t *m;
814 int count;
815 int reqpage;
816{
817 int i;
818
819 for (i = 0; i < count; i++)
820 if (i != reqpage)
821 swap_pager_freepage(m[i]);
822}
823
824/*
825 * swap_pager_iodone1 is the completion routine for both reads and async writes
826 */
827static void
828swap_pager_iodone1(bp)
829 struct buf *bp;
830{
831 bp->b_flags |= B_DONE;
832 bp->b_flags &= ~B_ASYNC;
833 wakeup(bp);
834}
835
836int
837swap_pager_getpages(object, m, count, reqpage)
838 vm_object_t object;
839 vm_page_t *m;
840 int count, reqpage;
841{
842 register struct buf *bp;
843 sw_blk_t swb[count];
844 register int s;
845 int i;
846 boolean_t rv;
847 vm_offset_t kva, off[count];
848 swp_clean_t spc;
849 vm_pindex_t paging_offset;
850 int reqaddr[count];
851 int sequential;
852
853 int first, last;
854 int failed;
855 int reqdskregion;
856
857 object = m[reqpage]->object;
858 paging_offset = OFF_TO_IDX(object->paging_offset);
859 sequential = (m[reqpage]->pindex == (object->last_read + 1));
860
861 for (i = 0; i < count; i++) {
862 vm_pindex_t fidx = m[i]->pindex + paging_offset;
863 int ix = swap_pager_block_index(fidx);
864
865 if (ix >= object->un_pager.swp.swp_nblocks) {
866 int j;
867
868 if (i <= reqpage) {
869 swap_pager_ridpages(m, count, reqpage);
870 return (VM_PAGER_FAIL);
871 }
872 for (j = i; j < count; j++) {
873 swap_pager_freepage(m[j]);
874 }
875 count = i;
876 break;
877 }
878 swb[i] = &object->un_pager.swp.swp_blocks[ix];
879 off[i] = swap_pager_block_offset(fidx);
880 reqaddr[i] = swb[i]->swb_block[off[i]];
881 }
882
883 /* make sure that our required input request actually exists */
884
885 if (reqaddr[reqpage] == SWB_EMPTY ||
886 (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) {
887 swap_pager_ridpages(m, count, reqpage);
888 return (VM_PAGER_FAIL);
889 }
890 reqdskregion = reqaddr[reqpage] / dmmax;
891
892 /*
893 * search backwards for the first contiguous page to transfer
894 */
895 failed = 0;
896 first = 0;
897 for (i = reqpage - 1; i >= 0; --i) {
898 if (sequential || failed || (reqaddr[i] == SWB_EMPTY) ||
899 (swb[i]->swb_valid & (1 << off[i])) == 0 ||
900 (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
901 ((reqaddr[i] / dmmax) != reqdskregion)) {
902 failed = 1;
903 swap_pager_freepage(m[i]);
904 if (first == 0)
905 first = i + 1;
906 }
907 }
908 /*
909 * search forwards for the last contiguous page to transfer
910 */
911 failed = 0;
912 last = count;
913 for (i = reqpage + 1; i < count; i++) {
914 if (failed || (reqaddr[i] == SWB_EMPTY) ||
915 (swb[i]->swb_valid & (1 << off[i])) == 0 ||
916 (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
917 ((reqaddr[i] / dmmax) != reqdskregion)) {
918 failed = 1;
919 swap_pager_freepage(m[i]);
920 if (last == count)
921 last = i;
922 }
923 }
924
925 count = last;
926 if (first != 0) {
927 for (i = first; i < count; i++) {
928 m[i - first] = m[i];
929 reqaddr[i - first] = reqaddr[i];
930 off[i - first] = off[i];
931 }
932 count -= first;
933 reqpage -= first;
934 }
935 ++swb[reqpage]->swb_locked;
936
937 /*
938 * At this point: "m" is a pointer to the array of vm_page_t for
939 * paging I/O, "count" is the number of vm_page_t entries represented
940 * by "m", "object" is the vm_object_t for the I/O, and "reqpage" is
941 * the index into "m" for the page actually faulted.
942 */
943
944 spc = NULL; /* we might not use an spc data structure */
945
946 if ((count == 1) && (swap_pager_free.tqh_first != NULL)) {
947 spc = swap_pager_free.tqh_first;
948 TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
949 kva = spc->spc_kva;
950 bp = spc->spc_bp;
951 bzero(bp, sizeof *bp);
952 bp->b_spc = spc;
953 bp->b_vnbufs.le_next = NOLIST;
954 } else {
955 /*
956 * Get a swap buffer header to perform the IO
957 */
958 bp = getpbuf();
959 kva = (vm_offset_t) bp->b_data;
960 }
961
962 /*
963 * map our page(s) into kva for input
964 */
965 pmap_qenter(kva, m, count);
966
967 bp->b_flags = B_BUSY | B_READ | B_CALL | B_PAGING;
968 bp->b_iodone = swap_pager_iodone1;
969 bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */
970 bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
971 crhold(bp->b_rcred);
972 crhold(bp->b_wcred);
973 bp->b_un.b_addr = (caddr_t) kva;
974 bp->b_blkno = reqaddr[0];
975 bp->b_bcount = PAGE_SIZE * count;
976 bp->b_bufsize = PAGE_SIZE * count;
977
978 pbgetvp(swapdev_vp, bp);
979
980 cnt.v_swapin++;
981 cnt.v_swappgsin += count;
982 /*
983 * perform the I/O
984 */
985 VOP_STRATEGY(bp);
986
987 /*
988 * wait for the sync I/O to complete
989 */
990 s = splbio();
991 while ((bp->b_flags & B_DONE) == 0) {
992 tsleep(bp, PVM, "swread", 0);
993 }
994
995 if (bp->b_flags & B_ERROR) {
996 printf("swap_pager: I/O error - pagein failed; blkno %d, size %d, error %d\n",
997 bp->b_blkno, bp->b_bcount, bp->b_error);
998 rv = VM_PAGER_ERROR;
999 } else {
1000 rv = VM_PAGER_OK;
1001 }
1002
1003 /*
1004 * relpbuf does this, but we maintain our own buffer list also...
1005 */
1006 if (bp->b_vp)
1007 pbrelvp(bp);
1008
1009 splx(s);
1010 swb[reqpage]->swb_locked--;
1011
1012 /*
1013 * remove the mapping for kernel virtual
1014 */
1015 pmap_qremove(kva, count);
1016
1017 if (spc) {
1018 m[reqpage]->object->last_read = m[reqpage]->pindex;
1019 if (bp->b_flags & B_WANTED)
1020 wakeup(bp);
1021 /*
1022 * if we have used an spc, we need to free it.
1023 */
1024 if (bp->b_rcred != NOCRED)
1025 crfree(bp->b_rcred);
1026 if (bp->b_wcred != NOCRED)
1027 crfree(bp->b_wcred);
1028 TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
1029 if (swap_pager_needflags & SWAP_FREE_NEEDED) {
1030 wakeup(&swap_pager_free);
1031 }
1032 if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT)
1033 pagedaemon_wakeup();
1034 swap_pager_needflags &= ~(SWAP_FREE_NEEDED|SWAP_FREE_NEEDED_BY_PAGEOUT);
1035 } else {
1036 /*
1037 * release the physical I/O buffer
1038 */
1039 relpbuf(bp);
1040 /*
1041 * finish up input if everything is ok
1042 */
1043 if (rv == VM_PAGER_OK) {
1044 for (i = 0; i < count; i++) {
1045 pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
1046 m[i]->dirty = 0;
1047 m[i]->flags &= ~PG_ZERO;
1048 if (i != reqpage) {
1049 /*
1050 * whether or not to leave the page
1051 * activated is up in the air, but we
1052 * should put the page on a page queue
1053 * somewhere. (it already is in the
1054 * object). After some empirical
1055 * results, it is best to deactivate
1056 * the readahead pages.
1057 */
1058 vm_page_deactivate(m[i]);
1059
1060 /*
1061 * just in case someone was asking for
1062 * this page we now tell them that it
1063 * is ok to use
1064 */
1065 m[i]->valid = VM_PAGE_BITS_ALL;
1066 PAGE_WAKEUP(m[i]);
1067 }
1068 }
1069
1070 m[reqpage]->object->last_read = m[count-1]->pindex;
1071
1072 /*
1073 * If we're out of swap space, then attempt to free
1074 * some whenever pages are brought in. We must clear
1075 * the clean flag so that the page contents will be
1076 * preserved.
1077 */
1078 if (swap_pager_full) {
1079 for (i = 0; i < count; i++) {
1080 m[i]->dirty = VM_PAGE_BITS_ALL;
1081 }
1082 swap_pager_freespace(object, m[0]->pindex + paging_offset, count);
1083 }
1084 } else {
1085 swap_pager_ridpages(m, count, reqpage);
1086 }
1087 }
1088 if (rv == VM_PAGER_OK) {
1089 pmap_clear_modify(VM_PAGE_TO_PHYS(m[reqpage]));
1090 m[reqpage]->valid = VM_PAGE_BITS_ALL;
1091 m[reqpage]->dirty = 0;
1092 }
1093 return (rv);
1094}
1095
1096int
1097swap_pager_putpages(object, m, count, sync, rtvals)
1098 vm_object_t object;
1099 vm_page_t *m;
1100 int count;
1101 boolean_t sync;
1102 int *rtvals;
1103{
1104 register struct buf *bp;
1105 sw_blk_t swb[count];
1106 register int s;
1107 int i, j, ix;
1108 boolean_t rv;
1109 vm_offset_t kva, off, fidx;
1110 swp_clean_t spc;
1111 vm_pindex_t paging_pindex;
1112 int reqaddr[count];
1113 int failed;
1114
1115 if (vm_swap_size)
1116 no_swap_space = 0;
1117 if (no_swap_space) {
1118 for (i = 0; i < count; i++)
1119 rtvals[i] = VM_PAGER_FAIL;
1120 return VM_PAGER_FAIL;
1121 }
1122 spc = NULL;
1123
1124 object = m[0]->object;
1125 paging_pindex = OFF_TO_IDX(object->paging_offset);
1126
1127 failed = 0;
1128 for (j = 0; j < count; j++) {
1129 fidx = m[j]->pindex + paging_pindex;
1130 ix = swap_pager_block_index(fidx);
1131 swb[j] = 0;
1132 if (ix >= object->un_pager.swp.swp_nblocks) {
1133 rtvals[j] = VM_PAGER_FAIL;
1134 failed = 1;
1135 continue;
1136 } else {
1137 rtvals[j] = VM_PAGER_OK;
1138 }
1139 swb[j] = &object->un_pager.swp.swp_blocks[ix];
1140 swb[j]->swb_locked++;
1141 if (failed) {
1142 rtvals[j] = VM_PAGER_FAIL;
1143 continue;
1144 }
1145 off = swap_pager_block_offset(fidx);
1146 reqaddr[j] = swb[j]->swb_block[off];
1147 if (reqaddr[j] == SWB_EMPTY) {
1148 daddr_t blk;
1149 int tries;
1150 int ntoget;
1151
1152 tries = 0;
1153 s = splbio();
1154
1155 /*
1156 * if any other pages have been allocated in this
1157 * block, we only try to get one page.
1158 */
1159 for (i = 0; i < SWB_NPAGES; i++) {
1160 if (swb[j]->swb_block[i] != SWB_EMPTY)
1161 break;
1162 }
1163
1164 ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1;
1165 /*
1166 * this code is a little conservative, but works (the
1167 * intent of this code is to allocate small chunks for
1168 * small objects)
1169 */
1170 if ((off == 0) && ((fidx + ntoget) > object->size)) {
1171 ntoget = object->size - fidx;
1172 }
1173 retrygetspace:
1174 if (!swap_pager_full && ntoget > 1 &&
1175 swap_pager_getswapspace(object, ntoget * btodb(PAGE_SIZE),
1176 &blk)) {
1177
1178 for (i = 0; i < ntoget; i++) {
1179 swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i;
1180 swb[j]->swb_valid = 0;
1181 }
1182
1183 reqaddr[j] = swb[j]->swb_block[off];
1184 } else if (!swap_pager_getswapspace(object, btodb(PAGE_SIZE),
1185 &swb[j]->swb_block[off])) {
1186 /*
1187 * if the allocation has failed, we try to
1188 * reclaim space and retry.
1189 */
1190 if (++tries == 1) {
1191 swap_pager_reclaim();
1192 goto retrygetspace;
1193 }
1194 rtvals[j] = VM_PAGER_AGAIN;
1195 failed = 1;
1196 swap_pager_full = 1;
1197 } else {
1198 reqaddr[j] = swb[j]->swb_block[off];
1199 swb[j]->swb_valid &= ~(1 << off);
1200 }
1201 splx(s);
1202 }
1203 }
1204
1205 /*
1206 * search forwards for the last contiguous page to transfer
1207 */
1208 failed = 0;
1209 for (i = 0; i < count; i++) {
1210 if (failed ||
1211 (reqaddr[i] != reqaddr[0] + i * btodb(PAGE_SIZE)) ||
1212 ((reqaddr[i] / dmmax) != (reqaddr[0] / dmmax)) ||
1213 (rtvals[i] != VM_PAGER_OK)) {
1214 failed = 1;
1215 if (rtvals[i] == VM_PAGER_OK)
1216 rtvals[i] = VM_PAGER_AGAIN;
1217 }
1218 }
1219
1220 for (i = 0; i < count; i++) {
1221 if (rtvals[i] != VM_PAGER_OK) {
1222 if (swb[i])
1223 --swb[i]->swb_locked;
1224 }
1225 }
1226
1227 for (i = 0; i < count; i++)
1228 if (rtvals[i] != VM_PAGER_OK)
1229 break;
1230
1231 if (i == 0) {
1232 return VM_PAGER_AGAIN;
1233 }
1234 count = i;
1235 for (i = 0; i < count; i++) {
1236 if (reqaddr[i] == SWB_EMPTY) {
1237 printf("I/O to empty block???? -- pindex: %d, i: %d\n",
1238 m[i]->pindex, i);
1239 }
1240 }
1241
1242 /*
1243 * For synchronous writes, we clean up all completed async pageouts.
1244 */
1245 if (sync == TRUE) {
1246 swap_pager_sync();
1247 }
1248 kva = 0;
1249
1250 /*
1251 * get a swap pager clean data structure, block until we get it
1252 */
1253 if (swap_pager_free.tqh_first == NULL ||
1254 swap_pager_free.tqh_first->spc_list.tqe_next == NULL ||
1255 swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) {
1256 s = splbio();
1257 if (curproc == pageproc) {
1258 /*
1259 * pageout daemon needs a swap control block
1260 */
1261 swap_pager_needflags |= SWAP_FREE_NEEDED_BY_PAGEOUT|SWAP_FREE_NEEDED;
1262 /*
1263 * if it does not get one within a short time, then
1264 * there is a potential deadlock, so we go on trying
1265 * to free pages.
1266 */
1267 tsleep(&swap_pager_free, PVM, "swpfre", hz/10);
1268 swap_pager_sync();
1269 if (swap_pager_free.tqh_first == NULL ||
1270 swap_pager_free.tqh_first->spc_list.tqe_next == NULL ||
1271 swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) {
1272 splx(s);
1273 return VM_PAGER_AGAIN;
1274 }
1275 } else
1276 pagedaemon_wakeup();
1277 while (swap_pager_free.tqh_first == NULL ||
1278 swap_pager_free.tqh_first->spc_list.tqe_next == NULL ||
1279 swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) {
1280 if (curproc == pageproc) {
1281 swap_pager_needflags |= SWAP_FREE_NEEDED_BY_PAGEOUT;
1282 if((cnt.v_free_count + cnt.v_cache_count) > cnt.v_free_reserved)
1283 wakeup(&cnt.v_free_count);
1284 }
1285
1286 swap_pager_needflags |= SWAP_FREE_NEEDED;
1287 tsleep(&swap_pager_free, PVM, "swpfre", 0);
1288 if (curproc == pageproc)
1289 swap_pager_sync();
1290 else
1291 pagedaemon_wakeup();
1292 }
1293 splx(s);
1294 }
1295 spc = swap_pager_free.tqh_first;
1296 TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
1297
1298 kva = spc->spc_kva;
1299
1300 /*
1301 * map our page(s) into kva for I/O
1302 */
1303 pmap_qenter(kva, m, count);
1304
1305 /*
1306 * get the base I/O offset into the swap file
1307 */
1308 for (i = 0; i < count; i++) {
1309 fidx = m[i]->pindex + paging_pindex;
1310 off = swap_pager_block_offset(fidx);
1311 /*
1312 * set the valid bit
1313 */
1314 swb[i]->swb_valid |= (1 << off);
1315 /*
1316 * and unlock the data structure
1317 */
1318 swb[i]->swb_locked--;
1319 }
1320
1321 /*
1322 * Get a swap buffer header and perform the IO
1323 */
1324 bp = spc->spc_bp;
1325 bzero(bp, sizeof *bp);
1326 bp->b_spc = spc;
1327 bp->b_vnbufs.le_next = NOLIST;
1328
1329 bp->b_flags = B_BUSY | B_PAGING;
1330 bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */
1331 bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
1332 if (bp->b_rcred != NOCRED)
1333 crhold(bp->b_rcred);
1334 if (bp->b_wcred != NOCRED)
1335 crhold(bp->b_wcred);
1336 bp->b_data = (caddr_t) kva;
1337 bp->b_blkno = reqaddr[0];
1338 pbgetvp(swapdev_vp, bp);
1339
1340 bp->b_bcount = PAGE_SIZE * count;
1341 bp->b_bufsize = PAGE_SIZE * count;
1342 swapdev_vp->v_numoutput++;
1343
1344 /*
1345 * If this is an async write we set up additional buffer fields and
1346 * place a "cleaning" entry on the inuse queue.
1347 */
1348 s = splbio();
1349 if (sync == FALSE) {
1350 spc->spc_flags = 0;
1351 spc->spc_object = object;
1352 for (i = 0; i < count; i++)
1353 spc->spc_m[i] = m[i];
1354 spc->spc_count = count;
1355 /*
1356 * the completion routine for async writes
1357 */
1358 bp->b_flags |= B_CALL;
1359 bp->b_iodone = swap_pager_iodone;
1360 bp->b_dirtyoff = 0;
1361 bp->b_dirtyend = bp->b_bcount;
1362 object->un_pager.swp.swp_poip++;
1363 TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
1364 } else {
1365 object->un_pager.swp.swp_poip++;
1366 bp->b_flags |= B_CALL;
1367 bp->b_iodone = swap_pager_iodone1;
1368 }
1369
1370 cnt.v_swapout++;
1371 cnt.v_swappgsout += count;
1372 /*
1373 * perform the I/O
1374 */
1375 VOP_STRATEGY(bp);
1376 if (sync == FALSE) {
1377 if ((bp->b_flags & B_DONE) == B_DONE) {
1378 swap_pager_sync();
1379 }
1380 splx(s);
1381 for (i = 0; i < count; i++) {
1382 rtvals[i] = VM_PAGER_PEND;
1383 }
1384 return VM_PAGER_PEND;
1385 }
1386 /*
1387 * wait for the sync I/O to complete
1388 */
1389 while ((bp->b_flags & B_DONE) == 0) {
1390 tsleep(bp, PVM, "swwrt", 0);
1391 }
1392 if (bp->b_flags & B_ERROR) {
1393 printf("swap_pager: I/O error - pageout failed; blkno %d, size %d, error %d\n",
1394 bp->b_blkno, bp->b_bcount, bp->b_error);
1395 rv = VM_PAGER_ERROR;
1396 } else {
1397 rv = VM_PAGER_OK;
1398 }
1399
1400 object->un_pager.swp.swp_poip--;
1401 if (object->un_pager.swp.swp_poip == 0)
1402 wakeup(object);
1403
1404 if (bp->b_vp)
1405 pbrelvp(bp);
1406 if (bp->b_flags & B_WANTED)
1407 wakeup(bp);
1408
1409 splx(s);
1410
1411 /*
1412 * remove the mapping for kernel virtual
1413 */
1414 pmap_qremove(kva, count);
1415
1416 /*
1417 * if we have written the page, then indicate that the page is clean.
1418 */
1419 if (rv == VM_PAGER_OK) {
1420 for (i = 0; i < count; i++) {
1421 if (rtvals[i] == VM_PAGER_OK) {
1422 pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
1423 m[i]->dirty = 0;
1424 /*
1425 * optimization, if a page has been read
1426 * during the pageout process, we activate it.
1427 */
1428 if ((m[i]->flags & PG_ACTIVE) == 0 &&
1429 ((m[i]->flags & (PG_WANTED|PG_REFERENCED)) ||
1430 pmap_is_referenced(VM_PAGE_TO_PHYS(m[i])))) {
1431 vm_page_activate(m[i]);
1432 }
1433 }
1434 }
1435 } else {
1436 for (i = 0; i < count; i++) {
1437 rtvals[i] = rv;
1438 }
1439 }
1440
1441 if (bp->b_rcred != NOCRED)
1442 crfree(bp->b_rcred);
1443 if (bp->b_wcred != NOCRED)
1444 crfree(bp->b_wcred);
1445 TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
1446 if (swap_pager_needflags & SWAP_FREE_NEEDED) {
1447 wakeup(&swap_pager_free);
1448 }
1449 if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT)
1450 pagedaemon_wakeup();
1451 swap_pager_needflags &= ~(SWAP_FREE_NEEDED|SWAP_FREE_NEEDED_BY_PAGEOUT);
1452 return (rv);
1453}
1454
1455void
1456swap_pager_sync()
1457{
1458 register swp_clean_t spc, tspc;
1459 register int s;
1460
1461 tspc = NULL;
1462 if (swap_pager_done.tqh_first == NULL)
1463 return;
1464 for (;;) {
1465 s = splbio();
1466 /*
1467 * Look up and removal from done list must be done at splbio()
1468 * to avoid conflicts with swap_pager_iodone.
1469 */
1470 while ((spc = swap_pager_done.tqh_first) != 0) {
1471 pmap_qremove(spc->spc_kva, spc->spc_count);
1472 swap_pager_finish(spc);
1473 TAILQ_REMOVE(&swap_pager_done, spc, spc_list);
1474 goto doclean;
1475 }
1476
1477 /*
1478 * No operations done, that's all we can do for now.
1479 */
1480
1481 splx(s);
1482 break;
1483
1484 /*
1485 * The desired page was found to be busy earlier in the scan
1486 * but has since completed.
1487 */
1488doclean:
1489 if (tspc && tspc == spc) {
1490 tspc = NULL;
1491 }
1492 spc->spc_flags = 0;
1493 TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
1494 if (swap_pager_needflags & SWAP_FREE_NEEDED) {
1495 wakeup(&swap_pager_free);
1496 }
1497 if( swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT)
1498 pagedaemon_wakeup();
1499 swap_pager_needflags &= ~(SWAP_FREE_NEEDED|SWAP_FREE_NEEDED_BY_PAGEOUT);
1500 splx(s);
1501 }
1502
1503 return;
1504}
1505
1506void
1507swap_pager_finish(spc)
1508 register swp_clean_t spc;
1509{
1510 vm_object_t object = spc->spc_m[0]->object;
1511 int i;
1512
1513 object->paging_in_progress -= spc->spc_count;
1514 if ((object->paging_in_progress == 0) &&
1515 (object->flags & OBJ_PIPWNT)) {
1516 object->flags &= ~OBJ_PIPWNT;
1517 wakeup(object);
1518 }
1519
1520 /*
1521 * If no error, mark as clean and inform the pmap system. If error,
1522 * mark as dirty so we will try again. (XXX could get stuck doing
1523 * this, should give up after a while)
1524 */
1525 if (spc->spc_flags & SPC_ERROR) {
1526 for (i = 0; i < spc->spc_count; i++) {
1527 printf("swap_pager_finish: I/O error, clean of page %lx failed\n",
1528 (u_long) VM_PAGE_TO_PHYS(spc->spc_m[i]));
1529 }
1530 } else {
1531 for (i = 0; i < spc->spc_count; i++) {
1532 pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m[i]));
1533 spc->spc_m[i]->dirty = 0;
1534 if ((spc->spc_m[i]->flags & PG_ACTIVE) == 0 &&
1535 ((spc->spc_m[i]->flags & PG_WANTED) || pmap_is_referenced(VM_PAGE_TO_PHYS(spc->spc_m[i]))))
1536 vm_page_activate(spc->spc_m[i]);
1537 }
1538 }
1539
1540
1541 for (i = 0; i < spc->spc_count; i++) {
1542 /*
1543 * we wake up any processes that are waiting on these pages.
1544 */
1545 PAGE_WAKEUP(spc->spc_m[i]);
1546 }
1547 nswiodone -= spc->spc_count;
1548
1549 return;
1550}
1551
1552/*
1553 * swap_pager_iodone
1554 */
1555static void
1556swap_pager_iodone(bp)
1557 register struct buf *bp;
1558{
1559 register swp_clean_t spc;
1560 int s;
1561
1562 s = splbio();
1563 spc = (swp_clean_t) bp->b_spc;
1564 TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
1565 TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list);
1566 if (bp->b_flags & B_ERROR) {
1567 spc->spc_flags |= SPC_ERROR;
1568 printf("swap_pager: I/O error - async %s failed; blkno %lu, size %ld, error %d\n",
1569 (bp->b_flags & B_READ) ? "pagein" : "pageout",
1570 (u_long) bp->b_blkno, bp->b_bcount, bp->b_error);
1571 }
1572
1573 if (bp->b_vp)
1574 pbrelvp(bp);
1575
1576 if (bp->b_flags & B_WANTED)
1577 wakeup(bp);
1578
1579 if (bp->b_rcred != NOCRED)
1580 crfree(bp->b_rcred);
1581 if (bp->b_wcred != NOCRED)
1582 crfree(bp->b_wcred);
1583
1584 nswiodone += spc->spc_count;
1585 if (--spc->spc_object->un_pager.swp.swp_poip == 0) {
1586 wakeup(spc->spc_object);
1587 }
1588 if ((swap_pager_needflags & SWAP_FREE_NEEDED) ||
1589 swap_pager_inuse.tqh_first == 0) {
1590 swap_pager_needflags &= ~SWAP_FREE_NEEDED;
1591 wakeup(&swap_pager_free);
1592 }
1593
1594 if( swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT) {
1595 swap_pager_needflags &= ~SWAP_FREE_NEEDED_BY_PAGEOUT;
1596 pagedaemon_wakeup();
1597 }
1598
1599 if (vm_pageout_pages_needed) {
1600 wakeup(&vm_pageout_pages_needed);
1601 vm_pageout_pages_needed = 0;
1602 }
1603 if ((swap_pager_inuse.tqh_first == NULL) ||
1604 ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min &&
1605 nswiodone + cnt.v_free_count + cnt.v_cache_count >= cnt.v_free_min)) {
1606 pagedaemon_wakeup();
1607 }
1608 splx(s);
1609}
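
/*
 * Throughout this file the splbio()/splx() bracket is the pre-SMP
 * idiom for excluding disk-interrupt completion handlers (such as
 * swap_pager_iodone above) while queue state is inconsistent.  A
 * minimal sketch of the pattern (hypothetical fragment):
 */
#if 0
	int s;

	s = splbio();	/* defer biodone-time callbacks */
	TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
	TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list);
	splx(s);	/* restore the previous priority level */
#endif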
163static void swap_pager_reclaim __P((void));
164static void swap_pager_ridpages __P((vm_page_t *m, int count,
165 int reqpage));
166static void swap_pager_setvalid __P((vm_object_t object,
167 vm_offset_t offset, int valid));
168static void swapsizecheck __P((void));
169
170static inline void
171swapsizecheck()
172{
173 if (vm_swap_size < 128 * btodb(PAGE_SIZE)) {
174 if (swap_pager_full == 0)
175 printf("swap_pager: out of space\n");
176 swap_pager_full = 1;
177 } else if (vm_swap_size > 192 * btodb(PAGE_SIZE))
178 swap_pager_full = 0;
179}
180
181static void
182swap_pager_init()
183{
184 TAILQ_INIT(&swap_pager_object_list);
185 TAILQ_INIT(&swap_pager_un_object_list);
186
187 /*
188 * Initialize clean lists
189 */
190 TAILQ_INIT(&swap_pager_inuse);
191 TAILQ_INIT(&swap_pager_done);
192 TAILQ_INIT(&swap_pager_free);
193
194 /*
195 * Calculate the swap allocation constants.
196 */
197 dmmin = CLBYTES / DEV_BSIZE;
198 dmmax = btodb(SWB_NPAGES * PAGE_SIZE) * 2;
199}
200
201void
202swap_pager_swap_init()
203{
204 swp_clean_t spc;
205 struct buf *bp;
206 int i;
207
208 /*
209 * kva's are allocated here so that we dont need to keep doing
210 * kmem_alloc pageables at runtime
211 */
212 for (i = 0, spc = swcleanlist; i < npendingio; i++, spc++) {
213 spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE * MAX_PAGEOUT_CLUSTER);
214 if (!spc->spc_kva) {
215 break;
216 }
217 spc->spc_bp = malloc(sizeof(*bp), M_TEMP, M_KERNEL);
218 if (!spc->spc_bp) {
219 kmem_free_wakeup(pager_map, spc->spc_kva, PAGE_SIZE);
220 break;
221 }
222 spc->spc_flags = 0;
223 TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
224 }
225}
226
227int
228swap_pager_swp_alloc(object, wait)
229 vm_object_t object;
230 int wait;
231{
232 sw_blk_t swb;
233 int nblocks;
234 int i, j;
235
236 nblocks = (object->size + SWB_NPAGES - 1) / SWB_NPAGES;
237 swb = malloc(nblocks * sizeof(*swb), M_VMPGDATA, wait);
238 if (swb == NULL)
239 return 1;
240
241 for (i = 0; i < nblocks; i++) {
242 swb[i].swb_valid = 0;
243 swb[i].swb_locked = 0;
244 for (j = 0; j < SWB_NPAGES; j++)
245 swb[i].swb_block[j] = SWB_EMPTY;
246 }
247
248 object->un_pager.swp.swp_nblocks = nblocks;
249 object->un_pager.swp.swp_allocsize = 0;
250 object->un_pager.swp.swp_blocks = swb;
251 object->un_pager.swp.swp_poip = 0;
252
253 if (object->handle != NULL) {
254 TAILQ_INSERT_TAIL(&swap_pager_object_list, object, pager_object_list);
255 } else {
256 TAILQ_INSERT_TAIL(&swap_pager_un_object_list, object, pager_object_list);
257 }
258
259 return 0;
260}
261
262/*
263 * Allocate an object and associated resources.
264 * Note that if we are called from the pageout daemon (handle == NULL)
265 * we should not wait for memory as it could resulting in deadlock.
266 */
267static vm_object_t
268swap_pager_alloc(handle, size, prot, offset)
269 void *handle;
270 register vm_size_t size;
271 vm_prot_t prot;
272 vm_ooffset_t offset;
273{
274 vm_object_t object;
275
276 /*
277 * If this is a "named" anonymous region, look it up and use the
278 * object if it exists, otherwise allocate a new one.
279 */
280 if (handle) {
281 object = vm_pager_object_lookup(&swap_pager_object_list, handle);
282 if (object != NULL) {
283 vm_object_reference(object);
284 } else {
285 /*
286 * XXX - there is a race condition here. Two processes
287 * can request the same named object simultaneuously,
288 * and if one blocks for memory, the result is a disaster.
289 * Probably quite rare, but is yet another reason to just
290 * rip support of "named anonymous regions" out altogether.
291 */
292 object = vm_object_allocate(OBJT_SWAP,
293 OFF_TO_IDX(offset+ PAGE_SIZE - 1 + size));
294 object->handle = handle;
295 (void) swap_pager_swp_alloc(object, M_WAITOK);
296 }
297 } else {
298 object = vm_object_allocate(OBJT_SWAP,
299 OFF_TO_IDX(offset + PAGE_SIZE - 1 + size));
300 (void) swap_pager_swp_alloc(object, M_WAITOK);
301 }
302
303 return (object);
304}
305
306/*
307 * returns disk block associated with pager and offset
308 * additionally, as a side effect returns a flag indicating
309 * if the block has been written
310 */
311
312inline static daddr_t *
313swap_pager_diskaddr(object, pindex, valid)
314 vm_object_t object;
315 vm_pindex_t pindex;
316 int *valid;
317{
318 register sw_blk_t swb;
319 int ix;
320
321 if (valid)
322 *valid = 0;
323 ix = pindex / SWB_NPAGES;
324 if ((ix >= object->un_pager.swp.swp_nblocks) ||
325 (pindex >= object->size)) {
326 return (FALSE);
327 }
328 swb = &object->un_pager.swp.swp_blocks[ix];
329 ix = pindex % SWB_NPAGES;
330 if (valid)
331 *valid = swb->swb_valid & (1 << ix);
332 return &swb->swb_block[ix];
333}
334
335/*
336 * Utility routine to set the valid (written) bit for
337 * a block associated with a pager and offset
338 */
339static void
340swap_pager_setvalid(object, offset, valid)
341 vm_object_t object;
342 vm_offset_t offset;
343 int valid;
344{
345 register sw_blk_t swb;
346 int ix;
347
348 ix = offset / SWB_NPAGES;
349 if (ix >= object->un_pager.swp.swp_nblocks)
350 return;
351
352 swb = &object->un_pager.swp.swp_blocks[ix];
353 ix = offset % SWB_NPAGES;
354 if (valid)
355 swb->swb_valid |= (1 << ix);
356 else
357 swb->swb_valid &= ~(1 << ix);
358 return;
359}
360
361/*
362 * this routine allocates swap space with a fragmentation
363 * minimization policy.
364 */
365static int
366swap_pager_getswapspace(object, amount, rtval)
367 vm_object_t object;
368 unsigned int amount;
369 daddr_t *rtval;
370{
371 unsigned location;
372 vm_swap_size -= amount;
373 if (!rlist_alloc(&swaplist, amount, &location)) {
374 vm_swap_size += amount;
375 return 0;
376 } else {
377 swapsizecheck();
378 object->un_pager.swp.swp_allocsize += amount;
379 *rtval = location;
380 return 1;
381 }
382}
383
384/*
385 * this routine frees swap space with a fragmentation
386 * minimization policy.
387 */
388static void
389swap_pager_freeswapspace(object, from, to)
390 vm_object_t object;
391 unsigned int from;
392 unsigned int to;
393{
394 rlist_free(&swaplist, from, to);
395 vm_swap_size += (to - from) + 1;
396 object->un_pager.swp.swp_allocsize -= (to - from) + 1;
397 swapsizecheck();
398}
399/*
400 * this routine frees swap blocks from a specified pager
401 */
402void
403swap_pager_freespace(object, start, size)
404 vm_object_t object;
405 vm_pindex_t start;
406 vm_size_t size;
407{
408 vm_pindex_t i;
409 int s;
410
411 s = splbio();
412 for (i = start; i < start + size; i += 1) {
413 int valid;
414 daddr_t *addr = swap_pager_diskaddr(object, i, &valid);
415
416 if (addr && *addr != SWB_EMPTY) {
417 swap_pager_freeswapspace(object, *addr, *addr + btodb(PAGE_SIZE) - 1);
418 if (valid) {
419 swap_pager_setvalid(object, i, 0);
420 }
421 *addr = SWB_EMPTY;
422 }
423 }
424 splx(s);
425}
426
427static void
428swap_pager_free_swap(object)
429 vm_object_t object;
430{
431 register int i, j;
432 register sw_blk_t swb;
433 int first_block=0, block_count=0;
434 int s;
435 /*
436 * Free left over swap blocks
437 */
438 s = splbio();
439 for (i = 0, swb = object->un_pager.swp.swp_blocks;
440 i < object->un_pager.swp.swp_nblocks; i++, swb++) {
441 for (j = 0; j < SWB_NPAGES; j++) {
442 if (swb->swb_block[j] != SWB_EMPTY) {
443 /*
444 * initially the length of the run is zero
445 */
446 if (block_count == 0) {
447 first_block = swb->swb_block[j];
448 block_count = btodb(PAGE_SIZE);
449 swb->swb_block[j] = SWB_EMPTY;
450 /*
451 * if the new block can be included into the current run
452 */
453 } else if (swb->swb_block[j] == first_block + block_count) {
454 block_count += btodb(PAGE_SIZE);
455 swb->swb_block[j] = SWB_EMPTY;
456 /*
457 * terminate the previous run, and start a new one
458 */
459 } else {
460 swap_pager_freeswapspace(object, first_block,
461 (unsigned) first_block + block_count - 1);
462 first_block = swb->swb_block[j];
463 block_count = btodb(PAGE_SIZE);
464 swb->swb_block[j] = SWB_EMPTY;
465 }
466 }
467 }
468 }
469
470 if (block_count) {
471 swap_pager_freeswapspace(object, first_block,
472 (unsigned) first_block + block_count - 1);
473 }
474 splx(s);
475}
476
477
478/*
479 * swap_pager_reclaim frees up over-allocated space from all pagers
480 * this eliminates internal fragmentation due to allocation of space
481 * for segments that are never swapped to. It has been written so that
482 * it does not block until the rlist_free operation occurs; it keeps
483 * the queues consistent.
484 */
485
486/*
487 * Maximum number of blocks (pages) to reclaim per pass
488 */
489#define MAXRECLAIM 128
490
491static void
492swap_pager_reclaim()
493{
494 vm_object_t object;
495 int i, j, k;
496 int s;
497 int reclaimcount;
498 static struct {
499 int address;
500 vm_object_t object;
501 } reclaims[MAXRECLAIM];
502 static int in_reclaim;
503
504 /*
505 * allow only one process to be in the swap_pager_reclaim subroutine
506 */
507 s = splbio();
508 if (in_reclaim) {
509 tsleep(&in_reclaim, PSWP, "swrclm", 0);
510 splx(s);
511 return;
512 }
513 in_reclaim = 1;
514 reclaimcount = 0;
515
516 /* for each pager queue */
517 for (k = 0; swp_qs[k]; k++) {
518
519 object = swp_qs[k]->tqh_first;
520 while (object && (reclaimcount < MAXRECLAIM)) {
521
522 /*
523 * see if any blocks associated with a pager have been
524 * allocated but not used (written)
525 */
526 if (object->paging_in_progress == 0) {
527 for (i = 0; i < object->un_pager.swp.swp_nblocks; i++) {
528 sw_blk_t swb = &object->un_pager.swp.swp_blocks[i];
529
530 if (swb->swb_locked)
531 continue;
532 for (j = 0; j < SWB_NPAGES; j++) {
533 if (swb->swb_block[j] != SWB_EMPTY &&
534 (swb->swb_valid & (1 << j)) == 0) {
535 reclaims[reclaimcount].address = swb->swb_block[j];
536 reclaims[reclaimcount++].object = object;
537 swb->swb_block[j] = SWB_EMPTY;
538 if (reclaimcount >= MAXRECLAIM)
539 goto rfinished;
540 }
541 }
542 }
543 }
544 object = object->pager_object_list.tqe_next;
545 }
546 }
547
548rfinished:
549
550 /*
551 * free the blocks that have been added to the reclaim list
552 */
553 for (i = 0; i < reclaimcount; i++) {
554 swap_pager_freeswapspace(reclaims[i].object,
555 reclaims[i].address, reclaims[i].address + btodb(PAGE_SIZE) - 1);
556 }
557 splx(s);
558 in_reclaim = 0;
559 wakeup(&in_reclaim);
560}
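/*
 * Note that the scan above only detaches block addresses from the
 * swb entries; the actual frees happen in a second pass over
 * reclaims[], which is what keeps the pager block lists consistent
 * even if the rlist operations should block.
 */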
561
562
563/*
564 * swap_pager_copy copies blocks from one pager to another and
565 * destroys the source pager
566 */
567
568void
569swap_pager_copy(srcobject, srcoffset, dstobject, dstoffset, offset)
570 vm_object_t srcobject;
571 vm_pindex_t srcoffset;
572 vm_object_t dstobject;
573 vm_pindex_t dstoffset;
574 vm_pindex_t offset;
575{
576 vm_pindex_t i;
577 int origsize;
578 int s;
579
580 if (vm_swap_size)
581 no_swap_space = 0;
582
583 origsize = srcobject->un_pager.swp.swp_allocsize;
584
585 /*
586 * remove the source object from the swap_pager internal queue
587 */
588 if (srcobject->handle == NULL) {
589 TAILQ_REMOVE(&swap_pager_un_object_list, srcobject, pager_object_list);
590 } else {
591 TAILQ_REMOVE(&swap_pager_object_list, srcobject, pager_object_list);
592 }
593
594 s = splbio();
595 while (srcobject->un_pager.swp.swp_poip) {
596 tsleep(srcobject, PVM, "spgout", 0);
597 }
598 splx(s);
599
600 /*
601 * clean all of the pages that are currently active and finished
602 */
603 swap_pager_sync();
604
605 s = splbio();
606 /*
607 * transfer source to destination
608 */
609 for (i = 0; i < dstobject->size; i += 1) {
610 int srcvalid, dstvalid;
611 daddr_t *srcaddrp = swap_pager_diskaddr(srcobject, i + offset + srcoffset,
612 &srcvalid);
613 daddr_t *dstaddrp;
614
615 /*
616 * see if the source has space allocated
617 */
618 if (srcaddrp && *srcaddrp != SWB_EMPTY) {
619 /*
620 * if the source is valid and the dest has no space,
621 * then copy the allocation from the source to the
622 * dest.
623 */
624 if (srcvalid) {
625 dstaddrp = swap_pager_diskaddr(dstobject, i + dstoffset,
626 &dstvalid);
627 /*
628 * free an invalid block already at the dest to
629 * make room; if the dest block is still valid,
630 * the source block is freed below without copying.
631 */
632 if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) {
633 swap_pager_freeswapspace(dstobject, *dstaddrp,
634 *dstaddrp + btodb(PAGE_SIZE) - 1);
635 *dstaddrp = SWB_EMPTY;
636 }
637 if (dstaddrp && *dstaddrp == SWB_EMPTY) {
638 *dstaddrp = *srcaddrp;
639 *srcaddrp = SWB_EMPTY;
640 dstobject->un_pager.swp.swp_allocsize += btodb(PAGE_SIZE);
641 srcobject->un_pager.swp.swp_allocsize -= btodb(PAGE_SIZE);
642 swap_pager_setvalid(dstobject, i + dstoffset, 1);
643 }
644 }
645 /*
646 * if the source is not empty at this point, then
647 * deallocate the space.
648 */
649 if (*srcaddrp != SWB_EMPTY) {
650 swap_pager_freeswapspace(srcobject, *srcaddrp,
651 *srcaddrp + btodb(PAGE_SIZE) - 1);
652 *srcaddrp = SWB_EMPTY;
653 }
654 }
655 }
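
	/*
	 * To summarize the loop above: a valid source block either moves
	 * into an empty dest slot (an invalid dest block there is freed
	 * first to make room) or, when the dest already holds a valid
	 * block, is freed without copying; an invalid source block is
	 * simply freed.
	 */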
656 splx(s);
657
658 /*
659 * Free left over swap blocks
660 */
661 swap_pager_free_swap(srcobject);
662
663 if (srcobject->un_pager.swp.swp_allocsize) {
664 printf("swap_pager_copy: *warning* pager with %d blocks (orig: %d)\n",
665 srcobject->un_pager.swp.swp_allocsize, origsize);
666 }
667
668 free(srcobject->un_pager.swp.swp_blocks, M_VMPGDATA);
669 srcobject->un_pager.swp.swp_blocks = NULL;
670
671 return;
672}
673
674static void
675swap_pager_dealloc(object)
676 vm_object_t object;
677{
678 int s;
679
680 /*
681 * Remove from list right away so lookups will fail if we block for
682 * pageout completion.
683 */
684 if (object->handle == NULL) {
685 TAILQ_REMOVE(&swap_pager_un_object_list, object, pager_object_list);
686 } else {
687 TAILQ_REMOVE(&swap_pager_object_list, object, pager_object_list);
688 }
689
690 /*
691 * Wait for all pageouts to finish and remove all entries from
692 * cleaning list.
693 */
694
695 s = splbio();
696 while (object->un_pager.swp.swp_poip) {
697 tsleep(object, PVM, "swpout", 0);
698 }
699 splx(s);
700
701
702 swap_pager_sync();
703
704 /*
705 * Free left over swap blocks
706 */
707 swap_pager_free_swap(object);
708
709 if (object->un_pager.swp.swp_allocsize) {
710 printf("swap_pager_dealloc: *warning* freeing pager with %d blocks\n",
711 object->un_pager.swp.swp_allocsize);
712 }
713 /*
714 * Free swap management resources
715 */
716 free(object->un_pager.swp.swp_blocks, M_VMPGDATA);
717 object->un_pager.swp.swp_blocks = NULL;
718}
719
720static inline __pure int
721swap_pager_block_index(pindex)
722 vm_pindex_t pindex;
723{
724 return (pindex / SWB_NPAGES);
725}
726
727static inline __pure int
728swap_pager_block_offset(pindex)
729 vm_pindex_t pindex;
730{
731 return (pindex % SWB_NPAGES);
732}
733
734/*
735 * swap_pager_haspage returns TRUE if the pager has data that has
736 * been written out.
737 */
738static boolean_t
739swap_pager_haspage(object, pindex, before, after)
740 vm_object_t object;
741 vm_pindex_t pindex;
742 int *before;
743 int *after;
744{
745 register sw_blk_t swb;
746 int ix;
747
748 if (before != NULL)
749 *before = 0;
750 if (after != NULL)
751 *after = 0;
752 ix = pindex / SWB_NPAGES;
753 if (ix >= object->un_pager.swp.swp_nblocks) {
754 return (FALSE);
755 }
756 swb = &object->un_pager.swp.swp_blocks[ix];
757 ix = pindex % SWB_NPAGES;
758
759 if (swb->swb_block[ix] != SWB_EMPTY) {
760
761 if (swb->swb_valid & (1 << ix)) {
762 int tix;
763 if (before) {
764 for(tix = ix - 1; tix >= 0; --tix) {
765 if ((swb->swb_valid & (1 << tix)) == 0)
766 break;
767 if ((swb->swb_block[tix] +
768 (ix - tix) * (PAGE_SIZE/DEV_BSIZE)) !=
769 swb->swb_block[ix])
770 break;
771 (*before)++;
772 }
773 }
774
775 if (after) {
776 for(tix = ix + 1; tix < SWB_NPAGES; tix++) {
777 if ((swb->swb_valid & (1 << tix)) == 0)
778 break;
779 if ((swb->swb_block[tix] -
780 (tix - ix) * (PAGE_SIZE/DEV_BSIZE)) !=
781 swb->swb_block[ix])
782 break;
783 (*after)++;
784 }
785 }
786
787 return TRUE;
788 }
789 }
790 return (FALSE);
791}
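/*
 * For illustration, with PAGE_SIZE/DEV_BSIZE == 8: valid pages at
 * disk addresses 88, 96 and 104 in slots ix-1, ix and ix+1 yield
 * *before == 1 and *after == 1, telling the caller it may cluster
 * the paging operation across all three pages.
 */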
792
793/*
794 * swap_pager_freepage is a convenience routine that clears the busy
795 * bit and deallocates a page.
796 */
797static void
798swap_pager_freepage(m)
799 vm_page_t m;
800{
801 PAGE_WAKEUP(m);
802 vm_page_free(m);
803}
804
805/*
806 * swap_pager_ridpages is a convenience routine that deallocates all
807 * but the required page; this is usually used in error returns that
808 * need to invalidate the "extra" readahead pages.
809 */
810static void
811swap_pager_ridpages(m, count, reqpage)
812 vm_page_t *m;
813 int count;
814 int reqpage;
815{
816 int i;
817
818 for (i = 0; i < count; i++)
819 if (i != reqpage)
820 swap_pager_freepage(m[i]);
821}
822
823/*
824 * swap_pager_iodone1 is the completion routine for both reads and sync writes
825 */
826static void
827swap_pager_iodone1(bp)
828 struct buf *bp;
829{
830 bp->b_flags |= B_DONE;
831 bp->b_flags &= ~B_ASYNC;
832 wakeup(bp);
833}
834
835int
836swap_pager_getpages(object, m, count, reqpage)
837 vm_object_t object;
838 vm_page_t *m;
839 int count, reqpage;
840{
841 register struct buf *bp;
842 sw_blk_t swb[count];
843 register int s;
844 int i;
845 boolean_t rv;
846 vm_offset_t kva, off[count];
847 swp_clean_t spc;
848 vm_pindex_t paging_offset;
849 int reqaddr[count];
850 int sequential;
851
852 int first, last;
853 int failed;
854 int reqdskregion;
855
856 object = m[reqpage]->object;
857 paging_offset = OFF_TO_IDX(object->paging_offset);
858 sequential = (m[reqpage]->pindex == (object->last_read + 1));
859
860 for (i = 0; i < count; i++) {
861 vm_pindex_t fidx = m[i]->pindex + paging_offset;
862 int ix = swap_pager_block_index(fidx);
863
864 if (ix >= object->un_pager.swp.swp_nblocks) {
865 int j;
866
867 if (i <= reqpage) {
868 swap_pager_ridpages(m, count, reqpage);
869 return (VM_PAGER_FAIL);
870 }
871 for (j = i; j < count; j++) {
872 swap_pager_freepage(m[j]);
873 }
874 count = i;
875 break;
876 }
877 swb[i] = &object->un_pager.swp.swp_blocks[ix];
878 off[i] = swap_pager_block_offset(fidx);
879 reqaddr[i] = swb[i]->swb_block[off[i]];
880 }
881
882	/* make sure that our required input request exists */
883
884 if (reqaddr[reqpage] == SWB_EMPTY ||
885 (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) {
886 swap_pager_ridpages(m, count, reqpage);
887 return (VM_PAGER_FAIL);
888 }
889 reqdskregion = reqaddr[reqpage] / dmmax;
890
891 /*
892 * search backwards for the first contiguous page to transfer
893 */
894 failed = 0;
895 first = 0;
896 for (i = reqpage - 1; i >= 0; --i) {
897 if (sequential || failed || (reqaddr[i] == SWB_EMPTY) ||
898 (swb[i]->swb_valid & (1 << off[i])) == 0 ||
899 (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
900 ((reqaddr[i] / dmmax) != reqdskregion)) {
901 failed = 1;
902 swap_pager_freepage(m[i]);
903 if (first == 0)
904 first = i + 1;
905 }
906 }
907 /*
908 * search forwards for the last contiguous page to transfer
909 */
910 failed = 0;
911 last = count;
912 for (i = reqpage + 1; i < count; i++) {
913 if (failed || (reqaddr[i] == SWB_EMPTY) ||
914 (swb[i]->swb_valid & (1 << off[i])) == 0 ||
915 (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
916 ((reqaddr[i] / dmmax) != reqdskregion)) {
917 failed = 1;
918 swap_pager_freepage(m[i]);
919 if (last == count)
920 last = i;
921 }
922 }
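
	/*
	 * In both scans a page stays in the cluster only if its swap
	 * address continues the run from reqaddr[reqpage] in
	 * btodb(PAGE_SIZE) steps and falls in the same dmmax region
	 * (and, for the backward scan, only when the fault is not
	 * sequential), so the remaining pages form one contiguous
	 * device transfer.
	 */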
923
924 count = last;
925 if (first != 0) {
926 for (i = first; i < count; i++) {
927 m[i - first] = m[i];
928 reqaddr[i - first] = reqaddr[i];
929 off[i - first] = off[i];
930 }
931 count -= first;
932 reqpage -= first;
933 }
934 ++swb[reqpage]->swb_locked;
935
936 /*
937 * at this point: "m" is a pointer to the array of vm_page_t for
938 * paging I/O, "count" is the number of vm_page_t entries represented
939 * by "m", "object" is the vm_object_t for the I/O, and "reqpage" is
940 * the index into "m" for the page actually faulted
941 */
942
943 spc = NULL; /* we might not use an spc data structure */
944
945 if ((count == 1) && (swap_pager_free.tqh_first != NULL)) {
946 spc = swap_pager_free.tqh_first;
947 TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
948 kva = spc->spc_kva;
949 bp = spc->spc_bp;
950 bzero(bp, sizeof *bp);
951 bp->b_spc = spc;
952 bp->b_vnbufs.le_next = NOLIST;
953 } else {
954 /*
955 * Get a swap buffer header to perform the IO
956 */
957 bp = getpbuf();
958 kva = (vm_offset_t) bp->b_data;
959 }
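
	/*
	 * (Using a free spc here is presumably a fast path for the
	 * common single-page fault; multi-page clusters fall back to a
	 * pbuf from the general pool.)
	 */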
960
961 /*
962 * map our page(s) into kva for input
963 */
964 pmap_qenter(kva, m, count);
965
966 bp->b_flags = B_BUSY | B_READ | B_CALL | B_PAGING;
967 bp->b_iodone = swap_pager_iodone1;
968 bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */
969 bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
970 crhold(bp->b_rcred);
971 crhold(bp->b_wcred);
972 bp->b_un.b_addr = (caddr_t) kva;
973 bp->b_blkno = reqaddr[0];
974 bp->b_bcount = PAGE_SIZE * count;
975 bp->b_bufsize = PAGE_SIZE * count;
976
977 pbgetvp(swapdev_vp, bp);
978
979 cnt.v_swapin++;
980 cnt.v_swappgsin += count;
981 /*
982 * perform the I/O
983 */
984 VOP_STRATEGY(bp);
985
986 /*
987 * wait for the sync I/O to complete
988 */
989 s = splbio();
990 while ((bp->b_flags & B_DONE) == 0) {
991 tsleep(bp, PVM, "swread", 0);
992 }
993
994 if (bp->b_flags & B_ERROR) {
995 printf("swap_pager: I/O error - pagein failed; blkno %d, size %d, error %d\n",
996 bp->b_blkno, bp->b_bcount, bp->b_error);
997 rv = VM_PAGER_ERROR;
998 } else {
999 rv = VM_PAGER_OK;
1000 }
1001
1002 /*
1003 * relpbuf does this, but we maintain our own buffer list also...
1004 */
1005 if (bp->b_vp)
1006 pbrelvp(bp);
1007
1008 splx(s);
1009 swb[reqpage]->swb_locked--;
1010
1011 /*
1012 * remove the mapping for kernel virtual
1013 */
1014 pmap_qremove(kva, count);
1015
1016 if (spc) {
1017 m[reqpage]->object->last_read = m[reqpage]->pindex;
1018 if (bp->b_flags & B_WANTED)
1019 wakeup(bp);
1020 /*
1021 * if we have used an spc, we need to free it.
1022 */
1023 if (bp->b_rcred != NOCRED)
1024 crfree(bp->b_rcred);
1025 if (bp->b_wcred != NOCRED)
1026 crfree(bp->b_wcred);
1027 TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
1028 if (swap_pager_needflags & SWAP_FREE_NEEDED) {
1029 wakeup(&swap_pager_free);
1030 }
1031 if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT)
1032 pagedaemon_wakeup();
1033 swap_pager_needflags &= ~(SWAP_FREE_NEEDED|SWAP_FREE_NEEDED_BY_PAGEOUT);
1034 } else {
1035 /*
1036 * release the physical I/O buffer
1037 */
1038 relpbuf(bp);
1039 /*
1040 * finish up input if everything is ok
1041 */
1042 if (rv == VM_PAGER_OK) {
1043 for (i = 0; i < count; i++) {
1044 pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
1045 m[i]->dirty = 0;
1046 m[i]->flags &= ~PG_ZERO;
1047 if (i != reqpage) {
1048 /*
1049 * whether or not to leave the page
1050 * activated is up in the air, but we
1051 * should put the page on a page queue
1052 * somewhere. (it already is in the
1053 * object). After some empirical
1054 * results, it is best to deactivate
1055 * the readahead pages.
1056 */
1057 vm_page_deactivate(m[i]);
1058
1059 /*
1060 * just in case someone was asking for
1061 * this page we now tell them that it
1062 * is ok to use
1063 */
1064 m[i]->valid = VM_PAGE_BITS_ALL;
1065 PAGE_WAKEUP(m[i]);
1066 }
1067 }
1068
1069 m[reqpage]->object->last_read = m[count-1]->pindex;
1070
1071 /*
1072 * If we're out of swap space, then attempt to free
1073 * some whenever pages are brought in. We must clear
1074 * the clean flag so that the page contents will be
1075 * preserved.
1076 */
1077 if (swap_pager_full) {
1078 for (i = 0; i < count; i++) {
1079 m[i]->dirty = VM_PAGE_BITS_ALL;
1080 }
1081 swap_pager_freespace(object, m[0]->pindex + paging_offset, count);
1082 }
1083 } else {
1084 swap_pager_ridpages(m, count, reqpage);
1085 }
1086 }
1087 if (rv == VM_PAGER_OK) {
1088 pmap_clear_modify(VM_PAGE_TO_PHYS(m[reqpage]));
1089 m[reqpage]->valid = VM_PAGE_BITS_ALL;
1090 m[reqpage]->dirty = 0;
1091 }
1092 return (rv);
1093}
1094
1095int
1096swap_pager_putpages(object, m, count, sync, rtvals)
1097 vm_object_t object;
1098 vm_page_t *m;
1099 int count;
1100 boolean_t sync;
1101 int *rtvals;
1102{
1103 register struct buf *bp;
1104 sw_blk_t swb[count];
1105 register int s;
1106 int i, j, ix;
1107 boolean_t rv;
1108 vm_offset_t kva, off, fidx;
1109 swp_clean_t spc;
1110 vm_pindex_t paging_pindex;
1111 int reqaddr[count];
1112 int failed;
1113
1114 if (vm_swap_size)
1115 no_swap_space = 0;
1116 if (no_swap_space) {
1117 for (i = 0; i < count; i++)
1118 rtvals[i] = VM_PAGER_FAIL;
1119 return VM_PAGER_FAIL;
1120 }
1121 spc = NULL;
1122
1123 object = m[0]->object;
1124 paging_pindex = OFF_TO_IDX(object->paging_offset);
1125
1126 failed = 0;
1127 for (j = 0; j < count; j++) {
1128 fidx = m[j]->pindex + paging_pindex;
1129 ix = swap_pager_block_index(fidx);
1130 swb[j] = 0;
1131 if (ix >= object->un_pager.swp.swp_nblocks) {
1132 rtvals[j] = VM_PAGER_FAIL;
1133 failed = 1;
1134 continue;
1135 } else {
1136 rtvals[j] = VM_PAGER_OK;
1137 }
1138 swb[j] = &object->un_pager.swp.swp_blocks[ix];
1139 swb[j]->swb_locked++;
1140 if (failed) {
1141 rtvals[j] = VM_PAGER_FAIL;
1142 continue;
1143 }
1144 off = swap_pager_block_offset(fidx);
1145 reqaddr[j] = swb[j]->swb_block[off];
1146 if (reqaddr[j] == SWB_EMPTY) {
1147 daddr_t blk;
1148 int tries;
1149 int ntoget;
1150
1151 tries = 0;
1152 s = splbio();
1153
1154 /*
1155 * if any other pages have been allocated in this
1156 * block, we only try to get one page.
1157 */
1158 for (i = 0; i < SWB_NPAGES; i++) {
1159 if (swb[j]->swb_block[i] != SWB_EMPTY)
1160 break;
1161 }
1162
1163 ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1;
1164 /*
1165 * this code is a little conservative, but works (the
1166 * intent of this code is to allocate small chunks for
1167 * small objects)
1168 */
1169 if ((off == 0) && ((fidx + ntoget) > object->size)) {
1170 ntoget = object->size - fidx;
1171 }
1172 retrygetspace:
1173 if (!swap_pager_full && ntoget > 1 &&
1174 swap_pager_getswapspace(object, ntoget * btodb(PAGE_SIZE),
1175 &blk)) {
1176
1177 for (i = 0; i < ntoget; i++) {
1178 swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i;
1179 swb[j]->swb_valid = 0;
1180 }
1181
1182 reqaddr[j] = swb[j]->swb_block[off];
1183 } else if (!swap_pager_getswapspace(object, btodb(PAGE_SIZE),
1184 &swb[j]->swb_block[off])) {
1185 /*
1186 * if the allocation has failed, we try to
1187 * reclaim space and retry.
1188 */
1189 if (++tries == 1) {
1190 swap_pager_reclaim();
1191 goto retrygetspace;
1192 }
1193 rtvals[j] = VM_PAGER_AGAIN;
1194 failed = 1;
1195 swap_pager_full = 1;
1196 } else {
1197 reqaddr[j] = swb[j]->swb_block[off];
1198 swb[j]->swb_valid &= ~(1 << off);
1199 }
1200 splx(s);
1201 }
1202 }
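
	/*
	 * The allocation policy above: for a swb with no pages allocated
	 * yet, try to grab space for SWB_NPAGES pages at once (trimmed so
	 * it does not extend past the end of the object); otherwise take
	 * a single page, calling swap_pager_reclaim once and retrying
	 * before giving up with VM_PAGER_AGAIN.
	 */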
1203
1204 /*
1205 * verify that the pages form a single contiguous run on the device
1206 */
1207 failed = 0;
1208 for (i = 0; i < count; i++) {
1209 if (failed ||
1210 (reqaddr[i] != reqaddr[0] + i * btodb(PAGE_SIZE)) ||
1211 ((reqaddr[i] / dmmax) != (reqaddr[0] / dmmax)) ||
1212 (rtvals[i] != VM_PAGER_OK)) {
1213 failed = 1;
1214 if (rtvals[i] == VM_PAGER_OK)
1215 rtvals[i] = VM_PAGER_AGAIN;
1216 }
1217 }
1218
1219 for (i = 0; i < count; i++) {
1220 if (rtvals[i] != VM_PAGER_OK) {
1221 if (swb[i])
1222 --swb[i]->swb_locked;
1223 }
1224 }
1225
1226 for (i = 0; i < count; i++)
1227 if (rtvals[i] != VM_PAGER_OK)
1228 break;
1229
1230 if (i == 0) {
1231 return VM_PAGER_AGAIN;
1232 }
1233 count = i;
1234 for (i = 0; i < count; i++) {
1235 if (reqaddr[i] == SWB_EMPTY) {
1236 printf("I/O to empty block???? -- pindex: %d, i: %d\n",
1237 m[i]->pindex, i);
1238 }
1239 }
1240
1241 /*
1242 * For synchronous writes, we clean up all completed async pageouts.
1243 */
1244 if (sync == TRUE) {
1245 swap_pager_sync();
1246 }
1247 kva = 0;
1248
1249 /*
1250 * get a swap pager clean data structure, block until we get it
1251 */
1252 if (swap_pager_free.tqh_first == NULL ||
1253 swap_pager_free.tqh_first->spc_list.tqe_next == NULL ||
1254 swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) {
1255 s = splbio();
1256 if (curproc == pageproc) {
1257 /*
1258 * pageout daemon needs a swap control block
1259 */
1260 swap_pager_needflags |= SWAP_FREE_NEEDED_BY_PAGEOUT|SWAP_FREE_NEEDED;
1261 /*
1262 * if it does not get one within a short time, then
1263 * there is a potential deadlock, so we go-on trying
1264 * there is a potential deadlock, so we go on trying
1265 */
1266 tsleep(&swap_pager_free, PVM, "swpfre", hz/10);
1267 swap_pager_sync();
1268 if (swap_pager_free.tqh_first == NULL ||
1269 swap_pager_free.tqh_first->spc_list.tqe_next == NULL ||
1270 swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) {
1271 splx(s);
1272 return VM_PAGER_AGAIN;
1273 }
1274 } else
1275 pagedaemon_wakeup();
1276 while (swap_pager_free.tqh_first == NULL ||
1277 swap_pager_free.tqh_first->spc_list.tqe_next == NULL ||
1278 swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) {
1279 if (curproc == pageproc) {
1280 swap_pager_needflags |= SWAP_FREE_NEEDED_BY_PAGEOUT;
1281 if((cnt.v_free_count + cnt.v_cache_count) > cnt.v_free_reserved)
1282 wakeup(&cnt.v_free_count);
1283 }
1284
1285 swap_pager_needflags |= SWAP_FREE_NEEDED;
1286 tsleep(&swap_pager_free, PVM, "swpfre", 0);
1287 if (curproc == pageproc)
1288 swap_pager_sync();
1289 else
1290 pagedaemon_wakeup();
1291 }
1292 splx(s);
1293 }
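
	/*
	 * (The test above insists on at least three free spc entries
	 * before taking one, presumably so that spares remain for the
	 * pageout daemon and a cleaning-structure deadlock is avoided.)
	 */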
1294 spc = swap_pager_free.tqh_first;
1295 TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
1296
1297 kva = spc->spc_kva;
1298
1299 /*
1300 * map our page(s) into kva for I/O
1301 */
1302 pmap_qenter(kva, m, count);
1303
1304 /*
1305 * get the base I/O offset into the swap file
1306 */
1307 for (i = 0; i < count; i++) {
1308 fidx = m[i]->pindex + paging_pindex;
1309 off = swap_pager_block_offset(fidx);
1310 /*
1311 * set the valid bit
1312 */
1313 swb[i]->swb_valid |= (1 << off);
1314 /*
1315 * and unlock the data structure
1316 */
1317 swb[i]->swb_locked--;
1318 }
1319
1320 /*
1321 * Get a swap buffer header and perform the IO
1322 */
1323 bp = spc->spc_bp;
1324 bzero(bp, sizeof *bp);
1325 bp->b_spc = spc;
1326 bp->b_vnbufs.le_next = NOLIST;
1327
1328 bp->b_flags = B_BUSY | B_PAGING;
1329 bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */
1330 bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
1331 if (bp->b_rcred != NOCRED)
1332 crhold(bp->b_rcred);
1333 if (bp->b_wcred != NOCRED)
1334 crhold(bp->b_wcred);
1335 bp->b_data = (caddr_t) kva;
1336 bp->b_blkno = reqaddr[0];
1337 pbgetvp(swapdev_vp, bp);
1338
1339 bp->b_bcount = PAGE_SIZE * count;
1340 bp->b_bufsize = PAGE_SIZE * count;
1341 swapdev_vp->v_numoutput++;
1342
1343 /*
1344 * If this is an async write we set up additional buffer fields and
1345 * place a "cleaning" entry on the inuse queue.
1346 */
1347 s = splbio();
1348 if (sync == FALSE) {
1349 spc->spc_flags = 0;
1350 spc->spc_object = object;
1351 for (i = 0; i < count; i++)
1352 spc->spc_m[i] = m[i];
1353 spc->spc_count = count;
1354 /*
1355 * the completion routine for async writes
1356 */
1357 bp->b_flags |= B_CALL;
1358 bp->b_iodone = swap_pager_iodone;
1359 bp->b_dirtyoff = 0;
1360 bp->b_dirtyend = bp->b_bcount;
1361 object->un_pager.swp.swp_poip++;
1362 TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
1363 } else {
1364 object->un_pager.swp.swp_poip++;
1365 bp->b_flags |= B_CALL;
1366 bp->b_iodone = swap_pager_iodone1;
1367 }
1368
1369 cnt.v_swapout++;
1370 cnt.v_swappgsout += count;
1371 /*
1372 * perform the I/O
1373 */
1374 VOP_STRATEGY(bp);
1375 if (sync == FALSE) {
1376 if ((bp->b_flags & B_DONE) == B_DONE) {
1377 swap_pager_sync();
1378 }
1379 splx(s);
1380 for (i = 0; i < count; i++) {
1381 rtvals[i] = VM_PAGER_PEND;
1382 }
1383 return VM_PAGER_PEND;
1384 }
1385 /*
1386 * wait for the sync I/O to complete
1387 */
1388 while ((bp->b_flags & B_DONE) == 0) {
1389 tsleep(bp, PVM, "swwrt", 0);
1390 }
1391 if (bp->b_flags & B_ERROR) {
1392 printf("swap_pager: I/O error - pageout failed; blkno %d, size %d, error %d\n",
1393 bp->b_blkno, bp->b_bcount, bp->b_error);
1394 rv = VM_PAGER_ERROR;
1395 } else {
1396 rv = VM_PAGER_OK;
1397 }
1398
1399 object->un_pager.swp.swp_poip--;
1400 if (object->un_pager.swp.swp_poip == 0)
1401 wakeup(object);
1402
1403 if (bp->b_vp)
1404 pbrelvp(bp);
1405 if (bp->b_flags & B_WANTED)
1406 wakeup(bp);
1407
1408 splx(s);
1409
1410 /*
1411 * remove the mapping for kernel virtual
1412 */
1413 pmap_qremove(kva, count);
1414
1415 /*
1416 * if we have written the page, then indicate that the page is clean.
1417 */
1418 if (rv == VM_PAGER_OK) {
1419 for (i = 0; i < count; i++) {
1420 if (rtvals[i] == VM_PAGER_OK) {
1421 pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
1422 m[i]->dirty = 0;
1423 /*
1424 * optimization, if a page has been read
1425 * during the pageout process, we activate it.
1426 */
1427 if ((m[i]->flags & PG_ACTIVE) == 0 &&
1428 ((m[i]->flags & (PG_WANTED|PG_REFERENCED)) ||
1429 pmap_is_referenced(VM_PAGE_TO_PHYS(m[i])))) {
1430 vm_page_activate(m[i]);
1431 }
1432 }
1433 }
1434 } else {
1435 for (i = 0; i < count; i++) {
1436 rtvals[i] = rv;
1437 }
1438 }
1439
1440 if (bp->b_rcred != NOCRED)
1441 crfree(bp->b_rcred);
1442 if (bp->b_wcred != NOCRED)
1443 crfree(bp->b_wcred);
1444 TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
1445 if (swap_pager_needflags & SWAP_FREE_NEEDED) {
1446 wakeup(&swap_pager_free);
1447 }
1448 if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT)
1449 pagedaemon_wakeup();
1450 swap_pager_needflags &= ~(SWAP_FREE_NEEDED|SWAP_FREE_NEEDED_BY_PAGEOUT);
1451 return (rv);
1452}
1453
1454void
1455swap_pager_sync()
1456{
1457 register swp_clean_t spc, tspc;
1458 register int s;
1459
1460 tspc = NULL;
1461 if (swap_pager_done.tqh_first == NULL)
1462 return;
1463 for (;;) {
1464 s = splbio();
1465 /*
1466 * Look up and removal from done list must be done at splbio()
1467 * to avoid conflicts with swap_pager_iodone.
1468 */
1469 while ((spc = swap_pager_done.tqh_first) != 0) {
1470 pmap_qremove(spc->spc_kva, spc->spc_count);
1471 swap_pager_finish(spc);
1472 TAILQ_REMOVE(&swap_pager_done, spc, spc_list);
1473 goto doclean;
1474 }
1475
1476 /*
1477 * No operations done, that's all we can do for now.
1478 */
1479
1480 splx(s);
1481 break;
1482
1483 /*
1484 * The desired page was found to be busy earlier in the scan
1485 * but has since completed.
1486 */
1487doclean:
1488 if (tspc && tspc == spc) {
1489 tspc = NULL;
1490 }
1491 spc->spc_flags = 0;
1492 TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
1493 if (swap_pager_needflags & SWAP_FREE_NEEDED) {
1494 wakeup(&swap_pager_free);
1495 }
1496 if( swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT)
1497 pagedaemon_wakeup();
1498 swap_pager_needflags &= ~(SWAP_FREE_NEEDED|SWAP_FREE_NEEDED_BY_PAGEOUT);
1499 splx(s);
1500 }
1501
1502 return;
1503}
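/*
 * Note the control flow above: each pass of the outer loop removes
 * at most one entry from the done list and re-enters splbio() on
 * the next pass, so swap_pager_iodone can run between entries.
 */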
1504
1505void
1506swap_pager_finish(spc)
1507 register swp_clean_t spc;
1508{
1509 vm_object_t object = spc->spc_m[0]->object;
1510 int i;
1511
1512 object->paging_in_progress -= spc->spc_count;
1513 if ((object->paging_in_progress == 0) &&
1514 (object->flags & OBJ_PIPWNT)) {
1515 object->flags &= ~OBJ_PIPWNT;
1516 wakeup(object);
1517 }
1518
1519 /*
1520 * If no error, mark as clean and inform the pmap system. If error,
1521 * mark as dirty so we will try again. (XXX could get stuck doing
1522 * this, should give up after a while)
1523 */
1524 if (spc->spc_flags & SPC_ERROR) {
1525 for (i = 0; i < spc->spc_count; i++) {
1526 printf("swap_pager_finish: I/O error, clean of page %lx failed\n",
1527 (u_long) VM_PAGE_TO_PHYS(spc->spc_m[i]));
1528 }
1529 } else {
1530 for (i = 0; i < spc->spc_count; i++) {
1531 pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m[i]));
1532 spc->spc_m[i]->dirty = 0;
1533 if ((spc->spc_m[i]->flags & PG_ACTIVE) == 0 &&
1534 ((spc->spc_m[i]->flags & PG_WANTED) || pmap_is_referenced(VM_PAGE_TO_PHYS(spc->spc_m[i]))))
1535 vm_page_activate(spc->spc_m[i]);
1536 }
1537 }
1538
1539
1540 for (i = 0; i < spc->spc_count; i++) {
1541 /*
1542 * we wakeup any processes that are waiting on these pages.
1543 */
1544 PAGE_WAKEUP(spc->spc_m[i]);
1545 }
1546 nswiodone -= spc->spc_count;
1547
1548 return;
1549}
1550
1551/*
1552 * swap_pager_iodone
1553 */
1554static void
1555swap_pager_iodone(bp)
1556 register struct buf *bp;
1557{
1558 register swp_clean_t spc;
1559 int s;
1560
1561 s = splbio();
1562 spc = (swp_clean_t) bp->b_spc;
1563 TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
1564 TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list);
1565 if (bp->b_flags & B_ERROR) {
1566 spc->spc_flags |= SPC_ERROR;
1567 printf("swap_pager: I/O error - async %s failed; blkno %lu, size %ld, error %d\n",
1568 (bp->b_flags & B_READ) ? "pagein" : "pageout",
1569 (u_long) bp->b_blkno, bp->b_bcount, bp->b_error);
1570 }
1571
1572 if (bp->b_vp)
1573 pbrelvp(bp);
1574
1575 if (bp->b_flags & B_WANTED)
1576 wakeup(bp);
1577
1578 if (bp->b_rcred != NOCRED)
1579 crfree(bp->b_rcred);
1580 if (bp->b_wcred != NOCRED)
1581 crfree(bp->b_wcred);
1582
1583 nswiodone += spc->spc_count;
1584 if (--spc->spc_object->un_pager.swp.swp_poip == 0) {
1585 wakeup(spc->spc_object);
1586 }
1587 if ((swap_pager_needflags & SWAP_FREE_NEEDED) ||
1588 swap_pager_inuse.tqh_first == 0) {
1589 swap_pager_needflags &= ~SWAP_FREE_NEEDED;
1590 wakeup(&swap_pager_free);
1591 }
1592
1593 if( swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT) {
1594 swap_pager_needflags &= ~SWAP_FREE_NEEDED_BY_PAGEOUT;
1595 pagedaemon_wakeup();
1596 }
1597
1598 if (vm_pageout_pages_needed) {
1599 wakeup(&vm_pageout_pages_needed);
1600 vm_pageout_pages_needed = 0;
1601 }
1602 if ((swap_pager_inuse.tqh_first == NULL) ||
1603 ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min &&
1604 nswiodone + cnt.v_free_count + cnt.v_cache_count >= cnt.v_free_min)) {
1605 pagedaemon_wakeup();
1606 }
1607 splx(s);
1608}
1610
1611/*
1612 * return true if any swap control structures can be allocated
1613 */
1614static int
1615swap_pager_ready()
1616{
1617 if (swap_pager_free.tqh_first)
1618 return 1;
1619 else
1620 return 0;
1621}