/*-
 * Copyright (c) 1990 University of Utah.
 * Copyright (c) 1991 The Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1993, 1994 John S. Dyson
 * Copyright (c) 1995, David Greenman
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vnode_pager.c	7.5 (Berkeley) 4/20/91
 */

/*
 * Page to/from files (vnodes).
 */

/*
 * TODO:
 *	Implement VOP_GETPAGES/PUTPAGES interface for filesystems. Will
 *	greatly simplify the vnode_pager.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/vm/vnode_pager.c 239246 2012-08-14 11:45:47Z kib $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/vmmeter.h>
#include <sys/limits.h>
#include <sys/conf.h>
#include <sys/sf_buf.h>

#include <machine/atomic.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_map.h>
#include <vm/vnode_pager.h>
#include <vm/vm_extern.h>

static int vnode_pager_addr(struct vnode *vp, vm_ooffset_t address,
    daddr_t *rtaddress, int *run);
static int vnode_pager_input_smlfs(vm_object_t object, vm_page_t m);
static int vnode_pager_input_old(vm_object_t object, vm_page_t m);
static void vnode_pager_dealloc(vm_object_t);
static int vnode_pager_getpages(vm_object_t, vm_page_t *, int, int);
static void vnode_pager_putpages(vm_object_t, vm_page_t *, int, boolean_t, int *);
static boolean_t vnode_pager_haspage(vm_object_t, vm_pindex_t, int *, int *);
static vm_object_t vnode_pager_alloc(void *, vm_ooffset_t, vm_prot_t,
    vm_ooffset_t, struct ucred *cred);

struct pagerops vnodepagerops = {
	.pgo_alloc =	vnode_pager_alloc,
	.pgo_dealloc =	vnode_pager_dealloc,
	.pgo_getpages =	vnode_pager_getpages,
	.pgo_putpages =	vnode_pager_putpages,
	.pgo_haspage =	vnode_pager_haspage,
};
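
/*
 * These entry points are not called directly; the VM system reaches
 * them through the vm_pager_*() wrappers, which dispatch on the type
 * of the backing object (OBJT_VNODE selects vnodepagerops).
 */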

int vnode_pbuf_freecnt;

/* Create the VM system backing object for this vnode */
int
vnode_create_vobject(struct vnode *vp, off_t isize, struct thread *td)
{
	vm_object_t object;
	vm_ooffset_t size = isize;
	struct vattr va;

	if (!vn_isdisk(vp, NULL) && vn_canvmio(vp) == FALSE)
		return (0);

	while ((object = vp->v_object) != NULL) {
		VM_OBJECT_LOCK(object);
		if (!(object->flags & OBJ_DEAD)) {
			VM_OBJECT_UNLOCK(object);
			return (0);
		}
		VOP_UNLOCK(vp, 0);
		vm_object_set_flag(object, OBJ_DISCONNECTWNT);
		msleep(object, VM_OBJECT_MTX(object), PDROP | PVM, "vodead", 0);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	}

	if (size == 0) {
		if (vn_isdisk(vp, NULL)) {
			size = IDX_TO_OFF(INT_MAX);
		} else {
			if (VOP_GETATTR(vp, &va, td->td_ucred))
				return (0);
			size = va.va_size;
		}
	}

	object = vnode_pager_alloc(vp, size, 0, 0, td->td_ucred);
	/*
	 * Dereference the reference we just created.  This assumes
	 * that the object is associated with the vp.
	 */
	VM_OBJECT_LOCK(object);
	object->ref_count--;
	VM_OBJECT_UNLOCK(object);
	vrele(vp);

	KASSERT(vp->v_object != NULL, ("vnode_create_vobject: NULL object"));

	return (0);
}

void
vnode_destroy_vobject(struct vnode *vp)
{
	struct vm_object *obj;

	obj = vp->v_object;
	if (obj == NULL)
		return;
	ASSERT_VOP_ELOCKED(vp, "vnode_destroy_vobject");
	VM_OBJECT_LOCK(obj);
	if (obj->ref_count == 0) {
		/*
		 * vclean() may be called twice. The first time
		 * removes the primary reference to the object,
		 * the second time goes one further and is a
		 * special-case to terminate the object.
		 *
		 * don't double-terminate the object
		 */
		if ((obj->flags & OBJ_DEAD) == 0)
			vm_object_terminate(obj);
		else
			VM_OBJECT_UNLOCK(obj);
	} else {
		/*
		 * Woe to the process that tries to page now :-).
		 */
		vm_pager_deallocate(obj);
		VM_OBJECT_UNLOCK(obj);
	}
	vp->v_object = NULL;
}

/*
 * Allocate (or lookup) pager for a vnode.
 * Handle is a vnode pointer.
 *
 * MPSAFE
 */
vm_object_t
vnode_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
    vm_ooffset_t offset, struct ucred *cred)
{
	vm_object_t object;
	struct vnode *vp;

	/*
	 * Pageout to vnode, no can do yet.
	 */
	if (handle == NULL)
		return (NULL);

	vp = (struct vnode *) handle;

	/*
	 * If the object is being terminated, wait for it to
	 * go away.
	 */
retry:
	while ((object = vp->v_object) != NULL) {
		VM_OBJECT_LOCK(object);
		if ((object->flags & OBJ_DEAD) == 0)
			break;
		vm_object_set_flag(object, OBJ_DISCONNECTWNT);
		msleep(object, VM_OBJECT_MTX(object), PDROP | PVM, "vadead", 0);
	}

	if (vp->v_usecount == 0)
		panic("vnode_pager_alloc: no vnode reference");

	if (object == NULL) {
		/*
		 * Add an object of the appropriate size
		 */
		object = vm_object_allocate(OBJT_VNODE, OFF_TO_IDX(round_page(size)));

		object->un_pager.vnp.vnp_size = size;
		object->un_pager.vnp.writemappings = 0;

		object->handle = handle;
		VI_LOCK(vp);
		if (vp->v_object != NULL) {
			/*
			 * Object has been created while we were sleeping
			 */
			VI_UNLOCK(vp);
			vm_object_destroy(object);
			goto retry;
		}
		vp->v_object = object;
		VI_UNLOCK(vp);
	} else {
		object->ref_count++;
		VM_OBJECT_UNLOCK(object);
	}
	vref(vp);
	return (object);
}

/*
 *	The object must be locked.
 */
static void
vnode_pager_dealloc(vm_object_t object)
{
	struct vnode *vp;
	int refs;

	vp = object->handle;
	if (vp == NULL)
		panic("vnode_pager_dealloc: pager already dealloced");

	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
	vm_object_pip_wait(object, "vnpdea");
	refs = object->ref_count;

	object->handle = NULL;
	object->type = OBJT_DEAD;
	if (object->flags & OBJ_DISCONNECTWNT) {
		vm_object_clear_flag(object, OBJ_DISCONNECTWNT);
		wakeup(object);
	}
	ASSERT_VOP_ELOCKED(vp, "vnode_pager_dealloc");
	if (object->un_pager.vnp.writemappings > 0) {
		object->un_pager.vnp.writemappings = 0;
		vp->v_writecount--;
		CTR3(KTR_VFS, "%s: vp %p v_writecount decreased to %d",
		    __func__, vp, vp->v_writecount);
	}
	vp->v_object = NULL;
	vp->v_vflag &= ~VV_TEXT;
	VM_OBJECT_UNLOCK(object);
	while (refs-- > 0)
		vunref(vp);
	VM_OBJECT_LOCK(object);
}

static boolean_t
vnode_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before,
    int *after)
{
	struct vnode *vp = object->handle;
	daddr_t bn;
	int err;
	daddr_t reqblock;
	int poff;
	int bsize;
	int pagesperblock, blocksperpage;
	int vfslocked;

	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
	/*
	 * If no vp or vp is doomed or marked transparent to VM, we do not
	 * have the page.
	 */
	if (vp == NULL || vp->v_iflag & VI_DOOMED)
		return FALSE;
	/*
	 * If the offset is beyond end of file we do
	 * not have the page.
	 */
	if (IDX_TO_OFF(pindex) >= object->un_pager.vnp.vnp_size)
		return FALSE;

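	/*
	 * Worked example (illustrative, assuming 4 KB pages): with a
	 * 32 KB filesystem block, pagesperblock = 8, so page index 21
	 * lies in filesystem block 2, which covers pages 16-23.  With a
	 * 512-byte filesystem block, pagesperblock = 0 and
	 * blocksperpage = 8, so page index 21 starts at filesystem
	 * block 168.
	 */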
	bsize = vp->v_mount->mnt_stat.f_iosize;
	pagesperblock = bsize / PAGE_SIZE;
	blocksperpage = 0;
	if (pagesperblock > 0) {
		reqblock = pindex / pagesperblock;
	} else {
		blocksperpage = (PAGE_SIZE / bsize);
		reqblock = pindex * blocksperpage;
	}
	VM_OBJECT_UNLOCK(object);
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	err = VOP_BMAP(vp, reqblock, NULL, &bn, after, before);
	VFS_UNLOCK_GIANT(vfslocked);
	VM_OBJECT_LOCK(object);
	if (err)
		return TRUE;
	if (bn == -1)
		return FALSE;
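	/*
	 * VOP_BMAP() reported the runs of contiguous blocks before and
	 * after reqblock in units of filesystem blocks; rescale them
	 * into units of pages for the caller.  E.g., with
	 * pagesperblock = 8, *after = 2 whole blocks contributes 16
	 * pages, plus the pages that follow pindex within its own
	 * block (clipped to EOF below).
	 */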
	if (pagesperblock > 0) {
		poff = pindex - (reqblock * pagesperblock);
		if (before) {
			*before *= pagesperblock;
			*before += poff;
		}
		if (after) {
			int numafter;

			*after *= pagesperblock;
			numafter = pagesperblock - (poff + 1);
			if (IDX_TO_OFF(pindex + numafter) >
			    object->un_pager.vnp.vnp_size) {
				numafter =
				    OFF_TO_IDX(object->un_pager.vnp.vnp_size) -
				    pindex;
			}
			*after += numafter;
		}
	} else {
		if (before) {
			*before /= blocksperpage;
		}

		if (after) {
			*after /= blocksperpage;
		}
	}
	return TRUE;
}

/*
 * Lets the VM system know about a change in size for a file.
 * We adjust our own internal size and flush any cached pages in
 * the associated object that are affected by the size change.
 *
 * Note: this routine may be invoked as a result of a pager put
 * operation (possibly at object termination time), so we must be careful.
 */
void
vnode_pager_setsize(struct vnode *vp, vm_ooffset_t nsize)
{
	vm_object_t object;
	vm_page_t m;
	vm_pindex_t nobjsize;

	if ((object = vp->v_object) == NULL)
		return;
/* 	ASSERT_VOP_ELOCKED(vp, "vnode_pager_setsize and not locked vnode"); */
	VM_OBJECT_LOCK(object);
	if (nsize == object->un_pager.vnp.vnp_size) {
		/*
		 * Hasn't changed size
		 */
		VM_OBJECT_UNLOCK(object);
		return;
	}
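	/*
	 * nobjsize is the new object size in pages, rounded up so that
	 * a final partial page is kept: e.g., with 4 KB pages,
	 * nsize = 10000 bytes yields nobjsize = 3 (illustrative).
	 */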
	nobjsize = OFF_TO_IDX(nsize + PAGE_MASK);
	if (nsize < object->un_pager.vnp.vnp_size) {
		/*
		 * File has shrunk. Toss any cached pages beyond the new EOF.
		 */
		if (nobjsize < object->size)
			vm_object_page_remove(object, nobjsize, object->size,
			    0);
		/*
		 * this gets rid of garbage at the end of a page that is now
		 * only partially backed by the vnode.
		 *
		 * XXX for some reason (I don't know yet), if we take a
		 * completely invalid page and mark it partially valid
		 * it can screw up NFS reads, so we don't allow the case.
		 */
		if ((nsize & PAGE_MASK) &&
		    (m = vm_page_lookup(object, OFF_TO_IDX(nsize))) != NULL &&
		    m->valid != 0) {
			int base = (int)nsize & PAGE_MASK;
			int size = PAGE_SIZE - base;

			/*
			 * Clear out partial-page garbage in case
			 * the page has been mapped.
			 */
			pmap_zero_page_area(m, base, size);

			/*
			 * Update the valid bits to reflect the blocks that
			 * have been zeroed.  Some of these valid bits may
			 * have already been set.
			 */
			vm_page_set_valid_range(m, base, size);

			/*
			 * Round "base" to the next block boundary so that the
			 * dirty bit for a partially zeroed block is not
			 * cleared.
			 */
			base = roundup2(base, DEV_BSIZE);

			/*
			 * Clear out partial-page dirty bits.
			 *
			 * note that we do not clear out the valid
			 * bits.  This would prevent bogus_page
			 * replacement from working properly.
			 */
			vm_page_clear_dirty(m, base, PAGE_SIZE - base);
		} else if ((nsize & PAGE_MASK) &&
		    vm_page_is_cached(object, OFF_TO_IDX(nsize))) {
			vm_page_cache_free(object, OFF_TO_IDX(nsize),
			    nobjsize);
		}
	}
	object->un_pager.vnp.vnp_size = nsize;
	object->size = nobjsize;
	VM_OBJECT_UNLOCK(object);
}

/*
 * calculate the linear (byte) disk address of specified virtual
 * file address
 */
static int
vnode_pager_addr(struct vnode *vp, vm_ooffset_t address, daddr_t *rtaddress,
    int *run)
{
	int bsize;
	int err;
	daddr_t vblock;
	daddr_t voffset;

	if (address < 0)
		return -1;

	if (vp->v_iflag & VI_DOOMED)
		return -1;

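	/*
	 * Split the byte offset into a filesystem block number and an
	 * offset within that block: e.g., with bsize = 16384, address
	 * 40000 gives vblock = 2 and voffset = 7232 (illustrative).
	 * VOP_BMAP() translates vblock to a device block; voffset is
	 * then folded back in below in DEV_BSIZE (512-byte) units.
	 */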
	bsize = vp->v_mount->mnt_stat.f_iosize;
	vblock = address / bsize;
	voffset = address % bsize;

	err = VOP_BMAP(vp, vblock, NULL, rtaddress, run, NULL);
	if (err == 0) {
		if (*rtaddress != -1)
			*rtaddress += voffset / DEV_BSIZE;
		if (run) {
			*run += 1;
			*run *= bsize / PAGE_SIZE;
			*run -= voffset / PAGE_SIZE;
		}
	}

	return (err);
}

/*
 * small block filesystem vnode pager input
 */
static int
vnode_pager_input_smlfs(vm_object_t object, vm_page_t m)
{
	struct vnode *vp;
	struct bufobj *bo;
	struct buf *bp;
	struct sf_buf *sf;
	daddr_t fileaddr;
	vm_offset_t bsize;
	vm_page_bits_t bits;
	int error, i;

	error = 0;
	vp = object->handle;
	if (vp->v_iflag & VI_DOOMED)
		return VM_PAGER_BAD;

	bsize = vp->v_mount->mnt_stat.f_iosize;

	VOP_BMAP(vp, 0, &bo, 0, NULL, NULL);

	sf = sf_buf_alloc(m, 0);

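	/*
	 * The page may already be partially valid, so read only the
	 * bsize-sized chunks whose valid bits are clear and bzero the
	 * chunks that lie beyond EOF or have no backing block.  E.g.,
	 * with 4 KB pages and bsize = 1024 there are four chunks, each
	 * covered by its own slice of the valid/dirty bit masks
	 * (illustrative).
	 */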
	for (i = 0; i < PAGE_SIZE / bsize; i++) {
		vm_ooffset_t address;

		bits = vm_page_bits(i * bsize, bsize);
		if (m->valid & bits)
			continue;

		address = IDX_TO_OFF(m->pindex) + i * bsize;
		if (address >= object->un_pager.vnp.vnp_size) {
			fileaddr = -1;
		} else {
			error = vnode_pager_addr(vp, address, &fileaddr, NULL);
			if (error)
				break;
		}
		if (fileaddr != -1) {
			bp = getpbuf(&vnode_pbuf_freecnt);

			/* build a minimal buffer header */
			bp->b_iocmd = BIO_READ;
			bp->b_iodone = bdone;
			KASSERT(bp->b_rcred == NOCRED, ("leaking read ucred"));
			KASSERT(bp->b_wcred == NOCRED, ("leaking write ucred"));
			bp->b_rcred = crhold(curthread->td_ucred);
			bp->b_wcred = crhold(curthread->td_ucred);
			bp->b_data = (caddr_t)sf_buf_kva(sf) + i * bsize;
			bp->b_blkno = fileaddr;
			pbgetbo(bo, bp);
			bp->b_vp = vp;
			bp->b_bcount = bsize;
			bp->b_bufsize = bsize;
			bp->b_runningbufspace = bp->b_bufsize;
			atomic_add_long(&runningbufspace, bp->b_runningbufspace);

			/* do the input */
			bp->b_iooffset = dbtob(bp->b_blkno);
			bstrategy(bp);

			bwait(bp, PVM, "vnsrd");

			if ((bp->b_ioflags & BIO_ERROR) != 0)
				error = EIO;

			/*
			 * free the buffer header back to the swap buffer pool
			 */
			bp->b_vp = NULL;
			pbrelbo(bp);
			relpbuf(bp, &vnode_pbuf_freecnt);
			if (error)
				break;
		} else
			bzero((caddr_t)sf_buf_kva(sf) + i * bsize, bsize);
		KASSERT((m->dirty & bits) == 0,
		    ("vnode_pager_input_smlfs: page %p is dirty", m));
		VM_OBJECT_LOCK(object);
		m->valid |= bits;
		VM_OBJECT_UNLOCK(object);
	}
	sf_buf_free(sf);
	if (error)
		return VM_PAGER_ERROR;
	return VM_PAGER_OK;
}

/*
 * old style vnode pager input routine
 */
static int
vnode_pager_input_old(vm_object_t object, vm_page_t m)
{
	struct uio auio;
	struct iovec aiov;
	int error;
	int size;
	struct sf_buf *sf;
	struct vnode *vp;

	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
	error = 0;

	/*
	 * Return failure if beyond current EOF
	 */
	if (IDX_TO_OFF(m->pindex) >= object->un_pager.vnp.vnp_size) {
		return VM_PAGER_BAD;
	} else {
		size = PAGE_SIZE;
		if (IDX_TO_OFF(m->pindex) + size > object->un_pager.vnp.vnp_size)
			size = object->un_pager.vnp.vnp_size - IDX_TO_OFF(m->pindex);
		vp = object->handle;
		VM_OBJECT_UNLOCK(object);

		/*
		 * Allocate a kernel virtual address and initialize so that
		 * we can use VOP_READ/WRITE routines.
		 */
		sf = sf_buf_alloc(m, 0);

		aiov.iov_base = (caddr_t)sf_buf_kva(sf);
		aiov.iov_len = size;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = IDX_TO_OFF(m->pindex);
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_READ;
		auio.uio_resid = size;
		auio.uio_td = curthread;

		error = VOP_READ(vp, &auio, 0, curthread->td_ucred);
		if (!error) {
			int count = size - auio.uio_resid;

			if (count == 0)
				error = EINVAL;
			else if (count != PAGE_SIZE)
				bzero((caddr_t)sf_buf_kva(sf) + count,
				    PAGE_SIZE - count);
		}
		sf_buf_free(sf);

		VM_OBJECT_LOCK(object);
	}
	KASSERT(m->dirty == 0, ("vnode_pager_input_old: page %p is dirty", m));
	if (!error)
		m->valid = VM_PAGE_BITS_ALL;
	return error ? VM_PAGER_ERROR : VM_PAGER_OK;
}

/*
 * generic vnode pager input routine
 */

/*
 * Local media VFS's that do not implement their own VOP_GETPAGES
 * should have their VOP_GETPAGES call vnode_pager_generic_getpages()
 * to implement the previous behaviour.
 *
 * All other FS's should use the bypass to get to the local media
 * backing vp's VOP_GETPAGES.
 */
static int
vnode_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage)
{
	int rtval;
	struct vnode *vp;
	int bytes = count * PAGE_SIZE;
	int vfslocked;

	vp = object->handle;
	VM_OBJECT_UNLOCK(object);
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	rtval = VOP_GETPAGES(vp, m, bytes, reqpage, 0);
	KASSERT(rtval != EOPNOTSUPP,
	    ("vnode_pager: FS getpages not implemented\n"));
	VFS_UNLOCK_GIANT(vfslocked);
	VM_OBJECT_LOCK(object);
	return rtval;
}

/*
 * This is now called from local media FS's to operate against their
 * own vnodes if they fail to implement VOP_GETPAGES.
 */
int
vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int bytecount,
    int reqpage)
{
	vm_object_t object;
	vm_offset_t kva;
	off_t foff, tfoff, nextoff;
	int i, j, size, bsize, first, secmask;
	daddr_t firstaddr, reqblock;
	struct bufobj *bo;
	int runpg;
	int runend;
	struct buf *bp;
	int count;
	int error;

	object = vp->v_object;
	count = bytecount / PAGE_SIZE;

	KASSERT(vp->v_type != VCHR && vp->v_type != VBLK,
	    ("vnode_pager_generic_getpages does not support devices"));
	if (vp->v_iflag & VI_DOOMED)
		return VM_PAGER_BAD;

	bsize = vp->v_mount->mnt_stat.f_iosize;

	/* get the UNDERLYING device for the file with VOP_BMAP() */

	/*
	 * originally, we did not check for an error return value -- assuming
	 * an fs always has a bmap entry point -- that assumption is wrong!!!
	 */
	foff = IDX_TO_OFF(m[reqpage]->pindex);

	/*
	 * if we can't bmap, use old VOP code
	 */
	error = VOP_BMAP(vp, foff / bsize, &bo, &reqblock, NULL, NULL);
	if (error == EOPNOTSUPP) {
		VM_OBJECT_LOCK(object);

		for (i = 0; i < count; i++)
			if (i != reqpage) {
				vm_page_lock(m[i]);
				vm_page_free(m[i]);
				vm_page_unlock(m[i]);
			}
		PCPU_INC(cnt.v_vnodein);
		PCPU_INC(cnt.v_vnodepgsin);
		error = vnode_pager_input_old(object, m[reqpage]);
		VM_OBJECT_UNLOCK(object);
		return (error);
	} else if (error != 0) {
		VM_OBJECT_LOCK(object);
		for (i = 0; i < count; i++)
			if (i != reqpage) {
				vm_page_lock(m[i]);
				vm_page_free(m[i]);
				vm_page_unlock(m[i]);
			}
		VM_OBJECT_UNLOCK(object);
		return (VM_PAGER_ERROR);

		/*
		 * if the blocksize is smaller than a page size, then use
		 * special small filesystem code.  NFS sometimes has a small
		 * blocksize, but it can handle large reads itself.
		 */
	} else if ((PAGE_SIZE / bsize) > 1 &&
	    (vp->v_mount->mnt_stat.f_type != nfs_mount_type)) {
		VM_OBJECT_LOCK(object);
		for (i = 0; i < count; i++)
			if (i != reqpage) {
				vm_page_lock(m[i]);
				vm_page_free(m[i]);
				vm_page_unlock(m[i]);
			}
		VM_OBJECT_UNLOCK(object);
		PCPU_INC(cnt.v_vnodein);
		PCPU_INC(cnt.v_vnodepgsin);
		return vnode_pager_input_smlfs(object, m[reqpage]);
	}

	/*
	 * If we have a completely valid page available to us, we can
	 * clean up and return.  Otherwise we have to re-read the
	 * media.
	 */
	VM_OBJECT_LOCK(object);
	if (m[reqpage]->valid == VM_PAGE_BITS_ALL) {
		for (i = 0; i < count; i++)
			if (i != reqpage) {
				vm_page_lock(m[i]);
				vm_page_free(m[i]);
				vm_page_unlock(m[i]);
			}
		VM_OBJECT_UNLOCK(object);
		return VM_PAGER_OK;
	} else if (reqblock == -1) {
		pmap_zero_page(m[reqpage]);
		KASSERT(m[reqpage]->dirty == 0,
		    ("vnode_pager_generic_getpages: page %p is dirty",
		    m[reqpage]));
		m[reqpage]->valid = VM_PAGE_BITS_ALL;
		for (i = 0; i < count; i++)
			if (i != reqpage) {
				vm_page_lock(m[i]);
				vm_page_free(m[i]);
				vm_page_unlock(m[i]);
			}
		VM_OBJECT_UNLOCK(object);
		return (VM_PAGER_OK);
	}
	m[reqpage]->valid = 0;
	VM_OBJECT_UNLOCK(object);

	/*
	 * here on direct device I/O
	 */
	firstaddr = -1;

	/*
	 * calculate the run that includes the required page
	 */
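	/*
	 * Each pass maps m[i] and learns how many pages from that point
	 * are contiguous on disk (runpg).  Runs that cannot contain
	 * reqpage, and pages beyond the run that holds reqpage, are
	 * freed, leaving one contiguous window of pages around reqpage
	 * for a single I/O.  E.g. (illustrative), with count = 8 and
	 * reqpage = 3, a hole at page 1 frees pages 0 and 1 and
	 * restarts the window at page 2.
	 */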
	for (first = 0, i = 0; i < count; i = runend) {
		if (vnode_pager_addr(vp, IDX_TO_OFF(m[i]->pindex), &firstaddr,
		    &runpg) != 0) {
			VM_OBJECT_LOCK(object);
			for (; i < count; i++)
				if (i != reqpage) {
					vm_page_lock(m[i]);
					vm_page_free(m[i]);
					vm_page_unlock(m[i]);
				}
			VM_OBJECT_UNLOCK(object);
			return (VM_PAGER_ERROR);
		}
		if (firstaddr == -1) {
			VM_OBJECT_LOCK(object);
			if (i == reqpage && foff < object->un_pager.vnp.vnp_size) {
				panic("vnode_pager_getpages: unexpected missing page: firstaddr: %jd, foff: 0x%jx, vnp_size: 0x%jx",
				    (intmax_t)firstaddr, (uintmax_t)foff,
				    (uintmax_t)object->un_pager.vnp.vnp_size);
			}
			vm_page_lock(m[i]);
			vm_page_free(m[i]);
			vm_page_unlock(m[i]);
			VM_OBJECT_UNLOCK(object);
			runend = i + 1;
			first = runend;
			continue;
		}
		runend = i + runpg;
		if (runend <= reqpage) {
			VM_OBJECT_LOCK(object);
			for (j = i; j < runend; j++) {
				vm_page_lock(m[j]);
				vm_page_free(m[j]);
				vm_page_unlock(m[j]);
			}
			VM_OBJECT_UNLOCK(object);
		} else {
			if (runpg < (count - first)) {
				VM_OBJECT_LOCK(object);
				for (i = first + runpg; i < count; i++) {
					vm_page_lock(m[i]);
					vm_page_free(m[i]);
					vm_page_unlock(m[i]);
				}
				VM_OBJECT_UNLOCK(object);
				count = first + runpg;
			}
			break;
		}
		first = runend;
	}

	/*
	 * the first and last page have been calculated now, move input pages
	 * to be zero based...
	 */
	if (first != 0) {
		m += first;
		count -= first;
		reqpage -= first;
	}

	/*
	 * calculate the file virtual address for the transfer
	 */
	foff = IDX_TO_OFF(m[0]->pindex);

	/*
	 * calculate the size of the transfer
	 */
	size = count * PAGE_SIZE;
	KASSERT(count > 0, ("zero count"));
	if ((foff + size) > object->un_pager.vnp.vnp_size)
		size = object->un_pager.vnp.vnp_size - foff;
	KASSERT(size > 0, ("zero size"));

	/*
	 * round up physical size for real devices.
	 */
	secmask = bo->bo_bsize - 1;
	KASSERT(secmask < PAGE_SIZE && secmask > 0,
	    ("vnode_pager_generic_getpages: sector size %d too large",
	    secmask + 1));
	size = (size + secmask) & ~secmask;

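	/*
	 * Illustrative numbers for the rounding above: with 512-byte
	 * sectors, secmask = 511, so a 10000-byte transfer is rounded
	 * up to 10240 bytes.  The read that follows uses the
	 * synchronous pager-buffer protocol: getpbuf() allocates the
	 * buffer, bstrategy() queues the I/O, bwait() sleeps until the
	 * bdone() iodone callback fires, and relpbuf() releases the
	 * buffer afterwards.
	 */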
	bp = getpbuf(&vnode_pbuf_freecnt);
	kva = (vm_offset_t) bp->b_data;

	/*
	 * and map the pages to be read into the kva
	 */
	pmap_qenter(kva, m, count);

	/* build a minimal buffer header */
	bp->b_iocmd = BIO_READ;
	bp->b_iodone = bdone;
	KASSERT(bp->b_rcred == NOCRED, ("leaking read ucred"));
	KASSERT(bp->b_wcred == NOCRED, ("leaking write ucred"));
	bp->b_rcred = crhold(curthread->td_ucred);
	bp->b_wcred = crhold(curthread->td_ucred);
	bp->b_blkno = firstaddr;
	pbgetbo(bo, bp);
	bp->b_vp = vp;
	bp->b_bcount = size;
	bp->b_bufsize = size;
	bp->b_runningbufspace = bp->b_bufsize;
	atomic_add_long(&runningbufspace, bp->b_runningbufspace);

	PCPU_INC(cnt.v_vnodein);
	PCPU_ADD(cnt.v_vnodepgsin, count);

	/* do the input */
	bp->b_iooffset = dbtob(bp->b_blkno);
	bstrategy(bp);

	bwait(bp, PVM, "vnread");

	if ((bp->b_ioflags & BIO_ERROR) != 0)
		error = EIO;

	if (!error) {
		if (size != count * PAGE_SIZE)
			bzero((caddr_t) kva + size, PAGE_SIZE * count - size);
	}
	pmap_qremove(kva, count);

	/*
	 * free the buffer header back to the swap buffer pool
	 */
	bp->b_vp = NULL;
	pbrelbo(bp);
	relpbuf(bp, &vnode_pbuf_freecnt);

	VM_OBJECT_LOCK(object);
	for (i = 0, tfoff = foff; i < count; i++, tfoff = nextoff) {
		vm_page_t mt;

		nextoff = tfoff + PAGE_SIZE;
		mt = m[i];

		if (nextoff <= object->un_pager.vnp.vnp_size) {
			/*
			 * Read filled up entire page.
			 */
			mt->valid = VM_PAGE_BITS_ALL;
			KASSERT(mt->dirty == 0,
			    ("vnode_pager_generic_getpages: page %p is dirty",
			    mt));
			KASSERT(!pmap_page_is_mapped(mt),
			    ("vnode_pager_generic_getpages: page %p is mapped",
			    mt));
		} else {
			/*
			 * Read did not fill up entire page.
			 *
			 * Currently we do not set the entire page valid,
			 * we just try to clear the piece that we couldn't
			 * read.
			 */
			vm_page_set_valid_range(mt, 0,
			    object->un_pager.vnp.vnp_size - tfoff);
			KASSERT((mt->dirty & vm_page_bits(0,
			    object->un_pager.vnp.vnp_size - tfoff)) == 0,
			    ("vnode_pager_generic_getpages: page %p is dirty",
			    mt));
		}

		if (i != reqpage)
			vm_page_readahead_finish(mt);
	}
	VM_OBJECT_UNLOCK(object);
	if (error)
		printf("vnode_pager_getpages: I/O read error\n");
	return (error ? VM_PAGER_ERROR : VM_PAGER_OK);
}

/*
 * EOPNOTSUPP is no longer legal.  For local media VFS's that do not
 * implement their own VOP_PUTPAGES, their VOP_PUTPAGES should call
 * vnode_pager_generic_putpages() to implement the previous behaviour.
 *
 * All other FS's should use the bypass to get to the local media
 * backing vp's VOP_PUTPAGES.
 */
static void
vnode_pager_putpages(vm_object_t object, vm_page_t *m, int count,
    boolean_t sync, int *rtvals)
{
	int rtval;
	struct vnode *vp;
	int bytes = count * PAGE_SIZE;

	/*
	 * Force synchronous operation if we are extremely low on memory
	 * to prevent a low-memory deadlock.  VOP operations often need to
	 * allocate more memory to initiate the I/O ( i.e. do a BMAP
	 * operation ).  The swapper handles the case by limiting the amount
	 * of asynchronous I/O, but that sort of solution doesn't scale well
	 * for the vnode pager without a lot of work.
	 *
	 * Also, the backing vnode's iodone routine may not wake the pageout
	 * daemon up.  This should probably be addressed.  XXX
	 */

	if ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_pageout_free_min)
		sync |= OBJPC_SYNC;

	/*
	 * Call device-specific putpages function
	 */
	vp = object->handle;
	VM_OBJECT_UNLOCK(object);
	rtval = VOP_PUTPAGES(vp, m, bytes, sync, rtvals, 0);
	KASSERT(rtval != EOPNOTSUPP,
	    ("vnode_pager: stale FS putpages\n"));
	VM_OBJECT_LOCK(object);
}

/*
 * This is now called from local media FS's to operate against their
 * own vnodes if they fail to implement VOP_PUTPAGES.
 *
 * This is typically called indirectly via the pageout daemon and
 * clustering has already typically occurred, so in general we ask the
 * underlying filesystem to write the data out asynchronously rather
 * than delayed.
 */
int
vnode_pager_generic_putpages(struct vnode *vp, vm_page_t *ma, int bytecount,
    int flags, int *rtvals)
{
	int i;
	vm_object_t object;
	vm_page_t m;
	int count;
	int maxsize, ncount;
	vm_ooffset_t poffset;
	struct uio auio;
	struct iovec aiov;
	int error;
	int ioflags;
	int ppscheck = 0;
	static struct timeval lastfail;
	static int curfail;

	object = vp->v_object;
	count = bytecount / PAGE_SIZE;

	for (i = 0; i < count; i++)
		rtvals[i] = VM_PAGER_ERROR;

	if ((int64_t)ma[0]->pindex < 0) {
		printf("vnode_pager_putpages: attempt to write meta-data!!! -- 0x%lx(%lx)\n",
		    (long)ma[0]->pindex, (u_long)ma[0]->dirty);
		rtvals[0] = VM_PAGER_BAD;
		return VM_PAGER_BAD;
	}

	maxsize = count * PAGE_SIZE;
	ncount = count;

	poffset = IDX_TO_OFF(ma[0]->pindex);

	/*
	 * If the page-aligned write is larger than the actual file, we
	 * have to invalidate pages occurring beyond the file EOF.  However,
	 * there is an edge case where a file may not be page-aligned where
	 * the last page is partially invalid.  In this case the filesystem
	 * may not properly clear the dirty bits for the entire page (which
	 * could be VM_PAGE_BITS_ALL due to the page having been mmap()d).
	 * With the page locked we are free to fix up the dirty bits here.
	 *
	 * We do not under any circumstances truncate the valid bits, as
	 * this will screw up bogus page replacement.
	 */
	VM_OBJECT_LOCK(object);
	if (maxsize + poffset > object->un_pager.vnp.vnp_size) {
		if (object->un_pager.vnp.vnp_size > poffset) {
			int pgoff;

			maxsize = object->un_pager.vnp.vnp_size - poffset;
			ncount = btoc(maxsize);
			if ((pgoff = (int)maxsize & PAGE_MASK) != 0) {
				/*
				 * If the object is locked and the following
				 * conditions hold, then the page's dirty
				 * field cannot be concurrently changed by a
				 * pmap operation.
				 */
				m = ma[ncount - 1];
				KASSERT(m->busy > 0,
		("vnode_pager_generic_putpages: page %p is not busy", m));
				KASSERT(!pmap_page_is_write_mapped(m),
		("vnode_pager_generic_putpages: page %p is not read-only", m));
				vm_page_clear_dirty(m, pgoff, PAGE_SIZE -
				    pgoff);
			}
		} else {
			maxsize = 0;
			ncount = 0;
		}
		if (ncount < count) {
			for (i = ncount; i < count; i++) {
				rtvals[i] = VM_PAGER_BAD;
			}
		}
	}
	VM_OBJECT_UNLOCK(object);

	/*
	 * pageouts are already clustered, use IO_ASYNC to force a bawrite()
	 * rather than a bdwrite() to prevent paging I/O from saturating
	 * the buffer cache.  Dummy-up the sequential heuristic to cause
	 * large ranges to cluster.  If neither IO_SYNC nor IO_ASYNC is set,
	 * the system decides how to cluster.
	 */
	ioflags = IO_VMIO;
	if (flags & (VM_PAGER_PUT_SYNC | VM_PAGER_PUT_INVAL))
		ioflags |= IO_SYNC;
	else if ((flags & VM_PAGER_CLUSTER_OK) == 0)
		ioflags |= IO_ASYNC;
	ioflags |= (flags & VM_PAGER_PUT_INVAL) ? IO_INVAL : 0;
	ioflags |= IO_SEQMAX << IO_SEQSHIFT;
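	/*
	 * The write below is built with UIO_NOCOPY and a NULL iov_base:
	 * no bytes pass through the uio itself; the filesystem's
	 * VOP_WRITE pulls the data directly from the busy, dirty pages
	 * backing the object at uio_offset.
	 */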
11521827Sdg
11537695Sdg	aiov.iov_base = (caddr_t) 0;
11547695Sdg	aiov.iov_len = maxsize;
11557695Sdg	auio.uio_iov = &aiov;
11567695Sdg	auio.uio_iovcnt = 1;
115712767Sdyson	auio.uio_offset = poffset;
11587695Sdg	auio.uio_segflg = UIO_NOCOPY;
11597695Sdg	auio.uio_rw = UIO_WRITE;
11607695Sdg	auio.uio_resid = maxsize;
116183366Sjulian	auio.uio_td = (struct thread *) 0;
116291406Sjhb	error = VOP_WRITE(vp, &auio, ioflags, curthread->td_ucred);
1163170292Sattilio	PCPU_INC(cnt.v_vnodeout);
1164170292Sattilio	PCPU_ADD(cnt.v_vnodepgsout, ncount);
11653612Sdg
11668585Sdg	if (error) {
1167151951Sps		if ((ppscheck = ppsratecheck(&lastfail, &curfail, 1)))
1168151951Sps			printf("vnode_pager_putpages: I/O error %d\n", error);
11697695Sdg	}
11708585Sdg	if (auio.uio_resid) {
1171151951Sps		if (ppscheck || ppsratecheck(&lastfail, &curfail, 1))
1172194990Skib			printf("vnode_pager_putpages: residual I/O %zd at %lu\n",
1173208574Salc			    auio.uio_resid, (u_long)ma[0]->pindex);
11747695Sdg	}
117533936Sdyson	for (i = 0; i < ncount; i++) {
117633936Sdyson		rtvals[i] = VM_PAGER_OK;
11777695Sdg	}
11787695Sdg	return rtvals[0];
11797695Sdg}

void
vnode_pager_undirty_pages(vm_page_t *ma, int *rtvals, int written)
{
	vm_object_t obj;
	int i, pos;

	if (written == 0)
		return;
	obj = ma[0]->object;
	VM_OBJECT_LOCK(obj);
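	/*
	 * Illustrative example (assuming 4 KB pages): written = 10000
	 * means pages 0 and 1 completed and are undirtied with
	 * VM_PAGER_OK, while page 2 had only 10000 & PAGE_MASK = 1808
	 * bytes written, so just that prefix is cleaned and the page is
	 * reported VM_PAGER_AGAIN so the pageout is retried.
	 */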
	for (i = 0, pos = 0; pos < written; i++, pos += PAGE_SIZE) {
		if (pos < trunc_page(written)) {
			rtvals[i] = VM_PAGER_OK;
			vm_page_undirty(ma[i]);
		} else {
			/* Partially written page. */
			rtvals[i] = VM_PAGER_AGAIN;
			vm_page_clear_dirty(ma[i], 0, written & PAGE_MASK);
		}
	}
	VM_OBJECT_UNLOCK(obj);
}

void
vnode_pager_update_writecount(vm_object_t object, vm_offset_t start,
    vm_offset_t end)
{
	struct vnode *vp;
	vm_ooffset_t old_wm;

	VM_OBJECT_LOCK(object);
	if (object->type != OBJT_VNODE) {
		VM_OBJECT_UNLOCK(object);
		return;
	}
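	/*
	 * writemappings counts bytes of the vnode currently mapped for
	 * write.  Only the transitions between zero and non-zero touch
	 * v_writecount: the first writable mapping increments it, and
	 * it is not decremented until the last writable mapping is
	 * removed.
	 */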
	old_wm = object->un_pager.vnp.writemappings;
	object->un_pager.vnp.writemappings += (vm_ooffset_t)end - start;
	vp = object->handle;
	if (old_wm == 0 && object->un_pager.vnp.writemappings != 0) {
		ASSERT_VOP_ELOCKED(vp, "v_writecount inc");
		vp->v_writecount++;
		CTR3(KTR_VFS, "%s: vp %p v_writecount increased to %d",
		    __func__, vp, vp->v_writecount);
	} else if (old_wm != 0 && object->un_pager.vnp.writemappings == 0) {
		ASSERT_VOP_ELOCKED(vp, "v_writecount dec");
		vp->v_writecount--;
		CTR3(KTR_VFS, "%s: vp %p v_writecount decreased to %d",
		    __func__, vp, vp->v_writecount);
	}
	VM_OBJECT_UNLOCK(object);
}

void
vnode_pager_release_writecount(vm_object_t object, vm_offset_t start,
    vm_offset_t end)
{
	struct vnode *vp;
	struct mount *mp;
	vm_offset_t inc;
	int vfslocked;

	VM_OBJECT_LOCK(object);

	/*
	 * First, recheck the object type to account for the race when
	 * the vnode is reclaimed.
	 */
	if (object->type != OBJT_VNODE) {
		VM_OBJECT_UNLOCK(object);
		return;
	}

	/*
	 * Optimize for the case when writemappings is not going to
	 * zero.
	 */
	inc = end - start;
	if (object->un_pager.vnp.writemappings != inc) {
		object->un_pager.vnp.writemappings -= inc;
		VM_OBJECT_UNLOCK(object);
		return;
	}

	vp = object->handle;
	vhold(vp);
	VM_OBJECT_UNLOCK(object);
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	mp = NULL;
	vn_start_write(vp, &mp, V_WAIT);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	/*
	 * Decrement the object's writemappings, by swapping the start
	 * and end arguments for vnode_pager_update_writecount().  If
	 * there was not a race with vnode reclamation, then the
	 * vnode's v_writecount is decremented.
	 */
	vnode_pager_update_writecount(object, end, start);
	VOP_UNLOCK(vp, 0);
	vdrop(vp);
	if (mp != NULL)
		vn_finished_write(mp);
	VFS_UNLOCK_GIANT(vfslocked);
}
1285