vfs_default.c revision 302408
/*-
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed
 * to Berkeley by John Heidemann of the UCLA Ficus project.
 *
 * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/kern/vfs_default.c 296572 2016-03-09 19:05:11Z jhb $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/event.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/lockf.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/rwlock.h>
#include <sys/fcntl.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/dirent.h>
#include <sys/poll.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>

static int	vop_nolookup(struct vop_lookup_args *);
static int	vop_norename(struct vop_rename_args *);
static int	vop_nostrategy(struct vop_strategy_args *);
static int	get_next_dirent(struct vnode *vp, struct dirent **dpp,
				char *dirbuf, int dirbuflen, off_t *off,
				char **cpos, int *len, int *eofflag,
				struct thread *td);
static int	dirent_exists(struct vnode *vp, const char *dirname,
			      struct thread *td);

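/*
 * The smallest valid directory entry: assuming the historical layout in
 * which d_name occupies MAXNAMLEN + 1 bytes, this strips the name buffer
 * from struct dirent and adds back the 4-byte minimum that a padded,
 * NUL-terminated name can occupy.
 */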
#define DIRENT_MINSIZE (sizeof(struct dirent) - (MAXNAMLEN+1) + 4)

static int vop_stdis_text(struct vop_is_text_args *ap);
static int vop_stdset_text(struct vop_set_text_args *ap);
static int vop_stdunset_text(struct vop_unset_text_args *ap);
static int vop_stdget_writecount(struct vop_get_writecount_args *ap);
static int vop_stdadd_writecount(struct vop_add_writecount_args *ap);
static int vop_stdgetpages_async(struct vop_getpages_async_args *ap);

/*
 * This vnode table stores what we want to do if the filesystem doesn't
 * implement a particular VOP.
 *
 * If there is no specific entry here, we will return EOPNOTSUPP.
 *
 * Note that every filesystem has to implement either vop_access
 * or vop_accessx; failing to do so will result in an immediate crash
 * due to stack overflow, as vop_stdaccess() calls vop_stdaccessx(),
 * which calls vop_stdaccess() again, and so on.
 */

struct vop_vector default_vnodeops = {
	.vop_default =		NULL,
	.vop_bypass =		VOP_EOPNOTSUPP,

	.vop_access =		vop_stdaccess,
	.vop_accessx =		vop_stdaccessx,
	.vop_advise =		vop_stdadvise,
	.vop_advlock =		vop_stdadvlock,
	.vop_advlockasync =	vop_stdadvlockasync,
	.vop_advlockpurge =	vop_stdadvlockpurge,
	.vop_allocate =		vop_stdallocate,
	.vop_bmap =		vop_stdbmap,
	.vop_close =		VOP_NULL,
	.vop_fsync =		VOP_NULL,
	.vop_getpages =		vop_stdgetpages,
	.vop_getpages_async =	vop_stdgetpages_async,
	.vop_getwritemount =	vop_stdgetwritemount,
	.vop_inactive =		VOP_NULL,
	.vop_ioctl =		VOP_ENOTTY,
	.vop_kqfilter =		vop_stdkqfilter,
	.vop_islocked =		vop_stdislocked,
	.vop_lock1 =		vop_stdlock,
	.vop_lookup =		vop_nolookup,
	.vop_open =		VOP_NULL,
	.vop_pathconf =		VOP_EINVAL,
	.vop_poll =		vop_nopoll,
	.vop_putpages =		vop_stdputpages,
	.vop_readlink =		VOP_EINVAL,
	.vop_rename =		vop_norename,
	.vop_revoke =		VOP_PANIC,
	.vop_strategy =		vop_nostrategy,
	.vop_unlock =		vop_stdunlock,
	.vop_vptocnp =		vop_stdvptocnp,
	.vop_vptofh =		vop_stdvptofh,
	.vop_unp_bind =		vop_stdunp_bind,
	.vop_unp_connect =	vop_stdunp_connect,
	.vop_unp_detach =	vop_stdunp_detach,
	.vop_is_text =		vop_stdis_text,
	.vop_set_text =		vop_stdset_text,
	.vop_unset_text =	vop_stdunset_text,
	.vop_get_writecount =	vop_stdget_writecount,
	.vop_add_writecount =	vop_stdadd_writecount,
};
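
/*
 * Example (an illustrative sketch, not part of the original file): a
 * filesystem typically chains to this table by setting vop_default in
 * its own vop_vector, overriding only the operations it implements.
 * The myfs_* names here are hypothetical.
 *
 *	static struct vop_vector myfs_vnodeops = {
 *		.vop_default =	&default_vnodeops,
 *		.vop_lookup =	myfs_lookup,
 *		.vop_getattr =	myfs_getattr,
 *		.vop_readdir =	myfs_readdir,
 *	};
 */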

/*
 * Series of placeholder functions for various error returns for
 * VOPs.
 */

int
vop_eopnotsupp(struct vop_generic_args *ap)
{
	/*
	printf("vop_notsupp[%s]\n", ap->a_desc->vdesc_name);
	*/

	return (EOPNOTSUPP);
}

int
vop_ebadf(struct vop_generic_args *ap)
{

	return (EBADF);
}

int
vop_enotty(struct vop_generic_args *ap)
{

	return (ENOTTY);
}

int
vop_einval(struct vop_generic_args *ap)
{

	return (EINVAL);
}

int
vop_enoent(struct vop_generic_args *ap)
{

	return (ENOENT);
}

int
vop_null(struct vop_generic_args *ap)
{

	return (0);
}

/*
 * Helper function to panic when a filesystem reaches a VOP that must
 * never be called.
 */
int
vop_panic(struct vop_generic_args *ap)
{

	panic("filesystem goof: vop_panic[%s]", ap->a_desc->vdesc_name);
}

/*
 * vop_std<something> and vop_no<something> are default functions for use by
 * filesystems that need the "default reasonable" implementation for a
 * particular operation.
 *
 * Documentation for the operations they implement, where it exists, can
 * be found in the corresponding VOP_<SOMETHING>(9) manual page (all
 * uppercase).
 */

/*
 * Default vop for filesystems that do not support name lookup
 */
static int
vop_nolookup(ap)
	struct vop_lookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap;
{

	*ap->a_vpp = NULL;
	return (ENOTDIR);
}

/*
 * vop_norename:
 *
 * Handle unlock and reference counting for the arguments of vop_rename
 * for filesystems that do not implement the rename operation.
 */
static int
vop_norename(struct vop_rename_args *ap)
{

	vop_rename_fail(ap);
	return (EOPNOTSUPP);
}

/*
 *	vop_nostrategy:
 *
 *	Strategy routine for VFS devices that have none.
 *
 *	BIO_ERROR and B_INVAL must be cleared prior to calling any strategy
 *	routine; this is typically done for a BIO_READ strategy call.
 *	Before a write, B_INVAL is assumed to already be clear and should
 *	not be cleared manually unless you just made the buffer invalid.
 *	BIO_ERROR should be cleared either way.
 */

static int
vop_nostrategy(struct vop_strategy_args *ap)
{
	printf("No strategy for buffer at %p\n", ap->a_bp);
	vprint("vnode", ap->a_vp);
	ap->a_bp->b_ioflags |= BIO_ERROR;
	ap->a_bp->b_error = EOPNOTSUPP;
	bufdone(ap->a_bp);
	return (EOPNOTSUPP);
}

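/*
 * Read the next entry from the directory vp.  Entries are buffered in
 * dirbuf (of size dirbuflen); when the buffer is exhausted (*len == 0),
 * the next chunk is fetched with VOP_READDIR.  On success *dpp points
 * at the current entry and *cpos, *len and *off are advanced past it.
 */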
static int
get_next_dirent(struct vnode *vp, struct dirent **dpp, char *dirbuf,
		int dirbuflen, off_t *off, char **cpos, int *len,
		int *eofflag, struct thread *td)
{
	int error, reclen;
	struct uio uio;
	struct iovec iov;
	struct dirent *dp;

	KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp));
	KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp));

	if (*len == 0) {
		iov.iov_base = dirbuf;
		iov.iov_len = dirbuflen;

		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_offset = *off;
		uio.uio_resid = dirbuflen;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_rw = UIO_READ;
		uio.uio_td = td;

		*eofflag = 0;

#ifdef MAC
		error = mac_vnode_check_readdir(td->td_ucred, vp);
		if (error == 0)
#endif
			error = VOP_READDIR(vp, &uio, td->td_ucred, eofflag,
			    NULL, NULL);
		if (error)
			return (error);

		*off = uio.uio_offset;

		*cpos = dirbuf;
		*len = (dirbuflen - uio.uio_resid);

		if (*len == 0)
			return (ENOENT);
	}

	dp = (struct dirent *)(*cpos);
	reclen = dp->d_reclen;
	*dpp = dp;

	/* Check for a malformed directory. */
	if (reclen < DIRENT_MINSIZE)
		return (EINVAL);

	*cpos += reclen;
	*len -= reclen;

	return (0);
}

/*
 * Check if a named file exists in a given directory vnode.
 */
static int
dirent_exists(struct vnode *vp, const char *dirname, struct thread *td)
{
	char *dirbuf, *cpos;
	int error, eofflag, dirbuflen, len, found;
	off_t off;
	struct dirent *dp;
	struct vattr va;

	KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp));
	KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp));

	found = 0;

	error = VOP_GETATTR(vp, &va, td->td_ucred);
	if (error)
		return (found);

	dirbuflen = DEV_BSIZE;
	if (dirbuflen < va.va_blocksize)
		dirbuflen = va.va_blocksize;
	dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK);

	off = 0;
	len = 0;
	do {
		error = get_next_dirent(vp, &dp, dirbuf, dirbuflen, &off,
					&cpos, &len, &eofflag, td);
		if (error)
			goto out;

		if (dp->d_type != DT_WHT && dp->d_fileno != 0 &&
		    strcmp(dp->d_name, dirname) == 0) {
			found = 1;
			goto out;
		}
	} while (len > 0 || !eofflag);

out:
	free(dirbuf, M_TEMP);
	return (found);
}

int
vop_stdaccess(struct vop_access_args *ap)
{

	KASSERT((ap->a_accmode & ~(VEXEC | VWRITE | VREAD | VADMIN |
	    VAPPEND)) == 0, ("invalid bit in accmode"));

	return (VOP_ACCESSX(ap->a_vp, ap->a_accmode, ap->a_cred, ap->a_td));
}

int
vop_stdaccessx(struct vop_accessx_args *ap)
{
	int error;
	accmode_t accmode = ap->a_accmode;

	error = vfs_unixify_accmode(&accmode);
	if (error != 0)
		return (error);

	if (accmode == 0)
		return (0);

	return (VOP_ACCESS(ap->a_vp, accmode, ap->a_cred, ap->a_td));
}

/*
 * Advisory record locking support
 */
int
vop_stdadvlock(struct vop_advlock_args *ap)
{
	struct vnode *vp;
	struct vattr vattr;
	int error;

	vp = ap->a_vp;
	if (ap->a_fl->l_whence == SEEK_END) {
		/*
		 * The NFSv4 server must avoid doing a vn_lock() here, since it
		 * can deadlock the nfsd threads, due to a LOR.  Fortunately
		 * the NFSv4 server always uses SEEK_SET and this code is
		 * only required for the SEEK_END case.
		 */
		vn_lock(vp, LK_SHARED | LK_RETRY);
		error = VOP_GETATTR(vp, &vattr, curthread->td_ucred);
		VOP_UNLOCK(vp, 0);
		if (error)
			return (error);
	} else
		vattr.va_size = 0;

	return (lf_advlock(ap, &(vp->v_lockf), vattr.va_size));
}

int
vop_stdadvlockasync(struct vop_advlockasync_args *ap)
{
	struct vnode *vp;
	struct vattr vattr;
	int error;

	vp = ap->a_vp;
	if (ap->a_fl->l_whence == SEEK_END) {
		/* The size argument is only needed for SEEK_END. */
		vn_lock(vp, LK_SHARED | LK_RETRY);
		error = VOP_GETATTR(vp, &vattr, curthread->td_ucred);
		VOP_UNLOCK(vp, 0);
		if (error)
			return (error);
	} else
		vattr.va_size = 0;

	return (lf_advlockasync(ap, &(vp->v_lockf), vattr.va_size));
}

int
vop_stdadvlockpurge(struct vop_advlockpurge_args *ap)
{
	struct vnode *vp;

	vp = ap->a_vp;
	lf_purgelocks(vp, &vp->v_lockf);
	return (0);
}
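
/*
 * Illustrative userland sketch (not part of the original file): the
 * va_size fetched above matters only for SEEK_END locks, whose start is
 * relative to the current end of file.  For example, the request below
 * write-locks everything from 100 bytes before EOF onward, and
 * lf_advlock() resolves it against va_size:
 *
 *	struct flock fl = {
 *		.l_type = F_WRLCK,
 *		.l_whence = SEEK_END,
 *		.l_start = -100,
 *		.l_len = 0,
 *	};
 *	if (fcntl(fd, F_SETLK, &fl) == -1)
 *		err(1, "fcntl");
 */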

/*
 * vop_stdpathconf:
 *
 * Standard implementation of POSIX pathconf, returning information about
 * filesystem limits.  Override this per filesystem when the filesystem
 * has smaller limits.
 */
int
vop_stdpathconf(ap)
	struct vop_pathconf_args /* {
		struct vnode *a_vp;
		int a_name;
		int *a_retval;
	} */ *ap;
{

	switch (ap->a_name) {
		case _PC_ASYNC_IO:
			*ap->a_retval = _POSIX_ASYNCHRONOUS_IO;
			return (0);
		case _PC_NAME_MAX:
			*ap->a_retval = NAME_MAX;
			return (0);
		case _PC_PATH_MAX:
			*ap->a_retval = PATH_MAX;
			return (0);
		case _PC_LINK_MAX:
			*ap->a_retval = LINK_MAX;
			return (0);
		case _PC_MAX_CANON:
			*ap->a_retval = MAX_CANON;
			return (0);
		case _PC_MAX_INPUT:
			*ap->a_retval = MAX_INPUT;
			return (0);
		case _PC_PIPE_BUF:
			*ap->a_retval = PIPE_BUF;
			return (0);
		case _PC_CHOWN_RESTRICTED:
			*ap->a_retval = 1;
			return (0);
		case _PC_VDISABLE:
			*ap->a_retval = _POSIX_VDISABLE;
			return (0);
		default:
			return (EINVAL);
	}
	/* NOTREACHED */
}
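
/*
 * Illustrative userland sketch (not part of the original file): the
 * values above are what a caller sees through pathconf(2) on a
 * filesystem that relies on this default:
 *
 *	long name_max;
 *
 *	name_max = pathconf("/some/dir", _PC_NAME_MAX);
 *	if (name_max == -1)
 *		err(1, "pathconf");	(an unsupported name yields EINVAL)
 */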

/*
 * Standard lock, unlock and islocked functions.
 */
int
vop_stdlock(ap)
	struct vop_lock1_args /* {
		struct vnode *a_vp;
		int a_flags;
		char *file;
		int line;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	return (_lockmgr_args(vp->v_vnlock, ap->a_flags, VI_MTX(vp),
	    LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, ap->a_file,
	    ap->a_line));
}

/* See above. */
int
vop_stdunlock(ap)
	struct vop_unlock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	return (lockmgr(vp->v_vnlock, ap->a_flags | LK_RELEASE, VI_MTX(vp)));
}

/* See above. */
int
vop_stdislocked(ap)
	struct vop_islocked_args /* {
		struct vnode *a_vp;
	} */ *ap;
{

	return (lockstatus(ap->a_vp->v_vnlock));
}

/*
 * Default poll support: have poll_no_poll() report the standard
 * read/write events as ready and flag non-standard requests, for use by
 * vnodes that do not implement polling.
 */
int
vop_nopoll(ap)
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int  a_events;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{

	return (poll_no_poll(ap->a_events));
}

/*
 * Implement poll for local filesystems that support it.
 */
int
vop_stdpoll(ap)
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int  a_events;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{
	if (ap->a_events & ~POLLSTANDARD)
		return (vn_pollrecord(ap->a_vp, ap->a_td, ap->a_events));
	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

/*
 * Return our mount point, as we will take charge of the writes.
 */
int
vop_stdgetwritemount(ap)
	struct vop_getwritemount_args /* {
		struct vnode *a_vp;
		struct mount **a_mpp;
	} */ *ap;
{
	struct mount *mp;

	/*
	 * XXX Since this is called unlocked we may be recycled while
	 * attempting to ref the mount.  If this is the case, our mount point
	 * will be set to NULL.  We only have to prevent this call from
	 * returning with a ref to an incorrect mountpoint.  It is not
	 * harmful to return with a ref to our previous mountpoint.
	 */
	mp = ap->a_vp->v_mount;
	if (mp != NULL) {
		vfs_ref(mp);
		if (mp != ap->a_vp->v_mount) {
			vfs_rel(mp);
			mp = NULL;
		}
	}
	*(ap->a_mpp) = mp;
	return (0);
}

/*
 * vop_stdbmap:
 *
 * Map a logical block number within a file to a device-relative block
 * number by scaling with the filesystem's I/O size, and report that no
 * contiguous run of blocks follows in either direction.  See VOP_BMAP(9).
 */
int
vop_stdbmap(ap)
	struct vop_bmap_args /* {
		struct vnode *a_vp;
		daddr_t  a_bn;
		struct bufobj **a_bop;
		daddr_t *a_bnp;
		int *a_runp;
		int *a_runb;
	} */ *ap;
{

	if (ap->a_bop != NULL)
		*ap->a_bop = &ap->a_vp->v_bufobj;
	if (ap->a_bnp != NULL)
		*ap->a_bnp = ap->a_bn * btodb(ap->a_vp->v_mount->mnt_stat.f_iosize);
	if (ap->a_runp != NULL)
		*ap->a_runp = 0;
	if (ap->a_runb != NULL)
		*ap->a_runb = 0;
	return (0);
}
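
/*
 * Worked example (illustrative): with an f_iosize of 16384 and the
 * standard DEV_BSIZE of 512, btodb(16384) == 32, so logical block 5 of
 * the file maps to device block 5 * 32 == 160.
 */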

int
vop_stdfsync(ap)
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int a_waitfor;
		struct thread *a_td;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct buf *bp;
	struct bufobj *bo;
	struct buf *nbp;
	int error = 0;
	int maxretry = 1000;     /* large, arbitrarily chosen */

	bo = &vp->v_bufobj;
	BO_LOCK(bo);
loop1:
	/*
	 * MARK/SCAN initialization to avoid infinite loops.
	 */
	TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) {
		bp->b_vflags &= ~BV_SCANNED;
		bp->b_error = 0;
	}

	/*
	 * Flush all dirty buffers associated with a vnode.
	 */
loop2:
	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
		if ((bp->b_vflags & BV_SCANNED) != 0)
			continue;
		bp->b_vflags |= BV_SCANNED;
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
			if (ap->a_waitfor != MNT_WAIT)
				continue;
			if (BUF_LOCK(bp,
			    LK_EXCLUSIVE | LK_INTERLOCK | LK_SLEEPFAIL,
			    BO_LOCKPTR(bo)) != 0) {
				BO_LOCK(bo);
				goto loop1;
			}
			BO_LOCK(bo);
		}
		BO_UNLOCK(bo);
		KASSERT(bp->b_bufobj == bo,
		    ("bp %p wrong b_bufobj %p should be %p",
		    bp, bp->b_bufobj, bo));
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("fsync: not dirty");
		if ((vp->v_object != NULL) && (bp->b_flags & B_CLUSTEROK)) {
			vfs_bio_awrite(bp);
		} else {
			bremfree(bp);
			bawrite(bp);
		}
		BO_LOCK(bo);
		goto loop2;
	}

	/*
	 * If synchronous the caller expects us to completely resolve all
	 * dirty buffers in the system.  Wait for in-progress I/O to
	 * complete (which could include background bitmap writes), then
	 * retry if dirty blocks still exist.
	 */
	if (ap->a_waitfor == MNT_WAIT) {
		bufobj_wwait(bo, 0, 0);
		if (bo->bo_dirty.bv_cnt > 0) {
			/*
			 * If we are unable to write any of these buffers
			 * then we fail now rather than trying endlessly
			 * to write them out.
			 */
			TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs)
				if ((error = bp->b_error) == 0)
					continue;
			if (error == 0 && --maxretry >= 0)
				goto loop1;
			error = EAGAIN;
		}
	}
	BO_UNLOCK(bo);
	if (error == EAGAIN)
		vprint("fsync: giving up on dirty", vp);

	return (error);
}

/*
 * Default getpages: hand the request to the generic vnode pager.
 * See VOP_GETPAGES(9).
 */
int
vop_stdgetpages(ap)
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		vm_page_t *a_m;
		int a_count;
		int *a_rbehind;
		int *a_rahead;
	} */ *ap;
{

	return (vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
	    ap->a_count, ap->a_rbehind, ap->a_rahead, NULL, NULL));
}

static int
vop_stdgetpages_async(struct vop_getpages_async_args *ap)
{
	int error;

	error = VOP_GETPAGES(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind,
	    ap->a_rahead);
	ap->a_iodone(ap->a_arg, ap->a_m, ap->a_count, error);
	return (error);
}

int
vop_stdkqfilter(struct vop_kqfilter_args *ap)
{
	return (vfs_kqfilter(ap));
}

/*
 * Default putpages: hand the request to the generic vnode pager.
 * See VOP_PUTPAGES(9).
 */
int
vop_stdputpages(ap)
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		vm_page_t *a_m;
		int a_count;
		int a_sync;
		int *a_rtvals;
	} */ *ap;
{

	return (vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
	    ap->a_sync, ap->a_rtvals));
}

int
vop_stdvptofh(struct vop_vptofh_args *ap)
{
	return (EOPNOTSUPP);
}

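/*
 * Default vptocnp: recover the name of the vnode vp within its parent
 * directory.  The parent is reached by opening "..", and its entries are
 * scanned for one whose file number matches vp's; the name found is
 * copied to the tail of buf and *buflen is moved back to the start of
 * the copied component.
 */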
int
vop_stdvptocnp(struct vop_vptocnp_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode **dvp = ap->a_vpp;
	struct ucred *cred = ap->a_cred;
	char *buf = ap->a_buf;
	int *buflen = ap->a_buflen;
	char *dirbuf, *cpos;
	int i, error, eofflag, dirbuflen, flags, locked, len, covered;
	off_t off;
	ino_t fileno;
	struct vattr va;
	struct nameidata nd;
	struct thread *td;
	struct dirent *dp;
	struct vnode *mvp;

	i = *buflen;
	error = 0;
	covered = 0;
	td = curthread;

	if (vp->v_type != VDIR)
		return (ENOENT);

	error = VOP_GETATTR(vp, &va, cred);
	if (error)
		return (error);

	VREF(vp);
	locked = VOP_ISLOCKED(vp);
	VOP_UNLOCK(vp, 0);
	NDINIT_ATVP(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE,
	    "..", vp, td);
	flags = FREAD;
	error = vn_open_cred(&nd, &flags, 0, VN_OPEN_NOAUDIT, cred, NULL);
	if (error) {
		vn_lock(vp, locked | LK_RETRY);
		return (error);
	}
	NDFREE(&nd, NDF_ONLY_PNBUF);

	mvp = *dvp = nd.ni_vp;

	if (vp->v_mount != (*dvp)->v_mount &&
	    ((*dvp)->v_vflag & VV_ROOT) &&
	    ((*dvp)->v_mount->mnt_flag & MNT_UNION)) {
		*dvp = (*dvp)->v_mount->mnt_vnodecovered;
		VREF(mvp);
		VOP_UNLOCK(mvp, 0);
		vn_close(mvp, FREAD, cred, td);
		VREF(*dvp);
		vn_lock(*dvp, LK_SHARED | LK_RETRY);
		covered = 1;
	}

	fileno = va.va_fileid;

	dirbuflen = DEV_BSIZE;
	if (dirbuflen < va.va_blocksize)
		dirbuflen = va.va_blocksize;
	dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK);

	if ((*dvp)->v_type != VDIR) {
		error = ENOENT;
		goto out;
	}

	off = 0;
	len = 0;
	do {
		/* Call VOP_READDIR of the parent. */
		error = get_next_dirent(*dvp, &dp, dirbuf, dirbuflen, &off,
					&cpos, &len, &eofflag, td);
		if (error)
			goto out;

		if ((dp->d_type != DT_WHT) &&
		    (dp->d_fileno == fileno)) {
			if (covered) {
				VOP_UNLOCK(*dvp, 0);
				vn_lock(mvp, LK_SHARED | LK_RETRY);
				if (dirent_exists(mvp, dp->d_name, td)) {
					error = ENOENT;
					VOP_UNLOCK(mvp, 0);
					vn_lock(*dvp, LK_SHARED | LK_RETRY);
					goto out;
				}
				VOP_UNLOCK(mvp, 0);
				vn_lock(*dvp, LK_SHARED | LK_RETRY);
			}
			i -= dp->d_namlen;

			if (i < 0) {
				error = ENOMEM;
				goto out;
			}
			if (dp->d_namlen == 1 && dp->d_name[0] == '.') {
				error = ENOENT;
			} else {
				bcopy(dp->d_name, buf + i, dp->d_namlen);
				error = 0;
			}
			goto out;
		}
	} while (len > 0 || !eofflag);
	error = ENOENT;

out:
	free(dirbuf, M_TEMP);
	if (!error) {
		*buflen = i;
		vref(*dvp);
	}
	if (covered) {
		vput(*dvp);
		vrele(mvp);
	} else {
		VOP_UNLOCK(mvp, 0);
		vn_close(mvp, FREAD, cred, td);
	}
	vn_lock(vp, locked | LK_RETRY);
	return (error);
}

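/*
 * Default allocate: a fallback for filesystems without native support
 * for posix_fallocate(2)-style requests.  Each block in the range is
 * read back (or zero-filled beyond EOF) and rewritten, forcing the
 * filesystem to allocate backing store.  The loop checks should_yield()
 * and returns early with the unprocessed range left in *a_len and
 * *a_offset so the caller can restart the operation.
 */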
int
vop_stdallocate(struct vop_allocate_args *ap)
{
#ifdef __notyet__
	struct statfs sfs;
#endif
	struct iovec aiov;
	struct vattr vattr, *vap;
	struct uio auio;
	off_t fsize, len, cur, offset;
	uint8_t *buf;
	struct thread *td;
	struct vnode *vp;
	size_t iosize;
	int error;

	buf = NULL;
	error = 0;
	td = curthread;
	vap = &vattr;
	vp = ap->a_vp;
	len = *ap->a_len;
	offset = *ap->a_offset;

	error = VOP_GETATTR(vp, vap, td->td_ucred);
	if (error != 0)
		goto out;
	fsize = vap->va_size;
	iosize = vap->va_blocksize;
	if (iosize == 0)
		iosize = BLKDEV_IOSIZE;
	if (iosize > MAXPHYS)
		iosize = MAXPHYS;
	buf = malloc(iosize, M_TEMP, M_WAITOK);

#ifdef __notyet__
	/*
	 * Check if the filesystem sets f_maxfilesize; if not use
	 * VOP_SETATTR to perform the check.
	 */
	error = VFS_STATFS(vp->v_mount, &sfs, td);
	if (error != 0)
		goto out;
	if (sfs.f_maxfilesize) {
		if (offset > sfs.f_maxfilesize || len > sfs.f_maxfilesize ||
		    offset + len > sfs.f_maxfilesize) {
			error = EFBIG;
			goto out;
		}
	} else
#endif
	if (offset + len > vap->va_size) {
		/*
		 * Test offset + len against the filesystem's maxfilesize.
		 */
		VATTR_NULL(vap);
		vap->va_size = offset + len;
		error = VOP_SETATTR(vp, vap, td->td_ucred);
		if (error != 0)
			goto out;
		VATTR_NULL(vap);
		vap->va_size = fsize;
		error = VOP_SETATTR(vp, vap, td->td_ucred);
		if (error != 0)
			goto out;
	}

	for (;;) {
		/*
		 * Read and write back anything below the nominal file
		 * size.  There's currently no way outside the filesystem
		 * to know whether this area is sparse or not.
		 */
		cur = iosize;
		if ((offset % iosize) != 0)
			cur -= (offset % iosize);
		if (cur > len)
			cur = len;
		if (offset < fsize) {
			aiov.iov_base = buf;
			aiov.iov_len = cur;
			auio.uio_iov = &aiov;
			auio.uio_iovcnt = 1;
			auio.uio_offset = offset;
			auio.uio_resid = cur;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_td = td;
			error = VOP_READ(vp, &auio, 0, td->td_ucred);
			if (error != 0)
				break;
			if (auio.uio_resid > 0) {
				bzero(buf + cur - auio.uio_resid,
				    auio.uio_resid);
			}
		} else {
			bzero(buf, cur);
		}

		aiov.iov_base = buf;
		aiov.iov_len = cur;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = offset;
		auio.uio_resid = cur;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_WRITE;
		auio.uio_td = td;

		error = VOP_WRITE(vp, &auio, 0, td->td_ucred);
		if (error != 0)
			break;

		len -= cur;
		offset += cur;
		if (len == 0)
			break;
		if (should_yield())
			break;
	}

 out:
	*ap->a_len = len;
	*ap->a_offset = offset;
	free(buf, M_TEMP);
	return (error);
}

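/*
 * Default advise: service the posix_fadvise(2) hints that can be handled
 * generically.  POSIX_FADV_DONTNEED deactivates the backing VM pages and
 * releases the buffers caching the range; POSIX_FADV_WILLNEED is left to
 * filesystems that can start asynchronous read-ahead.
 */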
int
vop_stdadvise(struct vop_advise_args *ap)
{
	struct vnode *vp;
	struct bufobj *bo;
	daddr_t startn, endn;
	off_t start, end;
	int bsize, error;

	vp = ap->a_vp;
	switch (ap->a_advice) {
	case POSIX_FADV_WILLNEED:
		/*
		 * Do nothing for now.  Filesystems should provide a
		 * custom method which starts an asynchronous read of
		 * the requested region.
		 */
		error = 0;
		break;
	case POSIX_FADV_DONTNEED:
		error = 0;
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		if (vp->v_iflag & VI_DOOMED) {
			VOP_UNLOCK(vp, 0);
			break;
		}

		/*
		 * Deactivate pages in the specified range from the backing VM
		 * object.  Pages that are resident in the buffer cache will
		 * remain wired until their corresponding buffers are released
		 * below.
		 */
		if (vp->v_object != NULL) {
			start = trunc_page(ap->a_start);
			end = round_page(ap->a_end);
			VM_OBJECT_WLOCK(vp->v_object);
			vm_object_page_noreuse(vp->v_object, OFF_TO_IDX(start),
			    OFF_TO_IDX(end));
			VM_OBJECT_WUNLOCK(vp->v_object);
		}

		bo = &vp->v_bufobj;
		BO_RLOCK(bo);
		bsize = vp->v_bufobj.bo_bsize;
		startn = ap->a_start / bsize;
		endn = ap->a_end / bsize;
		error = bnoreuselist(&bo->bo_clean, bo, startn, endn);
		if (error == 0)
			error = bnoreuselist(&bo->bo_dirty, bo, startn, endn);
		BO_RUNLOCK(bo);
		VOP_UNLOCK(vp, 0);
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}

int
vop_stdunp_bind(struct vop_unp_bind_args *ap)
{

	ap->a_vp->v_socket = ap->a_socket;
	return (0);
}

int
vop_stdunp_connect(struct vop_unp_connect_args *ap)
{

	*ap->a_socket = ap->a_vp->v_socket;
	return (0);
}

int
vop_stdunp_detach(struct vop_unp_detach_args *ap)
{

	ap->a_vp->v_socket = NULL;
	return (0);
}

static int
vop_stdis_text(struct vop_is_text_args *ap)
{

	return ((ap->a_vp->v_vflag & VV_TEXT) != 0);
}

static int
vop_stdset_text(struct vop_set_text_args *ap)
{

	ap->a_vp->v_vflag |= VV_TEXT;
	return (0);
}

static int
vop_stdunset_text(struct vop_unset_text_args *ap)
{

	ap->a_vp->v_vflag &= ~VV_TEXT;
	return (0);
}

static int
vop_stdget_writecount(struct vop_get_writecount_args *ap)
{

	*ap->a_writecount = ap->a_vp->v_writecount;
	return (0);
}

static int
vop_stdadd_writecount(struct vop_add_writecount_args *ap)
{

	ap->a_vp->v_writecount += ap->a_inc;
	return (0);
}

/*
 * VFS default ops: used to fill the VFS function table with reasonable
 * default return values.
 */
int
vfs_stdroot(mp, flags, vpp)
	struct mount *mp;
	int flags;
	struct vnode **vpp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdstatfs(mp, sbp)
	struct mount *mp;
	struct statfs *sbp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdquotactl(mp, cmds, uid, arg)
	struct mount *mp;
	int cmds;
	uid_t uid;
	void *arg;
{

	return (EOPNOTSUPP);
}

int
vfs_stdsync(mp, waitfor)
	struct mount *mp;
	int waitfor;
{
	struct vnode *vp, *mvp;
	struct thread *td;
	int error, lockreq, allerror = 0;

	td = curthread;
	lockreq = LK_EXCLUSIVE | LK_INTERLOCK;
	if (waitfor != MNT_WAIT)
		lockreq |= LK_NOWAIT;
	/*
	 * Force stale buffer cache information to be flushed.
	 */
loop:
	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
		if (vp->v_bufobj.bo_dirty.bv_cnt == 0) {
			VI_UNLOCK(vp);
			continue;
		}
		if ((error = vget(vp, lockreq, td)) != 0) {
			if (error == ENOENT) {
				MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
				goto loop;
			}
			continue;
		}
		error = VOP_FSYNC(vp, waitfor, td);
		if (error)
			allerror = error;
		vput(vp);
	}
	return (allerror);
}

int
vfs_stdnosync(mp, waitfor)
	struct mount *mp;
	int waitfor;
{

	return (0);
}

int
vfs_stdvget(mp, ino, flags, vpp)
	struct mount *mp;
	ino_t ino;
	int flags;
	struct vnode **vpp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdfhtovp(mp, fhp, flags, vpp)
	struct mount *mp;
	struct fid *fhp;
	int flags;
	struct vnode **vpp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdinit(vfsp)
	struct vfsconf *vfsp;
{

	return (0);
}

int
vfs_stduninit(vfsp)
	struct vfsconf *vfsp;
{

	return (0);
}

int
vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace, attrname)
	struct mount *mp;
	int cmd;
	struct vnode *filename_vp;
	int attrnamespace;
	const char *attrname;
{

	if (filename_vp != NULL)
		VOP_UNLOCK(filename_vp, 0);
	return (EOPNOTSUPP);
}

int
vfs_stdsysctl(mp, op, req)
	struct mount *mp;
	fsctlop_t op;
	struct sysctl_req *req;
{

	return (EOPNOTSUPP);
}

/* end of vfs default ops */
