/*-
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed
 * to Berkeley by John Heidemann of the UCLA Ficus project.
 *
 * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/vfs_default.c 276200 2014-12-25 14:44:04Z rmacklem $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/event.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/lockf.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/rwlock.h>
#include <sys/fcntl.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/dirent.h>
#include <sys/poll.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>

static int	vop_nolookup(struct vop_lookup_args *);
static int	vop_norename(struct vop_rename_args *);
static int	vop_nostrategy(struct vop_strategy_args *);
static int	get_next_dirent(struct vnode *vp, struct dirent **dpp,
				char *dirbuf, int dirbuflen, off_t *off,
				char **cpos, int *len, int *eofflag,
				struct thread *td);
static int	dirent_exists(struct vnode *vp, const char *dirname,
			      struct thread *td);

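/*
 * Smallest record a valid dirent can occupy: the fixed header plus a
 * minimal 4-byte d_name area (names are padded to a 4-byte boundary).
 */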
#define DIRENT_MINSIZE (sizeof(struct dirent) - (MAXNAMLEN+1) + 4)

static int vop_stdis_text(struct vop_is_text_args *ap);
static int vop_stdset_text(struct vop_set_text_args *ap);
static int vop_stdunset_text(struct vop_unset_text_args *ap);
static int vop_stdget_writecount(struct vop_get_writecount_args *ap);
static int vop_stdadd_writecount(struct vop_add_writecount_args *ap);
static int vop_stdgetpages_async(struct vop_getpages_async_args *ap);

/*
 * This vnode table stores what we want to do if the filesystem doesn't
 * implement a particular VOP.
 *
 * If there is no specific entry here, we will return EOPNOTSUPP.
 *
 * Note that every filesystem has to implement either vop_access
 * or vop_accessx; failing to do so will result in immediate crash
 * due to stack overflow, as vop_stdaccess() calls vop_stdaccessx(),
 * which calls vop_stdaccess() etc.
 */

struct vop_vector default_vnodeops = {
	.vop_default =		NULL,
	.vop_bypass =		VOP_EOPNOTSUPP,

	.vop_access =		vop_stdaccess,
	.vop_accessx =		vop_stdaccessx,
	.vop_advise =		vop_stdadvise,
	.vop_advlock =		vop_stdadvlock,
	.vop_advlockasync =	vop_stdadvlockasync,
	.vop_advlockpurge =	vop_stdadvlockpurge,
	.vop_allocate =		vop_stdallocate,
	.vop_bmap =		vop_stdbmap,
	.vop_close =		VOP_NULL,
	.vop_fsync =		VOP_NULL,
	.vop_getpages =		vop_stdgetpages,
	.vop_getpages_async =	vop_stdgetpages_async,
	.vop_getwritemount =	vop_stdgetwritemount,
	.vop_inactive =		VOP_NULL,
	.vop_ioctl =		VOP_ENOTTY,
	.vop_kqfilter =		vop_stdkqfilter,
	.vop_islocked =		vop_stdislocked,
	.vop_lock1 =		vop_stdlock,
	.vop_lookup =		vop_nolookup,
	.vop_open =		VOP_NULL,
	.vop_pathconf =		VOP_EINVAL,
	.vop_poll =		vop_nopoll,
	.vop_putpages =		vop_stdputpages,
	.vop_readlink =		VOP_EINVAL,
	.vop_rename =		vop_norename,
	.vop_revoke =		VOP_PANIC,
	.vop_strategy =		vop_nostrategy,
	.vop_unlock =		vop_stdunlock,
	.vop_vptocnp =		vop_stdvptocnp,
	.vop_vptofh =		vop_stdvptofh,
	.vop_unp_bind =		vop_stdunp_bind,
	.vop_unp_connect =	vop_stdunp_connect,
	.vop_unp_detach =	vop_stdunp_detach,
	.vop_is_text =		vop_stdis_text,
	.vop_set_text =		vop_stdset_text,
	.vop_unset_text =	vop_stdunset_text,
	.vop_get_writecount =	vop_stdget_writecount,
	.vop_add_writecount =	vop_stdadd_writecount,
};
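
/*
 * Illustrative sketch (not part of this file): a filesystem normally
 * reaches these defaults by chaining to default_vnodeops from its own
 * vop_vector, overriding only the operations it implements itself.
 * The myfs names below are hypothetical:
 *
 *	struct vop_vector myfs_vnodeops = {
 *		.vop_default =	&default_vnodeops,
 *		.vop_lookup =	myfs_lookup,
 *		.vop_access =	myfs_access,
 *	};
 */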

/*
 * Series of placeholder functions for various error returns for
 * VOPs.
 */

int
vop_eopnotsupp(struct vop_generic_args *ap)
{
	/*
	printf("vop_notsupp[%s]\n", ap->a_desc->vdesc_name);
	*/

	return (EOPNOTSUPP);
}

int
vop_ebadf(struct vop_generic_args *ap)
{

	return (EBADF);
}

int
vop_enotty(struct vop_generic_args *ap)
{

	return (ENOTTY);
}

int
vop_einval(struct vop_generic_args *ap)
{

	return (EINVAL);
}

int
vop_enoent(struct vop_generic_args *ap)
{

	return (ENOENT);
}

int
vop_null(struct vop_generic_args *ap)
{

	return (0);
}

/*
 * Helper function to panic on some bad VOPs in some filesystems.
 */
int
vop_panic(struct vop_generic_args *ap)
{

	panic("filesystem goof: vop_panic[%s]", ap->a_desc->vdesc_name);
}

/*
 * vop_std<something> and vop_no<something> are default functions for use by
 * filesystems that need the "default reasonable" implementation for a
 * particular operation.
 *
 * The documentation for the operations they implement, where it exists,
 * is found in the VOP_<SOMETHING>(9) manpage (all uppercase).
 */

/*
 * Default vop for filesystems that do not support name lookup
 */
static int
vop_nolookup(ap)
	struct vop_lookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap;
{

	*ap->a_vpp = NULL;
	return (ENOTDIR);
}

/*
 * vop_norename:
 *
 * Handle unlock and reference counting for the arguments of vop_rename
 * for filesystems that do not implement the rename operation.
 */
static int
vop_norename(struct vop_rename_args *ap)
{

	vop_rename_fail(ap);
	return (EOPNOTSUPP);
}

/*
 *	vop_nostrategy:
 *
 *	Strategy routine for VFS devices that have none.
 *
 *	BIO_ERROR and B_INVAL must be cleared prior to calling any strategy
 *	routine.  Typically this is done for a BIO_READ strategy call.
 *	Typically B_INVAL is assumed to already be clear prior to a write
 *	and should not be cleared manually unless you just made the buffer
 *	invalid.  BIO_ERROR should be cleared either way.
 */

static int
vop_nostrategy(struct vop_strategy_args *ap)
{
	printf("No strategy for buffer at %p\n", ap->a_bp);
	vprint("vnode", ap->a_vp);
	ap->a_bp->b_ioflags |= BIO_ERROR;
	ap->a_bp->b_error = EOPNOTSUPP;
	bufdone(ap->a_bp);
	return (EOPNOTSUPP);
}

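/*
 * Fetch the next directory entry into *dpp, refilling the caller's
 * buffer via VOP_READDIR on the (locked) directory vp whenever the
 * previous read has been consumed.  *off, *cpos and *len carry the
 * scan state between calls; *eofflag is set once the directory is
 * exhausted.
 */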
static int
get_next_dirent(struct vnode *vp, struct dirent **dpp, char *dirbuf,
		int dirbuflen, off_t *off, char **cpos, int *len,
		int *eofflag, struct thread *td)
{
	int error, reclen;
	struct uio uio;
	struct iovec iov;
	struct dirent *dp;

	KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp));
	KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp));

	if (*len == 0) {
		iov.iov_base = dirbuf;
		iov.iov_len = dirbuflen;

		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_offset = *off;
		uio.uio_resid = dirbuflen;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_rw = UIO_READ;
		uio.uio_td = td;

		*eofflag = 0;

#ifdef MAC
		error = mac_vnode_check_readdir(td->td_ucred, vp);
		if (error == 0)
#endif
			error = VOP_READDIR(vp, &uio, td->td_ucred, eofflag,
			    NULL, NULL);
		if (error)
			return (error);

		*off = uio.uio_offset;

		*cpos = dirbuf;
		*len = (dirbuflen - uio.uio_resid);

		if (*len == 0)
			return (ENOENT);
	}

	dp = (struct dirent *)(*cpos);
	reclen = dp->d_reclen;
	*dpp = dp;

	/* Check for a malformed directory entry. */
	if (reclen < DIRENT_MINSIZE)
		return (EINVAL);

	*cpos += reclen;
	*len -= reclen;

	return (0);
}

/*
 * Check if a named file exists in a given directory vnode.
 */
static int
dirent_exists(struct vnode *vp, const char *dirname, struct thread *td)
{
	char *dirbuf, *cpos;
	int error, eofflag, dirbuflen, len, found;
	off_t off;
	struct dirent *dp;
	struct vattr va;

	KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp));
	KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp));

	found = 0;

	error = VOP_GETATTR(vp, &va, td->td_ucred);
	if (error)
		return (found);

	dirbuflen = DEV_BSIZE;
	if (dirbuflen < va.va_blocksize)
		dirbuflen = va.va_blocksize;
	dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK);

	off = 0;
	len = 0;
	do {
		error = get_next_dirent(vp, &dp, dirbuf, dirbuflen, &off,
					&cpos, &len, &eofflag, td);
		if (error)
			goto out;

		if (dp->d_type != DT_WHT && dp->d_fileno != 0 &&
		    strcmp(dp->d_name, dirname) == 0) {
			found = 1;
			goto out;
		}
	} while (len > 0 || !eofflag);

out:
	free(dirbuf, M_TEMP);
	return (found);
}

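/*
 * The access defaults below are intentionally mutual: vop_stdaccess()
 * forwards to VOP_ACCESSX() and vop_stdaccessx() forwards to
 * VOP_ACCESS(), so a filesystem must implement at least one of the two
 * itself (see the warning above default_vnodeops).
 */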
int
vop_stdaccess(struct vop_access_args *ap)
{

	KASSERT((ap->a_accmode & ~(VEXEC | VWRITE | VREAD | VADMIN |
	    VAPPEND)) == 0, ("invalid bit in accmode"));

	return (VOP_ACCESSX(ap->a_vp, ap->a_accmode, ap->a_cred, ap->a_td));
}

int
vop_stdaccessx(struct vop_accessx_args *ap)
{
	int error;
	accmode_t accmode = ap->a_accmode;

	error = vfs_unixify_accmode(&accmode);
	if (error != 0)
		return (error);

	if (accmode == 0)
		return (0);

	return (VOP_ACCESS(ap->a_vp, accmode, ap->a_cred, ap->a_td));
}

/*
 * Advisory record locking support
 */
int
vop_stdadvlock(struct vop_advlock_args *ap)
{
	struct vnode *vp;
	struct vattr vattr;
	int error;

	vp = ap->a_vp;
	if (ap->a_fl->l_whence == SEEK_END) {
		/*
		 * The NFSv4 server must avoid doing a vn_lock() here, since it
		 * can deadlock the nfsd threads, due to a LOR.  Fortunately
		 * the NFSv4 server always uses SEEK_SET and this code is
		 * only required for the SEEK_END case.
		 */
		vn_lock(vp, LK_SHARED | LK_RETRY);
		error = VOP_GETATTR(vp, &vattr, curthread->td_ucred);
		VOP_UNLOCK(vp, 0);
		if (error)
			return (error);
	} else
		vattr.va_size = 0;

	return (lf_advlock(ap, &(vp->v_lockf), vattr.va_size));
}

int
vop_stdadvlockasync(struct vop_advlockasync_args *ap)
{
	struct vnode *vp;
	struct vattr vattr;
	int error;

	vp = ap->a_vp;
	if (ap->a_fl->l_whence == SEEK_END) {
		/* The size argument is only needed for SEEK_END. */
		vn_lock(vp, LK_SHARED | LK_RETRY);
		error = VOP_GETATTR(vp, &vattr, curthread->td_ucred);
		VOP_UNLOCK(vp, 0);
		if (error)
			return (error);
	} else
		vattr.va_size = 0;

	return (lf_advlockasync(ap, &(vp->v_lockf), vattr.va_size));
}

int
vop_stdadvlockpurge(struct vop_advlockpurge_args *ap)
{
	struct vnode *vp;

	vp = ap->a_vp;
	lf_purgelocks(vp, &vp->v_lockf);
	return (0);
}

/*
 * vop_stdpathconf:
 *
 * Standard implementation of POSIX pathconf, to get information about limits
 * for a filesystem.
 * Override per filesystem for the case where the filesystem has smaller
 * limits.
 */
int
vop_stdpathconf(ap)
	struct vop_pathconf_args /* {
		struct vnode *a_vp;
		int a_name;
		int *a_retval;
	} */ *ap;
{

	switch (ap->a_name) {
		case _PC_NAME_MAX:
			*ap->a_retval = NAME_MAX;
			return (0);
		case _PC_PATH_MAX:
			*ap->a_retval = PATH_MAX;
			return (0);
		case _PC_LINK_MAX:
			*ap->a_retval = LINK_MAX;
			return (0);
		case _PC_MAX_CANON:
			*ap->a_retval = MAX_CANON;
			return (0);
		case _PC_MAX_INPUT:
			*ap->a_retval = MAX_INPUT;
			return (0);
		case _PC_PIPE_BUF:
			*ap->a_retval = PIPE_BUF;
			return (0);
		case _PC_CHOWN_RESTRICTED:
			*ap->a_retval = 1;
			return (0);
		case _PC_VDISABLE:
			*ap->a_retval = _POSIX_VDISABLE;
			return (0);
		default:
			return (EINVAL);
	}
	/* NOTREACHED */
}
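
/*
 * Illustrative sketch (not part of this file): a filesystem with
 * tighter limits overrides only the names that differ and defers the
 * rest to the default above.  The myfs names are hypothetical:
 *
 *	static int
 *	myfs_pathconf(struct vop_pathconf_args *ap)
 *	{
 *
 *		if (ap->a_name == _PC_NAME_MAX) {
 *			*ap->a_retval = 255;
 *			return (0);
 *		}
 *		return (vop_stdpathconf(ap));
 *	}
 */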
504
/*
 * Standard lock, unlock and islocked functions.
 */
int
vop_stdlock(ap)
	struct vop_lock1_args /* {
		struct vnode *a_vp;
		int a_flags;
		char *file;
		int line;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	return (_lockmgr_args(vp->v_vnlock, ap->a_flags, VI_MTX(vp),
	    LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, ap->a_file,
	    ap->a_line));
}

/* See above. */
int
vop_stdunlock(ap)
	struct vop_unlock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	return (lockmgr(vp->v_vnlock, ap->a_flags | LK_RELEASE, VI_MTX(vp)));
}

/* See above. */
int
vop_stdislocked(ap)
	struct vop_islocked_args /* {
		struct vnode *a_vp;
	} */ *ap;
{

	return (lockstatus(ap->a_vp->v_vnlock));
}

/*
 * Return true for select/poll.
 */
int
vop_nopoll(ap)
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int  a_events;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{

	return (poll_no_poll(ap->a_events));
}

/*
 * Implement poll for local filesystems that support it.
 */
int
vop_stdpoll(ap)
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int  a_events;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{
	if (ap->a_events & ~POLLSTANDARD)
		return (vn_pollrecord(ap->a_vp, ap->a_td, ap->a_events));
	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

/*
 * Return our mount point, as we will take charge of the writes.
 */
int
vop_stdgetwritemount(ap)
	struct vop_getwritemount_args /* {
		struct vnode *a_vp;
		struct mount **a_mpp;
	} */ *ap;
{
	struct mount *mp;

	/*
	 * XXX Since this is called unlocked we may be recycled while
	 * attempting to ref the mount.  If this is the case our mountpoint
	 * will be set to NULL.  We only have to prevent this call from
	 * returning with a ref to an incorrect mountpoint.  It is not
	 * harmful to return with a ref to our previous mountpoint.
	 */
	mp = ap->a_vp->v_mount;
	if (mp != NULL) {
		vfs_ref(mp);
		if (mp != ap->a_vp->v_mount) {
			vfs_rel(mp);
			mp = NULL;
		}
	}
	*(ap->a_mpp) = mp;
	return (0);
}

/*
 * Default bmap: report a 1:1 mapping from logical blocks to physical
 * blocks, scaled by the filesystem's I/O size, with no read-ahead or
 * read-behind runs.  See VOP_BMAP(9).
 */
int
vop_stdbmap(ap)
	struct vop_bmap_args /* {
		struct vnode *a_vp;
		daddr_t  a_bn;
		struct bufobj **a_bop;
		daddr_t *a_bnp;
		int *a_runp;
		int *a_runb;
	} */ *ap;
{

	if (ap->a_bop != NULL)
		*ap->a_bop = &ap->a_vp->v_bufobj;
	if (ap->a_bnp != NULL)
		*ap->a_bnp = ap->a_bn * btodb(ap->a_vp->v_mount->mnt_stat.f_iosize);
	if (ap->a_runp != NULL)
		*ap->a_runp = 0;
	if (ap->a_runb != NULL)
		*ap->a_runb = 0;
	return (0);
}

int
vop_stdfsync(ap)
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int a_waitfor;
		struct thread *a_td;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct buf *bp;
	struct bufobj *bo;
	struct buf *nbp;
	int error = 0;
	int maxretry = 1000;     /* large, arbitrarily chosen */

	bo = &vp->v_bufobj;
	BO_LOCK(bo);
loop1:
	/*
	 * MARK/SCAN initialization to avoid infinite loops.
	 */
	TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) {
		bp->b_vflags &= ~BV_SCANNED;
		bp->b_error = 0;
	}

	/*
	 * Flush all dirty buffers associated with a vnode.
	 */
loop2:
	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
		if ((bp->b_vflags & BV_SCANNED) != 0)
			continue;
		bp->b_vflags |= BV_SCANNED;
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
			if (ap->a_waitfor != MNT_WAIT)
				continue;
			if (BUF_LOCK(bp,
			    LK_EXCLUSIVE | LK_INTERLOCK | LK_SLEEPFAIL,
			    BO_LOCKPTR(bo)) != 0) {
				BO_LOCK(bo);
				goto loop1;
			}
			BO_LOCK(bo);
		}
		BO_UNLOCK(bo);
		KASSERT(bp->b_bufobj == bo,
		    ("bp %p wrong b_bufobj %p should be %p",
		    bp, bp->b_bufobj, bo));
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("fsync: not dirty");
		if ((vp->v_object != NULL) && (bp->b_flags & B_CLUSTEROK)) {
			vfs_bio_awrite(bp);
		} else {
			bremfree(bp);
			bawrite(bp);
		}
		BO_LOCK(bo);
		goto loop2;
	}

	/*
	 * If synchronous, the caller expects us to completely resolve all
	 * dirty buffers in the system.  Wait for in-progress I/O to
	 * complete (which could include background bitmap writes), then
	 * retry if dirty blocks still exist.
	 */
	if (ap->a_waitfor == MNT_WAIT) {
		bufobj_wwait(bo, 0, 0);
		if (bo->bo_dirty.bv_cnt > 0) {
			/*
			 * If we are unable to write any of these buffers
			 * then we fail now rather than trying endlessly
			 * to write them out.
			 */
			TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs)
				if ((error = bp->b_error) == 0)
					continue;
			if (error == 0 && --maxretry >= 0)
				goto loop1;
			error = EAGAIN;
		}
	}
	BO_UNLOCK(bo);
	if (error == EAGAIN)
		vprint("fsync: giving up on dirty", vp);

	return (error);
}

/*
 * Default getpages: hand the request to the generic vnode pager, which
 * performs the read on behalf of the VM system.  See VOP_GETPAGES(9).
 */
int
vop_stdgetpages(ap)
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		vm_page_t *a_m;
		int a_count;
		int a_reqpage;
	} */ *ap;
{

	return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
	    ap->a_count, ap->a_reqpage, NULL, NULL);
}

static int
vop_stdgetpages_async(struct vop_getpages_async_args *ap)
{
	int error;

	error = VOP_GETPAGES(ap->a_vp, ap->a_m, ap->a_count, ap->a_reqpage);
	ap->a_iodone(ap->a_arg, ap->a_m, ap->a_reqpage, error);
	return (error);
}

int
vop_stdkqfilter(struct vop_kqfilter_args *ap)
{
	return vfs_kqfilter(ap);
}

/*
 * Default putpages: hand the request to the generic vnode pager, which
 * performs the write on behalf of the VM system.  See VOP_PUTPAGES(9).
 */
int
vop_stdputpages(ap)
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		vm_page_t *a_m;
		int a_count;
		int a_sync;
		int *a_rtvals;
	} */ *ap;
{

	return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
	    ap->a_sync, ap->a_rtvals);
}

int
vop_stdvptofh(struct vop_vptofh_args *ap)
{
	return (EOPNOTSUPP);
}

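/*
 * Default vnode-to-name translation: open the parent directory via a
 * ".." lookup, then scan it with VOP_READDIR for an entry whose
 * d_fileno matches vp, copying the matching name into the caller's
 * buffer.
 */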
int
vop_stdvptocnp(struct vop_vptocnp_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode **dvp = ap->a_vpp;
	struct ucred *cred = ap->a_cred;
	char *buf = ap->a_buf;
	int *buflen = ap->a_buflen;
	char *dirbuf, *cpos;
	int i, error, eofflag, dirbuflen, flags, locked, len, covered;
	off_t off;
	ino_t fileno;
	struct vattr va;
	struct nameidata nd;
	struct thread *td;
	struct dirent *dp;
	struct vnode *mvp;

	i = *buflen;
	error = 0;
	covered = 0;
	td = curthread;

	if (vp->v_type != VDIR)
		return (ENOENT);

	error = VOP_GETATTR(vp, &va, cred);
	if (error)
		return (error);

	VREF(vp);
	locked = VOP_ISLOCKED(vp);
	VOP_UNLOCK(vp, 0);
	NDINIT_ATVP(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
	    "..", vp, td);
	flags = FREAD;
	error = vn_open_cred(&nd, &flags, 0, VN_OPEN_NOAUDIT, cred, NULL);
	if (error) {
		vn_lock(vp, locked | LK_RETRY);
		return (error);
	}
	NDFREE(&nd, NDF_ONLY_PNBUF);

	mvp = *dvp = nd.ni_vp;

	if (vp->v_mount != (*dvp)->v_mount &&
	    ((*dvp)->v_vflag & VV_ROOT) &&
	    ((*dvp)->v_mount->mnt_flag & MNT_UNION)) {
		*dvp = (*dvp)->v_mount->mnt_vnodecovered;
		VREF(mvp);
		VOP_UNLOCK(mvp, 0);
		vn_close(mvp, FREAD, cred, td);
		VREF(*dvp);
		vn_lock(*dvp, LK_EXCLUSIVE | LK_RETRY);
		covered = 1;
	}

	fileno = va.va_fileid;

	dirbuflen = DEV_BSIZE;
	if (dirbuflen < va.va_blocksize)
		dirbuflen = va.va_blocksize;
	dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK);

	if ((*dvp)->v_type != VDIR) {
		error = ENOENT;
		goto out;
	}

	off = 0;
	len = 0;
	do {
		/* call VOP_READDIR of parent */
		error = get_next_dirent(*dvp, &dp, dirbuf, dirbuflen, &off,
					&cpos, &len, &eofflag, td);
		if (error)
			goto out;

		if ((dp->d_type != DT_WHT) &&
		    (dp->d_fileno == fileno)) {
			if (covered) {
				VOP_UNLOCK(*dvp, 0);
				vn_lock(mvp, LK_EXCLUSIVE | LK_RETRY);
				if (dirent_exists(mvp, dp->d_name, td)) {
					error = ENOENT;
					VOP_UNLOCK(mvp, 0);
					vn_lock(*dvp, LK_EXCLUSIVE | LK_RETRY);
					goto out;
				}
				VOP_UNLOCK(mvp, 0);
				vn_lock(*dvp, LK_EXCLUSIVE | LK_RETRY);
			}
			i -= dp->d_namlen;

			if (i < 0) {
				error = ENOMEM;
				goto out;
			}
			if (dp->d_namlen == 1 && dp->d_name[0] == '.') {
				error = ENOENT;
			} else {
				bcopy(dp->d_name, buf + i, dp->d_namlen);
				error = 0;
			}
			goto out;
		}
	} while (len > 0 || !eofflag);
	error = ENOENT;

out:
	free(dirbuf, M_TEMP);
	if (!error) {
		*buflen = i;
		vref(*dvp);
	}
	if (covered) {
		vput(*dvp);
		vrele(mvp);
	} else {
		VOP_UNLOCK(mvp, 0);
		vn_close(mvp, FREAD, cred, td);
	}
	vn_lock(vp, locked | LK_RETRY);
	return (error);
}

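/*
 * Default allocate: there is no filesystem-independent way to reserve
 * backing store, so force it by reading each block in the range
 * (zero-filling anything past EOF) and writing it back.
 */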
int
vop_stdallocate(struct vop_allocate_args *ap)
{
#ifdef __notyet__
	struct statfs sfs;
#endif
	struct iovec aiov;
	struct vattr vattr, *vap;
	struct uio auio;
	off_t fsize, len, cur, offset;
	uint8_t *buf;
	struct thread *td;
	struct vnode *vp;
	size_t iosize;
	int error;

	buf = NULL;
	error = 0;
	td = curthread;
	vap = &vattr;
	vp = ap->a_vp;
	len = *ap->a_len;
	offset = *ap->a_offset;

	error = VOP_GETATTR(vp, vap, td->td_ucred);
	if (error != 0)
		goto out;
	fsize = vap->va_size;
	iosize = vap->va_blocksize;
	if (iosize == 0)
		iosize = BLKDEV_IOSIZE;
	if (iosize > MAXPHYS)
		iosize = MAXPHYS;
	buf = malloc(iosize, M_TEMP, M_WAITOK);

#ifdef __notyet__
	/*
	 * Check if the filesystem sets f_maxfilesize; if not use
	 * VOP_SETATTR to perform the check.
	 */
	error = VFS_STATFS(vp->v_mount, &sfs, td);
	if (error != 0)
		goto out;
	if (sfs.f_maxfilesize) {
		if (offset > sfs.f_maxfilesize || len > sfs.f_maxfilesize ||
		    offset + len > sfs.f_maxfilesize) {
			error = EFBIG;
			goto out;
		}
	} else
#endif
	if (offset + len > vap->va_size) {
		/*
		 * Test offset + len against the filesystem's maxfilesize.
		 */
		VATTR_NULL(vap);
		vap->va_size = offset + len;
		error = VOP_SETATTR(vp, vap, td->td_ucred);
		if (error != 0)
			goto out;
		VATTR_NULL(vap);
		vap->va_size = fsize;
		error = VOP_SETATTR(vp, vap, td->td_ucred);
		if (error != 0)
			goto out;
	}

	for (;;) {
		/*
		 * Read and write back anything below the nominal file
		 * size.  There's currently no way outside the filesystem
		 * to know whether this area is sparse or not.
		 */
		cur = iosize;
		if ((offset % iosize) != 0)
			cur -= (offset % iosize);
		if (cur > len)
			cur = len;
		if (offset < fsize) {
			aiov.iov_base = buf;
			aiov.iov_len = cur;
			auio.uio_iov = &aiov;
			auio.uio_iovcnt = 1;
			auio.uio_offset = offset;
			auio.uio_resid = cur;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_td = td;
			error = VOP_READ(vp, &auio, 0, td->td_ucred);
			if (error != 0)
				break;
			if (auio.uio_resid > 0) {
				bzero(buf + cur - auio.uio_resid,
				    auio.uio_resid);
			}
		} else {
			bzero(buf, cur);
		}

		aiov.iov_base = buf;
		aiov.iov_len = cur;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = offset;
		auio.uio_resid = cur;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_WRITE;
		auio.uio_td = td;

		error = VOP_WRITE(vp, &auio, 0, td->td_ucred);
		if (error != 0)
			break;

		len -= cur;
		offset += cur;
		if (len == 0)
			break;
		if (should_yield())
			break;
	}

 out:
	*ap->a_len = len;
	*ap->a_offset = offset;
	free(buf, M_TEMP);
	return (error);
}

int
vop_stdadvise(struct vop_advise_args *ap)
{
	struct vnode *vp;
	off_t start, end;
	int error;

	vp = ap->a_vp;
	switch (ap->a_advice) {
	case POSIX_FADV_WILLNEED:
		/*
		 * Do nothing for now.  Filesystems should provide a
		 * custom method which starts an asynchronous read of
		 * the requested region.
		 */
		error = 0;
		break;
	case POSIX_FADV_DONTNEED:
		/*
		 * Flush any open FS buffers and then remove pages
		 * from the backing VM object.  Using vinvalbuf() here
		 * is a bit heavy-handed as it flushes all buffers for
		 * the given vnode, not just the buffers covering the
		 * requested range.
		 */
		error = 0;
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		if (vp->v_iflag & VI_DOOMED) {
			VOP_UNLOCK(vp, 0);
			break;
		}
		vinvalbuf(vp, V_CLEANONLY, 0, 0);
		if (vp->v_object != NULL) {
			start = trunc_page(ap->a_start);
			end = round_page(ap->a_end);
			VM_OBJECT_WLOCK(vp->v_object);
			vm_object_page_cache(vp->v_object, OFF_TO_IDX(start),
			    OFF_TO_IDX(end));
			VM_OBJECT_WUNLOCK(vp->v_object);
		}
		VOP_UNLOCK(vp, 0);
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}

int
vop_stdunp_bind(struct vop_unp_bind_args *ap)
{

	ap->a_vp->v_socket = ap->a_socket;
	return (0);
}

int
vop_stdunp_connect(struct vop_unp_connect_args *ap)
{

	*ap->a_socket = ap->a_vp->v_socket;
	return (0);
}

int
vop_stdunp_detach(struct vop_unp_detach_args *ap)
{

	ap->a_vp->v_socket = NULL;
	return (0);
}

static int
vop_stdis_text(struct vop_is_text_args *ap)
{

	return ((ap->a_vp->v_vflag & VV_TEXT) != 0);
}

static int
vop_stdset_text(struct vop_set_text_args *ap)
{

	ap->a_vp->v_vflag |= VV_TEXT;
	return (0);
}

static int
vop_stdunset_text(struct vop_unset_text_args *ap)
{

	ap->a_vp->v_vflag &= ~VV_TEXT;
	return (0);
}

static int
vop_stdget_writecount(struct vop_get_writecount_args *ap)
{

	*ap->a_writecount = ap->a_vp->v_writecount;
	return (0);
}

static int
vop_stdadd_writecount(struct vop_add_writecount_args *ap)
{

	ap->a_vp->v_writecount += ap->a_inc;
	return (0);
}

/*
 * vfs default ops
 * used to fill the vfs function table to get reasonable default return values.
 */
int
vfs_stdroot(mp, flags, vpp)
	struct mount *mp;
	int flags;
	struct vnode **vpp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdstatfs(mp, sbp)
	struct mount *mp;
	struct statfs *sbp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdquotactl(mp, cmds, uid, arg)
	struct mount *mp;
	int cmds;
	uid_t uid;
	void *arg;
{

	return (EOPNOTSUPP);
}

int
vfs_stdsync(mp, waitfor)
	struct mount *mp;
	int waitfor;
{
	struct vnode *vp, *mvp;
	struct thread *td;
	int error, lockreq, allerror = 0;

	td = curthread;
	lockreq = LK_EXCLUSIVE | LK_INTERLOCK;
	if (waitfor != MNT_WAIT)
		lockreq |= LK_NOWAIT;
	/*
	 * Force stale buffer cache information to be flushed.
	 */
loop:
	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
		if (vp->v_bufobj.bo_dirty.bv_cnt == 0) {
			VI_UNLOCK(vp);
			continue;
		}
		if ((error = vget(vp, lockreq, td)) != 0) {
			if (error == ENOENT) {
				MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
				goto loop;
			}
			continue;
		}
		error = VOP_FSYNC(vp, waitfor, td);
		if (error)
			allerror = error;
		vput(vp);
	}
	return (allerror);
}

int
vfs_stdnosync(mp, waitfor)
	struct mount *mp;
	int waitfor;
{

	return (0);
}

int
vfs_stdvget(mp, ino, flags, vpp)
	struct mount *mp;
	ino_t ino;
	int flags;
	struct vnode **vpp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdfhtovp(mp, fhp, flags, vpp)
	struct mount *mp;
	struct fid *fhp;
	int flags;
	struct vnode **vpp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdinit(vfsp)
	struct vfsconf *vfsp;
{

	return (0);
}

int
vfs_stduninit(vfsp)
	struct vfsconf *vfsp;
{

	return (0);
}

int
vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace, attrname)
	struct mount *mp;
	int cmd;
	struct vnode *filename_vp;
	int attrnamespace;
	const char *attrname;
{

	if (filename_vp != NULL)
		VOP_UNLOCK(filename_vp, 0);
	return (EOPNOTSUPP);
}

int
vfs_stdsysctl(mp, op, req)
	struct mount *mp;
	fsctlop_t op;
	struct sysctl_req *req;
{

	return (EOPNOTSUPP);
}

/* end of vfs default ops */