/*-
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed
 * to Berkeley by John Heidemann of the UCLA Ficus project.
 *
 * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/vfs_default.c 220791 2011-04-18 16:32:22Z mdf $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/event.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/lockf.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/fcntl.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/dirent.h>
#include <sys/poll.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>

static int	vop_nolookup(struct vop_lookup_args *);
static int	vop_norename(struct vop_rename_args *);
static int	vop_nostrategy(struct vop_strategy_args *);
static int	get_next_dirent(struct vnode *vp, struct dirent **dpp,
				char *dirbuf, int dirbuflen, off_t *off,
				char **cpos, int *len, int *eofflag,
				struct thread *td);
static int	dirent_exists(struct vnode *vp, const char *dirname,
			      struct thread *td);

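/*
 * Smallest record a well-formed directory entry can occupy: the fixed
 * dirent header (sizeof(struct dirent) minus the MAXNAMLEN+1 name
 * buffer) plus at least 4 bytes of name storage, matching the 4-byte
 * record alignment.
 */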
#define DIRENT_MINSIZE (sizeof(struct dirent) - (MAXNAMLEN+1) + 4)

/*
 * This vnode table stores what we want to do if the filesystem doesn't
 * implement a particular VOP.
 *
 * If there is no specific entry here, we will return EOPNOTSUPP.
 *
 * Note that every filesystem has to implement either vop_access
 * or vop_accessx; failing to do so will result in an immediate crash
 * due to stack overflow, as vop_stdaccess() calls vop_stdaccessx(),
 * which calls vop_stdaccess() and so on.
 */

struct vop_vector default_vnodeops = {
	.vop_default =		NULL,
	.vop_bypass =		VOP_EOPNOTSUPP,

	.vop_access =		vop_stdaccess,
	.vop_accessx =		vop_stdaccessx,
	.vop_advlock =		vop_stdadvlock,
	.vop_advlockasync =	vop_stdadvlockasync,
	.vop_advlockpurge =	vop_stdadvlockpurge,
	.vop_allocate =		vop_stdallocate,
	.vop_bmap =		vop_stdbmap,
	.vop_close =		VOP_NULL,
	.vop_fsync =		VOP_NULL,
	.vop_getpages =		vop_stdgetpages,
	.vop_getwritemount =	vop_stdgetwritemount,
	.vop_inactive =		VOP_NULL,
	.vop_ioctl =		VOP_ENOTTY,
	.vop_kqfilter =		vop_stdkqfilter,
	.vop_islocked =		vop_stdislocked,
	.vop_lock1 =		vop_stdlock,
	.vop_lookup =		vop_nolookup,
	.vop_open =		VOP_NULL,
	.vop_pathconf =		VOP_EINVAL,
	.vop_poll =		vop_nopoll,
	.vop_putpages =		vop_stdputpages,
	.vop_readlink =		VOP_EINVAL,
	.vop_rename =		vop_norename,
	.vop_revoke =		VOP_PANIC,
	.vop_strategy =		vop_nostrategy,
	.vop_unlock =		vop_stdunlock,
	.vop_vptocnp =		vop_stdvptocnp,
	.vop_vptofh =		vop_stdvptofh,
};
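
/*
 * Example (hypothetical names): a filesystem normally supplies its own
 * vop_vector and falls back on the entries above via vop_default, e.g.:
 *
 *	struct vop_vector myfs_vnodeops = {
 *		.vop_default =	&default_vnodeops,
 *		.vop_lookup =	myfs_lookup,
 *		.vop_readdir =	myfs_readdir,
 *	};
 *
 * Any operation left unset is then resolved through the vop_default
 * chain by the VOP dispatch code.
 */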

/*
 * Series of placeholder functions for various error returns for
 * VOPs.
 */

int
vop_eopnotsupp(struct vop_generic_args *ap)
{
	/*
	printf("vop_notsupp[%s]\n", ap->a_desc->vdesc_name);
	*/

	return (EOPNOTSUPP);
}

int
vop_ebadf(struct vop_generic_args *ap)
{

	return (EBADF);
}

int
vop_enotty(struct vop_generic_args *ap)
{

	return (ENOTTY);
}

int
vop_einval(struct vop_generic_args *ap)
{

	return (EINVAL);
}

int
vop_enoent(struct vop_generic_args *ap)
{

	return (ENOENT);
}

int
vop_null(struct vop_generic_args *ap)
{

	return (0);
}

/*
 * Helper function to panic on some bad VOPs in some filesystems.
 */
int
vop_panic(struct vop_generic_args *ap)
{

	panic("filesystem goof: vop_panic[%s]", ap->a_desc->vdesc_name);
}

/*
 * vop_std<something> and vop_no<something> are default functions for use by
 * filesystems that need the "default reasonable" implementation for a
 * particular operation.
 *
 * The documentation for the operations they implement, where it exists, is
 * in the VOP_<SOMETHING>(9) manpage (all uppercase).
 */

/*
 * Default vop for filesystems that do not support name lookup
 */
static int
vop_nolookup(ap)
	struct vop_lookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap;
{

	*ap->a_vpp = NULL;
	return (ENOTDIR);
}

/*
 * vop_norename:
 *
 * Handle unlock and reference counting for arguments of vop_rename
 * for filesystems that do not implement the rename operation.
 */
static int
vop_norename(struct vop_rename_args *ap)
{

	vop_rename_fail(ap);
	return (EOPNOTSUPP);
}

/*
 *	vop_nostrategy:
 *
 *	Strategy routine for VFS devices that have none.
 *
 *	BIO_ERROR and B_INVAL must be cleared prior to calling any strategy
 *	routine.  Typically this is done for a BIO_READ strategy call.
 *	Typically B_INVAL is assumed to already be clear prior to a write
 *	and should not be cleared manually unless you just made the buffer
 *	invalid.  BIO_ERROR should be cleared either way.
 */

static int
vop_nostrategy (struct vop_strategy_args *ap)
{
	printf("No strategy for buffer at %p\n", ap->a_bp);
	vprint("vnode", ap->a_vp);
	ap->a_bp->b_ioflags |= BIO_ERROR;
	ap->a_bp->b_error = EOPNOTSUPP;
	bufdone(ap->a_bp);
	return (EOPNOTSUPP);
}

static int
get_next_dirent(struct vnode *vp, struct dirent **dpp, char *dirbuf,
		int dirbuflen, off_t *off, char **cpos, int *len,
		int *eofflag, struct thread *td)
{
	int error, reclen;
	struct uio uio;
	struct iovec iov;
	struct dirent *dp;

	KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp));
	KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp));

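	/*
	 * If the caller's buffer is exhausted, refill it with one
	 * VOP_READDIR pass starting at *off.
	 */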
	if (*len == 0) {
		iov.iov_base = dirbuf;
		iov.iov_len = dirbuflen;

		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_offset = *off;
		uio.uio_resid = dirbuflen;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_rw = UIO_READ;
		uio.uio_td = td;

		*eofflag = 0;

#ifdef MAC
		error = mac_vnode_check_readdir(td->td_ucred, vp);
		if (error == 0)
#endif
			error = VOP_READDIR(vp, &uio, td->td_ucred, eofflag,
			    NULL, NULL);
		if (error)
			return (error);

		*off = uio.uio_offset;

		*cpos = dirbuf;
		*len = (dirbuflen - uio.uio_resid);

		if (*len == 0)
			return (ENOENT);
	}

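	/* Consume one entry from the buffered directory data. */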
	dp = (struct dirent *)(*cpos);
	reclen = dp->d_reclen;
	*dpp = dp;

	/* check for malformed directory.. */
	if (reclen < DIRENT_MINSIZE)
		return (EINVAL);

	*cpos += reclen;
	*len -= reclen;

	return (0);
}

/*
 * Check if a named file exists in a given directory vnode.
 */
static int
dirent_exists(struct vnode *vp, const char *dirname, struct thread *td)
{
	char *dirbuf, *cpos;
	int error, eofflag, dirbuflen, len, found;
	off_t off;
	struct dirent *dp;
	struct vattr va;

	KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp));
	KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp));

	found = 0;

	error = VOP_GETATTR(vp, &va, td->td_ucred);
	if (error)
		return (found);

	dirbuflen = DEV_BSIZE;
	if (dirbuflen < va.va_blocksize)
		dirbuflen = va.va_blocksize;
	dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK);

	off = 0;
	len = 0;
	do {
		error = get_next_dirent(vp, &dp, dirbuf, dirbuflen, &off,
					&cpos, &len, &eofflag, td);
		if (error)
			goto out;

		if ((dp->d_type != DT_WHT) &&
		    !strcmp(dp->d_name, dirname)) {
			found = 1;
			goto out;
		}
	} while (len > 0 || !eofflag);

out:
	free(dirbuf, M_TEMP);
	return (found);
}

int
vop_stdaccess(struct vop_access_args *ap)
{

	KASSERT((ap->a_accmode & ~(VEXEC | VWRITE | VREAD | VADMIN |
	    VAPPEND)) == 0, ("invalid bit in accmode"));

	return (VOP_ACCESSX(ap->a_vp, ap->a_accmode, ap->a_cred, ap->a_td));
}

int
vop_stdaccessx(struct vop_accessx_args *ap)
{
	int error;
	accmode_t accmode = ap->a_accmode;

	error = vfs_unixify_accmode(&accmode);
	if (error != 0)
		return (error);

	if (accmode == 0)
		return (0);

	return (VOP_ACCESS(ap->a_vp, accmode, ap->a_cred, ap->a_td));
}

/*
 * Advisory record locking support
 */
int
vop_stdadvlock(struct vop_advlock_args *ap)
{
	struct vnode *vp;
	struct ucred *cred;
	struct vattr vattr;
	int error;

	vp = ap->a_vp;
	cred = curthread->td_ucred;
	vn_lock(vp, LK_SHARED | LK_RETRY);
	error = VOP_GETATTR(vp, &vattr, cred);
	VOP_UNLOCK(vp, 0);
	if (error)
		return (error);

	return (lf_advlock(ap, &(vp->v_lockf), vattr.va_size));
}

int
vop_stdadvlockasync(struct vop_advlockasync_args *ap)
{
	struct vnode *vp;
	struct ucred *cred;
	struct vattr vattr;
	int error;

	vp = ap->a_vp;
	cred = curthread->td_ucred;
	vn_lock(vp, LK_SHARED | LK_RETRY);
	error = VOP_GETATTR(vp, &vattr, cred);
	VOP_UNLOCK(vp, 0);
	if (error)
		return (error);

	return (lf_advlockasync(ap, &(vp->v_lockf), vattr.va_size));
}

int
vop_stdadvlockpurge(struct vop_advlockpurge_args *ap)
{
	struct vnode *vp;

	vp = ap->a_vp;
	lf_purgelocks(vp, &vp->v_lockf);
	return (0);
}

/*
 * vop_stdpathconf:
 *
 * Standard implementation of POSIX pathconf, to get information about limits
 * for a filesystem.
 * Override per filesystem for the case where the filesystem has smaller
 * limits.
 */
int
vop_stdpathconf(ap)
	struct vop_pathconf_args /* {
		struct vnode *a_vp;
		int a_name;
		int *a_retval;
	} */ *ap;
{

	switch (ap->a_name) {
		case _PC_NAME_MAX:
			*ap->a_retval = NAME_MAX;
			return (0);
		case _PC_PATH_MAX:
			*ap->a_retval = PATH_MAX;
			return (0);
		case _PC_LINK_MAX:
			*ap->a_retval = LINK_MAX;
			return (0);
		case _PC_MAX_CANON:
			*ap->a_retval = MAX_CANON;
			return (0);
		case _PC_MAX_INPUT:
			*ap->a_retval = MAX_INPUT;
			return (0);
		case _PC_PIPE_BUF:
			*ap->a_retval = PIPE_BUF;
			return (0);
		case _PC_CHOWN_RESTRICTED:
			*ap->a_retval = 1;
			return (0);
		case _PC_VDISABLE:
			*ap->a_retval = _POSIX_VDISABLE;
			return (0);
		default:
			return (EINVAL);
	}
	/* NOTREACHED */
}

/*
 * Standard lock, unlock and islocked functions.
 */
int
vop_stdlock(ap)
	struct vop_lock1_args /* {
		struct vnode *a_vp;
		int a_flags;
		char *file;
		int line;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	return (_lockmgr_args(vp->v_vnlock, ap->a_flags, VI_MTX(vp),
	    LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, ap->a_file,
	    ap->a_line));
}

/* See above. */
int
vop_stdunlock(ap)
	struct vop_unlock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	return (lockmgr(vp->v_vnlock, ap->a_flags | LK_RELEASE, VI_MTX(vp)));
}

/* See above. */
int
vop_stdislocked(ap)
	struct vop_islocked_args /* {
		struct vnode *a_vp;
	} */ *ap;
{

	return (lockstatus(ap->a_vp->v_vnlock));
}

/*
 * Return true for select/poll.
 */
int
vop_nopoll(ap)
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int  a_events;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{

	return (poll_no_poll(ap->a_events));
}

/*
 * Implement poll for local filesystems that support it.
 */
int
vop_stdpoll(ap)
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int  a_events;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{
	if (ap->a_events & ~POLLSTANDARD)
		return (vn_pollrecord(ap->a_vp, ap->a_td, ap->a_events));
	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

/*
 * Return our mount point, as we will take charge of the writes.
 */
int
vop_stdgetwritemount(ap)
	struct vop_getwritemount_args /* {
		struct vnode *a_vp;
		struct mount **a_mpp;
	} */ *ap;
{
	struct mount *mp;

	/*
	 * XXX Since this is called unlocked we may be recycled while
	 * attempting to ref the mount.  If this is the case our mountpoint
	 * will be set to NULL.  We only have to prevent this call from
	 * returning with a ref to an incorrect mountpoint.  It is not
	 * harmful to return with a ref to our previous mountpoint.
	 */
	mp = ap->a_vp->v_mount;
	if (mp != NULL) {
		vfs_ref(mp);
		if (mp != ap->a_vp->v_mount) {
			vfs_rel(mp);
			mp = NULL;
		}
	}
	*(ap->a_mpp) = mp;
	return (0);
}

/*
 * vop_stdbmap:
 *
 * Default logical-to-physical block mapping: report the vnode's own
 * bufobj, convert the logical block number to DEV_BSIZE units using
 * the filesystem's I/O size, and report no read-ahead or read-behind
 * clustering.  See also VOP_BMAP(9).
 */
int
vop_stdbmap(ap)
	struct vop_bmap_args /* {
		struct vnode *a_vp;
		daddr_t  a_bn;
		struct bufobj **a_bop;
		daddr_t *a_bnp;
		int *a_runp;
		int *a_runb;
	} */ *ap;
{

	if (ap->a_bop != NULL)
		*ap->a_bop = &ap->a_vp->v_bufobj;
	if (ap->a_bnp != NULL)
		*ap->a_bnp = ap->a_bn * btodb(ap->a_vp->v_mount->mnt_stat.f_iosize);
	if (ap->a_runp != NULL)
		*ap->a_runp = 0;
	if (ap->a_runb != NULL)
		*ap->a_runb = 0;
	return (0);
}

int
vop_stdfsync(ap)
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int a_waitfor;
		struct thread *a_td;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct buf *bp;
	struct bufobj *bo;
	struct buf *nbp;
	int error = 0;
	int maxretry = 1000;     /* large, arbitrarily chosen */

	bo = &vp->v_bufobj;
	BO_LOCK(bo);
loop1:
	/*
	 * MARK/SCAN initialization to avoid infinite loops.
	 */
	TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) {
		bp->b_vflags &= ~BV_SCANNED;
		bp->b_error = 0;
	}

	/*
	 * Flush all dirty buffers associated with a vnode.
	 */
loop2:
	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
		if ((bp->b_vflags & BV_SCANNED) != 0)
			continue;
		bp->b_vflags |= BV_SCANNED;
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
			continue;
		BO_UNLOCK(bo);
		KASSERT(bp->b_bufobj == bo,
		    ("bp %p wrong b_bufobj %p should be %p",
		    bp, bp->b_bufobj, bo));
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("fsync: not dirty");
		if ((vp->v_object != NULL) && (bp->b_flags & B_CLUSTEROK)) {
			vfs_bio_awrite(bp);
		} else {
			bremfree(bp);
			bawrite(bp);
		}
		BO_LOCK(bo);
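		/*
		 * The bufobj lock was dropped while the buffer was
		 * written, so the list links may have changed; rescan
		 * from the start (BV_SCANNED skips finished buffers).
		 */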
		goto loop2;
	}

	/*
	 * If synchronous the caller expects us to completely resolve all
	 * dirty buffers in the system.  Wait for in-progress I/O to
	 * complete (which could include background bitmap writes), then
	 * retry if dirty blocks still exist.
	 */
	if (ap->a_waitfor == MNT_WAIT) {
		bufobj_wwait(bo, 0, 0);
		if (bo->bo_dirty.bv_cnt > 0) {
			/*
			 * If we are unable to write any of these buffers
			 * then we fail now rather than trying endlessly
			 * to write them out.
			 */
			TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs)
				if ((error = bp->b_error) == 0)
					continue;
			if (error == 0 && --maxretry >= 0)
				goto loop1;
			error = EAGAIN;
		}
	}
	BO_UNLOCK(bo);
	if (error == EAGAIN)
		vprint("fsync: giving up on dirty", vp);

	return (error);
}

/*
 * Default getpages: hand the request to the generic vnode pager, which
 * implements paging input in terms of VOP_BMAP() and the buffer
 * strategy path of the underlying filesystem.  See also VOP_GETPAGES(9).
 */
int
vop_stdgetpages(ap)
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		vm_page_t *a_m;
		int a_count;
		int a_reqpage;
		vm_ooffset_t a_offset;
	} */ *ap;
{

	return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
	    ap->a_count, ap->a_reqpage);
}

int
vop_stdkqfilter(struct vop_kqfilter_args *ap)
{
	return vfs_kqfilter(ap);
}

/*
 * Default putpages: hand the pageout request to the generic vnode
 * pager, which writes the pages back through VOP_WRITE().  See also
 * VOP_PUTPAGES(9).
 */
int
vop_stdputpages(ap)
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		vm_page_t *a_m;
		int a_count;
		int a_sync;
		int *a_rtvals;
		vm_ooffset_t a_offset;
	} */ *ap;
{

	return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
	     ap->a_sync, ap->a_rtvals);
}

int
vop_stdvptofh(struct vop_vptofh_args *ap)
{
	return (EOPNOTSUPP);
}

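/*
 * vop_stdvptocnp:
 *
 * Reverse lookup: recover a vnode's name by opening its parent via
 * ".." and scanning the parent with VOP_READDIR for an entry whose
 * inode number matches, copying the name to the tail of the caller's
 * buffer.
 */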
int
vop_stdvptocnp(struct vop_vptocnp_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode **dvp = ap->a_vpp;
	struct ucred *cred = ap->a_cred;
	char *buf = ap->a_buf;
	int *buflen = ap->a_buflen;
	char *dirbuf, *cpos;
	int i, error, eofflag, dirbuflen, flags, locked, len, covered;
	off_t off;
	ino_t fileno;
	struct vattr va;
	struct nameidata nd;
	struct thread *td;
	struct dirent *dp;
	struct vnode *mvp;

	i = *buflen;
	error = 0;
	covered = 0;
	td = curthread;

	if (vp->v_type != VDIR)
		return (ENOENT);

	error = VOP_GETATTR(vp, &va, cred);
	if (error)
		return (error);

	VREF(vp);
	locked = VOP_ISLOCKED(vp);
	VOP_UNLOCK(vp, 0);
	NDINIT_ATVP(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
	    "..", vp, td);
	flags = FREAD;
	error = vn_open_cred(&nd, &flags, 0, VN_OPEN_NOAUDIT, cred, NULL);
	if (error) {
		vn_lock(vp, locked | LK_RETRY);
		return (error);
	}
	NDFREE(&nd, NDF_ONLY_PNBUF);

	mvp = *dvp = nd.ni_vp;

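	/*
	 * If ".." landed on the root of a union mount, continue the
	 * search in the directory that mount covers.
	 */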
	if (vp->v_mount != (*dvp)->v_mount &&
	    ((*dvp)->v_vflag & VV_ROOT) &&
	    ((*dvp)->v_mount->mnt_flag & MNT_UNION)) {
		*dvp = (*dvp)->v_mount->mnt_vnodecovered;
		VREF(mvp);
		VOP_UNLOCK(mvp, 0);
		vn_close(mvp, FREAD, cred, td);
		VREF(*dvp);
		vn_lock(*dvp, LK_EXCLUSIVE | LK_RETRY);
		covered = 1;
	}

	fileno = va.va_fileid;

	dirbuflen = DEV_BSIZE;
	if (dirbuflen < va.va_blocksize)
		dirbuflen = va.va_blocksize;
	dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK);

	if ((*dvp)->v_type != VDIR) {
		error = ENOENT;
		goto out;
	}

	off = 0;
	len = 0;
	do {
		/* call VOP_READDIR of parent */
		error = get_next_dirent(*dvp, &dp, dirbuf, dirbuflen, &off,
					&cpos, &len, &eofflag, td);
		if (error)
			goto out;

		if ((dp->d_type != DT_WHT) &&
		    (dp->d_fileno == fileno)) {
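			/*
			 * In the union case, reject names that are
			 * shadowed by an entry in the upper layer.
			 */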
			if (covered) {
				VOP_UNLOCK(*dvp, 0);
				vn_lock(mvp, LK_EXCLUSIVE | LK_RETRY);
				if (dirent_exists(mvp, dp->d_name, td)) {
					error = ENOENT;
					VOP_UNLOCK(mvp, 0);
					vn_lock(*dvp, LK_EXCLUSIVE | LK_RETRY);
					goto out;
				}
				VOP_UNLOCK(mvp, 0);
				vn_lock(*dvp, LK_EXCLUSIVE | LK_RETRY);
			}
			i -= dp->d_namlen;

			if (i < 0) {
				error = ENOMEM;
				goto out;
			}
			bcopy(dp->d_name, buf + i, dp->d_namlen);
			error = 0;
			goto out;
		}
	} while (len > 0 || !eofflag);
	error = ENOENT;

out:
	free(dirbuf, M_TEMP);
	if (!error) {
		*buflen = i;
		vhold(*dvp);
	}
	if (covered) {
		vput(*dvp);
		vrele(mvp);
	} else {
		VOP_UNLOCK(mvp, 0);
		vn_close(mvp, FREAD, cred, td);
	}
	vn_lock(vp, locked | LK_RETRY);
	return (error);
}

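/*
 * vop_stdallocate:
 *
 * Default implementation of the allocate VOP (as used by
 * posix_fallocate(2)): grow the file with VOP_SETATTR() if needed,
 * then force block allocation by reading each block in the range
 * back and rewriting it.
 */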
int
vop_stdallocate(struct vop_allocate_args *ap)
{
#ifdef __notyet__
	struct statfs sfs;
#endif
	struct iovec aiov;
	struct vattr vattr, *vap;
	struct uio auio;
	off_t len, cur, offset;
	uint8_t *buf;
	struct thread *td;
	struct vnode *vp;
	size_t iosize;
	int error, locked;

	buf = NULL;
	error = 0;
	locked = 1;
	td = curthread;
	vap = &vattr;
	vp = ap->a_vp;
	len = ap->a_len;
	offset = ap->a_offset;

	error = VOP_GETATTR(vp, vap, td->td_ucred);
	if (error != 0)
		goto out;
	iosize = vap->va_blocksize;
	if (iosize == 0)
		iosize = BLKDEV_IOSIZE;
	if (iosize > MAXPHYS)
		iosize = MAXPHYS;
	buf = malloc(iosize, M_TEMP, M_WAITOK);

#ifdef __notyet__
	/*
	 * Check if the filesystem sets f_maxfilesize; if not use
	 * VOP_SETATTR to perform the check.
	 */
	error = VFS_STATFS(vp->v_mount, &sfs, td);
	if (error != 0)
		goto out;
	if (sfs.f_maxfilesize) {
		if (offset > sfs.f_maxfilesize || len > sfs.f_maxfilesize ||
		    offset + len > sfs.f_maxfilesize) {
			error = EFBIG;
			goto out;
		}
	} else
#endif
	if (offset + len > vap->va_size) {
		VATTR_NULL(vap);
		vap->va_size = offset + len;
		error = VOP_SETATTR(vp, vap, td->td_ucred);
		if (error != 0)
			goto out;
	}

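	/*
	 * Walk the range one blocksize-aligned chunk at a time,
	 * periodically yielding the CPU and the vnode lock.
	 */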
	while (len > 0) {
		if (should_yield()) {
			VOP_UNLOCK(vp, 0);
			locked = 0;
			kern_yield(-1);
			error = vn_lock(vp, LK_EXCLUSIVE);
			if (error != 0)
				break;
			locked = 1;
			error = VOP_GETATTR(vp, vap, td->td_ucred);
			if (error != 0)
				break;
		}

		/*
		 * Read and write back anything below the nominal file
		 * size.  There's currently no way outside the filesystem
		 * to know whether this area is sparse or not.
		 */
		cur = iosize;
		if ((offset % iosize) != 0)
			cur -= (offset % iosize);
		if (cur > len)
			cur = len;
		if (offset < vap->va_size) {
			aiov.iov_base = buf;
			aiov.iov_len = cur;
			auio.uio_iov = &aiov;
			auio.uio_iovcnt = 1;
			auio.uio_offset = offset;
			auio.uio_resid = cur;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_td = td;
			error = VOP_READ(vp, &auio, 0, td->td_ucred);
			if (error != 0)
				break;
			if (auio.uio_resid > 0) {
				bzero(buf + cur - auio.uio_resid,
				    auio.uio_resid);
			}
		} else {
			bzero(buf, cur);
		}

		aiov.iov_base = buf;
		aiov.iov_len = cur;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = offset;
		auio.uio_resid = cur;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_WRITE;
		auio.uio_td = td;

		error = VOP_WRITE(vp, &auio, 0, td->td_ucred);
		if (error != 0)
			break;

		len -= cur;
		offset += cur;
	}

 out:
	KASSERT(locked || error != 0, ("How'd I get unlocked with no error?"));
	if (locked && error != 0)
		VOP_UNLOCK(vp, 0);
	free(buf, M_TEMP);
	return (error);
}

/*
 * vfs default ops
 * used to fill the vfs function table to get reasonable default return values.
 */
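
/*
 * Example (hypothetical names): a filesystem's vfsops can point
 * individual entries at these defaults explicitly, e.g.:
 *
 *	static struct vfsops myfs_vfsops = {
 *		.vfs_mount =	myfs_mount,
 *		.vfs_unmount =	myfs_unmount,
 *		.vfs_root =	myfs_root,
 *		.vfs_statfs =	myfs_statfs,
 *		.vfs_sync =	vfs_stdnosync,
 *	};
 *	VFS_SET(myfs_vfsops, myfs, 0);
 */
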
int
vfs_stdroot (mp, flags, vpp)
	struct mount *mp;
	int flags;
	struct vnode **vpp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdstatfs (mp, sbp)
	struct mount *mp;
	struct statfs *sbp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdquotactl (mp, cmds, uid, arg)
	struct mount *mp;
	int cmds;
	uid_t uid;
	void *arg;
{

	return (EOPNOTSUPP);
}

int
vfs_stdsync(mp, waitfor)
	struct mount *mp;
	int waitfor;
{
	struct vnode *vp, *mvp;
	struct thread *td;
	int error, lockreq, allerror = 0;

	td = curthread;
	lockreq = LK_EXCLUSIVE | LK_INTERLOCK;
	if (waitfor != MNT_WAIT)
		lockreq |= LK_NOWAIT;
	/*
	 * Force stale buffer cache information to be flushed.
	 */
	MNT_ILOCK(mp);
loop:
	MNT_VNODE_FOREACH(vp, mp, mvp) {
		/* bv_cnt is an acceptable race here. */
		if (vp->v_bufobj.bo_dirty.bv_cnt == 0)
			continue;
		VI_LOCK(vp);
		MNT_IUNLOCK(mp);
		if ((error = vget(vp, lockreq, td)) != 0) {
			MNT_ILOCK(mp);
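			/*
			 * The vnode was recycled while we slept in
			 * vget(); abandon this pass and rescan from
			 * the beginning.
			 */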
			if (error == ENOENT) {
				MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
				goto loop;
			}
			continue;
		}
		error = VOP_FSYNC(vp, waitfor, td);
		if (error)
			allerror = error;
		vput(vp);
		MNT_ILOCK(mp);
	}
	MNT_IUNLOCK(mp);
	return (allerror);
}

int
vfs_stdnosync (mp, waitfor)
	struct mount *mp;
	int waitfor;
{

	return (0);
}

int
vfs_stdvget (mp, ino, flags, vpp)
	struct mount *mp;
	ino_t ino;
	int flags;
	struct vnode **vpp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdfhtovp (mp, fhp, vpp)
	struct mount *mp;
	struct fid *fhp;
	struct vnode **vpp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdinit (vfsp)
	struct vfsconf *vfsp;
{

	return (0);
}

int
vfs_stduninit (vfsp)
	struct vfsconf *vfsp;
{

	return (0);
}

int
vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace, attrname)
	struct mount *mp;
	int cmd;
	struct vnode *filename_vp;
	int attrnamespace;
	const char *attrname;
{

	if (filename_vp != NULL)
		VOP_UNLOCK(filename_vp, 0);
	return (EOPNOTSUPP);
}

int
vfs_stdsysctl(mp, op, req)
	struct mount *mp;
	fsctlop_t op;
	struct sysctl_req *req;
{

	return (EOPNOTSUPP);
}

/* end of vfs default ops */
