/*-
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed
 * to Berkeley by John Heidemann of the UCLA Ficus project.
 *
 * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/vfs_default.c 227070 2011-11-04 04:02:50Z jhb $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/event.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/lockf.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/fcntl.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/dirent.h>
#include <sys/poll.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>

static int	vop_nolookup(struct vop_lookup_args *);
static int	vop_norename(struct vop_rename_args *);
static int	vop_nostrategy(struct vop_strategy_args *);
static int	get_next_dirent(struct vnode *vp, struct dirent **dpp,
				char *dirbuf, int dirbuflen, off_t *off,
				char **cpos, int *len, int *eofflag,
				struct thread *td);
static int	dirent_exists(struct vnode *vp, const char *dirname,
			      struct thread *td);

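/*
 * The smallest record a valid directory entry can occupy: a struct dirent
 * with the fixed-size name buffer (MAXNAMLEN + 1 bytes) replaced by the
 * minimum four bytes of name storage.  Anything with a shorter d_reclen
 * indicates a malformed directory.
 */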
#define DIRENT_MINSIZE (sizeof(struct dirent) - (MAXNAMLEN+1) + 4)

/*
 * This vnode table stores what we want to do if the filesystem doesn't
 * implement a particular VOP.
 *
 * If there is no specific entry here, we will return EOPNOTSUPP.
 *
 * Note that every filesystem has to implement either vop_access
 * or vop_accessx; failing to do so will result in immediate crash
 * due to stack overflow, as vop_stdaccess() calls vop_stdaccessx(),
 * which calls vop_stdaccess() etc.
 */

struct vop_vector default_vnodeops = {
	.vop_default =		NULL,
	.vop_bypass =		VOP_EOPNOTSUPP,

	.vop_access =		vop_stdaccess,
	.vop_accessx =		vop_stdaccessx,
	.vop_advise =		vop_stdadvise,
	.vop_advlock =		vop_stdadvlock,
	.vop_advlockasync =	vop_stdadvlockasync,
	.vop_advlockpurge =	vop_stdadvlockpurge,
	.vop_allocate =		vop_stdallocate,
	.vop_bmap =		vop_stdbmap,
	.vop_close =		VOP_NULL,
	.vop_fsync =		VOP_NULL,
	.vop_getpages =		vop_stdgetpages,
	.vop_getwritemount = 	vop_stdgetwritemount,
	.vop_inactive =		VOP_NULL,
	.vop_ioctl =		VOP_ENOTTY,
	.vop_kqfilter =		vop_stdkqfilter,
	.vop_islocked =		vop_stdislocked,
	.vop_lock1 =		vop_stdlock,
	.vop_lookup =		vop_nolookup,
	.vop_open =		VOP_NULL,
	.vop_pathconf =		VOP_EINVAL,
	.vop_poll =		vop_nopoll,
	.vop_putpages =		vop_stdputpages,
	.vop_readlink =		VOP_EINVAL,
	.vop_rename =		vop_norename,
	.vop_revoke =		VOP_PANIC,
	.vop_strategy =		vop_nostrategy,
	.vop_unlock =		vop_stdunlock,
	.vop_vptocnp =		vop_stdvptocnp,
	.vop_vptofh =		vop_stdvptofh,
};

/*
 * Series of placeholder functions for various error returns for
 * VOPs.
 */

int
vop_eopnotsupp(struct vop_generic_args *ap)
{
	/*
	printf("vop_notsupp[%s]\n", ap->a_desc->vdesc_name);
	*/

	return (EOPNOTSUPP);
}

int
vop_ebadf(struct vop_generic_args *ap)
{

	return (EBADF);
}

int
vop_enotty(struct vop_generic_args *ap)
{

	return (ENOTTY);
}

int
vop_einval(struct vop_generic_args *ap)
{

	return (EINVAL);
}

int
vop_enoent(struct vop_generic_args *ap)
{

	return (ENOENT);
}

int
vop_null(struct vop_generic_args *ap)
{

	return (0);
}

/*
 * Helper function to panic on some bad VOPs in some filesystems.
 */
int
vop_panic(struct vop_generic_args *ap)
{

	panic("filesystem goof: vop_panic[%s]", ap->a_desc->vdesc_name);
}

/*
 * vop_std<something> and vop_no<something> are default functions for use by
 * filesystems that need the "default reasonable" implementation for a
 * particular operation.
 *
 * The documentation for these operations, where it exists, can be found in
 * the VOP_<SOMETHING>(9) manpage (all uppercase).
 */

/*
 * Default vop for filesystems that do not support name lookup
 */
static int
vop_nolookup(ap)
	struct vop_lookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap;
{

	*ap->a_vpp = NULL;
	return (ENOTDIR);
}

/*
 * vop_norename:
 *
 * Handle unlock and reference counting for the arguments of vop_rename
 * for filesystems that do not implement the rename operation.
 */
static int
vop_norename(struct vop_rename_args *ap)
{

	vop_rename_fail(ap);
	return (EOPNOTSUPP);
}

/*
 *	vop_nostrategy:
 *
 *	Strategy routine for VFS devices that have none.
 *
 *	BIO_ERROR and B_INVAL must be cleared prior to calling any strategy
 *	routine.  Typically this is done for a BIO_READ strategy call.
 *	Typically B_INVAL is assumed to already be clear prior to a write
 *	and should not be cleared manually unless you just made the buffer
 *	invalid.  BIO_ERROR should be cleared either way.
 */

static int
vop_nostrategy (struct vop_strategy_args *ap)
{
	printf("No strategy for buffer at %p\n", ap->a_bp);
	vprint("vnode", ap->a_vp);
	ap->a_bp->b_ioflags |= BIO_ERROR;
	ap->a_bp->b_error = EOPNOTSUPP;
	bufdone(ap->a_bp);
	return (EOPNOTSUPP);
}

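/*
 * Fetch the next directory entry from *cpos, refilling dirbuf via
 * VOP_READDIR() whenever the previous batch has been consumed.  On success
 * *dpp points at the entry and *cpos, *len and *off are advanced past it.
 */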
static int
get_next_dirent(struct vnode *vp, struct dirent **dpp, char *dirbuf,
		int dirbuflen, off_t *off, char **cpos, int *len,
		int *eofflag, struct thread *td)
{
	int error, reclen;
	struct uio uio;
	struct iovec iov;
	struct dirent *dp;

	KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp));
	KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp));

	if (*len == 0) {
		iov.iov_base = dirbuf;
		iov.iov_len = dirbuflen;

		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_offset = *off;
		uio.uio_resid = dirbuflen;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_rw = UIO_READ;
		uio.uio_td = td;

		*eofflag = 0;

#ifdef MAC
		error = mac_vnode_check_readdir(td->td_ucred, vp);
		if (error == 0)
#endif
			error = VOP_READDIR(vp, &uio, td->td_ucred, eofflag,
			    NULL, NULL);
		if (error)
			return (error);

		*off = uio.uio_offset;

		*cpos = dirbuf;
		*len = (dirbuflen - uio.uio_resid);

		if (*len == 0)
			return (ENOENT);
	}

	dp = (struct dirent *)(*cpos);
	reclen = dp->d_reclen;
	*dpp = dp;

	/* Check for a malformed directory entry. */
	if (reclen < DIRENT_MINSIZE)
		return (EINVAL);

	*cpos += reclen;
	*len -= reclen;

	return (0);
}

/*
 * Check if a named file exists in a given directory vnode.
 */
static int
dirent_exists(struct vnode *vp, const char *dirname, struct thread *td)
{
	char *dirbuf, *cpos;
	int error, eofflag, dirbuflen, len, found;
	off_t off;
	struct dirent *dp;
	struct vattr va;

	KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp));
	KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp));

	found = 0;

	error = VOP_GETATTR(vp, &va, td->td_ucred);
	if (error)
		return (found);

	dirbuflen = DEV_BSIZE;
	if (dirbuflen < va.va_blocksize)
		dirbuflen = va.va_blocksize;
	dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK);

	off = 0;
	len = 0;
	do {
		error = get_next_dirent(vp, &dp, dirbuf, dirbuflen, &off,
					&cpos, &len, &eofflag, td);
		if (error)
			goto out;

		if ((dp->d_type != DT_WHT) &&
		    !strcmp(dp->d_name, dirname)) {
			found = 1;
			goto out;
		}
	} while (len > 0 || !eofflag);

out:
	free(dirbuf, M_TEMP);
	return (found);
}

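/*
 * Default VOP_ACCESS(): sanity-check the requested bits and forward the
 * request to VOP_ACCESSX(), so that a filesystem only has to implement one
 * of the two operations.
 */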
int
vop_stdaccess(struct vop_access_args *ap)
{

	KASSERT((ap->a_accmode & ~(VEXEC | VWRITE | VREAD | VADMIN |
	    VAPPEND)) == 0, ("invalid bit in accmode"));

	return (VOP_ACCESSX(ap->a_vp, ap->a_accmode, ap->a_cred, ap->a_td));
}

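/*
 * Default VOP_ACCESSX(): reduce the extended access bits to the classic
 * VREAD/VWRITE/VEXEC set with vfs_unixify_accmode() and forward the result
 * to VOP_ACCESS().
 */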
int
vop_stdaccessx(struct vop_accessx_args *ap)
{
	int error;
	accmode_t accmode = ap->a_accmode;

	error = vfs_unixify_accmode(&accmode);
	if (error != 0)
		return (error);

	if (accmode == 0)
		return (0);

	return (VOP_ACCESS(ap->a_vp, accmode, ap->a_cred, ap->a_td));
}

/*
 * Advisory record locking support
 */
int
vop_stdadvlock(struct vop_advlock_args *ap)
{
	struct vnode *vp;
	struct ucred *cred;
	struct vattr vattr;
	int error;

	vp = ap->a_vp;
	cred = curthread->td_ucred;
	vn_lock(vp, LK_SHARED | LK_RETRY);
	error = VOP_GETATTR(vp, &vattr, cred);
	VOP_UNLOCK(vp, 0);
	if (error)
		return (error);

	return (lf_advlock(ap, &(vp->v_lockf), vattr.va_size));
}

int
vop_stdadvlockasync(struct vop_advlockasync_args *ap)
{
	struct vnode *vp;
	struct ucred *cred;
	struct vattr vattr;
	int error;

	vp = ap->a_vp;
	cred = curthread->td_ucred;
	vn_lock(vp, LK_SHARED | LK_RETRY);
	error = VOP_GETATTR(vp, &vattr, cred);
	VOP_UNLOCK(vp, 0);
	if (error)
		return (error);

	return (lf_advlockasync(ap, &(vp->v_lockf), vattr.va_size));
}

int
vop_stdadvlockpurge(struct vop_advlockpurge_args *ap)
{
	struct vnode *vp;

	vp = ap->a_vp;
	lf_purgelocks(vp, &vp->v_lockf);
	return (0);
}

/*
 * vop_stdpathconf:
 *
 * Standard implementation of POSIX pathconf, to get information about limits
 * for a filesystem.
 * Override per filesystem for the case where the filesystem has smaller
 * limits.
 */
int
vop_stdpathconf(ap)
	struct vop_pathconf_args /* {
	struct vnode *a_vp;
	int a_name;
	int *a_retval;
	} */ *ap;
{

	switch (ap->a_name) {
		case _PC_NAME_MAX:
			*ap->a_retval = NAME_MAX;
			return (0);
		case _PC_PATH_MAX:
			*ap->a_retval = PATH_MAX;
			return (0);
		case _PC_LINK_MAX:
			*ap->a_retval = LINK_MAX;
			return (0);
		case _PC_MAX_CANON:
			*ap->a_retval = MAX_CANON;
			return (0);
		case _PC_MAX_INPUT:
			*ap->a_retval = MAX_INPUT;
			return (0);
		case _PC_PIPE_BUF:
			*ap->a_retval = PIPE_BUF;
			return (0);
		case _PC_CHOWN_RESTRICTED:
			*ap->a_retval = 1;
			return (0);
		case _PC_VDISABLE:
			*ap->a_retval = _POSIX_VDISABLE;
			return (0);
		default:
			return (EINVAL);
	}
	/* NOTREACHED */
}

/*
 * Standard lock, unlock and islocked functions.
 */
int
vop_stdlock(ap)
	struct vop_lock1_args /* {
		struct vnode *a_vp;
		int a_flags;
		char *file;
		int line;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	return (_lockmgr_args(vp->v_vnlock, ap->a_flags, VI_MTX(vp),
	    LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, ap->a_file,
	    ap->a_line));
}

/* See above. */
int
vop_stdunlock(ap)
	struct vop_unlock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	return (lockmgr(vp->v_vnlock, ap->a_flags | LK_RELEASE, VI_MTX(vp)));
}

/* See above. */
int
vop_stdislocked(ap)
	struct vop_islocked_args /* {
		struct vnode *a_vp;
	} */ *ap;
{

	return (lockstatus(ap->a_vp->v_vnlock));
}

/*
 * Return true for select/poll.
 */
int
vop_nopoll(ap)
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int  a_events;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{

	return (poll_no_poll(ap->a_events));
}

/*
 * Implement poll for local filesystems that support it.
 */
int
vop_stdpoll(ap)
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int  a_events;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{
	if (ap->a_events & ~POLLSTANDARD)
		return (vn_pollrecord(ap->a_vp, ap->a_td, ap->a_events));
	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

/*
 * Return our mount point, as we will take charge of the writes.
 */
int
vop_stdgetwritemount(ap)
	struct vop_getwritemount_args /* {
		struct vnode *a_vp;
		struct mount **a_mpp;
	} */ *ap;
{
	struct mount *mp;

	/*
	 * XXX Since this is called unlocked we may be recycled while
	 * attempting to ref the mount.  If this is the case our mount point
	 * will be set to NULL.  We only have to prevent this call from
	 * returning with a ref to an incorrect mountpoint.  It is not
	 * harmful to return with a ref to our previous mountpoint.
	 */
	mp = ap->a_vp->v_mount;
	if (mp != NULL) {
		vfs_ref(mp);
		if (mp != ap->a_vp->v_mount) {
			vfs_rel(mp);
			mp = NULL;
		}
	}
	*(ap->a_mpp) = mp;
	return (0);
}

/*
 * Default VOP_BMAP(): assume a 1:1 mapping between logical and physical
 * blocks, scaling the block number by the filesystem's I/O size expressed
 * in DEV_BSIZE units, and report no read-ahead or read-behind clustering.
 * (XXX: the VOP_BMAP(9) manpage still needs fleshing out.)
 */
int
vop_stdbmap(ap)
	struct vop_bmap_args /* {
		struct vnode *a_vp;
		daddr_t  a_bn;
		struct bufobj **a_bop;
		daddr_t *a_bnp;
		int *a_runp;
		int *a_runb;
	} */ *ap;
{

	if (ap->a_bop != NULL)
		*ap->a_bop = &ap->a_vp->v_bufobj;
	if (ap->a_bnp != NULL)
		*ap->a_bnp = ap->a_bn * btodb(ap->a_vp->v_mount->mnt_stat.f_iosize);
	if (ap->a_runp != NULL)
		*ap->a_runp = 0;
	if (ap->a_runb != NULL)
		*ap->a_runb = 0;
	return (0);
}

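/*
 * Default VOP_FSYNC() for filesystems that use the buffer cache: walk the
 * vnode's dirty buffer list and write out every dirty buffer, using the
 * BV_SCANNED mark to guarantee the scan terminates.  For MNT_WAIT requests,
 * wait for in-progress I/O and retry until no dirty buffers remain or the
 * retry limit is exhausted.
 */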
int
vop_stdfsync(ap)
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int a_waitfor;
		struct thread *a_td;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct buf *bp;
	struct bufobj *bo;
	struct buf *nbp;
	int error = 0;
	int maxretry = 1000;     /* large, arbitrarily chosen */

	bo = &vp->v_bufobj;
	BO_LOCK(bo);
loop1:
	/*
	 * MARK/SCAN initialization to avoid infinite loops.
	 */
	TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) {
		bp->b_vflags &= ~BV_SCANNED;
		bp->b_error = 0;
	}

	/*
	 * Flush all dirty buffers associated with a vnode.
	 */
loop2:
	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
		if ((bp->b_vflags & BV_SCANNED) != 0)
			continue;
		bp->b_vflags |= BV_SCANNED;
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
			continue;
		BO_UNLOCK(bo);
		KASSERT(bp->b_bufobj == bo,
		    ("bp %p wrong b_bufobj %p should be %p",
		    bp, bp->b_bufobj, bo));
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("fsync: not dirty");
		if ((vp->v_object != NULL) && (bp->b_flags & B_CLUSTEROK)) {
			vfs_bio_awrite(bp);
		} else {
			bremfree(bp);
			bawrite(bp);
		}
		BO_LOCK(bo);
		goto loop2;
	}

	/*
	 * If synchronous the caller expects us to completely resolve all
	 * dirty buffers in the system.  Wait for in-progress I/O to
	 * complete (which could include background bitmap writes), then
	 * retry if dirty blocks still exist.
	 */
	if (ap->a_waitfor == MNT_WAIT) {
		bufobj_wwait(bo, 0, 0);
		if (bo->bo_dirty.bv_cnt > 0) {
			/*
			 * If we are unable to write any of these buffers
			 * then we fail now rather than trying endlessly
			 * to write them out.
			 */
			TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs)
				if ((error = bp->b_error) == 0)
					continue;
			if (error == 0 && --maxretry >= 0)
				goto loop1;
			error = EAGAIN;
		}
	}
	BO_UNLOCK(bo);
	if (error == EAGAIN)
		vprint("fsync: giving up on dirty", vp);

	return (error);
}

/*
 * Default VOP_GETPAGES(): delegate to the generic vnode pager.
 * (XXX: more info belongs in the VOP_GETPAGES(9) manpage.)
 */
int
vop_stdgetpages(ap)
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		vm_page_t *a_m;
		int a_count;
		int a_reqpage;
		vm_ooffset_t a_offset;
	} */ *ap;
{

	return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
	    ap->a_count, ap->a_reqpage);
}

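/*
 * Default VOP_KQFILTER(): fall back to the generic implementation in
 * vfs_kqfilter().
 */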
int
vop_stdkqfilter(struct vop_kqfilter_args *ap)
{
	return vfs_kqfilter(ap);
}

/*
 * Default VOP_PUTPAGES(): delegate to the generic vnode pager.
 * (XXX: more info belongs in the VOP_PUTPAGES(9) manpage.)
 */
int
vop_stdputpages(ap)
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		vm_page_t *a_m;
		int a_count;
		int a_sync;
		int *a_rtvals;
		vm_ooffset_t a_offset;
	} */ *ap;
{

	return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
	     ap->a_sync, ap->a_rtvals);
}

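/*
 * Generating an on-disk file handle is inherently filesystem-specific, so
 * there is no useful default; refuse the operation.  Filesystems that want
 * to be NFS-exportable must provide their own VOP_VPTOFH().
 */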
int
vop_stdvptofh(struct vop_vptofh_args *ap)
{
	return (EOPNOTSUPP);
}

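/*
 * Default VOP_VPTOCNP(): recover the name of a directory vnode by opening
 * its parent ("..") and scanning it with VOP_READDIR() for an entry whose
 * inode number matches the vnode's.  For a union mount root, the covered
 * vnode is searched instead, after checking that the name is not shadowed
 * in the mounted filesystem.
 */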
int
vop_stdvptocnp(struct vop_vptocnp_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode **dvp = ap->a_vpp;
	struct ucred *cred = ap->a_cred;
	char *buf = ap->a_buf;
	int *buflen = ap->a_buflen;
	char *dirbuf, *cpos;
	int i, error, eofflag, dirbuflen, flags, locked, len, covered;
	off_t off;
	ino_t fileno;
	struct vattr va;
	struct nameidata nd;
	struct thread *td;
	struct dirent *dp;
	struct vnode *mvp;

	i = *buflen;
	error = 0;
	covered = 0;
	td = curthread;

	if (vp->v_type != VDIR)
		return (ENOENT);

	error = VOP_GETATTR(vp, &va, cred);
	if (error)
		return (error);

	VREF(vp);
	locked = VOP_ISLOCKED(vp);
	VOP_UNLOCK(vp, 0);
	NDINIT_ATVP(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
	    "..", vp, td);
	flags = FREAD;
	error = vn_open_cred(&nd, &flags, 0, VN_OPEN_NOAUDIT, cred, NULL);
	if (error) {
		vn_lock(vp, locked | LK_RETRY);
		return (error);
	}
	NDFREE(&nd, NDF_ONLY_PNBUF);

	mvp = *dvp = nd.ni_vp;

	if (vp->v_mount != (*dvp)->v_mount &&
	    ((*dvp)->v_vflag & VV_ROOT) &&
	    ((*dvp)->v_mount->mnt_flag & MNT_UNION)) {
		*dvp = (*dvp)->v_mount->mnt_vnodecovered;
		VREF(mvp);
		VOP_UNLOCK(mvp, 0);
		vn_close(mvp, FREAD, cred, td);
		VREF(*dvp);
		vn_lock(*dvp, LK_EXCLUSIVE | LK_RETRY);
		covered = 1;
	}

	fileno = va.va_fileid;

	dirbuflen = DEV_BSIZE;
	if (dirbuflen < va.va_blocksize)
		dirbuflen = va.va_blocksize;
	dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK);

	if ((*dvp)->v_type != VDIR) {
		error = ENOENT;
		goto out;
	}

	off = 0;
	len = 0;
	do {
		/* Call VOP_READDIR of parent. */
		error = get_next_dirent(*dvp, &dp, dirbuf, dirbuflen, &off,
					&cpos, &len, &eofflag, td);
		if (error)
			goto out;

		if ((dp->d_type != DT_WHT) &&
		    (dp->d_fileno == fileno)) {
			if (covered) {
				VOP_UNLOCK(*dvp, 0);
				vn_lock(mvp, LK_EXCLUSIVE | LK_RETRY);
				if (dirent_exists(mvp, dp->d_name, td)) {
					error = ENOENT;
					VOP_UNLOCK(mvp, 0);
					vn_lock(*dvp, LK_EXCLUSIVE | LK_RETRY);
					goto out;
				}
				VOP_UNLOCK(mvp, 0);
				vn_lock(*dvp, LK_EXCLUSIVE | LK_RETRY);
			}
			i -= dp->d_namlen;

			if (i < 0) {
				error = ENOMEM;
				goto out;
			}
			bcopy(dp->d_name, buf + i, dp->d_namlen);
			error = 0;
			goto out;
		}
	} while (len > 0 || !eofflag);
	error = ENOENT;

out:
	free(dirbuf, M_TEMP);
	if (!error) {
		*buflen = i;
		vhold(*dvp);
	}
	if (covered) {
		vput(*dvp);
		vrele(mvp);
	} else {
		VOP_UNLOCK(mvp, 0);
		vn_close(mvp, FREAD, cred, td);
	}
	vn_lock(vp, locked | LK_RETRY);
	return (error);
}

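/*
 * Default VOP_ALLOCATE(): emulate posix_fallocate() by rewriting the
 * requested range in place.  Existing data is read back and rewritten
 * (zeros are written beyond EOF) one block at a time, which forces the
 * filesystem to allocate backing store for the whole range.
 */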
int
vop_stdallocate(struct vop_allocate_args *ap)
{
#ifdef __notyet__
	struct statfs sfs;
#endif
	struct iovec aiov;
	struct vattr vattr, *vap;
	struct uio auio;
	off_t fsize, len, cur, offset;
	uint8_t *buf;
	struct thread *td;
	struct vnode *vp;
	size_t iosize;
	int error;

	buf = NULL;
	error = 0;
	td = curthread;
	vap = &vattr;
	vp = ap->a_vp;
	len = *ap->a_len;
	offset = *ap->a_offset;

	error = VOP_GETATTR(vp, vap, td->td_ucred);
	if (error != 0)
		goto out;
	fsize = vap->va_size;
	iosize = vap->va_blocksize;
	if (iosize == 0)
		iosize = BLKDEV_IOSIZE;
	if (iosize > MAXPHYS)
		iosize = MAXPHYS;
	buf = malloc(iosize, M_TEMP, M_WAITOK);

#ifdef __notyet__
	/*
	 * Check if the filesystem sets f_maxfilesize; if not use
	 * VOP_SETATTR to perform the check.
	 */
	error = VFS_STATFS(vp->v_mount, &sfs, td);
	if (error != 0)
		goto out;
	if (sfs.f_maxfilesize) {
		if (offset > sfs.f_maxfilesize || len > sfs.f_maxfilesize ||
		    offset + len > sfs.f_maxfilesize) {
			error = EFBIG;
			goto out;
		}
	} else
#endif
	if (offset + len > vap->va_size) {
		/*
		 * Test offset + len against the filesystem's maxfilesize.
		 */
		VATTR_NULL(vap);
		vap->va_size = offset + len;
		error = VOP_SETATTR(vp, vap, td->td_ucred);
		if (error != 0)
			goto out;
		VATTR_NULL(vap);
		vap->va_size = fsize;
		error = VOP_SETATTR(vp, vap, td->td_ucred);
		if (error != 0)
			goto out;
	}

	for (;;) {
		/*
		 * Read and write back anything below the nominal file
		 * size.  There's currently no way outside the filesystem
		 * to know whether this area is sparse or not.
		 */
		cur = iosize;
		if ((offset % iosize) != 0)
			cur -= (offset % iosize);
		if (cur > len)
			cur = len;
		if (offset < fsize) {
			aiov.iov_base = buf;
			aiov.iov_len = cur;
			auio.uio_iov = &aiov;
			auio.uio_iovcnt = 1;
			auio.uio_offset = offset;
			auio.uio_resid = cur;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_td = td;
			error = VOP_READ(vp, &auio, 0, td->td_ucred);
			if (error != 0)
				break;
			if (auio.uio_resid > 0) {
				bzero(buf + cur - auio.uio_resid,
				    auio.uio_resid);
			}
		} else {
			bzero(buf, cur);
		}

		aiov.iov_base = buf;
		aiov.iov_len = cur;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = offset;
		auio.uio_resid = cur;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_WRITE;
		auio.uio_td = td;

		error = VOP_WRITE(vp, &auio, 0, td->td_ucred);
		if (error != 0)
			break;

		len -= cur;
		offset += cur;
		if (len == 0)
			break;
		if (should_yield())
			break;
	}

 out:
	*ap->a_len = len;
	*ap->a_offset = offset;
	free(buf, M_TEMP);
	return (error);
}

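/*
 * Default VOP_ADVISE() backing posix_fadvise(): POSIX_FADV_WILLNEED is
 * accepted but ignored, while POSIX_FADV_DONTNEED flushes clean buffers
 * and moves the range's resident pages to the VM page cache.
 */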
int
vop_stdadvise(struct vop_advise_args *ap)
{
	struct vnode *vp;
	off_t start, end;
	int error, vfslocked;

	vp = ap->a_vp;
	switch (ap->a_advice) {
	case POSIX_FADV_WILLNEED:
		/*
		 * Do nothing for now.  Filesystems should provide a
		 * custom method which starts an asynchronous read of
		 * the requested region.
		 */
		error = 0;
		break;
	case POSIX_FADV_DONTNEED:
		/*
		 * Flush any open FS buffers and then remove pages
		 * from the backing VM object.  Using vinvalbuf() here
		 * is a bit heavy-handed as it flushes all buffers for
		 * the given vnode, not just the buffers covering the
		 * requested range.
		 */
		error = 0;
		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		if (vp->v_iflag & VI_DOOMED) {
			VOP_UNLOCK(vp, 0);
			VFS_UNLOCK_GIANT(vfslocked);
			break;
		}
		vinvalbuf(vp, V_CLEANONLY, 0, 0);
		if (vp->v_object != NULL) {
			start = trunc_page(ap->a_start);
			end = round_page(ap->a_end);
			VM_OBJECT_LOCK(vp->v_object);
			vm_object_page_cache(vp->v_object, OFF_TO_IDX(start),
			    OFF_TO_IDX(end));
			VM_OBJECT_UNLOCK(vp->v_object);
		}
		VOP_UNLOCK(vp, 0);
		VFS_UNLOCK_GIANT(vfslocked);
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}

/*
 * VFS default ops, used to fill the VFS function table with reasonable
 * default return values.
 */
int
vfs_stdroot (mp, flags, vpp)
	struct mount *mp;
	int flags;
	struct vnode **vpp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdstatfs (mp, sbp)
	struct mount *mp;
	struct statfs *sbp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdquotactl (mp, cmds, uid, arg)
	struct mount *mp;
	int cmds;
	uid_t uid;
	void *arg;
{

	return (EOPNOTSUPP);
}

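/*
 * Default VFS_SYNC(): iterate over the mount's vnodes and VOP_FSYNC()
 * every vnode that has dirty buffers.  An error from any VOP_FSYNC() is
 * remembered and returned after the loop completes, so every vnode still
 * gets a chance to be flushed.
 */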
int
vfs_stdsync(mp, waitfor)
	struct mount *mp;
	int waitfor;
{
	struct vnode *vp, *mvp;
	struct thread *td;
	int error, lockreq, allerror = 0;

	td = curthread;
	lockreq = LK_EXCLUSIVE | LK_INTERLOCK;
	if (waitfor != MNT_WAIT)
		lockreq |= LK_NOWAIT;
	/*
	 * Force stale buffer cache information to be flushed.
	 */
	MNT_ILOCK(mp);
loop:
	MNT_VNODE_FOREACH(vp, mp, mvp) {
		/* bv_cnt is an acceptable race here. */
		if (vp->v_bufobj.bo_dirty.bv_cnt == 0)
			continue;
		VI_LOCK(vp);
		MNT_IUNLOCK(mp);
		if ((error = vget(vp, lockreq, td)) != 0) {
			MNT_ILOCK(mp);
			if (error == ENOENT) {
				MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
				goto loop;
			}
			continue;
		}
		error = VOP_FSYNC(vp, waitfor, td);
		if (error)
			allerror = error;
		vput(vp);
		MNT_ILOCK(mp);
	}
	MNT_IUNLOCK(mp);
	return (allerror);
}

int
vfs_stdnosync (mp, waitfor)
	struct mount *mp;
	int waitfor;
{

	return (0);
}

int
vfs_stdvget (mp, ino, flags, vpp)
	struct mount *mp;
	ino_t ino;
	int flags;
	struct vnode **vpp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdfhtovp (mp, fhp, flags, vpp)
	struct mount *mp;
	struct fid *fhp;
	int flags;
	struct vnode **vpp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdinit (vfsp)
	struct vfsconf *vfsp;
{

	return (0);
}

int
vfs_stduninit (vfsp)
	struct vfsconf *vfsp;
{

	return (0);
}

int
vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace, attrname)
	struct mount *mp;
	int cmd;
	struct vnode *filename_vp;
	int attrnamespace;
	const char *attrname;
{

	if (filename_vp != NULL)
		VOP_UNLOCK(filename_vp, 0);
	return (EOPNOTSUPP);
}

int
vfs_stdsysctl(mp, op, req)
	struct mount *mp;
	fsctlop_t op;
	struct sysctl_req *req;
{

	return (EOPNOTSUPP);
}

/* end of vfs default ops */