vfs_default.c revision 241025
/*-
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed
 * to Berkeley by John Heidemann of the UCLA Ficus project.
 *
 * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/vfs_default.c 241025 2012-09-28 11:25:02Z kib $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/event.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/lockf.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/fcntl.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/dirent.h>
#include <sys/poll.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>

static int	vop_nolookup(struct vop_lookup_args *);
static int	vop_norename(struct vop_rename_args *);
static int	vop_nostrategy(struct vop_strategy_args *);
static int	get_next_dirent(struct vnode *vp, struct dirent **dpp,
				char *dirbuf, int dirbuflen, off_t *off,
				char **cpos, int *len, int *eofflag,
				struct thread *td);
static int	dirent_exists(struct vnode *vp, const char *dirname,
			      struct thread *td);
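
/*
 * Smallest record get_next_dirent() will accept: the fixed dirent header
 * (sizeof(struct dirent) minus the MAXNAMLEN+1 name buffer) plus 4 bytes
 * of name storage.  Illustrative arithmetic, assuming the traditional
 * 8-byte header with a 256-byte name buffer: 264 - 256 + 4 = 12 bytes.
 */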
#define DIRENT_MINSIZE (sizeof(struct dirent) - (MAXNAMLEN+1) + 4)

static int vop_stdis_text(struct vop_is_text_args *ap);
static int vop_stdset_text(struct vop_set_text_args *ap);
static int vop_stdunset_text(struct vop_unset_text_args *ap);

/*
 * This vnode table stores what we want to do if the filesystem doesn't
 * implement a particular VOP.
 *
 * If there is no specific entry here, we will return EOPNOTSUPP.
 *
 * Note that every filesystem has to implement either vop_access
 * or vop_accessx; failing to do so will result in an immediate crash
 * due to stack overflow, as vop_stdaccess() calls vop_stdaccessx(),
 * which calls vop_stdaccess(), and so on.
 */

struct vop_vector default_vnodeops = {
	.vop_default =		NULL,
	.vop_bypass =		VOP_EOPNOTSUPP,

	.vop_access =		vop_stdaccess,
	.vop_accessx =		vop_stdaccessx,
	.vop_advise =		vop_stdadvise,
	.vop_advlock =		vop_stdadvlock,
	.vop_advlockasync =	vop_stdadvlockasync,
	.vop_advlockpurge =	vop_stdadvlockpurge,
	.vop_allocate =		vop_stdallocate,
	.vop_bmap =		vop_stdbmap,
	.vop_close =		VOP_NULL,
	.vop_fsync =		VOP_NULL,
	.vop_getpages =		vop_stdgetpages,
	.vop_getwritemount =	vop_stdgetwritemount,
	.vop_inactive =		VOP_NULL,
	.vop_ioctl =		VOP_ENOTTY,
	.vop_kqfilter =		vop_stdkqfilter,
	.vop_islocked =		vop_stdislocked,
	.vop_lock1 =		vop_stdlock,
	.vop_lookup =		vop_nolookup,
	.vop_open =		VOP_NULL,
	.vop_pathconf =		VOP_EINVAL,
	.vop_poll =		vop_nopoll,
	.vop_putpages =		vop_stdputpages,
	.vop_readlink =		VOP_EINVAL,
	.vop_rename =		vop_norename,
	.vop_revoke =		VOP_PANIC,
	.vop_strategy =		vop_nostrategy,
	.vop_unlock =		vop_stdunlock,
	.vop_vptocnp =		vop_stdvptocnp,
	.vop_vptofh =		vop_stdvptofh,
	.vop_unp_bind =		vop_stdunp_bind,
	.vop_unp_connect =	vop_stdunp_connect,
	.vop_unp_detach =	vop_stdunp_detach,
	.vop_is_text =		vop_stdis_text,
	.vop_set_text =		vop_stdset_text,
	.vop_unset_text =	vop_stdunset_text,
};
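
/*
 * Illustrative sketch (hypothetical "myfs", not part of this file): a
 * filesystem picks up the defaults above by chaining to this table via
 * vop_default and overriding only what it implements:
 *
 *	static struct vop_vector myfs_vnodeops = {
 *		.vop_default =	&default_vnodeops,
 *		.vop_access =	myfs_access,	(one of access/accessx
 *						 is mandatory; see above)
 *		.vop_lookup =	myfs_lookup,
 *	};
 */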

/*
 * Series of placeholder functions for various error returns for
 * VOPs.
 */

int
vop_eopnotsupp(struct vop_generic_args *ap)
{
	/*
	printf("vop_notsupp[%s]\n", ap->a_desc->vdesc_name);
	*/

	return (EOPNOTSUPP);
}

int
vop_ebadf(struct vop_generic_args *ap)
{

	return (EBADF);
}

int
vop_enotty(struct vop_generic_args *ap)
{

	return (ENOTTY);
}

int
vop_einval(struct vop_generic_args *ap)
{

	return (EINVAL);
}

int
vop_enoent(struct vop_generic_args *ap)
{

	return (ENOENT);
}

int
vop_null(struct vop_generic_args *ap)
{

	return (0);
}

/*
 * Helper function to panic on some bad VOPs in some filesystems.
 */
int
vop_panic(struct vop_generic_args *ap)
{

	panic("filesystem goof: vop_panic[%s]", ap->a_desc->vdesc_name);
}

/*
 * vop_std<something> and vop_no<something> are default functions for use by
 * filesystems that need the "default reasonable" implementation for a
 * particular operation.
 *
 * The documentation for the operations they implement exists (if it exists)
 * in the VOP_<SOMETHING>(9) manpage (all uppercase).
 */

/*
 * Default vop for filesystems that do not support name lookup
 */
static int
vop_nolookup(ap)
	struct vop_lookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap;
{

	*ap->a_vpp = NULL;
	return (ENOTDIR);
}

/*
 * vop_norename:
 *
 * Handle unlock and reference counting for arguments of vop_rename
 * for filesystems that do not implement a rename operation.
 */
static int
vop_norename(struct vop_rename_args *ap)
{

	vop_rename_fail(ap);
	return (EOPNOTSUPP);
}

/*
 *	vop_nostrategy:
 *
 *	Strategy routine for VFS devices that have none.
 *
 *	BIO_ERROR and B_INVAL must be cleared prior to calling any strategy
 *	routine.  Typically this is done for a BIO_READ strategy call.
 *	Typically B_INVAL is assumed to already be clear prior to a write
 *	and should not be cleared manually unless you just made the buffer
 *	invalid.  BIO_ERROR should be cleared either way.
 */

static int
vop_nostrategy (struct vop_strategy_args *ap)
{
	printf("No strategy for buffer at %p\n", ap->a_bp);
	vprint("vnode", ap->a_vp);
	ap->a_bp->b_ioflags |= BIO_ERROR;
	ap->a_bp->b_error = EOPNOTSUPP;
	bufdone(ap->a_bp);
	return (EOPNOTSUPP);
}

static int
get_next_dirent(struct vnode *vp, struct dirent **dpp, char *dirbuf,
		int dirbuflen, off_t *off, char **cpos, int *len,
		int *eofflag, struct thread *td)
{
	int error, reclen;
	struct uio uio;
	struct iovec iov;
	struct dirent *dp;

	KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp));
	KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp));

	if (*len == 0) {
		iov.iov_base = dirbuf;
		iov.iov_len = dirbuflen;

		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_offset = *off;
		uio.uio_resid = dirbuflen;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_rw = UIO_READ;
		uio.uio_td = td;

		*eofflag = 0;

#ifdef MAC
		error = mac_vnode_check_readdir(td->td_ucred, vp);
		if (error == 0)
#endif
			error = VOP_READDIR(vp, &uio, td->td_ucred, eofflag,
			    NULL, NULL);
		if (error)
			return (error);

		*off = uio.uio_offset;

		*cpos = dirbuf;
		*len = (dirbuflen - uio.uio_resid);

		if (*len == 0)
			return (ENOENT);
	}

	dp = (struct dirent *)(*cpos);
	reclen = dp->d_reclen;
	*dpp = dp;

	/* Check for a malformed directory. */
	if (reclen < DIRENT_MINSIZE)
		return (EINVAL);

	*cpos += reclen;
	*len -= reclen;

	return (0);
}
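
/*
 * Usage note: callers prime *off and *len to 0 and call this in a loop,
 * iterating while (len > 0 || !eofflag); a zero *len triggers a refill
 * of dirbuf via VOP_READDIR(), otherwise the next cached entry is
 * returned.  dirent_exists() below is the minimal example of this
 * pattern.
 */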

/*
 * Check if a named file exists in a given directory vnode; returns 0
 * (not found) if the directory cannot be read.
 */
static int
dirent_exists(struct vnode *vp, const char *dirname, struct thread *td)
{
	char *dirbuf, *cpos;
	int error, eofflag, dirbuflen, len, found;
	off_t off;
	struct dirent *dp;
	struct vattr va;

	KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp));
	KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp));

	found = 0;

	error = VOP_GETATTR(vp, &va, td->td_ucred);
	if (error)
		return (found);

	dirbuflen = DEV_BSIZE;
	if (dirbuflen < va.va_blocksize)
		dirbuflen = va.va_blocksize;
	dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK);

	off = 0;
	len = 0;
	do {
		error = get_next_dirent(vp, &dp, dirbuf, dirbuflen, &off,
					&cpos, &len, &eofflag, td);
		if (error)
			goto out;

		if (dp->d_type != DT_WHT && dp->d_fileno != 0 &&
		    strcmp(dp->d_name, dirname) == 0) {
			found = 1;
			goto out;
		}
	} while (len > 0 || !eofflag);

out:
	free(dirbuf, M_TEMP);
	return (found);
}

int
vop_stdaccess(struct vop_access_args *ap)
{

	KASSERT((ap->a_accmode & ~(VEXEC | VWRITE | VREAD | VADMIN |
	    VAPPEND)) == 0, ("invalid bit in accmode"));

	return (VOP_ACCESSX(ap->a_vp, ap->a_accmode, ap->a_cred, ap->a_td));
}

int
vop_stdaccessx(struct vop_accessx_args *ap)
{
	int error;
	accmode_t accmode = ap->a_accmode;

	error = vfs_unixify_accmode(&accmode);
	if (error != 0)
		return (error);

	if (accmode == 0)
		return (0);

	return (VOP_ACCESS(ap->a_vp, accmode, ap->a_cred, ap->a_td));
}
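
/*
 * Note on the pair above: the mutual recursion described at the top of
 * this file terminates only if the filesystem implements one of the two.
 * A filesystem providing just vop_access still gets VOP_ACCESSX() for
 * free: vop_stdaccessx() unixifies the accmode and re-dispatches through
 * VOP_ACCESS(), which then lands in the filesystem's own method.
 */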

/*
 * Advisory record locking support
 */
int
vop_stdadvlock(struct vop_advlock_args *ap)
{
	struct vnode *vp;
	struct ucred *cred;
	struct vattr vattr;
	int error;

	vp = ap->a_vp;
	cred = curthread->td_ucred;
	vn_lock(vp, LK_SHARED | LK_RETRY);
	error = VOP_GETATTR(vp, &vattr, cred);
	VOP_UNLOCK(vp, 0);
	if (error)
		return (error);

	return (lf_advlock(ap, &(vp->v_lockf), vattr.va_size));
}

int
vop_stdadvlockasync(struct vop_advlockasync_args *ap)
{
	struct vnode *vp;
	struct ucred *cred;
	struct vattr vattr;
	int error;

	vp = ap->a_vp;
	cred = curthread->td_ucred;
	vn_lock(vp, LK_SHARED | LK_RETRY);
	error = VOP_GETATTR(vp, &vattr, cred);
	VOP_UNLOCK(vp, 0);
	if (error)
		return (error);

	return (lf_advlockasync(ap, &(vp->v_lockf), vattr.va_size));
}

int
vop_stdadvlockpurge(struct vop_advlockpurge_args *ap)
{
	struct vnode *vp;

	vp = ap->a_vp;
	lf_purgelocks(vp, &vp->v_lockf);
	return (0);
}

/*
 * vop_stdpathconf:
 *
 * Standard implementation of POSIX pathconf, to get information about limits
 * for a filesystem.
 * Override per filesystem for the case where the filesystem has smaller
 * limits.
 */
int
vop_stdpathconf(ap)
	struct vop_pathconf_args /* {
	struct vnode *a_vp;
	int a_name;
	int *a_retval;
	} */ *ap;
{

	switch (ap->a_name) {
		case _PC_NAME_MAX:
			*ap->a_retval = NAME_MAX;
			return (0);
		case _PC_PATH_MAX:
			*ap->a_retval = PATH_MAX;
			return (0);
		case _PC_LINK_MAX:
			*ap->a_retval = LINK_MAX;
			return (0);
		case _PC_MAX_CANON:
			*ap->a_retval = MAX_CANON;
			return (0);
		case _PC_MAX_INPUT:
			*ap->a_retval = MAX_INPUT;
			return (0);
		case _PC_PIPE_BUF:
			*ap->a_retval = PIPE_BUF;
			return (0);
		case _PC_CHOWN_RESTRICTED:
			*ap->a_retval = 1;
			return (0);
		case _PC_VDISABLE:
			*ap->a_retval = _POSIX_VDISABLE;
			return (0);
		default:
			return (EINVAL);
	}
	/* NOTREACHED */
}
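
/*
 * Illustrative override (hypothetical "myfs", not part of this file): a
 * filesystem with a smaller name limit would handle that one case and
 * defer the rest to the standard implementation:
 *
 *	static int
 *	myfs_pathconf(struct vop_pathconf_args *ap)
 *	{
 *
 *		if (ap->a_name == _PC_NAME_MAX) {
 *			*ap->a_retval = MYFS_NAME_MAX;
 *			return (0);
 *		}
 *		return (vop_stdpathconf(ap));
 *	}
 */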

/*
 * Standard lock, unlock and islocked functions.
 */
int
vop_stdlock(ap)
	struct vop_lock1_args /* {
		struct vnode *a_vp;
		int a_flags;
		char *file;
		int line;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	return (_lockmgr_args(vp->v_vnlock, ap->a_flags, VI_MTX(vp),
	    LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, ap->a_file,
	    ap->a_line));
}

/* See above. */
int
vop_stdunlock(ap)
	struct vop_unlock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	return (lockmgr(vp->v_vnlock, ap->a_flags | LK_RELEASE, VI_MTX(vp)));
}

/* See above. */
int
vop_stdislocked(ap)
	struct vop_islocked_args /* {
		struct vnode *a_vp;
	} */ *ap;
{

	return (lockstatus(ap->a_vp->v_vnlock));
}

/*
 * Return true for select/poll.
 */
int
vop_nopoll(ap)
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int  a_events;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{

	return (poll_no_poll(ap->a_events));
}

/*
 * Implement poll for local filesystems that support it.
 */
int
vop_stdpoll(ap)
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int  a_events;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{
	if (ap->a_events & ~POLLSTANDARD)
		return (vn_pollrecord(ap->a_vp, ap->a_td, ap->a_events));
	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

/*
 * Return our mount point, as we will take charge of the writes.
 */
int
vop_stdgetwritemount(ap)
	struct vop_getwritemount_args /* {
		struct vnode *a_vp;
		struct mount **a_mpp;
	} */ *ap;
{
	struct mount *mp;

	/*
	 * XXX Since this is called unlocked we may be recycled while
	 * attempting to ref the mount.  If this is the case our mountpoint
	 * will be set to NULL.  We only have to prevent this call from
	 * returning with a ref to an incorrect mountpoint.  It is not
	 * harmful to return with a ref to our previous mountpoint.
	 */
	mp = ap->a_vp->v_mount;
	if (mp != NULL) {
		vfs_ref(mp);
		if (mp != ap->a_vp->v_mount) {
			vfs_rel(mp);
			mp = NULL;
		}
	}
	*(ap->a_mpp) = mp;
	return (0);
}

/* XXX Needs good comment and VOP_BMAP(9) manpage */
int
vop_stdbmap(ap)
	struct vop_bmap_args /* {
		struct vnode *a_vp;
		daddr_t  a_bn;
		struct bufobj **a_bop;
		daddr_t *a_bnp;
		int *a_runp;
		int *a_runb;
	} */ *ap;
{

	if (ap->a_bop != NULL)
		*ap->a_bop = &ap->a_vp->v_bufobj;
	if (ap->a_bnp != NULL)
		*ap->a_bnp = ap->a_bn * btodb(ap->a_vp->v_mount->mnt_stat.f_iosize);
	if (ap->a_runp != NULL)
		*ap->a_runp = 0;
	if (ap->a_runb != NULL)
		*ap->a_runb = 0;
	return (0);
}
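
/*
 * Worked example of the mapping above (illustrative numbers only): with
 * an f_iosize of 16384 bytes, btodb(16384) yields 32 DEV_BSIZE (512-byte)
 * units, so logical block a_bn maps to *a_bnp = a_bn * 32.  The zeroed
 * a_runp/a_runb report that no read-ahead or read-behind clustering is
 * available.
 */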

int
vop_stdfsync(ap)
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int a_waitfor;
		struct thread *a_td;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct buf *bp;
	struct bufobj *bo;
	struct buf *nbp;
	int error = 0;
	int maxretry = 1000;     /* large, arbitrarily chosen */

	bo = &vp->v_bufobj;
	BO_LOCK(bo);
loop1:
	/*
	 * MARK/SCAN initialization to avoid infinite loops.
	 */
	TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) {
		bp->b_vflags &= ~BV_SCANNED;
		bp->b_error = 0;
	}

	/*
	 * Flush all dirty buffers associated with a vnode.
	 */
loop2:
	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
		if ((bp->b_vflags & BV_SCANNED) != 0)
			continue;
		bp->b_vflags |= BV_SCANNED;
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
			if (ap->a_waitfor != MNT_WAIT)
				continue;
			if (BUF_LOCK(bp,
			    LK_EXCLUSIVE | LK_INTERLOCK | LK_SLEEPFAIL,
			    BO_MTX(bo)) != 0) {
				BO_LOCK(bo);
				goto loop1;
			}
			BO_LOCK(bo);
		}
		BO_UNLOCK(bo);
		KASSERT(bp->b_bufobj == bo,
		    ("bp %p wrong b_bufobj %p should be %p",
		    bp, bp->b_bufobj, bo));
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("fsync: not dirty");
		if ((vp->v_object != NULL) && (bp->b_flags & B_CLUSTEROK)) {
			vfs_bio_awrite(bp);
		} else {
			bremfree(bp);
			bawrite(bp);
		}
		BO_LOCK(bo);
		goto loop2;
	}

	/*
	 * If synchronous the caller expects us to completely resolve all
	 * dirty buffers in the system.  Wait for in-progress I/O to
	 * complete (which could include background bitmap writes), then
	 * retry if dirty blocks still exist.
	 */
	if (ap->a_waitfor == MNT_WAIT) {
		bufobj_wwait(bo, 0, 0);
		if (bo->bo_dirty.bv_cnt > 0) {
			/*
			 * If we are unable to write any of these buffers
			 * then we fail now rather than trying endlessly
			 * to write them out.
			 */
			TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs)
				if ((error = bp->b_error) == 0)
					continue;
			if (error == 0 && --maxretry >= 0)
				goto loop1;
			error = EAGAIN;
		}
	}
	BO_UNLOCK(bo);
	if (error == EAGAIN)
		vprint("fsync: giving up on dirty", vp);

	return (error);
}
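
/*
 * Design note on vop_stdfsync(): the BV_SCANNED mark is what bounds the
 * passes; each buffer is visited at most once per pass, and the scan
 * restarts at loop2 after every write because dropping the bufobj lock
 * lets the dirty list change underneath us.  The maxretry counter turns
 * "buffers that never come clean" into EAGAIN instead of an endless loop.
 */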

/* XXX Needs good comment and more info in the manpage (VOP_GETPAGES(9)). */
int
vop_stdgetpages(ap)
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		vm_page_t *a_m;
		int a_count;
		int a_reqpage;
		vm_ooffset_t a_offset;
	} */ *ap;
{

	return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
	    ap->a_count, ap->a_reqpage);
}

int
vop_stdkqfilter(struct vop_kqfilter_args *ap)
{
	return vfs_kqfilter(ap);
}

/* XXX Needs good comment and more info in the manpage (VOP_PUTPAGES(9)). */
int
vop_stdputpages(ap)
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		vm_page_t *a_m;
		int a_count;
		int a_sync;
		int *a_rtvals;
		vm_ooffset_t a_offset;
	} */ *ap;
{

	return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
	     ap->a_sync, ap->a_rtvals);
}

int
vop_stdvptofh(struct vop_vptofh_args *ap)
{
	return (EOPNOTSUPP);
}

int
vop_stdvptocnp(struct vop_vptocnp_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode **dvp = ap->a_vpp;
	struct ucred *cred = ap->a_cred;
	char *buf = ap->a_buf;
	int *buflen = ap->a_buflen;
	char *dirbuf, *cpos;
	int i, error, eofflag, dirbuflen, flags, locked, len, covered;
	off_t off;
	ino_t fileno;
	struct vattr va;
	struct nameidata nd;
	struct thread *td;
	struct dirent *dp;
	struct vnode *mvp;

	i = *buflen;
	error = 0;
	covered = 0;
	td = curthread;

	if (vp->v_type != VDIR)
		return (ENOENT);

	error = VOP_GETATTR(vp, &va, cred);
	if (error)
		return (error);

	VREF(vp);
	locked = VOP_ISLOCKED(vp);
	VOP_UNLOCK(vp, 0);
	NDINIT_ATVP(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
	    "..", vp, td);
	flags = FREAD;
	error = vn_open_cred(&nd, &flags, 0, VN_OPEN_NOAUDIT, cred, NULL);
	if (error) {
		vn_lock(vp, locked | LK_RETRY);
		return (error);
	}
	NDFREE(&nd, NDF_ONLY_PNBUF);

	mvp = *dvp = nd.ni_vp;

	if (vp->v_mount != (*dvp)->v_mount &&
	    ((*dvp)->v_vflag & VV_ROOT) &&
	    ((*dvp)->v_mount->mnt_flag & MNT_UNION)) {
		*dvp = (*dvp)->v_mount->mnt_vnodecovered;
		VREF(mvp);
		VOP_UNLOCK(mvp, 0);
		vn_close(mvp, FREAD, cred, td);
		VREF(*dvp);
		vn_lock(*dvp, LK_EXCLUSIVE | LK_RETRY);
		covered = 1;
	}

	fileno = va.va_fileid;

	dirbuflen = DEV_BSIZE;
	if (dirbuflen < va.va_blocksize)
		dirbuflen = va.va_blocksize;
	dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK);

	if ((*dvp)->v_type != VDIR) {
		error = ENOENT;
		goto out;
	}

	off = 0;
	len = 0;
	do {
		/* call VOP_READDIR of parent */
		error = get_next_dirent(*dvp, &dp, dirbuf, dirbuflen, &off,
					&cpos, &len, &eofflag, td);
		if (error)
			goto out;

		if ((dp->d_type != DT_WHT) &&
		    (dp->d_fileno == fileno)) {
			if (covered) {
				VOP_UNLOCK(*dvp, 0);
				vn_lock(mvp, LK_EXCLUSIVE | LK_RETRY);
				if (dirent_exists(mvp, dp->d_name, td)) {
					error = ENOENT;
					VOP_UNLOCK(mvp, 0);
					vn_lock(*dvp, LK_EXCLUSIVE | LK_RETRY);
					goto out;
				}
				VOP_UNLOCK(mvp, 0);
				vn_lock(*dvp, LK_EXCLUSIVE | LK_RETRY);
			}
			i -= dp->d_namlen;

			if (i < 0) {
				error = ENOMEM;
				goto out;
			}
			bcopy(dp->d_name, buf + i, dp->d_namlen);
			error = 0;
			goto out;
		}
	} while (len > 0 || !eofflag);
	error = ENOENT;

out:
	free(dirbuf, M_TEMP);
	if (!error) {
		*buflen = i;
		vref(*dvp);
	}
	if (covered) {
		vput(*dvp);
		vrele(mvp);
	} else {
		VOP_UNLOCK(mvp, 0);
		vn_close(mvp, FREAD, cred, td);
	}
	vn_lock(vp, locked | LK_RETRY);
	return (error);
}
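
/*
 * Summary of the algorithm above: open the parent directory via "..",
 * scan it for an entry whose d_fileno matches our va_fileid, and copy
 * that name to the *end* of the caller's buffer (buf + i).  *buflen is
 * decremented rather than advanced so that callers reconstructing a
 * full path can prepend each component in turn.
 */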

int
vop_stdallocate(struct vop_allocate_args *ap)
{
#ifdef __notyet__
	struct statfs sfs;
#endif
	struct iovec aiov;
	struct vattr vattr, *vap;
	struct uio auio;
	off_t fsize, len, cur, offset;
	uint8_t *buf;
	struct thread *td;
	struct vnode *vp;
	size_t iosize;
	int error;

	buf = NULL;
	error = 0;
	td = curthread;
	vap = &vattr;
	vp = ap->a_vp;
	len = *ap->a_len;
	offset = *ap->a_offset;

	error = VOP_GETATTR(vp, vap, td->td_ucred);
	if (error != 0)
		goto out;
	fsize = vap->va_size;
	iosize = vap->va_blocksize;
	if (iosize == 0)
		iosize = BLKDEV_IOSIZE;
	if (iosize > MAXPHYS)
		iosize = MAXPHYS;
	buf = malloc(iosize, M_TEMP, M_WAITOK);

#ifdef __notyet__
	/*
	 * Check if the filesystem sets f_maxfilesize; if not use
	 * VOP_SETATTR to perform the check.
	 */
	error = VFS_STATFS(vp->v_mount, &sfs, td);
	if (error != 0)
		goto out;
	if (sfs.f_maxfilesize) {
		if (offset > sfs.f_maxfilesize || len > sfs.f_maxfilesize ||
		    offset + len > sfs.f_maxfilesize) {
			error = EFBIG;
			goto out;
		}
	} else
#endif
	if (offset + len > vap->va_size) {
		/*
		 * Test offset + len against the filesystem's maxfilesize.
		 */
		VATTR_NULL(vap);
		vap->va_size = offset + len;
		error = VOP_SETATTR(vp, vap, td->td_ucred);
		if (error != 0)
			goto out;
		VATTR_NULL(vap);
		vap->va_size = fsize;
		error = VOP_SETATTR(vp, vap, td->td_ucred);
		if (error != 0)
			goto out;
	}

	for (;;) {
		/*
		 * Read and write back anything below the nominal file
		 * size.  There's currently no way outside the filesystem
		 * to know whether this area is sparse or not.
		 */
		cur = iosize;
		if ((offset % iosize) != 0)
			cur -= (offset % iosize);
		if (cur > len)
			cur = len;
		if (offset < fsize) {
			aiov.iov_base = buf;
			aiov.iov_len = cur;
			auio.uio_iov = &aiov;
			auio.uio_iovcnt = 1;
			auio.uio_offset = offset;
			auio.uio_resid = cur;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_td = td;
			error = VOP_READ(vp, &auio, 0, td->td_ucred);
			if (error != 0)
				break;
			if (auio.uio_resid > 0) {
				bzero(buf + cur - auio.uio_resid,
				    auio.uio_resid);
			}
		} else {
			bzero(buf, cur);
		}

		aiov.iov_base = buf;
		aiov.iov_len = cur;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = offset;
		auio.uio_resid = cur;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_WRITE;
		auio.uio_td = td;

		error = VOP_WRITE(vp, &auio, 0, td->td_ucred);
		if (error != 0)
			break;

		len -= cur;
		offset += cur;
		if (len == 0)
			break;
		if (should_yield())
			break;
	}

 out:
	*ap->a_len = len;
	*ap->a_offset = offset;
	free(buf, M_TEMP);
	return (error);
}
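
/*
 * Caller note: the loop above may stop early via should_yield(), so this
 * can return 0 with *ap->a_len still nonzero.  A caller implementing
 * posix_fallocate(2) semantics would retry; minimal sketch (assuming
 * local off_t copies of offset and len):
 *
 *	do {
 *		error = VOP_ALLOCATE(vp, &offset, &len);
 *	} while (error == 0 && len > 0);
 */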

int
vop_stdadvise(struct vop_advise_args *ap)
{
	struct vnode *vp;
	off_t start, end;
	int error, vfslocked;

	vp = ap->a_vp;
	switch (ap->a_advice) {
	case POSIX_FADV_WILLNEED:
		/*
		 * Do nothing for now.  Filesystems should provide a
		 * custom method which starts an asynchronous read of
		 * the requested region.
		 */
		error = 0;
		break;
	case POSIX_FADV_DONTNEED:
		/*
		 * Flush any open FS buffers and then remove pages
		 * from the backing VM object.  Using vinvalbuf() here
		 * is a bit heavy-handed as it flushes all buffers for
		 * the given vnode, not just the buffers covering the
		 * requested range.
		 */
		error = 0;
		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		if (vp->v_iflag & VI_DOOMED) {
			VOP_UNLOCK(vp, 0);
			VFS_UNLOCK_GIANT(vfslocked);
			break;
		}
		vinvalbuf(vp, V_CLEANONLY, 0, 0);
		if (vp->v_object != NULL) {
			start = trunc_page(ap->a_start);
			end = round_page(ap->a_end);
			VM_OBJECT_LOCK(vp->v_object);
			vm_object_page_cache(vp->v_object, OFF_TO_IDX(start),
			    OFF_TO_IDX(end));
			VM_OBJECT_UNLOCK(vp->v_object);
		}
		VOP_UNLOCK(vp, 0);
		VFS_UNLOCK_GIANT(vfslocked);
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}
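
/*
 * This is the fallback behind posix_fadvise(2): e.g. posix_fadvise(fd,
 * off, len, POSIX_FADV_DONTNEED) reaches the DONTNEED case above when
 * the filesystem supplies no specialized VOP_ADVISE() method.
 */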

int
vop_stdunp_bind(struct vop_unp_bind_args *ap)
{

	ap->a_vp->v_socket = ap->a_socket;
	return (0);
}

int
vop_stdunp_connect(struct vop_unp_connect_args *ap)
{

	*ap->a_socket = ap->a_vp->v_socket;
	return (0);
}

int
vop_stdunp_detach(struct vop_unp_detach_args *ap)
{

	ap->a_vp->v_socket = NULL;
	return (0);
}

static int
vop_stdis_text(struct vop_is_text_args *ap)
{

	return ((ap->a_vp->v_vflag & VV_TEXT) != 0);
}

static int
vop_stdset_text(struct vop_set_text_args *ap)
{

	ap->a_vp->v_vflag |= VV_TEXT;
	return (0);
}

static int
vop_stdunset_text(struct vop_unset_text_args *ap)
{

	ap->a_vp->v_vflag &= ~VV_TEXT;
	return (0);
}

/*
 * vfs default ops
 * used to fill the vfs function table to get reasonable default return values.
 */
int
vfs_stdroot (mp, flags, vpp)
	struct mount *mp;
	int flags;
	struct vnode **vpp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdstatfs (mp, sbp)
	struct mount *mp;
	struct statfs *sbp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdquotactl (mp, cmds, uid, arg)
	struct mount *mp;
	int cmds;
	uid_t uid;
	void *arg;
{

	return (EOPNOTSUPP);
}

int
vfs_stdsync(mp, waitfor)
	struct mount *mp;
	int waitfor;
{
	struct vnode *vp, *mvp;
	struct thread *td;
	int error, lockreq, allerror = 0;

	td = curthread;
	lockreq = LK_EXCLUSIVE | LK_INTERLOCK;
	if (waitfor != MNT_WAIT)
		lockreq |= LK_NOWAIT;
	/*
	 * Force stale buffer cache information to be flushed.
	 */
loop:
	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
		if (vp->v_bufobj.bo_dirty.bv_cnt == 0) {
			VI_UNLOCK(vp);
			continue;
		}
		if ((error = vget(vp, lockreq, td)) != 0) {
			if (error == ENOENT) {
				MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
				goto loop;
			}
			continue;
		}
		error = VOP_FSYNC(vp, waitfor, td);
		if (error)
			allerror = error;
		vput(vp);
	}
	return (allerror);
}
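
/*
 * Note on the ENOENT case in vfs_stdsync(): vget() reports ENOENT when
 * the vnode was recycled while we waited for its lock; the iteration is
 * conservatively abandoned via MNT_VNODE_FOREACH_ALL_ABORT() and
 * restarted from the top of the list.
 */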

int
vfs_stdnosync (mp, waitfor)
	struct mount *mp;
	int waitfor;
{

	return (0);
}

int
vfs_stdvget (mp, ino, flags, vpp)
	struct mount *mp;
	ino_t ino;
	int flags;
	struct vnode **vpp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdfhtovp (mp, fhp, flags, vpp)
	struct mount *mp;
	struct fid *fhp;
	int flags;
	struct vnode **vpp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdinit (vfsp)
	struct vfsconf *vfsp;
{

	return (0);
}

int
vfs_stduninit (vfsp)
	struct vfsconf *vfsp;
{

	return (0);
}

int
vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace, attrname)
	struct mount *mp;
	int cmd;
	struct vnode *filename_vp;
	int attrnamespace;
	const char *attrname;
{

	if (filename_vp != NULL)
		VOP_UNLOCK(filename_vp, 0);
	return (EOPNOTSUPP);
}

int
vfs_stdsysctl(mp, op, req)
	struct mount *mp;
	fsctlop_t op;
	struct sysctl_req *req;
{

	return (EOPNOTSUPP);
}

/* end of vfs default ops */