1/*-
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed
6 * to Berkeley by John Heidemann of the UCLA Ficus project.
7 *
8 * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 4. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: stable/10/sys/kern/vfs_default.c 330266 2018-03-02 04:43:07Z mckusick $");
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/bio.h>
41#include <sys/buf.h>
42#include <sys/conf.h>
43#include <sys/event.h>
44#include <sys/kernel.h>
45#include <sys/limits.h>
46#include <sys/lock.h>
47#include <sys/lockf.h>
48#include <sys/malloc.h>
49#include <sys/mount.h>
50#include <sys/namei.h>
51#include <sys/rwlock.h>
52#include <sys/fcntl.h>
53#include <sys/unistd.h>
54#include <sys/vnode.h>
55#include <sys/dirent.h>
56#include <sys/poll.h>
57
58#include <security/mac/mac_framework.h>
59
60#include <vm/vm.h>
61#include <vm/vm_object.h>
62#include <vm/vm_extern.h>
63#include <vm/pmap.h>
64#include <vm/vm_map.h>
65#include <vm/vm_page.h>
66#include <vm/vm_pager.h>
67#include <vm/vnode_pager.h>
68
69static int	vop_nolookup(struct vop_lookup_args *);
70static int	vop_norename(struct vop_rename_args *);
71static int	vop_nostrategy(struct vop_strategy_args *);
72static int	get_next_dirent(struct vnode *vp, struct dirent **dpp,
73				char *dirbuf, int dirbuflen, off_t *off,
74				char **cpos, int *len, int *eofflag,
75				struct thread *td);
76static int	dirent_exists(struct vnode *vp, const char *dirname,
77			      struct thread *td);
78
79#define DIRENT_MINSIZE (sizeof(struct dirent) - (MAXNAMLEN+1) + 4)
80
81static int vop_stdis_text(struct vop_is_text_args *ap);
82static int vop_stdset_text(struct vop_set_text_args *ap);
83static int vop_stdunset_text(struct vop_unset_text_args *ap);
84static int vop_stdget_writecount(struct vop_get_writecount_args *ap);
85static int vop_stdadd_writecount(struct vop_add_writecount_args *ap);
86
87/*
88 * This vnode table stores what we want to do if the filesystem doesn't
89 * implement a particular VOP.
90 *
91 * If there is no specific entry here, we will return EOPNOTSUPP.
92 *
93 * Note that every filesystem has to implement either vop_access
94 * or vop_accessx; failing to do so will result in immediate crash
95 * due to stack overflow, as vop_stdaccess() calls vop_stdaccessx(),
96 * which calls vop_stdaccess() etc.
97 */
98
struct vop_vector default_vnodeops = {
	.vop_default =		NULL,
	.vop_bypass =		VOP_EOPNOTSUPP,	/* any VOP not listed below */

	.vop_access =		vop_stdaccess,
	.vop_accessx =		vop_stdaccessx,
	.vop_advise =		vop_stdadvise,
	.vop_advlock =		vop_stdadvlock,
	.vop_advlockasync =	vop_stdadvlockasync,
	.vop_advlockpurge =	vop_stdadvlockpurge,
	.vop_allocate =		vop_stdallocate,
	.vop_bmap =		vop_stdbmap,
	.vop_close =		VOP_NULL,
	.vop_fsync =		VOP_NULL,
	.vop_getpages =		vop_stdgetpages,
	.vop_getwritemount = 	vop_stdgetwritemount,
	.vop_inactive =		VOP_NULL,
	.vop_ioctl =		VOP_ENOTTY,
	.vop_kqfilter =		vop_stdkqfilter,
	.vop_islocked =		vop_stdislocked,
	.vop_lock1 =		vop_stdlock,
	.vop_lookup =		vop_nolookup,
	.vop_open =		VOP_NULL,
	.vop_pathconf =		VOP_EINVAL,
	.vop_poll =		vop_nopoll,
	.vop_putpages =		vop_stdputpages,
	.vop_readlink =		VOP_EINVAL,
	.vop_rename =		vop_norename,
	.vop_revoke =		VOP_PANIC,	/* must be supplied by the fs */
	.vop_strategy =		vop_nostrategy,
	.vop_unlock =		vop_stdunlock,
	.vop_vptocnp =		vop_stdvptocnp,
	.vop_vptofh =		vop_stdvptofh,
	.vop_unp_bind =		vop_stdunp_bind,
	.vop_unp_connect =	vop_stdunp_connect,
	.vop_unp_detach =	vop_stdunp_detach,
	.vop_is_text =		vop_stdis_text,
	.vop_set_text =		vop_stdset_text,
	.vop_unset_text =	vop_stdunset_text,
	.vop_get_writecount =	vop_stdget_writecount,
	.vop_add_writecount =	vop_stdadd_writecount,
};
141
142/*
143 * Series of placeholder functions for various error returns for
144 * VOPs.
145 */
146
/*
 * Placeholder VOP that fails with EOPNOTSUPP; also the table's
 * vop_bypass, so it catches every operation without an entry.
 */
int
vop_eopnotsupp(struct vop_generic_args *ap)
{
	/*
	printf("vop_notsupp[%s]\n", ap->a_desc->vdesc_name);
	*/

	return (EOPNOTSUPP);
}
156
/* Placeholder VOP that fails with EBADF. */
int
vop_ebadf(struct vop_generic_args *ap)
{

	return (EBADF);
}
163
/* Placeholder VOP that fails with ENOTTY (used for vop_ioctl). */
int
vop_enotty(struct vop_generic_args *ap)
{

	return (ENOTTY);
}
170
/* Placeholder VOP that fails with EINVAL (e.g. vop_readlink). */
int
vop_einval(struct vop_generic_args *ap)
{

	return (EINVAL);
}
177
/* Placeholder VOP that fails with ENOENT. */
int
vop_enoent(struct vop_generic_args *ap)
{

	return (ENOENT);
}
184
/* Placeholder VOP that succeeds without doing anything. */
int
vop_null(struct vop_generic_args *ap)
{

	return (0);
}
191
192/*
193 * Helper function to panic on some bad VOPs in some filesystems.
194 */
195int
196vop_panic(struct vop_generic_args *ap)
197{
198
199	panic("filesystem goof: vop_panic[%s]", ap->a_desc->vdesc_name);
200}
201
202/*
203 * vop_std<something> and vop_no<something> are default functions for use by
204 * filesystems that need the "default reasonable" implementation for a
205 * particular operation.
206 *
207 * The documentation for the operations they implement exists (if it exists)
208 * in the VOP_<SOMETHING>(9) manpage (all uppercase).
209 */
210
211/*
212 * Default vop for filesystems that do not support name lookup
213 */
214static int
215vop_nolookup(ap)
216	struct vop_lookup_args /* {
217		struct vnode *a_dvp;
218		struct vnode **a_vpp;
219		struct componentname *a_cnp;
220	} */ *ap;
221{
222
223	*ap->a_vpp = NULL;
224	return (ENOTDIR);
225}
226
227/*
228 * vop_norename:
229 *
230 * Handle unlock and reference counting for arguments of vop_rename
231 * for filesystems that do not implement rename operation.
232 */
233static int
234vop_norename(struct vop_rename_args *ap)
235{
236
237	vop_rename_fail(ap);
238	return (EOPNOTSUPP);
239}
240
241/*
242 *	vop_nostrategy:
243 *
244 *	Strategy routine for VFS devices that have none.
245 *
246 *	BIO_ERROR and B_INVAL must be cleared prior to calling any strategy
247 *	routine.  Typically this is done for a BIO_READ strategy call.
248 *	Typically B_INVAL is assumed to already be clear prior to a write
249 *	and should not be cleared manually unless you just made the buffer
250 *	invalid.  BIO_ERROR should be cleared either way.
251 */
252
253static int
254vop_nostrategy (struct vop_strategy_args *ap)
255{
256	printf("No strategy for buffer at %p\n", ap->a_bp);
257	vprint("vnode", ap->a_vp);
258	ap->a_bp->b_ioflags |= BIO_ERROR;
259	ap->a_bp->b_error = EOPNOTSUPP;
260	bufdone(ap->a_bp);
261	return (EOPNOTSUPP);
262}
263
/*
 * Return the next directory entry of the locked directory vnode vp in
 * *dpp, batching VOP_READDIR() output through the caller-supplied
 * buffer dirbuf/dirbuflen.  *cpos/*len track the unconsumed part of
 * the buffer (pass *len == 0 on the first call), *off is the directory
 * offset at which to read next and *eofflag is set by VOP_READDIR().
 * Returns ENOENT when a refill yields no data, EINVAL on a malformed
 * (undersized) record, or a VOP_READDIR()/MAC error.
 */
static int
get_next_dirent(struct vnode *vp, struct dirent **dpp, char *dirbuf,
		int dirbuflen, off_t *off, char **cpos, int *len,
		int *eofflag, struct thread *td)
{
	int error, reclen;
	struct uio uio;
	struct iovec iov;
	struct dirent *dp;

	KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp));
	KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp));

	if (*len == 0) {
		/* Buffer exhausted: refill it from the directory. */
		iov.iov_base = dirbuf;
		iov.iov_len = dirbuflen;

		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_offset = *off;
		uio.uio_resid = dirbuflen;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_rw = UIO_READ;
		uio.uio_td = td;

		*eofflag = 0;

#ifdef MAC
		error = mac_vnode_check_readdir(td->td_ucred, vp);
		if (error == 0)
#endif
			error = VOP_READDIR(vp, &uio, td->td_ucred, eofflag,
		    		NULL, NULL);
		if (error)
			return (error);

		*off = uio.uio_offset;

		*cpos = dirbuf;
		*len = (dirbuflen - uio.uio_resid);

		if (*len == 0)
			return (ENOENT);
	}

	/* Hand out the entry at the current buffer position. */
	dp = (struct dirent *)(*cpos);
	reclen = dp->d_reclen;
	*dpp = dp;

	/* check for malformed directory.. */
	if (reclen < DIRENT_MINSIZE)
		return (EINVAL);

	/* Advance past this record for the next call. */
	*cpos += reclen;
	*len -= reclen;

	return (0);
}
322
323/*
324 * Check if a named file exists in a given directory vnode.
325 */
326static int
327dirent_exists(struct vnode *vp, const char *dirname, struct thread *td)
328{
329	char *dirbuf, *cpos;
330	int error, eofflag, dirbuflen, len, found;
331	off_t off;
332	struct dirent *dp;
333	struct vattr va;
334
335	KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp));
336	KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp));
337
338	found = 0;
339
340	error = VOP_GETATTR(vp, &va, td->td_ucred);
341	if (error)
342		return (found);
343
344	dirbuflen = DEV_BSIZE;
345	if (dirbuflen < va.va_blocksize)
346		dirbuflen = va.va_blocksize;
347	dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK);
348
349	off = 0;
350	len = 0;
351	do {
352		error = get_next_dirent(vp, &dp, dirbuf, dirbuflen, &off,
353					&cpos, &len, &eofflag, td);
354		if (error)
355			goto out;
356
357		if (dp->d_type != DT_WHT && dp->d_fileno != 0 &&
358		    strcmp(dp->d_name, dirname) == 0) {
359			found = 1;
360			goto out;
361		}
362	} while (len > 0 || !eofflag);
363
364out:
365	free(dirbuf, M_TEMP);
366	return (found);
367}
368
369int
370vop_stdaccess(struct vop_access_args *ap)
371{
372
373	KASSERT((ap->a_accmode & ~(VEXEC | VWRITE | VREAD | VADMIN |
374	    VAPPEND)) == 0, ("invalid bit in accmode"));
375
376	return (VOP_ACCESSX(ap->a_vp, ap->a_accmode, ap->a_cred, ap->a_td));
377}
378
379int
380vop_stdaccessx(struct vop_accessx_args *ap)
381{
382	int error;
383	accmode_t accmode = ap->a_accmode;
384
385	error = vfs_unixify_accmode(&accmode);
386	if (error != 0)
387		return (error);
388
389	if (accmode == 0)
390		return (0);
391
392	return (VOP_ACCESS(ap->a_vp, accmode, ap->a_cred, ap->a_td));
393}
394
395/*
396 * Advisory record locking support
397 */
398int
399vop_stdadvlock(struct vop_advlock_args *ap)
400{
401	struct vnode *vp;
402	struct vattr vattr;
403	int error;
404
405	vp = ap->a_vp;
406	if (ap->a_fl->l_whence == SEEK_END) {
407		/*
408		 * The NFSv4 server must avoid doing a vn_lock() here, since it
409		 * can deadlock the nfsd threads, due to a LOR.  Fortunately
410		 * the NFSv4 server always uses SEEK_SET and this code is
411		 * only required for the SEEK_END case.
412		 */
413		vn_lock(vp, LK_SHARED | LK_RETRY);
414		error = VOP_GETATTR(vp, &vattr, curthread->td_ucred);
415		VOP_UNLOCK(vp, 0);
416		if (error)
417			return (error);
418	} else
419		vattr.va_size = 0;
420
421	return (lf_advlock(ap, &(vp->v_lockf), vattr.va_size));
422}
423
/*
 * Asynchronous variant of vop_stdadvlock(); same file-size handling,
 * delegating to lf_advlockasync().
 */
int
vop_stdadvlockasync(struct vop_advlockasync_args *ap)
{
	struct vnode *vp;
	struct vattr vattr;
	int error;

	vp = ap->a_vp;
	if (ap->a_fl->l_whence == SEEK_END) {
		/* The size argument is only needed for SEEK_END. */
		vn_lock(vp, LK_SHARED | LK_RETRY);
		error = VOP_GETATTR(vp, &vattr, curthread->td_ucred);
		VOP_UNLOCK(vp, 0);
		if (error)
			return (error);
	} else
		vattr.va_size = 0;

	return (lf_advlockasync(ap, &(vp->v_lockf), vattr.va_size));
}
444
445int
446vop_stdadvlockpurge(struct vop_advlockpurge_args *ap)
447{
448	struct vnode *vp;
449
450	vp = ap->a_vp;
451	lf_purgelocks(vp, &vp->v_lockf);
452	return (0);
453}
454
455/*
456 * vop_stdpathconf:
457 *
458 * Standard implementation of POSIX pathconf, to get information about limits
459 * for a filesystem.
460 * Override per filesystem for the case where the filesystem has smaller
461 * limits.
462 */
463int
464vop_stdpathconf(ap)
465	struct vop_pathconf_args /* {
466	struct vnode *a_vp;
467	int a_name;
468	int *a_retval;
469	} */ *ap;
470{
471
472	switch (ap->a_name) {
473		case _PC_NAME_MAX:
474			*ap->a_retval = NAME_MAX;
475			return (0);
476		case _PC_PATH_MAX:
477			*ap->a_retval = PATH_MAX;
478			return (0);
479		case _PC_LINK_MAX:
480			*ap->a_retval = LINK_MAX;
481			return (0);
482		case _PC_MAX_CANON:
483			*ap->a_retval = MAX_CANON;
484			return (0);
485		case _PC_MAX_INPUT:
486			*ap->a_retval = MAX_INPUT;
487			return (0);
488		case _PC_PIPE_BUF:
489			*ap->a_retval = PIPE_BUF;
490			return (0);
491		case _PC_CHOWN_RESTRICTED:
492			*ap->a_retval = 1;
493			return (0);
494		case _PC_VDISABLE:
495			*ap->a_retval = _POSIX_VDISABLE;
496			return (0);
497		default:
498			return (EINVAL);
499	}
500	/* NOTREACHED */
501}
502
503/*
504 * Standard lock, unlock and islocked functions.
505 */
506int
507vop_stdlock(ap)
508	struct vop_lock1_args /* {
509		struct vnode *a_vp;
510		int a_flags;
511		char *file;
512		int line;
513	} */ *ap;
514{
515	struct vnode *vp = ap->a_vp;
516
517	return (_lockmgr_args(vp->v_vnlock, ap->a_flags, VI_MTX(vp),
518	    LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, ap->a_file,
519	    ap->a_line));
520}
521
522/* See above. */
523int
524vop_stdunlock(ap)
525	struct vop_unlock_args /* {
526		struct vnode *a_vp;
527		int a_flags;
528	} */ *ap;
529{
530	struct vnode *vp = ap->a_vp;
531
532	return (lockmgr(vp->v_vnlock, ap->a_flags | LK_RELEASE, VI_MTX(vp)));
533}
534
535/* See above. */
536int
537vop_stdislocked(ap)
538	struct vop_islocked_args /* {
539		struct vnode *a_vp;
540	} */ *ap;
541{
542
543	return (lockstatus(ap->a_vp->v_vnlock));
544}
545
546/*
547 * Return true for select/poll.
548 */
549int
550vop_nopoll(ap)
551	struct vop_poll_args /* {
552		struct vnode *a_vp;
553		int  a_events;
554		struct ucred *a_cred;
555		struct thread *a_td;
556	} */ *ap;
557{
558
559	return (poll_no_poll(ap->a_events));
560}
561
562/*
563 * Implement poll for local filesystems that support it.
564 */
565int
566vop_stdpoll(ap)
567	struct vop_poll_args /* {
568		struct vnode *a_vp;
569		int  a_events;
570		struct ucred *a_cred;
571		struct thread *a_td;
572	} */ *ap;
573{
574	if (ap->a_events & ~POLLSTANDARD)
575		return (vn_pollrecord(ap->a_vp, ap->a_td, ap->a_events));
576	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
577}
578
579/*
580 * Return our mount point, as we will take charge of the writes.
581 */
582int
583vop_stdgetwritemount(ap)
584	struct vop_getwritemount_args /* {
585		struct vnode *a_vp;
586		struct mount **a_mpp;
587	} */ *ap;
588{
589	struct mount *mp;
590
591	/*
592	 * XXX Since this is called unlocked we may be recycled while
593	 * attempting to ref the mount.  If this is the case or mountpoint
594	 * will be set to NULL.  We only have to prevent this call from
595	 * returning with a ref to an incorrect mountpoint.  It is not
596	 * harmful to return with a ref to our previous mountpoint.
597	 */
598	mp = ap->a_vp->v_mount;
599	if (mp != NULL) {
600		vfs_ref(mp);
601		if (mp != ap->a_vp->v_mount) {
602			vfs_rel(mp);
603			mp = NULL;
604		}
605	}
606	*(ap->a_mpp) = mp;
607	return (0);
608}
609
610/* XXX Needs good comment and VOP_BMAP(9) manpage */
611int
612vop_stdbmap(ap)
613	struct vop_bmap_args /* {
614		struct vnode *a_vp;
615		daddr_t  a_bn;
616		struct bufobj **a_bop;
617		daddr_t *a_bnp;
618		int *a_runp;
619		int *a_runb;
620	} */ *ap;
621{
622
623	if (ap->a_bop != NULL)
624		*ap->a_bop = &ap->a_vp->v_bufobj;
625	if (ap->a_bnp != NULL)
626		*ap->a_bnp = ap->a_bn * btodb(ap->a_vp->v_mount->mnt_stat.f_iosize);
627	if (ap->a_runp != NULL)
628		*ap->a_runp = 0;
629	if (ap->a_runb != NULL)
630		*ap->a_runb = 0;
631	return (0);
632}
633
/*
 * Default fsync: push all dirty buffers of the vnode's buffer object
 * to disk.  Uses a MARK/SCAN pass (BV_SCANNED) so concurrent writers
 * re-dirtying buffers cannot make the scan loop forever, and retries
 * up to maxretry times for MNT_WAIT callers before giving up with
 * EAGAIN.
 */
int
vop_stdfsync(ap)
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		int a_waitfor;
		struct thread *a_td;
	} */ *ap;
{
	struct vnode *vp;
	struct buf *bp, *nbp;
	struct bufobj *bo;
	struct mount *mp;
	int error, maxretry;

	error = 0;
	maxretry = 10000;     /* large, arbitrarily chosen */
	vp = ap->a_vp;
	mp = NULL;
	if (vp->v_type == VCHR) {
		VI_LOCK(vp);
		mp = vp->v_rdev->si_mountpt;
		VI_UNLOCK(vp);
	}
	bo = &vp->v_bufobj;
	BO_LOCK(bo);
loop1:
	/*
	 * MARK/SCAN initialization to avoid infinite loops.
	 */
        TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) {
                bp->b_vflags &= ~BV_SCANNED;
		bp->b_error = 0;
	}

	/*
	 * Flush all dirty buffers associated with a vnode.
	 */
loop2:
	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
		if ((bp->b_vflags & BV_SCANNED) != 0)
			continue;
		bp->b_vflags |= BV_SCANNED;
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
			if (ap->a_waitfor != MNT_WAIT)
				continue;
			/*
			 * Sleep for the buffer lock; LK_SLEEPFAIL means a
			 * failure here requires restarting the whole scan.
			 */
			if (BUF_LOCK(bp,
			    LK_EXCLUSIVE | LK_INTERLOCK | LK_SLEEPFAIL,
			    BO_LOCKPTR(bo)) != 0) {
				BO_LOCK(bo);
				goto loop1;
			}
			BO_LOCK(bo);
		}
		BO_UNLOCK(bo);
		KASSERT(bp->b_bufobj == bo,
		    ("bp %p wrong b_bufobj %p should be %p",
		    bp, bp->b_bufobj, bo));
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("fsync: not dirty");
		/* Start an async write; cluster with neighbors if allowed. */
		if ((vp->v_object != NULL) && (bp->b_flags & B_CLUSTEROK)) {
			vfs_bio_awrite(bp);
		} else {
			bremfree(bp);
			bawrite(bp);
		}
		/* Throttle once we are running low on retries. */
		if (maxretry < 1000)
			pause("dirty", hz < 1000 ? 1 : hz / 1000);
		BO_LOCK(bo);
		goto loop2;
	}

	/*
	 * If synchronous the caller expects us to completely resolve all
	 * dirty buffers in the system.  Wait for in-progress I/O to
	 * complete (which could include background bitmap writes), then
	 * retry if dirty blocks still exist.
	 */
	if (ap->a_waitfor == MNT_WAIT) {
		bufobj_wwait(bo, 0, 0);
		if (bo->bo_dirty.bv_cnt > 0) {
			/*
			 * If we are unable to write any of these buffers
			 * then we fail now rather than trying endlessly
			 * to write them out.
			 */
			TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs)
				if ((error = bp->b_error) == 0)
					continue;
			if ((mp != NULL && mp->mnt_secondary_writes > 0) ||
			    (error == 0 && --maxretry >= 0))
				goto loop1;
			if (error == 0)
				error = EAGAIN;
		}
	}
	BO_UNLOCK(bo);
	if (error != 0)
		vn_printf(vp, "fsync: giving up on dirty (error = %d) ", error);

	return (error);
}
735
736/* XXX Needs good comment and more info in the manpage (VOP_GETPAGES(9)). */
737int
738vop_stdgetpages(ap)
739	struct vop_getpages_args /* {
740		struct vnode *a_vp;
741		vm_page_t *a_m;
742		int a_count;
743		int a_reqpage;
744		vm_ooffset_t a_offset;
745	} */ *ap;
746{
747
748	return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
749	    ap->a_count, ap->a_reqpage);
750}
751
/* Default kqfilter: hand the request to the generic vfs kevent filter. */
int
vop_stdkqfilter(struct vop_kqfilter_args *ap)
{
	return vfs_kqfilter(ap);
}
757
758/* XXX Needs good comment and more info in the manpage (VOP_PUTPAGES(9)). */
759int
760vop_stdputpages(ap)
761	struct vop_putpages_args /* {
762		struct vnode *a_vp;
763		vm_page_t *a_m;
764		int a_count;
765		int a_sync;
766		int *a_rtvals;
767		vm_ooffset_t a_offset;
768	} */ *ap;
769{
770
771	return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
772	     ap->a_sync, ap->a_rtvals);
773}
774
/*
 * Default vptofh: NFS-style file handles are not supported unless the
 * filesystem provides its own translation.
 */
int
vop_stdvptofh(struct vop_vptofh_args *ap)
{
	return (EOPNOTSUPP);
}
780
/*
 * Default vptocnp (vnode-to-component-name): recover the name of
 * directory vnode vp by opening its ".." entry and scanning that
 * parent directory for an entry whose d_fileno matches vp's fileid.
 *
 * On success the name is stored at the *tail* of buf, *buflen is
 * updated to the offset where the name begins, and *a_vpp is set to
 * the referenced parent directory vnode.  A union-mount parent is
 * translated to the covered vnode, with an extra uniqueness check so
 * a name that also exists in the upper layer is rejected (ENOENT).
 * vp's original lock state is restored before returning.
 */
int
vop_stdvptocnp(struct vop_vptocnp_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode **dvp = ap->a_vpp;
	struct ucred *cred = ap->a_cred;
	char *buf = ap->a_buf;
	int *buflen = ap->a_buflen;
	char *dirbuf, *cpos;
	int i, error, eofflag, dirbuflen, flags, locked, len, covered;
	off_t off;
	ino_t fileno;
	struct vattr va;
	struct nameidata nd;
	struct thread *td;
	struct dirent *dp;
	struct vnode *mvp;

	/* i walks backwards from the end of the caller's buffer. */
	i = *buflen;
	error = 0;
	covered = 0;
	td = curthread;

	if (vp->v_type != VDIR)
		return (ENOENT);

	error = VOP_GETATTR(vp, &va, cred);
	if (error)
		return (error);

	/* Drop vp's lock while we open "..", restore it on every exit. */
	VREF(vp);
	locked = VOP_ISLOCKED(vp);
	VOP_UNLOCK(vp, 0);
	NDINIT_ATVP(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
	    "..", vp, td);
	flags = FREAD;
	error = vn_open_cred(&nd, &flags, 0, VN_OPEN_NOAUDIT, cred, NULL);
	if (error) {
		vn_lock(vp, locked | LK_RETRY);
		return (error);
	}
	NDFREE(&nd, NDF_ONLY_PNBUF);

	mvp = *dvp = nd.ni_vp;

	/*
	 * Union mount: if the parent is a mount root, scan the covered
	 * (lower) vnode instead; mvp keeps the opened vnode for cleanup.
	 */
	if (vp->v_mount != (*dvp)->v_mount &&
	    ((*dvp)->v_vflag & VV_ROOT) &&
	    ((*dvp)->v_mount->mnt_flag & MNT_UNION)) {
		*dvp = (*dvp)->v_mount->mnt_vnodecovered;
		VREF(mvp);
		VOP_UNLOCK(mvp, 0);
		vn_close(mvp, FREAD, cred, td);
		VREF(*dvp);
		vn_lock(*dvp, LK_EXCLUSIVE | LK_RETRY);
		covered = 1;
	}

	fileno = va.va_fileid;

	dirbuflen = DEV_BSIZE;
	if (dirbuflen < va.va_blocksize)
		dirbuflen = va.va_blocksize;
	dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK);

	if ((*dvp)->v_type != VDIR) {
		error = ENOENT;
		goto out;
	}

	off = 0;
	len = 0;
	do {
		/* call VOP_READDIR of parent */
		error = get_next_dirent(*dvp, &dp, dirbuf, dirbuflen, &off,
					&cpos, &len, &eofflag, td);
		if (error)
			goto out;

		if ((dp->d_type != DT_WHT) &&
		    (dp->d_fileno == fileno)) {
			if (covered) {
				/*
				 * Reject the name if it is shadowed by an
				 * upper-layer entry of the union mount.
				 */
				VOP_UNLOCK(*dvp, 0);
				vn_lock(mvp, LK_EXCLUSIVE | LK_RETRY);
				if (dirent_exists(mvp, dp->d_name, td)) {
					error = ENOENT;
					VOP_UNLOCK(mvp, 0);
					vn_lock(*dvp, LK_EXCLUSIVE | LK_RETRY);
					goto out;
				}
				VOP_UNLOCK(mvp, 0);
				vn_lock(*dvp, LK_EXCLUSIVE | LK_RETRY);
			}
			i -= dp->d_namlen;

			if (i < 0) {
				/* Caller's buffer too small for the name. */
				error = ENOMEM;
				goto out;
			}
			if (dp->d_namlen == 1 && dp->d_name[0] == '.') {
				error = ENOENT;
			} else {
				bcopy(dp->d_name, buf + i, dp->d_namlen);
				error = 0;
			}
			goto out;
		}
	} while (len > 0 || !eofflag);
	error = ENOENT;

out:
	free(dirbuf, M_TEMP);
	if (!error) {
		*buflen = i;
		vref(*dvp);
	}
	if (covered) {
		vput(*dvp);
		vrele(mvp);
	} else {
		VOP_UNLOCK(mvp, 0);
		vn_close(mvp, FREAD, cred, td);
	}
	vn_lock(vp, locked | LK_RETRY);
	return (error);
}
906
/*
 * Default allocate (posix_fallocate()-style): ensure backing storage
 * exists for the byte range [*a_offset, *a_offset + *a_len) by reading
 * each block that lies below the current EOF and writing it back, and
 * writing zeros for the range beyond EOF.  *a_len and *a_offset are
 * updated to reflect remaining work so the caller can restart after a
 * partial pass (the loop yields voluntarily via should_yield()).
 */
int
vop_stdallocate(struct vop_allocate_args *ap)
{
#ifdef __notyet__
	struct statfs sfs;
#endif
	struct iovec aiov;
	struct vattr vattr, *vap;
	struct uio auio;
	off_t fsize, len, cur, offset;
	uint8_t *buf;
	struct thread *td;
	struct vnode *vp;
	size_t iosize;
	int error;

	buf = NULL;
	error = 0;
	td = curthread;
	vap = &vattr;
	vp = ap->a_vp;
	len = *ap->a_len;
	offset = *ap->a_offset;

	error = VOP_GETATTR(vp, vap, td->td_ucred);
	if (error != 0)
		goto out;
	fsize = vap->va_size;
	/* Work one filesystem block at a time, clamped to MAXPHYS. */
	iosize = vap->va_blocksize;
	if (iosize == 0)
		iosize = BLKDEV_IOSIZE;
	if (iosize > MAXPHYS)
		iosize = MAXPHYS;
	buf = malloc(iosize, M_TEMP, M_WAITOK);

#ifdef __notyet__
	/*
	 * Check if the filesystem sets f_maxfilesize; if not use
	 * VOP_SETATTR to perform the check.
	 */
	error = VFS_STATFS(vp->v_mount, &sfs, td);
	if (error != 0)
		goto out;
	if (sfs.f_maxfilesize) {
		if (offset > sfs.f_maxfilesize || len > sfs.f_maxfilesize ||
		    offset + len > sfs.f_maxfilesize) {
			error = EFBIG;
			goto out;
		}
	} else
#endif
	if (offset + len > vap->va_size) {
		/*
		 * Test offset + len against the filesystem's maxfilesize.
		 * Grow the file to the requested size, then shrink it back,
		 * letting VOP_SETATTR report EFBIG-style failures.
		 */
		VATTR_NULL(vap);
		vap->va_size = offset + len;
		error = VOP_SETATTR(vp, vap, td->td_ucred);
		if (error != 0)
			goto out;
		VATTR_NULL(vap);
		vap->va_size = fsize;
		error = VOP_SETATTR(vp, vap, td->td_ucred);
		if (error != 0)
			goto out;
	}

	for (;;) {
		/*
		 * Read and write back anything below the nominal file
		 * size.  There's currently no way outside the filesystem
		 * to know whether this area is sparse or not.
		 */
		cur = iosize;
		/* Align the chunk so subsequent I/O is block-aligned. */
		if ((offset % iosize) != 0)
			cur -= (offset % iosize);
		if (cur > len)
			cur = len;
		if (offset < fsize) {
			aiov.iov_base = buf;
			aiov.iov_len = cur;
			auio.uio_iov = &aiov;
			auio.uio_iovcnt = 1;
			auio.uio_offset = offset;
			auio.uio_resid = cur;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_td = td;
			error = VOP_READ(vp, &auio, 0, td->td_ucred);
			if (error != 0)
				break;
			if (auio.uio_resid > 0) {
				/* Short read: zero-fill the tail. */
				bzero(buf + cur - auio.uio_resid,
				    auio.uio_resid);
			}
		} else {
			/* Entirely beyond EOF: write zeros. */
			bzero(buf, cur);
		}

		aiov.iov_base = buf;
		aiov.iov_len = cur;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = offset;
		auio.uio_resid = cur;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_WRITE;
		auio.uio_td = td;

		error = VOP_WRITE(vp, &auio, 0, td->td_ucred);
		if (error != 0)
			break;

		len -= cur;
		offset += cur;
		if (len == 0)
			break;
		/* Yield to the scheduler; caller restarts with updated range. */
		if (should_yield())
			break;
	}

 out:
	*ap->a_len = len;
	*ap->a_offset = offset;
	free(buf, M_TEMP);
	return (error);
}
1034
/*
 * Default advise (posix_fadvise()): only POSIX_FADV_WILLNEED and
 * POSIX_FADV_DONTNEED are handled here; other advice values are
 * rejected with EINVAL.
 */
int
vop_stdadvise(struct vop_advise_args *ap)
{
	struct vnode *vp;
	off_t start, end;
	int error;

	vp = ap->a_vp;
	switch (ap->a_advice) {
	case POSIX_FADV_WILLNEED:
		/*
		 * Do nothing for now.  Filesystems should provide a
		 * custom method which starts an asynchronous read of
		 * the requested region.
		 */
		error = 0;
		break;
	case POSIX_FADV_DONTNEED:
		/*
		 * Flush any open FS buffers and then remove pages
		 * from the backing VM object.  Using vinvalbuf() here
		 * is a bit heavy-handed as it flushes all buffers for
		 * the given vnode, not just the buffers covering the
		 * requested range.
		 */
		error = 0;
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		if (vp->v_iflag & VI_DOOMED) {
			/* Vnode is being reclaimed; nothing to do. */
			VOP_UNLOCK(vp, 0);
			break;
		}
		vinvalbuf(vp, V_CLEANONLY, 0, 0);
		if (vp->v_object != NULL) {
			/* Page-align the range before evicting pages. */
			start = trunc_page(ap->a_start);
			end = round_page(ap->a_end);
			VM_OBJECT_WLOCK(vp->v_object);
			vm_object_page_cache(vp->v_object, OFF_TO_IDX(start),
			    OFF_TO_IDX(end));
			VM_OBJECT_WUNLOCK(vp->v_object);
		}
		VOP_UNLOCK(vp, 0);
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}
1083
1084int
1085vop_stdunp_bind(struct vop_unp_bind_args *ap)
1086{
1087
1088	ap->a_vp->v_socket = ap->a_socket;
1089	return (0);
1090}
1091
1092int
1093vop_stdunp_connect(struct vop_unp_connect_args *ap)
1094{
1095
1096	*ap->a_socket = ap->a_vp->v_socket;
1097	return (0);
1098}
1099
1100int
1101vop_stdunp_detach(struct vop_unp_detach_args *ap)
1102{
1103
1104	ap->a_vp->v_socket = NULL;
1105	return (0);
1106}
1107
1108static int
1109vop_stdis_text(struct vop_is_text_args *ap)
1110{
1111
1112	return ((ap->a_vp->v_vflag & VV_TEXT) != 0);
1113}
1114
1115static int
1116vop_stdset_text(struct vop_set_text_args *ap)
1117{
1118
1119	ap->a_vp->v_vflag |= VV_TEXT;
1120	return (0);
1121}
1122
1123static int
1124vop_stdunset_text(struct vop_unset_text_args *ap)
1125{
1126
1127	ap->a_vp->v_vflag &= ~VV_TEXT;
1128	return (0);
1129}
1130
1131static int
1132vop_stdget_writecount(struct vop_get_writecount_args *ap)
1133{
1134
1135	*ap->a_writecount = ap->a_vp->v_writecount;
1136	return (0);
1137}
1138
1139static int
1140vop_stdadd_writecount(struct vop_add_writecount_args *ap)
1141{
1142
1143	ap->a_vp->v_writecount += ap->a_inc;
1144	return (0);
1145}
1146
1147/*
1148 * vfs default ops
1149 * used to fill the vfs function table to get reasonable default return values.
1150 */
1151int
1152vfs_stdroot (mp, flags, vpp)
1153	struct mount *mp;
1154	int flags;
1155	struct vnode **vpp;
1156{
1157
1158	return (EOPNOTSUPP);
1159}
1160
1161int
1162vfs_stdstatfs (mp, sbp)
1163	struct mount *mp;
1164	struct statfs *sbp;
1165{
1166
1167	return (EOPNOTSUPP);
1168}
1169
1170int
1171vfs_stdquotactl (mp, cmds, uid, arg)
1172	struct mount *mp;
1173	int cmds;
1174	uid_t uid;
1175	void *arg;
1176{
1177
1178	return (EOPNOTSUPP);
1179}
1180
1181int
1182vfs_stdsync(mp, waitfor)
1183	struct mount *mp;
1184	int waitfor;
1185{
1186	struct vnode *vp, *mvp;
1187	struct thread *td;
1188	int error, lockreq, allerror = 0;
1189
1190	td = curthread;
1191	lockreq = LK_EXCLUSIVE | LK_INTERLOCK;
1192	if (waitfor != MNT_WAIT)
1193		lockreq |= LK_NOWAIT;
1194	/*
1195	 * Force stale buffer cache information to be flushed.
1196	 */
1197loop:
1198	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1199		if (vp->v_bufobj.bo_dirty.bv_cnt == 0) {
1200			VI_UNLOCK(vp);
1201			continue;
1202		}
1203		if ((error = vget(vp, lockreq, td)) != 0) {
1204			if (error == ENOENT) {
1205				MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1206				goto loop;
1207			}
1208			continue;
1209		}
1210		error = VOP_FSYNC(vp, waitfor, td);
1211		if (error)
1212			allerror = error;
1213		vput(vp);
1214	}
1215	return (allerror);
1216}
1217
/* Default no-op sync: always reports success. */
int
vfs_stdnosync(struct mount *mp, int waitfor)
{

	return (0);
}
1226
1227int
1228vfs_stdvget (mp, ino, flags, vpp)
1229	struct mount *mp;
1230	ino_t ino;
1231	int flags;
1232	struct vnode **vpp;
1233{
1234
1235	return (EOPNOTSUPP);
1236}
1237
1238int
1239vfs_stdfhtovp (mp, fhp, flags, vpp)
1240	struct mount *mp;
1241	struct fid *fhp;
1242	int flags;
1243	struct vnode **vpp;
1244{
1245
1246	return (EOPNOTSUPP);
1247}
1248
/* Default filesystem initialization hook: nothing to do. */
int
vfs_stdinit(struct vfsconf *vfsp)
{

	return (0);
}
1256
/* Default filesystem teardown hook: nothing to do. */
int
vfs_stduninit(struct vfsconf *vfsp)
{

	return (0);
}
1264
1265int
1266vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace, attrname)
1267	struct mount *mp;
1268	int cmd;
1269	struct vnode *filename_vp;
1270	int attrnamespace;
1271	const char *attrname;
1272{
1273
1274	if (filename_vp != NULL)
1275		VOP_UNLOCK(filename_vp, 0);
1276	return (EOPNOTSUPP);
1277}
1278
1279int
1280vfs_stdsysctl(mp, op, req)
1281	struct mount *mp;
1282	fsctlop_t op;
1283	struct sysctl_req *req;
1284{
1285
1286	return (EOPNOTSUPP);
1287}
1288
1289/* end of vfs default ops */
1290